基于深度学习的二维伊辛模型相变识别训练结果异常问题排查

基于深度学习的二维伊辛模型相变识别训练结果异常问题排查

阿华AIGC实验室

2026-4-15

基于深度学习的二维伊辛模型相变识别训练结果异常问题排查

我正在用Python3做一个数值项目，目标是用深度学习估计二维伊辛模型的相变点。目前已经完成了用Metropolis蒙特卡洛方法计算自旋构型的部分，具体执行的步骤如下：

用Metropolis方法生成两类自旋构型数据各10000组：一类是温度范围0<T<2的铁磁相，另一类是2.5<T<10的顺磁相
给铁磁相数据打标签(1,0)，顺磁相打标签(0,1)
划分8000组作为训练集，2000组作为验证集，用于训练神经网络

现在数据准备看起来是没问题的，但训练结果完全不符合预期：损失函数下降得太快了，我怀疑程序哪里出错了，下面是我的神经网络训练部分代码：

# 2. Build a Neural Network
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(900,)),
    keras.layers.Dense(100, activation=tf.nn.sigmoid),
    # keras.layers.Dropout(0.3),
    keras.layers.Dense(2, activation=tf.nn.softmax)
])

# Compile the Model
model.compile(optimizer="adam", loss='binary_crossentropy', metrics=['accuracy'])

# 3. Training
history = model.fit(
    x_train, y_train,
    epochs=500,
    batch_size=64,
    validation_data=(x_val, y_val)
)

# 4. Visualize Training
plt.figure(figsize=(12, 5))

# Plot the Loss Function
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss Function Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.yscale('log')
plt.legend()

plt.tight_layout()
plt.show()

这是我生成自旋构型的蒙特卡洛模拟完整代码：

"""
Monte Carlo simulation test for the 2D Ising model using the Metropolis method
"""

import numpy as np
from sklearn.semi_supervised import LabelPropagation
from sklearn.model_selection import train_test_split
import numpy as np
import random as rnd

from random import random, randrange
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, LeakyReLU, Input
from tensorflow.keras import optimizers
from tensorflow.keras import backend as K
import collections
from tensorflow.keras.utils import to_categorical

# Define a function to calculate the energy for a given spin configuration `alis`
def Ecalc2(alis2, Nx, Ny):
    dum = 0
    for i in range(-1, Nx):
        for j in range(0, Ny):

            l = i
            m = j

            if l == -1:
                l = Nx
            if l == Nx:
                l = 0
            if m == -1:
                m = Ny
            if m == Ny:
                m = 0

            ll = i + 1
            if ll == Nx:
                ll = 0
            dum += alis2[l, m] * alis2[ll, m]

    for i in range(0, Nx):
        for j in range(-1, Ny):

            l = i
            m = j

            if l == -1:
                l = Nx
            if l == Nx:
                l = 0
            if m == -1:
                m = Ny
            if m == Ny:
                m = 0

            mm = j + 1
            if mm == Ny:
                mm = 0
            dum += alis2[l, m] * alis2[l, mm]

    return dum

# Generate a random initial state: equal number of up and down spins, M=0
def Initial_rand(s, Nx, Ny):
    NN = int(Nx * Ny / 2)
    for k in range(NN):
        i = randrange(Nx - 1)  # Select a random number from 0 to N-1
        j = randrange(Ny - 1)  # Select a random number from 0 to N-1
        s[i, j] = -1 * s[i, j]

    return s

Nx = 30  # Divide x-direction into 30 parts
Ny = 30  # Divide y-direction into 30 parts
Ntot = Nx * Ny

# Setup
st_mag = []

KBT_lis = np.linspace(0.001, 2, 100)  # Change temperature from 0.001 to 2 (in units of kBT) in 100 steps
for KBT in KBT_lis:

    J = 1  # Spin coupling constant
    B = 0.0  # External magnetic field
    steps = 80000  # MC steps

    # Generate initial state: random spin configuration
    s = np.ones([Nx, Ny], int)  # Set N spins with quantum numbers (Sz = +1 or -1) all to 1
    s = Initial_rand(s, Nx, Ny)

    E = -J * Ecalc2(s, Nx, Ny) - B * np.sum(s)  # Calculate (initial) energy
    E2 = E**2  # Store E^2

    # Main loop
    for k in range(steps):
        i = randrange(Nx - 1)  # Select a random number from 0 to N-1
        j = randrange(Ny - 1)  # Select a random number from 0 to N-1

        s_trial = s.copy()
        s_trial[i, j] = -1 * s[i, j]
        delta_E = 2 * s_trial[i, j] * -1 * J * (s[i + 1, j] + s[i - 1, j] + s[i, j + 1] + s[i, j - 1]) - B * (s_trial[i, j] - s[i, j])

        E_trial = E + delta_E

        # Update state using Metropolis method
        if E_trial < E:
            s = s_trial
            E = E_trial
        else:
            if random() < np.exp(-(delta_E) / KBT):
                s = s_trial
                E = E_trial

        st_mag.append(s)

    del st_mag[-80000:-100]

# Get indices for `st_mag`
all_indices = set(range(len(st_mag)))
val_indices = set(rnd.sample(list(all_indices), 2000))

# Split into selected 2000 data and the remaining 8000 data
st_mag_val = [st_mag[i] for i in val_indices]
remaining_st_mag = [st_mag[i] for i in all_indices - val_indices]

# Assign labels (1, 0) to each spin configuration in `st_mag`
st_mag_val = [(spin_config, (1, 0)) for spin_config in st_mag_val]
remaining_st_mag = [(spin_config, (1, 0)) for spin_config in remaining_st_mag]

# Setup
pa_mag = []

KBT_lis = np.linspace(2.5, 10, 100)  # Change temperature from 2.5 to 10 (in units of kBT) in 100 steps
for KBT in KBT_lis:

    J = 1  # Spin coupling constant
    B = 0.0  # External magnetic field
    steps = 80000  # MC steps

    # Generate initial state: random spin configuration
    s = np.ones([Nx, Ny], int)  # Set N spins with quantum numbers (Sz = +1 or -1) all to 1
    s = Initial_rand(s, Nx, Ny)

    E = -J * Ecalc2(s, Nx, Ny) - B * np.sum(s)  # Calculate (initial) energy
    E2 = E**2  # Store E^2

    # Main loop
    for k in range(steps):
        i = randrange(Nx - 1)  # Select a random number from 0 to N-1
        j = randrange(Ny - 1)  # Select a random number from 0 to N-1

        s_trial = s.copy()
        s_trial[i, j] = -1 * s[i, j]
        delta_E = 2 * s_trial[i, j] * -1 * J * (s[i + 1, j] + s[i - 1, j] + s[i, j + 1] + s[i, j - 1]) - B * (s_trial[i, j] - s[i, j])

        E_trial = E + delta_E

        # Update state using Metropolis method
        if E_trial < E:
            s = s_trial
            E = E_trial
        else:
            if random() < np.exp(-(delta_E) / KBT):
                s = s_trial
                E = E_trial

        pa_mag.append(s)

    del pa_mag[-80000:-100]

# Get indices for `pa_mag`
all_indices = set(range(len(pa_mag)))
val_indices = set(rnd.sample(list(all_indices), 2000))

# Split into selected 2000 data and the remaining 8000 data
pa_mag_val = [pa_mag[i] for i in val_indices]
remaining_pa_mag = [pa_mag[i] for i in all_indices - val_indices]

# Assign labels (0, 1) to each spin configuration in `pa_mag_val`
pa_mag_val = [(spin_config, (0, 1)) for spin_config in pa_mag_val]
remaining_pa_mag = [(spin_config, (0, 1)) for spin_config in remaining_pa_mag]

val_mag = st_mag_val + pa_mag_val
tra_mag = remaining_st_mag + remaining_pa_mag

x_train = np.array([config[0].reshape(-1) for config in tra_mag])  # Flatten spin arrays
y_train = np.array([config[1][0] for config in tra_mag])  # Convert labels to integer format

x_val = np.array([config[0].reshape(-1) for config in val_mag])  # Flatten spin arrays
y_val = np.array([config[1][0] for config in val_mag])  # Convert labels to integer format

y_train = to_categorical(y_train, num_classes=2)
y_val = to_categorical(y_val, num_classes=2)

我验证过程序生成的自旋构型的磁化结果是正确的，所以觉得这部分应该没问题，但还是想确认一下。

另外，训练结果有两个异常情况：

训练过程中途停止，且误差值过小
T=3.3时的自旋构型显示出正常的顺磁特性，这部分是没问题的

备注：内容来源于stack exchange，提问作者Hayato

火山引擎最新活动

方舟 Coding Plan

模型自由，工具不限，免费解锁 ArkClaw，7*24 小时在线的专属智能伙伴

一键部署 OpenClaw

分钟级部署，云服务器包月低至￥9.9，与 CodingPlan 组合购买仅需19.8元

Seedance2.0 体验中心上线

注册即享免费500万Tokens，抢先领略新一代AI视频技术跃迁

新用户特惠专场

大模型19元起，Al应用9.9元畅享，新人首购爆款尽享优惠