In [1]:
import numpy as np
import matplotlib.pyplot as plt

# ---------------------- 1. Core lesson: the sigmoid activation ----------------------
def sigmoid(x):
    """Logistic sigmoid: squashes any real input into (0, 1).

    The argument is clipped to [-500, 500] before exponentiation so that
    np.exp cannot overflow float64 (exp overflows above ~709); within
    double precision the returned value is unchanged by the clip.
    """
    return 1 / (1 + np.exp(-np.clip(x, -500.0, 500.0)))

# ---------------------- 2. Initialize the 9 parameters ----------------------
# Weights w1~w6 and biases b1~b3 — 9 parameters in total.
np.random.seed(42)  # fix the RNG so the run (and the printed final parameters) is reproducible

params = {
    'w1': np.random.randn(),  # h1's weight on x1
    'w2': np.random.randn(),  # h1's weight on x2
    'b1': np.random.randn(),  # h1's bias
    'w3': np.random.randn(),  # h2's weight on x1
    'w4': np.random.randn(),  # h2's weight on x2
    'b2': np.random.randn(),  # h2's bias
    'w5': np.random.randn(),  # output y's weight on h1
    'w6': np.random.randn(),  # output y's weight on h2
    'b3': np.random.randn()   # output y's bias
}

# Per-parameter trajectory, one entry appended per gradient-descent step
# (the "parameter evolution" the assignment asks to plot/print).
history = {k: [] for k in params.keys()}
loss_history = []  # per-epoch average loss (for the Loss plot)

# ---------------------- 3. Core lesson: forward propagation ----------------------
def forward(x):
    """Run one forward pass through the 2-2-1 network.

    Args:
        x: length-2 input sample (x[0], x[1]).

    Returns:
        (y_pred, h1, h2): network output plus both hidden activations
        (the hidden values are needed again by backward()).
    """
    # Input layer -> hidden layer: pre-activations, then sigmoid
    z1 = params['w1'] * x[0] + params['w2'] * x[1] + params['b1']
    z2 = params['w3'] * x[0] + params['w4'] * x[1] + params['b2']
    h1, h2 = sigmoid(z1), sigmoid(z2)

    # Hidden layer -> output layer
    y_pred = sigmoid(params['w5'] * h1 + params['w6'] * h2 + params['b3'])
    return y_pred, h1, h2

# ---------------------- 4. Core lesson: backpropagation ----------------------
def backward(x, y_true, y_pred, h1, h2, p=None):
    """Compute gradients of the squared-error loss w.r.t. all 9 parameters.

    Args:
        x: length-2 input sample (x[0], x[1]).
        y_true: target output for this sample.
        y_pred, h1, h2: activations returned by forward() for the same sample.
        p: parameter dict providing 'w5' and 'w6'; defaults to the module-level
           `params` (optional argument added for testability, callers passing
           five arguments are unaffected).

    Returns:
        dict mapping each parameter name ('w1'..'w6', 'b1'..'b3') to its gradient.

    Note: uses d(loss)/d(y_pred) = (y_pred - y_true); the factor 2 from
    differentiating (y_pred - y_true)**2 is deliberately folded into the
    learning rate, as is common in classroom implementations.
    """
    if p is None:
        p = params  # module-level parameter dict

    # Output-layer error term: chain rule through the sigmoid (y' = y * (1 - y))
    delta = (y_pred - y_true) * y_pred * (1 - y_pred)

    grads = {}
    # Output-layer gradients (w5, w6, b3)
    grads['w5'] = delta * h1
    grads['w6'] = delta * h2
    grads['b3'] = delta

    # Error propagated back to each hidden unit (through its sigmoid derivative)
    delta_h1 = delta * p['w5'] * h1 * (1 - h1)
    delta_h2 = delta * p['w6'] * h2 * (1 - h2)

    # Hidden-layer gradients (w1, w2, b1, w3, w4, b2)
    grads['w1'] = delta_h1 * x[0]
    grads['w2'] = delta_h1 * x[1]
    grads['b1'] = delta_h1
    grads['w3'] = delta_h2 * x[0]
    grads['w4'] = delta_h2 * x[1]
    grads['b2'] = delta_h2

    return grads

# ---------------------- 5. Training loop (plain gradient descent) ----------------------
# Dataset: the classic XOR problem (an introductory binary-classification task).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])  # XOR labels

epochs = 5000  # number of training epochs
lr = 0.5       # learning rate (a typical classroom value in the 0.1~1 range)

for epoch in range(epochs):
    epoch_loss = 0
    for sample, target in zip(X, y):
        # Forward pass
        y_pred, h1, h2 = forward(sample)

        # Squared-error loss for this sample (averaged into MSE per epoch below)
        epoch_loss += (y_pred - target) ** 2

        # Backward pass: gradients for all 9 parameters
        grads = backward(sample, target, y_pred, h1, h2)

        # Gradient-descent update, recording every parameter after each step
        # (this feeds the "parameter evolution" plot)
        for name in params:
            params[name] -= lr * grads[name]
            history[name].append(params[name])

    # One point per epoch: average loss over the 4 samples (for the Loss plot)
    loss_history.append(epoch_loss / len(X))
In [2]:
# Loss curve. NOTE(review): the original used plt.subplot(1, 2, 1), but the
# companion plt.subplot(1, 2, 2) call lives in a *later* cell; with the inline
# backend each cell renders its own figure, so the right half of this figure
# stayed permanently blank. Give the loss curve its own axes instead.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(loss_history, color='red')
ax.set_title('Loss Curve (Training Loss vs Epochs)')
ax.set_xlabel('Training Epochs')
ax.set_ylabel('MSE Loss')
ax.grid(True)
plt.show()
No description has been provided for this image
In [3]:
# Parameter-evolution plot. NOTE(review): the original called
# plt.subplot(1, 2, 2) here, but the 1x2 grid was created in a previous cell
# whose figure has already been rendered and closed — so this produced a fresh
# figure containing only a lone right-half axes. Use a dedicated figure.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(history['w1'], color='purple', label='w1 (Tail Number 1)')
ax.set_title('Evolution of Parameter w1 (Student ID Tail: 1)')
ax.set_xlabel('Training Steps')
ax.set_ylabel('Parameter Value')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

# ---------------------- 8. Print the final 9 parameters ----------------------
print("=== Final 9 Parameters (w1~w6, b1~b3) ===")
for k, v in params.items():
    print(f"{k}: {v:.4f}")
No description has been provided for this image
=== Final 9 Parameters (w1~w6, b1~b3) ===
w1: -4.3119
w2: -4.3144
b1: 6.3858
w3: -6.2156
w4: -6.2410
b2: 2.4553
w5: 8.6689
w6: -8.8487
b3: -4.0479
In [ ]: