In [1]:
import numpy as np
import matplotlib.pyplot as plt
# ---------------------- 1. Core building block: the Sigmoid activation ----------------------
def sigmoid(x):
    """Logistic activation: squashes any real input into the open interval (0, 1).

    The argument is clipped to [-500, 500] before exponentiation so that
    ``np.exp`` never overflows for large-magnitude inputs; at |x| = 500 the
    result is already indistinguishable from 0.0 / 1.0 in double precision,
    so clipping does not change any observable output.
    """
    return 1 / (1 + np.exp(-np.clip(x, -500.0, 500.0)))
# ---------------------- 2. Initialize the 9 trainable parameters ----------------------
# Weights w1..w6 and biases b1..b3:
#   w1, w2, b1 -> hidden unit h1     w3, w4, b2 -> hidden unit h2
#   w5, w6, b3 -> output unit y
_PARAM_NAMES = ('w1', 'w2', 'b1', 'w3', 'w4', 'b2', 'w5', 'w6', 'b3')
params = {name: np.random.randn() for name in _PARAM_NAMES}

# Per-parameter evolution traces (one value appended per update step).
history = {name: [] for name in params}
# One averaged MSE value appended per epoch, for the loss curve.
loss_history = []
# ---------------------- 3. Forward pass ----------------------
def forward(x):
    """Propagate input x = (x1, x2) through the 2-2-1 network.

    Returns (y_pred, h1, h2); the hidden activations are returned so the
    backward pass can reuse them without recomputation.
    """
    p = params
    # Input layer -> hidden layer.
    pre_h1 = p['w1'] * x[0] + p['w2'] * x[1] + p['b1']
    pre_h2 = p['w3'] * x[0] + p['w4'] * x[1] + p['b2']
    h1, h2 = sigmoid(pre_h1), sigmoid(pre_h2)
    # Hidden layer -> output layer.
    y_pred = sigmoid(p['w5'] * h1 + p['w6'] * h2 + p['b3'])
    return y_pred, h1, h2
# ---------------------- 4. Backward pass ----------------------
def backward(x, y_true, y_pred, h1, h2):
    """Return the gradient of the per-sample loss w.r.t. all 9 parameters.

    Uses the chain rule through the output sigmoid and then through each
    hidden sigmoid.  NOTE: the error term omits the factor 2 from
    d/dy (y - t)^2 — it is absorbed into the learning rate, which only
    rescales the step size.
    """
    # Output-layer error term (loss derivative times sigmoid derivative).
    delta_out = (y_pred - y_true) * y_pred * (1 - y_pred)
    # Error terms propagated back to each hidden unit.
    delta_h1 = delta_out * params['w5'] * h1 * (1 - h1)
    delta_h2 = delta_out * params['w6'] * h2 * (1 - h2)
    return {
        # Output-layer parameters.
        'w5': delta_out * h1,
        'w6': delta_out * h2,
        'b3': delta_out,
        # Hidden unit h1.
        'w1': delta_h1 * x[0],
        'w2': delta_h1 * x[1],
        'b1': delta_h1,
        # Hidden unit h2.
        'w3': delta_h2 * x[0],
        'w4': delta_h2 * x[1],
        'b2': delta_h2,
    }
# ---------------------- 5. Training: per-sample gradient descent ----------------------
# Dataset: the classic XOR problem (introductory binary-classification case).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])  # XOR labels

epochs = 5000
lr = 0.5  # learning rate (a typical classroom value in 0.1..1)

for epoch in range(epochs):
    total_loss = 0
    for x, y_true in zip(X, y):
        y_pred, h1, h2 = forward(x)
        # Per-sample squared error (MSE, the simplest loss function).
        total_loss += (y_pred - y_true) ** 2
        grads = backward(x, y_true, y_pred, h1, h2)
        # Stochastic update: step each parameter immediately after this
        # sample's gradients, and record its new value for the trace plot.
        for name in params:
            params[name] -= lr * grads[name]
            history[name].append(params[name])
    # One averaged loss value per epoch for the loss curve.
    loss_history.append(total_loss / len(X))
In [2]:
# Left panel: averaged training loss per epoch.
plt.figure(figsize=(12, 5))
ax_loss = plt.subplot(1, 2, 1)
ax_loss.plot(loss_history, color='red')
ax_loss.set_title('Loss Curve (Training Loss vs Epochs)')
ax_loss.set_xlabel('Training Epochs')
ax_loss.set_ylabel('MSE Loss')
ax_loss.grid(True)
In [3]:
# Right panel: evolution of w1 over the per-sample update steps.
ax_w1 = plt.subplot(1, 2, 2)
ax_w1.plot(history['w1'], color='purple', label='w1 (Tail Number 1)')
ax_w1.set_title('Evolution of Parameter w1 (Student ID Tail: 1)')
ax_w1.set_xlabel('Training Steps')
ax_w1.set_ylabel('Parameter Value')
ax_w1.legend()
ax_w1.grid(True)
plt.tight_layout()
plt.show()
# ---------------------- 8. Report the 9 learned parameters ----------------------
print("=== Final 9 Parameters (w1~w6, b1~b3) ===")
for name, value in params.items():
    print(f"{name}: {value:.4f}")
=== Final 9 Parameters (w1~w6, b1~b3) === w1: -4.3119 w2: -4.3144 b1: 6.3858 w3: -6.2156 w4: -6.2410 b2: 2.4553 w5: 8.6689 w6: -8.8487 b3: -4.0479
In [ ]: