动手学深度学习(一)——线性回归从零开始

文章作者:Tyan
博客:noahsnail.com  |  CSDN  |  简书

注:本文为李沐大神的《动手学深度学习》的课程笔记!

参考资料

1
2
3
# 导入mxnet的ndarray, autograd
from mxnet import autograd
from mxnet import ndarray as nd

创建数据集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Number of input features per sample
num_inputs = 2

# Number of training samples
num_examples = 1000

# Ground-truth weights w
true_w = [2, -3.4]

# Ground-truth bias b
true_b = 4.2

# Random training inputs X of shape (num_examples, num_inputs), drawn from a
# normal distribution (presumably mean 0 / std 1, the nd.random_normal
# defaults -- TODO confirm against the MXNet docs)
X = nd.random_normal(shape=(num_examples, num_inputs))

# Build the targets from X, w and b: y = w[0] * x0 + w[1] * x1 + b
y = true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b

# Add random noise, scaled by 0.01, to y in place
y += 0.01 * nd.random_normal(shape=y.shape)

数据可视化

1
# Show the first sample and its target (the output below is a Python 2 tuple repr)
print(X[0], y[0])
(
[ 1.16307867  0.48380461]
<NDArray 2 @cpu(0)>, 
[ 4.87962484]
<NDArray 1 @cpu(0)>)
1
2
3
4
5
6
# IPython/Jupyter magic (not plain Python): render figures inline
%matplotlib inline
import matplotlib.pyplot as plt

# Scatter plot of the second feature X[:, 1] against the target y
plt.scatter(X[:, 1].asnumpy(), y.asnumpy())
plt.show()

Figure 1

数据读取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import random

# Mini-batch size used during training
batch_size = 10


# Read the data lazily through a generator
def data_iter():
    """Yield shuffled mini-batches ``(X_batch, y_batch)`` covering one epoch.

    Reads the module-level ``X``, ``y``, ``num_examples`` and ``batch_size``.
    Both elements of each yielded pair are selected with the same index
    batch. The last batch is shorter when ``num_examples`` is not a multiple
    of ``batch_size``.
    """
    # Sample indices, shuffled so each epoch visits the data in a new order
    idx = list(range(num_examples))
    random.shuffle(idx)
    # range() runs on both Python 2 and 3 (xrange exists only on Python 2)
    for i in range(0, num_examples, batch_size):
        # Slicing clamps at the end of the list, so the final partial batch
        # needs no explicit min()
        j = nd.array(idx[i:i + batch_size])
        # Gather the rows of X and the entries of y at the chosen indices
        yield nd.take(X, j), nd.take(y, j)
1
2
3
4
5
6
7
8
9
10
11
12
# Check that data_iter is a generator function
from inspect import isgeneratorfunction
print(isgeneratorfunction(data_iter))

# data_iter plays the role of a class definition, and data_iter() that of an
# (anonymous) instance of it: calling it returns a generator object
import types
print(isinstance(data_iter(), types.GeneratorType))

# Read a single batch as a smoke test
for data, label in data_iter():
    print(data, label)
    break
True
True
(
[[ 1.18770552 -0.46362698]
 [-3.15577412  2.19352984]
 [-0.45067298 -0.96665388]
 [ 0.05416773 -1.21203637]
 [-1.49418294 -1.61555624]
 [-0.93778831 -1.69338322]
 [ 0.91439158  1.31797135]
 [ 0.82403505  0.33020774]
 [-0.19660901  1.13431609]
 [ 0.15364595  1.01133049]]
<NDArray 10x2 @cpu(0)>, 
[ 8.17057896 -9.57918072  6.58949089  8.41831684  6.69815683  8.08473206
  1.54548573  4.73358202 -0.0632825   1.06603777]
<NDArray 10 @cpu(0)>)

初始化模型参数

1
2
3
4
5
6
7
8
9
10
# Randomly initialise the weights w with shape (num_inputs, 1)
w = nd.random_normal(shape=(num_inputs, 1))
# The bias b starts at zero
b = nd.zeros((1,))
# Keep both parameters in one list so they can be updated together
params = [w, b]

# Back-propagation needs gradients: attach a gradient to every parameter
for param in params:
    param.attach_grad()

定义模型

1
2
3
4
# 定义运算y = w * x + b
def net(X):
    """Linear model: return ``nd.dot(X, w) + b``.

    ``w`` and ``b`` are the module-level parameters. ``nd.dot`` is used
    because this is a matrix product, not an element-wise multiplication.
    """
    return nd.dot(X, w) + b

损失函数

1
2
3
4
5
# 定义平方损失
def square_loss(yhat, y):
    """Return the element-wise squared error ``(yhat - y) ** 2``.

    ``y`` is first reshaped to ``yhat.shape`` so the subtraction is
    element-wise instead of relying on implicit shape conversion. The result
    has the shape of ``yhat``; no reduction (sum/mean) is applied here.
    """
    return (yhat - y.reshape(yhat.shape)) ** 2

优化

1
2
3
4
5
6
# 定义随机梯度下降法
def SGD(params, lr):
    """Apply one gradient-descent step to every parameter, in place.

    Args:
        params: iterable of parameters; each must expose ``.grad`` and
            support slice assignment.
        lr: learning rate that scales the gradient.
    """
    for param in params:
        # Assign through param[:] so the update is written into the existing
        # object; rebinding the name would create a new array that has no
        # gradient attached.
        param[:] = param - lr * param.grad

数据可视化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# 模型函数
def real_fn(X):
    """Return the noise-free target for each row of ``X``.

    Uses the same formula that generated the training targets:
    ``true_w[0] * x0 + true_w[1] * x1 + true_b``. The second term is added,
    not subtracted, so the plotted "real" values match the data the model
    is fitted to.
    """
    return true_w[0] * X[:, 0] + true_w[1] * X[:, 1] + true_b

# 绘制损失随训练迭代次数变化的折线图,以及预测值和真实值的散点图
def plot(losses, X, sample_size=100):
    """Draw the loss curve next to an estimated-vs-real comparison.

    Args:
        losses: sequence of recorded loss values, plotted against their index.
        X: input samples; only the first ``sample_size`` rows are plotted.
        sample_size: number of rows of ``X`` used for the comparison plot.

    Both comparison series use the first feature ``X[:, 0]`` on the x-axis.
    Predictions come from ``net`` and reference values from ``real_fn``.
    """
    xs = list(range(len(losses)))
    # Two subplots side by side
    fig, (ax1, ax2) = plt.subplots(1, 2)
    # Left: loss curve as a solid blue line
    ax1.set_title('Loss during training')
    ax1.plot(xs, losses, '-b')
    # Right: predictions (blue circles) against real values (green stars)
    ax2.set_title('Estimated vs Real Function')
    ax2.plot(X[:sample_size, 0].asnumpy(), net(X[:sample_size, :]).asnumpy(), 'ob', label = 'Estimated')
    ax2.plot(X[:sample_size, 0].asnumpy(), real_fn(X[:sample_size, :]).asnumpy(), '*g', label = 'Real Value')
    ax2.legend()
    # Show the figure
    plt.show()

训练

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Number of training epochs
epochs = 5
# Learning rate
learning_rate = 0.01
# Number of mini-batches processed so far
niter = 0
# Estimated losses recorded for plotting
losses = []
# Exponentially weighted moving average of the batch loss
moving_loss = 0
# Exponential smoothing factor
smoothing_constant = 0.01

# Training; range() runs on both Python 2 and 3 (xrange exists only on Python 2)
for epoch in range(epochs):
    # Sum of the per-sample losses seen so far in this epoch
    total_loss = 0
    for data, label in data_iter():
        # Record the forward pass so autograd can differentiate it
        with autograd.record():
            # Prediction
            output = net(data)
            # Per-sample loss
            loss = square_loss(output, label)
        # Back-propagate from the loss to compute the gradients
        loss.backward()
        # One mini-batch gradient-descent step
        SGD(params, learning_rate)
        # Accumulate the total loss
        total_loss += nd.sum(loss).asscalar()

        # Track how the moving average of the loss evolves after each batch
        niter += 1
        # Mean loss of the current batch
        current_loss = nd.mean(loss).asscalar()
        # Exponential smoothing of the batch loss
        moving_loss = (1 - smoothing_constant) * moving_loss + smoothing_constant * current_loss
        # Dividing by 1 - (1 - c) ** niter compensates for moving_loss
        # having started at 0
        est_loss = moving_loss / (1 - (1 - smoothing_constant) ** niter)

        # Report whenever niter + 1 is a multiple of 100
        if (niter + 1) % 100 == 0:
            # Keep the estimated loss for the plot
            losses.append(est_loss)
            # A single-argument print() gives the same output on Python 2 and 3
            print('Epoch %s, batch %s. Moving average of loss: %s. Average loss: %f' % (epoch, niter, est_loss, total_loss / num_examples))
            plot(losses, X)
Epoch 0, batch 99. Moving average of loss: 0.378590331091. Average loss: 0.625015

Epoch 0

Epoch 1, batch 199. Moving average of loss: 0.10108379838. Average loss: 0.000099

Epoch 1

Epoch 2, batch 299. Moving average of loss: 0.033726038259. Average loss: 0.000099

Epoch 2

Epoch 3, batch 399. Moving average of loss: 0.0120152144263. Average loss: 0.000099

Epoch 3

Epoch 4, batch 499. Moving average of loss: 0.00441111205064. Average loss: 0.000101

Epoch 4

1
2
3
4
# Compare the learned parameters with the ground truth.
# A single-argument print() gives the same output on Python 2 and 3.
print(w)
print(true_w)
print(b)
print(true_b)
[[ 1.99982905]
 [-3.40232825]]
<NDArray 2x1 @cpu(0)>
[2, -3.4]

[ 4.20024347]
<NDArray 1 @cpu(0)>
4.2

其他学习率

1
2
3
4
5
6
7
# Retry with a learning rate 10x smaller than the 0.01 used above
learning_rate = 0.001

Epoch 0, batch 99. Moving average of loss: 4.20676625843. Average loss: 5.549237
Epoch 1, batch 199. Moving average of loss: 1.1782055765. Average loss: 0.098550
Epoch 2, batch 299. Moving average of loss: 0.393321947036. Average loss: 0.001857
Epoch 3, batch 399. Moving average of loss: 0.13944143045. Average loss: 0.000127
Epoch 4, batch 499. Moving average of loss: 0.0505110244825. Average loss: 0.000096
1
2
3
4
5
6
7
# Retry with learning rate 0.1; the log below shows the loss blowing up to nan
learning_rate = 0.1

Epoch 0, batch 99. Moving average of loss: 3.79341099229e+13. Average loss: 26080307360862.457031
Epoch 1, batch 199. Moving average of loss: 1.7174457145e+28. Average loss: 15303785876879711197739352064.000000
Epoch 2, batch 299. Moving average of loss: nan. Average loss: nan
Epoch 3, batch 399. Moving average of loss: nan. Average loss: nan
Epoch 4, batch 499. Moving average of loss: nan. Average loss: nan
1
2
3
4
5
6
7
# Retry with learning rate 1; the log below shows nan from the first report on
learning_rate = 1

Epoch 0, batch 99. Moving average of loss: nan. Average loss: nan
Epoch 1, batch 199. Moving average of loss: nan. Average loss: nan
Epoch 2, batch 299. Moving average of loss: nan. Average loss: nan
Epoch 3, batch 399. Moving average of loss: nan. Average loss: nan
Epoch 4, batch 499. Moving average of loss: nan. Average loss: nan

代码地址

https://github.com/SnailTyan/gluon-practice-code

如果有收获,可以请我喝杯咖啡!