Linear Regression
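
Both versions below fit a two-feature linear model y = x1*w1 + x2*w2 + b by minimizing the squared loss l(y_hat, y) = (y_hat - y)^2 / 2 with mini-batch stochastic gradient descent: for a mini-batch B, each parameter theta is updated as theta <- theta - (lr / |B|) * sum_{i in B} dl_i/dtheta. The first version implements every step by hand; the second uses the data, nn, init, and optim utilities that PyTorch provides.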

From-scratch (low-level) implementation

import torch
from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import random

## Generate the dataset: points on a 2-D linear function plus Gaussian noise
true_w = [2, -3.4]
true_b = 4.2
features = torch.randn(1000, 2, dtype=torch.float32)  # sample the features x from a standard normal distribution
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b  # labels on the true line
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float32)  # add noise


## Plot the data
display.set_matplotlib_formats('svg')  # vector graphics
plt.rcParams['figure.figsize'] = (3.5, 2.5)
plt.scatter(features[:, 0].numpy(), labels.numpy(), 1)  # label against the first feature
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)  # label against the second feature


## Read the data in random mini-batches
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # shuffle the sample indices
    for i in range(0, num_examples, batch_size):
        j = torch.LongTensor(indices[i: min(i + batch_size, num_examples)])
        # yield turns this function into a generator: each call returns one mini-batch,
        # and execution resumes right after the yield on the next iteration
        yield features.index_select(0, j), labels.index_select(0, j)

## Read one mini-batch to check the iterator
batch_size = 10
for x, y in data_iter(batch_size, features, labels):
    print(x, y)
    break

## Initialize the model parameters; w is a 2 x 1 vector
w = torch.tensor(np.random.normal(0, 0.01, (2, 1)), dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

w.requires_grad_(True)
b.requires_grad_(True)


## Define the model: a linear model
def linreg(x, w, b):
    return torch.mm(x, w) + b

# Squared loss
def squared_loss(y_hat, y):
    return (y_hat - y.view(y_hat.size())) ** 2 / 2

# Optimizer: mini-batch stochastic gradient descent. The gradient here is the sum over
# the whole batch (the loss was summed over the batch), so divide by batch_size
def sgd(params, lr, batch_size):
    for param in params:
        param.data -= lr * param.grad / batch_size  # update through .data so the step is not recorded in the computation graph

lr = 0.03
epochs = 3
net = linreg
loss = squared_loss
batch_size = 10

for epoch in range(epochs):
    # each epoch passes over every sample once
    for x, y in data_iter(batch_size, features, labels):
        l = loss(net(x, w, b), y).sum()  # the 10 samples give a (10, 1) loss, so sum it before computing gradients
        l.backward()  # backpropagate through the computation graph
        sgd([w, b], lr, batch_size)  # update the parameters with the optimizer
        # reset the gradients manually after the update
        w.grad.data.zero_()
        b.grad.data.zero_()
    train_loss = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_loss.mean().item()))

print(true_w, '\n', w)
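
As a quick sanity check (a minimal sketch that reuses the variables defined above; it is not part of the original script), the trained parameters can be used for prediction without tracking gradients:

## Sketch: evaluate the hand-trained parameters
with torch.no_grad():
    preds = linreg(features, w, b)
    print('mean squared error:', ((preds - labels.view(preds.size())) ** 2).mean().item())
    print('learned w:', w.view(-1).tolist(), ' true w:', true_w)
    print('learned b:', b.item(), ' true b:', true_b)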

Implementation with PyTorch's built-in packages

import torch
from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import random



# Generate the samples (features x)
n_feature = 2
true_w = [2, -3.4]
true_b = 4.2
features = torch.randn(1000, n_feature, dtype=torch.float32)  # sample 1000 examples, 2 features each, from a standard normal distribution
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b  # labels on the true line
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float32)  # Gaussian noise

# Plotting settings
display.set_matplotlib_formats('svg')
plt.rcParams['figure.figsize'] = (3.5, 2.5)
plt.scatter(features[:, 0].numpy(), labels.numpy(), 1)
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)

# Read the data with torch's data package instead of a hand-written iterator
import torch.utils.data as Data
batch_size = 10
dataset = Data.TensorDataset(features, labels)  # combine features and labels
data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)  # mini-batch iterator

# for x, y in data_iter:
#     print(x, y)
#     break

# Define the model with torch's nn package
import torch.nn as nn
# Build the network by subclassing nn.Module
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1)  # nn.Linear defines the linear layer; it needs the number of input features (2)

    def forward(self, x):
        y = self.linear(x)
        return y

net = LinearNet(2)
# print(net)

################################################
# Besides subclassing Module, a simple sequential network can also be built
# with nn.Sequential, in three equivalent styles
# ## Style 1
# net = nn.Sequential(
#     nn.Linear(n_feature, 1)
#     # further layers can be appended here
# )
# ## Style 2: each layer can be given a name
# net = nn.Sequential()
# net.add_module('linear layer', nn.Linear(n_feature, 1))
# ## Style 3
# from collections import OrderedDict
# net = nn.Sequential(OrderedDict([
#     ('linear layer', nn.Linear(n_feature, 1))
#     # ...
# ]))
# print(net)
# print(net[0])

################################################

# net.parameters() returns a generator over all learnable parameters of the model
# for param in net.parameters():
#     print(param)


## Initialize the model parameters with torch's init module
from torch.nn import init
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
# alternatively: net.linear.bias.data.fill_(0)

# Define the loss function
loss = nn.MSELoss()

# Define the optimizer
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr=0.03)

# # Different learning rates can be set for different sub-networks
# # (subnet1 / subnet2 are placeholders for sub-modules of a larger model)
# optimizer = optim.SGD([
#     # parameter groups without an explicit lr use the outer default
#     {'params': net.subnet1.parameters()},  # defaults to 0.03
#     {'params': net.subnet2.parameters(), 'lr': 0.01}
# ], lr=0.03)
# # Adjust the learning rate afterwards
# for param_group in optimizer.param_groups:
#     param_group['lr'] *= 0.1

# Train the model
epochs = 3
for epoch in range(1, epochs + 1):
    for x, y in data_iter:
        output = net(x)
        l = loss(output, y.view(-1, 1))  # y.view reshapes the labels into a batch_size x 1 column vector
        # backpropagate through the computation graph
        l.backward()
        optimizer.step()
        optimizer.zero_grad()  # equivalent to net.zero_grad(); no need to clear each parameter's gradient by hand
    print('epoch %d, loss: %f' % (epoch, l.item()))


print(true_w, net.linear.weight)
print(true_b, net.linear.bias)
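
A small follow-up sketch (not in the original script) that uses the trained network for prediction; the new_x values here are made up purely for illustration:

# Sketch: predict with the trained network on made-up inputs
with torch.no_grad():
    new_x = torch.tensor([[1.0, 1.0], [2.0, -1.0]])
    print(net(new_x))                             # model predictions
    print(new_x @ torch.tensor(true_w) + true_b)  # values of the true (noise-free) function, for comparison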