Table of Contents
Part 1: Study Notes 1
Using Kaggle datasets in Colab
Unzipping a zip file
1. Linear Models
Supervised learning
Validation set
Model design
Computing the loss
Loss curve
2. Gradient Descent
The optimization problem
Gradient descent (greedy)
Local optima and saddle points
Stochastic gradient descent
3. Backpropagation
The weight w
Computational graph
Nonlinear functions
forward: the forward pass
backward: backpropagation
Computational graph example
Tensors in PyTorch
Linear model in PyTorch
4. Linear Regression with PyTorch
Workflow
Data preparation
Model design
Optimizer and loss function
Training loop
Code
5. Logistic Regression
Logistic function
Model
Loss function
6. Multi-Dimensional Feature Inputs
Dataset
Model
Example
Trying different activation functions
Switching the activation to ReLU
7. Loading Datasets
epoch, batch_size, iteration
DataLoader
Workflow
Sample code
Exercise
9. Multi-Class Classification
Softmax layer
Loss function
Exercise
MNIST dataset
Exercise 2
10. CNN Basics
Basic structure
The convolution process
Convolutional layers
Pooling / padding
Stride
Downsampling: max pooling layer
A CNN example
Exercise
11. Advanced CNNs
GoogLeNet
Inception Module
1 × 1 convolution
Implementing the Inception module
Residual networks (ResNet)
Vanishing gradients
The fix
A simple implementation
Exercise 1
Exercise 2
13. Recurrent Neural Networks (Basics)
What is an RNN?
The computation
Implementation
RNN in PyTorch
What is num_layers?
Example 1
Embedding layer
Exercise
14. Recurrent Neural Networks (Advanced)
RNN classifier: name classification
Preparing the data
Model design
Converting names to tensors
Code
Exercise
Part 1: Study Notes 1
Course: 《PyTorch深度学习实践》完结合集_哔哩哔哩_bilibili ("PyTorch Deep Learning Practice", the complete series on Bilibili)
Using Kaggle datasets in Colab
!pip install -U -q kaggle
!mkdir -p ~/.kaggle
!echo '{"username":"jiadongwangl8","key":"258a6528804078542767f289934296cb"}' > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
!kaggle competitions download -c digit-recognizer
!kaggle competitions download -c sentiment-analysis-on-movie-reviews  # download the files you need
Unzipping a zip file
import zipfile
# extract with zipfile
z = zipfile.ZipFile('/content/drive/MyDrive/ML2021/deepL1/sentiment-analysis-on-movie-reviews.zip', 'r')
z.extractall(path=r"./")
z.close()
1. Linear Models
Supervised learning
x and y are given in pairs.
Validation set: split the training data again, into a training set and a validation set.
Model design: a hat over y denotes the predicted value.
Computing the loss: the figure below shows the model's loss when the weight equals 3.
loss: the error on a single sample
cost: the error over a set of training samples
MSE (mean squared error) is used here as the cost.
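For reference, the cost computed by the code below is exactly the mean squared error over the N training samples:

\mathrm{cost}(w) = \frac{1}{N}\sum_{n=1}^{N}\left(\hat{y}_n - y_n\right)^2, \qquad \hat{y}_n = w\, x_n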
Loss curve
import numpy as np
import matplotlib.pyplot as plt

# prepare the dataset
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# define the model
def forward(x):
    return x * w

# define the loss
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

w_list = []    # weights
mse_list = []  # cost values

# training: a brute-force sweep over w
for w in np.arange(0.0, 4.1, 0.1):
    print('w=', w)
    l_sum = 0
    for x_val, y_val in zip(x_data, y_data):
        y_pred_val = forward(x_val)
        loss_val = loss(x_val, y_val)
        l_sum += loss_val
        print('\t', x_val, y_val, y_pred_val, loss_val)
    print('MSE=', l_sum / 3)
    w_list.append(w)
    mse_list.append(l_sum / 3)

# plot
plt.plot(w_list, mse_list)
plt.ylabel('Loss')
plt.xlabel('w')
plt.show()
import numpy as np
import matplotlib.pyplot as plt

# prepare the dataset
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# define the model; a bias b has been added this time
def forward(x):
    return x * w + b

# define the loss
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

mse_list = []  # cost values
# We can't plot straight from the flat list as before: the loops are nested,
# so the dimensions differ (each axis has N points but the cost grid has N*N).

# training: sweep over (w, b)
for w in np.arange(0.0, 4.1, 0.1):
    print('w=', w)
    for b in np.arange(-2.0, 2.1, 0.1):
        print('b=', b)
        l_sum = 0
        # accumulate the loss over all samples
        for x_val, y_val in zip(x_data, y_data):
            y_pred_val = forward(x_val)
            loss_val = loss(x_val, y_val)
            l_sum += loss_val
            print('\t', x_val, y_val, y_pred_val, loss_val)
        mse_list.append(l_sum / 3)
        print('MSE=', l_sum / 3)

# plot
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # Axes3D(fig) is deprecated in recent matplotlib
x = np.arange(0.0, 4.1, 0.1)
y = np.arange(-2.0, 2.1, 0.1)
print(x.shape)
print(y.shape)
z = np.reshape(mse_list, (len(x), len(y)))  # this line is the key step: Z is a surface
x, y = np.meshgrid(x, y)
ax.plot_surface(x, y, z)
plt.show()
2. Gradient Descent
The optimization problem
Find a suitable weight that makes the objective optimal.
Gradient descent (greedy)
Each update takes a small step in the direction of the negative gradient.
Local optima and saddle points
import matplotlib.pyplot as plt  # needed for the plot at the end

# prepare the data
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = 1.0  # initial weight

epoch_list = []
cost_list = []

def forward(x):
    return x * w

def cost(xs, ys):
    cost = 0
    for x, y in zip(xs, ys):
        y_pred = forward(x)
        cost += (y_pred - y) ** 2
    return cost / len(xs)

def gradient(xs, ys):
    grad = 0
    for x, y in zip(xs, ys):
        grad += 2 * x * (x * w - y)
    return grad / len(xs)

print('Predict (before training)', 4, forward(4))
for epoch in range(100):
    epoch_list.append(epoch)
    cost_val = cost(x_data, y_data)
    cost_list.append(cost_val)
    grad_val = gradient(x_data, y_data)
    w -= 0.01 * grad_val
    print('Epoch:', epoch, 'w=', w, 'loss=', cost_val)
print('Predict (after training)', 4, forward(4))

# plot the cost curve
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(epoch_list, cost_list, label='linear')
ax.set_xlabel('epoch')  # Add an x-label to the axes.
ax.set_ylabel('cost')   # Add a y-label to the axes.
ax.set_title("gradient descent")  # Add a title to the axes.
plt.show()
Stochastic gradient descent
The weights are updated for every single sample, so SGD tends to optimize better than full-batch gradient descent, but it also costs more time.
Deep learning therefore uses mini-batches: each weight update is computed from one batch of samples (a small sketch follows below).
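As a minimal sketch of the mini-batch idea (my own illustration, not course code; the batch size of 2 and the four-sample dataset are arbitrary choices), the gradient is averaged over a small slice of the data before each update:

# Minimal mini-batch gradient descent sketch for y = w * x.
x_data = [1.0, 2.0, 3.0, 4.0]
y_data = [2.0, 4.0, 6.0, 8.0]
w = 1.0
batch_size = 2  # assumed batch size

for epoch in range(100):
    for start in range(0, len(x_data), batch_size):
        xs = x_data[start:start + batch_size]
        ys = y_data[start:start + batch_size]
        # average the per-sample gradients over the batch, then take one step
        grad = sum(2 * x * (x * w - y) for x, y in zip(xs, ys)) / len(xs)
        w -= 0.01 * grad

print('w after training:', w)  # approaches 2.0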
import matplotlib.pyplot as plt  # needed for the plot at the end

# prepare the data
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = 1.0  # initial weight

epoch_list = []
cost_list = []

def forward(x):
    return x * w

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

# Since the weight is updated per sample, we no longer need the batch cost;
# the loss (and gradient) of each individual sample is enough.
def gradient(x, y):
    return 2 * x * (x * w - y)

print('Predict (before training)', 4, forward(4))
for epoch in range(100):
    epoch_list.append(epoch)
    cost = 0
    for x, y in zip(x_data, y_data):
        grad = gradient(x, y)
        w = w - 0.01 * grad
        print("\tgrad: ", x, y, grad)
        l = loss(x, y)
        cost += l
    cost_list.append(cost / 3)
    print("progress:", epoch, "w=", w, "loss=", l, "cost=", cost)
print('Predict (after training)', 4, forward(4))

# plot the cost curve
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(epoch_list, cost_list, label='linear')
ax.set_xlabel('epoch')  # Add an x-label to the axes.
ax.set_ylabel('cost')   # Add a y-label to the axes.
ax.set_title("stochastic gradient descent")  # Add a title to the axes.
plt.show()
You can see that with SGD the error drops faster than with the previous method.
3. Backpropagation
Backpropagation takes the partial derivative of the loss with respect to each weight and uses it to update that weight.
The weight w
Computational graph
A two-layer neural network.
To preserve the model's capacity, i.e. so that a multi-layer network cannot be collapsed into an equivalent shallow linear one, we do the following:
Nonlinear functions
forward: the forward pass computes the loss
backward: backpropagation computes the gradients (derivatives)
Computational graph example
Tensors in PyTorch
A weight tensor w stores two things: the value of the weight itself and the gradient of the loss with respect to that weight.
Linear model in PyTorch
import torch

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w = torch.Tensor([1.0])  # make the weight a tensor
w.requires_grad = True   # tell autograd that w needs gradients

# build the model
def forward(x):
    return x * w

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

print("predict (before training)", 4, forward(4).item())
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y)  # compute the loss (this builds a computational graph); SGD uses loss, not cost
        l.backward()    # backpropagate
        print('\tgrad:', x, y, w.grad.item())
        w.data = w.data - 0.01 * w.grad.data  # w holds both data and grad; update through .data
        w.grad.data.zero_()  # !!!! zero the gradient after every update
    print("progress:", epoch, l.item())
print("predict (after training)", 4, forward(4).item())
Exercise
import torch

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w1 = torch.Tensor([1.0])  # make the weight a tensor
w1.requires_grad = True   # w1 needs gradients
w2 = torch.Tensor([1.0])
w2.requires_grad = True
b = torch.Tensor([1.0])
b.requires_grad = True

# build the model
def forward(x):
    return x**2 * w1 + x * w2 + b

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

print("predict (before training)", 4, forward(4).item())
for epoch in range(1000):
    for x, y in zip(x_data, y_data):
        l = loss(x, y)  # compute the loss (builds the graph); SGD uses loss, not cost
        l.backward()    # backpropagate
        print('\tgrad:', x, y, w1.grad.item(), w2.grad.item(), b.grad.item())
        w1.data = w1.data - 0.01 * w1.grad.data  # each weight holds data and grad
        w2.data = w2.data - 0.01 * w2.grad.data  # 0.01 is the learning rate
        b.data = b.data - 0.01 * b.grad.data
        w1.grad.data.zero_()  # !!!! zero the gradients after every update
        w2.grad.data.zero_()
        b.grad.data.zero_()
    print("progress:", epoch, l.item())
print("predict (after training)", 4, forward(4).item())
print("after train ", w1.data, w2.data, b.data)
4. Linear Regression with PyTorch
Workflow
Data preparation
mini-batch
Reference: mini batch详解_小镇大爱的博客-CSDN博客 (a CSDN post on what a mini-batch is)
Model design
torch.nn — PyTorch master documentation
Optimizer and loss function
torch.nn — PyTorch master documentation
torch.optim — PyTorch master documentation
Training loop
Code
import torch

x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])
print("x ", x_data)
print("y ", y_data)

class LinearModel(torch.nn.Module):  # PyTorch modules implement backward automatically
    def __init__(self):  # constructor
        super(LinearModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):  # override
        y_pred = self.linear(x)
        return y_pred

model = LinearModel()  # instance = ClassName() <- instantiation
criterion = torch.nn.MSELoss(reduction='sum')  # sum instead of mean; size_average=False is deprecated
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1000):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Output weight and bias
print('w = ', model.linear.weight.item())
print('b = ', model.linear.bias.item())

# Test Model
x_test = torch.Tensor([[4.0]])
y_test = model(x_test)
print('y_pred = ', y_test.data)
import torch
import numpy as np
import matplotlib.pyplot as plt

x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])
print("x ", x_data)
print("y ", y_data)

class LinearModel(torch.nn.Module):  # PyTorch modules implement backward automatically
    def __init__(self):  # constructor
        super(LinearModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):  # override
        y_pred = self.linear(x)
        return y_pred

model = LinearModel()
criterion = torch.nn.MSELoss(reduction='sum')  # size_average=False is deprecated
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

epoch_list = []
loss_SGD = []
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss)
    epoch_list.append(epoch)
    loss_SGD.append(loss.data)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# plot the data
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(epoch_list, loss_SGD, label='optimizer SGD')
# set the plot
ax.set_xlabel('epoch')  # Add an x-label to the axes.
ax.set_ylabel('loss')   # Add a y-label to the axes.
ax.set_title("loss of optimizers")  # Add a title to the axes.
ax.legend()
plt.show()
Comparison with Adam
import torch
import numpy as np
import matplotlib.pyplot as plt

x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])
print("x ", x_data)
print("y ", y_data)

class LinearModel(torch.nn.Module):  # PyTorch modules implement backward automatically
    def __init__(self):  # constructor
        super(LinearModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):  # override
        y_pred = self.linear(x)
        return y_pred

model = LinearModel()
criterion = torch.nn.MSELoss(reduction='sum')  # size_average=False is deprecated
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

epoch_list = []
loss_Adam = []
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss)
    epoch_list.append(epoch)
    loss_Adam.append(loss.data)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# plot the data
fig, ax = plt.subplots(figsize=(10, 8))
ax.plot(epoch_list, loss_Adam, label='optimizer Adam')
# set the plot
ax.set_xlabel('epoch')  # Add an x-label to the axes.
ax.set_ylabel('loss')   # Add a y-label to the axes.
ax.set_title("loss of optimizers")  # Add a title to the axes.
ax.legend()
plt.show()
5. Logistic Regression
Used for classification tasks.
Logistic function
A logistic function or logistic curve is a common S-shaped curve (sigmoid curve).
It is the same idea my intelligent-control teacher described: the network's input passes through a function that decides whether the neuron is inhibited or activated.
The effect of the loss: when y = 1, the closer the prediction is to 1 the smaller the loss; when y = 0, the closer the prediction is to 0 the smaller the loss.
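That behavior is exactly the binary cross-entropy computed by torch.nn.BCELoss used below:

\ell(\hat{y}, y) = -\left[\, y \log \hat{y} + (1 - y)\log(1 - \hat{y}) \,\right]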
import torch
import numpy as np
import matplotlib.pyplot as plt

# this example is a binary classification problem
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[0], [0], [1]])

class LogisticRegressionModel(torch.nn.Module):
    def __init__(self):
        super(LogisticRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))  # sigmoid activation added; F.sigmoid is deprecated
        return y_pred

model = LogisticRegressionModel()
criterion = torch.nn.BCELoss(reduction='sum')  # size_average=False is deprecated
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1000):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

x = np.linspace(0, 10, 200)
x_t = torch.Tensor(x).view((200, 1))  # reshape
y_t = model(x_t)
y = y_t.data.numpy()  # convert to numpy before plotting

# plot
plt.plot(x, y)
plt.plot([0, 10], [0.5, 0.5], c='r')
plt.xlabel('Hours')
plt.ylabel('Probability of Pass')
plt.grid()
plt.show()
6. Multi-Dimensional Feature Inputs
Dataset
Model
The input is 8-dimensional and the output is 1-dimensional.
Example
# change into the folder that holds the data first
import numpy as np
import torch

# read the data
xy = np.loadtxt('./diabetes.csv', delimiter=',', dtype=np.float32)
x_data = torch.from_numpy(xy[:, :-1])
y_data = torch.from_numpy(xy[:, [-1]])
# print("x_data : ", x_data)  # features
# print("y_data : ", y_data)  # labels 0/1

# model design
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)  # linear layer 1: 8 inputs, 6 outputs
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()  # applies the function element-wise; note this is a Module, unlike torch.nn.functional.sigmoid(input)

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x

model = Model()
criterion = torch.nn.BCELoss(reduction='mean')  # mini-batch loss for binary classification: binary cross-entropy between target and output
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    # Forward
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())
    # Backward
    optimizer.zero_grad()
    loss.backward()
    # Update
    optimizer.step()
Trying different activation functions
Switching the activation to ReLU
The effect: inputs below 0 map to 0; inputs above 0 pass through unchanged.
import numpy as np
import torch

# read the data
xy = np.loadtxt('./diabetes.csv', delimiter=',', dtype=np.float32)
x_data = torch.from_numpy(xy[:, :-1])
y_data = torch.from_numpy(xy[:, [-1]])
# print("x_data : ", x_data)  # features
# print("y_data : ", y_data)  # labels 0/1

# model design
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.activate = torch.nn.ReLU()  # to try another activation, change only this line
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.activate(self.linear1(x))
        x = self.activate(self.linear2(x))
        x = self.sigmoid(self.linear3(x))  # keep sigmoid on the output so BCELoss receives values in (0, 1)
        return x

model = Model()
criterion = torch.nn.BCELoss(reduction='mean')  # binary cross-entropy between target and output
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    # Forward
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())
    # Backward
    optimizer.zero_grad()
    loss.backward()
    # Update
    optimizer.step()
7. Loading Datasets
epoch, batch_size, iteration
DataLoader
DataLoader can be instantiated directly; note that Dataset is an abstract class, so it can only be subclassed, never instantiated (a quick sketch of how the three terms relate follows below).
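A minimal sketch of how epoch, batch_size, and iteration relate (the sample count here is illustrative, not from the course):

# One epoch = one full pass over the dataset.
# iterations per epoch = ceil(number of samples / batch_size)
import math

num_samples = 759  # e.g. roughly the rows of the diabetes csv (illustrative)
batch_size = 32

iterations = math.ceil(num_samples / batch_size)
print(iterations)  # 24 weight updates per epoch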
Workflow
Sample code, using the diabetes dataset from before:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class DiabetesDataset(Dataset):
    def __init__(self, filepath):
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, :-1])
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

dataset = DiabetesDataset('./diabetes.csv')
train_loader = DataLoader(dataset=dataset,
                          batch_size=32,
                          shuffle=True,
                          num_workers=2)

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x

model = Model()
criterion = torch.nn.BCELoss(reduction='mean')  # size_average=True is deprecated
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(100):
    for i, data in enumerate(train_loader, 0):
        # 1. Prepare data
        inputs, labels = data
        # 2. Forward
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.item())
        # 3. Backward
        optimizer.zero_grad()
        loss.backward()
        # 4. Update
        optimizer.step()
Exercise
Data preprocessing:
data = pd.read_csv('tantic_train.csv')
data.head()

data = pd.read_csv('tantic_train.csv')
features = ["Pclass", "Sex", "SibSp", "Parch", "Fare", "Age"]
data = pd.get_dummies(data[features])
data.head()
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# prepare dataset
# training set
class Train_TitanicDataset(Dataset):
    def __init__(self, filepath):
        # load the data
        data = pd.read_csv(filepath)
        labels = ["Survived"]
        features = ["Pclass", "Sex", "SibSp", "Parch", "Fare", "Age"]
        data["Age"] = data["Age"].fillna(data["Age"].mean())
        self.len = data.shape[0]  # shape = (rows, columns)
        self.train_data = data[:int(self.len * 0.8)]  # training split: 80%
        # data[features] is a DataFrame: one-hot encode it, convert to an array, then to a tensor for matrix math
        self.x_data = torch.from_numpy(np.array(pd.get_dummies(self.train_data[features])).astype(np.float32))
        self.y_data = torch.from_numpy(np.array(self.train_data[labels]).astype(np.float32))
        self.train_len = self.train_data.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.train_len

train_dataset = Train_TitanicDataset('tantic_train.csv')
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, num_workers=2)

# validation set
class Dev_TitanicDataset(Dataset):
    def __init__(self, filepath):
        # load the data
        data = pd.read_csv(filepath)
        labels = ["Survived"]
        features = ["Pclass", "Sex", "SibSp", "Parch", "Fare", "Age"]
        data["Age"] = data["Age"].fillna(data["Age"].mean())
        self.len = data.shape[0]  # shape = (rows, columns)
        self.dev_data = data[int(self.len * 0.8):]  # validation split: the remaining 20%
        # one-hot encode, convert to an array, then to a tensor
        self.x_data = torch.from_numpy(np.array(pd.get_dummies(self.dev_data[features])).astype(np.float32))
        self.y_data = torch.from_numpy(np.array(self.dev_data[labels]).astype(np.float32))
        self.dev_len = self.dev_data.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.dev_len

dev_dataset = Dev_TitanicDataset('tantic_train.csv')
dev_loader = DataLoader(dataset=dev_dataset, batch_size=8, shuffle=False, num_workers=2)

# design model using class
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # the six selected features become 7 columns after one-hot encoding: Sex expands into two
        self.linear1 = torch.nn.Linear(7, 6)
        self.linear2 = torch.nn.Linear(6, 6)
        self.linear3 = torch.nn.Linear(6, 3)
        self.linear4 = torch.nn.Linear(3, 2)
        self.linear5 = torch.nn.Linear(2, 1)
        self.relu = torch.nn.ReLU()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        x = self.relu(self.linear4(x))
        x = self.sigmoid(self.linear5(x))
        return x

model = Model()
model.to(device)

# construct loss and optimizer
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.Adam(model.parameters(),
                             lr=0.01,
                             betas=(0.9, 0.999),
                             eps=1e-08,
                             weight_decay=0,
                             amsgrad=False)

# training cycle: forward, backward, update
def train(epoch):
    train_loss = 0.0
    count = 0
    for i, data in enumerate(train_loader, 0):  # start=0; train_loader shuffles first, then yields mini-batches
        inputs, labels = data[0].to(device), data[1].to(device)  # train on the gpu
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        # print(epoch, i, loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        count = i + 1  # number of batches seen
    print('epoch:', epoch + 1, 'train loss:', train_loss / count, end=',')

# validation
def dev():
    with torch.no_grad():
        correct = 0.0
        total = 0.0
        dev_mean_loss = 0.0
        count = 0
        for i, data in enumerate(dev_loader, 0):
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            dev_loss = criterion(outputs, labels)
            dev_mean_loss += dev_loss.item()
            total += labels.size(0)
            correct += (outputs.round() == labels).sum().item()  # rounding: >= 0.5 counts as survived
            count = i + 1
        acc = correct / total
        print('dev loss:', dev_mean_loss / count, 'Accuracy on dev set:', acc)

if __name__ == '__main__':
    for epoch in range(100):
        train(epoch)
        dev()

    # predict on the test set and save the results as a csv file
    test_data = pd.read_csv('tantic_test.csv')
    features = ["Pclass", "Sex", "SibSp", "Parch", "Fare", "Age"]
    test_data["Age"] = test_data["Age"].fillna(test_data["Age"].mean())
    test = torch.from_numpy(np.array(pd.get_dummies(test_data[features])).astype(np.float32)).to(device)
    with torch.no_grad():
        y_pred = model(test)
    y = []
    for i in y_pred:
        if i >= 0.5:  # rounding: y >= 0.5 means survived, otherwise not
            y.append(1)
        else:
            y.append(0)
    output = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': y})
    output.to_csv('my_predict.csv', index=False)
After switching the Age imputation to the median:
The program is a near-verbatim copy of the one above; the only changes are the three Age-imputation lines, which switch from the mean to the median:

data["Age"] = data["Age"].fillna(data["Age"].median())  # in Train_TitanicDataset
data["Age"] = data["Age"].fillna(data["Age"].median())  # in Dev_TitanicDataset
test_data["Age"] = test_data["Age"].fillna(test_data["Age"].median())  # for the test set

Everything else (datasets, model, training, validation, prediction, csv export) is identical to the code above.
9. Multi-Class Classification
Softmax layer
Its purpose is to make all of the network's output values positive and sum to 1 (see the small example below).
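A minimal sketch of what softmax does to a vector of raw scores (the numbers are arbitrary):

import torch

z = torch.tensor([1.0, 2.0, 3.0])  # raw, unnormalized scores (logits)
p = torch.softmax(z, dim=0)        # exp(z_i) / sum_j exp(z_j)
print(p)                           # ≈ tensor([0.0900, 0.2447, 0.6652])
print(p.sum())                     # tensor(1.) — all positive, sums to 1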
Loss function
Note: "The input is expected to contain raw, unnormalized scores for each class." In other words, torch.nn's cross-entropy loss already includes the softmax, so you must not add an extra activation yourself.
1. CrossEntropyLoss
This loss combines two pieces, LogSoftmax followed by NLLLoss (verified in the sketch after this list).
"It is useful when training a classification problem with C classes" (C is the number of classes). "If provided, the optional argument weight
should be a 1D Tensor assigning weight to each of the classes. This is particularly useful when you have an unbalanced training set."
In a classification problem, if one or more of the classes you want to predict has very few samples, you may be facing a class-imbalance problem in your data.
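A small check of both points: CrossEntropyLoss equals NLLLoss applied to log-softmax outputs, and a per-class weight tensor (the values here are arbitrary, for illustration only) can compensate for imbalance:

import torch

logits = torch.randn(3, 5)  # batch of 3, 5 classes, raw scores
target = torch.tensor([1, 0, 4])

ce = torch.nn.CrossEntropyLoss()
nll = torch.nn.NLLLoss()
log_softmax = torch.nn.LogSoftmax(dim=1)

# CrossEntropyLoss == NLLLoss on log-probabilities
print(ce(logits, target))
print(nll(log_softmax(logits), target))  # same value

# per-class weights for an unbalanced training set (made-up example values)
weighted_ce = torch.nn.CrossEntropyLoss(weight=torch.tensor([1.0, 1.0, 1.0, 1.0, 5.0]))
print(weighted_ce(logits, target))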
import torch

# input is of size nBatch x nClasses = 3 x 5
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)  # 3 matches input's batch; 5 is the class count (labels drawn uniformly from 0-4)
cross_entropy_loss = torch.nn.CrossEntropyLoss()
output = cross_entropy_loss(input, target)
output.backward()
print('output: ', output)
2. NLLLoss
"It is useful to train a classification problem with C classes."
"This is particularly useful when you have an unbalanced training set."
The following is the key difference between the two: one needs no extra activation, the other requires one.
"The input given through a forward call is expected to contain log-probabilities of each class."
import torch
import torch.nn as nn

m = nn.LogSoftmax(dim=1)  # NLLLoss needs the log-softmax applied first
loss = nn.NLLLoss()
# input is of size N x C = 3 x 5
input = torch.randn(3, 5, requires_grad=True)
# each element in target has to have 0 <= value < C
target = torch.tensor([1, 0, 4])
output = loss(m(input), target)  # m(input): convert to log-probabilities first
output.backward()
print('output: ', output)
MNIST dataset
Network design
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# dataset download and preprocessing
batch_size = 64
# convert to tensor and normalize
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root='../dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# model design
# input is (N, 1, 28, 28): MNIST images are 28x28, and 28*28=784 pixels are flattened into one row per image
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)  # output: scores for the 10 classes

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)  # no final activation: CrossEntropyLoss applies the softmax itself

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# training
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# testing
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no_grad(): skip gradient tracking
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(100):
        train(epoch)
        test()
Exercise 2
You must submit a csv file containing the product id, all candidate class names, and the probability for each class.
import pandas as pd
import numpy as np
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim

# data exploration
otto_data = pd.read_csv("train.csv")
otto_data.describe()  # summary statistics of the data
otto_data.shape  # (61878, 95)

# count how many samples each class has
import seaborn as sns
import matplotlib.pyplot as plt
sns.countplot(otto_data["target"])
plt.show()
import pandas as pd
import numpy as np
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import torch.optim as optim

# the classes in the dataset are strings (Class_1, ...); convert them to indices
def target2idx(targets):
    target_idx = []
    target_labels = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9']
    for target in targets:
        target_idx.append(target_labels.index(target))
    return target_idx

class OttoDataset(Dataset):
    def __init__(self, filepath):
        data = pd.read_csv(filepath)
        labels = target2idx(data['target'])
        self.len = data.shape[0]
        self.X_data = torch.tensor(np.array(data)[:, 1:-1].astype(np.float32))  # [:, 1:-1] drops the id column and the target column
        self.y_data = torch.from_numpy(np.array(labels))

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__(self):
        return self.len

otto_dataset = OttoDataset('otrain.csv')
train_loader = DataLoader(dataset=otto_dataset, batch_size=64, shuffle=True, num_workers=2)

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# model design
# input: 93 features; output: scores for the 9 classes
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear1 = torch.nn.Linear(93, 64)
        self.linear2 = torch.nn.Linear(64, 32)
        self.linear3 = torch.nn.Linear(32, 16)
        self.linear4 = torch.nn.Linear(16, 9)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        x = self.linear4(x)
        return x

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# training
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# prediction helper: save the predictions to a csv file
def predict_save():
    with torch.no_grad():
        test_data = pd.read_csv('otest.csv')
        x_text = torch.tensor(np.array(test_data)[:, 1:].astype(float)).to(device)
        y_pred = model(x_text.float())
        _, predicted = torch.max(y_pred, dim=1)  # take the index of the largest score, i.e. the predicted class
        out = pd.get_dummies(predicted.cpu())  # get_dummies one-hot encodes via pandas, convenient for the submission format
        labels = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9']
        # add the column labels
        out.columns = labels
        # insert the id column
        out.insert(0, 'id', test_data['id'])
        out.to_csv('otto_predict.csv', index=False)

if __name__ == '__main__':
    for epoch in range(1000):
        train(epoch)
    # run prediction
    predict_save()
The results are poor; I'll have to learn how to optimize the model later.
10. CNN Basics
Everything so far used fully connected layers.
Basic structure
Convolution changes both the channel count and the spatial size.
Downsampling changes the spatial size but not the channel count.
The convolution process
Take a color image as an example: it has 3 channels, and the number of output channels is determined by the convolution kernels.
Single-channel (grayscale) convolution looks like this:
N channels
Why does 5×5 become 3×3?
Because a 3×3 kernel can only slide to 3 positions in one horizontal pass (the general formula is sketched below).
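More generally (my note, not from the lecture slide), the output size follows from the input size W, kernel size K, padding P, and stride S:

W_{\text{out}} = \left\lfloor \frac{W_{\text{in}} - K + 2P}{S} \right\rfloor + 1, \qquad \text{e.g. } \frac{5 - 3 + 2\cdot 0}{1} + 1 = 3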
N-dimensional input and N-dimensional output
cat: stacking (concatenation along channels)
Convolutional layer, example 1
import torch

in_channels, out_channels = 5, 10  # input and output channels
width, height = 100, 100           # spatial size
kernel_size = 3                    # kernel size
batch_size = 1

input = torch.randn(batch_size,
                    in_channels,
                    width,
                    height)
conv_layer = torch.nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size=kernel_size)
output = conv_layer(input)

print(input.shape)
print(output.shape)
print(conv_layer.weight.shape)
The 1 here is the batch size.
Pooling / padding
With a 3×3 kernel and padding=1, the output keeps the same spatial size as the input.
import torch

input = [3, 4, 6, 5, 7,
         2, 4, 6, 8, 2,
         1, 6, 7, 8, 4,
         9, 7, 4, 6, 2,
         3, 7, 5, 4, 1]
input = torch.Tensor(input).view(1, 1, 5, 5)  # batch size, channels, W, H
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
kernel = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]).view(1, 1, 3, 3)
# set the kernel weights: output channels, input channels, W, H
conv_layer.weight.data = kernel.data
output = conv_layer(input)
print(output)
Stride
With stride=2, the output is only 2×2.
import torch

input = [3, 4, 6, 5, 7,
         2, 4, 6, 8, 2,
         1, 6, 7, 8, 4,
         9, 7, 4, 6, 2,
         3, 7, 5, 4, 1]
input = torch.Tensor(input).view(1, 1, 5, 5)  # batch size, channels, W, H
conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, stride=2, bias=False)
kernel = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]).view(1, 1, 3, 3)
# set the kernel weights: output channels, input channels, W, H
conv_layer.weight.data = kernel.data
output = conv_layer(input)
print(output)
Downsampling: max pooling layer
Pooling operates within each channel separately: it leaves the channel count unchanged but shrinks W and H.
import torch

input = [3, 4, 6, 5,
         2, 4, 6, 8,
         1, 6, 7, 8,
         9, 7, 4, 6,
         ]
# the view below has to match the input layout
input = torch.Tensor(input).view(1, 1, 4, 4)
maxpooling_layer = torch.nn.MaxPool2d(kernel_size=2)
output = maxpooling_layer(input)
print(output)
A CNN example
You must know the input size of the final linear layer.
As the teacher said, you don't have to compute it by hand; let PyTorch compute it for you.
Example below.
This is the finished model; the linear layer's input turns out to be 320.
import torch
import torch.nn.functional as F

in_channels, out_channels = 1, 10  # input and output channels
width, height = 28, 28             # spatial size of MNIST images
kernel_size = 3                    # kernel size (unused below; the Net hard-codes 5)
batch_size = 1

input = torch.randn(batch_size,
                    in_channels,
                    width,
                    height)

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        x = x.view(batch_size, -1)  # flatten the conv features
        return x

test = Net()
output = test(input)
print(input.shape)
print(output.shape)
You can see the computed size is 320.
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# dataset download and preprocessing
batch_size = 64
# convert to tensor and normalize
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root='../dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        x = x.view(batch_size, -1)  # flatten the conv features to (n, 320)
        x = self.fc(x)
        return x

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# training
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# testing
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no_grad(): skip gradient tracking
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(100):
        train(epoch)
        test()
Exercise
A network modified from the one above.
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# dataset download and preprocessing
batch_size = 64
# convert to tensor and normalize
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root='../dataset/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=3)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=3)
        self.conv3 = torch.nn.Conv2d(20, 40, kernel_size=3)
        self.pooling = torch.nn.MaxPool2d(2)
        self.lin1 = torch.nn.Linear(40, 30)
        self.lin2 = torch.nn.Linear(30, 15)
        self.lin3 = torch.nn.Linear(15, 10)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        x = F.relu(self.pooling(self.conv3(x)))
        x = x.view(batch_size, -1)  # flatten the conv features
        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = self.lin3(x)
        return x

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# training
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# testing
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no_grad(): skip gradient tracking
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(100):
        train(epoch)
        test()
11. Advanced CNNs
The point of this lesson: if your network is complex, you can wrap the repeated pieces in new classes.
GoogLeNet
The circled parts of the diagram are repeated, so they are factored out into an Inception module.
Inception Module
Since we don't know in advance which kernel size works best, a set of convolutions is run in parallel, all of them at once; the branches that work well end up with relatively larger weights.
Concatenate: join the tensors along the channel dimension.
Average pooling: the counterpart of the max pooling we saw earlier. Max pooling uses the maximum value of each local cluster of neurons in the feature map, while average pooling takes the average value.
1 × 1 convolution
A given position of the output (say, the center of the output matrix above) fuses the information from the same position of all three input channels; it is independent of every other position.
Purpose: change the number of channels.
Reducing the channel count before a convolution cuts the computation dramatically; a rough count follows below.
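A back-of-the-envelope multiplication count, using the channel sizes from the lecture's example (a 28×28 feature map, 192 input channels, 32 output channels, 5×5 kernels; treat the exact numbers as illustrative):

direct = 5 * 5 * 28 * 28 * 192 * 32        # one big 5x5 convolution
reduced = (1 * 1 * 28 * 28 * 192 * 16      # 1x1 conv down to 16 channels first
           + 5 * 5 * 28 * 28 * 16 * 32)    # then the 5x5 conv on 16 channels
print(direct)   # 120,422,400
print(reduced)  # 12,443,648 — roughly a 10x saving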
dim = 1:
a tensor's dimensions are B × C × W × H, so concatenating along the channel axis means dim = 1 (see the torch.cat example below).
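A minimal torch.cat example (branch shapes are made up to match the InceptionA module below):

import torch

# three branch outputs with the same B, W, H but different channel counts
a = torch.randn(1, 16, 28, 28)
b = torch.randn(1, 24, 28, 28)
c = torch.randn(1, 24, 28, 28)

out = torch.cat([a, b, c], dim=1)  # dim=1 is the channel axis of (B, C, W, H)
print(out.shape)                   # torch.Size([1, 64, 28, 28])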
Colab won't let me freeload any longer, so this runs locally.
The Inception part is designed to mirror the diagram above, and is then called from the Net class.
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import os

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# dataset download and preprocessing
batch_size = 64
DOWNLOAD_MNIST = False  # whether to download the data

# data preparation
# MNIST digits dataset
if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):  # download if ./mnist is missing or empty
    DOWNLOAD_MNIST = True

train_dataset = datasets.MNIST(
    root='./mnist',
    train=True,  # download train data
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)
test_dataset = datasets.MNIST(
    root='./mnist',
    train=False,  # train=False selects the test split
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)

# DataLoader wraps a dataset (custom or built-in) and serves it as batch-size tensors,
# ready to be fed straight into the model
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # shuffle the training data
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class InceptionA(nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        self.branch3x3_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3 = self.branch3x3_1(x)
        branch3x3 = self.branch3x3_2(branch3x3)
        branch3x3 = self.branch3x3_3(branch3x3)

        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3, branch_pool]
        return torch.cat(outputs, dim=1)  # 16 + 24 + 24 + 24 = 88 output channels

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)
        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(1408, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = self.incep1(x)
        x = F.relu(self.mp(self.conv2(x)))
        x = self.incep2(x)
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# training
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# testing
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no_grad(): skip gradient tracking
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(100):
        train(epoch)
        test()
By the fifth epoch the test accuracy already reached 98%.
Residual networks (ResNet)
Designed to address the vanishing-gradient problem.
Vanishing gradients
When a gradient is zero or approaches zero, the weight stops being updated (a tiny demo follows below).
The fix
The output on the right adds the original input back in: y = F(x) + x.
A simple implementation
Note that a residual block's input and output must have the same W and H, so that x + y is well-defined.
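A minimal illustration of why deep chains of small derivatives vanish (my own sketch, not course code; the depth and the 0.25 factor, sigmoid's maximum derivative, are illustrative):

# Each layer scales the upstream gradient by a local derivative; sigmoid's is at most 0.25.
depth = 20
grad = 1.0
for _ in range(depth):
    grad *= 0.25           # without skip connections
print(grad)                # ~9.1e-13: the early layers see almost no gradient

# With a residual block y = x + F(x), dy/dx = 1 + F'(x):
# the identity term keeps each factor near 1 instead of near 0.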
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import os

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# dataset download and preprocessing
batch_size = 64
DOWNLOAD_MNIST = False  # whether to download the data

# data preparation
# MNIST digits dataset
if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):  # download if ./mnist is missing or empty
    DOWNLOAD_MNIST = True

train_dataset = datasets.MNIST(
    root='./mnist',
    train=True,  # download train data
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)
test_dataset = datasets.MNIST(
    root='./mnist',
    train=False,  # train=False selects the test split
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)

# wrap the datasets into batch-size tensors ready for the model
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # shuffle the training data
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.channels = channels
        self.conv1 = nn.Conv2d(channels, channels,
                               kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels,
                               kernel_size=3, padding=1)

    def forward(self, x):
        y = F.relu(self.conv1(x))
        y = self.conv2(y)
        return F.relu(x + y)  # add the original input back in

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)
        self.fc = nn.Linear(512, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = self.mp(F.relu(self.conv1(x)))
        x = self.rblock1(x)
        x = self.mp(F.relu(self.conv2(x)))
        x = self.rblock2(x)
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# training
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# testing
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no_grad(): skip gradient tracking
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
After hitting 98% it stopped improving.
Exercise 1
Reference: 论文笔记-Identity Mappings in Deep Residual Networks_slim1017的博客-CSDN博客
The paper analyzes why ResNet's identity mapping works so well, i.e. why it lets gradients flow through the network without exploding or vanishing. Experimentally, a 1001-layer ResNet reaches a 4.62% error rate on CIFAR-10, with further experiments on CIFAR-100 and ImageNet.
In other words, the paper justifies why the skip branch in ResNet adds the original value back unchanged (the identity mapping).
Below I pick variant (e) from the figure above and implement it.
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import os
import matplotlib.pyplot as plt

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# dataset download and preprocessing
batch_size = 64
DOWNLOAD_MNIST = False  # whether to download the data

# data preparation
# MNIST digits dataset
if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):  # download if ./mnist is missing or empty
    DOWNLOAD_MNIST = True

train_dataset = datasets.MNIST(
    root='./mnist',
    train=True,  # download train data
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)
test_dataset = datasets.MNIST(
    root='./mnist',
    train=False,  # train=False selects the test split
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)

# wrap the datasets into batch-size tensors ready for the model
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # shuffle the training data
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.channels = channels
        self.conv1 = nn.Conv2d(channels, channels,
                               kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels,
                               kernel_size=3, padding=1)
        self.oneConv = nn.Conv2d(channels, channels,
                                 kernel_size=1)  # a 1x1 convolution on the skip branch

    def forward(self, x):
        y = F.relu(self.conv1(x))
        y = self.conv2(y)
        return F.relu(self.oneConv(x) + y)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)
        self.fc = nn.Linear(512, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = self.mp(F.relu(self.conv1(x)))
        x = self.rblock1(x)
        x = self.mp(F.relu(self.conv2(x)))
        x = self.rblock2(x)
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

epoch_list = []
acc_list = []

# training
def train(epoch):
    epoch_list.append(epoch)
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# testing
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no_grad(): skip gradient tracking
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))
    acc_list.append(100 * correct / total)

if __name__ == '__main__':
    for epoch in range(20):
        train(epoch)
        test()
    # plot the accuracy curve
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(epoch_list, acc_list, label='resnet')
    ax.set_xlabel('epoch')              # Add an x-label to the axes.
    ax.set_ylabel('test accuracy (%)')  # Add a y-label to the axes.
    ax.set_title("resnet")              # Add a title to the axes.
    plt.show()
Exercise 2
References:
DenseNet算法详解_AI之路的博客-CSDN博客
卷积神经网络:DenseNet_丁天牛的博客-CSDN博客
Pytorch Note32 稠密连接的卷积网络 DenseNet_风信子的猫Redamancy的博客-CSDN博客
神经网络中concat与add的区别_沃特么.拆基.达柴机的博客-CSDN博客
如何理解卷积中的add与concatnate操作_Dojanro的博客-CSDN博客
From the paper's abstract: if a convolutional network contains shorter connections between layers close to the input and layers close to the output, it can be trained deeper, more accurately, and more efficiently.
The paper embraces this observation and introduces the Densely Connected Convolutional Network (DenseNet), which connects each layer to every other layer in a feed-forward fashion.
Whereas a traditional L-layer convolutional network has L connections, one between each layer and the next, DenseNet has L(L+1)/2 direct connections.
For each layer, the feature maps of all preceding layers are used as its input, and its own feature maps are used as inputs to all subsequent layers.
DenseNets have several compelling advantages: they alleviate the vanishing-gradient problem, strengthen feature propagation, encourage feature reuse, and substantially reduce the number of parameters.
The proposed architecture is evaluated on four highly competitive object recognition benchmarks (CIFAR-10, CIFAR-100, SVHN, and ImageNet).
DenseNets obtain significant improvements over most of the state of the art, while requiring less computation to reach high performance.
The previous model reached 98%; this one reaches 99%.
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
import os
import matplotlib.pyplot as plt

# select a gpu if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# dataset download and preprocessing
batch_size = 64
DOWNLOAD_MNIST = False  # whether to download the data

# data preparation
# MNIST digits dataset
if not (os.path.exists('./mnist/')) or not os.listdir('./mnist/'):  # download if ./mnist is missing or empty
    DOWNLOAD_MNIST = True

train_dataset = datasets.MNIST(
    root='./mnist',
    train=True,  # download train data
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)
test_dataset = datasets.MNIST(
    root='./mnist',
    train=False,  # train=False selects the test split
    transform=transforms.ToTensor(),
    download=DOWNLOAD_MNIST
)

# wrap the datasets into batch-size tensors ready for the model
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # shuffle the training data
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class Bottleneck(nn.Module):  # Bottleneck is the building block of a dense block
    def __init__(self, in_channels, growth_rate, bn_size, drop_rate=0):
        super(Bottleneck, self).__init__()
        self.drop_rate = drop_rate
        self.bottleneck = nn.Sequential(
            nn.BatchNorm2d(in_channels),  # batch normalization
            nn.ReLU(True),
            nn.Conv2d(in_channels, bn_size * growth_rate, 1, stride=1, padding=0, bias=False),  # the 1x1 conv only changes the channel count
            nn.BatchNorm2d(bn_size * growth_rate),
            nn.ReLU(True),
            nn.Conv2d(bn_size * growth_rate, growth_rate, 3, stride=1, padding=1, bias=False)
        )
        self.dropout = nn.Dropout(self.drop_rate)  # randomly zeroes input elements with probability p (Bernoulli samples)

    def forward(self, x):
        y = self.bottleneck(x)
        if self.drop_rate > 0:
            # apply dropout if requested
            y = self.dropout(y)
        # feature fusion: the key step
        output = torch.cat([x, y], 1)  # cat differs from add: cat stacks along the channel axis
        return output

class DenseBlock(nn.Module):  # stack a number of Bottlenecks into one dense block
    # combine multiple Bottlenecks
    def __init__(self, layers_num, in_channels, growth_rate, bn_size, drop_rate=0):
        super(DenseBlock, self).__init__()
        layers = []
        for i in range(layers_num):
            # the input channel count grows by growth_rate per layer
            layers.append(Bottleneck(in_channels + i * growth_rate, growth_rate, bn_size, drop_rate))
        # pass the layers to Sequential in order
        self.layers = nn.Sequential(*layers)  # *layers unpacks the list into one dense block

    def forward(self, x):
        output = self.layers(x)
        return output

class Transition(nn.Module):  # the connector between dense blocks; its job is to reduce the channel count
    def __init__(self, in_channels, channels):
        super(Transition, self).__init__()
        self.transition = nn.Sequential(
            nn.BatchNorm2d(in_channels),
            nn.ReLU(True),
            nn.Conv2d(in_channels, channels, 1, stride=1, padding=0, bias=False),
            nn.AvgPool2d(2, stride=2)
        )

    def forward(self, x):
        output = self.transition(x)
        return output

# init_channels: the initial channel count
# growth_rate=32: every Bottleneck outputs the same number of feature channels;
#   after each concatenation the channel count increases in steps of 32, which is why 32 is called the GrowthRate
# blocks=[6, 12, 24, 16]: the layer counts of the successive dense blocks
# num_classes=10: the number of image classes
# MNIST images are small, so there is no need for that many blocks here
class Net(nn.Module):
    def __init__(self, init_channels=64, growth_rate=32, blocks=[6, 12, 24, 16], num_classes=10):
        super(Net, self).__init__()
        bn_size = 4
        drop_rate = 0
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, init_channels, 3, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(True),
            nn.MaxPool2d(3, stride=2, padding=1)
        )
        num_features = init_channels  # input channel count of the block
        # num_features grows by growth_rate with every Bottleneck in the block
        self.block1 = DenseBlock(blocks[0], num_features, growth_rate, bn_size, drop_rate)
        num_features = num_features + blocks[0] * growth_rate
        # each Transition halves num_features
        self.transition1 = Transition(num_features, num_features // 2)
        num_features = num_features // 2
        # a full DenseNet keeps alternating DenseBlock + Transition; I stop here
        self.avgpool = nn.AvgPool2d(3, stride=1)
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.block1(x)
        x = self.transition1(x)
        x = self.avgpool(x)
        # flatten
        x = x.view(x.size(0), -1)
        output = self.fc(x)
        return output

model = Net()
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

epoch_list = []
acc_list = []

# training
def train(epoch):
    epoch_list.append(epoch)
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()  # reset the optimizer's gradients
        # forward + backward + update
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()  # .item() avoids keeping the computational graph alive
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

# testing
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no_grad(): skip gradient tracking
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))
    acc_list.append(100 * correct / total)

if __name__ == '__main__':
    for epoch in range(20):
        train(epoch)
        test()
    # plot the accuracy curve
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(epoch_list, acc_list, label='densenet')
    ax.set_xlabel('epoch')              # Add an x-label to the axes.
    ax.set_ylabel('test accuracy (%)')  # Add a y-label to the axes.
    ax.set_title("densenet")            # Add a title to the axes.
    plt.show()
13. Recurrent Neural Networks (Basics)
What is an RNN?
It handles data with sequential structure, using a weight-sharing mechanism.
It maps one sequence to another sequence.
x1, x2, ... are the input features, and the RNN cell applied at every step is one and the same linear layer.
In code this is literally a loop, hence "recurrent" neural network (a bare-bones sketch of that loop follows below).
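A minimal sketch of the loop (my own illustration, with made-up sizes, not course code): the same cell, i.e. the same weights, is applied at every time step, with the hidden state carried along:

import torch

input_size, hidden_size, seq_len = 4, 2, 3
cell = torch.nn.Linear(input_size + hidden_size, hidden_size)  # one shared linear layer

xs = torch.randn(seq_len, input_size)  # the sequence x1, x2, x3
h = torch.zeros(hidden_size)           # initial hidden state h0

for x in xs:  # the "recurrent" loop
    h = torch.tanh(cell(torch.cat([x, h])))  # h_t = tanh(W [x_t; h_{t-1}] + b)
    print(h)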
The computation
Implementation
import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

# (seq, batch, features): seq is the sequence dimension
dataset = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(batch_size, hidden_size)

for idx, input in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input size: ', input.shape)
    hidden = cell(input, hidden)
    print('outputs size: ', hidden.shape)
    print(hidden)
pytorch 实现RNN (implementing RNN in pytorch)
Using pytorch's built-in RNN, where:
the input inputs is the boxed group of xi,
the input hidden is h0 on the left,
the output out is the boxed sequence of hidden states on top,
and the output hidden is hN on the right.
What is num_layers
It is the number of stacked RNN layers.
The figure above uses three linear layers; each row is one layer.
import torch
batch_size = 1
seq_len = 3  # three inputs x1, x2, x3
input_size = 4
hidden_size = 2
num_layers = 1  # a single layer
cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size,
num_layers=num_layers)
# (seqLen, batchSize, inputSize)
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)
out, hidden = cell(inputs, hidden)
print('Output size:', out.shape)
print('Output:', out)
print('Hidden size: ', hidden.shape)
print('Hidden: ', hidden)
From the output above: relative to the input, the last dimension of output changes from inputSize to hiddenSize.
Example 1
Train a model that turns "hello" into "ohlol".
One-hot vectors
Each step amounts to a multi-class classification problem, so we use the
cross-entropy loss.
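A tiny sketch of that loss (my own illustration): CrossEntropyLoss takes raw per-class scores plus a class index, which is why the size-4 hidden vector can be fed to it directly at every step.
import torch

criterion = torch.nn.CrossEntropyLoss()
hidden = torch.randn(1, 4)       # one step's output: shape (batch, num_classes)
label = torch.LongTensor([3])    # target class index, e.g. 'o'
print(criterion(hidden, label))  # scalar loss for this single step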
Using RNNCell
self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
hidden_size=self.hidden_size)
import torch
input_size = 4  # four classes (characters)
hidden_size = 4
batch_size = 1
# data preparation
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # hello
y_data = [3, 1, 2, 3, 2]  # ohlol
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
# essentially a dictionary lookup that builds the list of one-hot vectors
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
labels = torch.LongTensor(y_data).view(-1, 1)
class Model(torch.nn.Module):
def __init__(self, input_size, hidden_size, batch_size):
super(Model, self).__init__()
# self.num_layers = num_layers
self.batch_size = batch_size
self.input_size = input_size
self.hidden_size = hidden_size
self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
hidden_size=self.hidden_size)
def forward(self, input, hidden):
hidden = self.rnncell(input, hidden)
return hidden
def init_hidden(self):
return torch.zeros(self.batch_size, self.hidden_size)
net = Model(input_size, hidden_size, batch_size)
# loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
# training
for epoch in range(15):
    loss = 0
    optimizer.zero_grad()  # zero the gradients
    hidden = net.init_hidden()  # initialize hidden0
    print('Predicted string: ', end='')
    # inputs (seq, batch_size, input_size), input (batch_size, input_size)
    # labels (seq, 1), label (1)
    for input, label in zip(inputs, labels):
        hidden = net(input, hidden)
        loss += criterion(hidden, label)  # note: no .item() here -- the per-step losses must stay in the graph and be summed
        _, idx = hidden.max(dim=1)
        print(idx2char[idx.item()], end='')
    loss.backward()  # backpropagation
    optimizer.step()  # update
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
Using RNN
【PyTorch学习笔记】21:nn.RNN和nn.RNNCell的使用_LauZyHou的博客-CSDN博客_nn.rnn
RNN: the full recurrent network, which consumes the whole sequence at once
RNNCell: a single recurrent cell, applied one time step at a time
import torch
input_size = 4
hidden_size = 4
num_layers = 1
batch_size = 1
seq_len = 5
idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]
one_hot_lookup = [[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
labels = torch.LongTensor(y_data)
class Model(torch.nn.Module):
def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
super(Model, self).__init__()
self.num_layers = num_layers
self.batch_size = batch_size
self.input_size = input_size
self.hidden_size = hidden_size
self.rnn = torch.nn.RNN(input_size=self.input_size,
hidden_size=self.hidden_size,
num_layers=num_layers)
def forward(self, input):
hidden = torch.zeros(self.num_layers,
self.batch_size,
self.hidden_size)
out, _ = self.rnn(input, hidden)
return out.view(-1, self.hidden_size)
net = Model(input_size, hidden_size, batch_size, num_layers)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(15):
    # compared with the RNNCell version, this training loop is simpler
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
_, idx = outputs.max(dim=1)
idx = idx.data.numpy()
print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Embedding layer
One-hot vectors are sparse; an embedding layer turns them into dense vectors.
独热向量和嵌入层向量的区别以及嵌入层的应用_还记得樱花正开~的博客-CSDN博客
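Before the full example, a minimal sketch of what nn.Embedding does (the sizes match the example below, but the snippet itself is just my illustration):
import torch

emb = torch.nn.Embedding(4, 10)            # 4 tokens ('e','h','l','o'), 10-dim vectors
idx = torch.LongTensor([[1, 0, 2, 2, 3]])  # "hello" as indices, shape (batch, seq_len)
print(emb(idx).shape)                      # torch.Size([1, 5, 10]) -- dense, no one-hot needed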
import torch
# parameters
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]] # (batch, seq_len)
y_data = [3, 1, 2, 3, 2] # (batch * seq_len)
inputs = torch.LongTensor(x_data)
labels = torch.LongTensor(y_data)
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
        self.emb = torch.nn.Embedding(input_size, embedding_size)  # embedding layer, replacing the earlier one-hot vectors
self.rnn = torch.nn.RNN(input_size=embedding_size,
hidden_size=hidden_size,
num_layers=num_layers,
batch_first=True)
self.fc = torch.nn.Linear(hidden_size, num_class)
def forward(self, x):
hidden = torch.zeros(num_layers, x.size(0), hidden_size)
x = self.emb(x) # (batch, seqLen, embeddingSize)
x, _ = self.rnn(x, hidden)
x = self.fc(x)
return x.view(-1, num_class)
net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(15):
    # compared with the RNNCell version, this training loop is simpler
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
_, idx = outputs.max(dim=1)
idx = idx.data.numpy()
print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
Notice that, compared with before, the predictions become correct at an earlier epoch.
Exercise
Improved variants of the RNN
1. LSTM
LSTM networks are well suited to classifying, processing, and predicting time-series data, because important events in a sequence can be separated by time lags of unknown length.
The LSTM was developed to deal with the vanishing-gradient problem that can occur when training traditional RNNs.
Its relative insensitivity to gap length is an advantage over plain RNNs, hidden Markov models, and other sequence-learning methods in many applications.
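A small sketch of the structural difference (my own illustration with made-up sizes): an LSTM step carries two states, the hidden state h and the cell state c.
import torch

cell = torch.nn.LSTMCell(input_size=4, hidden_size=8)
x = torch.randn(3, 1, 4)        # (seq_len, batch, input_size)
h = torch.zeros(1, 8)           # hidden state
c = torch.zeros(1, 8)           # cell state -- the extra "memory" a plain RNN cell lacks
for x_t in x:                   # same loop as an RNN, but two states are carried along
    h, c = cell(x_t, (h, c))
print(h.shape, c.shape)         # torch.Size([1, 8]) torch.Size([1, 8])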
import torch
# parameters
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]] # (batch, seq_len)
y_data = [3, 1, 2, 3, 2] # (batch * seq_len)
inputs = torch.LongTensor(x_data) #torch.Size([1, 5])
labels = torch.LongTensor(y_data) #torch.Size([5])
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
        self.emb = torch.nn.Embedding(input_size, embedding_size)  # embedding layer, replacing the earlier one-hot vectors
self.lstm = torch.nn.LSTM(input_size=embedding_size,
hidden_size=hidden_size,
num_layers=num_layers,
batch_first=True)
self.fc = torch.nn.Linear(hidden_size, num_class)
    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)  # torch.Size([2, 1, 8])
        # the LSTM takes an extra state c -- this is the key difference from the RNN
        c = torch.zeros(num_layers, x.size(0), hidden_size)  # torch.Size([2, 1, 8])
        x = self.emb(x)  # (batch, seqLen, embeddingSize) torch.Size([1, 5, 10])
        out, (ht, ct) = self.lstm(x, (hidden, c))
x = self.fc(out)
return x.view(-1, num_class)
net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(15):
    # compared with the RNNCell version, this training loop is simpler
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
_, idx = outputs.max(dim=1)
idx = idx.data.numpy()
print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
2. GRU
Comparison of GRU and LSTM in keras with an example
The key difference between GRU and LSTM: a GRU has two gates (reset and update), while an LSTM has three (input, output, and forget).
The GRU is the less complex of the two, because it has fewer gates.
For small datasets the GRU is often preferred; for larger datasets the LSTM tends to do better.
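The gate-count difference shows up directly in the parameter counts; a quick check with illustrative sizes of my own choosing:
import torch

gru = torch.nn.GRU(input_size=10, hidden_size=8, num_layers=2, batch_first=True)
lstm = torch.nn.LSTM(input_size=10, hidden_size=8, num_layers=2, batch_first=True)
# the GRU stacks 3 weight blocks per layer, the LSTM 4, so the GRU is smaller
print('GRU params: ', sum(p.numel() for p in gru.parameters()))
print('LSTM params:', sum(p.numel() for p in lstm.parameters()))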
import torch
# parameters
num_class = 4
input_size = 4
hidden_size = 8
embedding_size = 10
num_layers = 2
batch_size = 1
seq_len = 5
idx2char = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]] # (batch, seq_len)
y_data = [3, 1, 2, 3, 2] # (batch * seq_len)
inputs = torch.LongTensor(x_data) #torch.Size([1, 5])
labels = torch.LongTensor(y_data) #torch.Size([5])
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
        self.emb = torch.nn.Embedding(input_size, embedding_size)  # embedding layer, replacing the earlier one-hot vectors
        # GRU
self.gru = torch.nn.GRU(input_size=embedding_size,
hidden_size=hidden_size,
num_layers=num_layers,
batch_first=True)
self.fc = torch.nn.Linear(hidden_size, num_class)
def forward(self, x):
hidden = torch.zeros(num_layers, x.size(0), hidden_size)#torch.Size([2, 1, 8])
x = self.emb(x) # (batch, seqLen, embeddingSize) torch.Size([1, 5, 10])
out, hn= self.gru(x, hidden)
x = self.fc(out)
return x.view(-1, num_class)
net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(15):
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
_, idx = outputs.max(dim=1)
idx = idx.data.numpy()
print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
print(', Epoch [%d/15] loss = %.3f' % (epoch + 1, loss.item()))
14. Recurrent Neural Networks (Advanced)
RNN classifier: classifying names
The input is a name, the output is the country it belongs to.
Preparing the data
Each name is a string of English characters: turn the string into a list of characters, then map each character to a number via its position in the character table (its ASCII code).
Names differ in length, so they have to be padded to a common length, as the sketch below shows.
Each country is assigned an index.
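A minimal sketch of this preprocessing, with made-up names (the real code below does the same thing inside make_tensors):
import torch

names = ['Adam', 'Li']
seqs = [[ord(c) for c in name] for name in names]   # chars -> ASCII codes
lengths = torch.LongTensor([len(s) for s in seqs])
padded = torch.zeros(len(seqs), lengths.max()).long()
for i, s in enumerate(seqs):
    padded[i, :len(s)] = torch.LongTensor(s)        # shorter names get zeros on the right
print(padded)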
Model design
A bidirectional recurrent neural network.
Output of the GRU network:
the output output is the row of hidden states along the top;
the output hidden holds the final states from the two ends (forward and backward).
Converting a name to a tensor
Code
import gzip
import csv
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence
# Parameters
HIDDEN_SIZE = 100  # hidden size
BATCH_SIZE = 256
N_LAYER = 2  # number of GRU layers
N_EPOCHS = 100
N_CHARS = 128  # the ASCII table has 128 characters in total
USE_GPU = True  # GPU
class NameDataset(Dataset):
    def __init__(self, is_train_set=True):
        filename = 'names_train.csv.gz' if is_train_set else 'names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:
            reader = csv.reader(f)
            rows = list(reader)
        self.names = [row[0] for row in rows]  # the Name field, element 0 of each row
        self.len = len(self.names)  # how many names in total
        self.countries = [row[1] for row in rows]  # the country field, element 1 of each row
        self.country_list = list(sorted(set(self.countries)))  # keep only the distinct countries
        self.country_dict = self.getCountryDict()  # build the dictionary
        self.country_num = len(self.country_list)
    def __getitem__(self, index):  # returns the name string and the index of its country in the country dictionary
        return self.names[index], self.country_dict[self.countries[index]]
    def __len__(self):
        return self.len
    def getCountryDict(self):  # build the country dictionary
        country_dict = dict()
        for idx, country_name in enumerate(self.country_list, 0):
            country_dict[country_name] = idx
        return country_dict
    def idx2country(self, index):  # return the country for a given index
        return self.country_list[index]
    def getCountriesNum(self):  # return the number of countries in the dictionary
        return self.country_num
trainset = NameDataset(is_train_set=True)
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testset = NameDataset(is_train_set=False)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
N_COUNTRY = trainset.getCountriesNum()  # number of country classes in the training set; sets the output dimension of the final linear layer
def name2list(name):
    arr = [ord(c) for c in name]
    return arr, len(arr)  # returns a tuple: (list of ASCII codes, its length)
def create_tensor(tensor):
if USE_GPU:
device = torch.device("cuda:0")
tensor = tensor.to(device)
return tensor
def make_tensors(names, countries):
    sequences_and_lengths = [name2list(name) for name in names]  # turn every name into a list of codes
name_sequences = [sl[0] for sl in sequences_and_lengths]
seq_lengths = torch.LongTensor([sl[1] for sl in sequences_and_lengths])
countries = countries.long()
# make tensor of name, BatchSize x SeqLen
seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()
for idx, (seq, seq_len) in enumerate(zip(name_sequences, seq_lengths), 0):
seq_tensor[idx, :seq_len] = torch.LongTensor(seq)
# sort by length to use pack_padded_sequence
seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)
seq_tensor = seq_tensor[perm_idx]
countries = countries[perm_idx]
return create_tensor(seq_tensor), \
create_tensor(seq_lengths), \
create_tensor(countries)
class RNNClassifier(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        # choose a unidirectional or bidirectional recurrent network
        self.n_directions = 2 if bidirectional else 1
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers,
                                bidirectional=bidirectional)
        self.fc = torch.nn.Linear(hidden_size * self.n_directions,
                                  output_size)  # hidden_size * self.n_directions depends on bidirectionality
    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions,
                             batch_size, self.hidden_size)
        return create_tensor(hidden)
    def forward(self, input, seq_lengths):
        # input shape: B x S -> S x B
        input = input.t()  # transpose before feeding the embedding layer
        batch_size = input.size(1)
        hidden = self._init_hidden(batch_size)
        embedding = self.embedding(input)
        # pack them up: lets the GRU handle variable-length sequences and also speeds up training
        gru_input = pack_padded_sequence(embedding, seq_lengths)
        output, hidden = self.gru(gru_input, hidden)
        if self.n_directions == 2:  # bidirectional case
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)  # concatenate the two final states
        else:
            hidden_cat = hidden[-1]
        fc_output = self.fc(hidden_cat)
        return fc_output
def trainModel():
    total_loss = 0
    for i, (names, countries) in enumerate(trainloader, 1):
        inputs, seq_lengths, target = make_tensors(names, countries)
        output = classifier(inputs, seq_lengths)  # forward: compute the model output
        loss = criterion(output, target)  # forward: compute the loss
        optimizer.zero_grad()  # zero the gradients
        loss.backward()  # backward
        optimizer.step()  # update the weights
        total_loss += loss.item()
        if i % 10 == 0:
            print(f'[{i * len(inputs)}/{len(trainset)}] ', end='')
            print(f'loss={total_loss / (i * len(inputs))}')
    return total_loss
def testModel():
correct = 0
total = len(testset)
print("evaluating trained model ...")
with torch.no_grad():
for i, (names, countries) in enumerate(testloader, 1):
inputs, seq_lengths, target = make_tensors(names, countries)
output = classifier(inputs, seq_lengths)
pred = output.max(dim=1, keepdim=True)[1]
correct += pred.eq(target.view_as(pred)).sum().item()
percent = '%.2f' % (100 * correct / total)
print(f'Test set: Accuracy {correct}/{total} {percent}%')
return correct / total
if __name__ == '__main__':
    # instantiate the classifier model
    classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
    if USE_GPU:
        device = torch.device("cuda:0")
        classifier.to(device)
    # loss and optimizer; a classification problem -> CrossEntropyLoss
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    print("Training for %d epochs..." % N_EPOCHS)
    acc_list = []  # list of test accuracies
    for epoch in range(1, N_EPOCHS + 1):
        # Train cycle
        trainModel()  # train
        acc = testModel()  # test
        acc_list.append(acc)
    # plotting
import matplotlib.pyplot as plt
import numpy as np
epoch = np.arange(1, len(acc_list) + 1, 1)
acc_list = np.array(acc_list)
plt.plot(epoch, acc_list)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid()
plt.show()
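One more note on pack_padded_sequence before the exercise: a tiny standalone demo (toy sizes of my own choosing) of what packing does to a padded batch.
import torch
from torch.nn.utils.rnn import pack_padded_sequence

padded = torch.randn(4, 2, 3)       # (max_seq_len, batch, feature); the second sequence is padded
lengths = torch.LongTensor([4, 2])  # true lengths, already sorted in descending order
packed = pack_padded_sequence(padded, lengths)
print(packed.data.shape)    # torch.Size([6, 3]): only the 4 + 2 real time steps are kept
print(packed.batch_sizes)   # tensor([2, 2, 1, 1]): how many sequences are active at each step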
Exercise
Predict the rating from a review text -- again a multi-class classification problem.
import pandas as pd
filepath = './train.tsv'
data = pd.read_csv(filepath, sep='\t')
data
import math
import torch
from itertools import chain
import pandas as pd
from torch.nn.utils.rnn import pack_padded_sequence
from torch.utils.data import Dataset, DataLoader
import time
import matplotlib.pyplot as plt
# This part is worth borrowing: I previously didn't know how to split this kind of
# ordered data into a training set and a validation set
class SAData(Dataset):
    def __init__(self, train):
        # build the data samples
        self.train = train
        full = pd.read_csv('./train.tsv', sep='\t')
        # randomly sample 80% as the training set; slicing by index order would give an
        # unrepresentative split (note: sampling 20% separately with the same random_state
        # would overlap the training rows, so the validation set is taken as the complement)
        train_part = full.sample(frac=0.8, replace=False, random_state=1, axis=0)
        if self.train:
            # self.data = full[:int(full.shape[0] * 0.8)]  # the slicing approach I used before
            self.data = train_part.reset_index(drop=True)  # regenerate the index
            ### for the final run, train on all the data ###
            # self.data = full
        else:
            # the remaining 20% is the validation set
            self.data = full.drop(train_part.index).reset_index(drop=True)  # regenerate the index
        self.len = self.data.shape[0]
        self.x_data, self.y_data = self.data['Phrase'], self.data['Sentiment']
    def __getitem__(self, index):
        # fetch one sample by index
        return self.x_data[index], self.y_data[index]
    def __len__(self):
        # return the dataset length
        return self.len
# dataset objects for training and validation
train_set = SAData(train=True)  # training set
validation_set = SAData(train=False)  # validation set
# Hyper Parameters
N_CHARS = 128  # number of ASCII characters
HIDDEN_SIZE = 100
N_LAYER = 2
BATCH_SIZE = 256
N_EPOCHS = 50
USE_GPU = True
# the five rating levels 0-4; set(train_set.y_data) keeps only the distinct classes
N_CLASS = len(set(train_set.y_data))
# DataLoader objects for training and validation
train_loader = DataLoader(
    dataset=train_set,
    batch_size=BATCH_SIZE,
    shuffle=True,
    # num_workers=2
)
validation_loader = DataLoader(
    dataset=validation_set,
    batch_size=BATCH_SIZE,
    shuffle=False,  # not shuffling the validation set makes the results easier to inspect
    # num_workers=2
)
def time_since(since):
s = time.time() - since
m = math.floor(s / 60)
s -= m * 60
return '%dm %ds' % (m, s)
def phrase2list(phrase):
    arr = [ord(c) for c in phrase]  # ord() returns the character's ASCII code
    return arr, len(arr)
def create_tensor(tensor):
if USE_GPU:
device = torch.device('cuda:0')
tensor = tensor.to(device)
return tensor
def make_tensor(phrase, sentiment):
    sequences_and_lengths = [phrase2list(p) for p in phrase]  # phrase string -> char list -> ASCII codes
    phrase_sequences = [sl[0] for sl in sequences_and_lengths]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequences_and_lengths])
    sentiment = sentiment.long()
    # make tensor of phrases, batchSize x seqLen
    seq_tensor = torch.zeros(len(phrase_sequences), seq_lengths.max()).long()
    for idx, (seq, seq_len) in enumerate(zip(phrase_sequences, seq_lengths)):  # zero padding
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)  # positions beyond the true length stay zero
    # sort by length to use pack_padded_sequence
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)  # perm_idx holds the original positions after sorting
    seq_tensor = seq_tensor[perm_idx]  # reorder the padded sequences by length
    sentiment = sentiment[perm_idx]
    return create_tensor(seq_tensor), create_tensor(seq_lengths), create_tensor(sentiment)
class RNNClassifier(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirection=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = 2 if bidirection else 1
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional=bidirection)
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)
    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size)
        return create_tensor(hidden)
    def forward(self, input, seq_lengths):
        input = input.t()  # transpose: B x S -> S x B
        batch_size = input.size(1)
        hidden = self._init_hidden(batch_size)
        embedding = self.embedding(input)
        # "pack" is best understood as compressing: a padded variable-length batch is
        # squeezed tight, since the padding is pure redundancy
        gru_input = pack_padded_sequence(embedding, seq_lengths)  # pack them up
        output, hidden = self.gru(gru_input, hidden)
        if self.n_directions == 2:
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            hidden_cat = hidden[-1]
        fc_output = self.fc(hidden_cat)
        return fc_output
def trainModel():
total_loss = 0
for i, (phrase, sentiment) in enumerate(train_loader, 1):
inputs, seq_lengths, target = make_tensor(phrase, sentiment)
output = classifier(inputs, seq_lengths)
loss = criterion(output, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
total_loss += loss.item()
if i % 10 == 0:
print(f'[{time_since(start)}] Epoch {epoch}', end='')
print(f'[{i * len(inputs)}/{len(train_set)}]', end='')
print(f'loss={total_loss / (i * len(inputs))}')
def evalModel():
correct = 0
total = len(validation_set)
print("Evaluating trained model...")
with torch.no_grad():
for i, (phrase, sentiment) in enumerate(validation_loader, 1):
inputs, seq_lengths, target = make_tensor(phrase, sentiment)
output = classifier(inputs, seq_lengths)
pred = output.max(dim=1, keepdim=True)[1]
correct += pred.eq(target.view_as(pred)).sum().item()
percent = '%.2f' % (100 * correct / total)
print(f'Test set: Accuracy {correct}/{total} {percent}%')
return correct / total
# load the test set
def get_test_set():
    test_set = pd.read_csv('./test.tsv', sep='\t')
    PhraseId = test_set['PhraseId']
    Phrase = test_set['Phrase']
    return PhraseId, Phrase
# text-processing function written for the test set
def make_tensor_test(phrase):
    sequences_and_lengths = [phrase2list(p) for p in phrase]  # phrase string -> char list -> ASCII codes
    phrase_sequences = [sl[0] for sl in sequences_and_lengths]
    seq_lengths = torch.LongTensor([sl[1] for sl in sequences_and_lengths])
    # make tensor of phrases, batchSize x seqLen
    seq_tensor = torch.zeros(len(phrase_sequences), seq_lengths.max()).long()
    for idx, (seq, seq_len) in enumerate(zip(phrase_sequences, seq_lengths)):  # zero padding
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)  # positions beyond the true length stay zero
    # sort by length to use pack_padded_sequence
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)  # perm_idx holds the original positions after sorting
    seq_tensor = seq_tensor[perm_idx]  # reorder the padded sequences by length
    # sorting shuffles the order within each batch, so record the original order org_idx
    # to restore the order of the predictions afterwards
    _, org_idx = perm_idx.sort(descending=False)
    return create_tensor(seq_tensor), create_tensor(seq_lengths), org_idx
def predict():
    # run the trained model on the test set
    PhraseId, Phrase = get_test_set()  # load the test set
    sentiment_list = []  # list for the predictions
    batchNum = math.ceil(PhraseId.shape[0] / BATCH_SIZE)  # total number of batches
    classifier = torch.load('./results/sentimentAnalyst.pkl')
    if USE_GPU:
        device = torch.device("cuda:0")
        classifier.to(device)
    with torch.no_grad():
        for i in range(batchNum):
            print(i)
            if i == batchNum - 1:
                phraseBatch = Phrase[BATCH_SIZE * i:]  # handle the last batch, which may be smaller than BATCH_SIZE
            else:
                phraseBatch = Phrase[BATCH_SIZE * i:BATCH_SIZE * (i + 1)]
            inputs, seq_lengths, org_idx = make_tensor_test(phraseBatch)
            output = classifier(inputs, seq_lengths)
            sentiment = output.max(dim=1, keepdim=True)[1]
            sentiment = sentiment[org_idx].squeeze(1)
            sentiment_list.append(sentiment.cpu().numpy().tolist())
    sentiment_list = list(chain.from_iterable(sentiment_list))  # flatten the per-batch lists into one list
    result = pd.DataFrame({'PhraseId': PhraseId, 'Sentiment': sentiment_list})
    result.to_csv('./results/SA_predict.csv', index=False)  # save the results
# Main Cycle
if __name__ == '__main__':
classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_CLASS, N_LAYER)
if USE_GPU:
device = torch.device("cuda:0")
classifier.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
start = time.time()
print("Training for %d epochs..." % N_EPOCHS)
acc_list = []
for epoch in range(1, N_EPOCHS + 1):
trainModel()
acc = evalModel()
acc_list.append(acc)
        # save the model whenever the accuracy reaches a new best
        if acc >= max(acc_list):
            torch.save(classifier, './results/sentimentAnalyst.pkl')
            print('Save Model!')
    predict()  # predict on the test set
# Plot Accuracy
epoch = [epoch + 1 for epoch in range(len(acc_list))]
plt.plot(epoch, acc_list)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid()
plt.show()
# A problem that can show up after the model has trained for a while:
# RuntimeError: cuDNN error: CUDNN_STATUS_INTERNAL_ERROR
# most likely caused by running out of GPU memory