目录
论文:
keras
torch版本:
训练:
测试和可视化
论文:
论文名:Gradient-Based Learning Applied to Document Recognition
keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers.convolutional import Conv2D,MaxPooling2D
from keras.utils.np_utils import to_categorical
import pickle
import gzip
import warnings
from keras import optimizers
warnings.filterwarnings("ignore",category=DeprecationWarning)
def build_LeNet():
    """Build and compile a LeNet-style CNN for 28x28x1 MNIST images.

    Returns:
        A compiled keras Sequential model with a 10-way softmax output,
        using Adam and categorical cross-entropy.
    """
    model = Sequential()
    # N x 28 x 28 x 1 -> N x 24 x 24 x 32
    # parameters: 5*5*1*32 + 32 (bias) = 832
    model.add(Conv2D(32, (5, 5), strides=(1, 1), input_shape=(28, 28, 1),
                     padding='valid', activation='relu', kernel_initializer='uniform'))
    # N x 24 x 24 x 32 -> N x 12 x 12 x 32
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # N x 12 x 12 x 32 -> N x 8 x 8 x 64
    model.add(Conv2D(64, (5, 5), strides=(1, 1), padding='valid',
                     activation='relu', kernel_initializer='uniform'))
    # N x 8 x 8 x 64 -> N x 4 x 4 x 64
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # N x 4 x 4 x 64 -> N x 1024
    model.add(Flatten())
    # N x 1024 -> N x 100
    model.add(Dense(100, activation='relu'))
    # N x 100 -> N x 10
    model.add(Dense(10, activation='softmax'))
    # Use the Adam class: the lowercase `optimizers.adam` alias only existed
    # in legacy standalone Keras and is absent from tf.keras.
    model.compile(optimizer=optimizers.Adam(lr=0.01),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    return model
if __name__ == "__main__":
    # Load the pickled MNIST splits; the context manager guarantees the
    # file handle is closed even if unpickling raises.
    with gzip.open(r'mnist.pkl.gz', 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f, encoding='unicode-escape')
    # Sanity-check dataset sizes and one sample's shape/values.
    print("train size ", len(train_set[0]), " type ", type(train_set[0][0]))
    print("valid size ", len(valid_set[0]))
    print("test size ", len(test_set[0]))
    print("train_set[0][0]", train_set[0][0].shape)
    print("train_set[0][0]", train_set[0][0])
    # Reshape flat 784-vectors into NHWC images; one-hot encode the labels.
    train_x = train_set[0].reshape((-1, 28, 28, 1))
    train_y = to_categorical(train_set[1])
    valid_x = valid_set[0].reshape((-1, 28, 28, 1))
    valid_y = to_categorical(valid_set[1])
    test_x = test_set[0].reshape((-1, 28, 28, 1))
    test_y = to_categorical(test_set[1])
    # Build, train, and evaluate the network.
    model = build_LeNet()
    model.fit(train_x, train_y, validation_data=(valid_x, valid_y),
              batch_size=500, epochs=2, verbose=1)
    result = model.evaluate(test_x, test_y, batch_size=20, verbose=2)
    print("loss : ", result[0])
    print("acc : ", result[1])
torch版本:
训练:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from matplotlib import pyplot as plt
lr = 0.01 # learning rate
momentum = 0.5 # SGD momentum
log_interval = 10 # log once every this many batches
epochs = 10 # number of training epochs
batch_size = 64 # training batch size
test_batch_size = 1000 # evaluation batch size
class LeNet(nn.Module):
    """LeNet-5 style CNN for 28x28 single-channel (MNIST) images.

    Input : N x 1 x 28 x 28 tensor.
    Output: N x 10 raw logits (softmax is left to the loss / caller).
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(                  # input: N x 1 x 28 x 28
            nn.Conv2d(1, 6, 5, 1, 2),                # padding=2 keeps 28x28
            nn.ReLU(),                               # -> N x 6 x 28 x 28
            nn.MaxPool2d(kernel_size=2, stride=2),   # -> N x 6 x 14 x 14
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),                     # -> N x 16 x 10 x 10
            nn.ReLU(),
            nn.MaxPool2d(2, 2)                       # -> N x 16 x 5 x 5
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU()
        )
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Forward pass; returns raw class logits."""
        x = self.conv1(x)
        # (removed an unused `fp1 = x.detach()` local left over from debugging)
        x = self.conv2(x)
        # nn.Linear expects a 2-D (batch, features) tensor, so flatten first.
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x  # logits; apply F.softmax(x, dim=1) externally if needed
def train(epoch):
    """Run one training epoch over `train_loader`.

    Uses the module-level `model`, `optimizer`, `device`, and `log_interval`.

    Returns:
        (last batch loss as float, accuracy over the whole epoch)
    """
    total = 0
    correct = 0.0
    model.train()  # enable training mode (dropout/batchnorm behavior)
    for batch_idx, (data, target) in enumerate(train_loader):
        # (deprecated torch.autograd.Variable wrappers removed — tensors
        # track gradients natively in modern PyTorch)
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()                    # reset accumulated gradients
        output = model(data)                     # forward pass
        loss = F.cross_entropy(output, target)   # cross-entropy loss
        # argmax over dim=1 gives the predicted class index per sample
        predict = output.argmax(dim=1)
        total += target.size(0)
        correct += (predict == target).sum().item()
        loss.backward()                          # backpropagate gradients
        optimizer.step()                         # update parameters
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    # BUG FIX: removed the per-batch Loss.append / Accuracy.append here —
    # the caller already appends the returned per-epoch values, so the
    # plotted lists mixed per-batch and per-epoch points.
    return loss.item(), correct / total
def test():
    """Evaluate `model` on `test_loader`; print average loss and accuracy."""
    model.eval()    # switch to evaluation mode
    test_loss = 0   # accumulated total loss
    correct = 0     # count of correct predictions
    with torch.no_grad():  # no gradients needed during evaluation
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # reduction='sum' accumulates the total batch loss
            # (size_average=False was removed from modern PyTorch)
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            # index of the max logit = predicted class
            pred = output.argmax(dim=1, keepdim=True)
            # .item() so `correct` is a plain int (a tensor would print
            # as "tensor(N)" in the summary below)
            correct += pred.eq(target.view_as(pred)).sum().item()
    # divide the accumulated sum by the dataset size to get the mean loss
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
if __name__ == '__main__':
    # Use the GPU when available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Training data loader.
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           # MNIST mean/std published by the dataset provider.
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)
    # Test data loader.
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=test_batch_size, shuffle=True)
    model = LeNet()               # instantiate the network
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    Loss = []
    Accuracy = []
    for epoch in range(1, epochs + 1):
        loss, acc = train(epoch)
        Loss.append(loss)
        Accuracy.append(acc)
        test()
    print('Finished Training')
    # BUG FIX: the original called plt.show() between the two subplots,
    # which blocks and splits them onto two separate figures; draw both
    # curves first, then show the single figure once.
    plt.subplot(2, 1, 1)
    plt.plot(Loss)
    plt.title('Loss')
    plt.subplot(2, 1, 2)
    plt.plot(Accuracy)
    plt.title('Accuracy')
    plt.show()
    torch.save(model, 'model.pth')  # save the whole model object
测试和可视化
import torch
import torch.nn as nn
import cv2
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt
class LeNet(nn.Module):
    """LeNet-5 style CNN for 1x28x28 inputs, 10-class logits output.

    NOTE(review): this definition must stay structurally identical to the
    class used when 'model.pth' was saved, so torch.load can unpickle it.
    """
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(  # input_size=(1*28*28)
            nn.Conv2d(1, 6, 5, 1, 2),  # padding=2 keeps the 28x28 spatial size
            nn.ReLU(),  # input_size=(6*28*28)
            nn.MaxPool2d(kernel_size=2, stride=2),  # output_size=(6*14*14)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),  # input_size=(16*10*10)
            nn.MaxPool2d(2, 2)  # output_size=(16*5*5)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU()
        )
        self.fc3 = nn.Linear(84, 10)

    # Forward pass; input x is an N x 1 x 28 x 28 tensor.
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # nn.Linear expects a 2-D (batch, features) input, so flatten the
        # multi-dimensional tensor first.
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x  # F.softmax(x, dim=1)
# Intermediate feature extraction
class FeatureExtractor(nn.Module):
    """Run a model and collect the outputs of selected top-level children.

    Args:
        submodule: wrapped nn.Module whose direct children are traversed
            in registration order.
        extracted_layers: names of children whose outputs to collect.
    """

    def __init__(self, submodule, extracted_layers):
        super(FeatureExtractor, self).__init__()
        self.submodule = submodule
        self.extracted_layers = extracted_layers

    def forward(self, x):
        # (removed the debug print() calls that cluttered stdout)
        outputs = []
        for name, module in self.submodule._modules.items():
            if "fc" in name:
                # Fully-connected layers need a flat (batch, features) input.
                x = x.view(x.size(0), -1)
            x = module(x)
            if name in self.extracted_layers:
                outputs.append(x)
        return outputs
if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.load('model.pth')  # load the trained model (whole-object save)
    model = model.to(device)
    model.eval()  # switch to inference mode
    img = cv2.imread("7.jpg")  # image to classify
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    # MNIST models expect single-channel input, so convert to grayscale.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = trans(img)
    img = img.to(device)
    # Add a batch dimension: the saved model expects 4-D input
    # [batch_size, channels, height, width] -> [1, 1, 28, 28].
    img = img.unsqueeze(0)
    output = model(img)
    prob = F.softmax(output, dim=1)
    # (removed the deprecated no-op Variable(prob) wrapper)
    # GPU tensors must be moved to the CPU before converting to numpy.
    prob = prob.cpu().numpy()
    print("概率:", prob)  # probabilities of the 10 classes
    pred = np.argmax(prob)  # pick the most probable class
    print("预测类别:", pred.item())
    # Extract intermediate feature maps.
    # BUG FIX: the original wrapped a fresh, randomly initialized LeNet()
    # here, so the visualized features did not come from the trained
    # network; wrap the loaded `model` instead.
    exact_list = ["conv1", "conv2"]
    myexactor = FeatureExtractor(model, exact_list)
    x = myexactor(img)
    # Visualize the 6 conv1 feature maps.
    for i in range(6):
        ax = plt.subplot(1, 6, i + 1)
        ax.set_title('Feature {}'.format(i))
        ax.axis('off')
        plt.imshow(x[0].data.cpu()[0, i, :, :], cmap='jet')
    plt.show()
参考:PyTorch实现经典网络之LeNet5 - 简书
【深度学习】我用 PyTorch 复现了 LeNet-5 神经网络(MNIST 手写数据集篇)!_风度78的博客-CSDN博客
pytorch实现LeNet5手写数字识别+各层特征图可视化_Fancy Wang的博客-CSDN博客_lenet5手写数字识别 torch
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)