手写经典卷积神经网络-LeNet

手写经典卷积神经网络-LeNet,第1张

目录

论文:

keras

torch版本:

训练:

测试和可视化


论文:

论文名:Gradient-Based Learning Applied to Document Recognition

keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers.convolutional import Conv2D,MaxPooling2D
from keras.utils.np_utils import to_categorical
import pickle
import gzip
import warnings
from keras import optimizers

warnings.filterwarnings("ignore",category=DeprecationWarning)

def build_LeNet():
    """Build and compile a LeNet-style CNN for 28x28x1 MNIST images.

    Returns:
        A compiled Keras ``Sequential`` model mapping N x 28 x 28 x 1
        inputs to N x 10 softmax class probabilities.
    """
    # Define the network
    model = Sequential()
    # N x 28 x 28 x 1  ->  N x 24 x 24 x 32
    # parameters: 5*5*1*32 + 32 (bias) = 832  (input has 1 channel)
    model.add(Conv2D(32, (5, 5), strides=(1, 1), input_shape=(28, 28, 1),
                     padding='valid', activation='relu', kernel_initializer='uniform'))

    # N x 24 x 24 x 32  ->  N x 12 x 12 x 32
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # N x 12 x 12 x 32  ->  N x 8 x 8 x 64
    model.add(Conv2D(64, (5, 5), strides=(1, 1), padding='valid',
                     activation='relu', kernel_initializer='uniform'))

    # N x 8 x 8 x 64   ->  N x 4 x 4 x 64
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # N x 4 x 4 x 64 ->  N x 1024
    model.add(Flatten())

    # N x 1024 ->  N x 100
    model.add(Dense(100, activation='relu'))

    # N x 100  ->  N x 10
    model.add(Dense(10, activation='softmax'))

    # Use the Adam optimizer class directly: the lowercase `optimizers.adam`
    # alias does not exist in tf.keras / current Keras and breaks there.
    # (On Keras >= 2.3 the keyword is spelled `learning_rate` instead of `lr`.)
    model.compile(optimizer=optimizers.Adam(lr=0.01),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

if __name__ == "__main__":
    # Load the pickled MNIST splits (train/valid/test) from the gzip archive.
    with gzip.open(r'mnist.pkl.gz', 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f, encoding='unicode-escape')

    # Sanity-check the raw data sizes and element types.
    print("train size ", len(train_set[0]), " type ", type(train_set[0][0]))
    print("valid size ", len(valid_set[0]))
    print("test size ", len(test_set[0]))

    print("train_set[0][0]", train_set[0][0].shape)
    print("train_set[0][0]", train_set[0][0])

    def _prepare(split):
        # Reshape images to N x 28 x 28 x 1 for Conv2D and one-hot the labels.
        return split[0].reshape((-1, 28, 28, 1)), to_categorical(split[1])

    train_x, train_y = _prepare(train_set)
    valid_x, valid_y = _prepare(valid_set)
    test_x, test_y = _prepare(test_set)

    # Build the network, train with validation, then evaluate on the test set.
    model = build_LeNet()

    model.fit(train_x, train_y, validation_data=(valid_x, valid_y),
              batch_size=500, epochs=2, verbose=1)
    result = model.evaluate(test_x, test_y, batch_size=20, verbose=2)
    print("loss : ", result[0])
    print("acc : ", result[1])
    
torch版本:

训练:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from matplotlib import pyplot as plt

lr = 0.01  # learning rate for SGD
momentum = 0.5  # SGD momentum term
log_interval = 10  # print/log training status every `log_interval` batches
epochs = 10  # number of full passes over the training set
batch_size = 64  # mini-batch size for training
test_batch_size = 1000  # mini-batch size for evaluation


class LeNet(nn.Module):
    """LeNet-5-style CNN for 1x28x28 MNIST digits, producing 10 raw logits."""

    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Sequential(  # input_size=(1*28*28)
            nn.Conv2d(1, 6, 5, 1, 2),  # padding=2 keeps the 28x28 spatial size
            nn.ReLU(),  # input_size=(6*28*28)
            nn.MaxPool2d(kernel_size=2, stride=2),  # output_size=(6*14*14)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),  # no padding: 14x14 -> 10x10
            nn.ReLU(),  # input_size=(16*10*10)
            nn.MaxPool2d(2, 2)  # output_size=(16*5*5)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(120, 84),
            nn.ReLU()
        )
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Forward pass: maps N x 1 x 28 x 28 input to N x 10 logits.

        Softmax is intentionally NOT applied here; F.cross_entropy in the
        training loop expects raw logits.
        """
        x = self.conv1(x)
        # The original kept an unused `fp1 = x.detach()` here (a leftover from
        # the visualization script); the dead assignment has been removed.
        x = self.conv2(x)
        # nn.Linear expects a flat (N, features) input, so flatten everything
        # but the batch dimension.
        x = x.view(x.size()[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x


def train(epoch):
    """Run one training epoch over `train_loader`.

    Relies on the module-level globals set up in __main__: `model`,
    `optimizer`, `device`, `train_loader`, `log_interval`, `Loss`,
    `Accuracy`.

    Returns:
        (loss of the last batch, running accuracy over the whole epoch).
    """
    total = 0
    correct = 0.0

    model.train()  # switch to training mode (enables dropout/batchnorm updates)
    for batch_idx, (data, target) in enumerate(train_loader):
        # torch.autograd.Variable has been a deprecated no-op since
        # PyTorch 0.4; moving the tensors to the device is all that is needed.
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()  # clear gradients accumulated by the last step
        output = model(data)  # forward pass
        loss = F.cross_entropy(output, target)  # cross-entropy on raw logits

        # argmax over dim=1 gives the predicted class index for each row
        predict = output.argmax(dim=1)
        total += target.size(0)
        correct += (predict == target).sum().item()

        loss.backward()  # backpropagate
        optimizer.step()  # update parameters after forward + backward
        if batch_idx % log_interval == 0:  # periodic progress logging
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                       100. * batch_idx / len(train_loader), loss.item()))
            # NOTE(review): the __main__ loop also appends to Loss/Accuracy
            # once per epoch, so these lists mix per-batch and per-epoch
            # values — confirm which granularity the plots should show.
            Loss.append(loss.item())
            Accuracy.append(correct / total)
    return loss.item(), correct / total


def test():
    """Evaluate `model` on `test_loader`; print average loss and accuracy.

    Relies on the module-level globals set up in __main__: `model`,
    `device`, `test_loader`.
    """
    model.eval()  # switch to evaluation mode
    test_loss = 0  # accumulated SUMMED loss over the whole test set
    correct = 0  # number of correctly classified samples
    with torch.no_grad():  # no gradients needed during evaluation
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)

            output = model(data)
            # reduction='sum' replaces the deprecated/removed
            # `size_average=False` keyword: accumulate the summed batch
            # loss, then average over the dataset below.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]  # index of the max logit
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)  # summed loss -> mean loss per sample
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use the GPU when available

    train_loader = torch.utils.data.DataLoader(  # load the training data
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           # # randomly flip the image horizontally (disabled)
                           # transforms.RandomHorizontalFlip(),
                           # # resize the image to 32x32 (disabled)
                           # transforms.Resize((32, 32)),
                           # convert the PIL image to a Tensor
                           transforms.ToTensor(),
                           # normalize the input with fixed per-channel statistics
                           transforms.Normalize((0.1307,), (0.3081,))  # mean and std published for the MNIST dataset; each dataset has its own
                       ])),
        batch_size=batch_size, shuffle=True)

    test_loader = torch.utils.data.DataLoader(  # load the test data the same way
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
            # # resize the image to 32x32 (disabled)
            # transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))  # mean and std published for the MNIST dataset; each dataset has its own
        ])),
        batch_size=test_batch_size, shuffle=True)

    model = LeNet()  # instantiate the network
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)  # initialize the optimizer

    # Per-epoch history; NOTE(review): train() also appends per-batch values
    # to these same lists, so the plotted series mixes two granularities.
    Loss = []
    Accuracy = []
    for epoch in range(1, epochs + 1):  # loop epoch by epoch
        loss, acc = train(epoch)
        Loss.append(loss)
        Accuracy.append(acc)
        test()

    print('Finished Training')
    # NOTE(review): plt.show() blocks and starts a fresh figure, so the
    # second subplot lands in a new window; the 2x1 grid looks unintended.
    plt.subplot(2, 1, 1)
    plt.plot(Loss)
    plt.title('Loss')
    plt.show()
    plt.subplot(2, 1, 2)
    plt.plot(Accuracy)
    plt.title('Accuracy')
    plt.show()

    torch.save(model, 'model.pth')  # save the WHOLE pickled model; loading it later requires the LeNet class definition
测试和可视化
import torch
import torch.nn as nn
import cv2
import torch.nn.functional as F
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

class LeNet(nn.Module):
    """LeNet-5 variant for 1x28x28 inputs; emits 10 un-normalized logits.

    The child names (conv1, conv2, fc1, fc2, fc3) are relied upon by
    FeatureExtractor below and by the pickled model file, so they must
    not be renamed.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        # Stage 1: 1x28x28 -> 6x28x28 (padding=2 preserves size) -> 6x14x14
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 6, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2: 6x14x14 -> 16x10x10 (no padding) -> 16x5x5
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # Classifier head: 400 -> 120 -> 84 -> 10
        self.fc1 = nn.Sequential(nn.Linear(16 * 5 * 5, 120), nn.ReLU())
        self.fc2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map an N x 1 x 28 x 28 batch to N x 10 logits (no softmax here)."""
        features = self.conv2(self.conv1(x))
        # The linear head needs a flat (N, features) input, so collapse all
        # per-sample feature maps into one dimension.
        flat = features.view(features.size(0), -1)
        return self.fc3(self.fc2(self.fc1(flat)))

# 中间特征提取
class FeatureExtractor(nn.Module):
    def __init__(self, submodule, extracted_layers):
        super(FeatureExtractor, self).__init__()
        self.submodule = submodule
        self.extracted_layers = extracted_layers

    def forward(self, x):
        outputs = []
        print(self.submodule._modules.items())
        for name, module in self.submodule._modules.items():
            if "fc" in name:
                print(name)
                x = x.view(x.size(0), -1)
            print(module)
            x = module(x)
            print(name)
            if name in self.extracted_layers:
                outputs.append(x)
        return outputs


if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.load('model.pth')  # load the whole pickled model (needs the LeNet class above in scope)
    model = model.to(device)
    model.eval()  # switch the model to evaluation mode

    img = cv2.imread("7.jpg")  # image to classify
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # MNIST models expect single-channel input, so convert BGR to grayscale.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = trans(img)
    img = img.to(device)
    # Add a batch dimension: the saved model expects 4-D input
    # [batch_size, channel, H, W], i.e. [1, 1, 28, 28] for one grayscale image.
    img = img.unsqueeze(0)

    output = model(img)
    prob = F.softmax(output, dim=1)
    # detach() is required before numpy(): `prob` tracks gradients, and
    # calling .numpy() on a grad-tracking tensor raises. (The original
    # wrapped it in the deprecated no-op Variable instead.) GPU tensors
    # must also be moved back to the CPU before converting to numpy.
    prob = prob.detach().cpu().numpy()
    print("概率:", prob)  # probabilities of the 10 classes
    pred = np.argmax(prob)  # pick the class with the highest probability
    print("预测类别:", pred.item())

    # Feature-map extraction: use the *trained* loaded model. The original
    # wrapped a freshly constructed LeNet() here, which visualized randomly
    # initialized weights instead of the trained ones.
    exact_list = ["conv1", "conv2"]
    myexactor = FeatureExtractor(model, exact_list)
    x = myexactor(img)

    # Visualize the 6 conv1 feature maps of the single input sample.
    for i in range(6):
        ax = plt.subplot(1, 6, i + 1)
        ax.set_title('Feature {}'.format(i))
        ax.axis('off')
        plt.imshow(x[0].data.cpu()[0, i, :, :], cmap='jet')

    plt.show()

参考:PyTorch实现经典网络之LeNet5 - 简书

【深度学习】我用 PyTorch 复现了 LeNet-5 神经网络(MNIST 手写数据集篇)!_风度78的博客-CSDN博客

pytorch实现LeNet5手写数字识别+各层特征图可视化_Fancy Wang的博客-CSDN博客_lenet5手写数字识别 torch

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/724467.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-04-26
下一篇 2022-04-26

发表评论

登录后才能评论

评论列表(0条)

保存