Contents: creating a model; model initialization and parameters; saving/loading a model; autograd; gradient functions; loss functions (MSE, binary classification with BCE, multi-class classification); composing modules.
Creating a model
For example, create a linear model:
import torch
import torch.nn as nn

class LinearModel(nn.Module):
    def __init__(self, ndim):
        super(LinearModel, self).__init__()
        self.ndim = ndim
        # register weight and bias as learnable parameters
        self.weight = nn.Parameter(torch.randn(ndim, 1))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        # y = x @ weight + bias
        return x.mm(self.weight) + self.bias
Model initialization and parameters
## Model initialization
lm = LinearModel(5)     # number of features is 5
x = torch.randn(4, 5)   # mini-batch size is 4
lm(x)
'''
tensor([[-3.0970],
        [-2.9674],
        [ 3.3265],
        [ 4.1923]], grad_fn=<AddBackward0>)
'''
x
'''
tensor([[-0.3725, -1.7013, -2.6523, -0.8103, -0.1179],
        [-1.1700,  0.0091, -0.0386, -1.3510,  0.9027],
        [ 1.5329,  0.9760, -0.4165,  0.2783, -0.6180],
        [ 1.0752,  0.0267,  0.9067,  2.2452,  0.6527]])
'''
# get a generator over the model parameters (with names)
lm.named_parameters()
'''
list(lm.named_parameters())
[('weight', Parameter containing:
  tensor([[2.2394],
          [0.2185],
          [0.5514],
          [0.4709],
          [0.3480]], requires_grad=True)),
 ('bias', Parameter containing:
  tensor([-0.0059], requires_grad=True))]
'''
# get a generator over the model parameters (without names)
lm.parameters()
'''
list(lm.parameters())
[Parameter containing:
 tensor([[2.2394],
         [0.2185],
         [0.5514],
         [0.4709],
         [0.3480]], requires_grad=True),
 Parameter containing:
 tensor([-0.0059], requires_grad=True)]
'''
lm.half()  # convert the model parameters to half-precision floats
'''
LinearModel()
list(lm.parameters())
[Parameter containing:
 tensor([[2.2402],
         [0.2185],
         [0.5513],
         [0.4709],
         [0.3479]], dtype=torch.float16, requires_grad=True),
 Parameter containing:
 tensor([-0.0059], dtype=torch.float16, requires_grad=True)]
'''
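The randn-based initialization in LinearModel can also be overridden explicitly with torch.nn.init. A minimal sketch; the mean/std values are illustrative, not from the original:

# re-initialize the parameters of a fresh model in place (illustrative values)
lm2 = LinearModel(5)
nn.init.normal_(lm2.weight, mean=0.0, std=0.01)  # small random weights
nn.init.zeros_(lm2.bias)                         # zero bias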
from sklearn.datasets import load_boston

boston = load_boston()

lm = LinearModel(13)
criterion = nn.MSELoss()
# optimizer
optim = torch.optim.SGD(lm.parameters(), lr=1e-6)
optim
'''
SGD (
Parameter Group 0
    dampening: 0
    lr: 1e-06
    momentum: 0
    nesterov: False
    weight_decay: 0
)
'''
data = torch.tensor(boston['data'], requires_grad=True, dtype=torch.float32)
data
'''
tensor([[6.3200e-03, 1.8000e+01, 2.3100e+00,  ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        ...,
        [4.7410e-02, 0.0000e+00, 1.1930e+01,  ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]], requires_grad=True)
'''
target = torch.tensor(boston['target'], dtype=torch.float32)
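Note that load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so the code above only runs on older versions. On a recent scikit-learn, a similar regression dataset can stand in; a sketch using fetch_california_housing (which has 8 features instead of 13, so the model width changes accordingly):

# substitute dataset for newer scikit-learn (8 features instead of 13)
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
lm = LinearModel(8)
data = torch.tensor(housing['data'], dtype=torch.float32)
target = torch.tensor(housing['target'], dtype=torch.float32)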
for step in range(10000):
    predict = lm(data)
    loss = criterion(predict, target)
    if step and step % 1000 == 0:
        # you can see the loss steadily decreasing
        print('-- loss : {:.3f}'.format(loss.item()))
    optim.zero_grad()   # clear the gradients from the previous step
    loss.backward()     # backpropagate to compute gradients for all parameters
    optim.step()        # update the parameters
'''
/Users/xx/opt/anaconda3/lib/python3.7/site-packages/torch/nn/modules/loss.py:446: UserWarning: Using a target size (torch.Size([506])) that is different to the input size (torch.Size([506, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
  return F.mse_loss(input, target, reduction=self.reduction)
-- loss : 224.251
-- loss : 150.535
-- loss : 143.163
-- loss : 138.828
-- loss : 135.080
-- loss : 131.752
-- loss : 128.779
-- loss : 126.110
-- loss : 123.706
'''
optim.state_dict()
'''
{'state': {},
 'param_groups': [{'lr': 1e-06,
   'momentum': 0,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'params': [0, 1]}]}
'''
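The UserWarning above is real: target has shape (506,) while the prediction has shape (506, 1), so MSELoss broadcasts them into a 506x506 difference matrix and averages the wrong quantity. A minimal fix is to give the target an explicit second dimension before training:

# reshape the target to (506, 1) so it matches the prediction shape
target = torch.tensor(boston['target'], dtype=torch.float32).unsqueeze(1)
loss = criterion(lm(data), target)  # no warning; correct per-sample loss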
Saving/loading a model
save_info = {
    'iter_num': 10000,                # number of iterations
    'optimizer': optim.state_dict(),  # optimizer state
    'model': lm.state_dict(),         # model parameters
}
save_info
'''
{'iter_num': 10000,
 'optimizer': {'state': {},
  'param_groups': [{'lr': 1e-06,
    'momentum': 0,
    'dampening': 0,
    'weight_decay': 0,
    'nesterov': False,
    'params': [0, 1]}]},
 'model': OrderedDict([('weight', tensor([[-0.0506],
               [ 0.1244],
               [ 0.9757],
               [-1.9508],
               [-0.1465],
               [-1.9823],
               [ 0.0850],
               [ 0.4799],
               [-0.3672],
               [ 0.0141],
               [ 0.4012],
               [ 0.0298],
               [-0.5437]])), ('bias', tensor([1.5198]))])}
'''
save_path = 'model1.txt'
torch.save(save_info, save_path)

save_info1 = torch.load(save_path)
save_info1
'''
(identical to the save_info dictionary printed above)
'''
# restore the saved states
optim.load_state_dict(save_info1['optimizer'])
lm.load_state_dict(save_info1['model'])
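To resume training from the saved file in a fresh session, rebuild the model and optimizer first and then restore their states. A sketch of the full round trip, reusing the file name from the example above:

# rebuild the objects, then restore their saved states
lm = LinearModel(13)
optim = torch.optim.SGD(lm.parameters(), lr=1e-6)

checkpoint = torch.load(save_path)
lm.load_state_dict(checkpoint['model'])
optim.load_state_dict(checkpoint['optimizer'])
start_iter = checkpoint['iter_num']  # continue counting iterations from here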
Autograd
import torch

t1 = torch.randn(3, 3, requires_grad=True)
'''
tensor([[-0.4336, -0.1928,  0.3398],
        [-0.5616,  0.1290,  0.8002],
        [-1.1966,  1.4117, -0.3643]], requires_grad=True)
'''
t2 = t1.pow(2)
'''
tensor([[0.1880, 0.0372, 0.1154],
        [0.3154, 0.0166, 0.6403],
        [1.4319, 1.9929, 0.1327]], grad_fn=<PowBackward0>)
'''
t2 = t2.sum()   # tensor(4.8705, grad_fn=<SumBackward0>)
t2.backward()
t1.grad  # the derivative of x^2 is 2x, so each entry is twice the original
'''
tensor([[-0.8671, -0.3855,  0.6795],
        [-1.1232,  0.2580,  1.6004],
        [-2.3932,  2.8234, -0.7287]])
'''
t1.grad.zero_()  # zero the gradient of a single tensor in place
'''
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
'''
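Zeroing matters because backward() accumulates into .grad rather than overwriting it. A small sketch demonstrating the accumulation:

t = torch.ones(2, requires_grad=True)
t.pow(2).sum().backward()
print(t.grad)    # tensor([2., 2.])
t.pow(2).sum().backward()
print(t.grad)    # tensor([4., 4.]) -- gradients accumulated, not replaced
t.grad.zero_()   # reset before the next backward pass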
Gradient functions
t1 = torch.randn(3, 3, requires_grad=True)
'''
tensor([[ 1.3203, -0.6757,  1.4479],
        [ 0.6133, -0.8377, -0.9381],
        [ 0.3214,  0.9020,  0.1285]], requires_grad=True)
'''
t2 = t1.sum()   # tensor(2.2819, grad_fn=<SumBackward0>)

# inside no_grad, no computation graph is built
with torch.no_grad():
    t3 = t1.sum()
t3           # tensor(2.2819)
t1.sum()     # tensor(2.2819, grad_fn=<SumBackward0>)

# detach the result from the original computation graph
t1.sum().detach()  # tensor(2.2819)
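The difference shows up in the requires_grad flag of the results; a quick check:

t1 = torch.randn(3, 3, requires_grad=True)
with torch.no_grad():
    a = t1.sum()           # built with grad tracking disabled
b = t1.sum()               # tracked as usual
c = t1.sum().detach()      # tracked, then cut off from the graph
print(a.requires_grad, b.requires_grad, c.requires_grad)  # False True False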
Loss functions: MSE
import torch
import torch.nn as nn

mse = nn.MSELoss()
t1 = torch.randn(5, requires_grad=True)
t2 = torch.randn(5, requires_grad=True)
# t1: tensor([-0.5326, -2.1040, -0.0849,  0.0078, -0.3299], requires_grad=True)
# t2: tensor([-0.3427,  0.5773, -0.8011, -0.6496, -0.9095], requires_grad=True)
mse(t1, t2)  # tensor(1.7013, grad_fn=<MseLossBackward>)
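MSELoss is just the mean of the squared differences, which is easy to verify by hand:

# manual computation matches nn.MSELoss (up to floating-point rounding)
manual = ((t1 - t2) ** 2).mean()
assert torch.allclose(mse(t1, t2), manual)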
Binary classification: BCE
t1 = torch.randn(5, requires_grad=True)
# t1: tensor([ 0.3398,  0.8650, -1.2867, -1.4845,  0.6145], requires_grad=True)

# predicted class probabilities: sigmoid maps the logits into (0, 1)
t1s = torch.sigmoid(t1)
# t1s: tensor([0.5841, 0.7037, 0.2164, 0.1847, 0.6490], grad_fn=<SigmoidBackward>)

# target values: random 0/1 integers, converted to floats
t2 = torch.randint(0, 2, (5,)).float()
# t2: tensor([1., 0., 1., 1., 0.])

bce = nn.BCELoss()
# binary cross-entropy; both arguments must be floating-point tensors
bce(t1s, t2)  # tensor(1.2041, grad_fn=<BinaryCrossEntropyBackward>)

# BCEWithLogitsLoss applies the sigmoid internally, so the explicit sigmoid
# step can be skipped; it also improves numerical stability during training.
bce_logits = nn.BCEWithLogitsLoss()
bce_logits(t1, t2)  # same result as above
# tensor(1.2041, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
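BCELoss implements -[y*log(p) + (1-y)*log(1-p)] averaged over the batch; a hand-rolled check:

# manual binary cross-entropy matches nn.BCELoss
manual = -(t2 * torch.log(t1s) + (1 - t2) * torch.log(1 - t1s)).mean()
assert torch.allclose(bce(t1s, t2), manual)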
Multi-class classification
N = 10  # number of classes
t1 = torch.randn(5, N, requires_grad=True)
t2 = torch.randint(0, N, (5,))
# t2: tensor([7, 5, 3, 2, 5])

t1s = nn.functional.log_softmax(t1, -1)

# Negative log-likelihood loss: given predictions (softmax followed by log)
# and one-hot encoded targets, it sums the element-wise products of the two
# and negates the result. Its input must already be log-softmax output.
nll = nn.NLLLoss()
nll(t1s, t2)  # tensor(2.3953, grad_fn=<NllLossBackward>)

# CrossEntropyLoss folds the softmax over the outputs and the log into the
# loss itself, so the explicit LogSoftmax step can be skipped.
ce = nn.CrossEntropyLoss()
ce(t1, t2)  # tensor(2.3953, grad_fn=<NllLossBackward>)
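CrossEntropyLoss(t1, t2) is exactly NLLLoss applied to log_softmax(t1): it picks out the log-probability of the true class for each sample, negates it, and averages. A hand-rolled check:

# select the log-probability of the correct class for each of the 5 samples,
# negate, and average -- this reproduces both NLLLoss and CrossEntropyLoss
manual = -t1s[torch.arange(5), t2].mean()
assert torch.allclose(ce(t1, t2), manual)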
Composing modules
Building a sequential model
## Option 1: build a sequential model from positional arguments
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU()
)

## Option 2: build a sequential model from an ordered dict, naming each submodule
from collections import OrderedDict

model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU()),
]))
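Either form can be called like any other module; with the OrderedDict form the submodules are also reachable by name. A small usage sketch (the input size is chosen for illustration):

x = torch.randn(1, 1, 32, 32)  # batch of one single-channel 32x32 image
y = model(x)
print(y.shape)    # torch.Size([1, 64, 24, 24]) after two 5x5 convolutions
print(model.conv1)  # named access works with the OrderedDict form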