32 - 文本情感分类项目完整代码精讲

32 - 文本情感分类项目完整代码精讲,第1张

  • 1. nn.init.xavier_uniform_
  • 2. nn.Conv1d
  • 3. torch.sigmoid
  • 4. torch.mean
  • 6. IMDB
  • 7. get_tokenizer
  • 8. build_vocab_from_iterator
  • 9. torch.index_select
  • 11. set_default_index
  • 12. logging
  • 13. clip_grad_norm_
  • 14. torch.masked_select

1. nn.init.xavier_uniform_

pytorch 官网 xavier_uniform
xavier_uniform 的提出是为了解决训练过程中各层输入与输出的方差不一致的问题:它让信号在前向传播和梯度在反向传播时方差尽量保持稳定;正确的初始化有利于训练的稳定。

2. nn.Conv1d


  • 代码:
import torch 
from torch import nn

# One-dimensional convolution using the official PyTorch API (nn.Conv1d).
# Input tensor of shape (32, 25, 34); the second axis is the channel axis
# consumed by the layer below.
input = torch.randn(32, 25, 34)

# Conv1d layer: 25 input channels -> 5 output channels, kernel width 2.
net = nn.Conv1d(in_channels=25, out_channels=5, kernel_size=2)

# Forward pass through the layer.
output = net(input)

# Show the shapes of the layer's learnable parameters (weight and bias).
for name, param in net.named_parameters():
    print(name, param.shape)

# Show the shape of the output. The statement producing the documented
# "output.shape=..." result line was missing from the snippet.
print(f"output.shape={output.shape}")
# output.shape=torch.Size([32, 5, 33])

# 自定义一维卷积
def conv1d_cus(input, weight, bias):
    """Compute a 1-D convolution (stride 1, no padding) with explicit loops.

    This is a readable reference implementation meant to be compared against
    the output of ``nn.Conv1d`` when the layer's weight and bias are passed in.

    :param input: input tensor of shape (bs, in_channel, T)
    :param weight: kernel tensor of shape (out_channel, in_channel, kernel_size),
        e.g. ``conv1d.weight``
    :param bias: bias tensor of shape (out_channel,), e.g. ``conv1d.bias``;
        ``None`` means no bias is added
    :return: output tensor of shape (bs, out_channel, T - kernel_size + 1)
    :raises ValueError: if the channel counts of ``input`` and ``weight``
        disagree, or if the kernel is longer than the input
    """
    bs, in_channel, T = input.shape
    out_channel, weight_in_channel, kernel_size = weight.shape
    if weight_in_channel != in_channel:
        raise ValueError(
            f"weight expects {weight_in_channel} input channels, "
            f"but input has {in_channel}"
        )

    # Number of valid kernel positions along the length axis.
    out_h = T - kernel_size + 1
    if out_h <= 0:
        raise ValueError(
            f"kernel_size {kernel_size} is larger than input length {T}"
        )

    # Allocate the result with the promoted dtype and on the input's device,
    # so float64 or non-CPU inputs are not silently forced to CPU float32.
    output = torch.zeros(
        (bs, out_channel, out_h),
        dtype=torch.result_type(input, weight),
        device=input.device,
    )

    for i in range(bs):  # every sample in the batch
        for j in range(out_channel):  # every output channel
            # Kernel of this output channel, shape (in_channel, kernel_size);
            # it does not depend on the position, so fetch it once.
            k = weight[j]
            b = bias[j] if bias is not None else 0
            for m in range(out_h):  # every output position
                # Input window under the kernel, shape (in_channel, kernel_size).
                x = input[i, :, m:m + kernel_size]
                # Element-wise product summed over channels and kernel taps.
                output[i, j, m] = torch.sum(x * k) + b
    return output

# Feed the same input that was given to the official layer.
cu_input = input
# Reuse the weight of the official Conv1d layer.
cu_weight = net.weight
# Reuse the bias of the official Conv1d layer.
cu_bias = net.bias

# Run the hand-written convolution with exactly the same parameters.
cu_output = conv1d_cus(cu_input, cu_weight, cu_bias)

# Element-wise comparison of the official output and the custom output;
# every entry being True means the custom function matches the official one.
flags_cu = torch.isclose(cu_output, output)
# Print flags_cu. The statement producing the documented "flags_cu=..."
# result was missing from the snippet.
print(f"flags_cu={flags_cu}")
  • 结果:
weight torch.Size([5, 25, 2])
bias torch.Size([5])
output.shape=torch.Size([32, 5, 33])
flags_cu=tensor([[[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]],

        [[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]],

        [[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]],


        [[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]],

        [[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]],

        [[True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True],
         [True, True, True,  ..., True, True, True]]])
3. torch.sigmoid

$$\mathrm{Sigmoid}(x)=\sigma(x)=\frac{1}{1+\exp(-x)}$$

CLASS  torch.nn.Sigmoid
  • 代码测试
# Alternative deterministic input, kept for reference:
# x = torch.arange(12, dtype=torch.float32)
x = torch.randn(3, 4)
# Sigmoid from the official API, used as the reference result.
y = x.sigmoid()

# 自定义一个sigmoid函数
def sigmoid_cus(x):
    """Hand-written sigmoid: return 1 / (1 + exp(-x)) for the tensor ``x``."""
    return 1.0 / (1 + torch.exp(-x))

# Result of the hand-written sigmoid on the same input.
z = sigmoid_cus(x)

# Check element-wise whether the custom sigmoid output agrees with the
# output of the official API.
flags_sigmoid = y.isclose(z)

# x=tensor([[ 1.1050, -1.2536,  0.2727, -0.9987],
#         [ 1.5892,  0.5052,  2.2567,  1.3520],
#         [ 1.7559,  0.4546, -0.7967,  0.0197]])
# y=tensor([[0.7512, 0.2221, 0.5678, 0.2692],
#         [0.8305, 0.6237, 0.9052, 0.7945],
#         [0.8527, 0.6117, 0.3107, 0.5049]])
# flags_sigmoid=tensor([[True, True, True, True],
#         [True, True, True, True],
#         [True, True, True, True]])
4. torch.mean


# x: a tensor with 3 rows and 4 columns holding the values 0..11.
x = torch.arange(12, dtype=torch.float32).view(3, 4)
# Reference result from the official API: mean along dimension 1.
y = x.mean(dim=1)

# 自定义一个求均值的函数
def mean_cus(input, dim):
    """Return the mean of ``input`` along dimension ``dim``.

    Hand-written counterpart of ``torch.mean(input, dim=dim)`` for a single
    integer dimension: the sum along ``dim`` divided by the size of ``dim``.

    :param input: tensor to reduce
    :param dim: integer dimension to reduce; negative values count from the
        last dimension
    :return: tensor with dimension ``dim`` removed
    :raises TypeError: if ``dim`` is not an int
    :raises IndexError: if ``dim`` is outside the tensor's dimensions
    """
    if not isinstance(dim, int):
        raise TypeError(f"dim must be an int, got {type(dim).__name__}")
    # The valid range depends on the number of dimensions, not on the size of
    # the largest one.
    ndim = input.dim()
    if not -ndim <= dim < ndim:
        raise IndexError(
            f"dim {dim} is out of range for a tensor with {ndim} dimension(s)"
        )
    return torch.sum(input, dim=dim) / input.shape[dim]

# Reduce along dim=1, the same dimension used for the torch.mean reference
# above; a 2-D tensor has no dimension 5, and the documented result
# z=[1.5, 5.5, 9.5] is the row-wise mean.
z = mean_cus(x, dim=1)

# x=tensor([[ 0.,  1.,  2.,  3.],
#         [ 4.,  5.,  6.,  7.],
#         [ 8.,  9., 10., 11.]])
# y=tensor([1.5000, 5.5000, 9.5000])
# z=tensor([1.5000, 5.5000, 9.5000])

6. IMDB

IMDB Dataset 数据集
train: 25000;test: 25000

7. get_tokenizer


torchtext.data.utils.get_tokenizer(tokenizer, language='en')

from torchtext.data import get_tokenizer

# Instantiate the "basic_english" tokenizer.
tokenizer = get_tokenizer("basic_english")

# Sentence to be tokenized.
input = "I like to use pytorchtext as my tools in the future"

# Split the sentence into tokens with the tokenizer.
token = tokenizer(input)

# Print the result. The statement producing the documented "token=..."
# line was missing from the snippet.
print(f"token={token}")
# token=['i', 'like', 'to', 'use', 'pytorchtext', 'as', 'my', 'tools', 'in', 'the', 'future']

8. build_vocab_from_iterator

9. torch.index_select
torch.index_select(input, dim, index, *, out=None) → Tensor
  • input: 表示被索引的输入张量
  • dim:指定按行索引还是按列索引
  • index: 索引值,需要输入一个索引向量
import torch 
from torch import nn

# Create a tensor with 4 rows and 4 columns.
input_3 = torch.randn(4, 4)
# input_3=tensor([[-1.5693, -0.6550,  0.6508, -0.8672],   row 0
#                 [ 0.2457,  0.0737,  1.6346, -0.4966],   row 1
#                 [-1.4351,  0.6115,  1.5060,  0.2504],   row 2
#                 [-0.1475, -2.5242,  1.1654, -1.9561]])  row 3

# Create an index vector; its values are the row indices to pick.
index = torch.randint(0, 4, (5,))
# index=tensor([3, 0, 2, 0, 1])
# pick row 3  [-0.1475, -2.5242,  1.1654, -1.9561]
# pick row 0  [-1.5693, -0.6550,  0.6508, -0.8672]
# pick row 2  [-1.4351,  0.6115,  1.5060,  0.2504]
# pick row 0  [-1.5693, -0.6550,  0.6508, -0.8672]
# pick row 1  [ 0.2457,  0.0737,  1.6346, -0.4966]

# Gather rows of input_3 (dim=0) in the order given by the index vector.
output_3 = input_3.index_select(0, index)
# output_3=tensor([[-0.1475, -2.5242,  1.1654, -1.9561],
#                  [-1.5693, -0.6550,  0.6508, -0.8672],
#                  [-1.4351,  0.6115,  1.5060,  0.2504],
#                  [-1.5693, -0.6550,  0.6508, -0.8672],
#                  [ 0.2457,  0.0737,  1.6346, -0.4966]])

# Full output of one run:
# input_3=tensor([[-1.5693, -0.6550,  0.6508, -0.8672],
#         [ 0.2457,  0.0737,  1.6346, -0.4966],
#         [-1.4351,  0.6115,  1.5060,  0.2504],
#         [-0.1475, -2.5242,  1.1654, -1.9561]])
# input_3.shape=torch.Size([4, 4])
# index=tensor([3, 0, 2, 0, 1])
# index.shape=torch.Size([5])
# output_3=tensor([[-0.1475, -2.5242,  1.1654, -1.9561],
#         [-1.5693, -0.6550,  0.6508, -0.8672],
#         [-1.4351,  0.6115,  1.5060,  0.2504],
#         [-1.5693, -0.6550,  0.6508, -0.8672],
#         [ 0.2457,  0.0737,  1.6346, -0.4966]])
# output_3.shape=torch.Size([5, 4])
# Build the vocabulary from the tokenized training data with min_freq=20.
# NOTE(review): the special token was an empty string in the page source,
# most likely because HTML extraction stripped the angle-bracketed "<unk>";
# confirm against the original project code.
vocab = build_vocab_from_iterator(yield_tokens(train_data_iter, tokenizer), min_freq=20, specials=["<unk>"])
11. set_default_index

12. logging

13. clip_grad_norm_

14. torch.masked_select
torch.masked_select(input, mask, *, out=None) → Tensor


import torch 

# Create a tensor with 3 rows and 4 columns.
x_2 = torch.randn(3, 4)
# x_2=tensor([[ 0.9040,  0.4787, -0.7427,  1.0943],
#             [ 1.1150, -1.4897, -1.0072, -1.0045],
#             [-0.2445,  1.7155, -0.7584, -0.2749]])
# Compare every value of x_2 with 0.5: True where the value is greater than
# or equal to 0.5, False otherwise.
mask_2 = x_2 >= 0.5
# mask_2=tensor([[ True, False, False,  True],
#                [ True, False, False, False],
#                [False,  True, False, False]])
# Pick the values of x_2 where mask_2 is True and return them as a 1-D tensor.

y_2 = x_2.masked_select(mask_2)
# y_2=tensor([0.9040, 1.0943, 1.1150, 1.7155])

# Full output of one run:
# x_2=tensor([[ 0.9040,  0.4787, -0.7427,  1.0943],
#         [ 1.1150, -1.4897, -1.0072, -1.0045],
#         [-0.2445,  1.7155, -0.7584, -0.2749]])
# mask_2=tensor([[ True, False, False,  True],
#         [ True, False, False, False],
#         [False,  True, False, False]])
# y_2=tensor([0.9040, 1.0943, 1.1150, 1.7155])


原文地址: http://outofmemory.cn/langs/787324.html

打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-05
下一篇 2022-05-05



