TextCNN for Medical Relation Extraction


The model code is shown below.

# -*- coding: utf-8 -*-
import time

import jieba
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as functions

class TextCNN_Model(nn.Module):

    def __init__(self, configs):
        super(TextCNN_Model, self).__init__()
        vocab_size = configs.vocab_size
        embedding_dimension = configs.embedding_dimension
        label_num = configs.label_num
        self.sentence_len = 210     # fixed (padded) sentence length
        self.p_embed_n = 25         # dimension of each relative-position embedding
        self.out_num = 50           # number of filters per convolution branch
        # mapping from relation name to label id
        self.relation_dic = np.load("rela_dic.npy", allow_pickle=True).item()
        # Three 1-D convolution branches with kernel sizes 3, 5 and 7.
        # Input channels = word embedding + two position embeddings.
        in_channels = embedding_dimension + 2 * self.p_embed_n
        self.conv0 = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=self.out_num,
                      kernel_size=3, padding=1, bias=False),
            nn.LayerNorm([self.sentence_len]),
            nn.ReLU(),
        )
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=self.out_num,
                      kernel_size=5, padding=2, bias=False),
            nn.LayerNorm([self.sentence_len]),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=self.out_num,
                      kernel_size=7, padding=3, bias=False),
            nn.LayerNorm([self.sentence_len]),
            nn.ReLU(),
        )
        # Classifier input size: 3 branches x 3 pooled segments x out_num channels = 9 * out_num.
        self.dense = nn.Sequential(
            nn.Linear(9 * self.out_num, 44),   # 44 relation classes
            nn.Softmax(dim=1))
        # Word embedding, relative-position embedding and dropout
        self.embed = nn.Embedding(vocab_size, embedding_dimension, padding_idx=0)
        self.p_embed = nn.Embedding(500, self.p_embed_n)
        self.dropout = nn.Dropout(configs.dropout)
    def w_p_embeding(self, sentences):
        """Build the input matrix: word embeddings concatenated with the
        relative-position embeddings of the head and tail entities."""
        text = sentences["text"]              # shape: [batch, 1, sentence_len, 3]
        batch_size = len(text)

        # columns 1..2 hold the relative positions to the head and tail entities
        position_embed = torch.squeeze(text[:, 0, :, 1:])
        # column 0 holds the word ids
        text_tensor = torch.squeeze(text[:, 0, :, 0])

        txt_mat = self.embed(text_tensor)                       # [batch, len, embed_dim]
        position_embed = self.p_embed(position_embed)           # [batch, len, 2, p_embed_n]
        # flatten the two position embeddings of each token into one vector
        position_embed = position_embed.reshape(batch_size, self.sentence_len, -1)
        txt_mat = torch.cat([txt_mat, position_embed], dim=2)   # [batch, len, embed_dim + 2*p_embed_n]

        # Conv1d expects [batch, channels, length]
        return txt_mat.transpose(1, 2)



    def split_maxpooling(self, x, sentence):
        """Piecewise max pooling: split each feature map at the two entity
        positions and max-pool the three segments separately."""
        device = "cuda" if torch.cuda.is_available() else "cpu"
        e0, e1 = sentence['head'], sentence['tail']
        result = torch.zeros((x.shape[0], x.shape[1], 3)).to(device)
        for i in range(len(e0)):
            # segments [0, e0], [e0, e1], [e1, end]; each overlaps its neighbour by
            # one token so that no segment is empty
            piece0 = x[i:i + 1, :, :e0[i] + 1]
            piece1 = x[i:i + 1, :, e0[i]:e1[i] + 1]
            piece2 = x[i:i + 1, :, e1[i]:]
            pool0 = nn.MaxPool1d(kernel_size=int(e0[i] + 1))
            pool1 = nn.MaxPool1d(kernel_size=int(e1[i] - e0[i] + 1))
            pool2 = nn.MaxPool1d(kernel_size=int(self.sentence_len - e1[i]))
            x0 = pool0(piece0)
            x1 = pool1(piece1)
            x2 = pool2(piece2)
            result[i, :, :] = torch.cat([x0, x1, x2], dim=2)
        return result

    def forward(self, sentences):
        txt_mat = self.w_p_embeding(sentences)

        # three convolution branches over the same input
        x0 = self.conv0(txt_mat)
        x1 = self.conv1(txt_mat)
        x2 = self.conv2(txt_mat)

        # piecewise max pooling per branch -> [batch, out_num, 3] each
        x0 = self.split_maxpooling(x0, sentences)
        x1 = self.split_maxpooling(x1, sentences)
        x2 = self.split_maxpooling(x2, sentences)

        # concatenate the branches and flatten -> [batch, 9 * out_num]
        x = torch.cat([x1, x0, x2], dim=2)
        x = x.view(x.size(0), -1)

        # dropout before the classifier, softmax over the relation labels
        x = self.dropout(x)
        x = self.dense(x)

        return x
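
As a rough illustration of how this model might be exercised, the sketch below builds a dummy config and a fake input batch in the layout the code above expects: sentences["text"] of shape [batch, 1, sentence_len, 3], where the last dimension holds the word id and the two relative-position indices, plus per-sentence entity offsets under "head" and "tail". The Configs class, the stand-in rela_dic.npy, and all the values here are illustrative assumptions, not part of the original post.

import numpy as np
import torch

# Hypothetical config object; the field names mirror the attributes read in __init__.
class Configs:
    vocab_size = 3000
    embedding_dimension = 100
    label_num = 44
    dropout = 0.5

# __init__ loads "rela_dic.npy", so a small stand-in relation dictionary is saved first.
np.save("rela_dic.npy", {"disease-symptom": 0, "disease-drug": 1})

device = "cuda" if torch.cuda.is_available() else "cpu"
model = TextCNN_Model(Configs()).to(device)

batch_size, sentence_len = 4, 210
# text[..., 0] = word ids, text[..., 1] / text[..., 2] = relative positions to head / tail entity
text = torch.zeros(batch_size, 1, sentence_len, 3, dtype=torch.long)
text[..., 0] = torch.randint(1, Configs.vocab_size, (batch_size, 1, sentence_len))
text[..., 1] = torch.randint(0, 500, (batch_size, 1, sentence_len))
text[..., 2] = torch.randint(0, 500, (batch_size, 1, sentence_len))

sentences = {
    "text": text.to(device),
    "head": [10, 15, 20, 25],   # token index of the head entity in each sentence
    "tail": [30, 40, 50, 60],   # token index of the tail entity in each sentence
}

out = model(sentences)
print(out.shape)   # expected: torch.Size([4, 44])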

 

More to come later.

Original article: https://outofmemory.cn/langs/735372.html
