Adversarial Training in NLP


This post is limited to getting the code running; the underlying theory is not explained here.

Model (BERT backbone, classification task)

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from transformers import BertModel, BertTokenizer

name = 'bert-base-uncased'
model = BertModel.from_pretrained(name)
tokenizer = BertTokenizer.from_pretrained(name)

text = ['In fact, the symbol function is canceled, and a scale is made in the second normal form',
        'Gradient of the matrix of words that appear in the input sequence of the sample',
        'In order to realize the plug-in call']
labels = torch.tensor([0, 1, 1])

inputs = tokenizer(text, return_tensors='pt', padding=True)
data = TensorDataset(inputs['input_ids'], inputs['attention_mask'], labels)
loss_ = nn.CrossEntropyLoss()
class Mode(nn.Module):
    def __init__(self):
        super(Mode, self).__init__()
        self.model = model
        # two output logits, matching CrossEntropyLoss with labels in {0, 1}
        self.dense = nn.Linear(768, 2)

    def forward(self, inputs_id, att_mask, label):
        o = self.model(inputs_id, att_mask)
        o = o[0][:, 0, :]   # [CLS] token representation
        o = self.dense(o)
        l = loss_(o, label)
        return l
FGM model
class FGM(object):

    def __init__(self, model, emb_name, epsilon=1.0):
        # emb_name should be set to the name of the embedding parameter in your model
        self.model = model
        self.epsilon = epsilon
        self.emb_name = emb_name
        self.backup = {}

    def attack(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                self.backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    r_at = self.epsilon * param.grad / norm
                    param.data.add_(r_at)

    def restore(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}
# note: mode (instantiated below) wraps this same BertModel instance, so perturbing
# its embeddings also affects mode during the adversarial forward pass
fgm = FGM(model, epsilon=1, emb_name='word_embeddings.')

Experiment

mode = Mode()
optimizer = torch.optim.SGD(mode.parameters(), lr=1e-3)
data = DataLoader(data)
for i in data:
    batch_input, att_mask, batch_label = i
    # normal training step
    loss = mode(batch_input, att_mask, batch_label)
    print(loss)
    loss.backward()  # backward pass to get the normal gradients
    # adversarial training step
    fgm.attack()  # add the adversarial perturbation to the embeddings
    loss_adv = mode(batch_input, att_mask, batch_label)
    print(loss_adv)
    loss_adv.backward()  # backward pass, accumulating the adversarial gradient on top of the normal one
    fgm.restore()  # restore the embedding parameters
    # gradient descent, update the parameters
    optimizer.step()
    mode.zero_grad()

The adversarial gradient is accumulated on top of the normal gradient: both backward() calls run before the single optimizer.step().
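For reference, a sketch of the perturbation that attack() applies to the embedding matrix (this is the FGM step the class above implements; g denotes the gradient of the loss with respect to the embedding weights):

    r_adv = epsilon * g / ||g||_2

restore() removes r_adv again before optimizer.step(), so the parameter update uses only the accumulated gradients (normal + adversarial), not the perturbed embedding values.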


PGD model
class PGD(object):

    def __init__(self, model, emb_name, epsilon=1., alpha=0.3):
        # emb_name should be set to the name of the embedding parameter in your model
        self.model = model
        self.emb_name = emb_name
        self.epsilon = epsilon
        self.alpha = alpha
        self.emb_backup = {}
        self.grad_backup = {}

    def attack(self, is_first_attack=False):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                if is_first_attack:
                    self.emb_backup[name] = param.data.clone()
                norm = torch.norm(param.grad)
                if norm != 0:
                    r_at = self.alpha * param.grad / norm
                    param.data.add_(r_at)
                    param.data = self.project(name, param.data, self.epsilon)

    def restore(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                assert name in self.emb_backup
                param.data = self.emb_backup[name]
        self.emb_backup = {}

    def project(self, param_name, param_data, epsilon):
        r = param_data - self.emb_backup[param_name]
        if torch.norm(r) > epsilon:
            r = epsilon * r / torch.norm(r)
        return self.emb_backup[param_name] + r

    def backup_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                self.grad_backup[name] = param.grad.clone()

    def restore_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                param.grad = self.grad_backup[name]


# pgd attacks the same shared BertModel instance that mode wraps
pgd = PGD(model, emb_name='word_embeddings.', epsilon=1.0, alpha=0.3)
K = 3
for i in data:
    batch_input, att_mask, batch_label = i
    # normal training step
    loss = mode(batch_input, att_mask, batch_label)
    loss.backward()  # backward pass to get the normal gradients
    pgd.backup_grad()
    # adversarial training steps
    for t in range(K):
        pgd.attack(is_first_attack=(t == 0))  # add the adversarial perturbation; back up param.data on the first attack
        if t != K - 1:
            mode.zero_grad()
        else:
            pgd.restore_grad()
        loss_adv = mode(batch_input, att_mask, batch_label)
        loss_adv.backward()  # backward pass, accumulating the adversarial gradient on top of the normal one
    pgd.restore()  # restore the embedding parameters
    # gradient descent, update the parameters
    optimizer.step()
    mode.zero_grad()
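For reference, a sketch of what each of the K inner steps does (matching attack() and project() above; r_t denotes the accumulated perturbation on the embeddings and g_t the gradient at the current perturbed point):

    r_{t+1} = project_{||r|| <= epsilon}( r_t + alpha * g_t / ||g_t||_2 )

That is, the perturbation grows by a normalized gradient step of size alpha and is then projected back onto the epsilon-ball around the original embeddings. The intermediate mode.zero_grad() calls make each attack step use only the gradient at the current perturbed point; pgd.restore_grad() on the last step restores the backed-up normal gradient so the final backward() accumulates the adversarial gradient onto it.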

The code above is implemented in PyTorch; reference: https://codeantenna.com/a/rqVEKQI1Zx
Su Jianlin's Keras implementation, based on bert4keras, is here:
https://github.com/bojone/bert4keras/blob/master/examples/task_iflytek_adversarial_training.py

When using adversarial training, pay attention to how dropout is used (otherwise the clean and adversarial forward passes in each step see different random dropout masks).

For simplicity, dropout can simply be set to 0.
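A minimal sketch of disabling dropout for the Hugging Face BERT used above, by overriding the standard BertConfig fields hidden_dropout_prob and attention_probs_dropout_prob before loading the weights:

from transformers import BertConfig, BertModel

name = 'bert-base-uncased'
# set both dropout probabilities to 0, then load the pretrained weights with this config
config = BertConfig.from_pretrained(name,
                                    hidden_dropout_prob=0.0,
                                    attention_probs_dropout_prob=0.0)
model = BertModel.from_pretrained(name, config=config)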
