Model part (BERT as the backbone, classification task)
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from transformers import BertModel, BertTokenizer

name = 'bert-base-uncased'
model = BertModel.from_pretrained(name)
tokenizer = BertTokenizer.from_pretrained(name)

text = ['In fact, the symbol function is canceled, and a scale is made in the second normal form',
        'Gradient of the matrix of words that appear in the input sequence of the sample',
        'In order to realize the plug-in call']
labels = torch.tensor([0, 1, 1])
inputs = tokenizer(text, return_tensors='pt', padding=True)
data = TensorDataset(inputs['input_ids'], inputs['attention_mask'], labels)

loss_ = nn.CrossEntropyLoss()

class Mode(nn.Module):
    def __init__(self):
        super(Mode, self).__init__()
        self.model = model
        # Two-way classification head: CrossEntropyLoss expects one logit per class,
        # so the output size must match the number of classes (labels 0/1 -> 2).
        self.dense = nn.Linear(768, 2)

    def forward(self, inputs_id, att_mask, label):
        o = self.model(inputs_id, att_mask)
        o = o[0][:, 0, :]      # last_hidden_state of the [CLS] token, shape (batch, 768)
        o = self.dense(o)      # logits, shape (batch, 2)
        l = loss_(o, label)
        return l
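As a quick shape check of the [CLS]-token pooling used in forward() above (a standalone sketch, not part of the original post):

with torch.no_grad():
    out = model(inputs['input_ids'], inputs['attention_mask'])
    cls = out[0][:, 0, :]   # last_hidden_state of the [CLS] token
    print(cls.shape)        # torch.Size([3, 768]) for the three example sentences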
FGM model
class FGM(object):
    def __init__(self, model, emb_name, epsilon=1.0):
        # emb_name must be (a substring of) the name of the embedding parameter in your model
        self.model = model
        self.epsilon = epsilon
        self.emb_name = emb_name
        self.backup = {}

    def attack(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                self.backup[name] = param.data.clone()   # back up the clean embedding weights
                norm = torch.norm(param.grad)
                if norm != 0 and not torch.isnan(norm):
                    # r_adv = epsilon * g / ||g||_2, added directly to the embedding weights
                    r_at = self.epsilon * param.grad / norm
                    param.data.add_(r_at)

    def restore(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}
fgm = FGM(model, emb_name='word_embeddings.', epsilon=1.0)
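To see which parameter the 'word_embeddings.' substring matches (a quick standalone check; embeddings.word_embeddings.weight is the standard parameter name in bert-base-uncased):

for n, _ in model.named_parameters():
    if 'word_embeddings.' in n:
        print(n)   # embeddings.word_embeddings.weight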
Experiment
mode = Mode()
optimizer = torch.optim.SGD(mode.parameters(), lr=1e-3)
data = DataLoader(data)
for i in data:
    batch_input, att_mask, batch_label = i
    # Normal training step
    loss = mode(batch_input, att_mask, batch_label)
    print(loss)
    loss.backward()        # backward pass: compute the normal gradients
    # Adversarial training step
    fgm.attack()           # add the adversarial perturbation to the embeddings
    loss_adv = mode(batch_input, att_mask, batch_label)
    print(loss_adv)
    loss_adv.backward()    # backward pass: accumulate the adversarial gradients on top of the normal ones
    fgm.restore()          # restore the original embedding parameters
    # Gradient descent: update the parameters
    optimizer.step()
    mode.zero_grad()
The adversarial loss is backpropagated on top of the normal loss: its gradients are accumulated onto the gradients of the normal loss before the optimizer step.
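As a minimal standalone illustration of this accumulation (not part of the original code), calling backward() twice without zeroing the gradients simply sums the two gradients:

w = torch.ones(2, requires_grad=True)
(2 * w.sum()).backward()   # first backward: w.grad = [2., 2.]
(3 * w.sum()).backward()   # second backward accumulates: w.grad = [5., 5.]
print(w.grad)              # tensor([5., 5.])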
class PGD(object):
    def __init__(self, model, emb_name, epsilon=1., alpha=0.3):
        # emb_name must be (a substring of) the name of the embedding parameter in your model
        self.model = model
        self.emb_name = emb_name
        self.epsilon = epsilon
        self.alpha = alpha
        self.emb_backup = {}
        self.grad_backup = {}

    def attack(self, is_first_attack=False):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                if is_first_attack:
                    self.emb_backup[name] = param.data.clone()   # back up the clean embedding once
                norm = torch.norm(param.grad)
                if norm != 0:
                    # one PGD step: move by alpha along the normalized gradient
                    r_at = self.alpha * param.grad / norm
                    param.data.add_(r_at)
                    # then project back into the epsilon-ball around the original embedding
                    param.data = self.project(name, param.data, self.epsilon)

    def restore(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and self.emb_name in name:
                assert name in self.emb_backup
                param.data = self.emb_backup[name]
        self.emb_backup = {}

    def project(self, param_name, param_data, epsilon):
        r = param_data - self.emb_backup[param_name]
        if torch.norm(r) > epsilon:
            r = epsilon * r / torch.norm(r)
        return self.emb_backup[param_name] + r

    def backup_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                self.grad_backup[name] = param.grad.clone()

    def restore_grad(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None:
                param.grad = self.grad_backup[name]
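A minimal standalone numeric sketch of what project() does (not part of the original code; the values are arbitrary): the accumulated perturbation is rescaled so that it never leaves the L2 ball of radius epsilon around the backed-up embedding.

original = torch.zeros(3)                              # backed-up (clean) embedding
perturbed = original + torch.tensor([0.9, 0.9, 0.9])   # after a few alpha-steps, ||r|| is about 1.56
r = perturbed - original
epsilon = 1.0
if torch.norm(r) > epsilon:
    r = epsilon * r / torch.norm(r)                    # rescale back onto the epsilon-ball
print(torch.norm(original + r))                        # tensor(1.0000), i.e. at most epsilon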
pgd = PGD(model, emb_name='word_embeddings.', epsilon=1.0, alpha=0.3)
K = 3
for i in data:
    batch_input, att_mask, batch_label = i
    # Normal training step
    loss = mode(batch_input, att_mask, batch_label)
    loss.backward()            # backward pass: compute the normal gradients
    pgd.backup_grad()          # back up the clean gradients
    # Adversarial training: K projected steps on the embeddings
    for t in range(K):
        pgd.attack(is_first_attack=(t == 0))   # perturb the embeddings; back up param.data on the first attack
        if t != K - 1:
            mode.zero_grad()                   # intermediate steps: only the current attack gradient is needed
        else:
            pgd.restore_grad()                 # last step: restore the backed-up clean gradients first
        loss_adv = mode(batch_input, att_mask, batch_label)
        loss_adv.backward()    # accumulate the adversarial gradients on top of the restored clean gradients
    pgd.restore()              # restore the original embedding parameters
    # Gradient descent: update the parameters
    optimizer.step()
    mode.zero_grad()
The code above is a PyTorch implementation; reference: https://codeantenna.com/a/rqVEKQI1Zx
Su Jianlin's Keras implementation, based on bert4keras, is available at:
https://github.com/bojone/bert4keras/blob/master/examples/task_iflytek_adversarial_training.py
When using adversarial training, pay attention to how dropout is handled; for simplicity, the dropout rates can simply be set to 0.
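For example (a minimal sketch; hidden_dropout_prob and attention_probs_dropout_prob are the BertConfig fields that control BERT's dropout, and from_pretrained forwards such config overrides):

model = BertModel.from_pretrained('bert-base-uncased',
                                  hidden_dropout_prob=0.0,
                                  attention_probs_dropout_prob=0.0)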