

BERT-pytorch source code implementation: fixing the memory overflow problem

Many people are working with the BERT model. Some simply load the ready-made model from the transformers library, but that makes it inconvenient to modify the model, so others have re-implemented BERT in detail with PyTorch. I found that these detailed BERT implementations run into memory overflow problems, so I made some improvements. The code below gets rid of the memory overflow; the root cause is that intermediate results were never fully released. Note: if you want to fix the memory overflow yourself, just pay attention to the del statements. A minimal sketch of the idea comes first, then the full code.
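Not part of the original post: the sketch below isolates what "pay attention to the del statements" means in practice. TinyBlock is a hypothetical module used only for illustration; the full listing applies the same pattern inside every forward() and again at the end of every training step.

import torch
import torch.nn as nn

class TinyBlock(nn.Module):
    # Hypothetical module that only illustrates the release pattern.
    def __init__(self, d_model=768):
        super().__init__()
        self.fc1 = nn.Linear(d_model, 4 * d_model)
        self.fc2 = nn.Linear(4 * d_model, d_model)

    def forward(self, x):
        hidden = torch.relu(self.fc1(x))  # large intermediate activation
        out = self.fc2(hidden)
        del hidden, x                     # drop the references as soon as they are no longer needed
        return out

The real model applies this to attention scores, context tensors and embeddings, and the training loop additionally deletes every per-batch tensor after the optimizer step. The complete code follows.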
'''
  code by Tae Hwan Jung(Jeff Jung) @graykode, modify by wmathor
  Reference : https://github.com/jadore801120/attention-is-all-you-need-pytorch
              https://github.com/JayParks/transformer
              https://github.com/dhlee347/pytorchic-bert
'''
import re
import math
import torch
import numpy as np
from random import *
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data

import matplotlib.pyplot as plt
from data_process import get_data

setences, label, setences_test, label_test = get_data()
device = torch.device('cpu')

sentences = setences
# text = (
#     'Hello, how are you? I am Romeo.\n'                         # R
#     'Hello, Romeo My name is Juliet. Nice to meet you.\n'       # J
#     'Nice meet you too. How are you today?\n'                   # R
#     'Great. My baseball team won the competition.\n'            # J
#     'Oh Congratulations, Juliet\n'                              # R
#     'Thank you Romeo\n'                                         # J
#     'Where are you going today?\n'                              # R
#     'I am going shopping. What about you?\n'                    # J
#     'I am going to visit my grandmother. she is not very well'  # R
# )
# sentences = re.sub("[.,!?\\-]", '', text.lower()).split('\n')  # filter '.', ',', '?', '!'
# print(sentences)

word_list = list(set(" ".join(setences).split()) | set(" ".join(setences_test).split()))  # ['hello', 'how', 'are', 'you', ...]
word2idx = {'[PAD]': 0, '[CLS]': 1, '[SEP]': 2, '[MASK]': 3}
for i, w in enumerate(word_list):
    word2idx[w] = i + 4
idx2word = {i: w for i, w in enumerate(word2idx)}
vocab_size = len(word2idx)

token_list = list()
for sentence in setences:
    arr = [word2idx[s] for s in sentence.split()]
    token_list.append(arr)
# print(token_list)
'''
[[12, 7, 22, 5, 39, 21, 15],
 [12, 15, 13, 35, 10, 27, 34, 14, 19, 5],
 [34, 19, 5, 17, 7, 22, 5, 8],
 [33, 13, 37, 32, 28, 11, 16],
 [30, 23, 27],
 [6, 5, 15],
 [36, 22, 5, 31, 8],
 [39, 21, 31, 18, 9, 20, 5],
 [39, 21, 31, 14, 29, 13, 4, 25, 10, 26, 38, 24]]
'''

# BERT Parameters
maxlen = 30
batch_size = 6
max_pred = 5    # max tokens of prediction
n_layers = 6
n_heads = 12
d_model = 768
d_ff = 768 * 4  # 4*d_model, FeedForward dimension
d_k = d_v = 64  # dimension of K(=Q), V
n_segments = 3

# sample IsNext and NotNext to be same in small batch size
def make_data():
    batch = []
    for i in range(len(setences)):
        tokens_a_index = i
        tokens_a = token_list[tokens_a_index]
        input_ids = [word2idx['[CLS]']] + tokens_a + [word2idx['[SEP]']]
        segment_ids = [0] * (1 + len(tokens_a) + 1)

        # MASK LM
        n_pred = min(max_pred, max(1, int(len(input_ids) * 0.15)))  # 15 % of tokens in one sentence
        cand_maked_pos = [i for i, token in enumerate(input_ids)
                          if token != word2idx['[CLS]'] and token != word2idx['[SEP]']]  # candidate masked position
        shuffle(cand_maked_pos)
        masked_tokens, masked_pos = [], []
        for pos in cand_maked_pos[:n_pred]:
            masked_pos.append(pos)
            masked_tokens.append(input_ids[pos])
            if random() < 0.8:  # 80%
                input_ids[pos] = word2idx['[MASK]']  # make mask
            elif random() > 0.9:  # 10%
                index = randint(0, vocab_size - 1)  # random index in vocabulary
                while index < 4:  # can't involve 'CLS', 'SEP', 'PAD'
                    index = randint(0, vocab_size - 1)
                input_ids[pos] = index  # replace

        # Zero Paddings
        n_pad = maxlen - len(input_ids)
        input_ids.extend([0] * n_pad)
        segment_ids.extend([0] * n_pad)

        # Zero Padding (100% - 15%) tokens
        if max_pred > n_pred:
            n_pad = max_pred - n_pred
            masked_tokens.extend([0] * n_pad)
            masked_pos.extend([0] * n_pad)

        batch.append([input_ids, segment_ids, masked_tokens, masked_pos, label[tokens_a_index]])  # IsNext
    return batch

batch = make_data()
input_ids, segment_ids, masked_tokens, masked_pos, isNext = zip(*batch)
'''
a = [1, 2, 3]
b = [4, 5, 6]
zipped = zip(a, b)  # packs into a list of tuples: [(1, 4), (2, 5), (3, 6)]
zip(*zipped)        # the inverse of zip ("unzip"); can be used to transpose a matrix: [(1, 2, 3), (4, 5, 6)]
'''

input_ids, segment_ids, masked_tokens, masked_pos, isNext = \
    torch.LongTensor(input_ids), torch.LongTensor(segment_ids), torch.LongTensor(masked_tokens), \
    torch.LongTensor(masked_pos), torch.LongTensor(isNext)

class MyDataSet(Data.Dataset):
    def __init__(self, input_ids, segment_ids, masked_tokens, masked_pos, isNext):
        self.input_ids = input_ids
        self.segment_ids = segment_ids
        self.masked_tokens = masked_tokens
        self.masked_pos = masked_pos
        self.isNext = isNext

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.segment_ids[idx], self.masked_tokens[idx], self.masked_pos[idx], self.isNext[idx]

loader = Data.DataLoader(MyDataSet(input_ids, segment_ids, masked_tokens, masked_pos, isNext), batch_size, True)

def get_attn_pad_mask(seq_q, seq_k):
    batch_size, seq_len = seq_q.size()  # [batch_size, maxlen]
    # eq(zero) is PAD token
    pad_attn_mask = seq_q.data.eq(0).unsqueeze(1)  # [batch_size, 1, seq_len]
    return pad_attn_mask.expand(batch_size, seq_len, seq_len)  # [batch_size, seq_len, seq_len]

def gelu(x):
    return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))

class Embedding(nn.Module):
    def __init__(self):
        super(Embedding, self).__init__()
        self.tok_embed = nn.Embedding(vocab_size, d_model)   # token embedding
        self.pos_embed = nn.Embedding(maxlen, d_model)       # position embedding
        self.seg_embed = nn.Embedding(n_segments, d_model)   # segment(token type) embedding
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x, seg):
        seq_len = x.size(1)
        pos = torch.arange(seq_len, dtype=torch.long)
        # print('pos:', pos)
        # pos: tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        #              18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
        pos = pos.unsqueeze(0).expand_as(x).to(device)  # [seq_len] -> [batch_size, seq_len]
        # print('pos_batch:', pos)
        embedding = self.tok_embed(x) + self.pos_embed(pos) + self.seg_embed(seg)
        del pos, x, seg
        return self.norm(embedding)

class ScaledDotProductAttention(nn.Module):
    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()

    def forward(self, Q, K, V, attn_mask):
        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k)  # scores : [batch_size, n_heads, seq_len, seq_len]
        scores.masked_fill_(attn_mask, -1e9)  # Fills elements of self tensor with value where mask is one.
        attn = nn.Softmax(dim=-1)(scores)
        context = torch.matmul(attn, V)
        del attn, scores, Q, K, V, attn_mask
        return context

class MultiHeadAttention(nn.Module):
    def __init__(self):
        super(MultiHeadAttention, self).__init__()
        self.W_Q = nn.Linear(d_model, d_k * n_heads)
        self.W_K = nn.Linear(d_model, d_k * n_heads)
        self.W_V = nn.Linear(d_model, d_v * n_heads)

    def forward(self, Q, K, V, attn_mask):
        # q: [batch_size, seq_len, d_model], k: [batch_size, seq_len, d_model], v: [batch_size, seq_len, d_model]
        residual, batch_size = Q, Q.size(0)
        residual = residual.to(device)
        # (B, S, D) -proj-> (B, S, D) -split-> (B, S, H, W) -trans-> (B, H, S, W)
        q_s = self.W_Q(Q).view(batch_size, -1, n_heads, d_k).transpose(1, 2)  # q_s: [batch_size, n_heads, seq_len, d_k]
        k_s = self.W_K(K).view(batch_size, -1, n_heads, d_k).transpose(1, 2)  # k_s: [batch_size, n_heads, seq_len, d_k]
        v_s = self.W_V(V).view(batch_size, -1, n_heads, d_v).transpose(1, 2)  # v_s: [batch_size, n_heads, seq_len, d_v]

        attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1)  # attn_mask : [batch_size, n_heads, seq_len, seq_len]

        # context: [batch_size, n_heads, seq_len, d_v], attn: [batch_size, n_heads, seq_len, seq_len]
        context = ScaledDotProductAttention()(q_s, k_s, v_s, attn_mask)
        context = context.transpose(1, 2).contiguous().view(batch_size, -1, n_heads * d_v)  # context: [batch_size, seq_len, n_heads * d_v]
        output = nn.Linear(n_heads * d_v, d_model).to(device)(context)
        del context, attn_mask, q_s, k_s, v_s
        return nn.LayerNorm(d_model).to(device)(output + residual)  # output: [batch_size, seq_len, d_model]

class PoswiseFeedForwardNet(nn.Module):
    def __init__(self):
        super(PoswiseFeedForwardNet, self).__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        # (batch_size, seq_len, d_model) -> (batch_size, seq_len, d_ff) -> (batch_size, seq_len, d_model)
        return self.fc2(gelu(self.fc1(x)))

class EncoderLayer(nn.Module):
    def __init__(self):
        super(EncoderLayer, self).__init__()
        self.enc_self_attn = MultiHeadAttention()
        self.pos_ffn = PoswiseFeedForwardNet()

    def forward(self, enc_inputs, enc_self_attn_mask):
        enc_outputs = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask)  # enc_inputs to same Q, K, V
        enc_outputs = self.pos_ffn(enc_outputs)  # enc_outputs: [batch_size, seq_len, d_model]
        del enc_self_attn_mask, enc_inputs
        return enc_outputs

class BERT(nn.Module):
    def __init__(self):
        super(BERT, self).__init__()
        self.embedding = Embedding()
        self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)])
        self.fc = nn.Sequential(
            nn.Linear(d_model, d_model),
            nn.Dropout(0.5),
            nn.Tanh(),
        )
        self.classifier = nn.Linear(d_model, 3)
        self.linear = nn.Linear(d_model, d_model)
        self.activ2 = gelu
        # fc2 is shared with embedding layer
        embed_weight = self.embedding.tok_embed.weight
        self.fc2 = nn.Linear(d_model, vocab_size, bias=False)
        self.fc2.weight = embed_weight

    def forward(self, input_ids, segment_ids, masked_pos):
        output = self.embedding(input_ids, segment_ids)  # [batch_size, seq_len, d_model]
        enc_self_attn_mask = get_attn_pad_mask(input_ids, input_ids)  # [batch_size, maxlen, maxlen]
        for layer in self.layers:
            # output: [batch_size, max_len, d_model]
            output = layer(output, enc_self_attn_mask)
        # it will be decided by first token(CLS)
        '''
        (fc): Sequential(
          (0): Linear(in_features=768, out_features=768, bias=True)
          (1): Dropout(p=0.5, inplace=False)
          (2): Tanh()
        )
        (classifier): Linear(in_features=768, out_features=2, bias=True)
        (linear): Linear(in_features=768, out_features=768, bias=True)
        (fc2): Linear(in_features=768, out_features=40, bias=False)
        '''
        # logits_clsf: predict whether the sentences are continuous from [CLS], which sits at position 0
        h_pooled = self.fc(output[:, 0])  # [batch_size, d_model]
        logits_clsf = self.classifier(h_pooled)  # [batch_size, 3] predict isNext

        masked_pos = masked_pos[:, :, None].expand(-1, -1, d_model)  # [batch_size, max_pred, d_model]
        h_masked = torch.gather(output, 1, masked_pos)  # masking position [batch_size, max_pred, d_model]
        h_masked = self.activ2(self.linear(h_masked))  # [batch_size, max_pred, d_model]
        # logits_lm: predict the masked tokens
        logits_lm = self.fc2(h_masked)  # [batch_size, max_pred, vocab_size]
        del h_masked, h_pooled, output, enc_self_attn_mask, masked_pos, input_ids, segment_ids
        return logits_lm, logits_clsf

model = BERT().to(device)
# print(model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.000001)

# out = torch.gather(input, dim, index)
# index = torch.from_numpy(np.array([[1, 2, 0], [2, 0, 1]])).type(torch.LongTensor)
# index = index[:, :, None].expand(-1, -1, 10)

loss_list = []
for epoch in range(10):
    loss_sum = 0
    for input_ids, segment_ids, masked_tokens, masked_pos, isNext in loader:
        logits_lm, logits_clsf = model(input_ids, segment_ids, masked_pos)
        # logits_lm: [batch_size, max_pred, vocab_size] -> [batch_size*max_pred, vocab_size];
        # batch_size*max_pred tokens in total, each scored over vocab_size classes.
        loss_lm = criterion(logits_lm.view(-1, vocab_size), masked_tokens.view(-1))  # for masked LM
        loss_lm = (loss_lm.float()).mean()
        # isNext
        isNext = isNext.to(device)
        loss_clsf = criterion(logits_clsf, isNext)  # for sentence classification
        loss = loss_lm + loss_clsf
        loss_sum = loss_sum + float(loss)  # accumulate as a plain float so no autograd graph is kept alive
        loss_list.append(float(loss))
        print('Epoch:', '%04d' % (epoch + 1), 'loss =', '{:.6f}'.format(loss))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        del loss, logits_clsf, input_ids, segment_ids, masked_tokens, masked_pos, logits_lm, isNext, loss_clsf, loss_lm
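# --------------------------------------------------------------------------
# (Supplementary note, not in the original post.) The loop above relies on del
# to drop references to per-batch tensors. If the script is moved to GPU, a
# common companion step is to let Python collect the dropped objects and to
# return PyTorch's cached blocks to the driver; wrapping the prediction loop
# below in torch.no_grad() would also avoid building autograd graphs during
# evaluation. For example:
import gc
gc.collect()                      # reclaim the Python objects released via del
if torch.cuda.is_available():
    torch.cuda.empty_cache()      # release cached GPU memory (a no-op for this CPU run)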
# Predict mask tokens and isNext
print('test')
token_list = []
for sentence in setences_test:
    arr = [word2idx[s] for s in sentence.split()]
    token_list.append(arr)

def make_data_test():
    batch = []
    for i in range(len(setences_test)):
        tokens_a_index = i
        tokens_a = token_list[tokens_a_index]
        input_ids = [word2idx['[CLS]']] + tokens_a + [word2idx['[SEP]']]
        segment_ids = [0] * (1 + len(tokens_a) + 1)

        # MASK LM
        n_pred = min(max_pred, max(1, int(len(input_ids) * 0.15)))  # 15 % of tokens in one sentence
        cand_maked_pos = [i for i, token in enumerate(input_ids)
                          if token != word2idx['[CLS]'] and token != word2idx['[SEP]']]  # candidate masked position
        shuffle(cand_maked_pos)
        masked_tokens, masked_pos = [], []
        for pos in cand_maked_pos[:n_pred]:
            masked_pos.append(pos)
            masked_tokens.append(input_ids[pos])
            if random() < 0.8:  # 80%
                input_ids[pos] = word2idx['[MASK]']  # make mask
            elif random() > 0.9:  # 10%
                index = randint(0, vocab_size - 1)  # random index in vocabulary
                while index < 4:  # can't involve 'CLS', 'SEP', 'PAD'
                    index = randint(0, vocab_size - 1)
                input_ids[pos] = index  # replace

        # Zero Paddings
        n_pad = maxlen - len(input_ids)
        input_ids.extend([0] * n_pad)
        segment_ids.extend([0] * n_pad)

        # Zero Padding (100% - 15%) tokens
        if max_pred > n_pred:
            n_pad = max_pred - n_pred
            masked_tokens.extend([0] * n_pad)
            masked_pos.extend([0] * n_pad)

        batch.append([input_ids, segment_ids, masked_tokens, masked_pos, label_test[tokens_a_index]])  # IsNext
    return batch
# Preprocessing Finished

batch = make_data_test()
input_ids, segment_ids, masked_tokens, masked_pos, isNext = zip(*batch)
input_ids, segment_ids, masked_tokens, masked_pos, isNext = \
    torch.LongTensor(input_ids), torch.LongTensor(segment_ids), torch.LongTensor(masked_tokens), \
    torch.LongTensor(masked_pos), torch.LongTensor(isNext)

predict_list = []
for i in range(len(batch)):
    input_ids, segment_ids, masked_tokens, masked_pos, isNext = batch[i]
    print([idx2word[w] for w in input_ids if idx2word[w] != '[PAD]'])
    logits_lm, logits_clsf = model(torch.LongTensor([input_ids]),
                                   torch.LongTensor([segment_ids]), torch.LongTensor([masked_pos]))
    logits_lm = logits_lm.data.max(2)[1][0].data.numpy()
    print('masked tokens list : ', [pos for pos in masked_tokens if pos != 0])
    print('predict masked tokens list : ', [pos for pos in logits_lm if pos != 0])
    logits_clsf = logits_clsf.data.max(1)[1].data.numpy()[0]
    print('isNext : ', isNext)
    print('predict isNext : ', logits_clsf)
    predict_list.append(logits_clsf)

test_loss = 0
correct = 0
total = 0
target_num = [0, 0, 0]
predict_num = [0, 0, 0]
p = 0
acc_num = [0, 0, 0]

for i in label_test:
    target_num[i] += 1

for i in predict_list:
    # print(i.argmax())
    index = int(i)
    if index in [0, 1, 2]:
        predict_num[index] += 1
        # print(id2word[index], id2word[p])
        if index == label_test[p]:
            acc_num[index] += 1
    p += 1

# print(target_num)
# print(predict_num)
# print(acc_num)

recallz = 0
precisionz = 0
accuracyz = 0
F1z = 0
ps = 0
rs = 0
for i in range(3):
    if target_num[i] != 0:
        recallz = acc_num[i] / target_num[i]
    else:
        recallz = 0
    if predict_num[i] != 0:
        precisionz = acc_num[i] / predict_num[i]
    else:
        precisionz = 0
    ps = ps + precisionz
    rs = rs + recallz
    if recallz + precisionz != 0:
        F1z = F1z + 2 * recallz * precisionz / (recallz + precisionz)
# recall = [acc_num[i]/target_num[i] for i in range(3)]
# precision = [acc_num[i]/predict_num[i] for i in range(3)]
# F1 = [2*recall[i]*precision[i]/(recall[i]+precision[i]) for i in range(3)]
print()
accuracy = sum(acc_num) / sum(target_num)
# print in a format that is easy to copy
print('recall:', rs / 3)
print('precision:', ps / 3)
print('F1:', F1z / 3)
print('accuracy', accuracy)

plt.plot(loss_list, label='BERT')
plt.legend()
plt.title('loss-epoch')
plt.show()
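The listing imports get_data from a local data_process module that the post does not include. All the code assumes is that it returns training sentences, training labels, test sentences and test labels, where each sentence is a space-separated string of words and each label is an integer in {0, 1, 2} (the classifier head has three outputs). A hypothetical stand-in, only so the listing can be run end to end, could look like this:

# data_process.py -- hypothetical stub, not part of the original post
def get_data():
    setences = [
        'hello how are you i am romeo',
        'hello romeo my name is juliet nice to meet you',
        'great my baseball team won the competition',
        'oh congratulations juliet',
        'where are you going today',
        'i am going shopping what about you',
    ]
    label = [0, 1, 2, 0, 1, 2]  # three classes, matching the 3-way classifier head
    setences_test = [
        'nice meet you too how are you today',
        'thank you romeo',
        'i am going to visit my grandmother she is not very well',
    ]
    label_test = [0, 2, 1]
    return setences, label, setences_test, label_test

The sentences here simply reuse the toy dialogue that the original tutorial ships commented out at the top of the listing.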
