TensorFlow2 in Action - Tutorial Series: see the series table of contents. Feel free to leave any questions in the comments below. All code in this article was run in Jupyter Notebook, and the companion code resources have been uploaded.

8. Compressed Network Model
import numpy as np
import tensorflow as tf

class Model(tf.keras.Model):
    def __init__(self, params):
        super().__init__()
        # Pre-trained word embeddings, frozen during training
        self.embedding = tf.Variable(np.load('./vocab/word.npy'),
                                     dtype=tf.float32,
                                     name='pretrained_embedding',
                                     trainable=False)
        self.drop1 = tf.keras.layers.Dropout(params['dropout_rate'])
        self.drop2 = tf.keras.layers.Dropout(params['dropout_rate'])
        self.drop3 = tf.keras.layers.Dropout(params['dropout_rate'])
        self.rnn1 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(params['rnn_units'], return_sequences=True))
        self.rnn2 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(params['rnn_units'], return_sequences=True))
        self.rnn3 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(params['rnn_units'], return_sequences=True))
        self.drop_fc = tf.keras.layers.Dropout(params['dropout_rate'])
        self.fc = tf.keras.layers.Dense(2*params['rnn_units'], tf.nn.elu)
        self.out_linear = tf.keras.layers.Dense(2)

    def call(self, inputs, training=False):
        if inputs.dtype != tf.int32:
            inputs = tf.cast(inputs, tf.int32)
        batch_sz = tf.shape(inputs)[0]
        rnn_units = 2*params['rnn_units']  # bidirectional LSTM doubles the feature dimension

        # (batch_sz, 1000) -> (batch_sz, 1000, 50): look up the embedding of each word
        x = tf.nn.embedding_lookup(self.embedding, inputs)

        # Level 1: split each document into 100 sub-sequences of 10 words
        x = tf.reshape(x, (batch_sz*10*10, 10, 50))
        x = self.drop1(x, training=training)
        x = self.rnn1(x)
        x = tf.reduce_max(x, 1)  # keep only the per-feature max over the 10 steps

        # Level 2: process 10 sub-sequence vectors at a time
        x = tf.reshape(x, (batch_sz*10, 10, rnn_units))
        x = self.drop2(x, training=training)
        x = self.rnn2(x)
        x = tf.reduce_max(x, 1)

        # Level 3: compress down to one vector per document
        x = tf.reshape(x, (batch_sz, 10, rnn_units))
        x = self.drop3(x, training=training)
        x = self.rnn3(x)
        x = tf.reduce_max(x, 1)

        x = self.drop_fc(x, training=training)
        x = self.fc(x)
        x = self.out_linear(x)
        return x

This is another version of the custom network. The layer definitions are exactly the same as before; the difference is in the forward pass, where the output of each RNN is compressed: of every 10 time steps, only the per-feature maximum is kept, so the amount of data shrinks to 1/10 after each level. That is why this version trains noticeably faster.
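Before moving on, it helps to see what tf.reduce_max(x, 1) actually does. A minimal standalone sketch (the tiny tensor below is made up purely for illustration): for each feature column it keeps the maximum over the time dimension, which is how every group of 10 RNN outputs collapses into a single vector.

import tensorflow as tf

# 1 sequence, 3 time steps, 2 features
x = tf.constant([[[1., 5.],
                  [3., 2.],
                  [4., 0.]]])
print(tf.reduce_max(x, 1).numpy())  # [[4. 5.]] -- per-feature max over the time axis

# Resulting shape flow in the model above (batch_sz = 32 assumed):
# (32, 1000, 50) -> (3200, 10, 50) -> rnn1 + max -> (3200, 400)
#                -> (320, 10, 400) -> rnn2 + max -> (320, 400)
#                -> (32, 10, 400)  -> rnn3 + max -> (32, 400)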
9. Model Training Parameters
params = {
    'vocab_path': './vocab/word.txt',
    'train_path': './data/train.txt',
    'test_path': './data/test.txt',
    'num_samples': 25000,
    'num_labels': 2,
    'batch_size': 32,
    'max_len': 1000,
    'rnn_units': 200,
    'dropout_rate': 0.2,
    'clip_norm': 10.,
    'num_patience': 3,
    'lr': 3e-4,
}

In order: the vocabulary file path, training data path, and test data path; the number of samples, the number of output labels, and the batch_size; the maximum sentence length, the number of hidden units per RNN layer (rnn_units), and the dropout rate; the gradient-clipping norm (keeps gradients from changing too violently and helps control overfitting), how many evaluations without improvement are tolerated before training stops, and the learning rate.
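One detail worth flagging: max_len is not arbitrary. The reshapes in the compressed model assume every document has exactly 1000 = 10 × 10 × 10 tokens (presumably padded or truncated to this length in the dataset pipeline from earlier in the series). A quick sanity check, with the batch size and the 50-dimensional embeddings assumed as above:

import tensorflow as tf

batch_sz, max_len, embed_dim = 32, 1000, 50
x = tf.zeros((batch_sz, max_len, embed_dim))
x = tf.reshape(x, (batch_sz*10*10, 10, embed_dim))  # only valid because 1000 = 10*10*10
print(x.shape)  # (3200, 10, 50); any other max_len would raise a reshape error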
def is_descending(history: list):
    history = history[-(params['num_patience'] + 1):]
    for i in range(1, len(history)):
        if history[i-1] <= history[i]:
            return False
    return True

This helper judges, from the recorded loss or accuracy values, whether the model is still improving: if the metric has not improved for num_patience consecutive evaluations, training is stopped.
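A quick usage check of this early-stopping helper (with params['num_patience'] = 3 as configured above): it returns True only when the last num_patience + 1 values are strictly decreasing.

print(is_descending([0.850, 0.879, 0.872, 0.868, 0.863]))  # True  -> triggers the early stop
print(is_descending([0.850, 0.872, 0.868, 0.879, 0.863]))  # False -> training continues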
word2idx = {}
with open(params['vocab_path'], encoding='utf-8') as f:
    for i, line in enumerate(f):
        line = line.rstrip()
        word2idx[line] = i
params['word2idx'] = word2idx
params['vocab_size'] = len(word2idx) + 1
Read in the vocabulary file and build the word-to-id mapping; vocab_size is one larger than the vocabulary itself, leaving room for one extra id.
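As a hypothetical illustration of the mapping (assuming ./vocab/word.txt stores one token per line, which is what the line-by-line loop above implies; the real tokenization lives in the dataset pipeline from the earlier parts of this series):

sentence = 'this movie is great'
unk_id = len(params['word2idx'])  # the one extra id reserved by vocab_size = len(word2idx) + 1
ids = [params['word2idx'].get(w, unk_id) for w in sentence.split()]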
import time
import logging

model = Model(params)
model.build(input_shape=(None, None))
decay_lr = tf.optimizers.schedules.ExponentialDecay(params['lr'], 1000, 0.95)  # exponential learning-rate decay
optim = tf.optimizers.Adam(params['lr'])
global_step = 0
history_acc = []
best_acc = .0
t0 = time.time()
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

Build the model and specify the input shape (it could also be inferred automatically during fit); create the learning-rate decay schedule and the optimizer; then initialize the bookkeeping: a step counter, the accuracy history, the best accuracy so far, the start time for timing, and the logging configuration.
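ExponentialDecay(params['lr'], 1000, 0.95) multiplies the initial learning rate by 0.95 for every 1000 steps (smoothly, since staircase defaults to False). You can verify this against the training log below, e.g. at step 10200:

print(round(3e-4 * 0.95 ** (10200 / 1000), 6))  # 0.000178 -- matches the "LR: 0.000178" line in the log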
10. Model Training
while True:
    # training
    for texts, labels in dataset(is_training=True, params=params):
        with tf.GradientTape() as tape:
            logits = model(texts, training=True)
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
            loss = tf.reduce_mean(loss)

        optim.lr.assign(decay_lr(global_step))
        grads = tape.gradient(loss, model.trainable_variables)
        grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
        optim.apply_gradients(zip(grads, model.trainable_variables))

        if global_step % 50 == 0:
            logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
                global_step, loss.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
            t0 = time.time()
        global_step += 1

    # evaluation
    m = tf.keras.metrics.Accuracy()
    for texts, labels in dataset(is_training=False, params=params):
        logits = model(texts, training=False)
        y_pred = tf.argmax(logits, axis=-1)
        m.update_state(y_true=labels, y_pred=y_pred)

    acc = m.result().numpy()
    logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))
    history_acc.append(acc)

    if acc > best_acc:
        best_acc = acc
    logger.info("Best Accuracy: {:.3f}".format(best_acc))

    if len(history_acc) > params['num_patience'] and is_descending(history_acc):
        logger.info("Testing Accuracy not improved over {} epochs, Early Stop".format(params['num_patience']))
        break

The data is consumed batch by batch. tf.GradientTape records every operation executed inside its context, so tape.gradient() can later compute the gradient of any tensor produced there. Each batch is pushed through the model, the cross-entropy loss is computed and averaged, and the learning rate is updated according to the custom decay schedule defined above. The gradients obtained from the tape are clipped by their global norm, since updates can occasionally be too aggressive, and are then applied to the trainable variables. Every 50 steps the current training status is logged. After each epoch the current network is evaluated on the test set; once the accuracy has failed to improve for num_patience (here 3) consecutive evaluations, training stops early.
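To make the clipping step concrete, here is a standalone sketch of tf.clip_by_global_norm (the toy gradient is made up for illustration): it rescales all gradients jointly so their combined L2 norm does not exceed clip_norm, preserving their direction.

import tensorflow as tf

grads = [tf.constant([30., 40.])]                    # global norm = 50
clipped, norm = tf.clip_by_global_norm(grads, 10.)   # clip_norm = 10., as in params
print(norm.numpy())        # 50.0 -- the norm before clipping
print(clipped[0].numpy())  # [6. 8.] -- scaled by 10/50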
Part of the training log:

Reading ./data/train.txt
INFO:tensorflow:Step 0 | Loss: 0.6997 | Spent: 7.5 secs | LR: 0.000300
…
INFO:tensorflow:Evaluation: Testing Accuracy: 0.872
INFO:tensorflow:Best Accuracy: 0.879
Reading ./data/train.txt
INFO:tensorflow:Step 10200 | Loss: 0.2801 | Spent: 640.2 secs | LR: 0.000178
INFO:tensorflow:Step 10250 | Loss: 0.1747 | Spent: 77.9 secs | LR: 0.000177
INFO:tensorflow:Step 10300 | Loss: 0.2829 | Spent: 77.7 secs | LR: 0.000177
…
INFO:tensorflow:Step 10900 | Loss: 0.2204 | Spent: 77.7 secs | LR: 0.000172
Reading ./data/test.txt
INFO:tensorflow:Evaluation: Testing Accuracy: 0.863
INFO:tensorflow:Best Accuracy: 0.879
INFO:tensorflow:Testing Accuracy not improved over 3 epochs, Early Stop