淮南装饰公司网站建设,wordpress固定网址打不开,沈阳网站优化怎么做,发布程序后网站有很多 文章目录：前言；0、数据集准备；1、数据集；2、dataset；3、model；4、训练模型 前言
在 PyTorch 中，模型训练一般分为以下几个步骤： 0、数据集准备 1、数据集读取（dataset 模块） 2、数据集转换为 tensor（dataloader 模块） 3、定义模型… 文章目录：前言；0、数据集准备；1、数据集；2、dataset；3、model；4、训练模型 前言
在 PyTorch 中，模型训练一般分为以下几个步骤：0、数据集准备；1、数据集读取（dataset 模块）；2、数据集转换为 tensor（dataloader 模块）；3、定义模型（model，编写模型代码，主要是前向传播）；4、定义损失函数（loss）；5、定义优化器（optimizer）；6、最后一步是模型训练阶段（train），这一步会利用循环把 dataset → dataloader → model → loss → optimizer 合并起来。相比于普通的函数，神经网络并没有特别神奇的地方，我们不妨把训练过程看成普通函数参数求解的过程，也就是最优化求解参数。下面以 AlexNet 模型为例进行分类训练。
0、数据集准备
分类数据不需要进行标注，只需要给出类别就可以了；对应的分割、检测任务则需要借助 labelme 或者 labelimg 进行标注。将数据分为训练集、验证集和测试集：训练集用于模型训练，验证集用于在训练过程中检验模型参数的表现，测试集用于在模型训练完成之后验证模型的表现。
1、数据集
从这里下载数据集 The TU Darmstadt Database (formerly the ETHZ Database)，共三种类型：115 motorbikes + 50 × 2 cars + 112 cows = 327 张照片。把数据分为训练集（train）和验证集（val），并对 train 和 val 文件夹生成对应的标签文件，每一行为照片的名称和对应的类别编号（从 0 开始）。
2、dataset
现在写一个名为 dataset.py 的文件，在其中写一个 VOCDataset 类来读取训练集和验证集。VOCDataset 继承了 torch.utils.data.Dataset，并重写父类的两个函数：__getitem__ 返回每个图像及其对应的标签，__len__ 返回数据集的数量。
import torch
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from PIL import Image
import os


class VOCDataset(Dataset):
    """Image-classification dataset backed by a folder of PNGs and a label file.

    The label file is plain text with one sample per line in the form
    ``img_name class_id`` (whitespace separated), where ``class_id`` is an
    integer class index.

    Args:
        img_dir: Directory containing the ``.png`` images.
        label_root: Path to the label text file described above.
        transform: Optional callable applied to the RGB ``PIL.Image``. When
            omitted, ``transforms.ToTensor()`` is applied instead.

    Raises:
        ValueError: If a non-blank line of the label file does not consist of
            exactly two fields, or if ``class_id`` is not an integer.
    """

    def __init__(self, img_dir, label_root, transform=None):
        self.img_root = img_dir
        self.label_root = label_root
        self.transform = transform

        # os.listdir() returns entries in arbitrary order; sort so that
        # index -> sample is reproducible across runs and platforms.
        self.img_paths = sorted(
            os.path.join(self.img_root, f)
            for f in os.listdir(self.img_root)
            if f.endswith(".png")
        )

        # Map image file name -> integer class id.
        self.label_classes = {}
        with open(label_root, "r") as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                if len(fields) != 2:
                    raise ValueError(
                        f"{label_root}:{line_no}: expected "
                        f"'img_name class_id', got {line.strip()!r}"
                    )
                img_name, class_id = fields
                self.label_classes[img_name] = int(class_id)

    def __len__(self):
        """Return the number of ``.png`` images found in ``img_dir``."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(image, class_id)`` for the sample at position ``idx``.

        Raises:
            ValueError: If the image has no entry in the label file.
        """
        img_path = self.img_paths[idx]
        img_name = os.path.basename(img_path)

        # Resolve the label before decoding so an unlabeled image fails fast.
        if img_name not in self.label_classes:
            raise ValueError(f"Image {img_name} not found in label file.")
        target = self.label_classes[img_name]

        # convert() forces the pixel data to load, so the file handle can be
        # released as soon as the RGB copy exists.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        if self.transform:
            img = self.transform(img)
        else:
            img = transforms.ToTensor()(img)
        return img, target
3、model
新建一个 model.py 的文件，写一个 AlexNet 类（参考《动手学深度学习》7.1），继承 torch.nn.Module，并重写 forward 函数。
from torch import nn
from torchvision import modelsclass AlexNet(nn.Module):def __init__(self,num_class3):super(AlexNet, self).__init__()self.conv2d1nn.Conv2d(in_channels3,out_channels96,kernel_size11,stride4,padding1)self.pool1nn.MaxPool2d(kernel_size3,stride2,padding0)self.conv2d2nn.Conv2d(in_channels96,out_channels256,kernel_size5,stride1,padding2)self.pool2nn.MaxPool2d(kernel_size3,stride2,padding0)self.conv2d3nn.Conv2d(in_channels256,out_channels384,kernel_size3,stride1,padding1)self.conv2d4nn.Conv2d(in_channels384,out_channels384,kernel_size3,stride1,padding1)self.conv2d5nn.Conv2d(in_channels384,out_channels256,kernel_size3,stride1,padding1)self.pool3nn.MaxPool2d(kernel_size3,stride2,padding0)# 全连接层4096self.fc1nn.Linear(256*5*5,4096)self.fc2nn.Linear(4096,4096)self.fc3nn.Linear(4096,num_class)self.sequential nn.Sequential(self.conv2d1,nn.ReLU(),self.pool1,self.conv2d2,nn.ReLU(),self.pool2,self.conv2d3,nn.ReLU(),self.conv2d4,nn.ReLU(),self.conv2d5,nn.ReLU(),self.pool3,nn.Flatten(),self.fc1,nn.ReLU(),nn.Dropout(0.5),self.fc2,nn.ReLU(),nn.Dropout(0.5),self.fc3)# 初始化权重for m in self.modules():if isinstance(m, nn.Conv2d):nn.init.kaiming_normal_(m.weight, modefan_out, nonlinearityrelu)elif isinstance(m, nn.Linear):nn.init.normal_(m.weight, 0, 0.01)nn.init.constant_(m.bias, 0)def forward(self,x):x self.sequential(x)return x
4、训练模型
首先定义损失函数和优化器：
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4)
然后新建一个 train.py 的文件：
import os

import tensorboard
import torch
from torch.utils.data import DataLoader
from torchvision import models
from torchvision import transforms
from torchvision.datasets import CIFAR10

from dataset import VOCDataset
from model import AlexNet, ResnetPretrained
def train(model, train_dataset, val_dataset, num_epochs=20, batch_size=32,
          learning_rate=0.001, num_workers=4,
          save_path="best_alexnet_cifar10.pth"):
    """Train ``model`` with SGD and validate it after every epoch.

    Batches are moved to whatever device the model's parameters live on, so
    the same code runs on GPU and CPU. Whenever the validation accuracy
    improves, the model's ``state_dict`` is written to ``save_path``.

    Args:
        model: ``torch.nn.Module`` to train, already placed on its device.
        train_dataset: Map-style dataset yielding ``(input, label)`` pairs.
        val_dataset: Map-style dataset used for per-epoch validation.
        num_epochs: Number of passes over ``train_dataset``.
        batch_size: Batch size for both loaders.
        learning_rate: Initial SGD learning rate.
        num_workers: Worker processes per ``DataLoader``.
        save_path: Destination of the best-accuracy checkpoint.

    Returns:
        The best validation accuracy seen, in percent.

    Raises:
        ValueError: If either dataset is empty.
    """
    if len(train_dataset) == 0 or len(val_dataset) == 0:
        raise ValueError("train_dataset and val_dataset must both be non-empty.")

    # Follow the model's device instead of assuming CUDA is available.
    device = next(model.parameters()).device

    # 1. Data loaders.
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=num_workers)

    # 2. Loss function and optimizer.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                                momentum=0.9, weight_decay=1e-4)

    # NOTE: the original built a ReduceLROnPlateau(mode="max", factor=0.5,
    # patience=2) scheduler here but never stepped it, so the learning rate
    # stayed constant. That behavior is kept; to enable decay, create the
    # scheduler and call scheduler.step(epoch_acc) after each validation pass.

    # 3. Training loop.
    best_acc = 0.0
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        total = 0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the running mean is per-sample.
            running_loss += loss.item() * inputs.size(0)
            total += inputs.size(0)

            if i % 100 == 0:
                avg_loss = running_loss / total
                print(f"Epoch [{epoch+1}/{num_epochs}], "
                      f"Step [{i+1}/{len(train_loader)}], "
                      f"Loss: {avg_loss:.4f}")

        # Validate at the end of every epoch.
        model.eval()
        correct = 0
        total_val = 0
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                predicted = outputs.argmax(dim=1)
                total_val += labels.size(0)
                correct += (predicted == labels).sum().item()
                val_loss += loss.item() * inputs.size(0)

        epoch_acc = 100 * correct / total_val
        avg_val_loss = val_loss / total_val

        print(f"Epoch {epoch+1}/{num_epochs} | "
              f"Train Loss: {running_loss/total:.4f} | "
              f"Val Loss: {avg_val_loss:.4f} | "
              f"Val Acc: {epoch_acc:.2f}%")

        # Keep the checkpoint with the best validation accuracy.
        if epoch_acc > best_acc:
            best_acc = epoch_acc
            torch.save(model.state_dict(), save_path)

    print(f"Best Validation Accuracy: {best_acc:.2f}%")
    return best_acc


if __name__ == "__main__":
    # 1. Dataset locations.
    train_img_dir = r"F:\dataset\tud\TUDarmstadt\PNGImages\train"
    val_img_dir = r"F:\dataset\tud\TUDarmstadt\PNGImages\val"
    train_label_file = r"F:\dataset\tud\TUDarmstadt\PNGImages/train_set.txt"
    val_label_file = r"F:\dataset\tud\TUDarmstadt\PNGImages/val_set.txt"

    # 2. Dataset instances.
    # Training pipeline with augmentation: upscale first, then random crop.
    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Validation uses no augmentation, only the same preprocessing.
    transform_val = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = VOCDataset(train_img_dir, train_label_file,
                               transform=transform_train)
    val_dataset = VOCDataset(val_img_dir, val_label_file,
                             transform=transform_val)
    print(f"Train dataset size: {len(train_dataset)}")
    print(f"Validation dataset size: {len(val_dataset)}")

    # Alternative: classify CIFAR-10 instead (set num_classes = 10 below).
    # transform_train = transforms.Compose([
    #     transforms.Resize((224, 224)),
    #     transforms.RandomHorizontalFlip(),
    #     transforms.RandomCrop(224, padding=4),
    #     transforms.ToTensor(),
    #     transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
    #                          std=[0.2470, 0.2435, 0.2616]),
    # ])
    # transform_val = transforms.Compose([
    #     transforms.Resize((224, 224)),
    #     transforms.ToTensor(),
    #     transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
    #                          std=[0.2470, 0.2435, 0.2616]),
    # ])
    # train_dataset = CIFAR10(root="data", train=True, download=True,
    #                         transform=transform_train)
    # val_dataset = CIFAR10(root="data", train=False, download=True,
    #                       transform=transform_val)
    # print(f"Train dataset size: {len(train_dataset)}")
    # print(f"Validation dataset size: {len(val_dataset)}")

    # 3. Model instance. The TU Darmstadt set has three classes
    # (motorbikes, cars, cows); the original passed 10, a leftover from the
    # CIFAR-10 variant above.
    num_classes = 3
    model = AlexNet(num_class=num_classes)

    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # 4. Train.
    train(model, train_dataset, val_dataset, num_epochs=20, batch_size=32,
          learning_rate=0.001)
    print("Finished Training")

    # 5. Save the final weights; create the directory so torch.save cannot
    # fail on a missing "output" folder.
    os.makedirs("output", exist_ok=True)
    torch.save(model.state_dict(), "output/alexnet.pth")
    print("Model saved as alexnet.pth")
运行 main 函数就可以进行训练了，后面会讲一些如何改进这个模型的方法和一些训练技巧。
参考 1 2 3