长春长春网站建设网,网站建设优化开发公司哪家好,抖音代运营mcn,网站内容授权书《pytorch图像分类》p3VGG网络详解及感受野的计算 一、零碎知识点1.nn.Sequential2.**kwargs 二、VGG网络模型详解1.感受野2.模型手算 三、代码1.module.py2.train.py3.predict.py 一、零碎知识点
论文链接：VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAG… 《pytorch图像分类》p3 VGG网络详解及感受野的计算 一、零碎知识点 1.nn.Sequential 2.**kwargs 二、VGG网络模型详解 1.感受野 2.模型手算 三、代码 1.module.py 2.train.py 3.predict.py 一、零碎知识点
论文链接：VERY DEEP CONVOLUTIONAL NETWORKS FOR LARGE-SCALE IMAGE RECOGNITION；代码链接：霹雳吧啦Wz / deep-learning-for-image-processing
1.nn.Sequential
nn.Sequential 是 PyTorch 中的一个类，用于按顺序组织和堆叠神经网络的层或模块。它提供了一种便捷的方式来构建简单的前向传播网络。
import torch
import torch.nn as nn

# Example sizes so the snippet runs as-is; substitute your own values.
in_channels, out_channels, kernel_size = 3, 16, 3
# nn.Linear acts on the last dimension of its input, so hidden_features has to
# equal the width of the convolution output (e.g. 32 - 3 + 1 = 30 for a
# 32-pixel-wide input with no padding).
hidden_features, out_features = 30, 10

# nn.Sequential runs its children in the order they are listed.
model = nn.Sequential(
    # Arguments: in_channels, out_channels, kernel_size
    nn.Conv2d(in_channels, out_channels, kernel_size),
    nn.ReLU(),  # add an activation function
    nn.Linear(hidden_features, out_features),  # add a linear layer
)
# 2. **kwargs
**kwargs 是一种特殊的参数传递方式，它允许函数接受不定数量的关键字参数（Keyword Arguments），并将它们作为一个字典进行处理。
下面是一个简单的示例，说明 **kwargs 的用法：
def example_func(**kwargs):
    """Print every keyword argument as ``key value``, one pair per line.

    Args:
        **kwargs: Arbitrary keyword arguments; inside the function they are
            available as a regular ``dict``.
    """
    for key, value in kwargs.items():
        print(key, value)


example_func(name="Maverick", age=22, location="cheng du")
# Output:
name Maverick
age 22
location cheng du
二、VGG网络模型详解
1.感受野
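感受野（receptive field）是指在卷积神经网络（CNN）中，某一层输出特征图上的一个像素位置所对应的输入图像上的区域大小。随着卷积层的增多（即网络的加深），感受野会越来越大。当我们说一个神经网络层的感受野大小为 N 时，可以简单解释为：在该层输出特征图上的一个像素点，它所“看到”的输入图像区域大小是 N×N。随着网络层数的增加，感受野也会逐渐增大：最早的卷积层（例如卷积核为 3×3）感受野较小，后续的层会通过池化或步幅更大的卷积来逐渐增大感受野。感受野可以由后向前逐层计算：$F(i) = (F(i+1) - 1) \times \text{stride} + \text{ksize}$；例如两个步幅为 1 的 3×3 卷积堆叠时，$(3 - 1) \times 1 + 3 = 5$，其感受野与一个 5×5 卷积相同。
2.模型手算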
VGG 网络的常用配置是 D（即 VGG16），共有 16 个带权重的层，包括 13 个卷积层和 3 个全连接层。
LRN 是一种对神经网络中的特征图进行局部归一化的操作，其目的是增加网络的鲁棒性，防止某些特征具有过大的响应值而抑制其他特征的重要性。具有鲁棒性的模型能够在输入数据存在一定程度的扰动、噪声或异常的情况下，仍然保持良好的性能。反复记忆：输出特征矩阵的深度（out_channels）和卷积核的个数相同。因为彩色图像有 R、G、B 三个通道，所以最开始的特征矩阵深度为 3，后面则随各层卷积核个数的不同而改变。
三、代码
1.module.py
import torch.nn as nn
import torch# official pretrain weights
model_urls = {
    "vgg11": "https://download.pytorch.org/models/vgg11-bbd30ac9.pth",
    "vgg13": "https://download.pytorch.org/models/vgg13-c768596a.pth",
    "vgg16": "https://download.pytorch.org/models/vgg16-397923af.pth",
    "vgg19": "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth",
}


class VGG(nn.Module):
    """VGG classifier: a convolutional feature extractor plus three FC layers.

    Args:
        features: Module producing the convolutional features (see
            ``make_features``). Its flattened output must have 512*7*7
            elements per sample, which is what the first linear layer expects.
        num_classes: Size of the final linear layer's output.
        init_weights: When true, re-initialise conv and linear layers via
            ``_initialize_weights``.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the feature extractor, flatten, then classify."""
        # N x 3 x 224 x 224
        x = self.features(x)
        # N x 512 x 7 x 7
        x = torch.flatten(x, start_dim=1)
        # N x 512*7*7
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Xavier-uniform weights and zero biases for conv and linear layers."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0)


def make_features(cfg: list):
    """Build the convolutional part of a VGG network from a layer spec.

    Args:
        cfg: Sequence whose items are either the string ``"M"`` (a 2x2,
            stride-2 max-pool) or an int (output channels of a 3x3, padding-1
            convolution followed by an in-place ReLU).

    Returns:
        An ``nn.Sequential`` holding the layers in order. The first
        convolution takes 3 input channels.
    """
    layers = []
    in_channels = 3
    for v in cfg:
        if v == "M":
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            layers += [conv2d, nn.ReLU(True)]
            # The next convolution consumes what this one produced.
            in_channels = v
    return nn.Sequential(*layers)


# Layer specs per variant: ints are conv output channels, "M" is a max-pool.
cfgs = {
    "vgg11": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "vgg13": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    "vgg16": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512, "M"],
    "vgg19": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
}


def vgg(model_name="vgg16", **kwargs):
    """Create a VGG model for one of the variants listed in ``cfgs``.

    Args:
        model_name: Key into ``cfgs``.
        **kwargs: Forwarded to ``VGG`` (e.g. ``num_classes``, ``init_weights``).

    Returns:
        The constructed ``VGG`` instance.

    Raises:
        AssertionError: If ``model_name`` is not a key of ``cfgs``.
    """
    assert model_name in cfgs, "Warning: model number {} not in cfgs dict!".format(model_name)
    cfg = cfgs[model_name]
    model = VGG(make_features(cfg), **kwargs)
    return model
# 2.train.py
import os
import sys
import json

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm

# NOTE: the model definition has to be saved as model.py for this import to
# resolve.
from model import vgg


def main():
    """Train VGG16 on the flower dataset and keep the best-scoring weights.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``,
    writes the index-to-class mapping to ``class_indices.json`` and saves the
    state dict with the highest validation accuracy to ``./vgg16Net.pth``.

    Raises:
        AssertionError: If the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}
    flower_list = train_dataset.class_to_idx
    # Invert the mapping so predictions (indices) can be turned into names.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open("class_indices.json", "w") as json_file:
        json_file.write(json_str)

    batch_size = 2
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print("Using {} dataloader workers every process".format(nw))
    # NOTE(review): nw is computed and reported above, but both loaders are
    # created with num_workers=0 -- confirm which of the two is intended.

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=0)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=0)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = "./{}Net.pth".format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print("[epoch %d] train_loss: %.3f val_accuracy: %.3f" %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Only overwrite the checkpoint when validation accuracy improves.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print("Finished Training")


if __name__ == "__main__":
    main()
用的是老师的代码，但我的 GPU 显存不够：我已经将批处理大小（batch size）减小到 2 了，还是运行不起来，报错如下： CUDA out of memory. Tried to allocate 392.00 MiB (GPU 0; 2.00 GiB total capacity; 718.01 MiB already allocated; 341.00 MiB free; 740.00 MiB reserved in total by PyTorch)
3.predict.py
import os
import json

import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

# NOTE: the model definition has to be saved as model.py for this import to
# resolve.
from model import vgg


def main():
    """Classify ``../tulip.jpg`` with the trained VGG16 and show the result.

    Loads the index-to-class mapping from ``./class_indices.json`` and the
    weights from ``./vgg16Net.pth``, prints the probability of every class
    and displays the image titled with the top prediction.

    Raises:
        AssertionError: If the image, the class-index file or the weights
            file is missing.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: {} dose not exist.".format(img_path)
    # Force three channels: Normalize above is configured for exactly three,
    # so a grayscale or RGBA file would otherwise fail.
    img = Image.open(img_path).convert("RGB")
    plt.imshow(img)
    # [C, H, W]
    img = data_transform(img)
    # expand batch dimension -> [N, C, H, W]
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = "./class_indices.json"
    assert os.path.exists(json_path), "file: {} dose not exist.".format(json_path)

    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # create model
    model = vgg(model_name="vgg16", num_classes=5).to(device)
    # load model weights
    weights_path = "./vgg16Net.pth"
    assert os.path.exists(weights_path), "file: {} dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = int(torch.argmax(predict))

    # JSON object keys are strings, hence the str() around the class index.
    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                prob.item()))
    plt.show()


if __name__ == "__main__":
    main()