GPU Training

To train a model on the GPU, the main task is to move the model and the data onto the GPU device.

In PyTorch, the .to(device) method moves a tensor or a model to a specified compute device (such as a CPU or a GPU).

For a tensor (Tensor), calling .to(device) returns a new tensor that lives on the target device. For a model (nn.Module), calling .to(device) modifies the model in place, moving all of its parameters and buffers to the target device. During computation, the model and every input tensor must be on the same device; if they are not, a runtime error is raised. Note that not every PyTorch object has a .to(device) method: only models inheriting from torch.nn.Module and torch.Tensor objects do. The common error

RuntimeError: Tensor for argument #1 'input' is on CPU, but expected it to be on GPU

means exactly this: the input tensor and the model are on different devices.
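As a minimal sketch of the two behaviors described above (the device name and the commented-out failing call are illustrative, not from the original post):

import torch
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

x = torch.randn(2, 4)
x_dev = x.to(device)  # for a tensor, .to() returns a NEW tensor on the device
print(x.device, x_dev.device)

model = nn.Linear(4, 3)
model.to(device)  # for a module, .to() moves parameters and buffers in place
print(next(model.parameters()).device)

# If device is a GPU, mixing devices reproduces the RuntimeError quoted above:
# model(x)  # x is still on the CPU while the model's weights are on the GPU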
import torch
import torch.nn as nn
import torch.optim as optim
import time
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Check what CUDA hardware and software stack is available
if torch.cuda.is_available():
    print("CUDA is available")
    device_count = torch.cuda.device_count()
    print(f"Number of available CUDA devices: {device_count}")
    current_device = torch.cuda.current_device()
    print(f"Index of the current CUDA device: {current_device}")
    device_name = torch.cuda.get_device_name(current_device)
    print(f"Name of the current CUDA device: {device_name}")
    cuda_version = torch.version.cuda
    print(f"CUDA version: {cuda_version}")
    print("cuDNN version:", torch.backends.cudnn.version())
else:
    print("CUDA is not available.")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load and split the iris dataset
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features to [0, 1]
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to tensors and move the data to the target device
X_train = torch.FloatTensor(X_train).to(device)
y_train = torch.LongTensor(y_train).to(device)
X_test = torch.FloatTensor(X_test).to(device)
y_test = torch.LongTensor(y_test).to(device)

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(4, 10)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(10, 3)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# The model must live on the same device as the data
model = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 20000
losses = []
start_time = time.time()

for epoch in range(num_epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.append(loss.item())  # .item() copies the scalar loss from GPU to CPU every epoch
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

time_all = time.time() - start_time
print(f"Training time: {time_all:.2f} seconds")

plt.plot(range(num_epochs), losses)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training Loss over Epochs")
plt.show()
The only thing left to optimize is this data-transfer time, so we can target it directly. Two ideas come to mind:

1. Stop recording the loss during training altogether. The downside is that there is nothing left to plot at the end, so we can only eyeball the printed loss values.
2. Save the loss only every 200 epochs, instead of recording and printing it on every one of the 20,000 epochs.

Let's try the first idea first:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import time

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Move the data to the GPU once, up front
X_train = torch.FloatTensor(X_train).to(device)
y_train = torch.LongTensor(y_train).to(device)
X_test = torch.FloatTensor(X_test).to(device)
y_test = torch.LongTensor(y_test).to(device)

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(4, 10)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(10, 3)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

model = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 20000
start_time = time.time()

for epoch in range(num_epochs):
    outputs = model(X_train)  # calling the module invokes forward()
    loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # No losses.append(loss.item()) here: the scalar loss is no longer
    # copied from GPU to CPU on every epoch.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

time_all = time.time() - start_time
print(f"Training time: {time_all:.2f} seconds")
After this optimization the effect is clear: the run is much faster, with a duration close to that of CPU training. So we can conclude that the per-epoch transfer of the loss from GPU to CPU was eating up a large share of the time.
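To see why the per-epoch loss.item() call is so costly, here is a small timing sketch of my own (not part of the original experiment; the matrix size and iteration count are arbitrary). CUDA kernels are launched asynchronously, and .item() forces the CPU to wait for the GPU to finish, so calling it every step stalls the pipeline:

import time
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
w = torch.randn(1024, 1024, device=device)

start = time.time()
for _ in range(1000):
    s = (w @ w).sum()          # queued on the GPU, returns immediately
if device.type == "cuda":
    torch.cuda.synchronize()   # wait once so the measured time is honest
print(f"sync once at the end: {time.time() - start:.3f} s")

start = time.time()
for _ in range(1000):
    s = (w @ w).sum().item()   # .item() blocks until the GPU catches up
print(f".item() every step:   {time.time() - start:.3f} s")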
Now let's try the second idea:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import time
import matplotlib.pyplot as plt

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = torch.FloatTensor(X_train).to(device)
y_train = torch.LongTensor(y_train).to(device)
X_test = torch.FloatTensor(X_test).to(device)
y_test = torch.LongTensor(y_test).to(device)

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(4, 10)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(10, 3)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

model = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 20000
losses = []
start_time = time.time()

for epoch in range(num_epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 200 == 0:
        # loss is a scalar tensor; .item() returns it as a plain Python number,
        # which costs one GPU-to-CPU copy -- now only every 200 epochs
        losses.append(loss.item())
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

time_all = time.time() - start_time
print(f"Training time: {time_all:.2f} seconds")

plt.plot(range(len(losses)), losses)
plt.xlabel("Epoch (x200)")
plt.ylabel("Loss")
plt.title("Training Loss over Epochs")
plt.show()
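One further variant worth sketching (my own addition, not from the original post): record the loss on every epoch but keep it on the GPU, then move the whole history to the CPU in a single copy after training. This keeps the full-resolution curve while still avoiding a synchronization on every epoch. The snippet reuses model, criterion, optimizer, X_train, y_train, and num_epochs from the script above:

loss_history = []  # detached scalar tensors, still resident on the GPU

for epoch in range(num_epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    loss_history.append(loss.detach())  # no .item(), so no GPU-to-CPU sync here

# One bulk transfer at the end instead of 20000 tiny ones
losses = torch.stack(loss_history).cpu().numpy()
plt.plot(range(num_epochs), losses)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()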