import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import matplotlib.pyplot as plt
import numpy as np

input_size = 28      # MNIST images are 28x28 pixels
num_classes = 10
num_epochs = 40
batch_size = 64

# Use CUDA if it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=torchvision.transforms.ToTensor()
)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)  # no need to shuffle the test set

# Define a CNN class as the network model.
# in_channels:  number of input channels; MNIST images are grayscale, so the first conv layer uses in_channels=1.
# out_channels: number of output feature maps, i.e. the number of convolution kernels; each kernel produces one feature map.
# kernel_size:  size of the convolution kernel, e.g. kernel_size=5 means a 5x5 kernel.
# stride:       step size of the kernel; set to 1 here, so the kernel moves one pixel at a time.
# padding:      number of pixels padded around the input; set to 2 here.
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(64, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        # After four 2x2 max-pools a 28x28 input shrinks to 1x1, so the
        # flattened feature vector has 32 elements (one per output channel).
        self.out = nn.Linear(32, num_classes)

    def forward(self, x):
        # The model and its inputs are moved to `device` outside of forward,
        # so no extra .to(device) calls are needed here.
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = x.view(x.size(0), -1)  # flatten to (batch_size, 32)
        x = self.out(x)
        return x


def accuracy(prediction, label):
    """Return (number of correct predictions, batch size)."""
    pred = torch.max(prediction.data, 1)[1]
    rights = pred.eq(label.data.view_as(pred)).sum()
    return rights, len(label)


net = CNN().to(device)             # move the model to the GPU
criterion = nn.CrossEntropyLoss()  # the loss has no parameters, so it needs no .to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)

tr = []  # per-batch training losses, stored as plain floats
for epoch in range(num_epochs):
    train_rights = []
    for batch_id, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)  # move the batch to the GPU
        optimizer.zero_grad()
        output = net(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_rights.append(accuracy(output, target))
        tr.append(loss.item())  # .item() detaches the value so the graph is not kept alive

        if batch_id % 100 == 0:
            # Evaluate on the test set without building gradients.
            net.eval()
            val_rights = []
            with torch.no_grad():
                for data, target in test_loader:
                    data, target = data.to(device), target.to(device)
                    output = net(data)
                    val_rights.append(accuracy(output, target))
            net.train()

            train_r = (sum(t[0] for t in train_rights), sum(t[1] for t in train_rights))
            val_r = (sum(t[0] for t in val_rights), sum(t[1] for t in val_rights))
            print("train_acc {:.4f}, test_acc {:.4f}".format(
                train_r[0].item() / train_r[1], val_r[0].item() / val_r[1]))

# The losses were stored as floats, so no GPU-to-CPU transfer is needed for plotting.
plt.plot(tr)
plt.xlabel("iteration")
plt.ylabel("training loss")
plt.show()

torch.save(net.state_dict(), 'model.pth')
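
# --- Inference sketch (not part of the original training script) ---
# A minimal, hedged example of how the saved 'model.pth' checkpoint could be
# reloaded and used to classify a single test image. The names introduced
# below (loaded_net, sample, logits, predicted) are illustrative assumptions,
# not taken from the original code.
loaded_net = CNN().to(device)
loaded_net.load_state_dict(torch.load('model.pth', map_location=device))
loaded_net.eval()  # disable training-specific behaviour

with torch.no_grad():
    sample, label = test_dataset[0]                       # one (1, 28, 28) tensor and its label
    logits = loaded_net(sample.unsqueeze(0).to(device))   # add the batch dimension
    predicted = logits.argmax(dim=1).item()
    print("predicted {}, ground truth {}".format(predicted, label))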