|
|
|
|
@ -13,29 +13,30 @@ from paddle.io import Dataset
|
|
|
|
|
参数配置
|
|
|
|
|
'''
|
|
|
|
|
# Central configuration for data preparation and training.
# Each key appears exactly once; paths use the D:/aistudio layout.
train_parameters = {
    "input_size": [3, 224, 224],  # shape of the input image
    "class_dim": -1,  # number of classes
    "src_path": "D:/aistudio/data/data55190/Chinese Medicine.zip",  # raw dataset archive
    "target_path": "D:/aistudio/data/",  # directory the archive is extracted into
    "train_list_path": "D:/aistudio/data/train.txt",  # path of train.txt
    "eval_list_path": "D:/aistudio/data/eval.txt",  # path of eval.txt
    "readme_path": "D:/aistudio/data/readme.json",  # path of readme.json
    "label_dict": {},  # label dictionary
    "num_epochs": 1,  # number of training epochs
    "train_batch_size": 8,  # batch size used during training
    # NOTE(review): presumably the logging interval in steps -- confirm
    # against the training loop.
    "skip_steps": 10,
    # The training loop saves a checkpoint whenever steps % save_steps == 0.
    "save_steps": 30,
    "learning_strategy": {  # optimizer-related settings
        "lr": 0.0001,  # learning-rate hyperparameter
    },
    "checkpoints": "D:/aistudio/work/checkpoints",  # directory checkpoints are saved to
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def unzip_data(src_path,target_path):
|
|
|
|
|
'''
|
|
|
|
|
"""
|
|
|
|
|
解压原始数据集,将src_path路径下的zip包解压至target_path目录下
|
|
|
|
|
'''
|
|
|
|
|
"""
|
|
|
|
|
if(not os.path.isdir(target_path + "Chinese Medicine")):
|
|
|
|
|
z = zipfile.ZipFile(src_path, 'r')
|
|
|
|
|
z.extractall(path=target_path)
|
|
|
|
|
@ -43,9 +44,9 @@ def unzip_data(src_path,target_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_data_list(target_path, train_list_path, eval_list_path):
|
|
|
|
|
'''
|
|
|
|
|
"""
|
|
|
|
|
生成数据列表
|
|
|
|
|
'''
|
|
|
|
|
"""
|
|
|
|
|
# 存放所有类别的信息
|
|
|
|
|
class_detail = []
|
|
|
|
|
# 获取所有类别保存的文件夹名称
|
|
|
|
|
@ -147,6 +148,7 @@ with open(eval_list_path, 'w') as f:
|
|
|
|
|
# 生成数据列表
|
|
|
|
|
get_data_list(target_path, train_list_path, eval_list_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class dataset(Dataset):
|
|
|
|
|
def __init__(self, data_path, mode='train'):
|
|
|
|
|
"""
|
|
|
|
|
@ -175,7 +177,6 @@ class dataset(Dataset):
|
|
|
|
|
self.img_paths.append(img_path)
|
|
|
|
|
self.labels.append(int(label))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __getitem__(self, index):
|
|
|
|
|
"""
|
|
|
|
|
获取一组数据
|
|
|
|
|
@ -201,12 +202,12 @@ class dataset(Dataset):
|
|
|
|
|
return len(self.img_paths)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Training data loading
train_dataset = dataset('D:/aistudio/data', mode='train')
train_loader = paddle.io.DataLoader(train_dataset, batch_size=16, shuffle=True)

# Evaluation data loading (order is kept fixed: shuffle=False)
eval_dataset = dataset('D:/aistudio/data', mode='eval')
eval_loader = paddle.io.DataLoader(eval_dataset, batch_size=8, shuffle=False)

# Quick sanity check of the training set: show one sample and the set size.
train_dataset.print_sample(200)
print(len(train_dataset))
|
|
|
|
|
@ -215,52 +216,9 @@ print(eval_dataset.__len__())
|
|
|
|
|
print(eval_dataset.__getitem__(10)[0].shape)
|
|
|
|
|
print(eval_dataset.__getitem__(10)[1].shape)
|
|
|
|
|
|
|
|
|
|
# class ConvPool(paddle.nn.Layer):
|
|
|
|
|
# '''卷积+池化'''
|
|
|
|
|
# def __init__(self,
|
|
|
|
|
# num_channels,#1
|
|
|
|
|
# num_filters, #2
|
|
|
|
|
# filter_size,#3
|
|
|
|
|
# pool_size,#4
|
|
|
|
|
# pool_stride,#5
|
|
|
|
|
# groups,#6
|
|
|
|
|
# conv_stride=1,
|
|
|
|
|
# conv_padding=1,
|
|
|
|
|
# ):
|
|
|
|
|
# super(ConvPool, self).__init__()
|
|
|
|
|
|
|
|
|
|
# self._conv2d_list = []
|
|
|
|
|
|
|
|
|
|
# for i in range(groups):
|
|
|
|
|
# conv2d = self.add_sublayer( #添加子层实例
|
|
|
|
|
# 'bb_%d' % i,
|
|
|
|
|
# paddle.nn.Conv2D( # layer
|
|
|
|
|
# in_channels=num_channels, #通道数
|
|
|
|
|
# out_channels=num_filters, #卷积核个数
|
|
|
|
|
# kernel_size=filter_size, #卷积核大小
|
|
|
|
|
# stride=conv_stride, #步长
|
|
|
|
|
# padding = conv_padding, #padding
|
|
|
|
|
# )
|
|
|
|
|
# )
|
|
|
|
|
# num_channels = num_filters
|
|
|
|
|
|
|
|
|
|
# self._conv2d_list.append(conv2d)
|
|
|
|
|
|
|
|
|
|
# self._pool2d = paddle.nn.MaxPool2D(
|
|
|
|
|
# kernel_size=pool_size, #池化核大小
|
|
|
|
|
# stride=pool_stride #池化步长
|
|
|
|
|
# )
|
|
|
|
|
# print(self._conv2d_list)
|
|
|
|
|
# def forward(self, inputs):
|
|
|
|
|
# x = inputs
|
|
|
|
|
# for conv in self._conv2d_list:
|
|
|
|
|
# x = conv(x)
|
|
|
|
|
# x = paddle.nn.functional.relu(x)
|
|
|
|
|
# x = self._pool2d(x)
|
|
|
|
|
# return x
|
|
|
|
|
|
|
|
|
|
class ConvPool(paddle.nn.Layer):
|
|
|
|
|
'''卷积+池化'''
|
|
|
|
|
""" 卷积+池化 """
|
|
|
|
|
|
|
|
|
|
def __init__(self,
|
|
|
|
|
num_channels,
|
|
|
|
|
@ -351,15 +309,17 @@ class VGGNet(paddle.nn.Layer):
|
|
|
|
|
else:
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
def draw_process(title, color, iters, data, label):
    """Plot one curve with matplotlib and display the figure.

    Args:
        title: Text used as the figure title.
        color: Line colour passed through to ``plt.plot``.
        iters: X-axis values; the x-axis is labelled "iter".
        data: Y-axis values plotted against ``iters``.
        label: Used both as the y-axis label and as the legend entry.
    """
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel(label, fontsize=20)
    # Plot exactly once so the legend holds a single entry for the curve.
    plt.plot(iters, data, color=color, label=label)
    plt.legend()
    plt.grid()
    plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print(train_parameters['class_dim'])
|
|
|
|
|
print(train_parameters['label_dict'])
|
|
|
|
|
|
|
|
|
|
@ -386,17 +346,17 @@ for epo in range(train_parameters['num_epochs']):
|
|
|
|
|
Iters.append(steps)
|
|
|
|
|
total_loss.append(loss.numpy()[0])
|
|
|
|
|
total_acc.append(acc.numpy()[0])
|
|
|
|
|
#打印中间过程
|
|
|
|
|
# 打印中间过程
|
|
|
|
|
print('epo: {}, step: {}, loss is: {}, acc is: {}'\
|
|
|
|
|
.format(epo, steps, loss.numpy(), acc.numpy()))
|
|
|
|
|
#保存模型参数
|
|
|
|
|
# 保存模型参数
|
|
|
|
|
if steps % train_parameters["save_steps"] == 0:
|
|
|
|
|
save_path = train_parameters["checkpoints"]+"/"+"save_dir_" + str(steps) + '.pdparams'
|
|
|
|
|
print('save model to: ' + save_path)
|
|
|
|
|
paddle.save(model.state_dict(),save_path)
|
|
|
|
|
paddle.save(model.state_dict(),train_parameters["checkpoints"]+"/"+"save_dir_final.pdparams")
|
|
|
|
|
draw_process("trainning loss","red",Iters,total_loss,"trainning loss")
|
|
|
|
|
draw_process("trainning acc","green",Iters,total_acc,"trainning acc")
|
|
|
|
|
paddle.save(model.state_dict(), train_parameters["checkpoints"]+"/"+"save_dir_final.pdparams")
|
|
|
|
|
draw_process("trainning loss", "red",Iters,total_loss, "trainning loss")
|
|
|
|
|
draw_process("trainning acc", "green",Iters,total_acc, "trainning acc")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
@ -414,12 +374,13 @@ for _, data in enumerate(eval_loader()):
|
|
|
|
|
predicts = model_eval(x_data)
|
|
|
|
|
acc = paddle.metric.accuracy(predicts, y_data)
|
|
|
|
|
accs.append(acc.numpy()[0])
|
|
|
|
|
print('模型在验证集上的准确率为:',np.mean(accs))
|
|
|
|
|
print('模型在验证集上的准确率为:', np.mean(accs))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def unzip_infer_data(src_path,target_path):
|
|
|
|
|
'''
|
|
|
|
|
def unzip_infer_data(src_path, target_path):
|
|
|
|
|
"""
|
|
|
|
|
解压预测数据集
|
|
|
|
|
'''
|
|
|
|
|
"""
|
|
|
|
|
if(not os.path.isdir(target_path + "Chinese Medicine Infer")):
|
|
|
|
|
z = zipfile.ZipFile(src_path, 'r')
|
|
|
|
|
z.extractall(path=target_path)
|
|
|
|
|
@ -427,20 +388,20 @@ def unzip_infer_data(src_path,target_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_image(img_path):
    """Load an image file and preprocess it for prediction.

    The image is converted to RGB when it is in another mode, resized to
    224x224 with bilinear interpolation, reordered from HWC to CHW and
    divided by 255.

    Args:
        img_path: Path of the image file to open.

    Returns:
        A float32 numpy array of shape (3, 224, 224).
    """
    # The context manager closes the underlying file once the pixel data has
    # been copied into the array, instead of leaving the handle open.
    with Image.open(img_path) as source:
        rgb = source if source.mode == 'RGB' else source.convert('RGB')
        resized = rgb.resize((224, 224), Image.BILINEAR)
        pixels = np.array(resized).astype('float32')
    # HWC to CHW plus normalisation, applied exactly once.
    return pixels.transpose((2, 0, 1)) / 255
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Location of the inference archive and the directory it is extracted into.
infer_src_path = 'D:/aistudio/data/data55194/Chinese Medicine Infer.zip'
infer_dst_path = 'D:/aistudio/data/'
unzip_infer_data(infer_src_path, infer_dst_path)

# Label dictionary used below to turn a predicted index into its label.
label_dic = train_parameters['label_dict']
|
|
|
|
|
@ -453,8 +414,8 @@ infer_imgs_path = os.listdir(infer_dst_path+"Chinese Medicine Infer")
|
|
|
|
|
print(infer_imgs_path)
# Predict every image found in the extracted inference directory.
for infer_img_path in infer_imgs_path:
    infer_img = load_image(infer_dst_path + "Chinese Medicine Infer/" + infer_img_path)
    # Add the batch axis exactly once: reshape(-1, 3, 224, 224)
    infer_img = infer_img[np.newaxis, :, :, :]
    infer_img = paddle.to_tensor(infer_img)
    result = model_predict(infer_img)
    # Index of the highest score; label_dic is keyed by that index as a string.
    lab = np.argmax(result.numpy())
    print("样本: {},被预测为:{}".format(infer_img_path, label_dic[str(lab)]))
|