commit 519a23ca5080bb57e6c56882de2a5192539a1ff2
Author: zz
Date:   Mon Oct 14 19:08:08 2019 +0800

    first commit

diff --git a/README.md b/README.md
new file mode 100755
index 0000000..553e503
--- /dev/null
+++ b/README.md
@@ -0,0 +1,29 @@
+# pytorch_deephash
+
+## Introduction
+
+This is a PyTorch implementation of [Deep Learning of Binary Hash Codes for Fast Image Retrieval](https://github.com/kevinlin311tw/caffe-cvprw15), which achieves more than 93% mAP on the CIFAR-10 dataset.
+
+## Environment
+
+> PyTorch 0.4.0
+
+> torchvision 0.2.1
+
+## Training
+
+```shell
+python train.py
+```
+
+Trained models are written to the `model` folder by default; each model file is named after its test accuracy.
+
+## Evaluation
+
+```shell
+python mAP.py --pretrained {your saved model name in model folder by default}
+```
+
+## Tips
+
+There are some other arguments; list them with `-h` or read the code.
diff --git a/__pycache__/net.cpython-37.pyc b/__pycache__/net.cpython-37.pyc
new file mode 100644
index 0000000..64b7ebf
Binary files /dev/null and b/__pycache__/net.cpython-37.pyc differ
diff --git a/evaluation.py b/evaluation.py
new file mode 100755
index 0000000..0a083b1
--- /dev/null
+++ b/evaluation.py
@@ -0,0 +1,124 @@
+import os
+import argparse
+
+import numpy as np
+from net import AlexNetPlusLatent
+
+import torch
+
+from torchvision import datasets, transforms
+
+parser = argparse.ArgumentParser(description='Deep Hashing evaluate mAP')
+parser.add_argument('--pretrained', type=str, default='92', metavar='pretrained_model',
+                    help='name of the pretrained model to load (default: 92)')
+parser.add_argument('--bits', type=int, default=48, metavar='bts',
+                    help='binary bits')
+parser.add_argument('--path', type=str, default='model', metavar='P',
+                    help='path directory')
+args = parser.parse_args()
+
+def load_data():
+    transform_train = transforms.Compose(
+        [transforms.Resize(227),
+         transforms.ToTensor(),
+         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
+    transform_test = transforms.Compose(
+        [transforms.Resize(227),
+         transforms.ToTensor(),
+         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
+    trainset = datasets.CIFAR10(root='./data', train=True, download=True,
+                                transform=transform_train)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=100,
+                                              shuffle=False, num_workers=2)
+
+    testset = datasets.CIFAR10(root='./data', train=False, download=True,
+                               transform=transform_test)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
+                                             shuffle=False, num_workers=2)
+    return trainloader, testloader
+
+def binary_output(dataloader):
+    net = AlexNetPlusLatent(args.bits)
+    net.load_state_dict(torch.load('./{}/{}'.format(args.path, args.pretrained)))
+    use_cuda = torch.cuda.is_available()
+    if use_cuda:
+        net.cuda()
+    # accumulate outputs on the same device as the network
+    full_batch_output = torch.cuda.FloatTensor() if use_cuda else torch.FloatTensor()
+    full_batch_label = torch.cuda.LongTensor() if use_cuda else torch.LongTensor()
+    net.eval()
+    with torch.no_grad():
+        for batch_idx, (inputs, targets) in enumerate(dataloader):
+            if use_cuda:
+                inputs, targets = inputs.cuda(), targets.cuda()
+            outputs, _ = net(inputs)
+            full_batch_output = torch.cat((full_batch_output, outputs.data), 0)
+            full_batch_label = torch.cat((full_batch_label, targets.data), 0)
+    return torch.round(full_batch_output), full_batch_label
+
+def precision(trn_binary, trn_label, tst_binary, tst_label):
+    trn_binary = trn_binary.cpu().numpy()
+    trn_binary = np.asarray(trn_binary, np.int32)
+    trn_label = trn_label.cpu().numpy()
+    tst_binary = tst_binary.cpu().numpy()
+    tst_binary = np.asarray(tst_binary, np.int32)
+    tst_label = tst_label.cpu().numpy()
+    classes = np.max(tst_label) + 1
+
+    # sample 100 test images per class, seeded per class for reproducibility
+    sample_binaries, sample_labels = [], []
+    for i in range(classes):
+        idx = np.random.RandomState(seed=i).permutation(np.where(tst_label == i)[0])[:100]
+        sample_binaries.append(tst_binary[idx])
+        sample_labels.append(np.full(100, i))
+    tst_sample_binary = np.concatenate(sample_binaries)
+    tst_sample_label = np.concatenate(sample_labels)
+
+    query_times = tst_sample_binary.shape[0]
+    trainset_len = trn_binary.shape[0]
+    AP = np.zeros(query_times)
+    precision_radius = np.zeros(query_times)
+    Ns = np.arange(1, trainset_len + 1)
+    sum_tp = np.zeros(trainset_len)
+    for i in range(query_times):
+        print('Query ', i + 1)
+        query_label = tst_sample_label[i]
+        query_binary = tst_sample_binary[i, :]
+        # Hamming distance: count of differing bits (no need to divide by code length)
+        query_result = np.count_nonzero(query_binary != trn_binary, axis=1)
+        sort_indices = np.argsort(query_result)
+        buffer_yes = np.equal(query_label, trn_label[sort_indices]).astype(int)
+        P = np.cumsum(buffer_yes) / Ns
+        precision_radius[i] = P[np.where(np.sort(query_result) > 2)[0][0] - 1]
+        AP[i] = np.sum(P * buffer_yes) / np.sum(buffer_yes)
+        sum_tp = sum_tp + np.cumsum(buffer_yes)
+    precision_at_k = sum_tp / Ns / query_times
+    index = [100, 200, 400, 600, 800, 1000]
+    index = [i - 1 for i in index]
+    print('precision at k:', precision_at_k[index])
+    np.save('precision_at_k', precision_at_k)
+    print('precision within Hamming radius 2:', np.mean(precision_radius))
+    mAP = np.mean(AP)
+    print('mAP:', mAP)
+
+
+if os.path.exists('./result/train_binary') and os.path.exists('./result/train_label') and \
+   os.path.exists('./result/test_binary') and os.path.exists('./result/test_label') and \
+   args.pretrained == '0':
+    train_binary = torch.load('./result/train_binary')
+    train_label = torch.load('./result/train_label')
+    test_binary = torch.load('./result/test_binary')
+    test_label = torch.load('./result/test_label')
+else:
+    trainloader, testloader = load_data()
+    train_binary, train_label = binary_output(trainloader)
+    test_binary, test_label = binary_output(testloader)
+    if not os.path.isdir('result'):
+        os.mkdir('result')
+    torch.save(train_binary, './result/train_binary')
+    torch.save(train_label, './result/train_label')
+    torch.save(test_binary, './result/test_binary')
+    torch.save(test_label, './result/test_label')


+precision(train_binary, train_label, test_binary, test_label)
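The ranking arithmetic in `precision` is compact, so here is a minimal self-contained sketch of the same average-precision computation for a single query, on tiny synthetic data (all sizes and values hypothetical, chosen only for illustration):

```python
import numpy as np

# Toy data: 8-bit codes, 6 database items, 1 query.
rng = np.random.RandomState(0)
db_codes = rng.randint(0, 2, size=(6, 8))   # database binary codes
db_labels = np.array([0, 1, 0, 2, 0, 1])    # database class labels
query_code = rng.randint(0, 2, size=8)
query_label = 0

# Hamming distance = number of differing bits per database row.
dist = np.count_nonzero(query_code != db_codes, axis=1)
order = np.argsort(dist)                    # rank database items by distance
hits = (db_labels[order] == query_label).astype(int)
prec_at_k = np.cumsum(hits) / np.arange(1, len(hits) + 1)

# Average precision: precision@k averaged over the ranks where a hit occurs.
ap = np.sum(prec_at_k * hits) / np.sum(hits)
print('AP for this query:', ap)
```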
diff --git a/mAP.py b/mAP.py
new file mode 100755
index 0000000..07686a5
--- /dev/null
+++ b/mAP.py
@@ -0,0 +1,104 @@
+import os
+import argparse
+import time
+
+import numpy as np
+from net import AlexNetPlusLatent
+
+import torch
+
+from torchvision import datasets, transforms
+
+parser = argparse.ArgumentParser(description='Deep Hashing evaluate mAP')
+parser.add_argument('--pretrained', type=int, default=92, metavar='pretrained_model',
+                    help='name of the pretrained model to load (default: 92)')
+parser.add_argument('--bits', type=int, default=48, metavar='bts',
+                    help='binary bits')
+args = parser.parse_args()
+
+def load_data():
+    transform_train = transforms.Compose(
+        [transforms.Resize(227),
+         transforms.ToTensor(),
+         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
+    transform_test = transforms.Compose(
+        [transforms.Resize(227),
+         transforms.ToTensor(),
+         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
+    trainset = datasets.CIFAR10(root='./data', train=True, download=True,
+                                transform=transform_train)
+    trainloader = torch.utils.data.DataLoader(trainset, batch_size=100,
+                                              shuffle=False, num_workers=2)
+
+    testset = datasets.CIFAR10(root='./data', train=False, download=True,
+                               transform=transform_test)
+    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
+                                             shuffle=False, num_workers=2)
+    return trainloader, testloader
+
+def binary_output(dataloader):
+    net = AlexNetPlusLatent(args.bits)
+    net.load_state_dict(torch.load('./model/%d' % args.pretrained))
+    use_cuda = torch.cuda.is_available()
+    if use_cuda:
+        net.cuda()
+    full_batch_output = torch.cuda.FloatTensor() if use_cuda else torch.FloatTensor()
+    full_batch_label = torch.cuda.LongTensor() if use_cuda else torch.LongTensor()
+    net.eval()
+    # torch.no_grad() replaces the removed volatile=True flag for inference
+    with torch.no_grad():
+        for batch_idx, (inputs, targets) in enumerate(dataloader):
+            if use_cuda:
+                inputs, targets = inputs.cuda(), targets.cuda()
+            outputs, _ = net(inputs)
+            full_batch_output = torch.cat((full_batch_output, outputs.data), 0)
+            full_batch_label = torch.cat((full_batch_label, targets.data), 0)
+    return torch.round(full_batch_output), full_batch_label
+
+def precision(trn_binary, trn_label, tst_binary, tst_label):
+    trn_binary = trn_binary.cpu().numpy()
+    trn_binary = np.asarray(trn_binary, np.int32)
+    trn_label = trn_label.cpu().numpy()
+    tst_binary = tst_binary.cpu().numpy()
+    tst_binary = np.asarray(tst_binary, np.int32)
+    tst_label = tst_label.cpu().numpy()
+    query_times = tst_binary.shape[0]
+    trainset_len = trn_binary.shape[0]  # use the parameter, not the global train_binary
+    AP = np.zeros(query_times)
+    Ns = np.arange(1, trainset_len + 1)
+    total_time_start = time.time()
+    for i in range(query_times):
+        print('Query ', i + 1)
+        query_label = tst_label[i]
+        query_binary = tst_binary[i, :]
+        # Hamming distance: count of differing bits (no need to divide by code length)
+        query_result = np.count_nonzero(query_binary != trn_binary, axis=1)
+        sort_indices = np.argsort(query_result)
+        buffer_yes = np.equal(query_label, trn_label[sort_indices]).astype(int)
+        P = np.cumsum(buffer_yes) / Ns
+        AP[i] = np.sum(P * buffer_yes) / np.sum(buffer_yes)
+    mAP = np.mean(AP)
+    print('mAP:', mAP)
+    print('total query time = ', time.time() - total_time_start)


+if os.path.exists('./result/train_binary') and os.path.exists('./result/train_label') and \
+   os.path.exists('./result/test_binary') and os.path.exists('./result/test_label') and \
+   args.pretrained == 0:
+    train_binary = torch.load('./result/train_binary')
+    train_label = torch.load('./result/train_label')
+    test_binary = torch.load('./result/test_binary')
+    test_label = torch.load('./result/test_label')
+else:
+    trainloader, testloader = load_data()
+    train_binary, train_label = binary_output(trainloader)
+    test_binary, test_label = binary_output(testloader)
+    if not os.path.isdir('result'):
+        os.mkdir('result')
+    torch.save(train_binary, './result/train_binary')
+    torch.save(train_label, './result/train_label')
+    torch.save(test_binary, './result/test_binary')
+    torch.save(test_label, './result/test_label')


+precision(train_binary, train_label, test_binary, test_label)
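The per-query distance loop above can also be vectorized: `scipy.spatial.distance.cdist` computes all query-to-database Hamming distances in one call (which is presumably why the original file imported it). A hedged sketch on toy data; note that `cdist`'s `'hamming'` metric returns the *fraction* of differing coordinates, so multiply by the code length to recover bit counts:

```python
import numpy as np
from scipy.spatial.distance import cdist

# toy shapes (hypothetical): 3 queries, 5 database items, 8-bit codes
rng = np.random.RandomState(0)
tst = rng.randint(0, 2, size=(3, 8))
trn = rng.randint(0, 2, size=(5, 8))

# all pairwise Hamming distances at once: a (3, 5) matrix of bit counts
dists = cdist(tst, trn, 'hamming') * tst.shape[1]
ranks = np.argsort(dists, axis=1)  # per-query ranking of the database
print(dists.astype(int))
print(ranks)
```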
diff --git a/net.py b/net.py
new file mode 100755
index 0000000..252d6d6
--- /dev/null
+++ b/net.py
@@ -0,0 +1,27 @@
+import torch.nn as nn
+from torchvision import models
+
+# AlexNet pre-trained on ImageNet, loaded once at import time
+alexnet_model = models.alexnet(pretrained=True)
+
+# nn.Module is the base class for all neural network modules;
+# custom models should subclass it.
+class AlexNetPlusLatent(nn.Module):
+    def __init__(self, bits):
+        super(AlexNetPlusLatent, self).__init__()
+        self.bits = bits
+        self.features = nn.Sequential(*list(alexnet_model.features.children()))
+        # AlexNet's classifier without its final fc layer
+        self.remain = nn.Sequential(*list(alexnet_model.classifier.children())[:-1])
+        self.Linear1 = nn.Linear(4096, self.bits)  # latent hash layer
+        self.sigmoid = nn.Sigmoid()
+        self.Linear2 = nn.Linear(self.bits, 10)  # 10 CIFAR-10 classes
+
+    def forward(self, x):
+        x = self.features(x)
+        x = x.view(x.size(0), 256 * 6 * 6)
+        x = self.remain(x)
+        x = self.Linear1(x)
+        features = self.sigmoid(x)  # activations in (0, 1), rounded to bits at test time
+        result = self.Linear2(features)
+        return features, result
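As a quick sanity check of the two-headed forward pass, a minimal sketch (untrained latent layers, random input) showing how the sigmoid latents are rounded into binary codes, mirroring what `binary_output` does in the evaluation scripts; note that constructing the model downloads the pre-trained AlexNet weights on first use:

```python
import torch
from net import AlexNetPlusLatent

net = AlexNetPlusLatent(bits=48)
net.eval()

x = torch.randn(2, 3, 227, 227)  # two fake 227x227 RGB images
with torch.no_grad():
    features, logits = net(x)     # sigmoid latents and class scores
codes = torch.round(features)     # threshold at 0.5 -> 48-bit codes
print(codes.shape, logits.shape)  # torch.Size([2, 48]) torch.Size([2, 10])
```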
diff --git a/net.pyc b/net.pyc
new file mode 100644
index 0000000..d757220
Binary files /dev/null and b/net.pyc differ
diff --git a/precision_at_k.npy b/precision_at_k.npy
new file mode 100644
index 0000000..1ba2558
Binary files /dev/null and b/precision_at_k.npy differ
diff --git a/result/test_binary b/result/test_binary
new file mode 100644
index 0000000..6ce0ddb
Binary files /dev/null and b/result/test_binary differ
diff --git a/result/test_label b/result/test_label
new file mode 100644
index 0000000..f7a852c
Binary files /dev/null and b/result/test_label differ
diff --git a/result/train_binary b/result/train_binary
new file mode 100644
index 0000000..ff48204
Binary files /dev/null and b/result/train_binary differ
diff --git a/result/train_label b/result/train_label
new file mode 100644
index 0000000..0d2fa7e
Binary files /dev/null and b/result/train_label differ
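The `result/*` files committed above are the cached outputs of `binary_output`, written with `torch.save`; they can be inspected directly. A small sketch (the shapes in the comments assume the default 48 bits; `map_location` guards against loading GPU-saved tensors on a CPU-only machine):

```python
import torch

train_binary = torch.load('./result/train_binary', map_location='cpu')  # (50000, bits) 0/1 codes
train_label = torch.load('./result/train_label', map_location='cpu')    # (50000,) class ids
print(train_binary.shape, torch.unique(train_binary))
```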
diff --git a/train.py b/train.py
new file mode 100755
index 0000000..eecfe01
--- /dev/null
+++ b/train.py
@@ -0,0 +1,147 @@
+import os
+import shutil
+import argparse  # command-line argument parsing
+
+import torch
+import torch.nn as nn
+
+from net import AlexNetPlusLatent
+
+# torchvision.datasets: common vision datasets
+# torchvision.models: popular architectures with pre-trained weights (AlexNet, VGG, ResNet, ...)
+# torchvision.transforms: common image transforms (cropping, flipping, ...)
+from torchvision import datasets, models, transforms
+import torch.optim.lr_scheduler


+parser = argparse.ArgumentParser(description='Deep Hashing')
+parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
+                    help='learning rate (default: 0.01)')
+parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
+                    help='SGD momentum (default: 0.9)')
+parser.add_argument('--epoch', type=int, default=60, metavar='epoch',
+                    help='number of epochs (default: 60)')
+parser.add_argument('--pretrained', type=int, default=0, metavar='pretrained_model',
+                    help='name of a pretrained model to evaluate (default: 0, i.e. train from scratch)')
+parser.add_argument('--bits', type=int, default=48, metavar='bts',
+                    help='binary bits')
+parser.add_argument('--path', type=str, default='model', metavar='P',
+                    help='path directory')
+args = parser.parse_args()
+
+start_epoch = 1  # epoch counter
+
+# Data preprocessing
+transform_train = transforms.Compose([
+    # resize so the shorter side is 256 (CIFAR-10 images are square, so 256x256)
+    transforms.Resize(256),
+    # random 227x227 crop (AlexNet input size)
+    transforms.RandomCrop(227),
+    # random horizontal flip for augmentation
+    transforms.RandomHorizontalFlip(),
+    # convert the PIL image to a tensor
+    transforms.ToTensor(),
+    # normalize with the per-channel means (first tuple) and stds (second tuple)
+    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
+])
+transform_test = transforms.Compose(
+    [transforms.Resize(227),
+     transforms.ToTensor(),
+     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
+
+# Load the CIFAR-10 training and test sets with the transforms above
+trainset = datasets.CIFAR10(root='./data', train=True, download=True,
+                            transform=transform_train)
+
+# DataLoader batches the data (128 samples per batch), shuffles it,
+# and reads it with 2 worker processes
+trainloader = torch.utils.data.DataLoader(trainset, batch_size=128,
+                                          shuffle=True, num_workers=2)
+
+testset = datasets.CIFAR10(root='./data', train=False, download=True,
+                           transform=transform_test)
+
+testloader = torch.utils.data.DataLoader(testset, batch_size=100,
+                                         shuffle=True, num_workers=2)
+
+# Build the model
+net = AlexNetPlusLatent(args.bits)
+
+# Use the GPU if one is available
+use_cuda = torch.cuda.is_available()
+
+if use_cuda:
+    net.cuda()
+
+softmaxloss = nn.CrossEntropyLoss().cuda() if use_cuda else nn.CrossEntropyLoss()
+
+# stochastic gradient descent with momentum and weight decay
+optimizer4nn = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
+                               weight_decay=0.0005)
+
+# note: with the default 60 epochs the milestone at 64 is never reached,
+# so the learning rate stays constant
+scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer4nn, milestones=[64], gamma=0.1)
+
+# Train and evaluate the network
+def train(epoch):
+    print('\nEpoch: %d' % epoch)
+    net.train()
+    train_loss = 0
+    correct = 0
+    total = 0
+    for batch_idx, (inputs, targets) in enumerate(trainloader):
+        if use_cuda:
+            inputs, targets = inputs.cuda(), targets.cuda()
+        _, outputs = net(inputs)
+        loss = softmaxloss(outputs, targets)
+        optimizer4nn.zero_grad()
+        loss.backward()
+        optimizer4nn.step()
+
+        train_loss += loss.item()  # reuse the computed loss instead of a second forward pass
+        _, predicted = torch.max(outputs.data, 1)
+        total += targets.size(0)
+        correct += predicted.eq(targets.data).cpu().sum()
+
+        print(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+              % (train_loss / (batch_idx + 1), 100 * int(correct) / int(total), correct, total))
+    return train_loss / (batch_idx + 1)
+
+def test(epoch=0):
+    net.eval()
+    test_loss = 0
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for batch_idx, (inputs, targets) in enumerate(testloader):
+            if use_cuda:
+                inputs, targets = inputs.cuda(), targets.cuda()
+            _, outputs = net(inputs)
+            loss = softmaxloss(outputs, targets)
+            test_loss += loss.item()
+            # torch.max over dim 1 returns (values, indices); the indices are the predictions
+            _, predicted = torch.max(outputs.data, 1)
+            total += targets.size(0)
+            correct += predicted.eq(targets.data).cpu().sum()
+
+            print(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
+                  % (test_loss / (batch_idx + 1), 100 * int(correct) / int(total), correct, total))
+    acc = 100 * int(correct) / int(total)
+    # save the model after the final epoch, named after its test accuracy
+    if epoch == args.epoch:
+        print('Saving')
+        if not os.path.isdir('{}'.format(args.path)):
+            os.mkdir('{}'.format(args.path))
+        torch.save(net.state_dict(), './{}/{}'.format(args.path, acc))
+
+if args.pretrained:
+    net.load_state_dict(torch.load('./{}/{}'.format(args.path, args.pretrained)))
+    test()
+else:
+    if os.path.isdir('{}'.format(args.path)):
+        shutil.rmtree('{}'.format(args.path))
+    for epoch in range(start_epoch, start_epoch + args.epoch):
+        train(epoch)
+        test(epoch)
+        scheduler.step()
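One quirk worth noting in train.py: with the default `--epoch 60`, the `MultiStepLR` milestone at 64 is never reached, so `gamma=0.1` is never applied and the learning rate stays at its initial value. A small self-contained sketch demonstrating this (a dummy parameter stands in for the network):

```python
import torch

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=0.01)
sched = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[64], gamma=0.1)

for epoch in range(1, 61):        # mirrors train.py's default 60 epochs
    opt.step()
    sched.step()
print(opt.param_groups[0]['lr'])  # still 0.01: the milestone was never hit
```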