diff --git a/mnist-master/.vscode/launch.json b/mnist-master/.vscode/launch.json new file mode 100644 index 0000000..af43941 --- /dev/null +++ b/mnist-master/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: 当前文件", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "cwd": "" + } + ] +} \ No newline at end of file diff --git a/mnist-master/.vscode/settings.json b/mnist-master/.vscode/settings.json new file mode 100644 index 0000000..52bd56a --- /dev/null +++ b/mnist-master/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "python.pythonPath": "/usr/bin/python3.7", + "python.linting.pylintArgs": [ + "--errors-only", + "--disable=E0401", + "--extension-pkg-whitelist=PyQt5" + ] +} \ No newline at end of file diff --git a/mnist-master/README.md b/mnist-master/README.md new file mode 100644 index 0000000..fb38d38 --- /dev/null +++ b/mnist-master/README.md @@ -0,0 +1,41 @@ +# 纯Python实现CNN识别手写体数字+GUI + +![](https://img.hamlinzheng.com/i/2020/02/07/psh0gw.png) + + +--- + +> 由于把数据集也传上来了,导致下载时间比较长,我打包了一份放在服务器,点击[这里](https://dl.hamlinzheng.com/Python/MNIST.zip)进行下载 + + +项目文件结构如下所示 + +``` +. +├── common +│   ├── functions.py +│   ├── gradient.py +│   ├── layers.py +│   ├── optimizer.py +│   ├── trainer.py +│   └── util.py +├── dataset +│   ├── mnist.pkl +│   ├── mnist.py +│   ├── t10k-images-idx3-ubyte.gz +│   ├── t10k-labels-idx1-ubyte.gz +│   ├── train-images-idx3-ubyte.gz +│   └── train-labels-idx1-ubyte.gz +├── deep_convnet_params.pkl +├── deep_convnet.py +├── mnist_cnn_gui_main.py +├── params.pkl +├── qt +│   ├── layout.py +│   ├── layout.ui +│   ├── paintboard.py +│   └── ui2py.sh +├── simple_convnet.py +├── train_convnet.py +└── train_deepnet.py +``` diff --git a/mnist-master/common/functions.py b/mnist-master/common/functions.py new file mode 100644 index 0000000..ec02dd0 --- /dev/null +++ b/mnist-master/common/functions.py @@ -0,0 +1,61 @@ +# coding: utf-8 +import numpy as np + + +def identity_function(x): + return x + + +def step_function(x): + return np.array(x > 0, dtype=np.int) + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) + + +def sigmoid_grad(x): + return (1.0 - sigmoid(x)) * sigmoid(x) + + +def relu(x): + return np.maximum(0, x) + + +def relu_grad(x): + grad = np.zeros(x) + grad[x>=0] = 1 + return grad + + +def softmax(x): + if x.ndim == 2: + x = x.T + x = x - np.max(x, axis=0) + y = np.exp(x) / np.sum(np.exp(x), axis=0) + return y.T + + x = x - np.max(x) # 溢出对策 + return np.exp(x) / np.sum(np.exp(x)) + + +def mean_squared_error(y, t): + return 0.5 * np.sum((y-t)**2) + + +def cross_entropy_error(y, t): + if y.ndim == 1: + t = t.reshape(1, t.size) + y = y.reshape(1, y.size) + + # 监督数据是one-hot-vector的情况下,转换为正确解标签的索引 + if t.size == y.size: + t = t.argmax(axis=1) + + batch_size = y.shape[0] + return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size + + +def softmax_loss(X, t): + y = softmax(X) + return cross_entropy_error(y, t) diff --git a/mnist-master/common/gradient.py b/mnist-master/common/gradient.py new file mode 100644 index 0000000..fd283c6 --- /dev/null +++ b/mnist-master/common/gradient.py @@ -0,0 +1,53 @@ +# coding: utf-8 +import numpy as np + +def _numerical_gradient_1d(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + for idx in range(x.size): + tmp_val = 
x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + + return grad + + +def numerical_gradient_2d(f, X): + if X.ndim == 1: + return _numerical_gradient_1d(f, X) + else: + grad = np.zeros_like(X) + + for idx, x in enumerate(X): + grad[idx] = _numerical_gradient_1d(f, x) + + return grad + + +def numerical_gradient(f, x): + h = 1e-4 # 0.0001 + grad = np.zeros_like(x) + + # 多维迭代 + it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite']) + while not it.finished: + idx = it.multi_index + tmp_val = x[idx] + x[idx] = float(tmp_val) + h + fxh1 = f(x) # f(x+h) + + x[idx] = tmp_val - h + fxh2 = f(x) # f(x-h) + grad[idx] = (fxh1 - fxh2) / (2*h) + + x[idx] = tmp_val # 还原值 + it.iternext() + + return grad \ No newline at end of file diff --git a/mnist-master/common/layers.py b/mnist-master/common/layers.py new file mode 100644 index 0000000..ae50d80 --- /dev/null +++ b/mnist-master/common/layers.py @@ -0,0 +1,284 @@ +# coding: utf-8 +import numpy as np +from common.functions import * +from common.util import im2col, col2im + + +class Relu: + def __init__(self): + self.mask = None + + def forward(self, x): + self.mask = (x <= 0) + out = x.copy() + out[self.mask] = 0 + + return out + + def backward(self, dout): + dout[self.mask] = 0 + dx = dout + + return dx + + +class Sigmoid: + def __init__(self): + self.out = None + + def forward(self, x): + out = sigmoid(x) + self.out = out + return out + + def backward(self, dout): + dx = dout * (1.0 - self.out) * self.out + + return dx + + +class Affine: + def __init__(self, W, b): + self.W =W + self.b = b + + self.x = None + self.original_x_shape = None + # 权重和偏置参数的导数 + self.dW = None + self.db = None + + def forward(self, x): + # 对应张量 + self.original_x_shape = x.shape + x = x.reshape(x.shape[0], -1) + self.x = x + + out = np.dot(self.x, self.W) + self.b + + return out + + def backward(self, dout): + dx = np.dot(dout, self.W.T) + self.dW = np.dot(self.x.T, dout) + self.db = np.sum(dout, axis=0) + + dx = dx.reshape(*self.original_x_shape) # 还原输入数据的形状(对应张量) + return dx + + +class SoftmaxWithLoss: + def __init__(self): + self.loss = None + self.y = None # softmax的输出 + self.t = None # 监督数据 + + def forward(self, x, t): + self.t = t + self.y = softmax(x) + self.loss = cross_entropy_error(self.y, self.t) + + return self.loss + + def backward(self, dout=1): + batch_size = self.t.shape[0] + if self.t.size == self.y.size: # 监督数据是one-hot-vector的情况 + dx = (self.y - self.t) / batch_size + else: + dx = self.y.copy() + dx[np.arange(batch_size), self.t] -= 1 + dx = dx / batch_size + + return dx + + +class Dropout: + """ + http://arxiv.org/abs/1207.0580 + """ + def __init__(self, dropout_ratio=0.5): + self.dropout_ratio = dropout_ratio + self.mask = None + + def forward(self, x, train_flg=True): + if train_flg: + self.mask = np.random.rand(*x.shape) > self.dropout_ratio + return x * self.mask + else: + return x * (1.0 - self.dropout_ratio) + + def backward(self, dout): + return dout * self.mask + + +class BatchNormalization: + """ + http://arxiv.org/abs/1502.03167 + """ + def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None): + self.gamma = gamma + self.beta = beta + self.momentum = momentum + self.input_shape = None # Conv层的情况下为4维,全连接层的情况下为2维 + + # 测试时使用的平均值和方差 + self.running_mean = running_mean + self.running_var = running_var + + # backward时使用的中间数据 + self.batch_size = None + self.xc = None + self.std = None + self.dgamma = 
None + self.dbeta = None + + def forward(self, x, train_flg=True): + self.input_shape = x.shape + if x.ndim != 2: + N, C, H, W = x.shape + x = x.reshape(N, -1) + + out = self.__forward(x, train_flg) + + return out.reshape(*self.input_shape) + + def __forward(self, x, train_flg): + if self.running_mean is None: + N, D = x.shape + self.running_mean = np.zeros(D) + self.running_var = np.zeros(D) + + if train_flg: + mu = x.mean(axis=0) + xc = x - mu + var = np.mean(xc**2, axis=0) + std = np.sqrt(var + 10e-7) + xn = xc / std + + self.batch_size = x.shape[0] + self.xc = xc + self.xn = xn + self.std = std + self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu + self.running_var = self.momentum * self.running_var + (1-self.momentum) * var + else: + xc = x - self.running_mean + xn = xc / ((np.sqrt(self.running_var + 10e-7))) + + out = self.gamma * xn + self.beta + return out + + def backward(self, dout): + if dout.ndim != 2: + N, C, H, W = dout.shape + dout = dout.reshape(N, -1) + + dx = self.__backward(dout) + + dx = dx.reshape(*self.input_shape) + return dx + + def __backward(self, dout): + dbeta = dout.sum(axis=0) + dgamma = np.sum(self.xn * dout, axis=0) + dxn = self.gamma * dout + dxc = dxn / self.std + dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0) + dvar = 0.5 * dstd / self.std + dxc += (2.0 / self.batch_size) * self.xc * dvar + dmu = np.sum(dxc, axis=0) + dx = dxc - dmu / self.batch_size + + self.dgamma = dgamma + self.dbeta = dbeta + + return dx + + +class Convolution: + def __init__(self, W, b, stride=1, pad=0): + self.W = W + self.b = b + self.stride = stride + self.pad = pad + + # 中间数据(backward时使用) + self.x = None + self.col = None + self.col_W = None + + # 权重和偏置参数的梯度 + self.dW = None + self.db = None + + def forward(self, x): + FN, C, FH, FW = self.W.shape + N, C, H, W = x.shape + out_h = 1 + int((H + 2*self.pad - FH) / self.stride) + out_w = 1 + int((W + 2*self.pad - FW) / self.stride) + + col = im2col(x, FH, FW, self.stride, self.pad) + col_W = self.W.reshape(FN, -1).T + + out = np.dot(col, col_W) + self.b + out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2) + + self.x = x + self.col = col + self.col_W = col_W + + return out + + def backward(self, dout): + FN, C, FH, FW = self.W.shape + dout = dout.transpose(0,2,3,1).reshape(-1, FN) + + self.db = np.sum(dout, axis=0) + self.dW = np.dot(self.col.T, dout) + self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW) + + dcol = np.dot(dout, self.col_W.T) + dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad) + + return dx + + +class Pooling: + def __init__(self, pool_h, pool_w, stride=1, pad=0): + self.pool_h = pool_h + self.pool_w = pool_w + self.stride = stride + self.pad = pad + + self.x = None + self.arg_max = None + + def forward(self, x): + N, C, H, W = x.shape + out_h = int(1 + (H - self.pool_h) / self.stride) + out_w = int(1 + (W - self.pool_w) / self.stride) + + col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad) + col = col.reshape(-1, self.pool_h*self.pool_w) + + arg_max = np.argmax(col, axis=1) + out = np.max(col, axis=1) + out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2) + + self.x = x + self.arg_max = arg_max + + return out + + def backward(self, dout): + dout = dout.transpose(0, 2, 3, 1) + + pool_size = self.pool_h * self.pool_w + dmax = np.zeros((dout.size, pool_size)) + dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten() + dmax = dmax.reshape(dout.shape + (pool_size,)) + + dcol = dmax.reshape(dmax.shape[0] * 
dmax.shape[1] * dmax.shape[2], -1) + dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad) + + return dx diff --git a/mnist-master/common/optimizer.py b/mnist-master/common/optimizer.py new file mode 100644 index 0000000..9c22bcd --- /dev/null +++ b/mnist-master/common/optimizer.py @@ -0,0 +1,130 @@ +# coding: utf-8 +import numpy as np + +class SGD: + + """随机梯度下降法(Stochastic Gradient Descent)""" + + def __init__(self, lr=0.01): + self.lr = lr + + def update(self, params, grads): + for key in params.keys(): + params[key] -= self.lr * grads[key] + + +class Momentum: + + """Momentum SGD""" + + def __init__(self, lr=0.01, momentum=0.9): + self.lr = lr + self.momentum = momentum + self.v = None + + def update(self, params, grads): + if self.v is None: + self.v = {} + for key, val in params.items(): + self.v[key] = np.zeros_like(val) + + for key in params.keys(): + self.v[key] = self.momentum*self.v[key] - self.lr*grads[key] + params[key] += self.v[key] + + +class Nesterov: + + """Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901)""" + + def __init__(self, lr=0.01, momentum=0.9): + self.lr = lr + self.momentum = momentum + self.v = None + + def update(self, params, grads): + if self.v is None: + self.v = {} + for key, val in params.items(): + self.v[key] = np.zeros_like(val) + + for key in params.keys(): + self.v[key] *= self.momentum + self.v[key] -= self.lr * grads[key] + params[key] += self.momentum * self.momentum * self.v[key] + params[key] -= (1 + self.momentum) * self.lr * grads[key] + + +class AdaGrad: + + """AdaGrad""" + + def __init__(self, lr=0.01): + self.lr = lr + self.h = None + + def update(self, params, grads): + if self.h is None: + self.h = {} + for key, val in params.items(): + self.h[key] = np.zeros_like(val) + + for key in params.keys(): + self.h[key] += grads[key] * grads[key] + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7) + + +class RMSprop: + + """RMSprop""" + + def __init__(self, lr=0.01, decay_rate = 0.99): + self.lr = lr + self.decay_rate = decay_rate + self.h = None + + def update(self, params, grads): + if self.h is None: + self.h = {} + for key, val in params.items(): + self.h[key] = np.zeros_like(val) + + for key in params.keys(): + self.h[key] *= self.decay_rate + self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key] + params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7) + + +class Adam: + + """Adam (http://arxiv.org/abs/1412.6980v8)""" + + def __init__(self, lr=0.001, beta1=0.9, beta2=0.999): + self.lr = lr + self.beta1 = beta1 + self.beta2 = beta2 + self.iter = 0 + self.m = None + self.v = None + + def update(self, params, grads): + if self.m is None: + self.m, self.v = {}, {} + for key, val in params.items(): + self.m[key] = np.zeros_like(val) + self.v[key] = np.zeros_like(val) + + self.iter += 1 + lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter) + + for key in params.keys(): + #self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key] + #self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2) + self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key]) + self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key]) + + params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7) + + #unbias_m += (1 - self.beta1) * (grads[key] - self.m[key]) # correct bias + #unbisa_b += (1 - self.beta2) * (grads[key]*grads[key] - self.v[key]) # correct bias + #params[key] += self.lr * unbias_m / (np.sqrt(unbisa_b) + 1e-7) 
diff --git a/mnist-master/common/trainer.py b/mnist-master/common/trainer.py new file mode 100644 index 0000000..1878105 --- /dev/null +++ b/mnist-master/common/trainer.py @@ -0,0 +1,78 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +from common.optimizer import * + +class Trainer: + """进行神经网络的训练的类 + """ + def __init__(self, network, x_train, t_train, x_test, t_test, + epochs=20, mini_batch_size=100, + optimizer='SGD', optimizer_param={'lr':0.01}, + evaluate_sample_num_per_epoch=None, verbose=True): + self.network = network + self.verbose = verbose + self.x_train = x_train + self.t_train = t_train + self.x_test = x_test + self.t_test = t_test + self.epochs = epochs + self.batch_size = mini_batch_size + self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch + + # optimzer + optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov, + 'adagrad':AdaGrad, 'rmsprpo':RMSprop, 'adam':Adam} + self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param) + + self.train_size = x_train.shape[0] + self.iter_per_epoch = max(self.train_size / mini_batch_size, 1) + self.max_iter = int(epochs * self.iter_per_epoch) + self.current_iter = 0 + self.current_epoch = 0 + + self.train_loss_list = [] + self.train_acc_list = [] + self.test_acc_list = [] + + def train_step(self): + batch_mask = np.random.choice(self.train_size, self.batch_size) + x_batch = self.x_train[batch_mask] + t_batch = self.t_train[batch_mask] + + grads = self.network.gradient(x_batch, t_batch) + self.optimizer.update(self.network.params, grads) + + loss = self.network.loss(x_batch, t_batch) + self.train_loss_list.append(loss) + if self.verbose: print("train loss:" + str(loss)) + + if self.current_iter % self.iter_per_epoch == 0: + self.current_epoch += 1 + + x_train_sample, t_train_sample = self.x_train, self.t_train + x_test_sample, t_test_sample = self.x_test, self.t_test + if not self.evaluate_sample_num_per_epoch is None: + t = self.evaluate_sample_num_per_epoch + x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t] + x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t] + + train_acc = self.network.accuracy(x_train_sample, t_train_sample) + test_acc = self.network.accuracy(x_test_sample, t_test_sample) + self.train_acc_list.append(train_acc) + self.test_acc_list.append(test_acc) + + if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===") + self.current_iter += 1 + + def train(self): + for i in range(self.max_iter): + self.train_step() + + test_acc = self.network.accuracy(self.x_test, self.t_test) + + if self.verbose: + print("=============== Final Test Accuracy ===============") + print("test acc:" + str(test_acc)) + diff --git a/mnist-master/common/util.py b/mnist-master/common/util.py new file mode 100644 index 0000000..9e0f0b3 --- /dev/null +++ b/mnist-master/common/util.py @@ -0,0 +1,99 @@ +# coding: utf-8 +import numpy as np + + +def smooth_curve(x): + """用于使损失函数的图形变圆滑 + + 参考:http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html + """ + window_len = 11 + s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]] + w = np.kaiser(window_len, 2) + y = np.convolve(w/w.sum(), s, mode='valid') + return y[5:len(y)-5] + + +def shuffle_dataset(x, t): + """打乱数据集 + + Parameters + ---------- + x : 训练数据 + t : 监督数据 + + Returns + ------- + x, t : 打乱的训练数据和监督数据 + """ + permutation = np.random.permutation(x.shape[0]) + x = 
x[permutation,:] if x.ndim == 2 else x[permutation,:,:,:] + t = t[permutation] + + return x, t + +def conv_output_size(input_size, filter_size, stride=1, pad=0): + return (input_size + 2*pad - filter_size) / stride + 1 + + +def im2col(input_data, filter_h, filter_w, stride=1, pad=0): + """ + + Parameters + ---------- + input_data : 由(数据量, 通道, 高, 长)的4维数组构成的输入数据 + filter_h : 滤波器的高 + filter_w : 滤波器的长 + stride : 步幅 + pad : 填充 + + Returns + ------- + col : 2维数组 + """ + N, C, H, W = input_data.shape + out_h = (H + 2*pad - filter_h)//stride + 1 + out_w = (W + 2*pad - filter_w)//stride + 1 + + img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant') + col = np.zeros((N, C, filter_h, filter_w, out_h, out_w)) + + for y in range(filter_h): + y_max = y + stride*out_h + for x in range(filter_w): + x_max = x + stride*out_w + col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride] + + col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1) + return col + + +def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0): + """ + + Parameters + ---------- + col : + input_shape : 输入数据的形状(例:(10, 1, 28, 28)) + filter_h : + filter_w + stride + pad + + Returns + ------- + + """ + N, C, H, W = input_shape + out_h = (H + 2*pad - filter_h)//stride + 1 + out_w = (W + 2*pad - filter_w)//stride + 1 + col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2) + + img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1)) + for y in range(filter_h): + y_max = y + stride*out_h + for x in range(filter_w): + x_max = x + stride*out_w + img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :] + + return img[:, :, pad:H + pad, pad:W + pad] \ No newline at end of file diff --git a/mnist-master/dataset/mnist.py b/mnist-master/dataset/mnist.py new file mode 100644 index 0000000..a8ff216 --- /dev/null +++ b/mnist-master/dataset/mnist.py @@ -0,0 +1,131 @@ +# coding: utf-8 +try: + import urllib.request +except ImportError: + raise ImportError('You should use Python 3.x') +import os.path +import gzip +import pickle +import os +import numpy as np + + +url_base = 'http://yann.lecun.com/exdb/mnist/' +key_file = { + 'train_img':'train-images-idx3-ubyte.gz', + 'train_label':'train-labels-idx1-ubyte.gz', + 'test_img':'t10k-images-idx3-ubyte.gz', + 'test_label':'t10k-labels-idx1-ubyte.gz' +} + +dataset_dir = os.path.dirname(os.path.abspath(__file__)) +save_file = dataset_dir + "/mnist.pkl" + +train_num = 60000 +test_num = 10000 +img_dim = (1, 28, 28) +img_size = 784 + + +def _download(file_name): + file_path = dataset_dir + "/" + file_name + + if os.path.exists(file_path): + return + + print("Downloading " + file_name + " ... 
") + urllib.request.urlretrieve(url_base + file_name, file_path) + print("Done") + +def download_mnist(): + for v in key_file.values(): + _download(v) + +def _load_label(file_name): + file_path = dataset_dir + "/" + file_name + + print("Converting " + file_name + " to NumPy Array ...") + with gzip.open(file_path, 'rb') as f: + labels = np.frombuffer(f.read(), np.uint8, offset=8) + print("Done") + + return labels + +def _load_img(file_name): + file_path = dataset_dir + "/" + file_name + + print("Converting " + file_name + " to NumPy Array ...") + with gzip.open(file_path, 'rb') as f: + data = np.frombuffer(f.read(), np.uint8, offset=16) + data = data.reshape(-1, img_size) + print("Done") + + return data + +def _convert_numpy(): + dataset = {} + dataset['train_img'] = _load_img(key_file['train_img']) + dataset['train_label'] = _load_label(key_file['train_label']) + dataset['test_img'] = _load_img(key_file['test_img']) + dataset['test_label'] = _load_label(key_file['test_label']) + + return dataset + +def init_mnist(): + ''' + Note:已将数据集下载至本地,第一次加载会将数据集保存成pickle + ''' + # download_mnist() + dataset = _convert_numpy() + print("Creating pickle file ...") + with open(save_file, 'wb') as f: + pickle.dump(dataset, f, -1) + print("Done!") + +def _change_one_hot_label(X): + T = np.zeros((X.size, 10)) + for idx, row in enumerate(T): + row[X[idx]] = 1 + + return T + + +def load_mnist(normalize=True, flatten=True, one_hot_label=False): + """读入MNIST数据集 + + Parameters + ---------- + normalize : 将图像的像素值正规化为0.0~1.0 + one_hot_label : + one_hot_label为True的情况下,标签作为one-hot数组返回 + one-hot数组是指[0,0,1,0,0,0,0,0,0,0]这样的数组 + flatten : 是否将图像展开为一维数组 + + Returns + ------- + (训练图像, 训练标签), (测试图像, 测试标签) + """ + if not os.path.exists(save_file): + init_mnist() + + with open(save_file, 'rb') as f: + dataset = pickle.load(f) + + if normalize: + for key in ('train_img', 'test_img'): + dataset[key] = dataset[key].astype(np.float32) + dataset[key] /= 255.0 + + if one_hot_label: + dataset['train_label'] = _change_one_hot_label(dataset['train_label']) + dataset['test_label'] = _change_one_hot_label(dataset['test_label']) + + if not flatten: + for key in ('train_img', 'test_img'): + dataset[key] = dataset[key].reshape(-1, 1, 28, 28) + + return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label']) + + +if __name__ == '__main__': + init_mnist() diff --git a/mnist-master/dataset/t10k-images-idx3-ubyte.gz b/mnist-master/dataset/t10k-images-idx3-ubyte.gz new file mode 100644 index 0000000..45f261c Binary files /dev/null and b/mnist-master/dataset/t10k-images-idx3-ubyte.gz differ diff --git a/mnist-master/dataset/t10k-labels-idx1-ubyte.gz b/mnist-master/dataset/t10k-labels-idx1-ubyte.gz new file mode 100644 index 0000000..a877692 Binary files /dev/null and b/mnist-master/dataset/t10k-labels-idx1-ubyte.gz differ diff --git a/mnist-master/dataset/train-labels-idx1-ubyte.gz b/mnist-master/dataset/train-labels-idx1-ubyte.gz new file mode 100644 index 0000000..2c24ce4 Binary files /dev/null and b/mnist-master/dataset/train-labels-idx1-ubyte.gz differ diff --git a/mnist-master/dataset/请在当前文件夹解压该文件.z01 b/mnist-master/dataset/请在当前文件夹解压该文件.z01 new file mode 100644 index 0000000..802fc21 Binary files /dev/null and b/mnist-master/dataset/请在当前文件夹解压该文件.z01 differ diff --git a/mnist-master/dataset/请在当前文件夹解压该文件.zip b/mnist-master/dataset/请在当前文件夹解压该文件.zip new file mode 100644 index 0000000..6175ab1 Binary files /dev/null and b/mnist-master/dataset/请在当前文件夹解压该文件.zip differ diff --git 
a/mnist-master/dataset/请在当前文件夹解压该文件1.z01 b/mnist-master/dataset/请在当前文件夹解压该文件1.z01 new file mode 100644 index 0000000..d2e8d41 Binary files /dev/null and b/mnist-master/dataset/请在当前文件夹解压该文件1.z01 differ diff --git a/mnist-master/dataset/请在当前文件夹解压该文件1.z02 b/mnist-master/dataset/请在当前文件夹解压该文件1.z02 new file mode 100644 index 0000000..534661a Binary files /dev/null and b/mnist-master/dataset/请在当前文件夹解压该文件1.z02 differ diff --git a/mnist-master/dataset/请在当前文件夹解压该文件1.zip b/mnist-master/dataset/请在当前文件夹解压该文件1.zip new file mode 100644 index 0000000..68fc43f Binary files /dev/null and b/mnist-master/dataset/请在当前文件夹解压该文件1.zip differ diff --git a/mnist-master/deep_convnet.py b/mnist-master/deep_convnet.py new file mode 100644 index 0000000..d974c7b --- /dev/null +++ b/mnist-master/deep_convnet.py @@ -0,0 +1,136 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import pickle +import numpy as np +from collections import OrderedDict +from common.layers import * + + +class DeepConvNet: + """识别率为99%以上的高精度的ConvNet + + 网络结构如下所示 + conv - relu - conv- relu - pool - + conv - relu - conv- relu - pool - + conv - relu - conv- relu - pool - + affine - relu - dropout - affine - dropout - softmax + """ + def __init__(self, input_dim=(1, 28, 28), + conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1}, + conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, + conv_param_6 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1}, + hidden_size=50, output_size=10): + # 初始化权重=========== + # 各层的神经元平均与前一层的几个神经元有连接(TODO:自动计算) + pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size]) + wight_init_scales = np.sqrt(2.0 / pre_node_nums) # 使用ReLU的情况下推荐的初始值 + + self.params = {} + pre_channel_num = input_dim[0] + for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]): + self.params['W' + str(idx+1)] = wight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size']) + self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num']) + pre_channel_num = conv_param['filter_num'] + self.params['W7'] = wight_init_scales[6] * np.random.randn(64*4*4, hidden_size) + self.params['b7'] = np.zeros(hidden_size) + self.params['W8'] = wight_init_scales[7] * np.random.randn(hidden_size, output_size) + self.params['b8'] = np.zeros(output_size) + + # 生成层=========== + self.layers = [] + self.layers.append(Convolution(self.params['W1'], self.params['b1'], + conv_param_1['stride'], conv_param_1['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W2'], self.params['b2'], + conv_param_2['stride'], conv_param_2['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Convolution(self.params['W3'], self.params['b3'], + conv_param_3['stride'], conv_param_3['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W4'], self.params['b4'], + conv_param_4['stride'], conv_param_4['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Convolution(self.params['W5'], self.params['b5'], + conv_param_5['stride'], 
conv_param_5['pad'])) + self.layers.append(Relu()) + self.layers.append(Convolution(self.params['W6'], self.params['b6'], + conv_param_6['stride'], conv_param_6['pad'])) + self.layers.append(Relu()) + self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2)) + self.layers.append(Affine(self.params['W7'], self.params['b7'])) + self.layers.append(Relu()) + self.layers.append(Dropout(0.5)) + self.layers.append(Affine(self.params['W8'], self.params['b8'])) + self.layers.append(Dropout(0.5)) + + self.last_layer = SoftmaxWithLoss() + + def predict(self, x, train_flg=False): + for layer in self.layers: + if isinstance(layer, Dropout): + x = layer.forward(x, train_flg) + else: + x = layer.forward(x) + return x + + def loss(self, x, t): + y = self.predict(x, train_flg=True) + return self.last_layer.forward(y, t) + + def accuracy(self, x, t, batch_size=100): + if t.ndim != 1 : t = np.argmax(t, axis=1) + + acc = 0.0 + + for i in range(int(x.shape[0] / batch_size)): + tx = x[i*batch_size:(i+1)*batch_size] + tt = t[i*batch_size:(i+1)*batch_size] + y = self.predict(tx, train_flg=False) + y = np.argmax(y, axis=1) + acc += np.sum(y == tt) + + return acc / x.shape[0] + + def gradient(self, x, t): + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + tmp_layers = self.layers.copy() + tmp_layers.reverse() + for layer in tmp_layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): + grads['W' + str(i+1)] = self.layers[layer_idx].dW + grads['b' + str(i+1)] = self.layers[layer_idx].db + + return grads + + def save_params(self, file_name="params.pkl"): + params = {} + for key, val in self.params.items(): + params[key] = val + with open(file_name, 'wb') as f: + pickle.dump(params, f) + + def load_params(self, file_name="params.pkl"): + with open(file_name, 'rb') as f: + params = pickle.load(f) + for key, val in params.items(): + self.params[key] = val + + for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)): + self.layers[layer_idx].W = self.params['W' + str(i+1)] + self.layers[layer_idx].b = self.params['b' + str(i+1)] diff --git a/mnist-master/deep_convnet_params.pkl b/mnist-master/deep_convnet_params.pkl new file mode 100644 index 0000000..b32451c Binary files /dev/null and b/mnist-master/deep_convnet_params.pkl differ diff --git a/mnist-master/mnist_cnn_gui_main.py b/mnist-master/mnist_cnn_gui_main.py new file mode 100644 index 0000000..b3a4cc2 --- /dev/null +++ b/mnist-master/mnist_cnn_gui_main.py @@ -0,0 +1,192 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +import sys, os +import numpy as np +from dataset.mnist import load_mnist +from PIL import Image, ImageQt + + +from qt.layout import Ui_MainWindow +from qt.paintboard import PaintBoard + +from PyQt5.QtWidgets import QMainWindow, QDesktopWidget, QApplication +from PyQt5.QtWidgets import QLabel, QMessageBox, QPushButton, QFrame +from PyQt5.QtGui import QPainter, QPen, QPixmap, QColor, QImage +from PyQt5.QtCore import Qt, QPoint, QSize + +from simple_convnet import SimpleConvNet +from common.functions import softmax +from deep_convnet import DeepConvNet + + + +MODE_MNIST = 1 # MNIST随机抽取 +MODE_WRITE = 2 # 手写输入 + +Thresh = 0.5 # 识别结果置信度阈值 + + + +# 读取MNIST数据集 +(_, _), (x_test, _) = load_mnist(normalize=True, flatten=False, one_hot_label=False) + + +# 初始化网络 + +# 网络1:简单CNN +""" +conv - relu - pool - affine - relu - affine - softmax +""" +network = SimpleConvNet(input_dim=(1,28,28), + conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 
'stride': 1}, + hidden_size=100, output_size=10, weight_init_std=0.01) +network.load_params("params.pkl") + +# 网络2:深度CNN +# network = DeepConvNet() +# network.load_params("deep_convnet_params.pkl") + + +class MainWindow(QMainWindow,Ui_MainWindow): + def __init__(self): + super(MainWindow,self).__init__() + + # 初始化参数 + self.mode = MODE_MNIST + self.result = [0, 0] + + # 初始化UI + self.setupUi(self) + self.center() + + # 初始化画板 + self.paintBoard = PaintBoard(self, Size = QSize(224, 224), Fill = QColor(0,0,0,0)) + self.paintBoard.setPenColor(QColor(0,0,0,0)) + self.dArea_Layout.addWidget(self.paintBoard) + + self.clearDataArea() + + # 窗口居中 + def center(self): + # 获得窗口 + framePos = self.frameGeometry() + # 获得屏幕中心点 + scPos = QDesktopWidget().availableGeometry().center() + # 显示到屏幕中心 + framePos.moveCenter(scPos) + self.move(framePos.topLeft()) + + + # 窗口关闭事件 + def closeEvent(self, event): + reply = QMessageBox.question(self, 'Message', + "Are you sure to quit?", QMessageBox.Yes | + QMessageBox.No, QMessageBox.Yes) + + if reply == QMessageBox.Yes: + event.accept() + else: + event.ignore() + + # 清除数据待输入区 + def clearDataArea(self): + self.paintBoard.Clear() + self.lbDataArea.clear() + self.lbResult.clear() + self.lbCofidence.clear() + self.result = [0, 0] + + """ + 回调函数 + """ + # 模式下拉列表回调 + def cbBox_Mode_Callback(self, text): + if text == '1:MINIST随机抽取': + self.mode = MODE_MNIST + self.clearDataArea() + self.pbtGetMnist.setEnabled(True) + + self.paintBoard.setBoardFill(QColor(0,0,0,0)) + self.paintBoard.setPenColor(QColor(0,0,0,0)) + + elif text == '2:鼠标手写输入': + self.mode = MODE_WRITE + self.clearDataArea() + self.pbtGetMnist.setEnabled(False) + + # 更改背景 + self.paintBoard.setBoardFill(QColor(0,0,0,255)) + self.paintBoard.setPenColor(QColor(255,255,255,255)) + + + # 数据清除 + def pbtClear_Callback(self): + self.clearDataArea() + + + # 识别 + def pbtPredict_Callback(self): + __img, img_array =[],[] # 将图像统一从qimage->pil image -> np.array [1, 1, 28, 28] + + # 获取qimage格式图像 + if self.mode == MODE_MNIST: + __img = self.lbDataArea.pixmap() # label内若无图像返回None + if __img == None: # 无图像则用纯黑代替 + # __img = QImage(224, 224, QImage.Format_Grayscale8) + __img = ImageQt.ImageQt(Image.fromarray(np.uint8(np.zeros([224,224])))) + else: __img = __img.toImage() + elif self.mode == MODE_WRITE: + __img = self.paintBoard.getContentAsQImage() + + # 转换成pil image类型处理 + pil_img = ImageQt.fromqimage(__img) + pil_img = pil_img.resize((28, 28), Image.ANTIALIAS) + + # pil_img.save('test.png') + + img_array = np.array(pil_img.convert('L')).reshape(1,1,28, 28) / 255.0 + # img_array = np.where(img_array>0.5, 1, 0) + + # reshape成网络输入类型 + __result = network.predict(img_array) # shape:[1, 10] + + # print (__result) + + # 将预测结果使用softmax输出 + __result = softmax(__result) + + self.result[0] = np.argmax(__result) # 预测的数字 + self.result[1] = __result[0, self.result[0]] # 置信度 + + self.lbResult.setText("%d" % (self.result[0])) + self.lbCofidence.setText("%.8f" % (self.result[1])) + + + # 随机抽取 + def pbtGetMnist_Callback(self): + self.clearDataArea() + + # 随机抽取一张测试集图片,放大后显示 + img = x_test[np.random.randint(0, 9999)] # shape:[1,28,28] + img = img.reshape(28, 28) # shape:[28,28] + + img = img * 0xff # 恢复灰度值大小 + pil_img = Image.fromarray(np.uint8(img)) + pil_img = pil_img.resize((224, 224)) # 图像放大显示 + + # 将pil图像转换成qimage类型 + qimage = ImageQt.ImageQt(pil_img) + + # 将qimage类型图像显示在label + pix = QPixmap.fromImage(qimage) + self.lbDataArea.setPixmap(pix) + + + +if __name__ == "__main__": + app = QApplication(sys.argv) + Gui = MainWindow() + Gui.show() + + 
sys.exit(app.exec_()) \ No newline at end of file diff --git a/mnist-master/params.pkl b/mnist-master/params.pkl new file mode 100644 index 0000000..eab8592 Binary files /dev/null and b/mnist-master/params.pkl differ diff --git a/mnist-master/qt/layout.py b/mnist-master/qt/layout.py new file mode 100644 index 0000000..44a4124 --- /dev/null +++ b/mnist-master/qt/layout.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- + +# Form implementation generated from reading ui file 'layout.ui' +# +# Created by: PyQt5 UI code generator 5.14.1 +# +# WARNING! All changes made in this file will be lost! + + +from PyQt5 import QtCore, QtGui, QtWidgets + + +class Ui_MainWindow(object): + def setupUi(self, MainWindow): + MainWindow.setObjectName("MainWindow") + MainWindow.resize(800, 600) + self.cbBox_Mode = QtWidgets.QComboBox(MainWindow) + self.cbBox_Mode.setGeometry(QtCore.QRect(60, 300, 211, 31)) + self.cbBox_Mode.setObjectName("cbBox_Mode") + self.cbBox_Mode.addItem("") + self.cbBox_Mode.addItem("") + self.label = QtWidgets.QLabel(MainWindow) + self.label.setGeometry(QtCore.QRect(60, 270, 80, 20)) + self.label.setObjectName("label") + self.pbtClear = QtWidgets.QPushButton(MainWindow) + self.pbtClear.setGeometry(QtCore.QRect(80, 440, 120, 30)) + self.pbtClear.setStyleSheet("") + self.pbtClear.setCheckable(False) + self.pbtClear.setChecked(False) + self.pbtClear.setObjectName("pbtClear") + self.pbtGetMnist = QtWidgets.QPushButton(MainWindow) + self.pbtGetMnist.setGeometry(QtCore.QRect(80, 380, 120, 30)) + self.pbtGetMnist.setCheckable(False) + self.pbtGetMnist.setObjectName("pbtGetMnist") + self.pbtPredict = QtWidgets.QPushButton(MainWindow) + self.pbtPredict.setGeometry(QtCore.QRect(80, 500, 120, 30)) + self.pbtPredict.setStyleSheet("") + self.pbtPredict.setObjectName("pbtPredict") + self.lbDataArea = QtWidgets.QLabel(MainWindow) + self.lbDataArea.setGeometry(QtCore.QRect(540, 350, 224, 224)) + self.lbDataArea.setMouseTracking(False) + self.lbDataArea.setStyleSheet("background-color: rgb(255, 255, 255);") + self.lbDataArea.setFrameShape(QtWidgets.QFrame.Box) + self.lbDataArea.setFrameShadow(QtWidgets.QFrame.Sunken) + self.lbDataArea.setLineWidth(4) + self.lbDataArea.setMidLineWidth(0) + self.lbDataArea.setText("") + self.lbDataArea.setObjectName("lbDataArea") + self.label_3 = QtWidgets.QLabel(MainWindow) + self.label_3.setGeometry(QtCore.QRect(260, 340, 91, 181)) + self.label_3.setObjectName("label_3") + self.label_4 = QtWidgets.QLabel(MainWindow) + self.label_4.setGeometry(QtCore.QRect(540, 320, 131, 20)) + self.label_4.setObjectName("label_4") + self.label_5 = QtWidgets.QLabel(MainWindow) + self.label_5.setGeometry(QtCore.QRect(20, 10, 711, 241)) + self.label_5.setObjectName("label_5") + self.verticalLayoutWidget = QtWidgets.QWidget(MainWindow) + self.verticalLayoutWidget.setGeometry(QtCore.QRect(540, 350, 221, 221)) + self.verticalLayoutWidget.setObjectName("verticalLayoutWidget") + self.dArea_Layout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget) + self.dArea_Layout.setContentsMargins(0, 0, 0, 0) + self.dArea_Layout.setSpacing(0) + self.dArea_Layout.setObjectName("dArea_Layout") + self.lbResult = QtWidgets.QLabel(MainWindow) + self.lbResult.setGeometry(QtCore.QRect(380, 350, 91, 131)) + font = QtGui.QFont() + font.setPointSize(48) + self.lbResult.setFont(font) + self.lbResult.setObjectName("lbResult") + self.lbCofidence = QtWidgets.QLabel(MainWindow) + self.lbCofidence.setGeometry(QtCore.QRect(360, 500, 151, 21)) + font = QtGui.QFont() + font.setPointSize(12) + self.lbCofidence.setFont(font) + 
self.lbCofidence.setObjectName("lbCofidence") + + self.retranslateUi(MainWindow) + self.cbBox_Mode.activated['QString'].connect(MainWindow.cbBox_Mode_Callback) + self.pbtClear.clicked.connect(MainWindow.pbtClear_Callback) + self.pbtPredict.clicked.connect(MainWindow.pbtPredict_Callback) + self.pbtGetMnist.clicked.connect(MainWindow.pbtGetMnist_Callback) + QtCore.QMetaObject.connectSlotsByName(MainWindow) + + def retranslateUi(self, MainWindow): + _translate = QtCore.QCoreApplication.translate + MainWindow.setWindowTitle(_translate("MainWindow", "手写数字识别GUI-v1.0 --by hamlin")) + self.cbBox_Mode.setItemText(0, _translate("MainWindow", "1:MINIST随机抽取")) + self.cbBox_Mode.setItemText(1, _translate("MainWindow", "2:鼠标手写输入")) + self.label.setText(_translate("MainWindow", "模式选择")) + self.pbtClear.setText(_translate("MainWindow", "清除数据")) + self.pbtGetMnist.setText(_translate("MainWindow", "MNIST抽取")) + self.pbtPredict.setText(_translate("MainWindow", "识别")) + self.label_3.setText(_translate("MainWindow", "

<html><head/><body><p><span style=\" font-size:12pt; font-weight:600;\">识别结果:</span></p><p><br/></p><p><br/></p><p><span style=\" font-size:12pt; font-weight:600;\">Softmax:</span></p></body></html>")) + self.label_4.setText(_translate("MainWindow", "数据输入区域")) + self.label_5.setText(_translate("MainWindow", "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n" +"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n" +"p, li { white-space: pre-wrap; }\n" +"</style></head><body style=\" font-family:'Ubuntu'; font-size:11pt; font-weight:400; font-style:normal;\">\n" +"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:'Ubuntu'; font-weight:600;\">使用说明</span></p>\n" +"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:'Ubuntu';\">1、点击下拉列表进行模式选择,输入待识别数据后点击“识别”按键进行识别</span></p>\n" +"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:'Ubuntu';\">2、经CNN网络计算后输出,显示识别结果与Softmax值</span></p>\n" +"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:'Ubuntu';\">3、点击“清除数据”按键重新输入数据</span></p>\n" +"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:'Ubuntu';\">模式1:随机从测试集抽取图像作为待识别数据,点击“MNIST抽取”按键抽取</span></p>\n" +"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:'Ubuntu';\">模式2:使用鼠标在数据输入区域手写输入作为待识别数据</span></p></body></html>
")) + self.lbResult.setText(_translate("MainWindow", "9")) + self.lbCofidence.setText(_translate("MainWindow", "0.99999999")) diff --git a/mnist-master/qt/layout.ui b/mnist-master/qt/layout.ui new file mode 100644 index 0000000..ae89896 --- /dev/null +++ b/mnist-master/qt/layout.ui @@ -0,0 +1,319 @@ + + + MainWindow + + + + 0 + 0 + 800 + 600 + + + + 手写数字识别GUI-v1.0 --by hamlin + + + + + 60 + 300 + 211 + 31 + + + + + 1:MINIST随机抽取 + + + + + 2:鼠标手写输入 + + + + + + + 60 + 270 + 80 + 20 + + + + 模式选择 + + + + + + 80 + 440 + 120 + 30 + + + + + + + 清除数据 + + + false + + + false + + + + + + 80 + 380 + 120 + 30 + + + + MNIST抽取 + + + false + + + + + + 80 + 500 + 120 + 30 + + + + + + + 识别 + + + + + + 540 + 350 + 224 + 224 + + + + false + + + background-color: rgb(255, 255, 255); + + + QFrame::Box + + + QFrame::Sunken + + + 4 + + + 0 + + + + + + + + + 260 + 340 + 91 + 181 + + + + <html><head/><body><p><span style=" font-size:12pt; font-weight:600;">识别结果:</span></p><p><br/></p><p><br/></p><p><span style=" font-size:12pt; font-weight:600;">Softmax:</span></p></body></html> + + + + + + 540 + 320 + 131 + 20 + + + + 数据输入区域 + + + + + + 20 + 10 + 711 + 241 + + + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> +<html><head><meta name="qrichtext" content="1" /><style type="text/css"> +p, li { white-space: pre-wrap; } +</style></head><body style=" font-family:'Ubuntu'; font-size:11pt; font-weight:400; font-style:normal;"> +<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-family:'Ubuntu'; font-weight:600;">使用说明</span></p> +<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-family:'Ubuntu';">1、点击下拉列表进行模式选择,输入待识别数据后点击“识别”按键进行识别</span></p> +<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-family:'Ubuntu';">2、经CNN网络计算后输出,显示识别结果与Softmax值</span></p> +<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-family:'Ubuntu';">3、点击“清除数据”按键重新输入数据</span></p> +<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-family:'Ubuntu';">模式1:随机从测试集抽取图像作为待识别数据,点击“MNIST抽取”按键抽取</span></p> +<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-family:'Ubuntu';">模式2:使用鼠标在数据输入区域手写输入作为待识别数据</span></p></body></html> + + + + + + 540 + 350 + 221 + 221 + + + + + 0 + + + 0 + + + 0 + + + 0 + + + 0 + + + + + + + 380 + 350 + 91 + 131 + + + + + 48 + + + + 9 + + + + + + 360 + 500 + 151 + 21 + + + + + 12 + + + + 0.99999999 + + + + + + + cbBox_Mode + activated(QString) + MainWindow + cbBox_Mode_Callback() + + + 178 + 317 + + + 158 + 168 + + + + + pbtClear + clicked() + MainWindow + pbtClear_Callback() + + + 131 + 460 + + + 105 + 341 + + + + + pbtPredict + clicked() + MainWindow + pbtPredict_Callback() + + + 157 + 517 + + + 157 + 542 + + + + + pbtGetMnist + clicked() + MainWindow + pbtGetMnist_Callback() + + + 102 + 399 + + + 29 + 401 + + + + + + cbBox_Mode_Callback() + pbtClear_Callback() + pbtPredict_Callback() + pbtGetMnist_Callback() + + diff --git a/mnist-master/qt/paintboard.py b/mnist-master/qt/paintboard.py new file mode 100644 index 0000000..183fc7f --- /dev/null +++ 
b/mnist-master/qt/paintboard.py @@ -0,0 +1,82 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +import sys +from PyQt5.QtWidgets import QWidget, QApplication +from PyQt5.QtGui import QPixmap, QPainter, QPen, QColor +from PyQt5.QtCore import Qt, QPoint, QSize + +class PaintBoard(QWidget): + def __init__(self, Parent = None, Size = QSize(320, 240), Fill = QColor(255,255,255,255)): + super().__init__(Parent) + + # 初始化参数 + self.__size = Size # 画板尺寸 + self.__fill = Fill # 画板默认填充颜色 + + self.__thickness = 18 # 默认画笔粗细 + self.__penColor = QColor(0,0,0,255) # 默认画笔颜色 + + self.__begin_point = QPoint() + self.__end_point = QPoint() + + # 初始化画板界面 + self.__board = QPixmap(self.__size) + self.__board.fill(Fill) + self.setFixedSize(self.__size) + self.__painter = QPainter() # 新建绘图工具 + + + # 清空画板 + def Clear(self): + self.__board.fill(self.__fill) + self.update() + + def setBoardFill(self, fill): + self.__fill = fill + self.__board.fill(fill) + self.update() + + # 设置画笔颜色 + def setPenColor(self, color): + self.__penColor = color + + # 设置画笔粗细 + def setPenThickness(self, thickness=10): + self.__thickness = thickness + + # 获取画板QImage类型图片 + def getContentAsQImage(self): + image = self.__board.toImage() + return image + + # 双缓冲绘图,绘图事件 + def paintEvent(self, paintEvent): + self.__painter.begin(self) + self.__painter.drawPixmap(0,0,self.__board) + self.__painter.end() + + def mousePressEvent(self, mouseEvent): + if mouseEvent.button() == Qt.LeftButton: + self.__begin_point = mouseEvent.pos() + self.__end_point = self.__begin_point + # self.update() + + def mouseMoveEvent(self, mouseEvent): + if mouseEvent.buttons() == Qt.LeftButton: + self.__end_point = mouseEvent.pos() + + # 画入缓冲区 + self.__painter.begin(self.__board) + self.__painter.setPen(QPen(self.__penColor,self.__thickness)) + self.__painter.drawLine(self.__begin_point, self.__end_point) + self.__painter.end() + + self.__begin_point = self.__end_point + self.update() + +if __name__ == '__main__': + app = QApplication(sys.argv) + demo = PaintBoard() + demo.show() + sys.exit(app.exec_()) \ No newline at end of file diff --git a/mnist-master/qt/ui2py.sh b/mnist-master/qt/ui2py.sh new file mode 100644 index 0000000..ec3a44e --- /dev/null +++ b/mnist-master/qt/ui2py.sh @@ -0,0 +1,2 @@ +#!/bin/bash +pyuic5 -o layout.py layout.ui diff --git a/mnist-master/simple_convnet.py b/mnist-master/simple_convnet.py new file mode 100644 index 0000000..af0651b --- /dev/null +++ b/mnist-master/simple_convnet.py @@ -0,0 +1,160 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import pickle +import numpy as np +from collections import OrderedDict +from common.layers import * +from common.gradient import numerical_gradient + + +class SimpleConvNet: + """简单的ConvNet + + conv - relu - pool - affine - relu - affine - softmax + + Parameters + ---------- + input_size : 输入大小(MNIST的情况下为784) + hidden_size_list : 隐藏层的神经元数量的列表(e.g. [100, 100, 100]) + output_size : 输出大小(MNIST的情况下为10) + activation : 'relu' or 'sigmoid' + weight_init_std : 指定权重的标准差(e.g. 
0.01) + 指定'relu'或'he'的情况下设定“He的初始值” + 指定'sigmoid'或'xavier'的情况下设定“Xavier的初始值” + """ + def __init__(self, input_dim=(1, 28, 28), + conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1}, + hidden_size=100, output_size=10, weight_init_std=0.01): + filter_num = conv_param['filter_num'] + filter_size = conv_param['filter_size'] + filter_pad = conv_param['pad'] + filter_stride = conv_param['stride'] + input_size = input_dim[1] + conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1 + pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2)) + + # 初始化权重 + self.params = {} + self.params['W1'] = weight_init_std * \ + np.random.randn(filter_num, input_dim[0], filter_size, filter_size) + self.params['b1'] = np.zeros(filter_num) + self.params['W2'] = weight_init_std * \ + np.random.randn(pool_output_size, hidden_size) + self.params['b2'] = np.zeros(hidden_size) + self.params['W3'] = weight_init_std * \ + np.random.randn(hidden_size, output_size) + self.params['b3'] = np.zeros(output_size) + + # 生成层 + self.layers = OrderedDict() + self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'], + conv_param['stride'], conv_param['pad']) + self.layers['Relu1'] = Relu() + self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2) + self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2']) + self.layers['Relu2'] = Relu() + self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3']) + + self.last_layer = SoftmaxWithLoss() + + def predict(self, x): + for layer in self.layers.values(): + x = layer.forward(x) + + return x + + def loss(self, x, t): + """求损失函数 + 参数x是输入数据、t是教师标签 + """ + y = self.predict(x) + return self.last_layer.forward(y, t) + + def accuracy(self, x, t, batch_size=100): + if t.ndim != 1 : t = np.argmax(t, axis=1) + + acc = 0.0 + + for i in range(int(x.shape[0] / batch_size)): + tx = x[i*batch_size:(i+1)*batch_size] + tt = t[i*batch_size:(i+1)*batch_size] + y = self.predict(tx) + y = np.argmax(y, axis=1) + acc += np.sum(y == tt) + + return acc / x.shape[0] + + def numerical_gradient(self, x, t): + """求梯度(数值微分) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + loss_w = lambda w: self.loss(x, t) + + grads = {} + for idx in (1, 2, 3): + grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)]) + grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)]) + + return grads + + def gradient(self, x, t): + """求梯度(误差反向传播法) + + Parameters + ---------- + x : 输入数据 + t : 教师标签 + + Returns + ------- + 具有各层的梯度的字典变量 + grads['W1']、grads['W2']、...是各层的权重 + grads['b1']、grads['b2']、...是各层的偏置 + """ + # forward + self.loss(x, t) + + # backward + dout = 1 + dout = self.last_layer.backward(dout) + + layers = list(self.layers.values()) + layers.reverse() + for layer in layers: + dout = layer.backward(dout) + + # 设定 + grads = {} + grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db + grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db + grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db + + return grads + + def save_params(self, file_name="params.pkl"): + params = {} + for key, val in self.params.items(): + params[key] = val + with open(file_name, 'wb') as f: + pickle.dump(params, f) + + def load_params(self, file_name="params.pkl"): + with open(file_name, 'rb') as f: + params = 
pickle.load(f) + for key, val in params.items(): + self.params[key] = val + + for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']): + self.layers[key].W = self.params['W' + str(i+1)] + self.layers[key].b = self.params['b' + str(i+1)] \ No newline at end of file diff --git a/mnist-master/train_convnet.py b/mnist-master/train_convnet.py new file mode 100644 index 0000000..2596a9c --- /dev/null +++ b/mnist-master/train_convnet.py @@ -0,0 +1,42 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录的文件而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from simple_convnet import SimpleConvNet +from common.trainer import Trainer + +# 读入数据 +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +# 处理花费时间较长的情况下减少数据 +#x_train, t_train = x_train[:5000], t_train[:5000] +#x_test, t_test = x_test[:1000], t_test[:1000] + +max_epochs = 20 + +network = SimpleConvNet(input_dim=(1,28,28), + conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1}, + hidden_size=100, output_size=10, weight_init_std=0.01) + +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=max_epochs, mini_batch_size=100, + optimizer='Adam', optimizer_param={'lr': 0.001}, + evaluate_sample_num_per_epoch=1000) +trainer.train() + +# 保存参数 +network.save_params("params.pkl") +print("Saved Network Parameters!") + +# 绘制图形 +markers = {'train': 'o', 'test': 's'} +x = np.arange(max_epochs) +plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2) +plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2) +plt.xlabel("epochs") +plt.ylabel("accuracy") +plt.ylim(0, 1.0) +plt.legend(loc='lower right') +plt.show() diff --git a/mnist-master/train_deepnet.py b/mnist-master/train_deepnet.py new file mode 100644 index 0000000..9cdf3fb --- /dev/null +++ b/mnist-master/train_deepnet.py @@ -0,0 +1,21 @@ +# coding: utf-8 +import sys, os +sys.path.append(os.pardir) # 为了导入父目录而进行的设定 +import numpy as np +import matplotlib.pyplot as plt +from dataset.mnist import load_mnist +from deep_convnet import DeepConvNet +from common.trainer import Trainer + +(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False) + +network = DeepConvNet() +trainer = Trainer(network, x_train, t_train, x_test, t_test, + epochs=20, mini_batch_size=100, + optimizer='Adam', optimizer_param={'lr':0.001}, + evaluate_sample_num_per_epoch=1000) +trainer.train() + +# 保存参数 +network.save_params("deep_convnet_params.pkl") +print("Saved Network Parameters!")
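For completeness, a minimal sketch of running the trained `SimpleConvNet` outside the GUI, mirroring what `mnist_cnn_gui_main.py` does for a single image. It assumes `train_convnet.py` has already been run from the repository root, so `params.pkl` exists and `dataset/mnist.pkl` has been generated.

```python
# Minimal sketch: load the trained SimpleConvNet and classify one test image.
# Run from the repository root so dataset/, common/ and simple_convnet.py resolve.
import numpy as np
from dataset.mnist import load_mnist
from simple_convnet import SimpleConvNet
from common.functions import softmax

(_, _), (x_test, t_test) = load_mnist(normalize=True, flatten=False, one_hot_label=False)

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
network.load_params("params.pkl")

x = x_test[:1]                  # shape (1, 1, 28, 28)
scores = network.predict(x)     # raw scores, shape (1, 10)
probs = softmax(scores)
print("predicted:", int(np.argmax(probs)),
      "confidence: %.4f" % probs.max(),
      "true label:", int(t_test[0]))
```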