Merge request #1

Closed
ptlgybnei wants to merge 39 commits from master into common

.vscode/launch.json
@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: 当前文件",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"cwd": ""
}
]
}

.vscode/settings.json
@ -0,0 +1,8 @@
{
"python.pythonPath": "/usr/bin/python3.7",
"python.linting.pylintArgs": [
"--errors-only",
"--disable=E0401",
"--extension-pkg-whitelist=PyQt5"
]
}

README.md
@ -0,0 +1,41 @@
# Pure-Python CNN for handwritten digit recognition + GUI
![](https://img.hamlinzheng.com/i/2020/02/07/psh0gw.png)
---
> Since the dataset is also committed to the repository, downloading takes quite a while; I have packaged a copy on my server, click [here](https://dl.hamlinzheng.com/Python/MNIST.zip) to download it.

The project file structure is shown below:
```
.
├── common
│   ├── functions.py
│   ├── gradient.py
│   ├── layers.py
│   ├── optimizer.py
│   ├── trainer.py
│   └── util.py
├── dataset
│   ├── mnist.pkl
│   ├── mnist.py
│   ├── t10k-images-idx3-ubyte.gz
│   ├── t10k-labels-idx1-ubyte.gz
│   ├── train-images-idx3-ubyte.gz
│   └── train-labels-idx1-ubyte.gz
├── deep_convnet_params.pkl
├── deep_convnet.py
├── mnist_cnn_gui_main.py
├── params.pkl
├── qt
│   ├── layout.py
│   ├── layout.ui
│   ├── paintboard.py
│   └── ui2py.sh
├── simple_convnet.py
├── train_convnet.py
└── train_deepnet.py
```
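A quick sanity check after downloading (my own sketch, not part of the pull request): it assumes you run it from the repository root with the dataset unpacked into `dataset/` and the pre-trained `params.pkl` present, and it mirrors what the GUI does internally.

```python
# Sketch: load the pre-trained SimpleConvNet and classify one MNIST test image.
import numpy as np
from dataset.mnist import load_mnist
from simple_convnet import SimpleConvNet

(_, _), (x_test, t_test) = load_mnist(normalize=True, flatten=False, one_hot_label=False)

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
network.load_params("params.pkl")          # pre-trained weights shipped with the repository

y = network.predict(x_test[:1])            # shape (1, 10): one score per digit class
print("predicted:", int(np.argmax(y)), "label:", int(t_test[0]))
```

The GUI itself is started by running `mnist_cnn_gui_main.py` with a Python 3 interpreter that has NumPy, Pillow and PyQt5 installed.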

common/functions.py
@ -0,0 +1,61 @@
# coding: utf-8
import numpy as np
def identity_function(x):
return x
def step_function(x):
    return np.array(x > 0, dtype=int)
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def sigmoid_grad(x):
return (1.0 - sigmoid(x)) * sigmoid(x)
def relu(x):
return np.maximum(0, x)
def relu_grad(x):
    grad = np.zeros_like(x)
grad[x>=0] = 1
return grad
def softmax(x):
if x.ndim == 2:
x = x.T
x = x - np.max(x, axis=0)
y = np.exp(x) / np.sum(np.exp(x), axis=0)
return y.T
    x = x - np.max(x)  # guard against overflow
return np.exp(x) / np.sum(np.exp(x))
def mean_squared_error(y, t):
return 0.5 * np.sum((y-t)**2)
def cross_entropy_error(y, t):
if y.ndim == 1:
t = t.reshape(1, t.size)
y = y.reshape(1, y.size)
    # if the teacher data is a one-hot vector, convert it to the index of the correct label
if t.size == y.size:
t = t.argmax(axis=1)
batch_size = y.shape[0]
return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
def softmax_loss(X, t):
y = softmax(X)
return cross_entropy_error(y, t)
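A small usage sketch (mine, not part of the PR) showing why the max subtraction in `softmax` matters: without it `np.exp` would overflow for large scores, while the shifted version stays finite and pairs directly with `cross_entropy_error`:

```python
import numpy as np
from common.functions import softmax, cross_entropy_error

scores = np.array([[1000.0, 1001.0, 1002.0],   # logits this large would overflow a naive exp
                   [0.1, 0.2, 0.7]])
probs = softmax(scores)                        # stable thanks to the per-sample max subtraction
print(probs.sum(axis=1))                       # each row sums to 1.0

t = np.array([2, 2])                           # indices of the correct classes
print(cross_entropy_error(probs, t))           # average negative log-likelihood of the batch
```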

common/gradient.py
@ -0,0 +1,53 @@
# coding: utf-8
import numpy as np
def _numerical_gradient_1d(f, x):
h = 1e-4 # 0.0001
grad = np.zeros_like(x)
for idx in range(x.size):
tmp_val = x[idx]
x[idx] = float(tmp_val) + h
fxh1 = f(x) # f(x+h)
x[idx] = tmp_val - h
fxh2 = f(x) # f(x-h)
grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value
return grad
def numerical_gradient_2d(f, X):
if X.ndim == 1:
return _numerical_gradient_1d(f, X)
else:
grad = np.zeros_like(X)
for idx, x in enumerate(X):
grad[idx] = _numerical_gradient_1d(f, x)
return grad
def numerical_gradient(f, x):
h = 1e-4 # 0.0001
grad = np.zeros_like(x)
    # iterate over all elements of a multi-dimensional array
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
idx = it.multi_index
tmp_val = x[idx]
x[idx] = float(tmp_val) + h
fxh1 = f(x) # f(x+h)
x[idx] = tmp_val - h
fxh2 = f(x) # f(x-h)
grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value
it.iternext()
return grad
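A quick check (my own example) of `numerical_gradient` against a function whose gradient is known in closed form, f(x) = Σx², whose gradient is 2x:

```python
import numpy as np
from common.gradient import numerical_gradient

def f(x):
    return np.sum(x ** 2)                    # analytic gradient: 2 * x

x = np.array([[3.0, -1.0], [0.5, 2.0]])
grad = numerical_gradient(f, x)
print(grad)                                  # approximately [[ 6. -2.] [ 1.  4.]]
print(np.allclose(grad, 2 * x, atol=1e-4))   # True
```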

common/layers.py
@ -0,0 +1,284 @@
# coding: utf-8
import numpy as np
from common.functions import *
from common.util import im2col, col2im
class Relu:
def __init__(self):
self.mask = None
def forward(self, x):
self.mask = (x <= 0)
out = x.copy()
out[self.mask] = 0
return out
def backward(self, dout):
dout[self.mask] = 0
dx = dout
return dx
class Sigmoid:
def __init__(self):
self.out = None
def forward(self, x):
out = sigmoid(x)
self.out = out
return out
def backward(self, dout):
dx = dout * (1.0 - self.out) * self.out
return dx
class Affine:
def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.original_x_shape = None
        # gradients of the weight and bias parameters
        self.dW = None
        self.db = None
def forward(self, x):
        # handle tensors (flatten everything except the batch dimension)
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
out = np.dot(self.x, self.W) + self.b
return out
def backward(self, dout):
dx = np.dot(dout, self.W.T)
self.dW = np.dot(self.x.T, dout)
self.db = np.sum(dout, axis=0)
        dx = dx.reshape(*self.original_x_shape)  # restore the shape of the input data (for tensors)
return dx
class SoftmaxWithLoss:
def __init__(self):
self.loss = None
        self.y = None    # output of softmax
        self.t = None    # teacher data
def forward(self, x, t):
self.t = t
self.y = softmax(x)
self.loss = cross_entropy_error(self.y, self.t)
return self.loss
def backward(self, dout=1):
batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # when the teacher data is a one-hot vector
dx = (self.y - self.t) / batch_size
else:
dx = self.y.copy()
dx[np.arange(batch_size), self.t] -= 1
dx = dx / batch_size
return dx
class Dropout:
"""
http://arxiv.org/abs/1207.0580
"""
def __init__(self, dropout_ratio=0.5):
self.dropout_ratio = dropout_ratio
self.mask = None
def forward(self, x, train_flg=True):
if train_flg:
self.mask = np.random.rand(*x.shape) > self.dropout_ratio
return x * self.mask
else:
return x * (1.0 - self.dropout_ratio)
def backward(self, dout):
return dout * self.mask
class BatchNormalization:
"""
http://arxiv.org/abs/1502.03167
"""
def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
self.gamma = gamma
self.beta = beta
self.momentum = momentum
        self.input_shape = None  # 4D for Conv layers, 2D for fully connected layers
        # mean and variance used at test time
        self.running_mean = running_mean
        self.running_var = running_var
        # intermediate data used in backward
self.batch_size = None
self.xc = None
self.std = None
self.dgamma = None
self.dbeta = None
def forward(self, x, train_flg=True):
self.input_shape = x.shape
if x.ndim != 2:
N, C, H, W = x.shape
x = x.reshape(N, -1)
out = self.__forward(x, train_flg)
return out.reshape(*self.input_shape)
def __forward(self, x, train_flg):
if self.running_mean is None:
N, D = x.shape
self.running_mean = np.zeros(D)
self.running_var = np.zeros(D)
if train_flg:
mu = x.mean(axis=0)
xc = x - mu
var = np.mean(xc**2, axis=0)
std = np.sqrt(var + 10e-7)
xn = xc / std
self.batch_size = x.shape[0]
self.xc = xc
self.xn = xn
self.std = std
self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
self.running_var = self.momentum * self.running_var + (1-self.momentum) * var
else:
xc = x - self.running_mean
xn = xc / ((np.sqrt(self.running_var + 10e-7)))
out = self.gamma * xn + self.beta
return out
def backward(self, dout):
if dout.ndim != 2:
N, C, H, W = dout.shape
dout = dout.reshape(N, -1)
dx = self.__backward(dout)
dx = dx.reshape(*self.input_shape)
return dx
def __backward(self, dout):
dbeta = dout.sum(axis=0)
dgamma = np.sum(self.xn * dout, axis=0)
dxn = self.gamma * dout
dxc = dxn / self.std
dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
dvar = 0.5 * dstd / self.std
dxc += (2.0 / self.batch_size) * self.xc * dvar
dmu = np.sum(dxc, axis=0)
dx = dxc - dmu / self.batch_size
self.dgamma = dgamma
self.dbeta = dbeta
return dx
class Convolution:
def __init__(self, W, b, stride=1, pad=0):
self.W = W
self.b = b
self.stride = stride
self.pad = pad
        # intermediate data (used in backward)
self.x = None
self.col = None
self.col_W = None
        # gradients of the weight and bias parameters
self.dW = None
self.db = None
def forward(self, x):
FN, C, FH, FW = self.W.shape
N, C, H, W = x.shape
out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
col = im2col(x, FH, FW, self.stride, self.pad)
col_W = self.W.reshape(FN, -1).T
out = np.dot(col, col_W) + self.b
out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
self.x = x
self.col = col
self.col_W = col_W
return out
def backward(self, dout):
FN, C, FH, FW = self.W.shape
dout = dout.transpose(0,2,3,1).reshape(-1, FN)
self.db = np.sum(dout, axis=0)
self.dW = np.dot(self.col.T, dout)
self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
dcol = np.dot(dout, self.col_W.T)
dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
return dx
class Pooling:
def __init__(self, pool_h, pool_w, stride=1, pad=0):
self.pool_h = pool_h
self.pool_w = pool_w
self.stride = stride
self.pad = pad
self.x = None
self.arg_max = None
def forward(self, x):
N, C, H, W = x.shape
out_h = int(1 + (H - self.pool_h) / self.stride)
out_w = int(1 + (W - self.pool_w) / self.stride)
col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
col = col.reshape(-1, self.pool_h*self.pool_w)
arg_max = np.argmax(col, axis=1)
out = np.max(col, axis=1)
out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
self.x = x
self.arg_max = arg_max
return out
def backward(self, dout):
dout = dout.transpose(0, 2, 3, 1)
pool_size = self.pool_h * self.pool_w
dmax = np.zeros((dout.size, pool_size))
dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
dmax = dmax.reshape(dout.shape + (pool_size,))
dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
return dx
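A shape-level sketch of the im2col-based `Convolution` and `Pooling` layers on random data (my own sizes, matching the SimpleConvNet defaults used later in this PR):

```python
import numpy as np
from common.layers import Convolution, Pooling

x = np.random.randn(2, 1, 28, 28)          # N, C, H, W
W = 0.01 * np.random.randn(30, 1, 5, 5)    # FN, C, FH, FW
b = np.zeros(30)

conv = Convolution(W, b, stride=1, pad=0)
out = conv.forward(x)
print(out.shape)                           # (2, 30, 24, 24): (28 - 5)/1 + 1 = 24

pool = Pooling(pool_h=2, pool_w=2, stride=2)
print(pool.forward(out).shape)             # (2, 30, 12, 12)
```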

common/optimizer.py
@ -0,0 +1,130 @@
# coding: utf-8
import numpy as np
class SGD:
"""随机梯度下降法Stochastic Gradient Descent"""
def __init__(self, lr=0.01):
self.lr = lr
def update(self, params, grads):
for key in params.keys():
params[key] -= self.lr * grads[key]
class Momentum:
"""Momentum SGD"""
def __init__(self, lr=0.01, momentum=0.9):
self.lr = lr
self.momentum = momentum
self.v = None
def update(self, params, grads):
if self.v is None:
self.v = {}
for key, val in params.items():
self.v[key] = np.zeros_like(val)
for key in params.keys():
self.v[key] = self.momentum*self.v[key] - self.lr*grads[key]
params[key] += self.v[key]
class Nesterov:
"""Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901)"""
def __init__(self, lr=0.01, momentum=0.9):
self.lr = lr
self.momentum = momentum
self.v = None
def update(self, params, grads):
if self.v is None:
self.v = {}
for key, val in params.items():
self.v[key] = np.zeros_like(val)
for key in params.keys():
self.v[key] *= self.momentum
self.v[key] -= self.lr * grads[key]
params[key] += self.momentum * self.momentum * self.v[key]
params[key] -= (1 + self.momentum) * self.lr * grads[key]
class AdaGrad:
"""AdaGrad"""
def __init__(self, lr=0.01):
self.lr = lr
self.h = None
def update(self, params, grads):
if self.h is None:
self.h = {}
for key, val in params.items():
self.h[key] = np.zeros_like(val)
for key in params.keys():
self.h[key] += grads[key] * grads[key]
params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
class RMSprop:
"""RMSprop"""
def __init__(self, lr=0.01, decay_rate = 0.99):
self.lr = lr
self.decay_rate = decay_rate
self.h = None
def update(self, params, grads):
if self.h is None:
self.h = {}
for key, val in params.items():
self.h[key] = np.zeros_like(val)
for key in params.keys():
self.h[key] *= self.decay_rate
self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
class Adam:
"""Adam (http://arxiv.org/abs/1412.6980v8)"""
def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
self.lr = lr
self.beta1 = beta1
self.beta2 = beta2
self.iter = 0
self.m = None
self.v = None
def update(self, params, grads):
if self.m is None:
self.m, self.v = {}, {}
for key, val in params.items():
self.m[key] = np.zeros_like(val)
self.v[key] = np.zeros_like(val)
self.iter += 1
lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
for key in params.keys():
#self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key]
#self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2)
self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])
self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
#unbias_m += (1 - self.beta1) * (grads[key] - self.m[key]) # correct bias
#unbisa_b += (1 - self.beta2) * (grads[key]*grads[key] - self.v[key]) # correct bias
#params[key] += self.lr * unbias_m / (np.sqrt(unbisa_b) + 1e-7)
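A toy comparison (not part of the PR) of two of these optimizers on f(w) = w², whose gradient is 2w; the dictionaries mirror the `params`/`grads` interface that `update` expects:

```python
import numpy as np
from common.optimizer import SGD, Adam

for opt in (SGD(lr=0.1), Adam(lr=0.1)):
    params = {'w': np.array([5.0])}
    for _ in range(50):
        grads = {'w': 2 * params['w']}      # gradient of f(w) = w**2
        opt.update(params, grads)
    print(type(opt).__name__, params['w'])  # both should end up close to the minimum at 0
```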

common/trainer.py
@ -0,0 +1,78 @@
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # setting that allows importing files from the parent directory
import numpy as np
from common.optimizer import *
class Trainer:
"""进行神经网络的训练的类
"""
def __init__(self, network, x_train, t_train, x_test, t_test,
epochs=20, mini_batch_size=100,
optimizer='SGD', optimizer_param={'lr':0.01},
evaluate_sample_num_per_epoch=None, verbose=True):
self.network = network
self.verbose = verbose
self.x_train = x_train
self.t_train = t_train
self.x_test = x_test
self.t_test = t_test
self.epochs = epochs
self.batch_size = mini_batch_size
self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch
        # optimizer
        optimizer_class_dict = {'sgd':SGD, 'momentum':Momentum, 'nesterov':Nesterov,
                                'adagrad':AdaGrad, 'rmsprop':RMSprop, 'adam':Adam}
self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
self.train_size = x_train.shape[0]
self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
self.max_iter = int(epochs * self.iter_per_epoch)
self.current_iter = 0
self.current_epoch = 0
self.train_loss_list = []
self.train_acc_list = []
self.test_acc_list = []
def train_step(self):
batch_mask = np.random.choice(self.train_size, self.batch_size)
x_batch = self.x_train[batch_mask]
t_batch = self.t_train[batch_mask]
grads = self.network.gradient(x_batch, t_batch)
self.optimizer.update(self.network.params, grads)
loss = self.network.loss(x_batch, t_batch)
self.train_loss_list.append(loss)
if self.verbose: print("train loss:" + str(loss))
if self.current_iter % self.iter_per_epoch == 0:
self.current_epoch += 1
x_train_sample, t_train_sample = self.x_train, self.t_train
x_test_sample, t_test_sample = self.x_test, self.t_test
            if self.evaluate_sample_num_per_epoch is not None:
t = self.evaluate_sample_num_per_epoch
x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
train_acc = self.network.accuracy(x_train_sample, t_train_sample)
test_acc = self.network.accuracy(x_test_sample, t_test_sample)
self.train_acc_list.append(train_acc)
self.test_acc_list.append(test_acc)
if self.verbose: print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) + ", test acc:" + str(test_acc) + " ===")
self.current_iter += 1
def train(self):
for i in range(self.max_iter):
self.train_step()
test_acc = self.network.accuracy(self.x_test, self.t_test)
if self.verbose:
print("=============== Final Test Accuracy ===============")
print("test acc:" + str(test_acc))

common/util.py
@ -0,0 +1,99 @@
# coding: utf-8
import numpy as np
def smooth_curve(x):
"""用于使损失函数的图形变圆滑
参考http://glowingpython.blogspot.jp/2012/02/convolution-with-numpy.html
"""
window_len = 11
s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
w = np.kaiser(window_len, 2)
y = np.convolve(w/w.sum(), s, mode='valid')
return y[5:len(y)-5]
def shuffle_dataset(x, t):
"""打乱数据集
Parameters
----------
x : 训练数据
t : 监督数据
Returns
-------
x, t : 打乱的训练数据和监督数据
"""
permutation = np.random.permutation(x.shape[0])
x = x[permutation,:] if x.ndim == 2 else x[permutation,:,:,:]
t = t[permutation]
return x, t
def conv_output_size(input_size, filter_size, stride=1, pad=0):
return (input_size + 2*pad - filter_size) / stride + 1
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
"""
Parameters
----------
input_data : (数据量, 通道, , )的4维数组构成的输入数据
filter_h : 滤波器的高
filter_w : 滤波器的长
stride : 步幅
pad : 填充
Returns
-------
col : 2维数组
"""
N, C, H, W = input_data.shape
out_h = (H + 2*pad - filter_h)//stride + 1
out_w = (W + 2*pad - filter_w)//stride + 1
img = np.pad(input_data, [(0,0), (0,0), (pad, pad), (pad, pad)], 'constant')
col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))
for y in range(filter_h):
y_max = y + stride*out_h
for x in range(filter_w):
x_max = x + stride*out_w
col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
col = col.transpose(0, 4, 5, 1, 2, 3).reshape(N*out_h*out_w, -1)
return col
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
"""
Parameters
----------
col :
input_shape : 输入数据的形状(10, 1, 28, 28)
filter_h :
filter_w
stride
pad
Returns
-------
"""
N, C, H, W = input_shape
out_h = (H + 2*pad - filter_h)//stride + 1
out_w = (W + 2*pad - filter_w)//stride + 1
col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)
img = np.zeros((N, C, H + 2*pad + stride - 1, W + 2*pad + stride - 1))
for y in range(filter_h):
y_max = y + stride*out_h
for x in range(filter_w):
x_max = x + stride*out_w
img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]
return img[:, :, pad:H + pad, pad:W + pad]
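A tiny sketch (my own numbers) of what `im2col` produces and of `col2im` folding it back; with stride equal to the filter size the patches do not overlap, so the round trip reproduces the input exactly:

```python
import numpy as np
from common.util import im2col, col2im

x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)       # N=1, C=1, H=W=4
col = im2col(x, filter_h=2, filter_w=2, stride=2, pad=0)
print(col.shape)             # (4, 4): one row per output position, one column per filter element

img = col2im(col, x.shape, filter_h=2, filter_w=2, stride=2, pad=0)
print(np.allclose(img, x))   # True for non-overlapping patches
```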

dataset/mnist.py
@ -0,0 +1,131 @@
# coding: utf-8
try:
import urllib.request
except ImportError:
raise ImportError('You should use Python 3.x')
import os.path
import gzip
import pickle
import os
import numpy as np
url_base = 'http://yann.lecun.com/exdb/mnist/'
key_file = {
'train_img':'train-images-idx3-ubyte.gz',
'train_label':'train-labels-idx1-ubyte.gz',
'test_img':'t10k-images-idx3-ubyte.gz',
'test_label':'t10k-labels-idx1-ubyte.gz'
}
dataset_dir = os.path.dirname(os.path.abspath(__file__))
save_file = dataset_dir + "/mnist.pkl"
train_num = 60000
test_num = 10000
img_dim = (1, 28, 28)
img_size = 784
def _download(file_name):
file_path = dataset_dir + "/" + file_name
if os.path.exists(file_path):
return
print("Downloading " + file_name + " ... ")
urllib.request.urlretrieve(url_base + file_name, file_path)
print("Done")
def download_mnist():
for v in key_file.values():
_download(v)
def _load_label(file_name):
file_path = dataset_dir + "/" + file_name
print("Converting " + file_name + " to NumPy Array ...")
with gzip.open(file_path, 'rb') as f:
labels = np.frombuffer(f.read(), np.uint8, offset=8)
print("Done")
return labels
def _load_img(file_name):
file_path = dataset_dir + "/" + file_name
print("Converting " + file_name + " to NumPy Array ...")
with gzip.open(file_path, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=16)
data = data.reshape(-1, img_size)
print("Done")
return data
def _convert_numpy():
dataset = {}
dataset['train_img'] = _load_img(key_file['train_img'])
dataset['train_label'] = _load_label(key_file['train_label'])
dataset['test_img'] = _load_img(key_file['test_img'])
dataset['test_label'] = _load_label(key_file['test_label'])
return dataset
def init_mnist():
    '''
    Note: the dataset has already been downloaded locally; the first load saves it as a pickle file.
    '''
# download_mnist()
dataset = _convert_numpy()
print("Creating pickle file ...")
with open(save_file, 'wb') as f:
pickle.dump(dataset, f, -1)
print("Done!")
def _change_one_hot_label(X):
T = np.zeros((X.size, 10))
for idx, row in enumerate(T):
row[X[idx]] = 1
return T
def load_mnist(normalize=True, flatten=True, one_hot_label=False):
"""读入MNIST数据集
Parameters
----------
normalize : 将图像的像素值正规化为0.0~1.0
one_hot_label :
one_hot_label为True的情况下标签作为one-hot数组返回
one-hot数组是指[0,0,1,0,0,0,0,0,0,0]这样的数组
flatten : 是否将图像展开为一维数组
Returns
-------
(训练图像, 训练标签), (测试图像, 测试标签)
"""
if not os.path.exists(save_file):
init_mnist()
with open(save_file, 'rb') as f:
dataset = pickle.load(f)
if normalize:
for key in ('train_img', 'test_img'):
dataset[key] = dataset[key].astype(np.float32)
dataset[key] /= 255.0
if one_hot_label:
dataset['train_label'] = _change_one_hot_label(dataset['train_label'])
dataset['test_label'] = _change_one_hot_label(dataset['test_label'])
if not flatten:
for key in ('train_img', 'test_img'):
dataset[key] = dataset[key].reshape(-1, 1, 28, 28)
return (dataset['train_img'], dataset['train_label']), (dataset['test_img'], dataset['test_label'])
if __name__ == '__main__':
init_mnist()
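Typical usage of `load_mnist`, showing the shapes returned for the two flag combinations used elsewhere in this PR (a usage note of mine, not new project code):

```python
from dataset.mnist import load_mnist

# flattened 784-vectors, labels as plain integers
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
print(x_train.shape, t_train.shape)   # (60000, 784) (60000,)

# 4D images for the ConvNets, labels as one-hot vectors
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=False, one_hot_label=True)
print(x_train.shape, t_train.shape)   # (60000, 1, 28, 28) (60000, 10)
```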

deep_convnet.py
@ -0,0 +1,136 @@
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # setting that allows importing files from the parent directory
import pickle
import numpy as np
from collections import OrderedDict
from common.layers import *
class DeepConvNet:
"""识别率为99%以上的高精度的ConvNet
网络结构如下所示
conv - relu - conv- relu - pool -
conv - relu - conv- relu - pool -
conv - relu - conv- relu - pool -
affine - relu - dropout - affine - dropout - softmax
"""
def __init__(self, input_dim=(1, 28, 28),
conv_param_1 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
conv_param_2 = {'filter_num':16, 'filter_size':3, 'pad':1, 'stride':1},
conv_param_3 = {'filter_num':32, 'filter_size':3, 'pad':1, 'stride':1},
conv_param_4 = {'filter_num':32, 'filter_size':3, 'pad':2, 'stride':1},
conv_param_5 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
conv_param_6 = {'filter_num':64, 'filter_size':3, 'pad':1, 'stride':1},
hidden_size=50, output_size=10):
        # Initialize weights ===========
        # how many neurons of the previous layer each neuron is connected to, on average (TODO: compute automatically)
        pre_node_nums = np.array([1*3*3, 16*3*3, 16*3*3, 32*3*3, 32*3*3, 64*3*3, 64*4*4, hidden_size])
        weight_init_scales = np.sqrt(2.0 / pre_node_nums)  # recommended initial values when using ReLU
        self.params = {}
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate([conv_param_1, conv_param_2, conv_param_3, conv_param_4, conv_param_5, conv_param_6]):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(conv_param['filter_num'], pre_channel_num, conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            pre_channel_num = conv_param['filter_num']
        self.params['W7'] = weight_init_scales[6] * np.random.randn(64*4*4, hidden_size)
        self.params['b7'] = np.zeros(hidden_size)
        self.params['W8'] = weight_init_scales[7] * np.random.randn(hidden_size, output_size)
        self.params['b8'] = np.zeros(output_size)
        # Generate layers ===========
self.layers = []
self.layers.append(Convolution(self.params['W1'], self.params['b1'],
conv_param_1['stride'], conv_param_1['pad']))
self.layers.append(Relu())
self.layers.append(Convolution(self.params['W2'], self.params['b2'],
conv_param_2['stride'], conv_param_2['pad']))
self.layers.append(Relu())
self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
self.layers.append(Convolution(self.params['W3'], self.params['b3'],
conv_param_3['stride'], conv_param_3['pad']))
self.layers.append(Relu())
self.layers.append(Convolution(self.params['W4'], self.params['b4'],
conv_param_4['stride'], conv_param_4['pad']))
self.layers.append(Relu())
self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
self.layers.append(Convolution(self.params['W5'], self.params['b5'],
conv_param_5['stride'], conv_param_5['pad']))
self.layers.append(Relu())
self.layers.append(Convolution(self.params['W6'], self.params['b6'],
conv_param_6['stride'], conv_param_6['pad']))
self.layers.append(Relu())
self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
self.layers.append(Affine(self.params['W7'], self.params['b7']))
self.layers.append(Relu())
self.layers.append(Dropout(0.5))
self.layers.append(Affine(self.params['W8'], self.params['b8']))
self.layers.append(Dropout(0.5))
self.last_layer = SoftmaxWithLoss()
def predict(self, x, train_flg=False):
for layer in self.layers:
if isinstance(layer, Dropout):
x = layer.forward(x, train_flg)
else:
x = layer.forward(x)
return x
def loss(self, x, t):
y = self.predict(x, train_flg=True)
return self.last_layer.forward(y, t)
def accuracy(self, x, t, batch_size=100):
if t.ndim != 1 : t = np.argmax(t, axis=1)
acc = 0.0
for i in range(int(x.shape[0] / batch_size)):
tx = x[i*batch_size:(i+1)*batch_size]
tt = t[i*batch_size:(i+1)*batch_size]
y = self.predict(tx, train_flg=False)
y = np.argmax(y, axis=1)
acc += np.sum(y == tt)
return acc / x.shape[0]
def gradient(self, x, t):
# forward
self.loss(x, t)
# backward
dout = 1
dout = self.last_layer.backward(dout)
tmp_layers = self.layers.copy()
tmp_layers.reverse()
for layer in tmp_layers:
dout = layer.backward(dout)
        # collect the gradients
grads = {}
for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
grads['W' + str(i+1)] = self.layers[layer_idx].dW
grads['b' + str(i+1)] = self.layers[layer_idx].db
return grads
def save_params(self, file_name="params.pkl"):
params = {}
for key, val in self.params.items():
params[key] = val
with open(file_name, 'wb') as f:
pickle.dump(params, f)
def load_params(self, file_name="params.pkl"):
with open(file_name, 'rb') as f:
params = pickle.load(f)
for key, val in params.items():
self.params[key] = val
for i, layer_idx in enumerate((0, 2, 5, 7, 10, 12, 15, 18)):
self.layers[layer_idx].W = self.params['W' + str(i+1)]
self.layers[layer_idx].b = self.params['b' + str(i+1)]
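A sketch of evaluating the shipped deep network on the test set, assuming `deep_convnet_params.pkl` sits in the working directory (the commented-out lines in `mnist_cnn_gui_main.py` load the same file for the GUI); a full pass in pure NumPy takes a while:

```python
from dataset.mnist import load_mnist
from deep_convnet import DeepConvNet

(_, _), (x_test, t_test) = load_mnist(normalize=True, flatten=False, one_hot_label=False)

network = DeepConvNet()
network.load_params("deep_convnet_params.pkl")
print("test accuracy:", network.accuracy(x_test, t_test))   # expected above 0.99 per the docstring
```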

mnist_cnn_gui_main.py
@ -0,0 +1,192 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import sys, os
import numpy as np
from dataset.mnist import load_mnist
from PIL import Image, ImageQt
from qt.layout import Ui_MainWindow
from qt.paintboard import PaintBoard
from PyQt5.QtWidgets import QMainWindow, QDesktopWidget, QApplication
from PyQt5.QtWidgets import QLabel, QMessageBox, QPushButton, QFrame
from PyQt5.QtGui import QPainter, QPen, QPixmap, QColor, QImage
from PyQt5.QtCore import Qt, QPoint, QSize
from simple_convnet import SimpleConvNet
from common.functions import softmax
from deep_convnet import DeepConvNet
MODE_MNIST = 1    # draw a random sample from MNIST
MODE_WRITE = 2    # handwritten mouse input
Thresh = 0.5      # confidence threshold for the recognition result

# Load the MNIST dataset
(_, _), (x_test, _) = load_mnist(normalize=True, flatten=False, one_hot_label=False)

# Initialize the network
# Network 1: simple CNN
"""
conv - relu - pool - affine - relu - affine - softmax
"""
network = SimpleConvNet(input_dim=(1,28,28),
conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
hidden_size=100, output_size=10, weight_init_std=0.01)
network.load_params("params.pkl")
# Network 2: deep CNN
# network = DeepConvNet()
# network.load_params("deep_convnet_params.pkl")
class MainWindow(QMainWindow,Ui_MainWindow):
def __init__(self):
super(MainWindow,self).__init__()
        # Initialize parameters
self.mode = MODE_MNIST
self.result = [0, 0]
        # Initialize the UI
self.setupUi(self)
self.center()
        # Initialize the paint board
self.paintBoard = PaintBoard(self, Size = QSize(224, 224), Fill = QColor(0,0,0,0))
self.paintBoard.setPenColor(QColor(0,0,0,0))
self.dArea_Layout.addWidget(self.paintBoard)
self.clearDataArea()
    # Center the window on the screen
    def center(self):
        # get the window frame geometry
        framePos = self.frameGeometry()
        # get the center point of the screen
        scPos = QDesktopWidget().availableGeometry().center()
        # move the frame to the screen center
framePos.moveCenter(scPos)
self.move(framePos.topLeft())
    # Window close event
def closeEvent(self, event):
reply = QMessageBox.question(self, 'Message',
"Are you sure to quit?", QMessageBox.Yes |
QMessageBox.No, QMessageBox.Yes)
if reply == QMessageBox.Yes:
event.accept()
else:
event.ignore()
    # Clear the data input area
def clearDataArea(self):
self.paintBoard.Clear()
self.lbDataArea.clear()
self.lbResult.clear()
self.lbCofidence.clear()
self.result = [0, 0]
"""
回调函数
"""
# 模式下拉列表回调
def cbBox_Mode_Callback(self, text):
if text == '1MINIST随机抽取':
self.mode = MODE_MNIST
self.clearDataArea()
self.pbtGetMnist.setEnabled(True)
self.paintBoard.setBoardFill(QColor(0,0,0,0))
self.paintBoard.setPenColor(QColor(0,0,0,0))
elif text == '2鼠标手写输入':
self.mode = MODE_WRITE
self.clearDataArea()
self.pbtGetMnist.setEnabled(False)
            # change the background
self.paintBoard.setBoardFill(QColor(0,0,0,255))
self.paintBoard.setPenColor(QColor(255,255,255,255))
    # Clear data
def pbtClear_Callback(self):
self.clearDataArea()
    # Recognize
def pbtPredict_Callback(self):
        __img, img_array = [], []  # convert the image uniformly: QImage -> PIL Image -> np.array [1, 1, 28, 28]
        # get the image as a QImage
if self.mode == MODE_MNIST:
            __img = self.lbDataArea.pixmap()  # returns None if the label holds no image
            if __img is None:  # use pure black if there is no image
# __img = QImage(224, 224, QImage.Format_Grayscale8)
__img = ImageQt.ImageQt(Image.fromarray(np.uint8(np.zeros([224,224]))))
else: __img = __img.toImage()
elif self.mode == MODE_WRITE:
__img = self.paintBoard.getContentAsQImage()
        # convert to a PIL image for processing
pil_img = ImageQt.fromqimage(__img)
pil_img = pil_img.resize((28, 28), Image.ANTIALIAS)
# pil_img.save('test.png')
img_array = np.array(pil_img.convert('L')).reshape(1,1,28, 28) / 255.0
# img_array = np.where(img_array>0.5, 1, 0)
        # reshaped into the network's input format
__result = network.predict(img_array) # shape:[1, 10]
# print (__result)
        # pass the prediction through softmax
__result = softmax(__result)
        self.result[0] = np.argmax(__result)          # predicted digit
        self.result[1] = __result[0, self.result[0]]  # confidence
self.lbResult.setText("%d" % (self.result[0]))
self.lbCofidence.setText("%.8f" % (self.result[1]))
    # Random MNIST sample
def pbtGetMnist_Callback(self):
self.clearDataArea()
        # randomly pick one test-set image and display it enlarged
        img = x_test[np.random.randint(0, x_test.shape[0])]  # shape: [1, 28, 28]
        img = img.reshape(28, 28)                             # shape: [28, 28]
        img = img * 0xff                                      # restore the grayscale value range
pil_img = Image.fromarray(np.uint8(img))
        pil_img = pil_img.resize((224, 224))  # enlarge the image for display
        # convert the PIL image to a QImage
qimage = ImageQt.ImageQt(pil_img)
        # show the QImage in the label
pix = QPixmap.fromImage(qimage)
self.lbDataArea.setPixmap(pix)
if __name__ == "__main__":
app = QApplication(sys.argv)
Gui = MainWindow()
Gui.show()
sys.exit(app.exec_())

Binary file not shown.

qt/layout.py
@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'layout.ui'
#
# Created by: PyQt5 UI code generator 5.14.1
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(800, 600)
self.cbBox_Mode = QtWidgets.QComboBox(MainWindow)
self.cbBox_Mode.setGeometry(QtCore.QRect(60, 300, 211, 31))
self.cbBox_Mode.setObjectName("cbBox_Mode")
self.cbBox_Mode.addItem("")
self.cbBox_Mode.addItem("")
self.label = QtWidgets.QLabel(MainWindow)
self.label.setGeometry(QtCore.QRect(60, 270, 80, 20))
self.label.setObjectName("label")
self.pbtClear = QtWidgets.QPushButton(MainWindow)
self.pbtClear.setGeometry(QtCore.QRect(80, 440, 120, 30))
self.pbtClear.setStyleSheet("")
self.pbtClear.setCheckable(False)
self.pbtClear.setChecked(False)
self.pbtClear.setObjectName("pbtClear")
self.pbtGetMnist = QtWidgets.QPushButton(MainWindow)
self.pbtGetMnist.setGeometry(QtCore.QRect(80, 380, 120, 30))
self.pbtGetMnist.setCheckable(False)
self.pbtGetMnist.setObjectName("pbtGetMnist")
self.pbtPredict = QtWidgets.QPushButton(MainWindow)
self.pbtPredict.setGeometry(QtCore.QRect(80, 500, 120, 30))
self.pbtPredict.setStyleSheet("")
self.pbtPredict.setObjectName("pbtPredict")
self.lbDataArea = QtWidgets.QLabel(MainWindow)
self.lbDataArea.setGeometry(QtCore.QRect(540, 350, 224, 224))
self.lbDataArea.setMouseTracking(False)
self.lbDataArea.setStyleSheet("background-color: rgb(255, 255, 255);")
self.lbDataArea.setFrameShape(QtWidgets.QFrame.Box)
self.lbDataArea.setFrameShadow(QtWidgets.QFrame.Sunken)
self.lbDataArea.setLineWidth(4)
self.lbDataArea.setMidLineWidth(0)
self.lbDataArea.setText("")
self.lbDataArea.setObjectName("lbDataArea")
self.label_3 = QtWidgets.QLabel(MainWindow)
self.label_3.setGeometry(QtCore.QRect(260, 340, 91, 181))
self.label_3.setObjectName("label_3")
self.label_4 = QtWidgets.QLabel(MainWindow)
self.label_4.setGeometry(QtCore.QRect(540, 320, 131, 20))
self.label_4.setObjectName("label_4")
self.label_5 = QtWidgets.QLabel(MainWindow)
self.label_5.setGeometry(QtCore.QRect(20, 10, 711, 241))
self.label_5.setObjectName("label_5")
self.verticalLayoutWidget = QtWidgets.QWidget(MainWindow)
self.verticalLayoutWidget.setGeometry(QtCore.QRect(540, 350, 221, 221))
self.verticalLayoutWidget.setObjectName("verticalLayoutWidget")
self.dArea_Layout = QtWidgets.QVBoxLayout(self.verticalLayoutWidget)
self.dArea_Layout.setContentsMargins(0, 0, 0, 0)
self.dArea_Layout.setSpacing(0)
self.dArea_Layout.setObjectName("dArea_Layout")
self.lbResult = QtWidgets.QLabel(MainWindow)
self.lbResult.setGeometry(QtCore.QRect(380, 350, 91, 131))
font = QtGui.QFont()
font.setPointSize(48)
self.lbResult.setFont(font)
self.lbResult.setObjectName("lbResult")
self.lbCofidence = QtWidgets.QLabel(MainWindow)
self.lbCofidence.setGeometry(QtCore.QRect(360, 500, 151, 21))
font = QtGui.QFont()
font.setPointSize(12)
self.lbCofidence.setFont(font)
self.lbCofidence.setObjectName("lbCofidence")
self.retranslateUi(MainWindow)
self.cbBox_Mode.activated['QString'].connect(MainWindow.cbBox_Mode_Callback)
self.pbtClear.clicked.connect(MainWindow.pbtClear_Callback)
self.pbtPredict.clicked.connect(MainWindow.pbtPredict_Callback)
self.pbtGetMnist.clicked.connect(MainWindow.pbtGetMnist_Callback)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "手写数字识别GUI-v1.0 --by hamlin"))
self.cbBox_Mode.setItemText(0, _translate("MainWindow", "1MINIST随机抽取"))
self.cbBox_Mode.setItemText(1, _translate("MainWindow", "2鼠标手写输入"))
self.label.setText(_translate("MainWindow", "模式选择"))
self.pbtClear.setText(_translate("MainWindow", "清除数据"))
self.pbtGetMnist.setText(_translate("MainWindow", "MNIST抽取"))
self.pbtPredict.setText(_translate("MainWindow", "识别"))
self.label_3.setText(_translate("MainWindow", "<html><head/><body><p><span style=\" font-size:12pt; font-weight:600;\">识别结果:</span></p><p><br/></p><p><br/></p><p><span style=\" font-size:12pt; font-weight:600;\">Softmax</span></p></body></html>"))
self.label_4.setText(_translate("MainWindow", "数据输入区域"))
self.label_5.setText(_translate("MainWindow", "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head><body style=\" font-family:\'Ubuntu\'; font-size:11pt; font-weight:400; font-style:normal;\">\n"
"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:\'Ubuntu\'; font-weight:600;\">使用说明</span></p>\n"
"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:\'Ubuntu\';\">1、点击下拉列表进行模式选择输入待识别数据后点击“识别”按键进行识别</span></p>\n"
"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:\'Ubuntu\';\">2、经CNN网络计算后输出显示识别结果与Softmax值</span></p>\n"
"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:\'Ubuntu\';\">3、点击“清除数据”按键重新输入数据</span></p>\n"
"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:\'Ubuntu\';\">模式1随机从测试集抽取图像作为待识别数据点击“MNIST抽取”按键抽取</span></p>\n"
"<p style=\" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><span style=\" font-family:\'Ubuntu\';\">模式2使用鼠标在数据输入区域手写输入作为待识别数据</span></p></body></html>"))
self.lbResult.setText(_translate("MainWindow", "9"))
self.lbCofidence.setText(_translate("MainWindow", "0.99999999"))

qt/layout.ui
@ -0,0 +1,319 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>MainWindow</class>
<widget class="QDialog" name="MainWindow">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>800</width>
<height>600</height>
</rect>
</property>
<property name="windowTitle">
<string>手写数字识别GUI-v1.0 --by hamlin</string>
</property>
<widget class="QComboBox" name="cbBox_Mode">
<property name="geometry">
<rect>
<x>60</x>
<y>300</y>
<width>211</width>
<height>31</height>
</rect>
</property>
<item>
<property name="text">
<string>1MINIST随机抽取</string>
</property>
</item>
<item>
<property name="text">
<string>2鼠标手写输入</string>
</property>
</item>
</widget>
<widget class="QLabel" name="label">
<property name="geometry">
<rect>
<x>60</x>
<y>270</y>
<width>80</width>
<height>20</height>
</rect>
</property>
<property name="text">
<string>模式选择</string>
</property>
</widget>
<widget class="QPushButton" name="pbtClear">
<property name="geometry">
<rect>
<x>80</x>
<y>440</y>
<width>120</width>
<height>30</height>
</rect>
</property>
<property name="styleSheet">
<string notr="true"/>
</property>
<property name="text">
<string>清除数据</string>
</property>
<property name="checkable">
<bool>false</bool>
</property>
<property name="checked">
<bool>false</bool>
</property>
</widget>
<widget class="QPushButton" name="pbtGetMnist">
<property name="geometry">
<rect>
<x>80</x>
<y>380</y>
<width>120</width>
<height>30</height>
</rect>
</property>
<property name="text">
<string>MNIST抽取</string>
</property>
<property name="checkable">
<bool>false</bool>
</property>
</widget>
<widget class="QPushButton" name="pbtPredict">
<property name="geometry">
<rect>
<x>80</x>
<y>500</y>
<width>120</width>
<height>30</height>
</rect>
</property>
<property name="styleSheet">
<string notr="true"/>
</property>
<property name="text">
<string>识别</string>
</property>
</widget>
<widget class="QLabel" name="lbDataArea">
<property name="geometry">
<rect>
<x>540</x>
<y>350</y>
<width>224</width>
<height>224</height>
</rect>
</property>
<property name="mouseTracking">
<bool>false</bool>
</property>
<property name="styleSheet">
<string notr="true">background-color: rgb(255, 255, 255);</string>
</property>
<property name="frameShape">
<enum>QFrame::Box</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Sunken</enum>
</property>
<property name="lineWidth">
<number>4</number>
</property>
<property name="midLineWidth">
<number>0</number>
</property>
<property name="text">
<string/>
</property>
</widget>
<widget class="QLabel" name="label_3">
<property name="geometry">
<rect>
<x>260</x>
<y>340</y>
<width>91</width>
<height>181</height>
</rect>
</property>
<property name="text">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;&lt;span style=&quot; font-size:12pt; font-weight:600;&quot;&gt;识别结果:&lt;/span&gt;&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;&lt;br/&gt;&lt;/p&gt;&lt;p&gt;&lt;span style=&quot; font-size:12pt; font-weight:600;&quot;&gt;Softmax&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
<widget class="QLabel" name="label_4">
<property name="geometry">
<rect>
<x>540</x>
<y>320</y>
<width>131</width>
<height>20</height>
</rect>
</property>
<property name="text">
<string>数据输入区域</string>
</property>
</widget>
<widget class="QLabel" name="label_5">
<property name="geometry">
<rect>
<x>20</x>
<y>10</y>
<width>711</width>
<height>241</height>
</rect>
</property>
<property name="text">
<string>&lt;!DOCTYPE HTML PUBLIC &quot;-//W3C//DTD HTML 4.0//EN&quot; &quot;http://www.w3.org/TR/REC-html40/strict.dtd&quot;&gt;
&lt;html&gt;&lt;head&gt;&lt;meta name=&quot;qrichtext&quot; content=&quot;1&quot; /&gt;&lt;style type=&quot;text/css&quot;&gt;
p, li { white-space: pre-wrap; }
&lt;/style&gt;&lt;/head&gt;&lt;body style=&quot; font-family:'Ubuntu'; font-size:11pt; font-weight:400; font-style:normal;&quot;&gt;
&lt;p style=&quot; margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;&quot;&gt;&lt;span style=&quot; font-family:'Ubuntu'; font-weight:600;&quot;&gt;使用说明&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot; margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;&quot;&gt;&lt;span style=&quot; font-family:'Ubuntu';&quot;&gt;1、点击下拉列表进行模式选择输入待识别数据后点击“识别”按键进行识别&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot; margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;&quot;&gt;&lt;span style=&quot; font-family:'Ubuntu';&quot;&gt;2、经CNN网络计算后输出显示识别结果与Softmax值&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot; margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;&quot;&gt;&lt;span style=&quot; font-family:'Ubuntu';&quot;&gt;3、点击“清除数据”按键重新输入数据&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot; margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;&quot;&gt;&lt;span style=&quot; font-family:'Ubuntu';&quot;&gt;模式1随机从测试集抽取图像作为待识别数据点击“MNIST抽取”按键抽取&lt;/span&gt;&lt;/p&gt;
&lt;p style=&quot; margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;&quot;&gt;&lt;span style=&quot; font-family:'Ubuntu';&quot;&gt;模式2使用鼠标在数据输入区域手写输入作为待识别数据&lt;/span&gt;&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
<widget class="QWidget" name="verticalLayoutWidget">
<property name="geometry">
<rect>
<x>540</x>
<y>350</y>
<width>221</width>
<height>221</height>
</rect>
</property>
<layout class="QVBoxLayout" name="dArea_Layout">
<property name="spacing">
<number>0</number>
</property>
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
</layout>
</widget>
<widget class="QLabel" name="lbResult">
<property name="geometry">
<rect>
<x>380</x>
<y>350</y>
<width>91</width>
<height>131</height>
</rect>
</property>
<property name="font">
<font>
<pointsize>48</pointsize>
</font>
</property>
<property name="text">
<string>9</string>
</property>
</widget>
<widget class="QLabel" name="lbCofidence">
<property name="geometry">
<rect>
<x>360</x>
<y>500</y>
<width>151</width>
<height>21</height>
</rect>
</property>
<property name="font">
<font>
<pointsize>12</pointsize>
</font>
</property>
<property name="text">
<string>0.99999999</string>
</property>
</widget>
</widget>
<resources/>
<connections>
<connection>
<sender>cbBox_Mode</sender>
<signal>activated(QString)</signal>
<receiver>MainWindow</receiver>
<slot>cbBox_Mode_Callback()</slot>
<hints>
<hint type="sourcelabel">
<x>178</x>
<y>317</y>
</hint>
<hint type="destinationlabel">
<x>158</x>
<y>168</y>
</hint>
</hints>
</connection>
<connection>
<sender>pbtClear</sender>
<signal>clicked()</signal>
<receiver>MainWindow</receiver>
<slot>pbtClear_Callback()</slot>
<hints>
<hint type="sourcelabel">
<x>131</x>
<y>460</y>
</hint>
<hint type="destinationlabel">
<x>105</x>
<y>341</y>
</hint>
</hints>
</connection>
<connection>
<sender>pbtPredict</sender>
<signal>clicked()</signal>
<receiver>MainWindow</receiver>
<slot>pbtPredict_Callback()</slot>
<hints>
<hint type="sourcelabel">
<x>157</x>
<y>517</y>
</hint>
<hint type="destinationlabel">
<x>157</x>
<y>542</y>
</hint>
</hints>
</connection>
<connection>
<sender>pbtGetMnist</sender>
<signal>clicked()</signal>
<receiver>MainWindow</receiver>
<slot>pbtGetMnist_Callback()</slot>
<hints>
<hint type="sourcelabel">
<x>102</x>
<y>399</y>
</hint>
<hint type="destinationlabel">
<x>29</x>
<y>401</y>
</hint>
</hints>
</connection>
</connections>
<slots>
<slot>cbBox_Mode_Callback()</slot>
<slot>pbtClear_Callback()</slot>
<slot>pbtPredict_Callback()</slot>
<slot>pbtGetMnist_Callback()</slot>
</slots>
</ui>

qt/paintboard.py
@ -0,0 +1,82 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import sys
from PyQt5.QtWidgets import QWidget, QApplication
from PyQt5.QtGui import QPixmap, QPainter, QPen, QColor
from PyQt5.QtCore import Qt, QPoint, QSize
class PaintBoard(QWidget):
def __init__(self, Parent = None, Size = QSize(320, 240), Fill = QColor(255,255,255,255)):
super().__init__(Parent)
        # Initialize parameters
        self.__size = Size                   # paint board size
        self.__fill = Fill                   # default fill color of the board
        self.__thickness = 18                # default pen thickness
        self.__penColor = QColor(0,0,0,255)  # default pen color
self.__begin_point = QPoint()
self.__end_point = QPoint()
        # Initialize the paint board surface
self.__board = QPixmap(self.__size)
self.__board.fill(Fill)
self.setFixedSize(self.__size)
        self.__painter = QPainter()  # create the painter
    # Clear the board
def Clear(self):
self.__board.fill(self.__fill)
self.update()
def setBoardFill(self, fill):
self.__fill = fill
self.__board.fill(fill)
self.update()
    # Set the pen color
def setPenColor(self, color):
self.__penColor = color
    # Set the pen thickness
def setPenThickness(self, thickness=10):
self.__thickness = thickness
    # Get the board contents as a QImage
def getContentAsQImage(self):
image = self.__board.toImage()
return image
    # Double-buffered drawing: paint event
def paintEvent(self, paintEvent):
self.__painter.begin(self)
self.__painter.drawPixmap(0,0,self.__board)
self.__painter.end()
def mousePressEvent(self, mouseEvent):
if mouseEvent.button() == Qt.LeftButton:
self.__begin_point = mouseEvent.pos()
self.__end_point = self.__begin_point
# self.update()
def mouseMoveEvent(self, mouseEvent):
if mouseEvent.buttons() == Qt.LeftButton:
self.__end_point = mouseEvent.pos()
            # draw into the buffer
self.__painter.begin(self.__board)
self.__painter.setPen(QPen(self.__penColor,self.__thickness))
self.__painter.drawLine(self.__begin_point, self.__end_point)
self.__painter.end()
self.__begin_point = self.__end_point
self.update()
if __name__ == '__main__':
app = QApplication(sys.argv)
demo = PaintBoard()
demo.show()
sys.exit(app.exec_())

qt/ui2py.sh
@ -0,0 +1,2 @@
#!/bin/bash
pyuic5 -o layout.py layout.ui

simple_convnet.py
@ -0,0 +1,160 @@
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # setting that allows importing files from the parent directory
import pickle
import numpy as np
from collections import OrderedDict
from common.layers import *
from common.gradient import numerical_gradient
class SimpleConvNet:
"""简单的ConvNet
conv - relu - pool - affine - relu - affine - softmax
Parameters
----------
input_size : 输入大小MNIST的情况下为784
hidden_size_list : 隐藏层的神经元数量的列表e.g. [100, 100, 100]
output_size : 输出大小MNIST的情况下为10
activation : 'relu' or 'sigmoid'
weight_init_std : 指定权重的标准差e.g. 0.01
指定'relu''he'的情况下设定He的初始值
指定'sigmoid''xavier'的情况下设定Xavier的初始值
"""
def __init__(self, input_dim=(1, 28, 28),
conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
hidden_size=100, output_size=10, weight_init_std=0.01):
filter_num = conv_param['filter_num']
filter_size = conv_param['filter_size']
filter_pad = conv_param['pad']
filter_stride = conv_param['stride']
input_size = input_dim[1]
conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))
        # Initialize weights
self.params = {}
self.params['W1'] = weight_init_std * \
np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
self.params['b1'] = np.zeros(filter_num)
self.params['W2'] = weight_init_std * \
np.random.randn(pool_output_size, hidden_size)
self.params['b2'] = np.zeros(hidden_size)
self.params['W3'] = weight_init_std * \
np.random.randn(hidden_size, output_size)
self.params['b3'] = np.zeros(output_size)
        # Generate layers
self.layers = OrderedDict()
self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
conv_param['stride'], conv_param['pad'])
self.layers['Relu1'] = Relu()
self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
self.layers['Relu2'] = Relu()
self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
self.last_layer = SoftmaxWithLoss()
def predict(self, x):
for layer in self.layers.values():
x = layer.forward(x)
return x
def loss(self, x, t):
"""求损失函数
参数x是输入数据t是教师标签
"""
y = self.predict(x)
return self.last_layer.forward(y, t)
def accuracy(self, x, t, batch_size=100):
if t.ndim != 1 : t = np.argmax(t, axis=1)
acc = 0.0
for i in range(int(x.shape[0] / batch_size)):
tx = x[i*batch_size:(i+1)*batch_size]
tt = t[i*batch_size:(i+1)*batch_size]
y = self.predict(tx)
y = np.argmax(y, axis=1)
acc += np.sum(y == tt)
return acc / x.shape[0]
def numerical_gradient(self, x, t):
"""求梯度(数值微分)
Parameters
----------
x : 输入数据
t : 教师标签
Returns
-------
具有各层的梯度的字典变量
grads['W1']grads['W2']...是各层的权重
grads['b1']grads['b2']...是各层的偏置
"""
loss_w = lambda w: self.loss(x, t)
grads = {}
for idx in (1, 2, 3):
grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
return grads
def gradient(self, x, t):
"""求梯度(误差反向传播法)
Parameters
----------
x : 输入数据
t : 教师标签
Returns
-------
具有各层的梯度的字典变量
grads['W1']grads['W2']...是各层的权重
grads['b1']grads['b2']...是各层的偏置
"""
# forward
self.loss(x, t)
# backward
dout = 1
dout = self.last_layer.backward(dout)
layers = list(self.layers.values())
layers.reverse()
for layer in layers:
dout = layer.backward(dout)
        # collect the gradients
grads = {}
grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
return grads
def save_params(self, file_name="params.pkl"):
params = {}
for key, val in self.params.items():
params[key] = val
with open(file_name, 'wb') as f:
pickle.dump(params, f)
def load_params(self, file_name="params.pkl"):
with open(file_name, 'rb') as f:
params = pickle.load(f)
for key, val in params.items():
self.params[key] = val
for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
self.layers[key].W = self.params['W' + str(i+1)]
self.layers[key].b = self.params['b' + str(i+1)]
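A gradient check (my own sketch, not part of the PR) contrasting `numerical_gradient` with the backprop `gradient` on a deliberately tiny network and batch, since numerical differentiation over every parameter is slow:

```python
import numpy as np
from simple_convnet import SimpleConvNet

network = SimpleConvNet(input_dim=(1, 10, 10),
                        conv_param={'filter_num': 2, 'filter_size': 3, 'pad': 0, 'stride': 1},
                        hidden_size=5, output_size=10, weight_init_std=0.01)
x = np.random.rand(2, 1, 10, 10)       # two random 10x10 "images"
t = np.array([1, 7])                   # arbitrary labels

grad_num = network.numerical_gradient(x, t)
grad_bp  = network.gradient(x, t)
for key in grad_num:
    diff = np.average(np.abs(grad_num[key] - grad_bp[key]))
    print(key, diff)                   # every difference should be close to zero
```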

train_convnet.py
@ -0,0 +1,42 @@
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # setting that allows importing files from the parent directory
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from simple_convnet import SimpleConvNet
from common.trainer import Trainer
# Load data
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

# Reduce the amount of data if processing takes too long
#x_train, t_train = x_train[:5000], t_train[:5000]
#x_test, t_test = x_test[:1000], t_test[:1000]
max_epochs = 20
network = SimpleConvNet(input_dim=(1,28,28),
conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
epochs=max_epochs, mini_batch_size=100,
optimizer='Adam', optimizer_param={'lr': 0.001},
evaluate_sample_num_per_epoch=1000)
trainer.train()
# Save the parameters
network.save_params("params.pkl")
print("Saved Network Parameters!")
# Plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()

train_deepnet.py
@ -0,0 +1,21 @@
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # setting that allows importing from the parent directory
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from deep_convnet import DeepConvNet
from common.trainer import Trainer
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
network = DeepConvNet()
trainer = Trainer(network, x_train, t_train, x_test, t_test,
epochs=20, mini_batch_size=100,
optimizer='Adam', optimizer_param={'lr':0.001},
evaluate_sample_num_per_epoch=1000)
trainer.train()
# Save the parameters
network.save_params("deep_convnet_params.pkl")
print("Saved Network Parameters!")