|
|
import numpy as np # numpy当中axis=0表示按行操作,axis=1表示按列操作
|
|
|
import matplotlib.pyplot as plt
|
|
|
import tensorflow as tf
|
|
|
import pandas as pd
|
|
|
from sklearn.neighbors import KNeighborsClassifier
|
|
|
|
|
|
# Single-layer neural network demo on the iris data set.
# Each CSV file is fetched with tf.keras.utils.get_file, using the last
# segment of its URL as the local file name; the returned paths are what the
# rest of the script reads from.
Train_aggdress = "http://download.tensorflow.org/data/iris_training.csv"
Test_aggdress = "http://download.tensorflow.org/data/iris_test.csv"

Train_path = tf.keras.utils.get_file(
    fname=Train_aggdress.rsplit('/', 1)[-1],
    origin=Train_aggdress,
)
Test_path = tf.keras.utils.get_file(
    fname=Test_aggdress.rsplit('/', 1)[-1],
    origin=Test_aggdress,
)

# Column names: sepal length, sepal width, petal length, petal width and
# species (setosa, versicolor, virginica).
c_name = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Species']

# Load the training CSV with explicit column names and convert it to a NumPy
# array so that columns can be addressed by position.
df_iris = pd.read_csv(Train_path, names=c_name, header=0)
iris = np.array(df_iris)

# One figure holding a 4x4 grid: every off-diagonal cell is a scatter plot of
# one feature against another, coloured by the species column (index 4).
figure = plt.figure('Iris Data', figsize=(12, 9))
figure.suptitle(
    "Anderson's Iris Data set\n(Blue-->Setosa | Red-->Versicolor | Green-->Virginica)",
    fontsize=16,
)  # original author's note: Chinese text cannot be used in this title

for cell in range(16):
    row, col = divmod(cell, 4)
    plt.subplot(4, 4, cell + 1)  # subplot indices are 1-based, row-major
    if row != col:
        plt.scatter(iris[:, col], iris[:, row], c=iris[:, 4], cmap='brg')
    else:
        # Diagonal cells only show the feature name.
        plt.text(0.3, 0.4, c_name[row], fontsize=11)
    if row == 0:
        plt.title(c_name[col])  # column headings on the top row
    if col == 0:
        plt.ylabel(c_name[row])  # row labels on the left-most column

plt.show()

# --- Data preparation ---
# Read the training and test CSV files; header=0 makes pandas consume the
# first line of each file as the header row.
df_iris_train = pd.read_csv(Train_path, header=0)
df_iris_test = pd.read_csv(Test_path, header=0)

# Convert both data frames to NumPy arrays for positional slicing.
iris_train = np.array(df_iris_train)
iris_test = np.array(df_iris_test)

# Columns 0-3 hold the feature values, column 4 holds the class label.
x_train = iris_train[:, 0:4]
y_train = iris_train[:, 4]
x_test = iris_test[:, 0:4]
y_test = iris_test[:, 4]

# Keep references to the raw (un-centred) training data for the KNN
# classifier; the re-assignments below create new arrays and leave these
# untouched.
x_train_knn = x_train
y_train_knn = y_train

# Mean-centre the features. The per-feature mean is computed on the training
# set only and applied to BOTH sets, so that the test data goes through
# exactly the same transformation the model was trained with (centring the
# test set with its own mean would shift it differently).
train_mean = np.mean(x_train, axis=0)
x_train = x_train - train_mean
x_test = x_test - train_mean

# Features must be float32 tensors to be multiplied with the float32 weights.
X_train = tf.cast(x_train, tf.float32)
X_test = tf.cast(x_test, tf.float32)

# Labels are integer class codes; turn them into one-hot vectors of depth 3
# for the categorical cross-entropy loss.
Y_train = tf.one_hot(tf.constant(y_train, dtype=tf.int32), 3)
Y_test = tf.one_hot(tf.constant(y_test, dtype=tf.int32), 3)

# Hyper-parameters.
learn_rate = 0.55  # step size of the gradient-descent update
display_step = 1  # progress is printed every `display_step` iterations

# Initial model parameters. The NumPy seed is fixed before drawing the
# weights so the random initial values are the same on every run.
np.random.seed(600)
# 4x3 weight matrix (4 input features -> 3 classes), standard-normal values.
W = tf.Variable(np.random.randn(4, 3), dtype=tf.float32)
# Bias vector with one entry per class, starting at zero.
B = tf.Variable(np.zeros(3), dtype=tf.float32)

# Per-iteration history of accuracy and loss on the training / test sets.
acc_train, acc_test = [], []
loss_train, loss_test = [], []

# --- Training: 100 iterations of full-batch gradient descent ---
print("训练开始!")

for i in range(1, 101):
    with tf.GradientTape() as t:
        # Only the training forward pass needs to be recorded, because only
        # LOSS_train is differentiated below.
        # Each row of PRED_train holds the softmax probabilities of the three
        # classes: the LARGER a value, the MORE likely the sample belongs to
        # that class.
        PRED_train = tf.nn.softmax(tf.matmul(X_train, W) + B)
        # categorical_crossentropy takes the one-hot labels and the softmax
        # output and yields one loss value per sample; average them.
        LOSS_train = tf.reduce_mean(
            tf.keras.losses.categorical_crossentropy(y_true=Y_train, y_pred=PRED_train))

    # Test-set evaluation is monitoring only, so it is kept off the tape.
    PRED_test = tf.nn.softmax(tf.matmul(X_test, W) + B)
    LOSS_test = tf.reduce_mean(
        tf.keras.losses.categorical_crossentropy(y_true=Y_test, y_pred=PRED_test))

    # Accuracy = fraction of samples whose most probable class equals the label.
    accuracy_train = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(PRED_train.numpy(), axis=1), y_train), tf.float32))
    accuracy_test = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(PRED_test.numpy(), axis=1), y_test), tf.float32))

    # Record the metrics; they describe the parameters BEFORE this
    # iteration's update is applied.
    acc_train.append(accuracy_train)
    acc_test.append(accuracy_test)
    loss_train.append(LOSS_train)
    loss_test.append(LOSS_test)

    # Gradients of the training loss w.r.t. W (grads[0]) and B (grads[1]),
    # followed by the gradient-descent update of both parameters.
    grads = t.gradient(LOSS_train, [W, B])
    W.assign_sub(learn_rate * grads[0])
    B.assign_sub(learn_rate * grads[1])

    if i % display_step == 0:
        print("训练次数: %i次, 训练集准确度:%f, 训练集损失值: %f, 测试集准确度: %f, 测试集损失值: %f"
              % (i, accuracy_train, LOSS_train, accuracy_test, LOSS_test))

print("训练结束~")
print()

# Debug aid: print(tf.argmax(PRED_train.numpy(), axis=1)) shows the class
# predicted for every training sample after training.

# Figure 1: loss per iteration on the training and test sets.
plt.figure(figsize=(6, 5))
plt.subplot(111)
plt.plot(loss_train, color="blue", label="train")
plt.plot(loss_test, color="yellow", label="test")
plt.xlabel("Iteration")
plt.ylabel("LOSS")
plt.legend()

# Figure 2: accuracy per iteration on the training and test sets.
plt.figure(figsize=(6, 5))
plt.subplot(111)
# The training curve was previously labelled "test", which produced a legend
# with two identical entries.
plt.plot(acc_train, color="red", label="train")
plt.plot(acc_test, color="green", label="test")
plt.xlabel("Iteration")  # one iteration = one parameter update
plt.ylabel("Accuracy")
plt.legend()

plt.show()

# --- K-nearest-neighbour prediction of the iris species ---
# A 1-nearest-neighbour classifier is fitted on x_train_knn / y_train_knn,
# which are prepared earlier in the script.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train_knn, y_train_knn)

# Prompts for the four feature values, asked in this order: sepal length,
# sepal width, petal length, petal width.
feature_prompts = (
    "请输入花萼长度:",
    "请输入花萼宽度:",
    "请输入花瓣长度:",
    "请输入花瓣宽度:",
)

while True:
    choose = input("请输入是否要进行预测~ 是(Y/y),否(N/n)\n")

    if choose == "Y" or choose == "y":
        print("请输入鸢尾花的属性值:")
        user_input = []
        try:
            for prompt in feature_prompts:
                print(prompt)
                # Parse with float() rather than eval(): eval() would execute
                # any expression the user types and raise arbitrary
                # exceptions on malformed input.
                user_input.append(float(input()))
        except ValueError:
            # Non-numeric input: report it and return to the main prompt
            # instead of crashing the program.
            print("输入有误~请重新输入!\n")
            continue

        # Every value must be below 8. Written as "not all(... < 8)" so that
        # NaN (for which every comparison is False) is rejected as well.
        if not all(value < 8 for value in user_input):
            print("输入数据范围有误!\n")
        else:
            x_new = np.array([user_input])  # shape (1, 4): a single sample
            # predict() returns a 1-element array; take the element
            # explicitly, since converting a non-0-d array to int is
            # deprecated in recent NumPy.
            prediction = int(knn.predict(x_new)[0])
            if prediction == 0:
                print("预测的种类为 %d : 山鸢尾" % prediction)
            elif prediction == 1:
                print("预测的种类为 %d : 变色鸢尾" % prediction)
            else:
                print("预测的种类为 %d : 维吉尼亚鸢尾" % prediction)

    elif choose == "N" or choose == "n":
        print("程序退出!")
        break

    else:
        print("输入有误~请重新输入!\n")