You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

160 lines
7.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import numpy as np # numpy当中axis=0表示按行操作,axis=1表示按列操作
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
# Single-layer neural-network demo on the Iris data set: fetch the two CSV files.
Train_aggdress = "http://download.tensorflow.org/data/iris_training.csv"
Test_aggdress = "http://download.tensorflow.org/data/iris_test.csv"
# Each file is requested under the last path component of its URL.
Train_path = tf.keras.utils.get_file(Train_aggdress.split('/')[-1], Train_aggdress)
Test_path = tf.keras.utils.get_file(Test_aggdress.split('/')[-1], Test_aggdress)

# Column names: sepal length, sepal width, petal length, petal width and
# species (Setosa, Versicolor, Virginica).
c_name = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Species']

# Read the training CSV under those column names and convert it to a NumPy array.
df_iris = pd.read_csv(Train_path, names=c_name, header=0)
iris = np.array(df_iris)

# Draw a 4x4 grid: feature names on the diagonal, pairwise scatter plots elsewhere.
figure = plt.figure('Iris Data', figsize=(12, 9))
figure.suptitle(
    "Anderson's Iris Data set\n(Blue-->Setosa | Red-->Versicolor | Green-->Virginica)",
    fontsize=16,
)  # per the original author, Chinese text cannot be used in this title
for cell in range(16):
    row, col = divmod(cell, 4)
    plt.subplot(4, 4, cell + 1)  # visit every panel, left to right, top to bottom
    if row != col:
        # Column `col` on the x-axis against column `row` on the y-axis,
        # coloured by the species column through the 'brg' colour map.
        plt.scatter(iris[:, col], iris[:, row], c=iris[:, 4], cmap='brg')
    else:
        plt.text(0.3, 0.4, c_name[row], fontsize=11)  # diagonal panel: feature name
    if row == 0:
        plt.title(c_name[col])  # column headings on the top row
    if col == 0:
        plt.ylabel(c_name[row])  # row labels on the left-most column
plt.show()
# Prepare the data for the neural network.
df_iris_train = pd.read_csv(Train_path, header=0)  # read the training set
df_iris_test = pd.read_csv(Test_path, header=0)  # read the test set
iris_train = np.array(df_iris_train)  # training set as a NumPy array
iris_test = np.array(df_iris_test)  # test set as a NumPy array
x_train = iris_train[:, 0:4]  # x holds the four feature columns
y_train = iris_train[:, 4]  # y holds the label column
x_test = iris_test[:, 0:4]
y_test = iris_test[:, 4]
# Keep the uncentred training data under separate names; the k-NN classifier
# further down in this file is fitted on these.
x_train_knn = x_train
y_train_knn = y_train
# Mean-centre the features (subtract the per-column mean; no scaling is done).
# The test set is shifted by the TRAINING mean: using the test set's own mean
# would apply a different transformation to the evaluation data than the one
# the model was trained with.
x_train_mean = np.mean(x_train, axis=0)
x_train = x_train - x_train_mean
x_test = x_test - x_train_mean
# Cast to float32; the original author notes the later computation fails otherwise.
X_train = tf.cast(x_train, tf.float32)
# The labels are integer class codes; convert them to one-hot vectors of depth 3.
Y_train = tf.one_hot(tf.constant(y_train, dtype=tf.int32), 3)
X_test = tf.cast(x_test, tf.float32)
Y_test = tf.one_hot(tf.constant(y_test, dtype=tf.int32), 3)  # one-hot test labels
learn_rate = 0.55  # hyper-parameter: learning rate (step size of every update)
display_step = 1  # print the metrics every `display_step` iterations

# Initial model parameters.
np.random.seed(600)
# W: 4x3 weight matrix (4 inputs, 3 classes), initialised from a normal distribution.
W = tf.Variable(np.random.randn(4, 3), dtype=tf.float32)
# B: one bias per class, initialised to zero.
B = tf.Variable(np.zeros([3]), dtype=tf.float32)

# Per-iteration history of accuracy and loss on the training and test sets.
acc_train, acc_test = [], []
loss_train, loss_test = [], []


def _softmax_output(features):
    """Return softmax(features x W + B), the model output for `features`."""
    return tf.nn.softmax(tf.matmul(features, W) + B)


def _mean_cross_entropy(one_hot_labels, probabilities):
    """Return the categorical cross-entropy averaged over all samples."""
    per_sample = tf.keras.losses.categorical_crossentropy(
        y_true=one_hot_labels, y_pred=probabilities)
    return tf.reduce_mean(per_sample)


def _accuracy(probabilities, labels):
    """Return the fraction of samples whose arg-max class equals `labels`."""
    predicted = tf.argmax(probabilities.numpy(), axis=1)
    return tf.reduce_mean(tf.cast(tf.equal(predicted, labels), tf.float32))


# Training starts here.
print("训练开始!")
for step in range(1, 101):
    # Only the training forward pass is recorded on the tape; its loss is the
    # quantity differentiated below.
    with tf.GradientTape() as tape:
        PRED_train = _softmax_output(X_train)
        LOSS_train = _mean_cross_entropy(Y_train, PRED_train)
    PRED_test = _softmax_output(X_test)
    LOSS_test = _mean_cross_entropy(Y_test, PRED_test)
    # Each row of PRED_* is a softmax output, i.e. one probability per class;
    # the class with the largest probability is taken as the prediction.
    accuracy_train = _accuracy(PRED_train, y_train)
    accuracy_test = _accuracy(PRED_test, y_test)
    # Record this iteration's metrics (measured before the parameter update).
    acc_train.append(accuracy_train)
    acc_test.append(accuracy_test)
    loss_train.append(LOSS_train)
    loss_test.append(LOSS_test)
    # Gradient-descent step: partial derivatives of the training loss
    # with respect to W and B, scaled by the learning rate.
    grad_W, grad_B = tape.gradient(LOSS_train, [W, B])
    W.assign_sub(learn_rate * grad_W)
    B.assign_sub(learn_rate * grad_B)
    if step % display_step == 0:
        print("训练次数: %i次, 训练集准确度:%f, 训练集损失值: %f, 测试集准确度: %f, 测试集损失值: %f"
              % (step, accuracy_train, LOSS_train, accuracy_test, LOSS_test))
print("训练结束~")
print()
# Debug aid: print(tf.argmax(PRED_train.numpy(), axis=1)) shows the class
# predicted for every training sample in the last iteration.
# Loss curves of the training run: training set in blue, test set in yellow.
plt.figure(figsize=(6, 5))
plt.subplot(111)
plt.plot(loss_train, color="blue", label="train")
plt.plot(loss_test, color="yellow", label="test")
plt.xlabel("Iteration")
plt.ylabel("LOSS")
plt.legend()
# Accuracy curves: training set in red, test set in green.
plt.figure(figsize=(6, 5))
plt.subplot(111)
# The training curve used to be labelled "test" as well, which left the legend
# with two indistinguishable "test" entries.
plt.plot(acc_train, color="red", label="train")
plt.plot(acc_test, color="green", label="test")
plt.xlabel("Iteration")  # one iteration = one pass of the training loop in this file
plt.ylabel("Accuracy")
plt.legend()
plt.show()
# Predict the Iris species of user-supplied measurements with k-nearest neighbours.
knn = KNeighborsClassifier(n_neighbors=1)  # 1-nearest-neighbour classifier
knn.fit(x_train_knn, y_train_knn)  # fit on the uncentred training features and labels

# Prompts for the four measurements, in the column order the classifier was fitted on:
# sepal length, sepal width, petal length, petal width.
_FEATURE_PROMPTS = ("请输入花萼长度:", "请输入花萼宽度:", "请输入花瓣长度:", "请输入花瓣宽度:")

while True:
    choose = input("请输入是否要进行预测~ 是(Y/y),否(N/n)\n")
    if choose in ("Y", "y"):
        print("请输入鸢尾花的属性值:")
        user_input = []
        try:
            for prompt in _FEATURE_PROMPTS:
                print(prompt)
                # NOTE(security): this used to be eval(input()), which executes
                # whatever the user types. float() accepts numeric literals only.
                user_input.append(float(input()))
        except ValueError:
            # Non-numeric input: report it and return to the menu instead of crashing.
            print("输入有误~请重新输入!\n")
            continue
        # Accept only values strictly between 0 and 8; this also rejects NaN and
        # infinity. The original check had no lower bound, so zero and negative
        # lengths were passed on to the classifier.
        if any(not 0 < value < 8 for value in user_input):
            print("输入数据范围有误!\n")
        else:
            x_new = np.array([user_input])  # a single sample with four features
            # predict() returns a one-element array; index it explicitly, because
            # int() on a non-0-d array is deprecated in recent NumPy releases.
            prediction = int(knn.predict(x_new)[0])
            if prediction == 0:
                print("预测的种类为 %d : 山鸢尾" % prediction)
            elif prediction == 1:
                print("预测的种类为 %d : 变色鸢尾" % prediction)
            else:
                print("预测的种类为 %d : 维吉尼亚鸢尾" % prediction)
    elif choose in ("N", "n"):
        print("程序退出!")
        break
    else:
        print("输入有误~请重新输入!\n")