import numpy as np  # numpy当中axis=0表示按行操作,axis=1表示按列操作
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier

# Single-layer neural network demo on the Iris data set: fetch the CSV files.
# Each file is stored locally under the last path component of its URL.
Train_aggdress = "http://download.tensorflow.org/data/iris_training.csv"
Test_aggdress = "http://download.tensorflow.org/data/iris_test.csv"
Train_path = tf.keras.utils.get_file(fname=Train_aggdress.rsplit('/', 1)[-1], origin=Train_aggdress)
Test_path = tf.keras.utils.get_file(fname=Test_aggdress.rsplit('/', 1)[-1], origin=Test_aggdress)
# Column names: sepal length, sepal width, petal length, petal width and
# species (setosa, versicolor, virginica).
c_name = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Species']

# Read the training CSV (its header row is replaced by c_name) and convert
# the frame to a NumPy array for plotting.
df_iris = pd.read_csv(Train_path, names=c_name, header=0)
iris = np.array(df_iris)
figure = plt.figure('Iris Data', figsize=(12, 9))
# The title is kept in ASCII; the original author notes that Chinese text
# cannot be used here.
figure.suptitle(
    "Anderson's Iris Data set\n(Blue-->Setosa | Red-->Versicolor | Green-->Virginica)",
    fontsize=16,
)

# Draw a 4x4 scatter matrix: cell (row, col) plots feature `col` on the x axis
# against feature `row` on the y axis, coloured by the species column.
# Diagonal cells only show the feature name.
n_features = 4
for cell in range(n_features * n_features):
    row, col = divmod(cell, n_features)
    plt.subplot(n_features, n_features, cell + 1)  # subplot indices are 1-based
    if row != col:
        plt.scatter(iris[:, col], iris[:, row], c=iris[:, 4], cmap='brg')
    else:
        plt.text(0.3, 0.4, c_name[row], fontsize=11)
    if row == 0:
        plt.title(c_name[col])  # column headers on the top row only
    if col == 0:
        plt.ylabel(c_name[row])  # row labels on the left column only
plt.show()

# Prepare the data: load both CSV files (skipping their header row) as
# NumPy arrays.
iris_train = np.array(pd.read_csv(Train_path, header=0))  # training set
iris_test = np.array(pd.read_csv(Test_path, header=0))  # test set
df_iris_train = pd.read_csv(Train_path, header=0)
df_iris_test = pd.read_csv(Test_path, header=0)

# Columns 0-3 hold the feature values (x); column 4 holds the label (y).
x_train, y_train = iris_train[:, :4], iris_train[:, 4]
x_test, y_test = iris_test[:, :4], iris_test[:, 4]

# Keep references to the raw training data for the KNN classifier before
# x_train is rebound to its preprocessed version further down.
x_train_knn, y_train_knn = x_train, y_train

# Centre every feature on its TRAINING-set mean (mean-centring only; the
# features are not scaled to unit variance). The same offset is applied to
# the test set so that both sets go through an identical transformation and
# no test-set statistics leak into evaluation.
feature_mean = np.mean(x_train, axis=0)
x_train = x_train - feature_mean
x_test = x_test - feature_mean

# Cast the features to float32 so they match the dtype of the model
# parameters; mixing dtypes in the matrix product raises an error.
X_train = tf.cast(x_train, tf.float32)
# The labels are ordinal class ids (0, 1, 2); convert them to one-hot vectors
# of depth 3 for the cross-entropy loss.
Y_train = tf.one_hot(tf.constant(y_train, dtype=tf.int32), 3)
X_test = tf.cast(x_test, tf.float32)
Y_test = tf.one_hot(tf.constant(y_test, dtype=tf.int32), 3)

# Hyper-parameters.
learn_rate = 0.55  # gradient-descent step size
display_step = 1  # print the metrics every `display_step` iterations

# Initial model parameters (seeded so that runs are reproducible).
np.random.seed(600)
# Weight matrix: 4 input features -> 3 classes, drawn from a standard normal.
W = tf.Variable(np.random.randn(4, 3), dtype=tf.float32)
# Bias vector: one entry per class, initialised to zero.
B = tf.Variable(np.zeros(3), dtype=tf.float32)

# Per-iteration history of accuracy and loss on the training and test sets.
acc_train, acc_test = [], []
loss_train, loss_test = [], []

# Training: 100 iterations of full-batch gradient descent on a single-layer
# softmax classifier.
def _forward(features):
    """Return the softmax class probabilities of the model for `features`."""
    return tf.nn.softmax(tf.matmul(features, W) + B)


def _mean_cross_entropy(one_hot_labels, probabilities):
    """Return the mean categorical cross-entropy over all samples.

    The first argument is the one-hot label tensor and the second the softmax
    output; the per-sample losses form a 1-D tensor that is then averaged.
    """
    per_sample = tf.keras.losses.categorical_crossentropy(
        y_true=one_hot_labels, y_pred=probabilities)
    return tf.reduce_mean(per_sample)


def _accuracy(probabilities, labels):
    """Return the fraction of samples whose most probable class equals the label."""
    predicted = tf.argmax(probabilities.numpy(), axis=1)
    return tf.reduce_mean(tf.cast(tf.equal(predicted, labels), tf.float32))


print("训练开始！")
for epoch in range(1, 101):
    # Only the training forward pass and loss are recorded on the tape;
    # the test-set evaluation needs no gradients.
    with tf.GradientTape() as tape:
        PRED_train = _forward(X_train)
        LOSS_train = _mean_cross_entropy(Y_train, PRED_train)
    PRED_test = _forward(X_test)
    LOSS_text = _mean_cross_entropy(Y_test, PRED_test)

    accuracy_train = _accuracy(PRED_train, y_train)
    accuracy_test = _accuracy(PRED_test, y_test)

    # Record this iteration's metrics for the curves plotted afterwards.
    acc_train.append(accuracy_train)
    acc_test.append(accuracy_test)
    loss_train.append(LOSS_train)
    loss_test.append(LOSS_text)

    # Gradient-descent update: grads[0] is dLoss/dW and grads[1] is dLoss/dB.
    grads = tape.gradient(LOSS_train, [W, B])
    for parameter, gradient in zip((W, B), grads):
        parameter.assign_sub(learn_rate * gradient)

    if epoch % display_step == 0:
        print("训练次数: %i次, 训练集准确度:%f, 训练集损失值: %f, 测试集准确度: %f, 测试集损失值: %f"
              % (epoch, accuracy_train, LOSS_train, accuracy_test, LOSS_text))

# Each row of PRED_train holds the softmax probabilities of the three classes
# for one sample; the larger a value, the more likely the sample belongs to
# that class.
print("训练结束~")
print()

# Debugging aid: print(tf.argmax(PRED_train.numpy(), axis=1)) shows the class
# predicted for every training sample after training.

# Loss curves of the training and test sets over the iterations.
plt.figure(figsize=(6, 5))
plt.subplot(111)
plt.plot(loss_train, color="blue", label="train")
plt.plot(loss_test, color="yellow", label="test")
plt.xlabel("Iteration")
plt.ylabel("LOSS")
plt.legend()

# Accuracy curves of the training and test sets over the iterations.
plt.figure(figsize=(6, 5))
plt.subplot(111)
# The training curve must be labelled "train"; labelling it "test" produced a
# legend with two indistinguishable "test" entries.
plt.plot(acc_train, color="red", label="train")
plt.plot(acc_test, color="green", label="test")
# One iteration is one parameter update computed on one batch of samples.
plt.xlabel("Iteration")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

# K-nearest-neighbours prediction of the iris species from user input.
# The classifier is fitted on the raw (un-centred) training features, so the
# user can type measurements in their natural units.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train_knn, y_train_knn)

# Prompts for the four measurements, in the feature order used for training:
# sepal length, sepal width, petal length, petal width.
_MEASUREMENT_PROMPTS = ("请输入花萼长度：", "请输入花萼宽度：", "请输入花瓣长度：", "请输入花瓣宽度：")
# Measurements must lie strictly between 0 and this upper bound.
_MAX_MEASUREMENT = 8
# Species name for each class id; any other id is reported as virginica.
_SPECIES_NAMES = {0: "山鸢尾", 1: "变色鸢尾"}


def _read_measurements():
    """Prompt for the four iris measurements and return them as floats.

    Raises:
        ValueError: if an entered value is not a valid number.
    """
    print("请输入鸢尾花的属性值:")
    values = []
    for prompt in _MEASUREMENT_PROMPTS:
        print(prompt)
        # SECURITY: the input used to be passed to eval(), which executes
        # arbitrary code typed by the user. float() accepts numbers only.
        values.append(float(input()))
    return values


while True:
    choose = input("请输入是否要进行预测~  是(Y/y),否(N/n)\n")
    if choose in ("Y", "y"):
        try:
            user_input = _read_measurements()
        except ValueError:
            # Non-numeric input: report it and ask again instead of crashing.
            print("输入有误~请重新输入！\n")
            continue

        # Reject zero, negative, too large and NaN values (every comparison
        # with NaN is false, so NaN fails this check as well).
        if not all(0 < value < _MAX_MEASUREMENT for value in user_input):
            print("输入数据范围有误！\n")
            continue

        x_new = np.array([user_input])  # a single sample, shape (1, 4)
        # predict() returns a one-element array; index it explicitly because
        # converting a non-0-d array straight to int is deprecated in NumPy.
        prediction = int(knn.predict(x_new)[0])
        species = _SPECIES_NAMES.get(prediction, "维吉尼亚鸢尾")
        print("预测的种类为 %d : %s" % (prediction, species))

    elif choose in ("N", "n"):
        print("程序退出！")
        break
    else:
        print("输入有误~请重新输入！\n")