You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
hnu202310050110 2379c7823e
Update README.md
2 years ago
C:/Users/LENOVO ADD file via upload 2 years ago
README.md Update README.md 2 years ago

README.md

Pokemon

import pandas as pd import numpy as np from sklearn.preprocessing import minmax_scale from sklearn.preprocessing import MinMaxScaler from sklearn.metrics import mean_absolute_error import os import tensorflow as tf import matplotlib.pyplot as plt from pathlib import Path import re

# Load the Pokemon dataset and keep only the six base-stat columns.
# NOTE(review): the path is machine-specific and encoding='gbk' suggests the
# CSV was saved on a Chinese-locale Windows machine — confirm before porting.
df_original = pd.read_csv("C:/Users/LENOVO/Downloads/Pokemon.csv", encoding='gbk')
print(df_original)
# BUG FIX: the original `df_original'HP','Attack',...` was invalid syntax —
# selecting multiple columns requires a list inside the indexing operator.
df_modified = df_original[['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']]

# Shuffle the rows once, then split into train / validation / test partitions.
# NOTE(review): the 500/650 split points assume roughly 800 rows (the classic
# Pokemon dataset) — confirm against the actual CSV length.
df_shuffled = df_modified.sample(frac=1)
df_train = df_shuffled[:500]
df_val = df_shuffled[500:650]
df_test = df_shuffled[650:]

# Convert to plain arrays. FIX: use np.asarray consistently — the original
# mixed asarray and asanyarray for no reason (identical here, but confusing).
M_train = np.asarray(df_train)
M_val = np.asarray(df_val)
M_test = np.asarray(df_test)

# The last column (Speed) is the regression target; the first five stats
# are the input features.
X_train, y_train = M_train[:, :-1], M_train[:, -1]
X_val, y_val = M_val[:, :-1], M_val[:, -1]
X_test, y_test = M_test[:, :-1], M_test[:, -1]

# Fit a min-max scaler on the training inputs only, then apply that same
# transform to all three splits (avoids leaking val/test statistics).
scaler = MinMaxScaler().fit(X_train)
print(scaler)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)
pd.DataFrame(X_train_scaled).hist()
print(scaler)
# The models below are compared by mean absolute error.

# ---- Baseline: predict the training-set mean speed for every row ----
average_speed = y_train.mean()
mean_absolute_error([average_speed] * len(y_val), y_val)
# observed result: 24.4084

# ---- Linear regression ----
from sklearn.linear_model import LinearRegression
linear_model = LinearRegression().fit(X_train, y_train)
mean_absolute_error(y_val, linear_model.predict(X_val))
# observed result: 18.71774993094178

# ---- Random forest regression ----
from sklearn.ensemble import RandomForestRegressor
# FIX: renamed the misspelled local `random_frest`.
random_forest = RandomForestRegressor().fit(X_train, y_train)
m = mean_absolute_error(y_val, random_forest.predict(X_val))
# observed result: 20.182850000000002

# ---- Neural networks built with tensorflow.keras ----
# Sequential is the simplest linear stack of layers.
# FIX: the three identical import triples were collapsed into one.
# NOTE(review): these models train on the UNSCALED features even though a
# MinMaxScaler was fitted above — confirm whether X_train_scaled was intended.
# epochs=1 is almost certainly too few; kept as-is to match the recorded results.
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


def _train_keras_model(hidden_units):
    """Build a Sequential regressor with the given Dense hidden-layer sizes,
    train it for one epoch, evaluate test MAE, print the summary, and return it."""
    stack = [layers.Input((5,))]
    stack += [layers.Dense(n) for n in hidden_units]
    stack += [layers.Dense(1)]
    model = Sequential(stack)
    model.compile(loss='mse', optimizer=Adam(learning_rate=0.01),
                  metrics=['mean_absolute_error'])
    model.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), epochs=1)
    mean_absolute_error(y_test, model.predict(X_test))
    print(model.summary())
    return model


model_1 = _train_keras_model([])          # observed result: 36.14171112060547
model_2 = _train_keras_model([32, 32])    # observed result: 32.58675776163737
model_3 = _train_keras_model([64, 64])    # observed result: 23.092122675577798

# ---- Pick the best model ----
# Better fits could be reached by tuning learning_rate or training longer.
# model_3 was the best of the three keras models and LinearRegression had the
# lowest error of the classic models, so compare those two on the test set.
print(mean_absolute_error(y_test, linear_model.predict(X_test)))
# observed result: 17.610008815706447
print(mean_absolute_error(y_test, model_3.predict(X_test)))
# observed result: 23.381181411743164
# Conclusion: plain linear regression fits this target better.

# The stats were predictable from other stats — can the type be predicted from
# the sprite image? Use Water vs. Fire as a binary example.
# ---- Build a table mapping Pokemon names to image file paths ----
train_dic = "C:/Users/LENOVO/Downloads/images"
train_path = Path(train_dic)
# BUG FIX: glob('.png') matches nothing — the '*' wildcard is required.
files = list(train_path.glob('*.png'))
names = [os.path.split(str(x))[1] for x in files]
namess = [x.title() for x in names]
image_df = pd.concat([pd.Series(namess, name='Name'),
                      pd.Series(files, name='Filepath').astype(str)],
                     axis=1)
# BUG FIX: in the original pattern '.Png' the dot matched ANY character;
# escape and anchor it so only the literal extension is stripped.
# (.title() above turns '.png' into '.Png'.)
image_df['Name'] = image_df['Name'].apply(lambda x: re.sub(r'\.Png$', '', x))
label_df = df_original

合并图表 (merge the two tables)

# Join the stat table with the image-path table on the Pokemon name.
train_df = label_df.merge(image_df, on='Name')

把资料限制在水火属性上 (restrict the data to Water- and Fire-type Pokemon)

# Keep only Water- and Fire-type rows for the binary classifier.
# IDIOM: isin() replaces the chained equality-OR of the original.
# NOTE(review): the column is assumed to be named 'Type1' — the common Kaggle
# CSV calls it 'Type 1'; confirm against the actual file.
train_df = train_df[train_df['Type1'].isin(['Fire', 'Water'])]

# ---- Load image data ----
# Rescale pixel values to [0, 1] and reserve 20% of rows for validation.
train_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2,
    rescale=1. / 255,
)

# Training subset of the generator (80% of rows; labels come from 'Type1').
train_data = train_gen.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Type1',
    target_size=(120, 120),
    color_mode='rgba',      # PNG sprites keep their alpha channel
    class_mode='sparse',
    batch_size=32,
    shuffle=True,
    seed=1,
    subset='training',
)

# Validation subset (the remaining 20%), configured identically so the two
# streams only differ in which rows they draw.
val_data = train_gen.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Type1',
    target_size=(120, 120),
    color_mode='rgba',
    class_mode='sparse',
    batch_size=32,
    shuffle=True,
    seed=1,
    subset='validation',
)

# Show the first nine images of one training batch as a 3x3 grid.
# FIX: use the builtin next() instead of the generator's .next() method, a
# Python-2 holdover that newer Keras iterator classes no longer provide.
image_sample = next(train_data)[0]
plt.figure(figsize=(10, 10))
for i in range(9):
    plt.subplot(3, 3, i + 1)
    plt.imshow(image_sample[i])   # one RGBA image, values rescaled to [0, 1]
    plt.axis('off')
plt.show()

# ---- Feature extraction network ----
# Convolution layers: the kernels/filters preserve salient image features and
# suppress noise, with ReLU as the nonlinearity. Max-pooling keeps the
# strongest response in each window, reinforcing features. Global average
# pooling then averages each feature map down to a single value, producing a
# fixed-size output vector.
inputs = tf.keras.Input(shape=(120, 120, 4))

x = tf.keras.layers.Conv2D(filters=64, kernel_size=(8, 8), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D()(x)

x = tf.keras.layers.Conv2D(filters=128, kernel_size=(8, 8), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)

x = tf.keras.layers.Conv2D(filters=256, kernel_size=(8, 8), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)

outputs = tf.keras.layers.GlobalAveragePooling2D()(x)

# Wrap inputs -> pooled features as a Model and print its structure.
feature_extractor = tf.keras.Model(inputs=inputs, outputs=outputs)
print(feature_extractor.summary())

# ---- Classification head ----
# Binary problem, so a single sigmoid unit: sigmoid squashes the output into
# [0, 1], directly interpretable as a class probability. (A k-way problem
# would instead use units=k with a softmax activation.)
clf_inputs = feature_extractor.input
clf_outputs = tf.keras.layers.Dense(units=1, activation='sigmoid')(feature_extractor.output)

# ---- Build and train the classifier ----
classifier = tf.keras.Model(inputs=clf_inputs, outputs=clf_outputs)
print(classifier.summary())

# Optimizer: adam (adaptive learning rate, vs. plain gradient descent).
# Loss: binary cross-entropy — sensitive to predicted probabilities; near 0
# when the prediction matches the label, unbounded when confidently wrong.
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# EarlyStopping: stop when val_loss has not improved for 3 epochs
# (patience=3) and, with restore_best_weights=True, roll back to the best
# weights seen. ReduceLROnPlateau: shrink the learning rate when val_loss
# plateaus, to reach a better optimum.
# FIX: dropped batch_size=32 from fit() — the generator already batches, and
# Keras ignores (newer versions reject) batch_size with generator input.
history = classifier.fit(
    train_data,
    validation_data=val_data,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=3, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(),
    ],
)

# ---- Visualize the 64 first-layer convolution kernels ----
print(feature_extractor.summary(),
      feature_extractor.layers,
      feature_extractor.layers[1].weights[0].shape)
plt.figure(figsize=(10, 10))
for i in range(64):
    plt.subplot(8, 8, i + 1)
    img = feature_extractor.layers[1].weights[0][:, :, :, i].numpy().copy()
    for channel in range(4):
        # Normalize for display.
        # NOTE(review): minmax_scale scales each COLUMN of the 8x8 slice
        # independently, not the slice as a whole — confirm that is intended.
        img[:, :, channel] = minmax_scale(img[:, :, channel])
    plt.imshow(img)
    plt.axis('off')
plt.show()