# Recovered from git patch 40f20369dd1779b1f2ad06f36b72232827eb1118
# ("commit PythonProject.py"), which adds PythonProject.py as a new file.
# The payload arrived collapsed onto a single line and is re-flowed here.
"""PyQt5 front end for a passenger-survival classification workflow.

The main window offers three actions: choose the CSV inputs, run feature
analysis on them, and train/evaluate a logistic-regression model.  Progress
messages are appended to the main window's text browser.
"""
import datetime
import sys
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
from PyQt5.QtGui import QPixmap
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog, QWidget
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import OneHotEncoder as ohe
from sklearn.preprocessing import OrdinalEncoder as ode

import FirstUi
import SecondUi
import ThirdUi


class Util:
    """Controller that connects the three UI forms to the data pipeline.

    Parameters
    ----------
    F_Widget : generated UI object of the main window (buttons + textBrowser).
    S_Widget : generated UI object of the file-selection form.
    T_Widget : generated UI object of the heat-map form (uses its ``label``).
    S_Ui : the QWidget hosting the file-selection form.
    T_Ui : the QWidget hosting the heat-map form.
    """

    _TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    # Maps the ``num`` passed by each line edit to the attribute it feeds.
    _FILE_SLOTS = {1: 'TrainFile', 2: 'TestFile', 3: 'TestSurvived'}

    # Collapses the honorifics extracted from passenger names into six groups.
    _TITLE_MAP = {
        "Capt": "Officer",
        "Col": "Officer",
        "Major": "Officer",
        "Jonkheer": "Royalty",
        "Don": "Royalty",
        "Sir": "Royalty",
        "Dr": "Officer",
        "Rev": "Officer",
        "the Countess": "Royalty",
        "Dona": "Royalty",
        "Mme": "Mrs",
        "Mlle": "Miss",
        "Ms": "Mrs",
        "Mr": "Mr",
        "Mrs": "Mrs",
        "Miss": "Miss",
        "Master": "Master",
        "Lady": "Royalty",
    }

    def __init__(self, F_Widget, S_Widget, T_Widget, S_Ui, T_Ui):
        self.F_Widget = F_Widget
        self.S_Widget = S_Widget
        self.T_Widget = T_Widget
        self.S_Ui = S_Ui
        self.T_Ui = T_Ui

        # Paths chosen by the user; None means "not selected".
        self.TrainFile = None
        self.TestFile = None
        self.TestSurvived = None

        self.fig = None
        self.canvas = None

        # Raw frames loaded by DataProcess.
        self.train = None
        self.test = None
        self.y_test = None

        # Model inputs produced by FeatureProcess.
        self.pred_x = None
        self.train_y = None
        self.train_x = None
        self.test_x = None
        self.test_y = None

        # Connect buttons to slots (data import).
        self.F_Widget.pushButton.clicked.connect(self.DataInput)
        self.S_Widget.pushButton.clicked.connect(
            partial(self.OpenFile, self.S_Widget.lineEdit))
        self.S_Widget.pushButton_2.clicked.connect(
            partial(self.OpenFile, self.S_Widget.lineEdit_2))
        self.S_Widget.pushButton_5.clicked.connect(
            partial(self.OpenFile, self.S_Widget.lineEdit_3))
        self.S_Widget.pushButton_3.clicked.connect(self.FileReady)
        self.S_Widget.pushButton_4.clicked.connect(self.FileClear)
        self.S_Widget.lineEdit.textChanged.connect(
            partial(self.SetFileInfo, self.S_Widget.lineEdit, 1))
        self.S_Widget.lineEdit_2.textChanged.connect(
            partial(self.SetFileInfo, self.S_Widget.lineEdit_2, 2))
        self.S_Widget.lineEdit_3.textChanged.connect(
            partial(self.SetFileInfo, self.S_Widget.lineEdit_3, 3))

        # Connect buttons to slots (feature analysis).
        self.F_Widget.pushButton_2.clicked.connect(self.DataProcess)

        # Connect buttons to slots (prediction).
        self.F_Widget.pushButton_3.clicked.connect(self.predicted)

    def _log(self, message):
        """Append ``message`` to the main text browser with a timestamp prefix."""
        stamp = datetime.datetime.now().strftime(self._TIME_FORMAT)
        self.F_Widget.textBrowser.append(stamp + ":" + message)

    def SetFileInfo(self, line, num):
        """Mirror the text of ``line`` into the path attribute selected by ``num``.

        ``num`` is 1 (training file), 2 (test file) or 3 (test labels); other
        values are ignored.  An empty line edit resets the attribute to None so
        the stored path never outlives what the form displays.
        """
        attr = self._FILE_SLOTS.get(num)
        if attr is None:
            return
        setattr(self, attr, line.text() or None)

    def DataInput(self):
        """Show the file-selection form."""
        self.S_Ui.show()

    def FileClear(self):
        """Forget all selected paths, blank the line edits and close the form."""
        selected = (self.TrainFile, self.TestFile, self.TestSurvived)
        if any(path is not None for path in selected):
            self.TrainFile = None
            self.TestFile = None
            self.TestSurvived = None
            for edit in (self.S_Widget.lineEdit,
                         self.S_Widget.lineEdit_2,
                         self.S_Widget.lineEdit_3):
                edit.clear()
            self._log("数据清除成功")
            # NOTE(review): the patch lost its indentation, so it is unclear
            # whether the form was meant to close even when nothing was
            # selected — confirm against the intended UX.
            self.S_Ui.close()

    def FileReady(self):
        """Confirm the selection once all three paths have been chosen."""
        selected = (self.TrainFile, self.TestFile, self.TestSurvived)
        if all(path is not None for path in selected):
            self._log("数据导入成功")
            # NOTE(review): same indentation ambiguity as in FileClear — the
            # form is closed only after a complete selection here.
            self.S_Ui.close()

    def OpenFile(self, line):
        """Let the user pick a file and write its path into ``line``."""
        # The first argument is the parent widget of the dialog.
        filename, _ = QFileDialog.getOpenFileName(
            self.S_Ui, "选择目录", r"C:\Users\Administrator\Desktop")
        # A cancelled dialog returns an empty path; keep the previous choice
        # instead of blanking the line edit.
        if filename:
            line.setText(filename)

    def DataProcess(self):
        """Load the three CSV files and run the feature analysis on them."""
        self._log("开始进行数据的特征分析")
        if None in (self.TrainFile, self.TestFile, self.TestSurvived):
            self._log("数据尚未导入")
            return
        try:
            self.train = pd.read_csv(self.TrainFile)
            self.test = pd.read_csv(self.TestFile)
            self.y_test = pd.read_csv(self.TestSurvived)
        except (OSError, ValueError) as err:
            # Missing/unreadable files raise OSError; pandas parser, empty-file
            # and decoding errors all derive from ValueError.  Report them in
            # the log instead of letting the exception escape from the slot.
            self._log("数据读取失败:" + str(err))
            return
        self.FeatureProcess()

    def Draw_HeatMap(self, corrDf):
        """Render ``corrDf`` as a heat map and display it in the third form."""
        self.T_Ui.show()
        # Draw on a dedicated figure: plotting on the implicit current axes
        # would stack a new heat map and colorbar on top of the previous run.
        fig, ax = plt.subplots()
        seaborn.heatmap(corrDf, ax=ax)
        fig.savefig('HeatMap.jpg', dpi=300, bbox_inches='tight')
        plt.close(fig)
        label = self.T_Widget.label
        label.setPixmap(QPixmap('HeatMap.jpg').scaled(label.width(), label.height()))

    def FeatureProcess(self):
        """Clean, encode and select features, then split the training data.

        Reads ``self.train`` and ``self.test``; writes ``self.pred_x`` (the
        selected features of the test file) and the ``train_x``/``test_x``/
        ``train_y``/``test_y`` hold-out split of the training file.
        """
        show = self.F_Widget.textBrowser.append

        rowNum_train = self.train.shape[0]
        train_features = self.train.loc[:, self.train.columns != "Survived"]
        # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
        xfull = pd.concat([train_features, self.test], ignore_index=True)
        ytrain = self.train["Survived"]

        show("1. 查看缺失值:")
        show(xfull.isnull().mean().to_string())
        show("开始缺失值填充")
        xfull['Age'] = xfull['Age'].fillna(xfull['Age'].mean())
        xfull['Fare'] = xfull['Fare'].fillna(xfull['Fare'].mean())
        # Assign the result: an in-place fillna on a column selection is not
        # guaranteed to write back into the frame.
        xfull['Embarked'] = xfull['Embarked'].fillna('S')
        xfull['Cabin'] = xfull['Cabin'].fillna('U')

        show("2. 查看异常值:")
        show(xfull.describe().to_string())
        show("开始处理异常值")

        # Replace the raw name by the group of its honorific.
        xfull['Nclass'] = xfull['Name'].map(self.NameSplit).map(self._TITLE_MAP)
        xfull = xfull.drop(columns=['Name'])

        # Only the first character of the cabin code is kept.
        xfull['Cabin'] = xfull['Cabin'].str[0]

        xfull['FamilyNum'] = xfull['Parch'] + xfull['SibSp'] + 1
        xfull = xfull.drop(columns=['Parch', 'SibSp'])

        age_bins = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform')
        xfull['Age'] = age_bins.fit_transform(
            xfull['Age'].values.reshape(-1, 1)).ravel()
        fare_bins = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='quantile')
        xfull['Fare'] = fare_bins.fit_transform(
            xfull['Fare'].values.reshape(-1, 1)).ravel()

        family = pd.cut(xfull['FamilyNum'], [0, 1, 4, np.inf],
                        labels=['小家庭', '中等家庭', '大家庭'])
        # Use the bin codes (0 < 1 < 2).  Feeding the text labels to the
        # ordinal encoder would order them lexicographically and lose the
        # small < medium < large ranking.
        xfull['FamilyNum'] = family.cat.codes

        hot = ['Sex', 'Embarked', 'Nclass', 'Cabin']
        ordinal = ['Pclass', 'Age', 'Fare', 'FamilyNum']
        xfull[ordinal] = ode().fit_transform(xfull[ordinal])

        dummies = [pd.get_dummies(xfull[name], prefix=name) for name in hot]
        xfull = pd.concat([xfull, *dummies], axis=1).drop(columns=hot)

        # Correlate numeric/boolean columns only: remaining text columns make
        # DataFrame.corr raise on pandas >= 2.0.
        labelled = pd.concat([xfull, ytrain], axis=1)
        corrDf = labelled.select_dtypes(include=['number', 'bool']).corr()
        self.Draw_HeatMap(corrDf)

        # Keep features whose |correlation| with the label is at least 0.1;
        # the label itself is excluded by name rather than by a float test.
        strength = corrDf['Survived'].drop('Survived').abs()
        col = strength[strength >= 0.1].index
        xfullnew = xfull[col]

        x = xfullnew.iloc[:rowNum_train]
        y = ytrain
        self.pred_x = xfullnew.iloc[rowNum_train:]
        self.train_x, self.test_x, self.train_y, self.test_y = train_test_split(
            x, y, train_size=.8, random_state=420)

        # Report dataset sizes.
        show("已划分好数据集")
        show("原始数据集特征:" + str(x.shape))
        show("训练数据集特征:" + str(self.train_x.shape))
        show("测试数据集特征:" + str(self.test_x.shape))
        show("原始数据集标签:" + str(y.shape))
        show("训练数据集标签:" + str(self.train_y.shape))
        show("测试数据集标签:" + str(self.test_y.shape))
        self._log("数据分析完毕")

    def NameSplit(self, x):
        """Return the text between the first comma and the first period of ``x``.

        For ``"Braund, Mr. Owen Harris"`` this yields ``"Mr"``.  Raises
        IndexError when the part before the first period contains no comma.
        """
        before_period = x.split('.')[0]
        return before_period.split(',')[1].strip()

    def predicted(self):
        """Train a logistic-regression model, score it and report predictions."""
        if self.train_x is None or self.pred_x is None or self.y_test is None:
            # Feature analysis has not produced the model inputs yet.
            self._log("数据尚未进行特征分析")
            return

        model = LogisticRegression(C=1.0, max_iter=100, penalty='l2',
                                   random_state=0, solver='liblinear')
        self._log("开始训练逻辑回归模型")
        model.fit(self.train_x, self.train_y)
        self._log("模型训练完毕")
        self._log("开始对模型进行评估")
        self.F_Widget.textBrowser.append(
            "评估分数为:" + str(model.score(self.test_x, self.test_y)))

        self._log("开始使用逻辑回归模型进行预测")
        pred_y = model.predict(self.pred_x)
        y_test = self.y_test['Survived']
        # classification_report expects (y_true, y_pred); swapping them
        # exchanges precision and recall in the printed table.
        self.F_Widget.textBrowser.append(
            "预测效果:" + "\n" + classification_report(y_test, pred_y))


def main():
    """Build the three forms, attach the controller and run the event loop."""
    app = QApplication(sys.argv)

    main_window = QMainWindow()
    import_form = QWidget()
    heatmap_form = QWidget()

    ui = FirstUi.Ui_MainWindow()
    ui2 = SecondUi.Ui_Form()
    ui3 = ThirdUi.Ui_Form()
    ui.setupUi(main_window)
    ui2.setupUi(import_form)
    ui3.setupUi(heatmap_form)

    main_window.show()
    # Bind the controller to a local so it stays referenced while the event
    # loop runs.
    controller = Util(ui, ui2, ui3, import_form, heatmap_form)
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()