#coding=gbk
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_recall_curve, roc_curve, auc
import warnings

warnings.filterwarnings("ignore")

# Further data preprocessing: split off and drop the Survived column
train = pd.read_csv("E:\\python1\\123\\Data\\new_train.csv")
test = pd.read_csv("E:\\python1\\123\\Data\\new_test.csv")
gender = pd.read_csv("E:\\python1\\123\\Data\\gender_submission.csv")

sur_train = train.Survived
train.drop(['Survived'], axis=1, inplace=True)
test.drop(['Survived'], axis=1, inplace=True)


# Logistic regression
def Logistic_Regression():
    LRmodel = LogisticRegression()
    LRmodel.fit(train, sur_train)
    LR_prediction = LRmodel.predict(test)
    # 0.9186602870813397
    print("Logistic regression accuracy_score (validation accuracy):",
          accuracy_score(gender['Survived'], LR_prediction))
    LR_cross_val_scores = cross_val_score(LRmodel, train, sur_train, cv=5)
    print("Logistic regression cross-validation accuracy:", LR_cross_val_scores.mean())
    return LR_prediction


# Random forest classifier
def Forest():
    # Note: min_samples_split=1.0 is read by scikit-learn as a fraction (100% of
    # the samples), so each tree can split only at the root node; this largely
    # explains the low cross-validation score recorded below.
    Fmodel = RandomForestClassifier(n_estimators=500, max_depth=10,
                                    min_samples_split=1.0, min_samples_leaf=1)
    Fmodel.fit(train, sur_train)
    F_prediction = Fmodel.predict(test)
    print("Random forest accuracy_score (validation accuracy):",
          accuracy_score(gender['Survived'], F_prediction))
    F_cross_val_scores = cross_val_score(Fmodel, train, sur_train, cv=5)
    # 0.6363636363636364
    print("Random forest cross-validation accuracy:", F_cross_val_scores.mean())
    return F_prediction


# Support vector machine model
def SVM():
    SVMmodel = SVC()
    SVMmodel.fit(train, sur_train)
    SVM_prediction = SVMmodel.predict(test)
    # 0.6483253588516746
    print("SVM accuracy_score (validation accuracy):",
          accuracy_score(gender['Survived'], SVM_prediction))
    SVM_cross_val_scores = cross_val_score(SVMmodel, train, sur_train, cv=5)
    print("SVM cross-validation accuracy:", SVM_cross_val_scores.mean())
    return SVM_prediction


# Plot the Precision-Recall curve.
# The closer the PR curve is to the top-right corner, the better the model performs.
def PrecisionRecall(LR_prediction, F_prediction, SVM_prediction):
    LR_precision, LR_recall, _ = precision_recall_curve(gender['Survived'], LR_prediction)
    F_precision, F_recall, _ = precision_recall_curve(gender['Survived'], F_prediction)
    SVM_precision, SVM_recall, _ = precision_recall_curve(gender['Survived'], SVM_prediction)

    plt.figure(figsize=(8, 6))
    plt.plot(LR_recall, LR_precision, label='LR', marker='o')
    plt.plot(F_recall, F_precision, label='FOREST', marker='o')
    plt.plot(SVM_recall, SVM_precision, label='SVM', marker='o')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend()
    plt.grid(True)
    plt.show()


# Plot the ROC curve
def ROC(LR_prediction, F_prediction, SVM_prediction):
    LR_fpr, LR_tpr, _ = roc_curve(gender['Survived'], LR_prediction)
    LR_auc = auc(LR_fpr, LR_tpr)
    F_fpr, F_tpr, _ = roc_curve(gender['Survived'], F_prediction)
    F_auc = auc(F_fpr, F_tpr)
    SVM_fpr, SVM_tpr, _ = roc_curve(gender['Survived'], SVM_prediction)
    SVM_auc = auc(SVM_fpr, SVM_tpr)

    plt.figure(figsize=(8, 6))
    plt.plot(LR_fpr, LR_tpr, color='blue', lw=2, label='LR (AUC = %0.2f)' % LR_auc)
    plt.plot(F_fpr, F_tpr, color='green', lw=2, label='Forest (AUC = %0.2f)' % F_auc)
    plt.plot(SVM_fpr, SVM_tpr, color='red', lw=2, label='SVM (AUC = %0.2f)' % SVM_auc)
    plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate (FPR)')
    plt.ylabel('True Positive Rate (TPR)')
    plt.title('ROC')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()
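
# Sketch (not part of the original pipeline): the PR/ROC functions above are fed
# hard 0/1 predictions, which gives each model only a single operating point, so
# the curves degenerate to a couple of segments. A common alternative is to pass
# continuous scores from predict_proba instead. The helper below is a minimal
# sketch of that approach under that assumption; the function name
# score_based_curves and the refit models here are illustrative additions.
def score_based_curves():
    models = [
        ('LR', LogisticRegression().fit(train, sur_train)),
        ('Forest', RandomForestClassifier(n_estimators=500, max_depth=10).fit(train, sur_train)),
        ('SVM', SVC(probability=True).fit(train, sur_train)),  # probability=True enables predict_proba
    ]

    plt.figure(figsize=(8, 6))
    for name, model in models:
        scores = model.predict_proba(test)[:, 1]  # predicted probability of class 1 (Survived)
        fpr, tpr, _ = roc_curve(gender['Survived'], scores)
        plt.plot(fpr, tpr, lw=2, label='%s (AUC = %0.2f)' % (name, auc(fpr, tpr)))
    plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
    plt.xlabel('False Positive Rate (FPR)')
    plt.ylabel('True Positive Rate (TPR)')
    plt.title('ROC (score-based)')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()
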
if __name__ == '__main__':
    LR_prediction = Logistic_Regression()
    F_prediction = Forest()
    SVM_prediction = SVM()
    PrecisionRecall(LR_prediction, F_prediction, SVM_prediction)
    ROC(LR_prediction, F_prediction, SVM_prediction)