ADD file via upload

1 year ago · 6c88c9d10d
parent 528141d9c3
commit 6c88c9d10d
1 changed files with 104 additions and 0 deletions
--- a/model.py
+++ b/model.py
@ -0,0 +1,104 @@
+#coding=gbk
+import numpy as np
+import pandas as pd
+from sklearn.metrics import accuracy_score
+import matplotlib.pyplot as plt
+import seaborn as sns 
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.svm import SVC
+from sklearn.model_selection import cross_val_score
+from sklearn.metrics import precision_recall_curve
+from sklearn.metrics import roc_curve, auc
+import matplotlib.pyplot as plt
+import warnings
+warnings.filterwarnings("ignore")
+
+#数据进一步处理，删去Survived列
+train=pd.read_csv("E:\\python1\\123\\Data\\new_train.csv")
+test=pd.read_csv("E:\\python1\\123\\Data\\new_test.csv")
+gender=pd.read_csv("E:\\python1\\123\\Data\\gender_submission.csv")
+sur_train=train.Survived
+train.drop(['Survived'], axis=1, inplace =True)
+test.drop(['Survived'], axis=1, inplace =True)
+
+#逻辑回归
+def Logistic_Regression():
+    LRmodel=LogisticRegression()
+    LRmodel.fit(train,sur_train)
+    LR_prediction=LRmodel.predict(test) 
+    # 0.9186602870813397
+    print("逻辑回归模型accuracy_score验证准确率：",accuracy_score(gender['Survived'], LR_prediction))
+    LR_cross_val_scores = cross_val_score(LRmodel, train, sur_train, cv=5)
+    print("逻辑回归模型交叉验证准确率:", LR_cross_val_scores.mean()) 
+    return LR_prediction
+
+#随机森林分类器
+def Forest():
+    Fmodel=RandomForestClassifier(n_estimators=500,max_depth=10,min_samples_split=1.0,min_samples_leaf=1)
+    Fmodel.fit(train,sur_train)
+    F_prediction=Fmodel.predict(test)
+    print("随机森林模型accuracy_score验证准确率：",accuracy_score(gender['Survived'], F_prediction))
+    F_cross_val_scores = cross_val_score(Fmodel, train, sur_train, cv=5)
+    print("随机森林模型交叉验证准确率:", F_cross_val_scores.mean())
+    #0.6363636363636364
+    return F_prediction
+
+#向量机模型
+def SVM():
+    SVMmodel=SVC()
+    SVMmodel.fit(train,sur_train)
+    SVM_prediction=SVMmodel.predict(test)
+    #0.6483253588516746
+    print("向量机模型accuracy_score验证准确率：",accuracy_score(gender['Survived'], SVM_prediction))
+    SVM_cross_val_scores = cross_val_score(SVMmodel, train, sur_train, cv=5)
+    print("向量机模型交叉验证准确率:", SVM_cross_val_scores.mean())
+    return SVM_prediction
+
+
+# 绘制 Precision-Recall 曲线
+#PR曲线越靠近右上角，模型性能越好
+def PrecisionRecall(LR_prediction,F_prediction,SVM_prediction):
+    LR_precision, LR_recall, _ = precision_recall_curve(gender['Survived'], LR_prediction)
+    F_precision, F_recall, _ = precision_recall_curve(gender['Survived'], F_prediction)
+    SVM_precision, SVM_recall, _ = precision_recall_curve(gender['Survived'], SVM_prediction)
+    plt.figure(figsize=(8, 6))
+    plt.plot(LR_recall, LR_precision, label='LR', marker='o')
+    plt.plot(F_recall, F_precision, label='FOREST', marker='o')
+    plt.plot(SVM_recall, SVM_precision, label='SVM', marker='o')
+    plt.xlabel('Recall')
+    plt.ylabel('Precision')
+    plt.title('Precision-Recall 曲线')
+    plt.legend()
+    plt.grid(True)
+    plt.show()
+
+#绘制ROC曲线
+def ROC(LR_prediction,F_prediction,SVM_prediction):
+    LR_fpr, LR_tpr, _ = roc_curve(gender['Survived'], LR_prediction)
+    LR_auc = auc(LR_fpr, LR_tpr)
+    F_fpr, F_tpr, _ = roc_curve(gender['Survived'], F_prediction)
+    F_auc = auc(F_fpr, F_tpr)
+    SVM_fpr, SVM_tpr, _ = roc_curve(gender['Survived'], SVM_prediction)
+    SVM_auc = auc(SVM_fpr, SVM_tpr)
+    plt.figure(figsize=(8, 6))
+    plt.plot(LR_fpr, LR_tpr, color='blue', lw=2, label='LR (AUC = %0.2f)' % LR_auc)
+    plt.plot(F_fpr, F_tpr, color='green', lw=2, label='Forest (AUC = %0.2f)' % F_auc)
+    plt.plot(SVM_fpr, SVM_tpr, color='red', lw=2, label='SVM (AUC = %0.2f)' % SVM_auc)
+    plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
+    plt.xlim([0.0, 1.0])
+    plt.ylim([0.0, 1.05])
+    plt.xlabel('False Positive Rate (FPR)')
+    plt.ylabel('True Positive Rate (TPR)')
+    plt.title('ROC')
+    plt.legend(loc="lower right")
+    plt.grid(True)
+    plt.show()
+
+
+if __name__ == '__main__':
+    LR_prediction=Logistic_Regression()
+    F_prediction=Forest()
+    SVM_prediction=SVM()
+    PrecisionRecall(LR_prediction,F_prediction,SVM_prediction)
+    ROC(LR_prediction,F_prediction,SVM_prediction)