You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

104 lines
4.2 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#coding=gbk
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the run.
warnings.filterwarnings("ignore")

# Load the preprocessed training and test tables plus the reference labels
# that the model functions below score their predictions against.
train = pd.read_csv("E:\\python1\\123\\Data\\new_train.csv")
test = pd.read_csv("E:\\python1\\123\\Data\\new_test.csv")
gender = pd.read_csv("E:\\python1\\123\\Data\\gender_submission.csv")

# Keep the training labels, then strip the Survived column from both
# feature tables so only predictors remain.
sur_train = train["Survived"]
train.drop(columns=["Survived"], inplace=True)
test.drop(columns=["Survived"], inplace=True)
# Logistic regression
def Logistic_Regression():
    """Fit a logistic-regression classifier and print its accuracy figures.

    The model is trained on the module-level ``train`` / ``sur_train`` data
    and used to predict ``test``. Two numbers are printed: the accuracy of
    those predictions against ``gender['Survived']``, and the mean of a
    5-fold cross-validation on the training data.

    Returns:
        The predictions produced by ``predict(test)``.
    """
    classifier = LogisticRegression()
    classifier.fit(train, sur_train)
    predicted = classifier.predict(test)

    # Score noted in the original source at this point: 0.9186602870813397
    reference_accuracy = accuracy_score(gender['Survived'], predicted)
    print("逻辑回归模型accuracy_score验证准确率", reference_accuracy)

    fold_scores = cross_val_score(classifier, train, sur_train, cv=5)
    print("逻辑回归模型交叉验证准确率:", fold_scores.mean())
    return predicted
# Random forest classifier
def Forest():
    """Fit a random-forest classifier and print its accuracy figures.

    The model is trained on the module-level ``train`` / ``sur_train`` data
    and used to predict ``test``. Two numbers are printed: the accuracy of
    those predictions against ``gender['Survived']``, and the mean of a
    5-fold cross-validation on the training data.

    Returns:
        The predictions produced by ``predict(test)``.
    """
    # min_samples_split must be the integer 2 (the smallest valid count).
    # The previous float value 1.0 is read by scikit-learn as a *fraction*
    # of the samples, i.e. a node needed 100% of the samples to be split,
    # so no node below the root could ever split and max_depth=10 had no
    # effect.
    Fmodel = RandomForestClassifier(
        n_estimators=500,
        max_depth=10,
        min_samples_split=2,
        min_samples_leaf=1,
    )
    Fmodel.fit(train, sur_train)
    F_prediction = Fmodel.predict(test)
    print("随机森林模型accuracy_score验证准确率", accuracy_score(gender['Survived'], F_prediction))
    F_cross_val_scores = cross_val_score(Fmodel, train, sur_train, cv=5)
    print("随机森林模型交叉验证准确率:", F_cross_val_scores.mean())
    # The original source noted a score of 0.6363636363636364 here; that
    # figure was obtained with the old min_samples_split=1.0 setting.
    return F_prediction
# Support vector machine model
def SVM():
    """Fit a support-vector classifier and print its accuracy figures.

    The model is trained on the module-level ``train`` / ``sur_train`` data
    and used to predict ``test``. Two numbers are printed: the accuracy of
    those predictions against ``gender['Survived']``, and the mean of a
    5-fold cross-validation on the training data.

    Returns:
        The predictions produced by ``predict(test)``.
    """
    classifier = SVC()
    classifier.fit(train, sur_train)
    predicted = classifier.predict(test)

    # Score noted in the original source at this point: 0.6483253588516746
    reference_accuracy = accuracy_score(gender['Survived'], predicted)
    print("向量机模型accuracy_score验证准确率", reference_accuracy)

    fold_scores = cross_val_score(classifier, train, sur_train, cv=5)
    print("向量机模型交叉验证准确率:", fold_scores.mean())
    return predicted
# Plot the Precision-Recall curves.
# The closer a PR curve is to the top-right corner, the better the model.
def PrecisionRecall(LR_prediction,F_prediction,SVM_prediction):
    """Draw the precision-recall curves of the three models on one figure.

    Each argument is compared against ``gender['Survived']`` with
    ``precision_recall_curve`` and plotted as recall (x) versus precision (y).

    Args:
        LR_prediction: Output of the logistic-regression model.
        F_prediction: Output of the random-forest model.
        SVM_prediction: Output of the support-vector model.
    """
    # NOTE(review): the callers in this file pass ``predict()`` outputs;
    # if those are hard class labels rather than scores, each curve will
    # only have a handful of points -- confirm this is intended.
    truth = gender['Survived']
    labelled_inputs = (
        ('LR', LR_prediction),
        ('FOREST', F_prediction),
        ('SVM', SVM_prediction),
    )

    # Compute every curve before touching the figure.
    curves = []
    for label, predicted in labelled_inputs:
        precision, recall, _ = precision_recall_curve(truth, predicted)
        curves.append((label, recall, precision))

    plt.figure(figsize=(8, 6))
    for label, recall, precision in curves:
        plt.plot(recall, precision, label=label, marker='o')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall 曲线')
    plt.legend()
    plt.grid(True)
    plt.show()
# Plot the ROC curves.
def ROC(LR_prediction,F_prediction,SVM_prediction):
    """Draw the ROC curves of the three models, with AUC, on one figure.

    Each argument is compared against ``gender['Survived']`` with
    ``roc_curve``; the area under each curve is shown in the legend. A grey
    dashed diagonal from (0, 0) to (1, 1) is drawn for reference.

    Args:
        LR_prediction: Output of the logistic-regression model.
        F_prediction: Output of the random-forest model.
        SVM_prediction: Output of the support-vector model.
    """
    truth = gender['Survived']
    series = (
        (LR_prediction, 'blue', 'LR (AUC = %0.2f)'),
        (F_prediction, 'green', 'Forest (AUC = %0.2f)'),
        (SVM_prediction, 'red', 'SVM (AUC = %0.2f)'),
    )

    # Compute every curve and its area before touching the figure.
    curves = []
    for predicted, colour, label_template in series:
        fpr, tpr, _ = roc_curve(truth, predicted)
        area = auc(fpr, tpr)
        curves.append((fpr, tpr, colour, label_template % area))

    plt.figure(figsize=(8, 6))
    for fpr, tpr, colour, label in curves:
        plt.plot(fpr, tpr, color=colour, lw=2, label=label)
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate (FPR)')
    plt.ylabel('True Positive Rate (TPR)')
    plt.title('ROC')
    plt.legend(loc="lower right")
    plt.grid(True)
    plt.show()
if __name__ == '__main__':
    # Train the three models in order and keep each one's test predictions.
    LR_prediction, F_prediction, SVM_prediction = (
        Logistic_Regression(),
        Forest(),
        SVM(),
    )

    # Compare the models visually: precision-recall first, then ROC.
    PrecisionRecall(LR_prediction, F_prediction, SVM_prediction)
    ROC(LR_prediction, F_prediction, SVM_prediction)