|
|
|
@ -0,0 +1,43 @@
|
|
|
|
|
#coding=gbk
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from sklearn.metrics import accuracy_score
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
|
|
import seaborn as sns
|
|
|
|
|
from sklearn.model_selection import cross_val_score, train_test_split
|
|
|
|
|
import warnings
|
|
|
|
|
# Silence every warning for the remainder of the run.
# NOTE(review): this also hides any warning emitted by the libraries used
# further down; confirm that blanket suppression is intended.
warnings.filterwarnings("ignore")

# Read the three CSV inputs: the training table, the test table and the
# gender_submission table (its 'Survived' column is used for scoring later).
train = pd.read_csv("E:\\python1\\123\\Data\\new_train.csv")
test = pd.read_csv("E:\\python1\\123\\Data\\new_test.csv")
gender = pd.read_csv("E:\\python1\\123\\Data\\gender_submission.csv")

# Set the training target aside before removing it from the tables.
sur_train = train.Survived

# Drop the 'Survived' column in place from both tables.
train.drop(columns=['Survived'], inplace=True)
test.drop(columns=['Survived'], inplace=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Correlation heatmap over the train and test rows stacked together.
# pd.concat is the public way to stack DataFrames; DataFrame._append is a
# private pandas method and is not a supported API.
dataset = pd.concat([train, test], sort=False)
plt.figure(figsize=(14, 12))
# annot=True writes each correlation coefficient inside its cell.
sns.heatmap(dataset.corr(), annot=True)
plt.show()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Show the first five rows of both datasets
|
|
|
|
|
# print(train.head())
|
|
|
|
|
# print(test.head())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Logistic-regression classifier with default settings.
LR = LogisticRegression()

# Disabled hold-out experiment on the training table only: fit on every row
# except the last 100, then score on those last 100 rows.
# LR.fit(train.iloc[0:-100,:],sur_train.iloc[0:-100])
# print(accuracy_score(LR.predict(train.iloc[-100:,:]),sur_train[-100:].values.reshape(-1,1)))
# Recorded result: 0.82

# Fit on the whole training table, then predict for the test table.
LR.fit(X=train, y=sur_train)
prediction = LR.predict(X=test)

# Accuracy of the predictions against the 'Survived' column of `gender`.
# NOTE(review): confirm gender_submission.csv holds true labels rather than
# a sample submission; otherwise this number is agreement, not accuracy.
print(accuracy_score(y_true=gender['Survived'], y_pred=prediction))
# Recorded result: 0.9186602870813397
|