|
|
|
@ -28,8 +28,12 @@
|
|
|
|
|
import numpy as np
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import zipfile
|
|
|
|
|
from zipfile import ZipFile
|
|
|
|
|
# Read in the data
|
|
|
|
|
# NOTE(review): `data` is reassigned a few lines below from the CSV stored in
# "data.csv.zip", so the frame loaded here is never used — confirm whether this
# plain-file read is still needed.
data = pd.read_csv("data.csv")
|
|
|
|
|
# Load the dataset from the CSV member stored inside the zip archive.
# The standard-library class is spelled ZipFile (capital "F"); the previous
# spelling "Zipfile" does not exist in the zipfile module.
# Both the archive and the member stream are opened as context managers so
# their handles are closed as soon as pandas has finished parsing the file.
with ZipFile("data.csv.zip") as z, z.open("data.csv") as f:
    data = pd.read_csv(f)
|
|
|
|
|
# Show the size of the dataset (the `shape` attribute of the loaded frame)
|
|
|
|
|
print("数据集大小:",data.shape)
|
|
|
|
|
# 数据集详细信息
|
|
|
|
@ -648,51 +652,4 @@ plt.show()
|
|
|
|
|
|
|
|
|
|
# AUC
|
|
|
|
|
# NOTE(review): `auc`, `model_fpr` and `model_tpr` are defined outside this
# excerpt; presumably this is the area under the ROC curve built from the
# false-positive / true-positive rates — confirm against the earlier cells.
model_auc = auc(model_fpr, model_tpr)
|
|
|
|
|
# Bare expression: echoes the value when run as a notebook cell; it has no
# visible effect when the file is executed as a plain script.
model_auc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ## 二、案例练习
|
|
|
|
|
|
|
|
|
|
# 根据上述学习内容,完成以下练习:
|
|
|
|
|
#
|
|
|
|
|
# ### 分析泰坦尼克号人群生存情况
|
|
|
|
|
#
|
|
|
|
|
# 数据集路径如下:
|
|
|
|
|
#
|
|
|
|
|
# 训练数据:/data/shixunfiles/2309cc5f04782ed9bb6016d9f4e381cf_1607607386535.csv
|
|
|
|
|
#
|
|
|
|
|
# 测试数据:/data/shixunfiles/7533b82eae4b582610cbd68aa636b017_1607607386511.csv
|
|
|
|
|
|
|
|
|
|
# **练习内容如下**
|
|
|
|
|
|
|
|
|
|
# #### 1.导入训练测试数据,将数据合并并查看数据基本信息
|
|
|
|
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# #### 2.查看数据前10行数据
|
|
|
|
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# #### 3.显示男性与女性乘客生存比例并进行柱状图可视化
|
|
|
|
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# #### 4.显示不同客舱乘客生存比例并进行柱状图可视化
|
|
|
|
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Bare expression: echoes the value when run as a notebook cell; it has no
# visible effect when the file is executed as a plain script.
model_auc
|