|
|
|
@ -1,10 +1,62 @@
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import seaborn as sns
|
|
|
|
|
# Load the three CSV inputs: the training set, the sample submission and the
# predictions file.
data_train = pd.read_csv("train.csv")
# NOTE(review): `grandtruth` and `pred` are not used by the plotting code
# visible in this part of the script; they are still loaded so that any later
# code relying on them keeps working.
grandtruth = pd.read_csv("sample_submission.csv")
pred = pd.read_csv("Predictions.csv")

# Scatter SalePrice against Id.
# The original call passed the bare strings 'Id' and 'SalePrice', which
# matplotlib treats as a single categorical point rather than as column names.
# NOTE(review): assumes the training set is the intended source of both
# columns -- confirm.
plt.scatter(data_train['Id'], data_train['SalePrice'])
|
|
|
|
|
|
|
|
|
|
# CentralAir boxplot: distribution of SalePrice for each CentralAir value.
var = 'CentralAir'
data = pd.concat([data_train['SalePrice'], data_train[var]], axis=1)
# Draw on a dedicated figure. Without an explicit Axes, seaborn draws on
# whatever pyplot Axes is current, which here is the one left behind by the
# preceding plt.scatter call, so the two plots would be overlaid.
f, ax = plt.subplots()
# sns.boxplot returns the Axes it drew on; the name `fig` is kept from the
# original script even though the object is an Axes.
fig = sns.boxplot(x=var, y="SalePrice", data=data, ax=ax)
# Fix the y-range so it matches the other SalePrice plots in this script.
fig.axis(ymin=0, ymax=800000)
|
|
|
|
|
|
|
|
|
|
# YearBuilt boxplot: one box of SalePrice per distinct YearBuilt value.
var = 'YearBuilt'
# Keep only the two columns that are plotted.
data = data_train[['SalePrice', var]]
# Wide canvas (26 x 12 inches) for the boxes.
f, ax = plt.subplots(figsize=(26, 12))
# `ax` is the current Axes right after plt.subplots, so passing it explicitly
# draws on the same Axes seaborn would pick by default.
fig = sns.boxplot(data=data, x=var, y="SalePrice", ax=ax)
# Clamp the y-axis to the 0..800000 SalePrice range.
fig.axis(ymin=0, ymax=800000)
|
|
|
|
|
|
|
|
|
|
# YearBuilt scatter: SalePrice against YearBuilt, y-axis limited to 0..800000.
var = 'YearBuilt'
# Keep only the two columns that are plotted.
data = data_train[['SalePrice', var]]
data.plot(kind='scatter', x=var, y="SalePrice", ylim=(0, 800000))
|
|
|
|
|
|
|
|
|
|
# Neighborhood scatter: SalePrice against Neighborhood, y-axis limited to
# 0..800000.
var = 'Neighborhood'
# Keep only the two columns that are plotted.
data = data_train[['SalePrice', var]]
data.plot(kind='scatter', x=var, y="SalePrice", ylim=(0, 800000))
# Disabled alternative kept for reference: the same data as a wide boxplot.
# f, ax = plt.subplots(figsize=(26, 12))
# fig = sns.boxplot(x=var, y="SalePrice", data=data)
# fig.axis(ymin=0, ymax=800000);
|
|
|
|
|
|
|
|
|
|
# LotArea scatter: SalePrice against LotArea, with x limited to 0..20000 and
# y limited to 0..800000.
var = 'LotArea'
# Keep only the two columns that are plotted.
data = data_train[['SalePrice', var]]
data.plot(
    kind='scatter',
    x=var,
    y='SalePrice',
    xlim=(0, 20000),
    ylim=(0, 800000),
)
|
|
|
|
|
|
|
|
|
|
# GrLivArea scatter: SalePrice against GrLivArea, y-axis limited to 0..800000.
var = 'GrLivArea'
# Keep only the two columns that are plotted.
data = data_train[['SalePrice', var]]
data.plot(kind='scatter', x=var, y='SalePrice', ylim=(0, 800000))
|
|
|
|
|
|
|
|
|
|
# TotalBsmtSF scatter: SalePrice against TotalBsmtSF, y-axis limited to
# 0..800000.
var = 'TotalBsmtSF'
# Keep only the two columns that are plotted.
data = data_train[['SalePrice', var]]
data.plot(kind='scatter', x=var, y='SalePrice', ylim=(0, 800000))
|
|
|
|
|
|
|
|
|
|
# MiscVal scatter: SalePrice against MiscVal, y-axis limited to 0..800000.
var = 'MiscVal'
# Keep only the two columns that are plotted.
data = data_train[['SalePrice', var]]
data.plot(kind='scatter', x=var, y='SalePrice', ylim=(0, 800000))
|
|
|
|
|
|
|
|
|
|
# GarageArea / GarageCars scatters: one SalePrice scatter plot per garage
# column, each with the y-axis limited to 0..800000.
var = ['GarageArea', 'GarageCars']
# Iterate over the list itself instead of a hard-coded range(2), so adding or
# removing a column name needs no other change. `index` is kept because the
# original loop variable had that name.
for index, column in enumerate(var):
    data = pd.concat([data_train['SalePrice'], data_train[column]], axis=1)
    # Draw the scatter plot for the current column.
    data.plot.scatter(x=column, y='SalePrice', ylim=(0, 800000))

# Display the figures created above.
# NOTE(review): the original indentation was lost; this assumes plt.show()
# belongs after the loop rather than inside it -- confirm.
plt.show()
|
|
|
|
|