diff --git a/PricePredict.py b/PricePredict.py
index 87068ce..529699a 100644
--- a/PricePredict.py
+++ b/PricePredict.py
@@ -26,9 +26,21 @@ sns.distplot(data_train['SalePrice'])
 #skewness and kurtosis 峰度和偏度
 print("Skewness: %f" % data_train['SalePrice'].skew())
 print("Kurtosis: %f" % data_train['SalePrice'].kurt())
+# CentralAir
+var = 'CentralAir'
+data = pd.concat([data_train['SalePrice'], data_train[var]], axis=1)
+fig = sns.boxplot(x=var, y="SalePrice", data=data)
+fig.axis(ymin=0, ymax=800000);
+# OverallQual
+var = 'OverallQual'
+data = pd.concat([data_train['SalePrice'], data_train[var]], axis=1)
+fig = sns.boxplot(x=var, y="SalePrice", data=data)
+fig.axis(ymin=0, ymax=800000);
+plt.show()
 corrmat = data_train.corr()
 f, ax = plt.subplots(figsize=(20, 9))
 sns.heatmap(corrmat, vmax=0.8, square=True)
+#plt.show()
 k = 10 # 关系矩阵中将显示10个特征,由此我们可以知道相关性高的数据类别,便于后续的分析
 cols = corrmat.nlargest(k, 'SalePrice')['SalePrice'].index
 cm = np.corrcoef(data_train[cols].values.T)
@@ -38,7 +50,7 @@ hm = sns.heatmap(cm, cbar=True, annot=True, \
 sns.set()
 cols = ['SalePrice','OverallQual','GrLivArea', 'GarageCars','TotalBsmtSF', 'FullBath', 'TotRmsAbvGrd', 'YearBuilt']
 sns.pairplot(data_train[cols], size = 2.5)
-
+#plt.show()
 # 获取数据
 cols = ['OverallQual','GrLivArea', 'GarageCars','TotalBsmtSF', 'FullBath', 'TotRmsAbvGrd', 'YearBuilt']
 x = data_train[cols].values