From ce69c78eef2a9c4be02e5cb8182525981ae1cfa7 Mon Sep 17 00:00:00 2001
From: molskqv38 <748067453@qq.com>
Date: Thu, 28 Oct 2021 16:05:48 +0800
Subject: [PATCH] Update PricePredict.py

---
Review notes (free text after the three-dash line; not part of the commit):

  What the patch does:
    * Adds two seaborn box plots of SalePrice, grouped by CentralAir and by
      OverallQual, each with the y axis limited to 0..800000, followed by a
      plt.show() call.
    * Adds a commented-out plt.show() after the correlation heatmap and
      another after the pair plot; the latter takes the place of a blank
      line.

  English for the non-English comments on the unchanged context lines
  (left untouched below so the hunks still match the target file):
    * "峰度和偏度"  -> "kurtosis and skewness"
    * "关系矩阵中将显示10个特征,..." -> "the correlation matrix will show 10
      features, which tells us which data categories are highly correlated
      and helps the later analysis"
    * "获取数据"    -> "get the data"

  NOTE(review): neither sns.boxplot call is given its own figure or axes,
  so both presumably draw onto whatever axes are current at that point --
  confirm the overlay is intended.
  NOTE(review): the trailing ';' on the fig.axis(...) lines has no effect
  in a plain script.
  NOTE(review): line breaks were restored from a copy that had lost them;
  intra-line whitespace could not be recovered, so run `git apply --check`
  before applying.

 PricePredict.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/PricePredict.py b/PricePredict.py
index 87068ce..529699a 100644
--- a/PricePredict.py
+++ b/PricePredict.py
@@ -26,9 +26,21 @@ sns.distplot(data_train['SalePrice'])
 #skewness and kurtosis 峰度和偏度
 print("Skewness: %f" % data_train['SalePrice'].skew())
 print("Kurtosis: %f" % data_train['SalePrice'].kurt())
+# CentralAir
+var = 'CentralAir'
+data = pd.concat([data_train['SalePrice'], data_train[var]], axis=1)
+fig = sns.boxplot(x=var, y="SalePrice", data=data)
+fig.axis(ymin=0, ymax=800000);
+# OverallQual
+var = 'OverallQual'
+data = pd.concat([data_train['SalePrice'], data_train[var]], axis=1)
+fig = sns.boxplot(x=var, y="SalePrice", data=data)
+fig.axis(ymin=0, ymax=800000);
+plt.show()
 corrmat = data_train.corr()
 f, ax = plt.subplots(figsize=(20, 9))
 sns.heatmap(corrmat, vmax=0.8, square=True)
+#plt.show()
 k = 10 # 关系矩阵中将显示10个特征,由此我们可以知道相关性高的数据类别,便于后续的分析
 cols = corrmat.nlargest(k, 'SalePrice')['SalePrice'].index
 cm = np.corrcoef(data_train[cols].values.T)
@@ -38,7 +50,7 @@ hm = sns.heatmap(cm, cbar=True, annot=True, \
 sns.set()
 cols = ['SalePrice','OverallQual','GrLivArea', 'GarageCars','TotalBsmtSF', 'FullBath', 'TotRmsAbvGrd', 'YearBuilt']
 sns.pairplot(data_train[cols], size = 2.5)
-
+#plt.show()
 # 获取数据
 cols = ['OverallQual','GrLivArea', 'GarageCars','TotalBsmtSF', 'FullBath', 'TotRmsAbvGrd', 'YearBuilt']
 x = data_train[cols].values