You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
1.8 KiB
50 lines
1.8 KiB
# Imports
import pandas as pd
import matplotlib.pyplot as plt

# Make matplotlib render the Chinese text used in titles and labels correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used so Chinese labels display properly
plt.rcParams['axes.unicode_minus'] = False  # so the minus sign displays properly

# Load the cleaned new-home listings; the first spreadsheet column is the index.
df = pd.read_excel('./房天下新房数据(清洗后).xlsx', index_col=0)

# Average of every numeric column per district.
# Rows missing the district or the price are dropped *before* grouping; the
# original computed this filter and then overwrote it, so a district whose
# prices were all missing would yield a NaN mean and break round() below.
df1 = (
    df.dropna(subset=['行政区', '价格(元/㎡)'])
    .groupby('行政区')
    .mean(numeric_only=True)
)

# Chart 1: average new-home unit price per district (bar chart).
plt.figure()
plt.title('行政区平均新房单价分析')
avg_price = df1['价格(元/㎡)']
avg_price.plot.bar(color='#CCCCFF')
plt.ylabel('价格(元/㎡)')
# Write the rounded value above each bar; the offsets shift the label
# slightly left of the bar centre and a little above the bar top.
for i, v in enumerate(avg_price):
    plt.text(i - 0.35, v + 300, round(v))
plt.xticks(rotation=60)
# Observation recorded by the original author: Yuelu District has the highest
# average new-home unit price and Liuyang has the lowest.
plt.savefig('./新房数据展示1.jpg')

# Chart 2: share of listings offering each room layout (pie chart).
plt.figure(figsize=(10, 10))
plt.title('新房户型占比分析')

# One column per layout (one- to five-bedroom); a cell equal to '有' is
# counted as "this listing offers that layout".
room_types = ['一居', '二居', '三居', '四居', '五居']
# Summing a boolean mask gives 0 when no listing offers a layout, whereas
# value_counts()['有'] would raise KeyError in that case.
room_counts = [int((df[col] == '有').sum()) for col in room_types]

colors = ['#FFECE5', '#E5CCFF', '#CCCCFF', '#CCFFE5', '#FFE5CC']
plt.pie(room_counts, autopct='%.1f%%', colors=colors, labels=room_types)
plt.legend()
plt.ylabel('')
plt.savefig('./新房数据展示2.jpg')

# Chart 3: share of new-home listings per district (pie chart).
plt.figure(figsize=(10, 10))

# Number of listings with a non-null name in each district. Rows without a
# district are dropped first; the original computed that filter but then
# overwrote it with an unfiltered groupby.
listings_per_district = (
    df.dropna(subset=['行政区'])
    .groupby('行政区')['名称']
    .count()
)

plt.title('新房数量占比分析')
colors = [
    '#FFECE5', '#E5CCFF', '#CCCCFF', '#CCFFE5', '#FFE5CC',
    '#e6e6fa', '#9cc3eb', '#d0eb9b', '#e6cfe6', '#d1fff8',
]
listings_per_district.plot.pie(autopct='%.1f%%', colors=colors)
plt.legend()
# Clear the y-axis label (pandas appears to label the pie with the series
# name by default).
plt.ylabel('')
plt.savefig('./新房数据展示3.jpg')