You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
60 lines
2.2 KiB
60 lines
2.2 KiB
8 months ago
|
import pandas as pd
|
||
|
from pyecharts.charts import Bar
|
||
|
from pyecharts import options as opts
|
||
|
from pyecharts.globals import ThemeType
|
||
|
|
||
|
|
||
|
# Load the JD product data; the CSV is read with GBK encoding.
df = pd.read_csv('./JD.csv', encoding='gbk')

# Price: strip the currency sign and thousands separators, then convert the
# column to float. regex=False keeps every pattern a plain literal no matter
# which default the installed pandas version uses.
price_text = df['价格'].str.replace('¥', '', regex=False)
df['价格'] = price_text.str.replace(',', '', regex=False).astype(float)

# Comment count: drop the trailing '+' and expand the unit 万 (= 10,000).
# The number is parsed first and then scaled, so a fractional value such as
# "1.5万" becomes 15000 rather than the 1.5 that a textual '万' -> '0000'
# substitution would produce. Missing values stay NaN.
comment_text = df['评论数'].str.replace('+', '', regex=False)
has_wan_unit = comment_text.str.contains('万', regex=False, na=False)
comment_number = comment_text.str.replace('万', '', regex=False).astype(float)
# round() removes float noise from the scaling (e.g. 1.1 * 10000).
df['评论数'] = comment_number.where(~has_wan_unit, (comment_number * 10000).round())

# Summary statistics of the numeric columns, printed for inspection.
describe = df.describe()
print(f"==>> describe: {describe}")
|
||
|
|
||
|
# Price bin edges and their labels (six bins).
# The last bin is open-ended so that it matches its label '1000以上'
# ("above 1000"): with a finite upper edge, any price beyond that edge would
# be assigned NaN by pd.cut and silently dropped from the counts below.
bins = [0, 100, 300, 500, 800, 1000, float('inf')]
labels = ['100以下', '100-300', '300-500', '500-800', '800-1000', '1000以上']

# Assign every price to its bin; include_lowest=True keeps a price of exactly 0
# inside the first bin.
df['价格区间'] = pd.cut(df['价格'], bins=bins, labels=labels, include_lowest=True)

# Count the products per bin; sort=False keeps the bins in label order instead
# of ordering them by frequency.
df_price_count = df['价格区间'].value_counts(sort=False)
print(f"==>> df_price_count: {df_price_count}")
|
||
|
|
||
|
# Bar chart of the number of products in each price bin.
price_bin_names = df_price_count.index.tolist()
price_bin_sizes = df_price_count.values.tolist()
hist_title = opts.TitleOpts(title='价格区间分布直方图')

hist = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    .add_xaxis(price_bin_names)
    .add_yaxis('价格区间', price_bin_sizes)
    .set_global_opts(title_opts=hist_title)
)

# Write the chart to an HTML file.
hist.render('./价格区间分布直方图.html')
|
||
|
|
||
|
|
||
|
# Sort by comment count (descending), keep one row per distinct comment count,
# and take the first ten rows.
by_comments_desc = df.sort_values(by='评论数', ascending=False)
df_sorted = by_comments_desc.drop_duplicates(subset='评论数').head(10)

# Both series are fed to the chart in reversed (ascending) order -- presumably
# so that the largest bar ends up at the top once the axes are swapped.
# To label the bars by shop instead of product, use the '店铺' column here.
product_names = df_sorted['商品'].tolist()[::-1]
comment_counts = df_sorted['评论数'].values.tolist()[::-1]

# Smaller, tilted axis labels.
axis_label_style = opts.LabelOpts(font_size=10, rotate=60)
canvas = opts.InitOpts(theme=ThemeType.LIGHT, width='1200px', height='800px')

# Horizontal bar chart with the value labels placed to the right of each bar.
bar_reverse = (
    Bar(init_opts=canvas)
    .add_xaxis(product_names)
    .add_yaxis('评论数', comment_counts)
    .reversal_axis()
    .set_series_opts(label_opts=opts.LabelOpts(position='right'))
    .set_global_opts(yaxis_opts=opts.AxisOpts(axislabel_opts=axis_label_style))
)

# Write the chart to an HTML file.
bar_reverse.render('./评论数前十条直方图.html')
|