"""Chart JD product data: price-range distribution and top-10 comment counts.

Reads ``./JD.csv`` (GBK-encoded), cleans the ``价格`` (price) and ``评论数``
(comment count) columns, prints summary statistics, and renders two pyecharts
bar charts to HTML files in the working directory.
"""
import pandas as pd
from pyecharts.charts import Bar
from pyecharts import options as opts
from pyecharts.globals import ThemeType

# Multiplier represented by the "万" (ten-thousand) suffix in comment counts.
WAN = 10_000


def _parse_comment_counts(raw: pd.Series) -> pd.Series:
    """Convert comment-count strings such as ``"500+"`` or ``"1.5万+"`` to floats.

    The trailing ``+`` is dropped and a ``万`` suffix multiplies the number by
    10000. The multiplication is done numerically rather than by substituting
    the text ``"0000"``, so decimal values like ``"1.5万"`` become 15000.0
    instead of 1.5.
    """
    text = raw.str.replace('+', '', regex=False)
    # na=False keeps missing values out of the boolean mask.
    has_wan = text.str.contains('万', regex=False, na=False)
    numbers = text.str.replace('万', '', regex=False).astype(float)
    return numbers.where(~has_wan, numbers * WAN)


df = pd.read_csv('./JD.csv', encoding='gbk')

# Strip the currency sign and thousands separators, then convert to float.
# regex=False makes the literal match explicit regardless of pandas version.
df['价格'] = (
    df['价格']
    .str.replace('¥', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)
df['评论数'] = _parse_comment_counts(df['评论数'])

describe = df.describe()
print(f"==>> describe: {describe}")

# Price bin edges and their labels. The last edge is open-ended so that every
# price above 1000 lands in the '1000以上' bucket instead of becoming NaN.
bins = [0, 100, 300, 500, 800, 1000, float('inf')]
labels = ['100以下', '100-300', '300-500', '500-800', '800-1000', '1000以上']

# Assign each price to one of the six ranges.
df['价格区间'] = pd.cut(df['价格'], bins=bins, labels=labels, include_lowest=True)
# Count rows per range; sort=False keeps the counts in bin order.
df_price_count = df['价格区间'].value_counts(sort=False)
print(f"==>> df_price_count: {df_price_count}")

# Bar chart of the price-range distribution.
hist = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
    .add_xaxis(df_price_count.index.tolist())
    .add_yaxis('价格区间', df_price_count.values.tolist())
    .set_global_opts(title_opts=opts.TitleOpts(title='价格区间分布直方图'))
)

hist.render('./价格区间分布直方图.html')


# Sort by comment count (descending), drop rows that share a comment count,
# and keep the ten largest.
df_sorted = (
    df.sort_values(by='评论数', ascending=False)
    .drop_duplicates(subset='评论数')
    .head(10)
)

# Reverse both series so the largest value is added last; the axes are
# swapped below via reversal_axis() to produce a horizontal bar chart.
top_names = df_sorted['商品'].tolist()[::-1]
top_counts = df_sorted['评论数'].values.tolist()[::-1]

# Horizontal bar chart of the top ten comment counts.
# To label bars by shop instead of product, build top_names from
# df_sorted['店铺'].
bar_reverse = (
    Bar(init_opts=opts.InitOpts(theme=ThemeType.LIGHT, width='1200px', height='800px'))
    .add_xaxis(top_names)
    .add_yaxis('评论数', top_counts)
    .reversal_axis()
    .set_series_opts(label_opts=opts.LabelOpts(position='right'))
    .set_global_opts(
        yaxis_opts=opts.AxisOpts(
            # Smaller font and tilted labels for the axis labels.
            axislabel_opts=opts.LabelOpts(font_size=10, rotate=60)
        )
    )
)
bar_reverse.render('./评论数前十条直方图.html')