# Provenance: contents of pythooon.py as added by commit 83f868c6
# ("ADD file via upload"), restored from a whitespace-collapsed patch.
"""Charts for a scraped comment data set.

Each public function reads one CSV from a hard-coded working directory and
renders one chart:

* ``g_gender`` - pie chart of commenter gender ratio
* ``g_date``   - pie chart of comment dates
* ``g_time``   - line chart of comments per hour of day
* ``g_map``    - map of commenter locations
* ``get_wc``   - word cloud of comment text
* ``feci``     - sentiment-score every comment and store the result as CSV
* ``xt``       - bar + line chart of the sentiment-score distribution

Plotting libraries are imported inside the function that needs them, so the
module can be imported even when only some of them are installed.
"""
import inspect
import random

import numpy as np
import pandas as pd

# Hard-coded working directories.  Double quotes are required because the
# user name contains an apostrophe.
BASE_DIR = "C:/Users/l'd/Desktop/可视化"
HOMEWORK_DIR = BASE_DIR + "/py作业"

# Hand-picked filler words that are excluded from the word cloud in addition
# to the stop-word file.
_EXTRA_STOPWORDS = (
    '事情', '这是', '说', '地方', '干', '做', '发生', '事',
    '拆', '想', '请', '真', '发现', '太', '有人', '干什么',
)


def _read_csv_skipping_bad_lines(path, **kwargs):
    """Read a CSV, warning about and skipping malformed rows.

    pandas replaced ``error_bad_lines=False`` with ``on_bad_lines`` (the old
    keyword no longer exists in pandas 2.x), so pick whichever keyword the
    installed ``read_csv`` accepts.

    Args:
        path: File path handed to ``pandas.read_csv``.
        **kwargs: Any further ``read_csv`` keyword arguments.

    Returns:
        The parsed ``DataFrame``.
    """
    if "on_bad_lines" in inspect.signature(pd.read_csv).parameters:
        kwargs["on_bad_lines"] = "warn"
    else:
        kwargs["error_bad_lines"] = False
    return pd.read_csv(path, **kwargs)


def _count_by(df, column):
    """Return a frame with one row per distinct *column* value and its 'count'."""
    return df.groupby([column])[column].agg(['count']).reset_index()


def g_gender():
    """Render the commenter gender ratio as a ring pie chart (性别比例.html).

    The first two columns of gender.csv are used as (label, value) pairs.
    """
    import pyecharts.options as opts
    from pyecharts.charts import Pie

    with open(HOMEWORK_DIR + "/gender.csv", 'r', encoding='utf-8-sig') as f:
        data = pd.read_csv(f, header=0)

    # pyecharts expects native Python values, hence tolist().
    labels = data.iloc[:, 0].tolist()
    values = data.iloc[:, 1].tolist()
    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(labels, values)], radius=[60, 100])
        .set_colors(["silver", "pink"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论用户性别比"),
            legend_opts=opts.LegendOpts(pos_top="30", pos_left="4%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}%"))
    )
    pie.render("性别比例.html")


def g_date():
    """Render the distribution of comment dates as a pie chart (评论日期.html).

    The first two columns of result.csv are used as (label, value) pairs.
    """
    import pyecharts.options as opts
    from pyecharts.charts import Pie

    with open(HOMEWORK_DIR + "/result.csv", 'r', encoding='utf-8') as f:
        data = pd.read_csv(f)

    labels = data.iloc[:, 0].tolist()
    values = data.iloc[:, 1].tolist()
    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(labels, values)])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论日期分布"),
            legend_opts=opts.LegendOpts(pos_left="80%"),
        )
    )
    pie.render("评论日期.html")


def g_time():
    """Render the number of comments per hour of day (评论的时间分布.html).

    The hour is the integer before the first ':' in the '时间' column.

    NOTE(review): the chart calls below use the ``from pyecharts import Line``
    layout, which appears to be the pre-1.0 pyecharts API and is not
    importable alongside ``pyecharts.charts`` - confirm the installed version
    or port this function.
    """
    from pyecharts import Line

    df = pd.read_csv(HOMEWORK_DIR + "/time.csv", header=0, encoding='utf-8-sig')
    df.info()  # info() prints by itself; wrapping it in print() adds "None"
    df['time'] = [int(stamp.split(':')[0]) for stamp in df['时间']]

    # Group and count comments per hour.
    per_hour = _count_by(df, 'time')

    # Draw the distribution.
    attr = [str(hour) + ":00" for hour in per_hour['time']]
    v1 = per_hour['count'].tolist()
    line = Line("评论的时间分布", title_pos='center', title_top='10',
                width=900, height=500)
    line.add(
        "", attr, v1,
        is_smooth=True, is_fill=True,
        area_color="#3299CC", is_xaxislabel_align=True,
        xaxis_min="dataMin", area_opacity=0.6,
        mark_point=['max', 'min', 'average'], mark_point_symbol="pin",
        mark_point_symbolsize=50, line_width=2,
    )
    line.render("评论的时间分布.html")


def g_map():
    """Render commenter counts per '用户城市' value on a China map.

    Output file: 用户的地区分布图.html.

    NOTE(review): like ``g_time`` this uses the ``from pyecharts import Map``
    layout, which appears to be the pre-1.0 pyecharts API - confirm the
    installed version or port this function.
    """
    from pyecharts import Map

    df = pd.read_csv(HOMEWORK_DIR + "/all_comments.csv", header=0,
                     encoding='utf-8-sig')
    df['location'] = df['用户城市']

    # Group and count comments per location.
    per_location = _count_by(df, 'location')
    names = per_location['location'].tolist()
    counts = per_location['count'].tolist()

    # Draw the map ("chart" rather than "map" to avoid shadowing the builtin).
    chart = Map("用户的地区分布图", title_pos='center', title_top=1)
    chart.add("", names, counts, maptype="china", is_visualmap=True,
              visual_text_color="blue", is_map_symbol_show=False,
              visual_range=[0, 2500])
    chart.render('用户的地区分布图.html')


def random_color_func(word=None, font_size=None, position=None,
                      orientation=None, font_path=None, random_state=None):
    """Return a random colour for one word of the word cloud.

    All parameters are accepted and ignored; the colour is drawn from a fixed
    three-entry palette using the global ``random`` module.

    Returns:
        A CSS colour string of the form ``"hsl(H, S%, L%)"``.
    """
    hue, saturation, lightness = random.choice(
        [(2, 100, 50), (62, 89, 36), (205, 73, 59)]
    )
    return "hsl({}, {}%, {}%)".format(hue, saturation, lightness)


def get_wc():
    """Build a word cloud from the '评论内容' column and save it as 词云.png."""
    import jieba
    import matplotlib.pyplot as plt
    from PIL import Image
    from wordcloud import WordCloud

    # Load the comments and the stop-word list.
    comments = _read_csv_skipping_bad_lines(
        HOMEWORK_DIR + "/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3,
    )['评论内容'].dropna()
    stopword_table = _read_csv_skipping_bad_lines(
        BASE_DIR + "/csstop.txt",
        encoding='utf-8-sig', sep='\t', names=['stopword'], quoting=3,
    )

    # Tokenise.  Joining with a space also between comments keeps the last
    # token of one comment from fusing with the first token of the next.
    text = ' '.join(
        ' '.join(jieba.cut(str(comment), cut_all=False))
        for comment in comments
    )

    # Stop words.  set.update() treats each argument as an iterable, so the
    # words are passed as one tuple - passing them as separate string
    # arguments would add their individual characters.
    stopwords = set(_EXTRA_STOPWORDS)
    # dropna/astype guard against entries that pandas parsed as NaN.
    stopwords.update(stopword_table['stopword'].dropna().astype(str))

    # Background image that shapes the cloud.
    mask = np.array(Image.open(BASE_DIR + "/z.png"))

    wc = WordCloud(
        background_color='black',
        mask=mask,
        font_path='FZSTK.TTF',
        max_words=2000,
        max_font_size=500,
        min_font_size=15,
        color_func=random_color_func,
        prefer_horizontal=1,
        random_state=60,
        stopwords=stopwords,
    )
    # The layout must be computed before the cloud can be shown or saved.
    wc.generate(text)

    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("词云.png")
    print('成功!')


def feci():
    """Score every comment's sentiment with SnowNLP and write ./s.csv.

    Columns 1 and 8 of all_comments.csv are taken as id and comment text.
    The first run of CJK characters in each comment is scored.  The output
    has no header row and the columns id, com1, com2, score.

    NOTE(review): the file is written to the current directory while ``xt``
    reads it from the fixed working directory - these only coincide when the
    script is run from that directory; confirm which location is intended.
    """
    import snownlp

    raw = _read_csv_skipping_bad_lines(
        HOMEWORK_DIR + "/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3,
    )
    # copy() so the column assignments below act on an independent frame.
    df = raw.iloc[:, [1, 8]].copy()
    df.columns = ['id', 'com1']
    df = df.drop_duplicates().dropna()
    df['com2'] = df['com1'].str.extract(r"([\u4e00-\u9fa5]+)", expand=False)
    df = df.dropna().copy()
    df['score'] = df['com2'].apply(
        lambda sentence: snownlp.SnowNLP(sentence).sentiments
    )
    # Overwrite instead of append: appending duplicates every row on each
    # re-run and embeds a UTF-8 BOM in the middle of the file.
    df.to_csv("./s.csv", encoding='utf_8_sig', mode='w', index=False,
              sep=',', header=False)


def fl(i):
    """Return *i* converted to float and formatted with one decimal place."""
    return format(float(i), "0.1f")


def xt():
    """Plot the sentiment-score distribution as an overlaid bar + line chart.

    Reads the headerless s.csv, buckets the score (column 3) to one decimal
    place and renders the bucket counts to 1情感分布.html.
    """
    from pyecharts import options as opts
    from pyecharts.charts import Bar, Line

    # header=None: the file has no header row, so header=0 would silently
    # consume the first record as column names.
    raw = pd.read_csv(BASE_DIR + "/s.csv", header=None, encoding='utf-8-sig')
    df = raw.iloc[:, [0, 3]].copy()
    df.columns = ['id', 'com']
    df['s'] = [fl(score) for score in df['com']]

    # Group and count comments per score bucket.
    per_bucket = _count_by(df, 's')
    counts = per_bucket['count'].tolist()
    buckets = per_bucket['s'].tolist()

    bar = Bar(init_opts=opts.InitOpts(width="800px", height="600px"))
    bar.add_xaxis(buckets)
    bar.add_yaxis(
        series_name='数量',
        y_axis=counts,
        label_opts=opts.LabelOpts(is_show=True),
        itemstyle_opts=opts.ItemStyleOpts(color='black'),
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="情感倾向分布"),
        tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
        xaxis_opts=opts.AxisOpts(
            name='情感得分',
            axislabel_opts={"rotate": 16},
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(name="数量", max_=3000),
    )

    # Line chart over the same buckets.
    line = Line()
    line.add_xaxis(buckets)
    line.add_yaxis(
        series_name='数量',
        itemstyle_opts=opts.ItemStyleOpts(color="#E6E6FA"),
        y_axis=counts,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=False),
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        z=2,
    )

    # Overlay the line on the bar chart, then save the combined chart.
    bar.overlap(line)
    bar.render(HOMEWORK_DIR + "/1情感分布.html")


if __name__ == "__main__":
    feci()
    xt()