"""Visualisations for a scraped comment data set.

Each ``g_*`` function reads one CSV file and renders one chart to an HTML
file; ``get_wc`` renders a word cloud image; ``feci`` scores comment
sentiment with SnowNLP and ``xt`` plots the distribution of those scores.

NOTE(review): ``g_time`` and ``g_map`` are written against the pre-1.0
pyecharts API (``from pyecharts import Line``), while every other chart uses
the 1.x API (``pyecharts.charts`` / ``pyecharts.options``).  The two APIs do
not coexist in one installed version, so the legacy imports are kept local to
the functions that need them -- confirm the installed version or port them.
"""
import random

import jieba
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyecharts.options as opts
import snownlp
from PIL import Image
from pyecharts.charts import Bar, Line, Pie
from wordcloud import WordCloud


# --- User gender ratio -------------------------------------------------------
def g_gender():
    """Render a ring chart of the commenters' gender ratio.

    Reads ``gender.csv`` (first column: label, second column: value) and
    writes the chart to ``性别比例.html``.
    """
    # ``with`` guarantees the handle is closed even if parsing fails.
    with open("C:/Users/l'd/Desktop/可视化/py作业/gender.csv", 'r',
              encoding='utf-8-sig') as f:
        data = pd.read_csv(f, header=0)
    labels = data.iloc[:, 0]
    values = data.iloc[:, 1]
    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(labels, values)],
             radius=[60, 100])
        .set_colors(["silver", "pink"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论用户性别比"),
            legend_opts=opts.LegendOpts(pos_top="30", pos_left="4%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}%"))
    )
    pie.render("性别比例.html")


# --- Comment date distribution -----------------------------------------------
def g_date():
    """Render a pie chart of the comment date distribution.

    Reads ``result.csv`` (first column: label, second column: value) and
    writes the chart to ``评论日期.html``.
    """
    # The path contains an apostrophe, so it must be double-quoted.
    with open("C:/Users/l'd/Desktop/可视化/py作业/result.csv", 'r',
              encoding='utf-8') as f:
        data = pd.read_csv(f)
    labels = data.iloc[:, 0]
    values = data.iloc[:, 1]
    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(labels, values)])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论日期分布"),
            legend_opts=opts.LegendOpts(pos_left="80%"),
        )
    )
    pie.render("评论日期.html")


# --- Comment time-of-day distribution ----------------------------------------
def g_time():
    """Render a line chart of how many comments were posted in each hour.

    Reads ``time.csv``, takes the hour part of the ``时间`` column, counts
    rows per hour and writes the chart to ``评论的时间分布.html``.
    """
    # Legacy (pre-1.0) pyecharts chart class.  Imported locally so that it is
    # not shadowed by the module-level ``pyecharts.charts.Line`` and so that
    # its absence does not break importing this module.
    from pyecharts import Line

    df = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/time.csv",
                     header=0, encoding='utf-8-sig')
    # ``DataFrame.info`` prints by itself and returns None.
    df.info()
    df['time'] = [int(stamp.split(':')[0]) for stamp in df['时间']]

    # Group by hour and count the rows in each group.
    hourly = df.groupby(['time'])['time'].agg(['count'])
    hourly.reset_index(inplace=True)

    # Draw the distribution.
    attr = [str(hour) + ":00" for hour in hourly['time']]
    v1 = hourly['count']
    line = Line("评论的时间分布", title_pos='center', title_top='10',
                width=900, height=500)
    line.add(
        "", attr, v1,
        is_smooth=True,
        is_fill=True,
        area_color="#3299CC",
        is_xaxislabel_align=True,
        xaxis_min="dataMin",
        area_opacity=0.6,
        mark_point=['max', 'min', 'average'],
        mark_point_symbol="pin",
        mark_point_symbolsize=50,
        line_width=2,
    )
    line.render("评论的时间分布.html")


# --- User location distribution ----------------------------------------------
def g_map():
    """Render a China map coloured by the number of comments per location.

    Reads ``all_comments.csv``, counts rows per ``用户城市`` value and writes
    the chart to ``用户的地区分布图.html``.
    """
    # Legacy (pre-1.0) pyecharts chart class; see the note in ``g_time``.
    from pyecharts import Map

    df = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
                     header=0, encoding='utf-8-sig')
    df['location'] = df['用户城市']

    # Group by location and count the rows in each group.
    per_location = df.groupby(['location'])['location'].agg(['count'])
    per_location.reset_index(inplace=True)

    # Draw the map.
    names = list(per_location['location'])
    counts = list(per_location['count'])
    chart = Map("用户的地区分布图", title_pos='center', title_top=1)
    chart.add("", names, counts, maptype="china", is_visualmap=True,
              visual_text_color="blue", is_map_symbol_show=False,
              visual_range=[0, 2500])
    chart.render('用户的地区分布图.html')


# --- Word cloud --------------------------------------------------------------
def random_color_func(word=None, font_size=None, position=None,
                      orientation=None, font_path=None, random_state=None):
    """Return one of three fixed HSL colours, chosen at random.

    The parameters are accepted and ignored; the function is passed to
    ``WordCloud`` as its ``color_func``.
    """
    hue, saturation, lightness = random.choice(
        [(2, 100, 50), (62, 89, 36), (205, 73, 59)]
    )
    return "hsl({}, {}%, {}%)".format(hue, saturation, lightness)


def get_wc():
    """Build a word cloud from the comment text and save it to ``词云.png``.

    Reads the ``评论内容`` column of ``all_comments.csv``, segments it with
    jieba, removes stop words (a built-in list plus ``csstop.txt``) and draws
    the cloud inside the shape given by ``z.png``.
    """
    # Load the comments.
    # NOTE(review): ``error_bad_lines`` was removed in pandas 2.0 in favour
    # of ``on_bad_lines='skip'``; kept as-is for the pandas version this
    # script was written for -- confirm before upgrading.
    comments_df = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False)
    comments = comments_df['评论内容'].dropna()
    words = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/csstop.txt",
        encoding='utf-8-sig', sep='\t', names=['stopword'],
        quoting=3, error_bad_lines=False)

    # Segment every comment; join with spaces so that the last token of one
    # comment is not glued to the first token of the next.
    text = ' '.join(
        ' '.join(jieba.cut(str(comment), cut_all=False))
        for comment in comments
    )

    # Stop words.  ``set.update`` iterates each argument, so the words must
    # be passed as one collection, not as separate strings (which would add
    # their individual characters instead).
    stopwords = set()
    stopwords.update([
        '事情', '这是', '说', '地方', '干', '做', '发生', '事', '拆', '想',
        '请', '真', '发现', '太', '有人', '干什么',
    ])
    stopwords.update(words['stopword'])

    # Background image used as the cloud mask.
    mask = np.array(Image.open("C:/Users/l'd/Desktop/可视化/z.png"))
    wc = WordCloud(
        background_color='black',
        mask=mask,
        font_path='FZSTK.TTF',
        max_words=2000,
        max_font_size=500,
        min_font_size=15,
        color_func=random_color_func,
        prefer_horizontal=1,
        random_state=60,
        stopwords=stopwords,
    )
    # The layout has to be computed before the cloud can be shown or saved.
    wc.generate(text)
    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("词云.png")
    print('成功!')


# --- Comment sentiment distribution ------------------------------------------
def feci():
    """Score every comment with SnowNLP and append the result to ``./s.csv``.

    Takes columns 1 and 8 of ``all_comments.csv`` (by position), keeps the
    first run of Chinese characters of each comment, scores it and appends
    the rows ``id, com1, com2, score`` without a header line.
    """
    raw = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False)
    # Copy so that the column assignments below act on an independent frame.
    df = raw.iloc[:, [1, 8]].copy()
    df.columns = ['id', 'com1']
    df = df.drop_duplicates().dropna()
    # expand=False yields a Series for the single capture group.
    df['com2'] = df['com1'].str.extract(r"([\u4e00-\u9fa5]+)", expand=False)
    df = df.dropna()
    df['score'] = df["com2"].apply(
        lambda text: snownlp.SnowNLP(text).sentiments)
    # NOTE(review): mode='a' appends on every run, so re-running duplicates
    # rows -- confirm that this is intended.
    df.to_csv("./s.csv", encoding='utf_8_sig', mode='a', index=False,
              sep=',', header=0)


def fl(i):
    """Return ``i`` converted to float and formatted with one decimal."""
    return format(float(i), "0.1f")


def xt():
    """Plot the distribution of sentiment scores as a bar + line overlay.

    Reads ``s.csv``, rounds the score in column 3 to one decimal, counts the
    rows per rounded score and writes the chart to ``1情感分布.html``.
    """
    # ``feci`` writes the file without a header row, so none must be consumed
    # here; header=0 would silently drop the first data row.
    scores = pd.read_csv("C:/Users/l'd/Desktop/可视化/s.csv",
                         header=None, encoding='utf-8-sig')
    df = scores.iloc[:, [0, 3]].copy()
    df.columns = ['id', 'com']
    df['s'] = [fl(value) for value in df['com']]

    # Group by rounded score and count the rows in each group.
    per_score = df.groupby(['s'])['s'].agg(['count'])
    per_score.reset_index(inplace=True)
    counts = list(per_score['count'])
    buckets = list(per_score['s'])

    bar = Bar(init_opts=opts.InitOpts(width="800px", height="600px"))
    bar.add_xaxis(buckets)
    bar.add_yaxis(
        series_name='数量',
        y_axis=counts,
        label_opts=opts.LabelOpts(is_show=True),
        itemstyle_opts=opts.ItemStyleOpts('black'),
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="情感倾向分布"),
        tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
        xaxis_opts=opts.AxisOpts(
            name='情感得分',
            axislabel_opts={"rotate": 16},
            axispointer_opts=opts.AxisPointerOpts(is_show=True,
                                                  type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name="数量",
            max_=3000,
        ),
    )

    # Line chart over the same data.
    line = Line()
    line.add_xaxis(buckets)
    line.add_yaxis(
        series_name='数量',
        itemstyle_opts=opts.ItemStyleOpts("#E6E6FA"),
        y_axis=counts,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=False),
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        z=2,
    )

    # Overlay the line chart on top of the bar chart.
    bar.overlap(line).render_notebook()
    # Save as an HTML file.
    bar.render("C:/Users/l'd/Desktop/可视化/py作业/1情感分布.html")


if __name__ == "__main__":
    feci()
    xt()