You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

214 lines
7.4 KiB

import pandas as pd
#绘制用户性别比例
from pyecharts.charts import Pie
def g_gender():
    """Render a donut chart of commenter gender share to ``性别比例.html``.

    Reads ``gender.csv`` treating its first row as a header. Column 0
    supplies the slice names and column 1 the slice values; each label is
    rendered with the ``{b}: {c}%`` formatter.
    """
    # ``opts`` is imported only further down the file; binding it here keeps
    # the function usable regardless of how far module execution got.
    import pyecharts.options as opts

    # Close the handle as soon as the CSV has been parsed.
    with open("C:/Users/l'd/Desktop/可视化/py作业/gender.csv", 'r', encoding='utf-8-sig') as f:
        data = pd.read_csv(f, header=0)

    names = data.iloc[:, 0]
    values = data.iloc[:, 1]
    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(names, values)], radius=[60, 100])
        .set_colors(["silver", "pink"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论用户性别比"),
            legend_opts=opts.LegendOpts(pos_top="30", pos_left="4%"),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}%"))
    )
    pie.render("性别比例.html")
'''
'''
import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Pie
#绘制评论日期:
def g_date():
    """Render a pie chart of comment dates to ``评论日期.html``.

    Reads ``result.csv``; column 0 supplies the slice names and column 1
    the slice values.
    """
    # The path contains an apostrophe ("l'd"), so it must be delimited with
    # double quotes; single quotes end the literal early and break parsing.
    with open("C:/Users/l'd/Desktop/可视化/py作业/result.csv", 'r', encoding='utf-8') as f:
        data = pd.read_csv(f)

    names = data.iloc[:, 0]
    values = data.iloc[:, 1]
    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(names, values)])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论日期分布"),
            legend_opts=opts.LegendOpts(pos_left="80%"),
        )
    )
    pie.render("评论日期.html")
'''
'''
#评论时间分布图
import pandas as pd
from pyecharts import Line
def g_time():
    """Render a smoothed area chart of comment counts per hour of day.

    Reads ``time.csv``, takes the part of each ``时间`` value before the
    first ``:`` as the hour, counts rows per hour and writes the chart to
    ``评论的时间分布.html``.
    """
    df = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/time.csv", header=0, encoding='utf-8-sig')
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly rather than wrapped in print().
    df.info()
    df['time'] = [int(stamp.split(':')[0]) for stamp in df['时间']]

    # Group by hour and count the rows in each group.
    date_com = df.groupby(['time'])['time'].agg(['count'])
    date_com.reset_index(inplace=True)

    # Draw the distribution chart.
    attr = [str(hour) + ":00" for hour in date_com['time']]
    v1 = date_com['count']
    line = Line("评论的时间分布", title_pos='center', title_top='10', width=900, height=500)
    line.add(
        "", attr, v1,
        is_smooth=True, is_fill=True,
        area_color="#3299CC", is_xaxislabel_align=True,
        xaxis_min="dataMin", area_opacity=0.6,
        mark_point=['max', 'min', 'average'], mark_point_symbol="pin",
        mark_point_symbolsize=50, line_width=2,
    )
    line.render("评论的时间分布.html")
'''
'''
#用户属地分布
import pandas as pd
from pyecharts import Map
def g_map():
    """Render a China map shaded by the number of comments per user city.

    Reads ``all_comments.csv``, counts rows per value of the ``用户城市``
    column and writes the map to ``用户的地区分布图.html``.
    """
    comments = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv", header=0, encoding='utf-8-sig')
    comments['location'] = comments['用户城市']

    # One row per city with its comment count.
    per_city = comments.groupby(['location'])['location'].agg(['count'])
    per_city.reset_index(inplace=True)

    cities = list(per_city['location'])
    totals = list(per_city['count'])

    # Draw the map.
    china_map = Map("用户的地区分布图", title_pos='center', title_top=1)
    china_map.add(
        "",
        cities,
        totals,
        maptype="china",
        is_visualmap=True,
        visual_text_color="blue",
        is_map_symbol_show=False,
        visual_range=[0, 2500],
    )
    china_map.render('用户的地区分布图.html')
'''
'''
# 词云图 (word cloud)
#
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import pandas as pd
import random
import jieba
from PIL import Image
import numpy as np
# 设置文本随机颜色
def random_color_func(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
    """Return an HSL colour string picked at random from a fixed palette.

    The parameters mirror the keyword arguments a word-cloud ``color_func``
    receives; only ``random_state`` influences the result.

    Args:
        word, font_size, position, orientation, font_path: Accepted for
            signature compatibility and ignored.
        random_state: Optional generator exposing ``choice`` (for example a
            ``random.Random``). When given, it drives the pick so that a
            seeded generator yields a reproducible colour sequence; otherwise
            the module-level ``random`` generator is used.

    Returns:
        A string of the form ``"hsl(H, S%, L%)"``.
    """
    palette = [(2, 100, 50), (62, 89, 36), (205, 73, 59)]
    # Fall back to the global generator for None or any object that cannot
    # pick from a sequence (e.g. a bare integer seed).
    chooser = random_state if hasattr(random_state, "choice") else random
    hue, saturation, lightness = chooser.choice(palette)
    return "hsl({}, {}%, {}%)".format(hue, saturation, lightness)
def get_wc():
    """Build a masked word cloud from the comment text and save ``词云.png``.

    Steps: load the ``评论内容`` column of ``all_comments.csv``, segment each
    comment with jieba, drop stop words (a small built-in list plus the
    entries of ``csstop.txt``), lay the cloud out inside the shape given by
    ``z.png`` and write the image to disk.
    """
    # Load the comments.
    # NOTE: ``error_bad_lines`` was removed in pandas 2.0; on such versions
    # the equivalent spelling is ``on_bad_lines='skip'``.
    df1 = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv", header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False)
    comments = df1['评论内容'].dropna()
    # 'utf-8-sig' is the codec name; 'utf-8=sig' raises LookupError.
    words = pd.read_csv("C:/Users/l'd/Desktop/可视化/csstop.txt", encoding='utf-8-sig', sep='\t', names=['stopword'], quoting=3, error_bad_lines=False)

    # Segment every comment; joining with a space also separates the last
    # token of one comment from the first token of the next.
    text = ' '.join(
        ' '.join(jieba.cut(str(line), cut_all=False)) for line in comments
    )

    # Stop words. They are added as whole words: passing bare strings to
    # set.update() would insert their individual characters instead.
    stopwords = {'事情', '这是', '地方', '发生', '发现', '有人', '干什么'}
    stopwords.update(words['stopword'].dropna().astype(str))

    # Background image used as the layout mask.
    with Image.open("C:/Users/l'd/Desktop/可视化/z.png") as background_image:
        mask = np.array(background_image)

    wc = WordCloud(
        background_color='black',
        mask=mask,
        font_path='FZSTK.TTF',
        max_words=2000,
        max_font_size=500,
        min_font_size=15,
        color_func=random_color_func,
        prefer_horizontal=1,
        random_state=60,
        stopwords=stopwords,
    )
    # The layout must be computed before the cloud can be shown or saved.
    wc.generate(text)

    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("词云.png")
    print('成功!')
'''
'''
#评论情感分布图:
import pandas as pd
from pyecharts.charts import Bar,Line
from pyecharts import options as opts
import snownlp
def feci():  # compute sentiment scores
    """Score each comment with SnowNLP and append the result to ``./s.csv``.

    Columns 1 and 8 (by position) of ``all_comments.csv`` are kept as
    ``id`` and ``com1``. Duplicate and incomplete rows are dropped, the first
    run of characters in the range U+4E00..U+9FA5 is extracted into ``com2``,
    rows without such a run are dropped, and ``score`` holds the SnowNLP
    sentiment of ``com2``. The frame is appended to the CSV without header
    or index.
    """
    def sentiment(fragment):
        return snownlp.SnowNLP(fragment).sentiments

    raw = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv", header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False)

    scored = raw.iloc[:, [1, 8]]
    scored.columns = ['id', 'com1']
    scored = scored.drop_duplicates()
    scored = scored.dropna()

    scored['com2'] = scored['com1'].str.extract(r"([\u4e00-\u9fa5]+)")
    scored = scored.dropna()
    scored['score'] = scored["com2"].apply(sentiment)

    pd.DataFrame(scored).to_csv(
        "./s.csv",
        encoding='utf_8_sig',
        mode='a',
        index=False,
        sep=',',
        header=0,
    )
def fl(i):
    """Convert *i* to float and return it as text with one decimal place."""
    return f"{float(i):0.1f}"
def xt():
    """Chart how many comments fall into each one-decimal sentiment bucket.

    Reads ``s.csv``, takes column 0 as ``id`` and column 3 as the score,
    formats each score to one decimal place with ``fl``, counts rows per
    bucket and renders a bar chart with a smoothed line overlaid to
    ``1情感分布.html``.
    """
    # NOTE(review): header=0 consumes the first row as column names, while
    # feci() writes its CSV without a header row. If this file is that
    # output, the first record is silently skipped - confirm.
    df1 = pd.read_csv("C:/Users/l'd/Desktop/可视化/s.csv", header=0, encoding='utf-8-sig')
    # Work on an independent copy so adding a column below does not trigger
    # pandas' SettingWithCopy warning on the positional slice.
    df = df1.iloc[:, [0, 3]].copy()
    df.columns = ['id', 'com']
    df['s'] = [fl(score) for score in df['com']]

    # Group by bucket and count the rows in each group.
    date_c = df.groupby(['s'])['s'].agg(['count'])
    date_c.reset_index(inplace=True)
    counts = list(date_c['count'])
    buckets = list(date_c['s'])

    bar = Bar(init_opts=opts.InitOpts(width="800px", height="600px"))
    bar.add_xaxis(buckets)
    bar.add_yaxis(
        series_name='数量',
        y_axis=counts,
        label_opts=opts.LabelOpts(is_show=True),
        itemstyle_opts=opts.ItemStyleOpts('black'),
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="情感倾向分布"),
        tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
        xaxis_opts=opts.AxisOpts(
            name='情感得分',
            axislabel_opts={"rotate": 16},
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name="数量",
            max_=3000,
        ),
    )

    # Line chart over the same buckets.
    line = Line()
    line.add_xaxis(buckets)
    line.add_yaxis(
        series_name='数量',
        itemstyle_opts=opts.ItemStyleOpts("#E6E6FA"),
        y_axis=counts,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=False),
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        z=2,
    )

    # Lay the line on top of the bars; the merged chart is the bar object
    # that is rendered below, so no notebook rendering is needed here.
    bar.overlap(line)
    # Save as an HTML file.
    bar.render("C:/Users/l'd/Desktop/可视化/py作业/1情感分布.html")
if __name__ == "__main__":
    # Script entry point: compute the sentiment scores, then chart them.
    feci()
    xt()