You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
214 lines
7.4 KiB
214 lines
7.4 KiB
|
|
import pandas as pd
|
|
#绘制用户性别比例
|
|
from pyecharts.charts import Pie
|
|
def g_gender():
    """Render the commenter gender ratio as a ring-shaped pie chart.

    Reads ``gender.csv`` (first row is treated as the header), uses the
    first column as slice names and the second column as slice values, and
    writes the chart to ``性别比例.html`` in the current working directory.
    """
    # Hand the path to pandas directly so the file is opened and closed for
    # us; the previous explicit open() never closed its handle.
    data = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/gender.csv",
        header=0,
        encoding='utf-8-sig',
    )
    names = data.iloc[:, 0]
    values = data.iloc[:, 1]

    pie = (
        Pie()
        # Two radii (inner, outer) are passed, giving the ring shape.
        .add("", [list(pair) for pair in zip(names, values)], radius=[60, 100])
        .set_colors(["silver", "pink"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论用户性别比"),
            legend_opts=opts.LegendOpts(pos_top="30", pos_left="4%"),
        )
        # Label template: slice name, then its value followed by a percent sign.
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}%"))
    )
    pie.render("性别比例.html")
|
|
'''
|
|
|
|
'''
|
|
import pandas as pd
|
|
import pyecharts.options as opts
|
|
from pyecharts.charts import Pie
|
|
#绘制评论日期:
|
|
def g_date():
    """Render the distribution of comment dates as a pie chart.

    Reads ``result.csv``, uses the first column as slice names and the
    second column as slice values, and writes the chart to ``评论日期.html``
    in the current working directory.
    """
    # The path contains an apostrophe, so it must be a double-quoted literal
    # (single quotes end the string early and make the module unparsable).
    # Passing the path to pandas also avoids leaking an open file handle.
    data = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/result.csv",
        encoding='utf-8',
    )
    names = data.iloc[:, 0]
    values = data.iloc[:, 1]

    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(names, values)])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论日期分布"),
            legend_opts=opts.LegendOpts(pos_left="80%"),
        )
    )
    pie.render("评论日期.html")
|
|
'''
|
|
|
|
|
|
'''
|
|
#评论时间分布图
|
|
import pandas as pd
|
|
from pyecharts import Line
|
|
def g_time():
    """Render the number of comments per hour of day as a filled line chart.

    Reads ``time.csv``, takes the hour part (text before the first ``:``)
    of every value in the ``时间`` column, counts the comments per hour and
    writes the chart to ``评论的时间分布.html`` in the current working directory.
    """
    df = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/time.csv",
        header=0,
        encoding='utf-8-sig',
    )
    # DataFrame.info() prints its summary itself and returns None, so it is
    # called directly; wrapping it in print() emitted an extra "None" line.
    df.info()

    # Hour of day as an integer, e.g. "08:15" -> 8.
    df['time'] = [int(stamp.split(':')[0]) for stamp in df['时间']]

    # Group by hour and count the rows in each group.
    per_hour = df.groupby(['time'])['time'].agg(['count'])
    per_hour.reset_index(inplace=True)

    # Build the chart: x labels such as "8:00", y values are the counts.
    attr = [str(hour) + ":00" for hour in per_hour['time']]
    v1 = per_hour['count']

    # NOTE(review): this `Line(title, ...)` / `add(name, x, y, ...)` call
    # style differs from the `add_xaxis` / `add_yaxis` builder style used
    # with `Line` elsewhere in this file, and the module re-imports `Line`
    # from `pyecharts.charts` further down, which rebinds the name this
    # function resolves at call time -- confirm the targeted pyecharts version.
    line = Line("评论的时间分布", title_pos='center', title_top='10', width=900, height=500)
    line.add(
        "", attr, v1,
        is_smooth=True, is_fill=True,
        area_color="#3299CC", is_xaxislabel_align=True,
        xaxis_min="dataMin", area_opacity=0.6,
        mark_point=['max', 'min', 'average'], mark_point_symbol="pin",
        mark_point_symbolsize=50, line_width=2,
    )
    line.render("评论的时间分布.html")
|
|
'''
|
|
|
|
|
|
|
|
'''
|
|
#用户属地分布
|
|
import pandas as pd
|
|
from pyecharts import Map
|
|
def g_map():
    """Render the number of comments per user location on a map of China.

    Reads ``all_comments.csv``, counts the rows for every distinct value of
    the ``用户城市`` column and writes the chart to ``用户的地区分布图.html`` in the
    current working directory.
    """
    comments = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
        header=0,
        encoding='utf-8-sig',
    )
    comments['location'] = comments['用户城市']

    # Group by location and count the rows in each group.
    per_location = comments.groupby(['location'])['location'].agg(['count'])
    per_location.reset_index(inplace=True)

    # Plain lists of region names and their comment counts for the chart.
    regions = list(per_location['location'])
    totals = list(per_location['count'])

    # NOTE(review): `Map(title, ...)` / `add(...)` is a different call style
    # from the `pyecharts.charts` builder API used by other functions in
    # this file -- confirm which pyecharts version is targeted.
    chart = Map("用户的地区分布图", title_pos='center', title_top=1)
    chart.add(
        "", regions, totals,
        maptype="china",
        is_visualmap=True,
        visual_text_color="blue",
        is_map_symbol_show=False,
        visual_range=[0, 2500],
    )
    chart.render('用户的地区分布图.html')
|
|
|
|
'''
|
|
|
|
'''
|
|
# Word cloud chart
|
|
#
|
|
from wordcloud import WordCloud
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
import random
|
|
import jieba
|
|
from PIL import Image
|
|
import numpy as np
|
|
|
|
# 设置文本随机颜色
|
|
def random_color_func(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
    """Return a random ``hsl(...)`` colour string from a fixed three-colour palette.

    None of the parameters is used by the body; the choice is drawn from the
    module-level ``random`` generator, not from ``random_state``.
    """
    # (hue, saturation %, lightness %) triples to pick from.
    palette = [(2, 100, 50), (62, 89, 36), (205, 73, 59)]
    return "hsl({}, {}%, {}%)".format(*random.choice(palette))
|
|
|
|
def get_wc():
    """Build a word cloud from the comment texts and save it as ``词云.png``.

    Reads the ``评论内容`` column of ``all_comments.csv``, segments every
    comment with jieba, removes stop words (a built-in list plus the entries
    of ``csstop.txt``) and draws the cloud inside the shape given by the
    mask image ``z.png``.  Prints ``成功!`` when the image has been written.
    """
    # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 in favour
    # of `on_bad_lines='skip'` and removed in later releases -- confirm the
    # pandas version this script targets before changing it.
    comments = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False,
    )['评论内容'].dropna()
    # The encoding used to be spelled 'utf-8=sig', which is not a codec name.
    stop_table = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/csstop.txt",
        encoding='utf-8-sig', sep='\t', names=['stopword'],
        quoting=3, error_bad_lines=False,
    )

    # Word segmentation.  All tokens are joined with a single space so the
    # last token of one comment is not glued to the first token of the next.
    text = ' '.join(
        token
        for comment in comments
        for token in jieba.cut(str(comment), cut_all=False)
    )

    # Stop words.  A set literal keeps each entry as a whole word;
    # set.update('事情', ...) would have added the individual characters.
    stopwords = {
        '事情', '这是', '说', '地方', '干', '做', '发生', '事',
        '拆', '想', '请', '真', '发现', '太', '有人', '干什么',
    }
    stopwords.update(stop_table['stopword'].dropna().astype(str))

    # Background image used as the mask of the cloud.
    mask = np.array(Image.open("C:/Users/l'd/Desktop/可视化/z.png"))

    wc = WordCloud(
        background_color='black',
        mask=mask,
        font_path='FZSTK.TTF',
        max_words=2000,
        max_font_size=500,
        min_font_size=15,
        color_func=random_color_func,
        prefer_horizontal=1,
        random_state=60,
        stopwords=stopwords,
    )
    # The layout has to be computed from the text before the cloud can be
    # shown or saved; this call was missing, leaving `text` unused.
    wc.generate(text)

    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("词云.png")
    print('成功!')
|
|
|
|
'''
|
|
'''
|
|
#评论情感分布图:
|
|
import pandas as pd
|
|
from pyecharts.charts import Bar,Line
|
|
from pyecharts import options as opts
|
|
import snownlp
|
|
|
|
def feci():
    """Compute a sentiment score for every comment and append it to ``./s.csv``.

    Takes columns 1 and 8 (by position) of ``all_comments.csv`` as ``id`` and
    ``com1``, drops duplicate and incomplete rows, extracts the first run of
    Chinese characters of each comment into ``com2`` and scores it with
    SnowNLP.  The rows (``id``, ``com1``, ``com2``, ``score``) are appended to
    ``./s.csv`` without a header row and without the index.
    """
    # NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 in favour
    # of `on_bad_lines='skip'` and removed in later releases -- confirm the
    # pandas version this script targets before changing it.
    raw = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False,
    )

    # Work on independent copies so the column assignments below never
    # write into a slice of another frame.
    df = raw.iloc[:, [1, 8]].copy()
    df.columns = ['id', 'com1']
    df = df.drop_duplicates().dropna().copy()

    # First run of CJK characters in the comment; NaN when there is none.
    # expand=False returns a Series, which is what a single new column needs.
    df['com2'] = df['com1'].str.extract(r"([\u4e00-\u9fa5]+)", expand=False)
    df = df.dropna().copy()

    df['score'] = df["com2"].apply(lambda text: snownlp.SnowNLP(text).sentiments)

    # mode='a' appends, so running this function again adds the rows a
    # second time.  header=False: no column-name row is written.
    df.to_csv("./s.csv", encoding='utf_8_sig', mode='a', index=False, sep=',', header=False)
|
|
|
|
|
|
def fl(i):
    """Convert *i* to ``float`` and return it formatted with one decimal place."""
    return format(float(i), "0.1f")
|
|
|
|
def xt():
    """Plot the distribution of sentiment scores as a bar chart with a line overlay.

    Reads ``s.csv``, rounds the score in column 3 (by position) to one
    decimal place with ``fl``, counts the rows per rounded score and writes
    the combined chart to ``1情感分布.html``.
    """
    # The scores file is written by feci() without a header row (assuming
    # this path is that file), so header=None keeps the first record instead
    # of consuming it as column names.
    scores = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/s.csv",
        header=None,
        encoding='utf-8-sig',
    )
    # Copy so the column assignments below do not write into a slice.
    df = scores.iloc[:, [0, 3]].copy()
    df.columns = ['id', 'com']
    df['s'] = [fl(value) for value in df['com']]

    # Group by rounded score and count the rows in each group.
    per_score = df.groupby(['s'])['s'].agg(['count'])
    per_score.reset_index(inplace=True)
    counts = per_score['count'].tolist()
    buckets = per_score['s'].tolist()

    # Bar chart.
    bar = Bar(init_opts=opts.InitOpts(width="800px", height="600px"))
    bar.add_xaxis(buckets)
    bar.add_yaxis(
        series_name='数量',
        y_axis=counts,
        label_opts=opts.LabelOpts(is_show=True),
        itemstyle_opts=opts.ItemStyleOpts(color='black'),
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="情感倾向分布"),
        tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
        xaxis_opts=opts.AxisOpts(
            name='情感得分',
            axislabel_opts={"rotate": 16},
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        yaxis_opts=opts.AxisOpts(
            name="数量",
            max_=3000,
        ),
    )

    # Line chart over the same data.
    line = Line()
    line.add_xaxis(buckets)
    line.add_yaxis(
        series_name='数量',
        itemstyle_opts=opts.ItemStyleOpts(color="#E6E6FA"),
        y_axis=counts,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=False),
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        z=2,
    )

    # Stack the line chart on top of the bar chart.  The result of the old
    # trailing render_notebook() call was discarded, so it has been removed.
    bar.overlap(line)

    # Save the combined chart as an HTML file.
    bar.render("C:/Users/l'd/Desktop/可视化/py作业/1情感分布.html")
|
|
|
|
if __name__ == "__main__":
    # Score the comments first, then chart the scores that were written.
    feci()
    xt()
|
|
|
|
|
|
|
|
|
|
|