parent
fdae1d3dcf
commit
83f868c66b
@ -0,0 +1,213 @@
|
||||
|
||||
import pandas as pd
|
||||
#绘制用户性别比例
|
||||
from pyecharts.charts import Pie
|
||||
def g_gender():
    """Render a ring-shaped pie chart of commenter gender to ``性别比例.html``.

    Reads ``gender.csv`` from the hard-coded desktop path, uses the first
    column as slice names and the second column as slice values, and writes
    the chart into the current working directory.
    """
    # Use a context manager so the CSV handle is closed as soon as pandas has
    # consumed it (the handle used to stay open for the rest of the process).
    with open("C:/Users/l'd/Desktop/可视化/py作业/gender.csv", 'r', encoding='utf-8-sig') as f:
        data = pd.read_csv(f, header=0)

    names = data.iloc[:, 0]
    values = data.iloc[:, 1]

    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(names, values)], radius=[60, 100])
        .set_colors(["silver", "pink"])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论用户性别比"),
            legend_opts=opts.LegendOpts(pos_top="30", pos_left="4%"),
        )
        # "%" is appended to the raw value unchanged -- assumes the second
        # column already holds percentages; TODO confirm.
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}%"))
    )
    pie.render("性别比例.html")
|
||||
'''
|
||||
|
||||
'''
|
||||
import pandas as pd
|
||||
import pyecharts.options as opts
|
||||
from pyecharts.charts import Pie
|
||||
#绘制评论日期:
|
||||
def g_date():
    """Render a pie chart of comments per date to ``评论日期.html``.

    Reads ``result.csv`` from the hard-coded desktop path, uses the first
    column as slice names and the second column as slice values, and writes
    the chart into the current working directory.
    """
    # The path itself contains an apostrophe ("l'd"), so the literal has to be
    # double-quoted; single quotes end the string early and the module does
    # not even parse. The context manager also closes the handle after reading.
    with open("C:/Users/l'd/Desktop/可视化/py作业/result.csv", 'r', encoding='utf-8') as f:
        data = pd.read_csv(f)

    names = data.iloc[:, 0]
    values = data.iloc[:, 1]

    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(names, values)])
        .set_global_opts(
            title_opts=opts.TitleOpts(title="评论日期分布"),
            legend_opts=opts.LegendOpts(pos_left="80%"),
        )
    )
    pie.render("评论日期.html")
|
||||
'''
|
||||
|
||||
|
||||
'''
|
||||
#评论时间分布图
|
||||
import pandas as pd
|
||||
from pyecharts import Line
|
||||
def g_time():
    """Render a smoothed, filled line chart of comment counts to ``评论的时间分布.html``.

    Reads ``time.csv`` from the hard-coded desktop path, takes the integer in
    front of the first ``:`` of every ``时间`` value as the grouping key, counts
    the rows per key and plots the counts against ``"<key>:00"`` labels.
    Assumes ``时间`` holds strings of the form ``HH:MM...`` -- TODO confirm.

    NOTE(review): ``Line(title, ...)`` / ``.add(...)`` with ``is_*`` keyword
    flags looks like the pyecharts 0.5.x API, whereas other functions in this
    file use the ``pyecharts.charts`` + ``opts`` style -- confirm which
    pyecharts version is installed.
    """
    df = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/time.csv", header=0, encoding='utf-8-sig')
    # DataFrame.info() prints its report itself and returns None, so wrapping
    # it in print() only added a stray "None" line to the output.
    df.info()
    df['time'] = [int(stamp.split(':')[0]) for stamp in df['时间']]

    # Count the rows that fall under each key.
    per_hour = df.groupby(['time'])['time'].agg(['count'])
    per_hour.reset_index(inplace=True)

    # Draw the distribution chart.
    attr = [str(hour) + ":00" for hour in per_hour['time']]
    v1 = per_hour['count']
    line = Line("评论的时间分布", title_pos='center', title_top='10', width=900, height=500)
    line.add(
        "", attr, v1,
        is_smooth=True, is_fill=True,
        area_color="#3299CC", is_xaxislabel_align=True,
        xaxis_min="dataMin", area_opacity=0.6,
        mark_point=['max', 'min', 'average'], mark_point_symbol="pin",
        mark_point_symbolsize=50, line_width=2,
    )
    line.render("评论的时间分布.html")
|
||||
'''
|
||||
|
||||
|
||||
|
||||
'''
|
||||
#用户属地分布
|
||||
import pandas as pd
|
||||
from pyecharts import Map
|
||||
def g_map():
    """Render a China map of comment counts per user location.

    Reads ``all_comments.csv`` from the hard-coded desktop path, counts the
    rows per ``用户城市`` value and writes the map to ``用户的地区分布图.html`` in
    the current working directory.

    NOTE(review): ``Map(title, ...)`` / ``.add(...)`` with ``is_*`` keyword
    flags looks like the pyecharts 0.5.x API -- confirm which pyecharts
    version is installed.
    """
    comments = pd.read_csv("C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv", header=0, encoding='utf-8-sig')
    comments['location'] = comments['用户城市']

    # Count the rows per location.
    per_location = comments.groupby(['location'])['location'].agg(['count'])
    per_location.reset_index(inplace=True)

    # Draw the map.
    names = list(per_location['location'])
    totals = list(per_location['count'])

    chart = Map("用户的地区分布图", title_pos='center', title_top=1)
    chart.add(
        "", names, totals,
        maptype="china",
        is_visualmap=True,
        visual_text_color="blue",
        is_map_symbol_show=False,
        visual_range=[0, 2500],
    )
    chart.render('用户的地区分布图.html')
|
||||
|
||||
'''
|
||||
|
||||
'''
|
||||
# Word cloud chart
|
||||
#
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import random
|
||||
import jieba
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
# 设置文本随机颜色
|
||||
def random_color_func(word=None, font_size=None, position=None, orientation=None, font_path=None, random_state=None):
    """Pick one of three fixed HSL colours for a word-cloud word.

    The function is passed as ``color_func`` to ``WordCloud`` in this file,
    which is why it accepts the per-word arguments below.

    Args:
        word, font_size, position, orientation, font_path: Accepted so the
            function can be used as a colour callback; they are ignored.
        random_state: Optional generator exposing ``choice`` (for example a
            ``random.Random`` instance). When given, the colour is drawn from
            it, so a seeded caller gets reproducible colours and the global
            generator is left untouched. When ``None``, the module-level
            ``random`` generator is used.

    Returns:
        A colour string such as ``"hsl(2, 100%, 50%)"``.
    """
    palette = [(2, 100, 50), (62, 89, 36), (205, 73, 59)]
    rng = random if random_state is None else random_state
    hue, saturation, lightness = rng.choice(palette)
    return "hsl({}, {}%, {}%)".format(hue, saturation, lightness)
|
||||
|
||||
def get_wc():
    """Build a word cloud from the comment texts and save it as ``词云.png``.

    Reads the ``评论内容`` column of ``all_comments.csv``, segments every comment
    with jieba, removes the stop words listed in ``csstop.txt`` plus a few
    hard-coded ones, lays the words out inside the shape given by ``z.png``
    and writes the image into the current working directory. All input paths
    are hard-coded.
    """
    # Read the comments.
    # NOTE(review): error_bad_lines was deprecated in pandas 1.3 in favour of
    # on_bad_lines="skip" -- confirm the installed pandas version.
    comments = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False,
    )
    contents = comments['评论内容'].dropna()

    # The encoding used to be spelled 'utf-8=sig', which is not a codec name
    # and makes read_csv raise LookupError.
    words = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/csstop.txt",
        encoding='utf-8-sig', sep='\t', names=['stopword'],
        quoting=3, error_bad_lines=False,
    )

    # Word segmentation. Join everything with single spaces so the last token
    # of one comment is not glued to the first token of the next one.
    text = ' '.join(
        ' '.join(jieba.cut(str(line), cut_all=False)) for line in contents
    )

    # Stop words. set.update() treats every string argument as an iterable of
    # characters, so the words are given as one collection to be added whole.
    stopwords = {
        '事情', '这是', '说', '地方', '干', '做', '发生', '事',
        '拆', '想', '请', '真', '发现', '太', '有人', '干什么',
    }
    # Drop values pandas parsed as missing so only strings end up in the set.
    stopwords.update(words['stopword'].dropna().astype(str))

    # Background image that defines the shape of the cloud.
    with Image.open("C:/Users/l'd/Desktop/可视化/z.png") as background:
        mask = np.array(background)

    wc = WordCloud(
        background_color='black',
        mask=mask,
        font_path='FZSTK.TTF',
        max_words=2000,
        max_font_size=500,
        min_font_size=15,
        color_func=random_color_func,
        prefer_horizontal=1,
        random_state=60,
        stopwords=stopwords,
    )
    # The layout has to be computed from the text before the cloud can be
    # drawn or saved; without this call to_file() has nothing to write.
    wc.generate(text)

    plt.imshow(wc)
    plt.axis('off')
    wc.to_file("词云.png")
    print('成功!')
|
||||
|
||||
'''
|
||||
'''
|
||||
#评论情感分布图:
|
||||
import pandas as pd
|
||||
from pyecharts.charts import Bar,Line
|
||||
from pyecharts import options as opts
|
||||
import snownlp
|
||||
|
||||
def feci():  # compute sentiment scores
    """Score every comment with SnowNLP and append the result to ``./s.csv``.

    Reads ``all_comments.csv`` from the hard-coded desktop path, keeps the
    columns at positions 1 and 8 (named ``id`` and ``com1`` here), removes
    duplicate and incomplete rows, extracts the first run of Chinese
    characters of each comment into ``com2`` and stores the SnowNLP sentiment
    of that run in ``score``. The four columns are appended to ``./s.csv``
    without a header row.

    NOTE(review): the file is opened in append mode, so running this twice
    stores every row twice -- confirm that is intended.
    """
    # NOTE(review): error_bad_lines was deprecated in pandas 1.3 in favour of
    # on_bad_lines="skip" -- confirm the installed pandas version.
    raw = pd.read_csv(
        "C:/Users/l'd/Desktop/可视化/py作业/all_comments.csv",
        header=0, encoding='utf-8-sig', quoting=3, error_bad_lines=False,
    )

    # Work on an explicit copy so the column rename and the new columns below
    # never write into a view of the frame that was read from disk.
    scored = raw.iloc[:, [1, 8]].copy()
    scored.columns = ['id', 'com1']
    scored = scored.drop_duplicates().dropna()

    # str.extract() keeps only the FIRST run of characters in the range
    # \u4e00-\u9fa5; anything after the first other character is not scored.
    # assign() returns a new frame instead of mutating a filtered one.
    scored = scored.assign(
        com2=scored['com1'].str.extract(r"([\u4e00-\u9fa5]+)", expand=False)
    )
    # Rows without any such character have no com2 and are dropped.
    scored = scored.dropna()
    scored = scored.assign(
        score=scored['com2'].apply(lambda text: snownlp.SnowNLP(text).sentiments)
    )

    scored.to_csv("./s.csv", encoding='utf_8_sig', mode='a', index=False, sep=',', header=False)
|
||||
|
||||
|
||||
def fl(i):
    """Return *i* converted to ``float`` and formatted with one decimal place.

    The result is a string, for example ``"0.9"``.
    """
    return "{:0.1f}".format(float(i))
|
||||
|
||||
def xt():
    """Plot how many comments share each sentiment score (one decimal place).

    Reads the scored comments from the hard-coded ``s.csv`` path, formats the
    value in column 3 to one decimal place with ``fl``, counts the rows per
    formatted score and writes a bar chart with a smoothed line overlaid on
    it to ``1情感分布.html``.
    """
    # feci() writes s.csv without a header row, so header=0 would silently
    # consume the first record as column names; read every line as data.
    scored = pd.read_csv("C:/Users/l'd/Desktop/可视化/s.csv", header=None, encoding='utf-8-sig')

    # Copy the two columns so adding 's' below does not write into a slice.
    df = scored.iloc[:, [0, 3]].copy()
    df.columns = ['id', 'com']
    df['s'] = [fl(value) for value in df['com']]

    # Count the rows per formatted score.
    buckets = df.groupby(['s'])['s'].agg(['count'])
    buckets.reset_index(inplace=True)
    counts = list(buckets['count'])
    labels = list(buckets['s'])

    bar = Bar(init_opts=opts.InitOpts(width="800px", height="600px"))
    bar.add_xaxis(labels)
    bar.add_yaxis(
        series_name='数量',
        y_axis=counts,
        label_opts=opts.LabelOpts(is_show=True),
        itemstyle_opts=opts.ItemStyleOpts('black'),
    )
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title="情感倾向分布"),
        tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
        xaxis_opts=opts.AxisOpts(
            name='情感得分',
            axislabel_opts={"rotate": 16},
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow"),
        ),
        # NOTE(review): the y axis is capped at a fixed 3000 -- confirm no
        # bucket can exceed that for the data being plotted.
        yaxis_opts=opts.AxisOpts(
            name="数量",
            max_=3000,
        ),
    )

    # Line chart over the same data.
    line = Line()
    line.add_xaxis(labels)
    line.add_yaxis(
        series_name='数量',
        itemstyle_opts=opts.ItemStyleOpts("#E6E6FA"),
        y_axis=counts,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=False),
        areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
        z=2,
    )

    # Lay the line chart on top of the bar chart. The former
    # .render_notebook() call produced a notebook object that was discarded
    # immediately, so it is dropped; the HTML file below is the only output.
    bar.overlap(line)

    # Save as an HTML file.
    bar.render("C:/Users/l'd/Desktop/可视化/py作业/1情感分布.html")
|
||||
|
||||
if __name__ == "__main__":
    # Compute and store the sentiment scores, then draw the distribution chart.
    feci()
    xt()
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in new issue