You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

121 lines
4.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud
import jieba
import cv2 as cv
from pylab import mpl
import os
# `_rebuild` is a private matplotlib helper that is not available in every
# matplotlib release; import it defensively so this module still loads when
# the helper is missing.
try:
    from matplotlib.font_manager import _rebuild
except ImportError:
    _rebuild = None

if _rebuild is not None:
    _rebuild()  # reload matplotlib's font list
# Select a font that can display Chinese text.
mpl.rcParams["font.sans-serif"] = ["FangSong"]
# Make symbols (the minus sign) display normally with that font.
mpl.rcParams["axes.unicode_minus"] = False
def prepressingData(data1):
    """Clean the raw review columns and derive a numeric star score.

    Works on a copy; *data1* itself is left untouched.

    Steps performed on the copy:
      * '赞成数' and '反对数' are converted to strings with newline
        characters removed.
      * '回复数' is converted to a string with the text '回应' removed.
      * '评价得分' is added: 5 for '力荐', 4 for '推荐', 3 for '还行',
        2 for '较差', 1 for '很差', and 0 for any other value of '评价'.
      * The '评论主体' column is dropped.

    Args:
        data1: DataFrame containing the columns '赞成数', '反对数',
            '回复数', '评价' and '评论主体'.

    Returns:
        A new DataFrame with the cleaned columns and the extra
        '评价得分' column.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    data = data1.copy()

    # Column-wise string operations keep every value aligned with its own row
    # label, so the result is correct for any index (not only 0..n-1) and does
    # not rely on writing through a Series taken from the frame.
    for column in ('赞成数', '反对数'):
        data[column] = data[column].astype(str).str.replace('\n', '', regex=False)
    data['回复数'] = data['回复数'].astype(str).str.replace('回应', '', regex=False)

    star_by_rating = {'力荐': 5, '推荐': 4, '还行': 3, '较差': 2, '很差': 1}
    # Ratings that are not in the table (including missing ones) score 0.
    data['评价得分'] = data['评价'].map(star_by_rating).fillna(0).astype(int)

    # drop() returns a new frame; the result has to be used for the column
    # removal to take effect.
    return data.drop(columns=['评论主体'])
def cvtInt(data1):
    """Convert the vote and reply count columns to numeric values.

    Works on a copy; *data1* itself is left untouched.

    Values that cannot be parsed as numbers become NaN instead of leaving the
    whole column as text. For '赞成数' and '反对数' those NaN values are then
    replaced by 0; '回复数' keeps its NaN values.

    Args:
        data1: DataFrame containing the columns '赞成数', '反对数' and
            '回复数'.

    Returns:
        A new DataFrame with the three columns converted to numeric dtype.
    """
    data = data1.copy()
    # errors='coerce' converts bad cells individually; the previous
    # errors='ignore' returned the column unchanged as soon as one cell was
    # unparseable, and is deprecated in recent pandas versions.
    data['赞成数'] = pd.to_numeric(data1['赞成数'], errors='coerce').fillna(0)
    data['反对数'] = pd.to_numeric(data1['反对数'], errors='coerce').fillna(0)
    data['回复数'] = pd.to_numeric(data1['回复数'], errors='coerce')
    return data
def dropUselessTime(data):
    """Return a copy of *data* without the rows whose '评论时间' is 'no star'.

    The input frame is not modified. Rows are removed by index label, so the
    remaining rows keep their original labels.
    """
    frame = data.copy()
    has_no_time = frame['评论时间'] == 'no star'
    labels_to_drop = frame[has_no_time].index
    return frame.drop(index=labels_to_drop)
def _plot_hourly_series(hours, values, xlabel, ylabel, title, out_file):
    """Draw one line chart with an x tick for each hour 0-23 and save it."""
    fig = plt.figure(figsize=(8, 6))
    plt.plot(hours, values)
    plt.xlabel(xlabel, size=23)
    plt.ylabel(ylabel, size=23)
    plt.title(title, size=23)
    plt.xticks(list(range(0, 24)))  # positions of the x-axis ticks
    plt.savefig(out_file)
    # Release the figure once it is on disk so repeated calls do not pile up
    # open figures.
    plt.close(fig)


def plotTimeWithData(data, path):
    """Plot the review count and the mean review score per hour of day.

    Two PNG files are written, their names appended directly to *path*:
    '评论数量随时间段的变化.png' (number of reviews per hour) and
    '评论均值随时间段的变化.png' (mean of '评价得分' per hour).

    Args:
        data: DataFrame with the columns 'hour', 'month' and '评价得分'.
            It is only read, never modified.
        path: String prefix for the output files; it is concatenated with
            the file name as-is, so it must already end with a path
            separator if it names a directory.
    """
    by_hour = data.groupby('hour')

    # Number of non-null 'month' entries per hour = number of reviews.
    counts = by_hour['month'].count()
    _plot_hourly_series(
        counts.index, counts,
        u'时间/hour', u'评论数量', u'评论数量随时间段的变化',
        path + '评论数量随时间段的变化.png',
    )

    # Aggregate only the score column: averaging the whole frame fails on
    # text columns in pandas >= 2.0.
    means = by_hour['评价得分'].mean()
    _plot_hourly_series(
        means.index, means,
        '时间/hour', '评论分均值', '评论均值随时间段的变化',
        path + '评论均值随时间段的变化.png',
    )
def save_pics(name, path):
    """Generate the word cloud and statistics charts for one review CSV.

    Reads the mask image ``path + name + '.jpg'`` and the review table
    ``name + '.csv'`` inside *path*, then writes into *path*:

      * '词云.png' - word cloud built from the '评论标题' column,
      * '各星级的人数统计.png' - histogram of '评价得分',
      * the two per-hour charts produced by ``plotTimeWithData``.

    Args:
        name: Base name (without extension) shared by the image and the CSV.
        path: String prefix of the working directory. It is concatenated
            directly with file names, so it is expected to end with a path
            separator.

    Side effects:
        Prints the mask image path, creates *path* if it does not exist,
        sets ``plt.rcParams['font.sans-serif']`` to ``['SimHei']`` and
        writes the PNG files listed above.
    """
    pic_path = path + name + '.jpg'
    print(pic_path)
    background_image = cv.imread(pic_path)
    os.makedirs(path, exist_ok=True)

    filename = name + '.csv'
    # os.path.join instead of a hard-coded '\\' so the CSV is also found on
    # systems whose path separator is not a backslash.
    yingping = pd.read_csv(os.path.join(path, filename))

    # Join all titles in one pass; missing titles are skipped instead of
    # breaking the string concatenation.
    all_content = ''.join(yingping['评论标题'].dropna().astype(str))
    cut_text = " ".join(jieba.cut(all_content))
    wordcloud = WordCloud(
        font_path="C:/Windows/Fonts/SimHei.ttf",
        mask=background_image,
        background_color='white',
    ).generate(cut_text)
    cloud_fig = plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.savefig(path + '词云.png')
    plt.close(cloud_fig)  # free the figure once it has been saved

    yingping = prepressingData(yingping)

    # Start plotting.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    hist_fig = plt.figure(figsize=(10, 8))
    plt.hist(yingping['评价得分'], bins=15)
    plt.xlabel('星级0表示没有评级', size=14)
    plt.ylabel('人数', size=14)
    plt.savefig(path + '各星级的人数统计.png')
    plt.close(hist_fig)

    yingping = cvtInt(yingping)
    yingping = dropUselessTime(yingping)
    yingping.index = yingping['评论时间']
    # Parse the timestamps once, after the index has been set, so the derived
    # columns share the frame's index and are assigned row by row.
    timestamps = pd.to_datetime(yingping['评论时间'])
    yingping['month'] = timestamps.dt.month
    yingping['hour'] = timestamps.dt.hour
    plotTimeWithData(yingping, path)