|
|
import pandas as pd
|
|
|
import matplotlib.pyplot as plt
|
|
|
import numpy as np
|
|
|
from wordcloud import WordCloud
|
|
|
import jieba
|
|
|
import cv2 as cv
|
|
|
from pylab import mpl
|
|
|
import os
|
|
|
try:
    # ``_rebuild`` is a private Matplotlib helper that newer releases no
    # longer provide; a failed import must not abort the whole script.
    from matplotlib.font_manager import _rebuild
except ImportError:
    _rebuild = None

if _rebuild is not None:
    _rebuild()  # reload Matplotlib's font information

# Use a font that can display Chinese text.
mpl.rcParams["font.sans-serif"] = ["FangSong"]

# Make the minus sign display correctly with that font.
mpl.rcParams["axes.unicode_minus"] = False
|
|
|
|
|
|
def prepressingData(data1):
    """Clean the raw review table and add a numeric rating column.

    Works on a copy, so ``data1`` is never modified.

    Args:
        data1: DataFrame with the columns '赞成数', '反对数', '回复数',
            '评价' and '评论主体'.

    Returns:
        A new DataFrame in which
        * '赞成数' and '反对数' are strings with newlines removed,
        * '回复数' is a string with the text '回应' removed,
        * '评价得分' holds the rating as an integer from 5 down to 1,
          or 0 when the '评价' label is not one of the known ratings,
        * the '评论主体' column has been dropped.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    rating_by_label = {'力荐': 5, '推荐': 4, '还行': 3, '较差': 2, '很差': 1}

    data = data1.copy()

    # Columns are rebuilt from plain lists, which pandas assigns by position.
    # This avoids writing through a Series taken from the frame (chained
    # assignment) and works for any index, not only the default 0..n-1.
    data['赞成数'] = [str(value).replace('\n', '') for value in data['赞成数']]
    data['反对数'] = [str(value).replace('\n', '') for value in data['反对数']]
    data['回复数'] = [str(value).replace('回应', '') for value in data['回复数']]

    # Unknown or missing labels score 0.
    data['评价得分'] = [rating_by_label.get(label, 0) for label in data['评价']]

    # drop() returns a new frame; the result has to be used to take effect.
    return data.drop(columns=['评论主体'])
|
|
|
|
|
|
def cvtInt(data1):
    """Convert the vote and reply count columns to numeric values.

    Works on a copy, so ``data1`` is never modified.

    Args:
        data1: DataFrame with the columns '赞成数', '反对数' and '回复数'.

    Returns:
        A new DataFrame whose three count columns are numeric. Cells that
        cannot be parsed become 0 in '赞成数' and '反对数'; in '回复数' they
        are left as NaN (this column was never filled).

    Raises:
        KeyError: if one of the required columns is missing.
    """
    data = data1.copy()

    # errors='coerce' converts cell by cell. The former errors='ignore' is
    # deprecated and handed back the whole column unconverted as soon as a
    # single cell was not numeric, which made the fillna(0) below pointless.
    for column in ('赞成数', '反对数'):
        data[column] = pd.to_numeric(data1[column], errors='coerce').fillna(0)

    data['回复数'] = pd.to_numeric(data1['回复数'], errors='coerce')
    return data
|
|
|
|
|
|
def dropUselessTime(data):
    """Return a copy of ``data`` without rows whose '评论时间' is 'no star'.

    Rows are selected with a boolean mask rather than dropped by index
    label, so only the matching rows are removed even when index labels
    are duplicated.

    Args:
        data: DataFrame with a '评论时间' column.

    Returns:
        A new DataFrame; ``data`` itself is not modified.

    Raises:
        KeyError: if the '评论时间' column is missing.
    """
    has_usable_time = data['评论时间'] != 'no star'
    return data.loc[has_usable_time].copy()
|
|
|
|
|
|
|
|
|
def _save_hourly_line_plot(values, ylabel, title, out_file):
    """Draw ``values`` (indexed by hour) as a line chart and save it."""
    plt.figure(figsize=(8, 6))
    plt.plot(values.index, values)
    plt.xlabel('时间/hour', size=23)
    plt.ylabel(ylabel, size=23)
    plt.title(title, size=23)
    plt.xticks(list(range(0, 24)))  # one x-axis tick per hour of the day
    plt.savefig(out_file)


def plotTimeWithData(data, path):
    """Plot review count and mean rating per hour and save both charts.

    Two PNG files are written, named by appending
    '评论数量随时间段的变化.png' and '评论均值随时间段的变化.png' to ``path``.
    The figures are left open after saving, as before.

    Args:
        data: DataFrame with the columns 'hour', 'month' and '评价得分'.
        path: String prefix for the output files. It is concatenated as-is,
            so it needs a trailing separator to address a directory.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    by_hour = data.groupby('hour')

    # Number of rows per hour, counted on the non-null 'month' values.
    _save_hourly_line_plot(
        by_hour['month'].count(),
        '评论数量',
        '评论数量随时间段的变化',
        path + '评论数量随时间段的变化.png',
    )

    # Average only the rating column: taking the mean of the whole frame
    # fails on recent pandas when text columns are present.
    _save_hourly_line_plot(
        by_hour['评价得分'].mean(),
        '评论分均值',
        '评论均值随时间段的变化',
        path + '评论均值随时间段的变化.png',
    )
|
|
|
|
|
|
|
|
|
def save_pics(name, path, font_path="C:/Windows/Fonts/SimHei.ttf"):
    """Build and save all charts for one review CSV.

    Reads ``<name>.csv`` from ``path`` and writes, with ``path`` as prefix:
    a word cloud of the review titles ('词云.png'), a histogram of the star
    ratings ('各星级的人数统计.png') and the two per-hour charts produced by
    ``plotTimeWithData``.

    Args:
        name: Base name shared by the mask image (``name + '.jpg'``) and
            the data file (``name + '.csv'``).
        path: Directory prefix. The image and output names are concatenated
            to it as-is, so it should end with a path separator.
        font_path: Font file used for the word cloud. Defaults to the
            Windows SimHei font that was previously hard-coded.
    """
    pic_path = path + name + '.jpg'
    print(pic_path)
    # NOTE(review): cv.imread is expected to return None instead of raising
    # when the image cannot be read, in which case no mask is applied.
    background_image = cv.imread(pic_path)

    os.makedirs(path, exist_ok=True)

    # os.path.join instead of a hard-coded '\\' so the CSV is also found on
    # non-Windows systems.
    yingping = pd.read_csv(os.path.join(path, name + '.csv'))

    # Join all titles in one pass; missing titles are skipped instead of
    # raising on str + float.
    all_content = ''.join(yingping['评论标题'].dropna().astype(str))
    cut_text = " ".join(jieba.cut(all_content))

    wordcloud = WordCloud(
        font_path=font_path,
        mask=background_image,
        background_color='white',
    ).generate(cut_text)
    plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.savefig(path + '词云.png')

    yingping = prepressingData(yingping)

    # Start of the rating histogram.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.figure(figsize=(10, 8))
    plt.hist(yingping['评价得分'], bins=15)
    plt.xlabel('星级(0表示没有评级)', size=14)
    plt.ylabel('人数', size=14)
    plt.savefig(path + '各星级的人数统计.png')

    yingping = cvtInt(yingping)
    yingping = dropUselessTime(yingping)

    # Re-index by review time first: the parsed timestamps below then carry
    # the same index as the frame, so the column assignments line up.
    yingping.index = yingping['评论时间']
    timestamps = pd.to_datetime(yingping['评论时间'])
    yingping['month'] = timestamps.dt.month
    yingping['hour'] = timestamps.dt.hour

    plotTimeWithData(yingping, path)