ADD file via upload

master
p2ig48ofr 5 years ago
parent d17b871abe
commit 333498162a

@ -0,0 +1,121 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from wordcloud import WordCloud
import jieba
import cv2 as cv
from pylab import mpl
import os
# `_rebuild` is a private matplotlib helper and is not available in every
# matplotlib release; guard the import so the module still loads without it.
try:
    from matplotlib.font_manager import _rebuild
except ImportError:
    _rebuild = None
if _rebuild is not None:
    _rebuild()  # reload the font list (original note: "reload")
# Font used for sans-serif text so that Chinese labels can be displayed.
mpl.rcParams["font.sans-serif"] = ["FangSong"]
# Make the minus sign render correctly on the axes.
mpl.rcParams["axes.unicode_minus"] = False
def prepressingData(data1):
    """Clean the raw count columns and derive a numeric rating column.

    Works on a copy, so ``data1`` is left untouched.

    Args:
        data1: DataFrame with the columns '赞成数', '反对数', '回复数',
            '评价' and '评论主体'.

    Returns:
        A new DataFrame in which newlines are removed from '赞成数' and
        '反对数', the '回应' marker is removed from '回复数' (all three
        columns end up holding strings), '评价得分' holds the 1-5 score of
        the rating label (0 when the label is not recognised) and the
        '评论主体' column is dropped.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    rating_scores = {'力荐': 5, '推荐': 4, '还行': 3, '较差': 2, '很差': 1}

    data = data1.copy()
    # Column-wise string operations replace the element-wise writes through
    # data['col'][i]: those only work with a 0..n-1 index and rely on writes
    # to an extracted column propagating back into the frame.
    data['赞成数'] = data['赞成数'].astype(str).str.replace('\n', '', regex=False)
    data['反对数'] = data['反对数'].astype(str).str.replace('\n', '', regex=False)
    data['回复数'] = data['回复数'].astype(str).str.replace('回应', '', regex=False)
    # map() keeps the frame's own index, so the scores line up with their
    # rows whatever the index looks like; unknown labels become 0.
    data['评价得分'] = data['评价'].map(rating_scores).fillna(0).astype(int)
    # drop() returns a new frame, so the result has to be returned/assigned.
    return data.drop(columns=['评论主体'])
def cvtInt(data1):
    """Convert the three count columns to numbers, using 0 for bad values.

    Args:
        data1: DataFrame with the columns '赞成数', '反对数' and '回复数'.
            It is not modified.

    Returns:
        A copy of ``data1`` whose three count columns are numeric. Values
        that cannot be parsed as a number (including empty strings) are
        replaced by 0.
    """
    data = data1.copy()
    for column in ('赞成数', '反对数', '回复数'):
        # errors='coerce' turns each unparsable value into NaN; with
        # errors='ignore' a single bad value left the whole column
        # unconverted, which made the fillna(0) below a no-op.
        data[column] = pd.to_numeric(data1[column], errors='coerce').fillna(0)
    return data
def dropUselessTime(data):
    """Return a copy of ``data`` without the rows whose '评论时间' is 'no star'.

    Rows are removed by index label, and ``data`` itself is not modified.
    """
    is_placeholder = data['评论时间'] == 'no star'
    stale_labels = data.loc[is_placeholder].index
    return data.copy().drop(index=stale_labels)
def plotTimeWithData(data, path):
    """Plot hourly comment volume and hourly mean rating and save both charts.

    Args:
        data: DataFrame with the columns 'hour', 'month' and '评价得分'.
            It is only read, never modified.
        path: Prefix prepended verbatim to each output file name, so it has
            to end with a path separator when it names a directory.
    """
    by_hour = data.groupby('hour')

    # The number of non-null 'month' entries per hour is the comment count.
    _plot_hourly_series(
        by_hour['month'].count(),
        u'时间/hour',
        u'评论数量',
        u'评论数量随时间段的变化',
        path + '评论数量随时间段的变化.png',
    )

    # Average only the rating column: taking mean() of the whole frame
    # raises on recent pandas versions when text columns are present.
    _plot_hourly_series(
        by_hour['评价得分'].mean(),
        '时间/hour',
        '评论分均值',
        '评论均值随时间段的变化',
        path + '评论均值随时间段的变化.png',
    )


def _plot_hourly_series(series, xlabel, ylabel, title, out_file):
    """Draw one hour-indexed series as a line chart and save it to out_file."""
    fig = plt.figure(figsize=(8, 6))
    try:
        plt.plot(series.index, series)
        plt.xlabel(xlabel, size=23)
        plt.ylabel(ylabel, size=23)
        plt.title(title, size=23)
        # Tick positions on the x axis: one per hour of the day.
        plt.xticks(list(range(0, 24)))
        plt.savefig(out_file)
    finally:
        # Release the figure so repeated calls do not pile up open figures.
        plt.close(fig)
def save_pics(name, path, font_path="C:/Windows/Fonts/SimHei.ttf"):
    """Generate the word cloud and the statistics charts for one review CSV.

    Reads ``<name>.csv`` from ``path``, uses ``<name>.jpg`` as the word cloud
    mask, and writes '词云.png', '各星级的人数统计.png' and the two hourly
    charts produced by plotTimeWithData.

    Args:
        name: Base name (without extension) shared by the CSV file and the
            mask image.
        path: Directory prefix. The mask and output file names are appended
            to it verbatim, so it should end with a path separator.
        font_path: Font file used to render the word cloud. Defaults to the
            Windows SimHei font that used to be hard-coded.
    """
    pic_path = path + name + '.jpg'
    print(pic_path)
    # Create the directory before anything is read from or written to it.
    if not os.path.exists(path):
        os.makedirs(path)
    # NOTE(review): cv.imread returns None instead of raising when the image
    # cannot be read, in which case no mask is applied - confirm that this
    # silent fallback is wanted.
    background_image = cv.imread(pic_path)

    # os.path.join instead of a hard-coded '\\' keeps the CSV lookup portable.
    yingping = pd.read_csv(os.path.join(path, name + '.csv'))

    # Join all titles in one pass; missing titles are skipped rather than
    # failing on str + NaN.
    all_content = ''.join(yingping['评论标题'].dropna().astype(str))
    cut_text = " ".join(jieba.cut(all_content))
    wordcloud = WordCloud(
        font_path=font_path,
        mask=background_image,
        background_color='white',
    ).generate(cut_text)
    cloud_fig = plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.savefig(path + '词云.png')
    plt.close(cloud_fig)

    yingping = prepressingData(yingping)

    # Histogram of the star ratings.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    hist_fig = plt.figure(figsize=(10, 8))
    plt.hist(yingping['评价得分'], bins=15)
    plt.xlabel('星级0表示没有评级', size=14)
    plt.ylabel('人数', size=14)
    plt.savefig(path + '各星级的人数统计.png')
    plt.close(hist_fig)

    yingping = cvtInt(yingping)
    yingping = dropUselessTime(yingping)
    yingping.index = yingping['评论时间']
    # Parse the timestamps once and derive both calendar fields from them.
    comment_times = pd.to_datetime(yingping['评论时间'])
    yingping['month'] = comment_times.dt.month
    yingping['hour'] = comment_times.dt.hour
    plotTimeWithData(yingping, path)
Loading…
Cancel
Save