"""Clean a review CSV and render a word cloud plus summary charts for it."""
import os

import cv2 as cv
import jieba
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pylab import mpl
from wordcloud import WordCloud

try:
    # Private matplotlib helper; it is not available in every matplotlib
    # release, so a missing name must not stop the module from importing.
    from matplotlib.font_manager import _rebuild
except ImportError:
    _rebuild = None

if _rebuild is not None:
    _rebuild()  # reload the font manager

# Font used for chart text (Chinese labels).
mpl.rcParams["font.sans-serif"] = ["FangSong"]
# Render the minus sign correctly on the axes.
mpl.rcParams["axes.unicode_minus"] = False

# Rating label -> star score. Any label not listed here is scored 0.
_STAR_BY_LABEL = {'力荐': 5, '推荐': 4, '还行': 3, '较差': 2, '很差': 1}

# Word-cloud font used when the caller does not supply one.
_DEFAULT_CLOUD_FONT = "C:/Windows/Fonts/SimHei.ttf"


def prepressingData(data1):
    """Return a cleaned copy of the review table.

    The input frame is not modified. In the copy:

    * '赞成数' and '反对数' are converted to strings with newlines removed,
    * '回复数' is converted to a string with the text '回应' removed,
    * a new integer column '评价得分' holds the star score derived from
      '评价' (5 for '力荐' down to 1 for '很差', 0 for anything else),
    * the column '评论主体' is dropped.

    Raises:
        KeyError: if any of the columns named above is missing.
    """
    data = data1.copy()

    # Column-wise assignment instead of per-element writes through a derived
    # Series: it works for any index and does not rely on chained assignment.
    data['赞成数'] = data['赞成数'].map(lambda v: str(v).replace('\n', ''))
    data['反对数'] = data['反对数'].map(lambda v: str(v).replace('\n', ''))
    data['回复数'] = data['回复数'].map(lambda v: str(v).replace('回应', ''))

    # Unknown or missing labels map to NaN first and are then scored 0.
    data['评价得分'] = data['评价'].map(_STAR_BY_LABEL).fillna(0).astype(int)

    # drop() returns a new frame; the result has to be kept.
    return data.drop(columns=['评论主体'])


def cvtInt(data1):
    """Return a copy with '赞成数', '反对数' and '回复数' made numeric.

    Values that cannot be parsed as numbers (including empty strings and
    missing values) become 0, so all three columns end up numeric.
    """
    data = data1.copy()
    for column in ('赞成数', '反对数', '回复数'):
        # 'coerce' turns unparsable entries into NaN instead of leaving the
        # whole column as text; those NaN are then replaced by 0.
        data[column] = pd.to_numeric(data1[column], errors='coerce').fillna(0)
    return data


def dropUselessTime(data):
    """Return a copy without the rows whose '评论时间' equals 'no star'."""
    return data[data['评论时间'] != 'no star'].copy()


def _save_hourly_plot(values, ylabel, title, out_file):
    """Plot one per-hour series as a line chart and save it to out_file."""
    fig = plt.figure(figsize=(8, 6))
    plt.plot(values.index, values)
    plt.xlabel('时间/hour', size=23)
    plt.ylabel(ylabel, size=23)
    plt.title(title, size=23)
    plt.xticks(list(range(0, 24)))  # one tick per hour of the day
    plt.savefig(out_file)
    plt.close(fig)  # release the figure so repeated calls do not pile up


def plotTimeWithData(data, path):
    """Save two line charts describing the reviews per hour.

    Args:
        data: frame with the columns 'hour', 'month' and '评价得分'.
        path: prefix that is put directly in front of each output file name
            (no separator is inserted).

    Writes '评论数量随时间段的变化.png' (number of reviews per hour) and
    '评论均值随时间段的变化.png' (mean '评价得分' per hour).
    """
    by_hour = data.groupby('hour')

    _save_hourly_plot(
        by_hour['month'].count(),
        '评论数量',
        '评论数量随时间段的变化',
        path + '评论数量随时间段的变化.png',
    )
    # Average only the score column; averaging the whole frame fails on
    # text columns with recent pandas versions.
    _save_hourly_plot(
        by_hour['评价得分'].mean(),
        '评论分均值',
        '评论均值随时间段的变化',
        path + '评论均值随时间段的变化.png',
    )


def _read_mask(pic_path):
    """Load the word-cloud mask image; return None when it cannot be read."""
    if not os.path.isfile(pic_path):
        return None
    # Read the bytes with numpy and decode them in memory, so the path itself
    # is never handed to OpenCV (cv.imread can fail on non-ASCII paths).
    raw = np.fromfile(pic_path, dtype=np.uint8)
    if raw.size == 0:
        return None
    return cv.imdecode(raw, cv.IMREAD_COLOR)


def save_pics(name, path, font_path=_DEFAULT_CLOUD_FONT):
    """Render the word cloud and the summary charts for one review file.

    Args:
        name: base name shared by the input files; '<name>.jpg' is used as
            the word-cloud mask and '<name>.csv' holds the reviews.
        path: directory that contains the input files and receives the
            output images. A trailing separator is optional.
        font_path: font file used to draw the word cloud.

    Writes '词云.png', '各星级的人数统计.png' and the two charts produced by
    plotTimeWithData into ``path``.
    """
    # Append a trailing separator when one is missing, so that plain
    # concatenation below always lands inside the directory.
    out_dir = os.path.join(path, '')

    pic_path = out_dir + name + '.jpg'
    print(pic_path)
    background_image = _read_mask(pic_path)

    if path:
        os.makedirs(path, exist_ok=True)

    yingping = pd.read_csv(os.path.join(path, name + '.csv'))

    # Join all titles in one pass; missing titles are skipped.
    titles = yingping['评论标题'].dropna().astype(str)
    cut_text = " ".join(jieba.cut("".join(titles)))
    wordcloud = WordCloud(
        font_path=font_path,
        mask=background_image,
        background_color='white',
    ).generate(cut_text)

    fig = plt.figure(figsize=(10, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.savefig(out_dir + '词云.png')
    plt.close(fig)

    yingping = prepressingData(yingping)

    # Histogram of star scores.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    fig = plt.figure(figsize=(10, 8))
    plt.hist(yingping['评价得分'], bins=15)
    plt.xlabel('星级(0表示没有评级)', size=14)
    plt.ylabel('人数', size=14)
    plt.savefig(out_dir + '各星级的人数统计.png')
    plt.close(fig)

    yingping = dropUselessTime(cvtInt(yingping))

    # Parse the timestamps once and derive both time columns from them.
    stamps = pd.to_datetime(yingping['评论时间'])
    yingping['month'] = stamps.dt.month
    yingping['hour'] = stamps.dt.hour

    plotTimeWithData(yingping, out_dir)