parent
b65f2532dc
commit
a8feb5eabf
@ -0,0 +1,44 @@
|
||||
import csv
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def purification(self, new_self, column=13):
    """Extract the Chinese text of one CSV column into a new CSV file.

    Each row of the input file is read with ``csv.reader``; the runs of
    characters in the range U+4E00-U+9FA5 found in the selected column are
    collected, and every input row's runs are written as one row of the
    output file.  A row whose column contains no such characters yields an
    empty output row.

    Args:
        self: Path of the UTF-8 CSV file to read.  The parameter name is
            kept for backward compatibility; this is a plain function, not
            a method.
        new_self: Path of the UTF-8 CSV file to write.  Opened in ``'w'``
            mode, so an existing file is overwritten.
        column: Index of the column to extract.  Defaults to 13, the value
            that used to be hard-coded.

    Rows that do not have the requested column (for example blank lines,
    which ``csv.reader`` yields as empty lists) are skipped instead of
    aborting the whole run with ``IndexError``.
    """
    # Compiled once here rather than on every loop iteration.
    chinese_runs = re.compile("[\u4e00-\u9fa5]+")

    extracted_rows = []
    with open(self, "r", newline='', encoding='utf-8') as source:
        for row in csv.reader(source):
            try:
                cell = row[column]
            except IndexError:
                # Short or blank row: nothing to extract from it.
                continue
            # Matching the cell directly gives the same runs as matching
            # the repr of a one-element list: the added quotes, brackets
            # and escapes never fall inside the matched character range.
            extracted_rows.append(chinese_runs.findall(cell))

    with open(new_self, 'w', newline='', encoding='utf-8') as target:
        csv.writer(target).writerows(extracted_rows)
|
||||
|
||||
def wordcloud(csv_path='analyse.csv', font_path='simhei.ttf'):
    """Build a word cloud from the first column of a CSV file and display it.

    The file is loaded with pandas, duplicate values in its first column
    are dropped, the remaining values are joined with spaces into a single
    string, and that string is passed to ``WordCloud.generate``.  The
    resulting image is shown in a matplotlib window via ``plt.show()``.

    Args:
        csv_path: Path of the file to read.  Defaults to ``'analyse.csv'``,
            the path that used to be hard-coded.
        font_path: Font file handed to ``WordCloud``.  Defaults to
            ``'simhei.ttf'``, the value that used to be hard-coded.
    """
    # Read the CSV file.
    # NOTE(review): sep='$' presumably makes each whole line load as a
    # single column (assuming '$' never occurs in the data) - confirm.
    # NOTE(review): read_csv treats the first line as a header by default,
    # so that line is not part of the column data below - confirm intended.
    frame = pd.read_csv(csv_path, sep='$')

    # Select the first column with iloc and drop duplicate values.
    first_column = frame.iloc[:, 0]
    unique_values = first_column.drop_duplicates()

    # Convert the de-duplicated values to strings and join them.
    text = ' '.join(unique_values.astype(str).tolist())

    # Generate the word cloud.  An explicit empty stop-word list is passed;
    # presumably to bypass the library's default stop words - TODO confirm.
    my_stopwords = []
    cloud = WordCloud(
        font_path=font_path,
        width=1000,
        height=600,
        background_color='white',
        stopwords=my_stopwords,
    ).generate(text)

    # Display the word cloud without axes.
    plt.figure(figsize=(10, 6))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
|
||||
|
||||
# Script body: extract the Chinese text from the raw CSV into 'analyse.csv',
# then draw the word cloud (wordcloud() reads 'analyse.csv').
# NOTE(review): 'congtent.csv' may be a misspelling of 'content.csv'; the
# name is kept as-is because it must match the file on disk - confirm.
# NOTE(review): these statements also run when the module is imported.
self, new_self = 'congtent.csv', 'analyse.csv'
purification(self, new_self)
wordcloud()
|
Loading…
Reference in new issue