parent
b65f2532dc
commit
a8feb5eabf
@ -0,0 +1,44 @@
|
|||||||
|
import csv
|
||||||
|
import re
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from wordcloud import WordCloud
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
def purification(self, new_self):
    """Extract the Chinese text of one CSV column into a new CSV file.

    Reads the CSV file at path ``self``, looks at the field at index 13 of
    every row, collects each run of characters in the range U+4E00..U+9FA5
    found there, and writes those runs as one row of the CSV file at path
    ``new_self``. One output row is written per input row; a row whose
    field contains no such characters becomes an empty output row.

    Args:
        self: Path of the input CSV file, read as UTF-8. Despite the name
            this is a plain path argument, not an instance.
        new_self: Path of the output CSV file, written as UTF-8. An
            existing file is overwritten.

    Rows with fewer than 14 fields (blank lines included) yield an empty
    output row rather than raising ``IndexError``.
    """
    # Compiled once; the pattern does not change between rows.
    chinese_run = re.compile("[\u4e00-\u9fa5]+")
    text_column = 13

    # The input is read completely before the output is opened, so the
    # function keeps working when both paths name the same file.
    with open(self, "r", newline='', encoding='utf-8') as source:
        extracted = [
            # Matching the field directly gives the same runs as matching
            # its list repr: the repr only adds ASCII quotes and escapes.
            chinese_run.findall(row[text_column])
            if len(row) > text_column
            else []
            for row in csv.reader(source)
        ]

    with open(new_self, 'w', newline='', encoding='utf-8') as target:
        csv.writer(target).writerows(extracted)
|
||||||
|
|
||||||
|
def wordcloud(csv_path='analyse.csv', font_path='simhei.ttf'):
    """Display a word cloud built from the first column of a CSV file.

    The file is parsed with ``'$'`` as the field separator, the values of
    the first column are de-duplicated, joined with spaces, and passed to
    ``WordCloud.generate``. The resulting image is shown in a matplotlib
    window.

    Args:
        csv_path: Path of the file to read. Defaults to ``'analyse.csv'``.
        font_path: Font file handed to ``WordCloud``. Defaults to
            ``'simhei.ttf'``.
    """
    # Read the csv file.
    # NOTE(review): read_csv treats the first line as a header by default,
    # so that line does not end up in the cloud text - confirm this is
    # intended for the file being read.
    df = pd.read_csv(csv_path, sep='$')

    # Take the first column (iloc position 0) and drop duplicate values.
    unique_values = df.iloc[:, 0].drop_duplicates()

    # Join the de-duplicated values into one space-separated string.
    text = ' '.join(unique_values.astype(str).tolist())

    # Build the word cloud; an empty list is passed as the stopwords.
    my_stopwords = []
    cloud = WordCloud(
        font_path=font_path,
        width=1000,
        height=600,
        background_color='white',
        stopwords=my_stopwords,
    ).generate(text)

    # Show the word cloud without axes.
    plt.figure(figsize=(10, 6))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
|
||||||
|
|
||||||
|
# Input CSV for purification() and the intermediate file it writes.
# NOTE(review): 'congtent.csv' may be a misspelling of 'content.csv' -
# confirm against the actual file name on disk before changing it.
self = 'congtent.csv'
new_self = 'analyse.csv'

# Write the extracted text to 'analyse.csv', then render the word cloud
# (wordcloud() reads 'analyse.csv').
purification(self, new_self)
wordcloud()
|
Loading…
Reference in new issue