You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

44 lines
1.4 KiB

import csv
import re
import numpy as np
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def purification(self, new_self):
    """Extract the Chinese text of one CSV column into a new CSV file.

    Every input row yields exactly one output row. The cells of the output
    row are the consecutive runs of characters in the range U+4E00..U+9FA5
    found in the input row's field at index 13. A row that has no such
    characters, or that has fewer than 14 fields (for example a blank
    line), yields an empty output row instead of raising ``IndexError``.

    Args:
        self: Path of the CSV file to read, decoded as UTF-8. Despite the
            name, this is a plain path and not an instance.
        new_self: Path of the CSV file to write, encoded as UTF-8. The
            file is opened in ``'w'`` mode, so existing content is replaced.
    """
    # Compiled once instead of once per row.
    chinese_run = re.compile("[\u4e00-\u9fa5]+")
    text_column = 13

    # Distinct handle names keep the path parameters from being rebound.
    with open(self, "r", newline='', encoding='utf-8') as src:
        extracted = [
            # Short rows get an empty result so input and output stay
            # aligned row for row.
            chinese_run.findall(row[text_column]) if len(row) > text_column else []
            for row in csv.reader(src)
        ]

    with open(new_self, 'w', newline='', encoding='utf-8') as dst:
        csv.writer(dst).writerows(extracted)
def wordcloud(csv_path='analyse.csv', font_path='simhei.ttf'):
    """Build a word cloud from the first column of a text file and show it.

    The file is read with ``'$'`` as the field separator, the first column
    is de-duplicated, the remaining values are joined with spaces, and the
    resulting text is rendered with ``WordCloud`` and displayed through
    matplotlib.

    Args:
        csv_path: File to read. Defaults to ``'analyse.csv'``, the name
            this script passes to ``purification`` as its output path.
        font_path: Font file handed to ``WordCloud``. Defaults to
            ``'simhei.ttf'``.
    """
    # header=None: purification() writes no header row, so letting pandas
    # infer one would silently drop the first line of data from the cloud.
    # NOTE(review): '$' presumably never occurs in the data, which would put
    # each whole line into a single column - confirm.
    df = pd.read_csv(csv_path, sep='$', header=None)

    # Take the first column and drop repeated values.
    first_column = df.iloc[:, 0]
    unique_entries = first_column.drop_duplicates()

    # Join the de-duplicated values into one space-separated string.
    text = ' '.join(unique_entries.astype(str).tolist())

    # An explicit empty stop-word list is passed to WordCloud.
    my_stopwords = []
    # Named `cloud` so the local does not shadow this function's own name.
    cloud = WordCloud(
        font_path=font_path,
        width=1000,
        height=600,
        background_color='white',
        stopwords=my_stopwords,
    ).generate(text)

    # Display the rendered image without axes.
    plt.figure(figsize=(10, 6))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()
# Script driver: clean the raw CSV, then render the word cloud.
# `self` is the input path and `new_self` the output path handed to
# purification(); wordcloud() reads 'analyse.csv' by its own default.
# NOTE(review): 'congtent.csv' may be a misspelling of 'content.csv' -
# confirm against the file that actually exists on disk.
self, new_self = 'congtent.csv', 'analyse.csv'
purification(self, new_self)
wordcloud()