import csv
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from wordcloud import WordCloud

# Matches runs of consecutive characters in the range U+4E00..U+9FA5.
# Compiled once here instead of once per CSV row.
_CJK_RUN = re.compile("[\u4e00-\u9fa5]+")

# Zero-based index of the input column whose text is extracted.
_TEXT_COLUMN = 13


def purification(self, new_self):
    """Extract the Chinese text of one CSV column into a new CSV file.

    For every row of the input file, the field at index 13 is scanned for
    runs of characters in the range U+4E00..U+9FA5. Each input row produces
    exactly one output row whose fields are the runs found, in order. A row
    with no matches, or a row too short to have a field at index 13 (for
    example a blank line), produces an empty output row.

    Args:
        self: Path of the input CSV file, read as UTF-8. Despite the name,
            this is a plain path, not an instance.
        new_self: Path of the output CSV file, written as UTF-8. An existing
            file is overwritten.
    """
    with open(self, "r", newline='', encoding='utf-8') as src:
        # The input is read completely before the output is opened.
        data_list = [
            _CJK_RUN.findall(row[_TEXT_COLUMN]) if len(row) > _TEXT_COLUMN else []
            for row in csv.reader(src)
        ]

    with open(new_self, 'w', newline='', encoding='utf-8') as dst:
        csv.writer(dst).writerows(data_list)


def wordcloud():
    """Build and display a word cloud from the first column of analyse.csv."""
    # Read the csv file. '$' cannot occur in the file written by
    # purification() (only matched characters and commas are written), so
    # each line is read as a single column.
    # NOTE(review): with pandas' default header handling the first non-blank
    # line is consumed as the column header and does not reach the word
    # cloud -- confirm this is intended.
    df = pd.read_csv('analyse.csv', sep='$')

    # Select the first column with iloc and drop duplicate entries.
    column_data = df.iloc[:, 0]
    unique_data = column_data.drop_duplicates()

    # Join the de-duplicated entries into one space-separated string.
    text = ' '.join(unique_data.astype(str).tolist())

    # Generate the word cloud. An explicit empty stopword list is passed;
    # presumably this disables the library's default stopwords -- confirm.
    my_stopwords = []
    cloud = WordCloud(
        font_path='simhei.ttf',
        width=1000,
        height=600,
        background_color='white',
        stopwords=my_stopwords,
    ).generate(text)

    # Display the word cloud without axes.
    plt.figure(figsize=(10, 6))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()


# Script entry: extract the text column, then render the word cloud.
self = 'congtent.csv'
new_self = 'analyse.csv'
purification(self, new_self)
wordcloud()