# Provenance: added as anlyse.py in commit a8feb5eabf13853840ab7d4d1f75d09c7a8ec31f
# ("ADD file via upload", Thu, 30 May 2024 16:27:38 +0800).
"""Extract the Chinese text of one CSV column and render it as a word cloud.

Run as a script, this reads ``congtent.csv``, writes the Chinese fragments
of its 14th column to ``analyse.csv`` and then displays a word cloud built
from that file.
"""
import csv
import re

import numpy as np
import pandas as pd

# Default file names used when the module is run as a script.
SOURCE_CSV = 'congtent.csv'
CLEANED_CSV = 'analyse.csv'

# One or more consecutive characters in the range U+4E00..U+9FA5.
# Compiled once here instead of once per CSV row.
_CHINESE_RUN = re.compile("[\u4e00-\u9fa5]+")


def purification(self, new_self, column=13):
    """Write the Chinese fragments of one column of a CSV file to a new CSV.

    Every input row produces exactly one output row containing, as separate
    fields, each run of characters matched by ``_CHINESE_RUN`` found in the
    selected cell. Rows whose cell holds no such characters, and rows that
    are too short to have the cell at all, produce an empty output row.

    Args:
        self: Path of the UTF-8 CSV file to read. (The unusual name is kept
            so existing keyword callers keep working.)
        new_self: Path of the UTF-8 CSV file to write; it is overwritten.
        column: Zero-based index of the column to extract. Defaults to 13,
            the column the original script used.
    """
    with open(self, "r", newline='', encoding='utf-8') as source_file:
        # The input is read completely before the output is opened, so the
        # two paths may refer to the same file.
        extracted_rows = [
            _CHINESE_RUN.findall(row[column] if len(row) > column else "")
            for row in csv.reader(source_file)
        ]

    with open(new_self, 'w', newline='', encoding='utf-8') as target_file:
        csv.writer(target_file).writerows(extracted_rows)


def wordcloud(csv_path=CLEANED_CSV, font_path='simhei.ttf'):
    """Display a word cloud built from the first column of a text file.

    Args:
        csv_path: File to read. Defaults to ``analyse.csv``.
        font_path: Font file handed to ``WordCloud``. Defaults to
            ``simhei.ttf``.
    """
    # Imported here rather than at module level so that ``purification`` can
    # be imported and used without the plotting dependencies installed.
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt

    # The file is split on '$' rather than ','.
    # NOTE(review): presumably this is so that each line of the cleaned file,
    # which has no '$' in it, is read as one single column - confirm.
    # NOTE(review): with read_csv's default header handling the first
    # non-blank line becomes the column name and is therefore not part of the
    # cloud text - confirm that this is intended.
    frame = pd.read_csv(csv_path, sep='$')

    # Take the first column and drop repeated entries.
    unique_entries = frame.iloc[:, 0].drop_duplicates()

    # Join the remaining entries into one space-separated string.
    text = ' '.join(unique_entries.astype(str).tolist())

    # An explicit empty stop-word list is passed, as in the original script.
    cloud = WordCloud(
        font_path=font_path,
        width=1000,
        height=600,
        background_color='white',
        stopwords=[],
    ).generate(text)

    # Show the cloud without axes.
    plt.figure(figsize=(10, 6))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
    plt.show()


if __name__ == "__main__":
    purification(SOURCE_CSV, CLEANED_CSV)
    wordcloud()