From a8feb5eabf13853840ab7d4d1f75d09c7a8ec31f Mon Sep 17 00:00:00 2001
From: pe4nf27lt <2107895621@qq.com>
Date: Thu, 30 May 2024 16:27:38 +0800
Subject: [PATCH] ADD file via upload

---
 anlyse.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 anlyse.py

diff --git a/anlyse.py b/anlyse.py
new file mode 100644
index 0000000..bd4cc3a
--- /dev/null
+++ b/anlyse.py
@@ -0,0 +1,44 @@
+import csv
+import re
+
+import numpy as np
+import pandas as pd
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+
+def purification(self,new_self):  # self: path of the CSV to read; new_self: path of the CSV to write (`self` is an ordinary parameter here, not an instance)
+    with open(self,"r",newline='',encoding='utf-8') as self:  # rebinds `self` from the path to the open file object
+        reader = csv.reader(self)
+        data_list = []  # one list of matched substrings per input row
+        for row in reader:
+            new_row_data = str([row[13]])  # text of a one-element list holding the 14th field; raises IndexError when a row has fewer than 14 fields
+            pattern = re.compile("[\u4e00-\u9fa5]+")  # runs of characters in the range U+4E00..U+9FA5
+            new_row_data = pattern.findall(new_row_data)  # keeps only the matching runs, dropping everything else
+            data_list.append(new_row_data)
+
+    with open(new_self,'w',newline='',encoding='utf-8') as  new_self:  # rebinds `new_self` to the output file object
+        writer = csv.writer(new_self)
+        for row in data_list:
+            writer.writerow(row)  # each matched run becomes its own field of the output row
+
+def wordcloud():
+    # Read the CSV file, splitting fields on '$'
+    df = pd.read_csv('analyse.csv',sep = '$')  # NOTE(review): read_csv treats the first line as the header by default, so it is not part of the data - confirm this is intended
+    # Take the first column and drop duplicate values
+    column_data = df.iloc[:,0]  # iloc[:, 0] selects the first column by position
+    unique_data = column_data.drop_duplicates()
+    # Convert the de-duplicated values to strings and join them with spaces
+    text = ' '.join(unique_data.astype(str).tolist())
+    # Generate the word cloud
+    my_stopwords = []  # empty stop-word list handed to WordCloud
+    wordcloud = WordCloud(font_path='simhei.ttf',width=1000, height=600, background_color='white', stopwords=my_stopwords).generate(text)  # local name shadows this function inside its own body; presumably simhei.ttf supplies CJK glyphs - verify the font file is available
+    # Show the word cloud
+    plt.figure(figsize=(10, 6))
+    plt.imshow(wordcloud, interpolation='bilinear')
+    plt.axis("off")
+    plt.show()
+
+self = 'congtent.csv'  # input CSV path passed to purification; NOTE(review): possibly a typo for 'content.csv' - confirm against the real file name
+new_self = 'analyse.csv'  # output CSV path; the same file name is read by wordcloud()
+purification(self,new_self)  # runs whenever the module is executed or imported: there is no __main__ guard
+wordcloud()
\ No newline at end of file