ADD file via upload

2 years ago · a8feb5eabf
parent b65f2532dc
commit a8feb5eabf
1 changed files with 44 additions and 0 deletions
--- a/anlyse.py
+++ b/anlyse.py
@@ -0,0 +1,44 @@
+import csv
+import re
+
+import numpy as np
+import pandas as pd
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+
+def purification(self, new_self, column=13):
+    """Extract the Chinese text of one CSV column into a new CSV file.
+
+    Each row of the input file is read, every run of consecutive Chinese
+    characters (U+4E00 to U+9FA5) in the chosen column is collected, and one
+    output row is written per input row with one run per cell.
+
+    Rows that do not have the requested column (blank lines, truncated
+    records) are treated like rows without any Chinese text: they produce
+    an empty output row instead of raising IndexError.
+
+    Args:
+        self: Path of the UTF-8 CSV file to read. This is a plain path, not
+            an instance; the name is kept so existing callers still work.
+        new_self: Path of the UTF-8 CSV file to write. It is created or
+            overwritten.
+        column: Zero-based index of the column to extract. Defaults to 13.
+    """
+    # Compiled once here rather than once per row.
+    chinese_run = re.compile("[\u4e00-\u9fa5]+")
+
+    # All rows are collected before the output file is opened, so the input
+    # is fully read and closed before anything is written.
+    extracted = []
+    with open(self, "r", newline='', encoding='utf-8') as source:
+        for row in csv.reader(source):
+            try:
+                cell = row[column]
+            except IndexError:
+                # Short or blank record: nothing to extract from it.
+                cell = ""
+            # Matching the raw cell finds the same runs as matching the
+            # repr of a one-element list, because only non-Chinese
+            # characters differ between the two.
+            extracted.append(chinese_run.findall(cell))
+
+    with open(new_self, 'w', newline='', encoding='utf-8') as target:
+        csv.writer(target).writerows(extracted)
+
+def wordcloud():
+    """Display a word cloud built from the first column of 'analyse.csv'.
+
+    The file is parsed with '$' as the field separator. Duplicate values in
+    the first column are dropped, the remaining values are converted to
+    strings and joined with single spaces, and that text is handed to
+    WordCloud. The resulting image is shown in a matplotlib figure with the
+    axes hidden.
+    """
+    frame = pd.read_csv('analyse.csv', sep='$')
+
+    # First column only, each distinct value kept once, as text.
+    distinct_values = frame.iloc[:, 0].drop_duplicates().astype(str)
+    corpus = ' '.join(distinct_values.tolist())
+
+    # An empty stop-word list is passed explicitly to WordCloud.
+    cloud = WordCloud(
+        font_path='simhei.ttf',
+        width=1000,
+        height=600,
+        background_color='white',
+        stopwords=[],
+    ).generate(corpus)
+
+    # Render the image without axis ticks or frame.
+    plt.figure(figsize=(10, 6))
+    plt.imshow(cloud, interpolation='bilinear')
+    plt.axis("off")
+    plt.show()
+
+# Input and output paths for this run, kept as module-level names.
+self, new_self = 'congtent.csv', 'analyse.csv'
+
+# Write the extracted text to 'analyse.csv' first; wordcloud() then reads
+# that same file.
+purification(self, new_self)
+wordcloud()