You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
44 lines
1.4 KiB
44 lines
1.4 KiB
9 months ago
import csv
import re
import numpy as np
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def purification(self, new_self, column=13):
    """Copy the Chinese text of one CSV column into a new CSV file.

    Each row of the input file is read, every run of consecutive characters
    in the range U+4E00..U+9FA5 found in the selected column is collected,
    and the runs of each row are written as one row of the output file.
    A row whose column contains no such characters produces an empty output
    row, so the output keeps one row per usable input row.

    Args:
        self: Path of the input CSV file, read as UTF-8. Despite its name
            this is a plain path, not an instance; the name is kept so that
            existing positional and keyword callers keep working.
        new_self: Path of the output CSV file; created or overwritten.
        column: Zero-based index of the column that holds the text.
            Defaults to 13, the index the function originally hard-coded.

    Returns:
        None. The result is the file written at ``new_self``.
    """
    # Compile once instead of once per row.
    chinese_run = re.compile("[\u4e00-\u9fa5]+")

    data_list = []
    # Use separate handle names so the path arguments are not rebound.
    with open(self, "r", newline='', encoding='utf-8') as src:
        for row in csv.reader(src):
            try:
                cell = row[column]
            except IndexError:
                # Blank or truncated line: there is no text column to clean.
                continue
            data_list.append(chinese_run.findall(cell))

    with open(new_self, 'w', newline='', encoding='utf-8') as dst:
        csv.writer(dst).writerows(data_list)
def wordcloud(csv_path='analyse.csv', font_path='simhei.ttf'):
    """Draw a word cloud from the first column of a ``$``-separated file.

    The file is loaded with pandas, the first column is de-duplicated, its
    values are joined with spaces into one text, and that text is rendered
    as a 1000x600 word cloud on a white background in a matplotlib window.

    Args:
        csv_path: File to read. Defaults to ``'analyse.csv'``, the path the
            function originally hard-coded.
        font_path: Font file handed to ``WordCloud``. Defaults to
            ``'simhei.ttf'``, the font the function originally hard-coded.

    Returns:
        None. The result is the displayed figure.
    """
    # Read the file, splitting fields on '$'.
    # NOTE(review): read_csv treats the first line as the header by default,
    # so that line does not contribute to the cloud text - confirm intended.
    df = pd.read_csv(csv_path, sep='$')

    # Take the first column (iloc position 0) and drop repeated values.
    column_data = df.iloc[:, 0]
    unique_data = column_data.drop_duplicates()

    # Join the de-duplicated values, as strings, into one space-separated text.
    text = ' '.join(unique_data.astype(str).tolist())

    # Build the cloud. An explicit empty stop-word list is passed rather than
    # leaving the argument unset.
    my_stopwords = []
    # Named `cloud` so the local does not shadow this function's own name.
    cloud = WordCloud(
        font_path=font_path,
        width=1000,
        height=600,
        background_color='white',
        stopwords=my_stopwords,
    ).generate(text)

    # Show the cloud. Without plt.show() a script builds the figure but never
    # displays it; the pixel-coordinate axes carry no meaning for a cloud.
    plt.figure(figsize=(10, 6))
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
# Input and output CSV paths; the names mirror purification()'s parameters,
# so presumably they are the arguments passed to it - verify at the call site.
# NOTE(review): 'congtent.csv' may be a misspelling of 'content.csv' - confirm
# against the real file name before changing it.
self, new_self = 'congtent.csv', 'analyse.csv'