注释补充

main
xxxiix 6 months ago
parent 2fabe5f632
commit bdeec96b53

@ -14,11 +14,13 @@ import pandas as pd
from collections import Counter
from openpyxl import Workbook
# Read the danmaku (bullet-comment) spreadsheet
def ReadXlsx(filePath=''):
    """Load the first worksheet of an Excel file, minus its all-empty columns.

    Args:
        filePath: Path of the workbook, passed straight to ``pd.read_excel``.

    Returns:
        The first sheet as a DataFrame, with every column whose values are
        all missing removed.
    """
    df = pd.read_excel(filePath, sheet_name=0)
    # dropna() returns a new DataFrame instead of modifying df in place; the
    # result has to be kept, otherwise the all-empty columns are never removed.
    return df.dropna(axis=1, how='all')
# 将dataframe类型转为string类型
def ChangeDfToString(df,sep=',', isSave=False, filePath=''):
string_data = df.to_string(index=False, header=False, na_rep='')
string = string_data.replace('\n', ' ')
@ -28,6 +30,7 @@ def ChangeDfToString(df,sep=',', isSave=False, filePath=''):
file.write(str)
return str
# 根据关键词进行检索
def GetKeyFromList(keyWords, origin_list):
filtered_list = [item for item in origin_list if any(keyword in item for keyword in keyWords)]
counter_list = Counter(filtered_list)

@ -16,6 +16,7 @@ from PIL import Image
from wordcloud import WordCloud, ImageColorGenerator
from sklearn.feature_extraction.text import TfidfVectorizer
# 将弹幕文本分隔成易于处理的字词
def ReadAndCutWords(filePath):
with open(filePath, 'r', encoding='utf-8') as file:
text = file.read()
@ -23,6 +24,7 @@ def ReadAndCutWords(filePath):
word_list = ' '.join(words)
return word_list
# 利用TF-IDF将字词按频率划分
def ChangeToFreq(word_list):
documents = [word_list]
vectorizer = TfidfVectorizer()
@ -31,6 +33,7 @@ def ChangeToFreq(word_list):
word_freq = dict(zip(feature_names, tfidf_matrix.toarray().sum(axis=0)))
return word_freq
# 根据字词频率来生成词云
def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath, save=False):
if maskImgPath == '':
mask = None

Loading…
Cancel
Save