注释补充

11 months ago · bdeec96b53
parent 2fabe5f632
commit bdeec96b53
2 changed files with 6 additions and 0 deletions
--- a/barrage/handleData.py
+++ b/barrage/handleData.py
@@ -14,11 +14,13 @@ import pandas as pd
 from collections import Counter
 from openpyxl import Workbook

+# 读取弹幕文件
 def ReadXlsx(filePath=''):
    """Load the first sheet of an Excel workbook, minus fully-empty columns.

    Args:
        filePath: Path of the workbook, passed straight to ``pd.read_excel``.

    Returns:
        A DataFrame of the first sheet with every all-NaN column removed.
    """
    df = pd.read_excel(filePath, sheet_name=0)
    # dropna returns a new frame (no inplace=True here), so its result must
    # be the value handed back; calling it as a bare statement has no effect.
    return df.dropna(axis=1, how='all')

+# 将dataframe类型转为string类型
 def ChangeDfToString(df,sep=',', isSave=False, filePath=''):
    string_data = df.to_string(index=False, header=False, na_rep='')
    string = string_data.replace('\n', ' ')
@@ -28,6 +30,7 @@ def ChangeDfToString(df,sep=',', isSave=False, filePath=''):
            file.write(str)
    return str

+# 根据关键词进行检索
 def GetKeyFromList(keyWords, origin_list):
    filtered_list = [item for item in origin_list if any(keyword in item for keyword in keyWords)]
    counter_list = Counter(filtered_list)
--- a/barrage/wordCloud.py
+++ b/barrage/wordCloud.py
@@ -16,6 +16,7 @@ from PIL import Image
 from wordcloud import WordCloud, ImageColorGenerator
 from sklearn.feature_extraction.text import TfidfVectorizer

+# 将弹幕文本分隔成易于处理的字词
 def ReadAndCutWords(filePath):
    with open(filePath, 'r', encoding='utf-8') as file:
        text = file.read()
@@ -23,6 +24,7 @@ def ReadAndCutWords(filePath):
    word_list = ' '.join(words)
    return word_list

+# 利用TF-IDF将字词按频率划分
 def ChangeToFreq(word_list):
    documents = [word_list]
    vectorizer = TfidfVectorizer()
@@ -31,6 +33,7 @@ def ChangeToFreq(word_list):
    word_freq = dict(zip(feature_names, tfidf_matrix.toarray().sum(axis=0)))
    return word_freq

+# 根据字词频率来生成图云
 def CreateWordCloud(word_freq, width, height, maskImgPath, saveImgPath, save=False):
    if maskImgPath == '':
        mask = None