数据处理与转存,并实现Ai关键词提取

main
xxxiix 6 months ago
parent b1d28ce221
commit f02bd18bc6

@ -0,0 +1,61 @@
'''
数据处理与转存实现弹幕AI关键词提取
sep是转存成字符后的弹幕分隔标志
isSaveText是用于表明是否将弹幕数据转存成文本的标志
xlsxPath是弹幕数据的路径
saveToText是转存成文本数据的路径
saveAiPath是输出ai相关弹幕的路径
keywords是ai关键词
'''
import re
import pandas as pd
from collections import Counter
from openpyxl import Workbook
def ReadXlsx(filePath=''):
    """Load the first worksheet of an Excel file without its empty columns.

    Args:
        filePath: Path of the workbook, handed to ``pandas.read_excel``.

    Returns:
        A DataFrame holding the first sheet (``sheet_name=0``) with every
        column whose values are all missing removed.
    """
    df = pd.read_excel(filePath, sheet_name=0)
    # dropna() returns a new frame rather than modifying df in place; the
    # result must be kept, otherwise the all-empty columns are never dropped.
    return df.dropna(axis=1, how='all')
def ChangeDfToString(df, sep=',', isSave=False, filePath=''):
    """Flatten a DataFrame into one string of ``sep``-separated tokens.

    The frame is rendered with ``DataFrame.to_string`` (no index, no header,
    missing values as empty text), line breaks are turned into spaces, and
    every run of one or more spaces is replaced by ``sep``.

    Note that spaces inside a cell are also replaced, so a cell containing
    spaces ends up as several tokens.

    Args:
        df: The DataFrame to flatten.
        sep: Separator inserted in place of each run of spaces.
        isSave: When true, the resulting string is also written to
            ``filePath`` (UTF-8, overwriting any existing file).
        filePath: Destination of the text file; only used when ``isSave``
            is true.

    Returns:
        The flattened string.
    """
    table_text = df.to_string(index=False, header=False, na_rep='')
    single_line = table_text.replace('\n', ' ')
    # Use a callable replacement so that sep is inserted literally; passing
    # sep directly would make re.sub interpret backslashes in it as escapes
    # or group references (and fail on a lone backslash).
    joined = re.sub(' +', lambda _match: sep, single_line)
    if isSave:
        with open(filePath, mode="w", encoding='utf-8') as file:
            file.write(joined)
    return joined
def GetKeyFromList(keyWords, origin_list):
    """Count the entries that mention any keyword, most frequent first.

    Args:
        keyWords: Keywords to look for; an entry matches when at least one
            keyword is contained in it.
        origin_list: The entries to scan.

    Returns:
        A list of ``(entry, count)`` tuples sorted by count in descending
        order; entries with equal counts keep first-seen order.
    """
    hits = Counter()
    for entry in origin_list:
        for word in keyWords:
            if word in entry:
                hits[entry] += 1
                # One matching keyword is enough; count the entry once.
                break
    return hits.most_common()
def main():
    """Extract keyword-related comments from the spreadsheet and export them.

    Reads ``./docs/barrage.xlsx``, flattens it to a comma-separated string
    (also saved to ``./docs/allBarrage.txt``), counts the tokens containing
    any of the keywords, writes up to the eight most frequent ones with
    their counts to ``./docs/aiBarrage.xlsx`` and prints the full ranking.
    """
    sep = ','
    isSaveText = True
    xlsxPath = './docs/barrage.xlsx'
    saveToText = './docs/allBarrage.txt'
    saveAiPath = './docs/aiBarrage.xlsx'
    keywords = ['AI', '智能']
    top_count = 8  # number of ranked rows written to the output workbook

    df = ReadXlsx(xlsxPath)
    text = ChangeDfToString(df, sep, isSaveText, saveToText)
    s_list = text.split(sep)
    sorted_list = GetKeyFromList(keywords, s_list)

    wb = Workbook()
    ws = wb.active
    # Header row: comment text and occurrence count.
    ws.cell(1, 1, '弹幕')
    ws.cell(1, 2, '次数')
    # Slice instead of indexing a fixed range so that fewer than top_count
    # matches do not raise IndexError; data rows start at row 2.
    for row, entry in enumerate(sorted_list[:top_count], start=2):
        for column, value in enumerate(entry, start=1):
            ws.cell(row, column, value)
    wb.save(saveAiPath)
    print(sorted_list)


if __name__ == '__main__':
    main()

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.
Loading…
Cancel
Save