From 5d550ab92095fcc56ee9559c9864f3fd78a30344 Mon Sep 17 00:00:00 2001
From: QMZ <1164250597@qq.com>
Date: Mon, 16 Sep 2024 22:29:36 +0800
Subject: [PATCH] Data filtering and processing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 handle.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 handle.py

diff --git a/handle.py b/handle.py
new file mode 100644
index 0000000..d84d739
--- /dev/null
+++ b/handle.py
@@ -0,0 +1,44 @@
+import requests
+from bs4 import BeautifulSoup
+import re
+import time
+import jieba
+import wordcloud
+import matplotlib.pyplot as plt
+import pandas as pd
+from pandas import ExcelWriter
+from collections import Counter
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0',
+
+}
+keywords = [
+    'AI', '人工智能', '机器学习', '深度学习', '神经网络', '自动化',
+    '算法', '数据科学', '自然语言处理', '计算机视觉', '人工智能技术', 'AI技术', 'AI应用', 'AI模型',
+    '大数据', '预测分析', '机器视觉', '自动驾驶',
+    '智能推荐', '计算机科学', '人工智能应用',
+    '数据分析', '智能化', '情感计算', 'ai'
+]
+
+def chuli(etxt):
+    #danmustr = ''.join(i for i in etxt)  # join all danmu (bullet comments) into one string
+    #words = list(jieba.cut(danmustr))  # segment the danmu into words with jieba
+    words = [i for i in etxt if len(i) > 1]  # keep items longer than one character (drops filler such as "?", "哈", "啊")
+    wc = wordcloud.WordCloud(background_color='white', height=1000, width=1000, font_path='simsun.ttc')  # configure the word-cloud image
+    wc.generate(' '.join(words))  # render the word cloud
+    print(wc)
+    plt.imshow(wc)
+    plt.show()
+
+def sort(txt, keywords):
+    # count how often each AI-related danmu line appears
+    comment_counter = Counter()
+    for line in txt:
+        line = line.strip()
+        if any(word in keywords for word in jieba.cut(line)):
+            comment_counter[line] += 1
+
+    return comment_counter
+
+AIdanmu = sort(altxt, keywords)  # altxt: the crawled danmu lines, defined elsewhere in the project
+chuli(AIdanmu)
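
Note: handle.py calls sort(altxt, keywords) at module level but never defines altxt, so the file raises a NameError when run on its own; altxt is presumably the list of crawled danmu lines produced by an earlier step of the project. A minimal sketch of one way to supply it before the final two lines run, assuming the earlier step saved one danmu per line to a local file (the file name danmu.txt and this loading code are illustrative, not part of this patch):

    # illustrative only: load previously saved danmu into altxt
    # (the file name 'danmu.txt' is an assumption, not part of this patch)
    with open('danmu.txt', encoding='utf-8') as f:
        altxt = [line.strip() for line in f if line.strip()]

With altxt populated this way, sort() returns a Counter of the AI-related lines and chuli() draws the word cloud from them.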