diff --git a/demo.py b/demo.py index a459fde..789eada 100644 --- a/demo.py +++ b/demo.py @@ -1,6 +1,8 @@ import requests from bs4 import BeautifulSoup import re +import time +import random import jieba # 结巴分词 pip install jieba import wordcloud # 词云图 pip install wordcloud import imageio # 读取本地图片 修改词云图形 @@ -58,6 +60,10 @@ def get_danmu(id): txtsss=[txts.replace(' ','') for txts in txtss] #将字符串中的空格消除掉 return(txtsss) ###打印便可看见一条条弹幕的属性和内容了。 +def page(url,num): + num=num+1 + url=f'https://search.bilibili.com/video?keyword=2024巴黎奥运会&page={num}' + return url # 主函数 def main(search_url): @@ -75,12 +81,17 @@ def main(search_url): return(cids) # 示例搜索页 URL(需要替换为实际的搜索页 URL) -search_url = 'https://search.bilibili.com/all?keyword=2024巴黎奥运会' -aa = main(search_url) +search_url = 'https://search.bilibili.com/video?keyword=2024巴黎奥运会' alltxt=[] -for id in aa: - txt = get_danmu(id) - alltxt=alltxt + txt + +for i in range(10): + + aa = main(page(search_url,i)) + for id in aa: + txt = get_danmu(id) + alltxt=alltxt + txt + time.sleep(random.randint(0,3)+random.random()) + danmustr=''.join(i for i in alltxt) #将所有弹幕拼接在一起 words=list(jieba.cut(danmustr)) ###利用jieba库将弹幕按词进行切分 words=[i for i in words if len(i)>1] ###挑出长度大于1的词语(为去除诸如?,哈,啊等字符)