From 709561633a1f6572ec5d244d01cfdc82f4c7ed52 Mon Sep 17 00:00:00 2001 From: pu6qcatis <916351100@qq.com> Date: Sun, 15 Sep 2024 09:11:32 +0800 Subject: [PATCH] Delete 'requirements.txt' --- requirements.txt | 94 ------------------------------------------------ 1 file changed, 94 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 4232bd5..0000000 --- a/requirements.txt +++ /dev/null @@ -1,94 +0,0 @@ -import requests -from bs4 import BeautifulSoup -import time -import pandas as pd -from collections import Counter -from wordcloud import WordCloud -import matplotlib.pyplot as plt -cnt = 0 #已爬取视频数 -danmuku_all = [] #弹幕库 - -headers = { - user-agent Mozilla5.0 (Windows NT 10.0; Win64; x64) AppleWebKit537.36 (KHTML, like Gecko) Chrome128.0.0.0 Safari537.36 -} -def get_cid(bvid) - url = fhttpsapi.bilibili.comxplayerpagelistbvid={bvid} - try - response = requests.get(url, headers=headers, timeout=10) - response.raise_for_status() - Json = response.json() - return Json['data'][0]['cid'] - except requests.exceptions.RequestException as e - print(f请求失败 {e}) - return None -def get_danmuku(cid) - if cid is None - return [] - url = fhttpscomment.bilibili.com{cid}.xml - try - response = requests.get(url, headers=headers, timeout=10) - response.encoding = 'utf-8' - soup = BeautifulSoup(response.text, 'xml') - return [i.text for i in soup.find_all('d')] - except requests.exceptions.RequestException as e - print(f请求失败 {e}) - return [] -for Page in range(1, 22) - url = f'httpsapi.bilibili.comxweb-interfacesearchtypesearch_type=video&keyword=巴黎奥运会&page={Page}' - try - response = requests.get(url, headers=headers, timeout=10) - response.raise_for_status() - Json = response.json() - results = Json['data']['result'] - for result in results - cid = get_cid(result['bvid']) - danmuku = get_danmuku(cid) - danmuku_all.extend(danmuku) - cnt += 1 - if cnt = 300 - break - if cnt = 300 - break - except requests.exceptions.RequestException as e - print(f请求失败 {e}) -time.sleep(1) #延时1秒防止被屏蔽 -def filter_danmuku(danmuku_list, keywords) - #筛选包含指定关键词的弹幕 - keywords_lower = [keyword.lower() for keyword in keywords] # 关键词小写 - filtered = [d for d in danmuku_list if any(keyword in d.lower() for keyword in keywords_lower)] - return filtered - -#读取弹幕文件 -with open('所有视频弹幕.txt', 'r', encoding='utf-8') as file - danmuku_all = file.readlines() - -#筛选包含关键词的弹幕 -keywords = ['AI识曲','AI生成','神经网络','卷积神经网络','循环神经网络','智能家居','自动驾驶','智能推荐','智能算法','强化学习','计算机视觉','ai还原','ai合成'] -filtered_danmuku = filter_danmuku(danmuku_all, keywords) -#统计弹幕数量 -counter = Counter(filtered_danmuku) -most_common = counter.most_common(8) -#将结果按列写入Excel -data = {'弹幕内容' [content.strip() for content, count in most_common], - '数量' [count for content, count in most_common]} -df = pd.DataFrame(data) -df.to_excel('AI_人工智能_弹幕统计.xlsx', index=False) -print(前8位弹幕统计已保存到 'AI_人工智能_弹幕统计.xlsx'.) -font_path = r'CWindowsFontssimhei.ttf' -try - df = pd.read_excel('AI_人工智能_弹幕统计.xlsx') - if '弹幕内容' not in df.columns - raise ValueError(Excel 文件中没有找到 '弹幕内容' 列) - text = ' '.join(df['弹幕内容'].dropna()) - wordcloud = WordCloud(font_path=font_path, width=800, height=400, background_color='white').generate(text) - plt.figure(figsize=(10, 5)) - plt.imshow(wordcloud, interpolation='bilinear') - plt.axis('off') - plt.show() - wordcloud.to_file('词云图.png') -except FileNotFoundError - print(文件未找到,请检查文件路径) -except ValueError as ve - print(ve) -except Exception as e - print(f发生错误 {e}) \ No newline at end of file