"""Collect and count bullet comments (danmu) for Bilibili search results.

Pipeline: fetch video ids (bvid) from the search API, resolve each bvid to a
cid by scraping the video page, download the danmu list for every cid, count
identical comments, and write the counts to a CSV file.
"""
import csv
import os
import re
import time
from collections import Counter

import requests

# Seconds to wait for the server; without an explicit timeout requests.get
# can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Upper bound on the number of videos collected and on search pages requested.
MAX_VIDEOS = 300
SEARCH_PAGES = 10

# SECURITY NOTE(review): this is a logged-in session cookie committed to source
# control. It is kept only as a fallback so existing runs behave as before;
# set the BILIBILI_COOKIE environment variable to override it, and consider
# removing the literal (and revoking the session) entirely.
_DEFAULT_COOKIE = "b_nut=1655303638; CURRENT_BLACKGAP=0; i-wanna-go-back=-1; buvid_fp_plain=undefined; buvid3=136172B5-8F7E-42EF-90C5-2D4A04147EE8167644infoc; LIVE_BUVID=AUTO3016558277732131; is-2022-channel=1; DedeUserID=17012171; DedeUserID__ckMd5=f518907167c48789; buvid4=88DCF3D1-502C-06D9-BF49-B7E4F0A0519315466-022061522-P1wwgn5lZ%2FpHMTaSvipifA%3D%3D; rpdid=|(JlRYJ~Ykl)0J'u~|JulJkJm; b_ut=5; FEED_LIVE_VERSION=V_HEADER_LIVE_NO_POP; _uuid=10525FA4C-8522-AFCC-210B4-19DE77B6FC8C66067infoc; enable_web_push=DISABLE; header_theme_version=CLOSE; CURRENT_FNVAL=16; fingerprint=913be879852fe07606cd36fdc205d010; home_feed_column=5; PVID=2; buvid_fp=913be879852fe07606cd36fdc205d010; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY1ODE3NTksImlhdCI6MTcyNjMyMjQ5OSwicGx0IjotMX0.gM_SG7dtVNHO6JWzjJQb9TKfMdYUCQCc5kwBCvwvLTg; bili_ticket_expires=1726581699; CURRENT_QUALITY=120; SESSDATA=453edc86%2C1742051724%2C4ecfc%2A92CjBdMxud0tS4fWj_7UbpyntEQgA3SRR47yjry_nQGUMYCBxWrcLTlEr08nFsh9Jb83oSVkJZQXlsMXN0MkQxRU9lQzZ2NjhmS1BGVHBETjVsUHVTX2lSeG9xMWpYUE1BM3hVcTljZWpUcnUzNmdlU29JMTh4a1otc3VUYUlPUmNPOFFteWctWm1nIIEC; bili_jct=1b519a4b9101a167c49d8dad05dbdb80; b_lsid=AB4510676_191FF788B10; sid=8axcda7r; bmg_af_switch=1; bmg_src_def_domain=i0.hdslb.com; browser_resolution=1707-837; bp_t_offset_17012171=978105529521930240"

# Browser identity sent with every request.
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"

# Compiled once at import time instead of once per loop iteration.
_CID_PATTERN = re.compile(r'"cid":(\d+)')
# Captures the text between <d p="..."> and </d>.
# NOTE(review): assumes the danmu endpoint returns one <d p="...">text</d>
# element per comment -- confirm against a live response.
_DANMU_PATTERN = re.compile(r'<d p=".*?">(.*?)</d>')


def GetResponse(url, data):
    """Send a GET request with browser-like headers and return the response.

    Args:
        url: Address to request.
        data: Query parameters passed to ``requests.get`` as ``params``
            (may be ``None``).

    Returns:
        The ``requests`` response object, unmodified.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    headers = {
        # Cookie carries the user session; an environment override keeps
        # credentials out of the source file.
        "Cookie": os.environ.get("BILIBILI_COOKIE", _DEFAULT_COOKIE),
        # User-Agent describes the browser/device making the request.
        "User-Agent": _USER_AGENT,
    }
    return requests.get(
        url=url, params=data, headers=headers, timeout=REQUEST_TIMEOUT
    )


def GetBvids():
    """Return the bvids of up to ``MAX_VIDEOS`` videos from the search API.

    Requests at most ``SEARCH_PAGES`` result pages and stops early once
    enough ids are collected or a page yields no results.

    Returns:
        A list of bvid values in the order the API returned them.
    """
    bvids = []
    for page in range(1, SEARCH_PAGES + 1):
        # NOTE(review): w_rid/wts look like a signature captured from a single
        # browser session; they may not be accepted for every page -- confirm.
        url = f"https://api.bilibili.com/x/web-interface/wbi/search/type?category_id=&search_type=video&ad_resource=5654&__refresh__=true&_extra=&context=&page={page}&page_size=42&pubtime_begin_s=0&pubtime_end_s=0&from_source=&from_spmid=333.337&platform=pc&highlight=1&single_column=0&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&qv_id=v6jFbUhcT7bjHVQXrLt535Vt7pgQ0qf1&source_tag=3&gaia_vtoken=&dynamic_offset=24&web_location=1430654&w_rid=532d2b5246861f41eb2ab975ff882c0d&wts=1726475442"

        jsondata = GetResponse(url=url, data=None).json()

        # A payload without data/result would otherwise raise KeyError;
        # treat it as "no more results" and keep what was collected.
        result = (jsondata.get('data') or {}).get('result') or []
        if not result:
            print(f"search page {page} returned no results; stopping early")
            break

        for item in result:
            bvid = item.get('bvid')
            if not bvid:
                continue
            bvids.append(bvid)
            if len(bvids) >= MAX_VIDEOS:
                return bvids
    return bvids


def GetCids(bvids):
    """Resolve each bvid to a cid by scraping its video page.

    Args:
        bvids: Iterable of bvid strings.

    Returns:
        A list of cid strings. Videos whose page contains no ``"cid":<digits>``
        fragment are skipped, so the list may be shorter than ``bvids``.
    """
    cids = []
    for bvid in bvids:
        response = GetResponse(
            url=f"https://www.bilibili.com/video/{bvid}", data=None
        )
        match = _CID_PATTERN.search(response.text)
        if match is None:
            # search() returns None when the page has no cid; calling
            # .group() on it would raise AttributeError.
            continue
        cids.append(match.group(1))
    return cids


def GetDanmus(cids):
    """Download the danmu list for every cid and count identical comments.

    Args:
        cids: Iterable of cid values.

    Returns:
        A dict mapping each comment text to the number of times it occurred
        across all requested cids.
    """
    counts = Counter()
    for cid in cids:
        response = GetResponse(
            url=f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}",
            data=None,
        )
        # Force UTF-8 so Chinese text is decoded correctly.
        response.encoding = 'utf-8'
        counts.update(_DANMU_PATTERN.findall(response.text))
    return dict(counts)


def WriteCsv(dictdata, fileName="danmu.csv"):
    """Write comment counts to a CSV file with a header row.

    Args:
        dictdata: Mapping of comment text to count.
        fileName: Output path; defaults to ``danmu.csv``.
    """
    # utf-8-sig writes a BOM at the start of the file; newline='' lets the
    # csv module control line endings.
    with open(fileName, 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["弹幕", "数量"])
        writer.writerows(dictdata.items())


def _timed(label, func, *args):
    """Call ``func(*args)``, print its elapsed time, and return its result."""
    start_time = time.perf_counter()
    result = func(*args)
    end_time = time.perf_counter()
    print(f"{label}函数执行时间:{end_time - start_time}秒")
    return result


def GetInfo():
    """Run the full pipeline and write the danmu counts to ``danmu.csv``."""
    bvids = _timed("GetBvids", GetBvids)
    cids = _timed("GetCids", GetCids, bvids)
    danmus = _timed("GetDanmus", GetDanmus, cids)
    WriteCsv(danmus)


if __name__ == '__main__':
    GetInfo()