You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
116 lines
5.2 KiB
116 lines
5.2 KiB
import requests
|
|
import re
|
|
import csv
|
|
import time
|
|
|
|
def GetResponse(url, data, timeout=10):
    """Send an HTTP GET request with browser-like headers.

    Args:
        url: Address to request.
        data: Query parameters, forwarded to ``requests.get`` as ``params``
            (may be ``None``).
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without this a stalled
            connection could block the whole run.

    Returns:
        The ``requests.Response`` object, unmodified.

    Raises:
        requests.exceptions.RequestException: If the connection fails or the
            timeout elapses.
    """
    # Imitate a browser.
    # NOTE(review): the Cookie embeds a logged-in session (SESSDATA, bili_jct).
    # Keeping credentials in source is unsafe; consider loading them from the
    # environment or a config file instead.
    headers = {
        # Cookie: user information, commonly used to detect a logged-in account.
        "Cookie": "b_nut=1655303638; CURRENT_BLACKGAP=0; i-wanna-go-back=-1; buvid_fp_plain=undefined; buvid3=136172B5-8F7E-42EF-90C5-2D4A04147EE8167644infoc; LIVE_BUVID=AUTO3016558277732131; is-2022-channel=1; DedeUserID=17012171; DedeUserID__ckMd5=f518907167c48789; buvid4=88DCF3D1-502C-06D9-BF49-B7E4F0A0519315466-022061522-P1wwgn5lZ%2FpHMTaSvipifA%3D%3D; rpdid=|(JlRYJ~Ykl)0J'u~|JulJkJm; b_ut=5; FEED_LIVE_VERSION=V_HEADER_LIVE_NO_POP; _uuid=10525FA4C-8522-AFCC-210B4-19DE77B6FC8C66067infoc; enable_web_push=DISABLE; header_theme_version=CLOSE; CURRENT_FNVAL=16; fingerprint=913be879852fe07606cd36fdc205d010; home_feed_column=5; PVID=2; buvid_fp=913be879852fe07606cd36fdc205d010; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY1ODE3NTksImlhdCI6MTcyNjMyMjQ5OSwicGx0IjotMX0.gM_SG7dtVNHO6JWzjJQb9TKfMdYUCQCc5kwBCvwvLTg; bili_ticket_expires=1726581699; CURRENT_QUALITY=120; SESSDATA=453edc86%2C1742051724%2C4ecfc%2A92CjBdMxud0tS4fWj_7UbpyntEQgA3SRR47yjry_nQGUMYCBxWrcLTlEr08nFsh9Jb83oSVkJZQXlsMXN0MkQxRU9lQzZ2NjhmS1BGVHBETjVsUHVTX2lSeG9xMWpYUE1BM3hVcTljZWpUcnUzNmdlU29JMTh4a1otc3VUYUlPUmNPOFFteWctWm1nIIEC; bili_jct=1b519a4b9101a167c49d8dad05dbdb80; b_lsid=AB4510676_191FF788B10; sid=8axcda7r; bmg_af_switch=1; bmg_src_def_domain=i0.hdslb.com; browser_resolution=1707-837; bp_t_offset_17012171=978105529521930240",
        # User-Agent: basic browser identity / device information.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
    }

    # Send the request and hand the raw response back to the caller.
    return requests.get(url=url, params=data, headers=headers, timeout=timeout)
|
|
|
|
def GetBvids(limit=300, max_pages=10):
    """Collect the bvids of the top search results (default ordering).

    Args:
        limit: Maximum number of bvids to return (300 by default).
        max_pages: Maximum number of result pages to request (10 by default).

    Returns:
        A list of at most ``limit`` bvid values, in the order the search
        pages returned them. The list may be shorter when the search runs
        out of results or a page carries no result list.
    """
    bvids = []
    for page in range(1, max_pages + 1):
        if len(bvids) >= limit:
            break

        # Request URL for this page.
        # NOTE(review): w_rid / wts look like a request signature and timestamp
        # captured from one browser session -- confirm they remain valid.
        url = f"https://api.bilibili.com/x/web-interface/wbi/search/type?category_id=&search_type=video&ad_resource=5654&__refresh__=true&_extra=&context=&page={page}&page_size=42&pubtime_begin_s=0&pubtime_end_s=0&from_source=&from_spmid=333.337&platform=pc&highlight=1&single_column=0&keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&qv_id=v6jFbUhcT7bjHVQXrLt535Vt7pgQ0qf1&source_tag=3&gaia_vtoken=&dynamic_offset=24&web_location=1430654&w_rid=532d2b5246861f41eb2ab975ff882c0d&wts=1726475442"

        # Send the request and decode the JSON payload.
        jsondata = GetResponse(url=url, data=None).json()

        # A payload without a result list (presumably an error reply or a page
        # past the last result) ends paging instead of raising KeyError/TypeError.
        result = (jsondata.get('data') or {}).get('result') or []
        if not result:
            break

        # Pull the bvid out of every entry until the limit is reached.
        for item in result:
            bvids.append(item['bvid'])
            if len(bvids) >= limit:
                break
    return bvids
|
|
|
|
def GetCids(bvids):
    """Resolve each bvid to a cid by scanning the video page source.

    Args:
        bvids: Iterable of bvid strings.

    Returns:
        A list of cid strings (digits as matched by the regex), in input
        order. A bvid whose page contains no ``"cid":<digits>`` fragment is
        reported on stdout and skipped, so the result can be shorter than
        ``bvids``.
    """
    # Compiled once; the pattern does not depend on the loop variable.
    cid_pattern = re.compile(r'"cid":(\d+)')

    cids = []
    for bvid in bvids:
        url = f"https://www.bilibili.com/video/{bvid}"
        response = GetResponse(url=url, data=None)

        # search() returns None when the page has no cid; calling .group() on
        # it would abort the whole batch with AttributeError.
        found = cid_pattern.search(response.text)
        if found is None:
            print(f"GetCids: no cid found for {bvid}, skipping")
            continue

        cids.append(found.group(1))
    return cids
|
|
|
|
def GetDanmus(cids):
    """Fetch the danmaku (bullet comments) of every cid and count duplicates.

    Args:
        cids: Iterable of cid values.

    Returns:
        A dict mapping each distinct comment text to the number of times it
        occurred across all requested cids.
    """
    # Function-scope import keeps this block self-contained.
    import html

    # Compiled once; the pattern is identical for every response.
    danmu_pattern = re.compile('<d p=".*?">(.*?)</d>')

    DanmuDict = {}
    for cid in cids:
        url = f"https://api.bilibili.com/x/v1/dm/list.so?oid={cid}"
        response = GetResponse(url=url, data=None)

        # Force UTF-8 so Chinese text decodes correctly.
        response.encoding = 'utf-8'

        for raw_text in danmu_pattern.findall(response.text):
            # The comments are taken from <d ...>...</d> markup, so the text is
            # presumably entity-escaped (&amp;, &lt;, ...); decode it so the
            # counts use the text as users typed it.
            danmu = html.unescape(raw_text)
            DanmuDict[danmu] = DanmuDict.get(danmu, 0) + 1
    return DanmuDict
|
|
|
|
def WriteCsv(dictdata, fileName="danmu.csv"):
    """Write comment counts to a CSV file.

    Args:
        dictdata: Mapping whose ``items()`` yield (comment text, count) pairs.
        fileName: Destination path; defaults to ``danmu.csv`` in the current
            working directory.

    The file is written as UTF-8 with a BOM (``utf-8-sig``), starts with the
    header row ``弹幕,数量`` and is overwritten if it already exists.
    """
    # newline='' lets the csv module control line endings itself.
    with open(fileName, 'w', encoding='utf-8-sig', newline='') as f:
        # One writer handles both the header and the data rows.
        writer = csv.writer(f)
        writer.writerow(["弹幕", "数量"])
        writer.writerows(dictdata.items())
|
|
|
|
|
|
def GetInfo():
    """Run the whole pipeline and report how long each stage takes.

    Stages, in order: GetBvids -> GetCids -> GetDanmus. The elapsed time of
    each stage is printed, then the comment counts are passed to WriteCsv.
    """
    def _timed(stage, *args):
        # Run one stage and return (its result, elapsed seconds).
        began = time.perf_counter()
        outcome = stage(*args)
        return outcome, time.perf_counter() - began

    bvids, elapsed = _timed(GetBvids)
    print(f"GetBvids函数执行时间:{elapsed}秒")

    cids, elapsed = _timed(GetCids, bvids)
    print(f"GetCids函数执行时间:{elapsed}秒")

    danmus, elapsed = _timed(GetDanmus, cids)
    print(f"GetDanmus函数执行时间:{elapsed}秒")

    WriteCsv(danmus)
|
|
|
|
# Script entry point: run the pipeline only when executed directly.
if __name__ == "__main__":
    GetInfo()