From 812827d9edcc40db3fbd3f2a47dd3ab4f345c5bb Mon Sep 17 00:00:00 2001 From: pfc8hp2r6 <2317678682@qq.com> Date: Mon, 16 Sep 2024 15:40:06 +0800 Subject: [PATCH] Delete 'danmupapa.py' --- danmupapa.py | 201 --------------------------------------------------- 1 file changed, 201 deletions(-) delete mode 100644 danmupapa.py diff --git a/danmupapa.py b/danmupapa.py deleted file mode 100644 index 8652ce5..0000000 --- a/danmupapa.py +++ /dev/null @@ -1,201 +0,0 @@ -import requests -import re -import pandas as pd - -import xlwt -import json -import chardet -import random -import time -import openpyxl -from openpyxl import Workbook -import xlrd - -import xlwings as xw -from openpyxl.reader.excel import load_workbook - - - -# 随机获取一个请求头 - - - -SEARCH_API_URL ='https://search.bilibili.com/all?keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.788&search_source=2' - -headers = {'User-Agent': '"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50', - 'Cookie': 'buvid3=37CAA054-A8A1-787E-CD23-05D8BC9EEB4780230infoc; b_nut=1725113280; _uuid=1C1E246E-F3B6-D13B-106CD-A109D35CB4D9B67205infoc; enable_web_push=DISABLE; buvid4=C1C36600-6C10-0893-8E67-0999E6D18DC620055-024070815-r8KbDAvfSCu8tCCZ7llkuA%3D%3D; CURRENT_FNVAL=4048; rpdid=|(u))kJumk~~0J\'u~kluuJk)l; header_theme_version=CLOSE; fingerprint=13c5a0300ec32b6c6d2a5a81a8682b18; buvid_fp_plain=undefined; buvid_fp=13c5a0300ec32b6c6d2a5a81a8682b18; bp_t_offset_512104208=975890211160457216; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjYzMjExMjksImlhdCI6MTcyNjA2MTg2OSwicGx0IjotMX0.wd7VeJfGfUQR0Ofh0z5fnmQ1CpNKEjECYT5nAJWTgEQ; bili_ticket_expires=1726321069; bp_t_offset_3546760311998987=976235500761251840; b_lsid=81077410AF_191E6741DE6; bsource=search_baidu; home_feed_column=5; browser_resolution=2100-1095; SESSDATA=7e54226a%2C1741705429%2C85264%2A92CjC-dRoO18jeyJJLwwGvwJs00QhNm1jjKoflnsViX87MSQJOgo3T2MRFIWOTYe5GVuASVnd4YVRLa0NKN2NDWTdBVFZWb2hYR0M2RXBoTnRyeTliNnptVlFJWWhYWnZtam9pNmpNcm54Q00ybzNUenVxSng5UC1hY2VyYUQ5Y0U3czQ3NHRGOEFBIIEC; bili_jct=87feb43d0b6b672ca3d06eca43c442b1; DedeUserID=512104208; DedeUserID__ckMd5=d02f77dab719b614; sid=q6cirm22', - 'Origin': 'https: // search.bilibili.com', - 'Referer': 'https://www.bilibili.com/video' - - } - -response = requests.get(url=SEARCH_API_URL, headers=headers) -##print(response.text) -hrefs = [ -response.text - -] - -# 定义正则表达式模式 -pattern = r'video/(BV\w+)' - -# 提取所有匹配项 -all_bv_numbers = [] -for href in hrefs: - matches = re.findall(pattern, href) - all_bv_numbers.extend(matches) - #all_bv_numbers=set(all_bv_numbers) -#print("所有匹配的BV号:", all_bv_numbers) -for a in range(2,12): - b=30*a-30 - SEARCH_API_URL = 'https://search.bilibili.com/all?keyword=2024%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.788&search_source=2'+str(a)+'&o='+str(b) - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.97 Safari/537.36 Core/1.116.438.400 QQBrowser/13.0.6070.400', - 'Cookie': 'buvid3=37CAA054-A8A1-787E-CD23-05D8BC9EEB4780230infoc; b_nut=1725113280; _uuid=1C1E246E-F3B6-D13B-106CD-A109D35CB4D9B67205infoc; enable_web_push=DISABLE; buvid4=C1C36600-6C10-0893-8E67-0999E6D18DC620055-024070815-r8KbDAvfSCu8tCCZ7llkuA%3D%3D; CURRENT_FNVAL=4048; rpdid=|(u))kJumk~~0J\'u~kluuJk)l; header_theme_version=CLOSE; fingerprint=13c5a0300ec32b6c6d2a5a81a8682b18; buvid_fp_plain=undefined; buvid_fp=13c5a0300ec32b6c6d2a5a81a8682b18; bp_t_offset_512104208=975890211160457216; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjYzMjExMjksImlhdCI6MTcyNjA2MTg2OSwicGx0IjotMX0.wd7VeJfGfUQR0Ofh0z5fnmQ1CpNKEjECYT5nAJWTgEQ; bili_ticket_expires=1726321069; bp_t_offset_3546760311998987=976235500761251840; b_lsid=81077410AF_191E6741DE6; bsource=search_baidu; home_feed_column=5; browser_resolution=2100-1095; SESSDATA=7e54226a%2C1741705429%2C85264%2A92CjC-dRoO18jeyJJLwwGvwJs00QhNm1jjKoflnsViX87MSQJOgo3T2MRFIWOTYe5GVuASVnd4YVRLa0NKN2NDWTdBVFZWb2hYR0M2RXBoTnRyeTliNnptVlFJWWhYWnZtam9pNmpNcm54Q00ybzNUenVxSng5UC1hY2VyYUQ5Y0U3czQ3NHRGOEFBIIEC; bili_jct=87feb43d0b6b672ca3d06eca43c442b1; DedeUserID=512104208; DedeUserID__ckMd5=d02f77dab719b614; sid=q6cirm22', - - } - response = requests.get(url=SEARCH_API_URL, headers=headers) - ##print(response.text) - hrefs = [ - response.text - - ] - - # 定义正则表达式模式 - pattern = r'video/(BV\w+)' - - # 提取所有匹配项 - - for href in hrefs: - matches = re.findall(pattern, href) - all_bv_numbers.extend(matches) - - - - a+=1 -all_bv_numbers = set(all_bv_numbers) -print(len(all_bv_numbers)) -print("所有匹配的BV号:", all_bv_numbers) - -url_bag = [] - -# 根据oid请求弹幕,解析弹幕得到最终的数据\ -for bvv in all_bv_numbers: - - - url_bag.append(str('https://api.bilibili.com/x/player/pagelist?bvid='+str(bvv)+'&jsonp=jsonp')) - - - # 清理请求头中的非 ASCII 字符 -def clean_header(header): - return ''.join([c if ord(c) < 128 else '' for c in header]) -cidd=[] -for url in url_bag: - - headers={ - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50', - 'Referer': 'https://www.bilibili.com/video', - 'Cookie': 'buvid3=37CAA054-A8A1-787E-CD23-05D8BC9EEB4780230infoc; b_nut=1725113280; _uuid=1C1E246E-F3B6-D13B-106CD-A109D35CB4D9B67205infoc; enable_web_push=DISABLE; buvid4=C1C36600-6C10-0893-8E67-0999E6D18DC620055-024070815-r8KbDAvfSCu8tCCZ7llkuA%3D%3D; CURRENT_FNVAL=4048; rpdid=|(u))kJumk~~0J\'u~kluuJk)l; header_theme_version=CLOSE; fingerprint=13c5a0300ec32b6c6d2a5a81a8682b18; buvid_fp_plain=undefined; buvid_fp=13c5a0300ec32b6c6d2a5a81a8682b18; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjYzMjExMjksImlhdCI6MTcyNjA2MTg2OSwicGx0IjotMX0.wd7VeJfGfUQR0Ofh0z5fnmQ1CpNKEjECYT5nAJWTgEQ; bili_ticket_expires=1726321069; bsource=search_baidu; home_feed_column=5; browser_resolution=2100-1095; b_lsid=2C2F5DE8_191E6C6B17F; bp_t_offset_512104208=976311809445199872; SESSDATA=64f9af2c%2C1741705746%2C252bc%2A92CjBco6PCEf5jHGOtLwBCnpRRhnXcR0SL850C4F5X8GK2eVczaFKXrWQv4b7zWBkS77cSVmtEdktONDd2RUx0NTQybzAwZTdlOU1jNGlvbnZuN0YzU3VOZFBPZVBWMEVqWFN5a3hBdlVYTWkwaXZQTndCR3FQUkVIT1N1elpCNUI5UFZPc2JTYzNnIIEC; bili_jct=0dbb281cc47245a2970662a9f04112ea; DedeUserID=3546760311998987; DedeUserID__ckMd5=945a443ae2d0a983; sid=8uthyfdc; bp_t_offset_3546760311998987=976312754338004992', - } - - try: - response = requests.get(url=url, headers=headers) - response.raise_for_status() # 检查请求是否成功 - print("Response Text:", response.text) # 打印响应文本 - json_dict = json.loads(response.text) - cid = json_dict["data"][0]["cid"] - print(cid) - cidd.append(cid) - except requests.RequestException as e: - print(f"请求错误: {e}") - except json.JSONDecodeError as e: - print(f"JSON解码错误: {e}") - except KeyError as e: - print(f"键错误: {e}, 可能是数据结构不符合预期") - except UnicodeEncodeError as e: - print(f"编码错误: {e}") - time.sleep(1) - - # 增加请求间隔 - - - -cnt=0 -result = open('total_data.xls', 'w', encoding='utf-8') -ciyun_file=open('danmu.txt','w',encoding='utf-8') -workbook = openpyxl.Workbook() - -# 获取默认的工作表 -sheet = workbook.active - -# 写入数据 -sheet['A1'] = 'data' - -for data in cidd: - url = 'https://api.bilibili.com/x/v2/dm/web/history/seg.so?type=1&oid='+str(data)+'&date=2024-09-06' - - headers = {'User-Agent': '"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50', - 'Referer': 'https://www.bilibili.com/video', - 'Cookie': 'buvid3=37CAA054-A8A1-787E-CD23-05D8BC9EEB4780230infoc; b_nut=1725113280; _uuid=1C1E246E-F3B6-D13B-106CD-A109D35CB4D9B67205infoc; enable_web_push=DISABLE; buvid4=C1C36600-6C10-0893-8E67-0999E6D18DC620055-024070815-r8KbDAvfSCu8tCCZ7llkuA%3D%3D; CURRENT_FNVAL=4048; rpdid=|(u))kJumk~~0J\'u~kluuJk)l; header_theme_version=CLOSE; fingerprint=13c5a0300ec32b6c6d2a5a81a8682b18; buvid_fp_plain=undefined; buvid_fp=13c5a0300ec32b6c6d2a5a81a8682b18; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjYzMjExMjksImlhdCI6MTcyNjA2MTg2OSwicGx0IjotMX0.wd7VeJfGfUQR0Ofh0z5fnmQ1CpNKEjECYT5nAJWTgEQ; bili_ticket_expires=1726321069; bp_t_offset_3546760311998987=976235500761251840; bsource=search_baidu; home_feed_column=5; browser_resolution=2100-1095; b_lsid=2C2F5DE8_191E6C6B17F; bp_t_offset_512104208=976311809445199872; SESSDATA=64f9af2c%2C1741705746%2C252bc%2A92CjBco6PCEf5jHGOtLwBCnpRRhnXcR0SL850C4F5X8GK2eVczaFKXrWQv4b7zWBkS77cSVmtEdktONDd2RUx0NTQybzAwZTdlOU1jNGlvbnZuN0YzU3VOZFBPZVBWMEVqWFN5a3hBdlVYTWkwaXZQTndCR3FQUkVIT1N1elpCNUI5UFZPc2JTYzNnIIEC; bili_jct=0dbb281cc47245a2970662a9f04112ea; DedeUserID=3546760311998987; DedeUserID__ckMd5=945a443ae2d0a983; sid=8uthyfdc', - - } - response = requests.get(url=url, headers=headers) - - mama=response.text - - print(mama) - - pattern1 = ':([^@]*)@' - - dataa = re.findall(pattern1, response.text) - keyword = '你' - - # 提取含有关键词的元素 - filtered_elements = [] - - for a in dataa: - illegal_chars = ['\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', - '\x08', '\x0b', '\x0c', '\x0e', '\x0f', '\x10', '\x11', '\x12', - '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', - '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', '\x7f'] - for char in illegal_chars: - a = a.replace(char, '') - - a = str(a) - sheet.append([a]) - ciyun_file.write(a) - -workbook.save('total_data.xls') -keywords = ['ai','智能'] - -# 读取原始表格数据 -df = pd.read_excel('total_data.xls') - -# 检查每一行是否含有关键字 -pattern = r'(?