From fe00241e3004b64e792e25ddd231ab12c3194678 Mon Sep 17 00:00:00 2001 From: pnkpw52ta <1241503069@qq.com> Date: Wed, 18 Sep 2024 23:12:58 +0800 Subject: [PATCH] ADD file via upload --- 102201209刘鑫豪.py | 89 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 102201209刘鑫豪.py diff --git a/102201209刘鑫豪.py b/102201209刘鑫豪.py new file mode 100644 index 0000000..b6cc471 --- /dev/null +++ b/102201209刘鑫豪.py @@ -0,0 +1,89 @@ +import requests +import re +import jieba +import wordcloud +import imageio +from urllib import request #请求模块 +from fake_useragent import UserAgent #在线生成User-Agent +import time, re,csv + +def get_urls(url): + ua = UserAgent() + headers = { + 'User-Agent' : ua.random, + "cookie" : "CURRENT_FNVAL=4048; DedeUserID=599512569; DedeUserID__ckMd5=dc4a13272f0a9ea2; buvid3=D25424AB-ED5E-FB32-6058-CEAFCE9BED0785126infoc; b_nut=1702298485; _uuid=3B16C9CF-A4D3-157C-C788-4414C7D1CB6883241infoc; rpdid=|(u))kkYu|)l0J'u~ukYYJRRl; header_theme_version=CLOSE; enable_web_push=DISABLE; home_feed_column=5; buvid4=BC1C2D23-29FB-83CA-7F80-AFC2DC96707044936-024091707-xyz%2FLhNhkMgQoBuUX4qoqQ%3D%3D; SESSDATA=f7936ef9%2C1742109545%2Cb955c%2A92CjBowCYGlCGs8LxEIVDfq_sHffd2IenL64iEzkCgBPuRNCQHJSPLTi_2TRVl6WouW0QSVmZRNUtHWGhQc04xLWFXQndaNUNEelhjZ0V4TmVsemRDbUUxMXk0OHg3OVY3UnptZ09WckxmQ2VSSVdlMFg5MGR6VU9sa0RZSTZndjNkcVlzM0tmd0FRIIEC; bili_jct=f57986cccdf574fa8be013c21ba69f00; sid=4ovm4bl0; fingerprint=34f624a48e970862c6be836c5ea87b35; buvid_fp_plain=undefined; buvid_fp=34f624a48e970862c6be836c5ea87b35; browser_resolution=1659-941; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjY4NDc0NDAsImlhdCI6MTcyNjU4ODE4MCwicGx0IjotMX0.h6GQaSN55QhTvi5tIFZ-hhhO7I5X55Pm2HR1R_k1_60; bili_ticket_expires=1726847380; b_lsid=171C6103F_19202722AED; bp_t_offset_599512569=978313367284350976", + + } + + req=request.Request(url=url,headers=headers) #创建请求对象 + res=request.urlopen(req) #发起请求得到响应 + html=res.read().decode() + + + re_dbs = 
# NOTE(review): this line originally began with the tail of a function whose
# `def` header and the start of its regex are not visible in this file (the
# text was lost when the patch was flattened). The fragment cannot be parsed
# on its own, so it is preserved here as a comment until the missing header
# is restored:
#
#     '(.*?)")
#     DanMu = format.findall(html_doc)
#     # write the danmaku out one by one
#     with open("ba.txt", 'a', newline='') as f:
#         for i in DanMu[:-3]:
#             try:
#                 f.write(i+'\n')
#             except:
#                 print(1)

# ---------------------------------------------------------------------------
# Step 1: pass search-result pages to get_urls.
# The keyword in the query string is the URL-encoded form of "巴黎奥运会".
# range(2, 14) requests page=3..14 with offsets o=60..390.
# ---------------------------------------------------------------------------
for i in range(2, 14):
    url = f'https://search.bilibili.com/video?keyword=%E5%B7%B4%E9%BB%8E%E5%A5%A5%E8%BF%90%E4%BC%9A&from_source=webtop_search&spm_id_from=333.1007&search_source=2&page={i+1}&o={i*30}'
    get_urls(url)
    print(i)

# ---------------------------------------------------------------------------
# Step 2: pass every usable entry of aaa.txt to get_barrage.
# Entries are scheme-less; an example of the resulting URL is
# https://comment.bilibili.com/1628109137.xml
# NOTE(review): aaa.txt is presumably produced by get_urls -- confirm.
# ---------------------------------------------------------------------------
with open('aaa.txt', 'r') as file:
    i = 0
    # Iterating the file ends cleanly at EOF. The previous readline() loop
    # tested the length before testing for '' and therefore `continue`d
    # forever once the end of the file was reached.
    for raw_line in file:
        url1 = raw_line.strip()
        # Entries shorter than 35 characters are skipped (35 is the length of
        # "comment.bilibili.com/1628109137.xml"; presumably this filters out
        # blank or truncated entries -- confirm).
        if len(url1) < 35:
            continue
        url = 'https://' + url1
        get_barrage(url)
        print(i)
        i += 1

# ---------------------------------------------------------------------------
# Step 3: read the stored danmaku from ba.txt and export every line that
# mentions 'AI技术' to bb.xls, one single-column CSV row per line.
# ---------------------------------------------------------------------------
with open("ba.txt", 'r') as f:
    # Scan to EOF: a blank line in the middle of the file is just skipped by
    # the substring test instead of terminating the scan early.
    matches = [
        line
        for line in (raw_line.strip() for raw_line in f)
        if 'AI技术' in line
    ]

# Open the output once, and only when there is something to write, so that
# bb.xls is still not created when nothing matches.
if matches:
    # errors='replace': a character that GBK cannot encode (e.g. an emoji)
    # becomes '?' instead of raising UnicodeEncodeError and aborting the export.
    with open('bb.xls', 'a', encoding='gbk', errors='replace', newline='') as file:
        csv.writer(file).writerows([line] for line in matches)