"""Download a bilibili user's avatar image and display name.

Provenance: this module is the ``bilibili.py`` created by commit 7902f0f
(dated Fri, 28 May 2021; subject, translated: "scrape bilibili personal
info").  It fetches ``https://space.bilibili.com/<id>``, extracts ``.jpg``
links and the user name from the page, and stores them on disk.
"""
import os
import random
import re
import urllib.request

# Default output directory.  Kept exactly as originally hard-coded; pass
# ``root=...`` to save_picture()/save_name() to write somewhere else.
ROOT_DIR = "C://Users//86138//Desktop//个人信息//bilibili//"

# Candidate HTTP proxies; one is picked at random for every url_open() call.
_PROXIES = ['60.195.206.86:80', '124.205.155.156:9090', '124.192.219.1:80']

# Quoted href value, cut right after the first ".jpg" inside that value.
_HREF_JPG_RE = re.compile(r"""href=["']([^"'<>\s]*?\.jpg)""")

_TITLE_OPEN = '<title>'
_NAME_SUFFIX = '的个人空间'
# The name suffix must start within this many characters of the title tag.
_NAME_WINDOW = 255


def url_open(url, timeout=10):
    """Fetch *url* through a randomly chosen proxy and return the raw bytes.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on blocking network operations before
            urllib raises instead of hanging indefinitely.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    req = urllib.request.Request(url)

    # NOTE(review): the mapping only has an 'http' entry; urllib selects
    # proxies by URL scheme, so https:// pages presumably bypass it - confirm.
    proxy_support = urllib.request.ProxyHandler({'http': random.choice(_PROXIES)})
    opener = urllib.request.build_opener(proxy_support)
    # Kept for backward compatibility: earlier versions installed the opener
    # globally, which also affects later urllib.request.urlopen() calls.
    urllib.request.install_opener(opener)

    # The context manager closes the connection even if read() fails.
    with opener.open(req, timeout=timeout) as response:
        return response.read()


def _extract_jpg_links(html):
    """Return every quoted ``href`` value in *html*, truncated after ``.jpg``.

    The match is confined to a single attribute value, so an ``href`` that
    does not point at a JPEG can no longer swallow markup up to the next
    ``.jpg`` occurrence.
    """
    return _HREF_JPG_RE.findall(html)


def find_picture(url):
    """Download the page at *url* and return the ``.jpg`` links found in it.

    Returns:
        A list of link strings in document order (empty if none are found).
        Links are returned as written in the page; they are not resolved
        against *url*.
    """
    html = url_open(url).decode('utf-8')
    return _extract_jpg_links(html)


def save_picture(folder, img_picture, root=ROOT_DIR, timeout=10):
    """Save the first downloadable image of *img_picture* as the avatar file.

    Every URL is tried in order.  Once the avatar file exists, the remaining
    URLs only report that the file already exists.

    Args:
        folder: Unused; accepted for backward compatibility.
        img_picture: Iterable of image URLs.
        root: Directory that receives the avatar file; created if missing.
        timeout: Seconds to wait for each HTTP request.
    """
    # Third-party dependency, imported lazily so the rest of the module stays
    # importable when ``requests`` is not installed.
    import requests

    path = os.path.join(root, "头像.jpg")

    for url in img_picture:
        try:
            os.makedirs(root, exist_ok=True)
            if os.path.exists(path):
                print("文件已存在")
                continue
            reply = requests.get(url, timeout=timeout)
            # Do not store an HTTP error page as if it were the image.
            reply.raise_for_status()
            with open(path, 'wb') as f:
                f.write(reply.content)
        except (OSError, requests.RequestException):
            print("爬取失败")


def _extract_name(html):
    """Return the text between ``<title>`` and the profile-page suffix.

    Returns an empty string when either marker is missing.
    """
    tag_pos = html.find(_TITLE_OPEN)
    if tag_pos == -1:
        return ''
    start = tag_pos + len(_TITLE_OPEN)
    end = html.find(_NAME_SUFFIX, start, tag_pos + _NAME_WINDOW)
    if end == -1:
        return ''
    return html[start:end]


def find_name(url):
    """Download the page at *url* and return the user name from its title.

    Returns:
        The user name, or ``''`` if the title does not have the expected
        form.
    """
    html = url_open(url).decode('utf-8')
    return _extract_name(html)


def save_name(name, root=ROOT_DIR):
    """Write *name* to the name file under *root* unless it already exists.

    Args:
        name: Text to store.
        root: Directory that receives the file; created if missing.
    """
    path = os.path.join(root, "姓名.txt")

    try:
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("文件已存在")
            return
        # Explicit UTF-8 so a non-ASCII name does not depend on the locale.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(name)
    except OSError:
        print("爬取失败")


def downlodabilibili(folder='bilibili'):
    """Prompt for a user id, then save that user's avatar and name.

    Args:
        folder: Forwarded to save_picture(), which currently ignores it.
    """
    user_id = input('请输入个人id号:\n').strip()
    url = "https://space.bilibili.com/" + user_id

    img_picture = find_picture(url)
    save_picture(folder, img_picture)

    img_name = find_name(url)
    save_name(img_name)


if __name__ == '__main__':
    downlodabilibili()