爬取bilibili个人信息

master
pcgueqlrn 4 years ago
parent f64efbfde3
commit 7902f0f68d

@@ -0,0 +1,100 @@
import urllib.request
import os
import random
import requests
import re
def url_open(url):
    """Fetch ``url`` and return the raw response body as bytes.

    One proxy is picked at random from a hard-coded list and registered for
    the ``http`` scheme on an opener that is private to this call, so the
    process-wide urllib opener is left untouched.

    Args:
        url: Address to fetch.

    Returns:
        The undecoded response body (``bytes``).

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    # Build the request explicitly so headers (e.g. a User-Agent) can be
    # attached here if the target site starts rejecting the default one.
    request = urllib.request.Request(url)

    proxies = ['60.195.206.86:80', '124.205.155.156:9090', '124.192.219.1:80']
    proxy = random.choice(proxies)
    # NOTE(review): only an 'http' entry is registered, so requests to
    # https:// addresses presumably bypass these proxies -- confirm intent.
    proxy_support = urllib.request.ProxyHandler({'http': proxy})
    opener = urllib.request.build_opener(proxy_support)

    # Use the opener directly instead of install_opener(): no global state is
    # changed, and the context manager closes the connection after reading.
    with opener.open(request) as response:
        return response.read()
def find_picture(url):
    """Collect the ``.jpg`` link targets found in the page at ``url``.

    The page is fetched with ``url_open`` and decoded as UTF-8.  For every
    ``href=`` occurrence, the next 255 characters are searched for ``.jpg``;
    on a hit, the text between the character after ``href=`` plus one (the
    opening quote, presumably) and the end of ``.jpg`` is recorded.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of the extracted link strings, in page order.
    """
    page = url_open(url).decode('utf-8')
    links = []
    href_at = page.find('href=')
    while href_at != -1:
        jpg_at = page.find('.jpg', href_at, href_at + 255)
        if jpg_at == -1:
            # No image in range: resume just past this 'href='.
            resume_from = href_at + 5
        else:
            links.append(page[href_at + 6:jpg_at + 4])
            # Resume from the '.jpg' that was just consumed.
            resume_from = jpg_at
        href_at = page.find('href=', resume_from)
    return links
def save_picture(folder,img_picture):
    """Download the given image URLs to the fixed avatar file.

    Every URL targets the same destination path, so once the file exists
    (from an earlier run or an earlier URL in this call) the remaining URLs
    only report that the file already exists.

    Args:
        folder: Unused; kept so existing callers keep working.
        img_picture: Iterable of image URLs to try.

    Returns:
        None.  Progress and failures are reported with ``print``.
    """
    root = "C://Users//86138//Desktop//个人信息//bilibili//"  # root directory for saved files
    path = root + "头像.jpg"  # destination file; change the name here to customise it

    for url in img_picture:
        try:
            # makedirs also creates missing parent directories and tolerates
            # the directory already being there.
            os.makedirs(root, exist_ok=True)
            if os.path.exists(path):
                print("文件已存在")
                continue
            # Single request, inside the try so that a malformed URL or a
            # network error is reported instead of aborting the whole loop.
            r = requests.get(url)
            # Do not store an HTTP error page as if it were the image.
            r.raise_for_status()
            with open(path, 'wb') as f:
                f.write(r.content)  # image data is written as raw bytes
        except (OSError, requests.RequestException):
            print("爬取失败")
def find_name(url):
    """Extract the user name from the ``<title>`` of the page at ``url``.

    The name is the text between ``<title>`` and the suffix ``的个人空间``,
    where the suffix must start within 255 characters of the ``<title>`` tag.

    Args:
        url: Address of the page to read.

    Returns:
        The extracted name, or an empty string when the title tag or the
        suffix cannot be found.
    """
    html = url_open(url).decode('utf-8')
    title_tag = '<title>'

    title_at = html.find(title_tag)
    if title_at == -1:
        # Without this guard the -1 would be used as a slice offset and an
        # arbitrary chunk of the page would be returned as the "name".
        return ''

    suffix_at = html.find('的个人空间', title_at, title_at + 255)
    if suffix_at == -1:
        # Same problem: html[start:-1] would return almost the whole page.
        return ''

    return html[title_at + len(title_tag):suffix_at]
def save_name(name):
    """Write ``name`` to the fixed name file unless that file already exists.

    Args:
        name: Text to store.

    Returns:
        None.  Progress and failures are reported with ``print``.
    """
    root = "C://Users//86138//Desktop//个人信息//bilibili//"  # root directory for saved files
    path = root + "姓名.txt"  # destination file; change the name here to customise it
    try:
        # makedirs also creates missing parent directories and tolerates the
        # directory already being there.
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("文件已存在")
            return
        # Explicit UTF-8 so that names containing characters outside the
        # platform's default code page can still be written.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(name)  # text mode; the with-block closes the file
    except (OSError, UnicodeError):
        print("爬取失败")
def downlodabilibili(folder = 'bilibili'):
    """Prompt for a user id, then save that user's picture(s) and name.

    The space page URL is built from the typed id; pictures are located and
    saved first, then the name is located and saved.

    Args:
        folder: Passed through to ``save_picture``.
    """
    user_id = input('请输入个人id号:\n')
    space_url = "https://space.bilibili.com/" + user_id
    save_picture(folder, find_picture(space_url))
    save_name(find_name(space_url))
# Run the interactive downloader only when executed as a script.
if __name__ == "__main__":
    downlodabilibili()
Loading…
Cancel
Save