You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

101 lines
3.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import urllib.request
import os
import random
import requests
import re
def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    One proxy is picked at random from a hard-coded list and registered for
    the ``http`` scheme only.

    Args:
        url: Address to fetch.

    Returns:
        The undecoded response body.

    Raises:
        urllib.error.URLError: Propagated from urllib when the request fails.
    """
    proxies = ['60.195.206.86:80', '124.205.155.156:9090', '124.192.219.1:80']
    proxy_support = urllib.request.ProxyHandler({'http': random.choice(proxies)})
    # Use a private opener instead of install_opener(): every call picks a
    # new random proxy, and installing it would silently replace the
    # process-wide default opener each time.
    opener = urllib.request.build_opener(proxy_support)
    # The context manager closes the response even if read() raises.
    with opener.open(url) as response:
        return response.read()
def find_picture(url):
    """Collect ``.jpg`` link candidates from the page at *url*.

    The page is fetched with ``url_open`` and decoded as UTF-8. For every
    ``href=`` occurrence, a ``.jpg`` is searched for within the following
    255 characters; when found, the text starting 6 characters after the
    ``href=`` position and ending with ``.jpg`` is recorded.

    Args:
        url: Address of the page to scan.

    Returns:
        List of extracted strings, in page order.
    """
    page = url_open(url).decode('utf-8')
    marker = 'href='
    links = []
    start = page.find(marker)
    while start >= 0:
        end = page.find('.jpg', start, start + 255)
        if end == -1:
            # No image here: resume just past this marker.
            resume = start + 5
        else:
            links.append(page[start + 6:end + 4])
            resume = end
        start = page.find(marker, resume)
    return links
def save_picture(folder, img_picture,
                 root="C://Users//86138//Desktop//个人信息//bilibili//"):
    """Download image URLs to ``头像.jpg`` inside *root*.

    Every URL targets the same file name, so once one download succeeds the
    remaining URLs only report that the file already exists.

    Args:
        folder: Unused; kept so existing callers keep working.
        img_picture: Iterable of image URLs to try.
        root: Destination directory, created (with parents) if missing.
            Defaults to the original hard-coded location.

    Download and file-system failures are reported by printing a message;
    they are not raised.
    """
    path = os.path.join(root, "头像.jpg")
    for url in img_picture:
        try:
            # makedirs also creates missing parent directories, which
            # os.mkdir would fail on.
            os.makedirs(root, exist_ok=True)
            if os.path.exists(path):
                print("文件已存在")
                continue
            # One request only, made inside the try block so that network
            # errors are reported instead of crashing the loop.
            r = requests.get(url, timeout=10)
            # Do not store an HTTP error page as if it were the image.
            r.raise_for_status()
            with open(path, 'wb') as f:
                f.write(r.content)
        except (OSError, requests.RequestException):
            print("爬取失败")
def find_name(url):
    """Return the user name shown in the ``<title>`` of the page at *url*.

    The name is the text between ``<title>`` and the marker ``的个人空间``;
    the marker is only searched for within 255 characters of the tag.

    Args:
        url: Address of the page to read.

    Returns:
        The extracted name, or an empty string when either marker is
        missing (instead of an arbitrary slice of the page).
    """
    html = url_open(url).decode('utf-8')
    open_tag = '<title>'
    tag_at = html.find(open_tag)
    if tag_at == -1:
        return ''
    suffix_at = html.find('的个人空间', tag_at, tag_at + 255)
    if suffix_at == -1:
        return ''
    return html[tag_at + len(open_tag):suffix_at]
def save_name(name, root="C://Users//86138//Desktop//个人信息//bilibili//"):
    """Write *name* to ``姓名.txt`` inside *root* unless the file exists.

    Args:
        name: Text to store.
        root: Destination directory, created (with parents) if missing.
            Defaults to the original hard-coded location.

    File-system failures are reported by printing a message; they are not
    raised.
    """
    path = os.path.join(root, "姓名.txt")
    try:
        # makedirs also creates missing parent directories, which os.mkdir
        # would fail on.
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("文件已存在")
            return
        # Explicit UTF-8 so non-ASCII names do not depend on the locale's
        # default encoding.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(name)
    except (OSError, UnicodeError):
        print("爬取失败")
def downlodabilibili(folder='bilibili'):
    """Prompt for a bilibili user id, then save that user's picture and name.

    The space page URL is built from the id, scanned for ``.jpg`` links via
    ``find_picture`` and for the user name via ``find_name``; the results
    are handed to ``save_picture`` and ``save_name``.

    Args:
        folder: Passed through to ``save_picture``.
    """
    # Named user_id to avoid shadowing the builtin ``id``; stripped because
    # stray whitespace around the typed id would produce an invalid URL.
    user_id = input('请输入个人id号:\n').strip()
    url = "https://space.bilibili.com/" + user_id
    save_picture(folder, find_picture(url))
    save_name(find_name(url))
# Run the interactive scraper only when executed as a script, not on import.
if __name__ == "__main__":
    downlodabilibili()