parent
d250ca559f
commit
1a3873c5f2
@ -0,0 +1,61 @@
|
||||
#批量下载图片
|
||||
# -*- coding:utf8 -*-
|
||||
import requests
|
||||
import re
|
||||
from urllib import parse
|
||||
import os
|
||||
import random
|
||||
from UAagentpool.ua_info import ua_list
|
||||
|
||||
class BaiduImageSpider(object):
    """Download the images listed on a Baidu image-search result page.

    The spider asks for a keyword, fetches the matching result page,
    pulls every ``hoverURL`` value out of the returned text and stores
    each image as ``<word>_<n>.jpg`` inside a per-keyword directory.
    """

    # Seconds to wait for any single HTTP response before giving up, so a
    # stalled server cannot hang the whole run.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Result-page URL template; the placeholder takes the URL-quoted keyword.
        self.url = 'https://image.baidu.com/search/flip?tn=baiduimage&word={}'
        # One User-Agent is chosen at random and reused for every request
        # made by this instance.
        self.headers = {'User-Agent': random.choice(ua_list)}

    @staticmethod
    def _extract_links(html):
        """Return the non-empty ``hoverURL`` values found in *html*, in order."""
        pattern = re.compile(r'"hoverURL":"(.*?)"', re.S)
        # An empty match cannot be requested, so drop it here instead of
        # letting the download step fail on it.
        return [link for link in pattern.findall(html) if link]

    @staticmethod
    def _build_filename(directory, word, index):
        """Return the path ``<directory>/<word>_<index>.jpg``."""
        # os.path.join supplies the separator that plain string
        # concatenation of directory and file name would leave out.
        return os.path.join(directory, '{}_{}.jpg'.format(word, index))

    def get_image(self, url, word):
        """Fetch the result page at *url* and save every image it lists.

        Args:
            url: Fully formatted search-result URL.
            word: Search keyword; used for the target directory name and
                as the prefix of each saved file.

        Raises:
            requests.RequestException: If the result page itself cannot
                be fetched. Failures of individual images are reported
                by ``save_image`` and do not stop the loop.
        """
        res = requests.get(url, headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        # Force UTF-8 decoding of the page text.
        res.encoding = "utf-8"
        img_link_list = self._extract_links(res.text)
        # Show the image URLs that are about to be downloaded.
        print(img_link_list)

        # Create the per-keyword directory that will hold the images.
        directory = 'D:/tooth/image/teeth/{}'.format(word)
        os.makedirs(directory, exist_ok=True)

        # Number the files starting from 1.
        for i, img_link in enumerate(img_link_list, start=1):
            self.save_image(img_link, self._build_filename(directory, word, i))

    def save_image(self, img_link, filename):
        """Download *img_link* and write the response body to *filename*.

        A failed request (connection error, timeout, HTTP error status)
        is printed and skipped, so no error page is written as an image.
        """
        try:
            response = requests.get(url=img_link, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(filename, '下载失败', exc)
            return
        with open(filename, 'wb') as f:
            f.write(response.content)
        print(filename, '下载成功')

    def run(self):
        """Prompt for a keyword and download the images found for it."""
        word = input("您想要谁的照片?")
        # The keyword must be percent-encoded before it goes into the URL.
        word_parse = parse.quote(word)
        url = self.url.format(word_parse)
        self.get_image(url, word)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: build a spider and start the interactive download.
    BaiduImageSpider().run()
|
Loading…
Reference in new issue