ADD file via upload

5 years ago · 1a3873c5f2
parent d250ca559f
commit 1a3873c5f2
1 changed files with 61 additions and 0 deletions
--- a/code/python/爬虫代码
+++ b/code/python/爬虫代码
@@ -0,0 +1,61 @@
+#批量下载图片
+# -*- coding:utf8 -*-
+import requests
+import re
+from urllib import parse
+import os
+import random
+from UAagentpool.ua_info import ua_list
+
+class BaiduImageSpider(object):
+    def __init__(self):
+        self.url = 'https://image.baidu.com/search/flip?tn=baiduimage&word={}'
+        self.headers = {'User-Agent': random.choice(ua_list)}
+
+    # 获取图片
+    def get_image(self, url, word):
+        # 使用 requests模块得到响应对象
+        res = requests.get(url, headers=self.headers)
+        # 更改编码格式
+        res.encoding = "utf-8"
+        # 得到html网页
+        html = res.text
+        print(html)
+        # 正则解析
+        pattern = re.compile('"hoverURL":"(.*?)"', re.S)
+        img_link_list = pattern.findall(html)
+        # 存储图片的url链接
+        print(img_link_list)
+
+        # 创建目录，用于保存图片
+        directory = 'D:/tooth/image/teeth/{}'.format(word)
+        #directory = 'C:/Users/Administrator/Desktop/image/{}/'.format(word)
+        # 如果目录不存在则创建，此方法常用
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+        # 添加计数
+        i = 1
+        for img_link in img_link_list:
+            filename = '{}{}_{}.jpg'.format(directory, word, i)
+            self.save_image(img_link, filename)
+            i += 1
+
+    # 下载图片
+    def save_image(self, img_link, filename):
+        html = requests.get(url=img_link, headers=self.headers).content
+        with open(filename, 'wb') as f:
+            f.write(html)
+        print(filename, '下载成功')
+
+    # 入口函数
+    def run(self):
+        word = input("您想要谁的照片？")
+        word_parse = parse.quote(word)
+        url = self.url.format(word_parse)
+        self.get_image(url, word)
+
+
+if __name__ == '__main__':
+    spider = BaiduImageSpider()
+    spider.run()