# Development Software: PyCharm
# Original Project: pythonProject
# Name: RegularExpression.py
# Author: Caesar Ren
# Creation Time: 2022/5/28 20:46
import os
import re

import requests


class PicSpider:
    """Fetch a search-result page and extract fields from it with regexes.

    Typical use is ``requests_get(url)`` followed by one of the
    ``get_imgurl*`` methods and then ``get_h()``.

    Attributes:
        path: Directory created for the search word (ends with a separator).
        page: 1-based page number derived from the result offset.
        req: Text of the most recently fetched page ("" before any fetch).
        imgurls: Strings captured by the most recent ``get_imgurl*`` call.
    """

    # Default storage root; kept identical to the original hard-coded value.
    DEFAULT_BASE_DIR = "E:\\Pythonprojects\\Spider\\PictureDownload\\"
    # Divisor used to turn a result offset into a page number.
    RESULTS_PER_PAGE = 20

    _HOVER_URL_RE = re.compile(r'"hoverURL":"(.*?)"', re.S)
    _PAGE_TITLE_RE = re.compile(r'"fromPageTitle":"(.*?)",', re.S)
    # Only match `h` as a whole query parameter (so the tail of "width=" is
    # not mistaken for it) and stop at the next "&".
    _HEIGHT_RE = re.compile(r"(?:^|[?&])h=([^&]*)")

    def __init__(self, word, i, base_dir=None):
        """Prepare the storage directory and page number for a search.

        Args:
            word: Search keyword; also used as the sub-directory name.
            i: Result offset (the value sent as ``pn`` in the search URL).
            base_dir: Root directory under which ``word`` gets its own
                folder. Defaults to ``DEFAULT_BASE_DIR``.

        Raises:
            OSError: If the directory cannot be created.
        """
        if base_dir is None:
            base_dir = self.DEFAULT_BASE_DIR
        # Storage path; the trailing "" makes join() append a separator.
        self.path = os.path.join(base_dir, word, "")
        # Page number: floor division keeps it an integer under Python 3.
        self.page = i // self.RESULTS_PER_PAGE + 1
        # Defined up front so the parsing methods work before any fetch.
        self.req = ""
        self.imgurls = []
        # Create the folder (and any missing parents) if it does not exist.
        os.makedirs(self.path, exist_ok=True)

    def requests_get(self, url):
        """Send a GET request to ``url`` and store the body in ``self.req``.

        The response is decoded as UTF-8 and the request times out after
        30 seconds.

        Returns:
            The decoded response text.
        """
        response = requests.get(url, timeout=30)
        response.encoding = "utf-8"
        self.req = response.text
        return self.req

    def get_imgurl(self):
        """Collect every ``"hoverURL"`` value from the fetched page.

        The matches are stored in ``self.imgurls``, printed, and returned.
        """
        self.imgurls = self._HOVER_URL_RE.findall(self.req)
        print(self.imgurls)
        return self.imgurls

    def get_imgurl2(self):
        """Collect every ``"fromPageTitle"`` value from the fetched page.

        The matches are stored in ``self.imgurls``, printed, and returned.
        """
        # NOTE(review): this replaces whatever get_imgurl() stored in
        # self.imgurls, so a later get_h() scans these titles instead of the
        # image URLs -- confirm that is intended.
        self.imgurls = self._PAGE_TITLE_RE.findall(self.req)
        print(self.imgurls)
        return self.imgurls

    def get_h(self):
        """Extract the ``h`` query-parameter value from each stored string.

        Prints the collected values and how many strings were scanned.

        Returns:
            One list per entry of ``self.imgurls`` holding the captured
            values (empty when the entry has no ``h`` parameter).
        """
        heights = [self._HEIGHT_RE.findall(string) for string in self.imgurls]
        print(heights)
        print(len(heights))
        return heights


word = '使徒'
i = 3
url = (
    "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8"
    "&word={0}&pn={1}&gsm=50&ct=&ic=0&lm=-1&width=0&height=0"
).format(word, i)

# Only hit the network / file system when executed as a script.
if __name__ == "__main__":
    Run = PicSpider(word, i)
    Run.requests_get(url)
    Run.get_imgurl()
    Run.get_imgurl2()
    Run.get_h()