# Development Software: PyCharm
# Original Project:     pythonProject
# Name:                 RegularExpression.py
# Author:               Caesar Ren
# Creation Time:        2022/5/28 20:46

import requests
import os
import re


class PicSpider:
	"""Download a search-result page and extract fields from it with regexes.

	Typical use: call ``requests_get(url)`` to fetch and store the page text,
	then one of the ``get_*`` extractors. Each extractor prints what it found
	(as before) and also returns it.
	"""

	# Download root used when the caller does not pass ``base_dir``.
	DEFAULT_BASE_DIR = "E:\\Pythonprojects\\Spider\\PictureDownload\\"

	def __init__(self, word, i, base_dir=None):
		"""Prepare the storage folder for ``word`` and compute the page number.

		Args:
			word: Search keyword; also used as the name of the sub-folder.
			i: Result offset used to derive the page number.
			base_dir: Optional directory under which the ``word`` folder is
				created. When omitted, ``DEFAULT_BASE_DIR`` is used and the
				resulting path string is the same as in earlier versions.

		Raises:
			OSError: If the storage folder cannot be created.
		"""
		# Storage path (always ends with a path separator).
		if base_dir is None:
			# Legacy layout: build the string exactly as before.
			self.path = self.DEFAULT_BASE_DIR + word + "\\"
		else:
			# An empty last component makes os.path.join end with a separator.
			self.path = os.path.join(base_dir, word, "")
		# Page number. Floor division keeps it an integer for integer ``i``
		# (true division produced values such as 1.15).
		# NOTE(review): presumably 20 results per page -- confirm.
		self.page = i // 20 + 1
		# Start with empty state so the extractors are safe to call before
		# requests_get() instead of raising AttributeError.
		self.req = ""
		self.imgurls = []
		# Create the folder, including missing parents; no error if it exists.
		os.makedirs(self.path, exist_ok=True)

	def requests_get(self, url):
		"""GET ``url`` (30 s timeout), decode as UTF-8, store and return the text."""
		response = requests.get(url, timeout=30)
		response.encoding = "utf-8"
		self.req = response.text
		return self.req

	def get_imgurl(self):
		"""Store, print and return every ``"hoverURL"`` value in the page text."""
		self.imgurls = re.findall(r'"hoverURL":"(.*?)"', self.req, re.S)
		print(self.imgurls)
		return self.imgurls

	def get_imgurl2(self):
		"""Store, print and return every ``"fromPageTitle"`` value in the page text.

		Note that the result is written to ``self.imgurls``, replacing whatever
		``get_imgurl`` stored there.
		"""
		self.imgurls = re.findall(r'"fromPageTitle":"(.*?)",', self.req, re.S)
		print(self.imgurls)
		return self.imgurls

	def get_h(self):
		"""Print and return the text following ``h=`` for each stored string.

		Returns:
			A list with one entry per item of ``self.imgurls``; each entry is
			the ``re.findall`` result for that item (an empty list when the
			item contains no ``h=``).
		"""
		heights = [re.findall(r'h=(.*)', item, re.S) for item in self.imgurls]
		print(heights)
		print(len(heights))
		return heights


# Search keyword and result offset used for this run.
word = '使徒'
i = 3

# Build the search URL: {0} is the keyword, {1} the result offset (pn).
url = (
	"https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8"
	"&word={0}&pn={1}&gsm=50&ct=&ic=0&lm=-1&width=0&height=0"
).format(word, i)

# Fetch the page, then run each extractor in turn; every step prints its result.
Run = PicSpider(word, i)
Run.requests_get(url)
Run.get_imgurl()
Run.get_imgurl2()
Run.get_h()