ADD file via upload

3 years ago · a106905f57
parent ec7562054c
commit a106905f57
1 changed files with 56 additions and 0 deletions
--- a/RegularExpression.py
+++ b/RegularExpression.py
@@ -0,0 +1,56 @@
 # Development Software: PyCharm
 # Original Project:     pythonProject
 # Name:                 RegularExpression.py
 # Author:               Caesar Ren
 # Creation Time:        2022/5/28 20:46
 import requests
 import os
 import re
 class PicSpider:
 	"""Fetch a page over HTTP and extract fields from its text with regexes.

 	Typical use: call ``requests_get`` first to populate ``self.req``, then
 	one of the ``get_imgurl*`` methods to populate ``self.imgurls``, then
 	``get_h`` to inspect the ``h=`` suffix of each extracted string.
 	"""

 	def __init__(self, word, i):
 		"""Prepare the storage directory and derive the page number.

 		Args:
 			word: Search keyword; also used as the name of the storage
 				sub-directory.
 			i: Numeric offset used to derive ``self.page``
 				(presumably a result offset with 20 results per page;
 				verify against the caller).
 		"""
 		# Storage path for this keyword.
 		self.path = "E:\\Pythonprojects\\Spider\\PictureDownload\\" + word + "\\"
 		# Page number. Floor division keeps it an integer; true division
 		# would yield fractional pages such as 1.15.
 		self.page = i // 20 + 1
 		# Start with empty results so the extraction methods are safe to call
 		# even before a page has been fetched.
 		self.req = ""
 		self.imgurls = []
 		# Create the folder (and any missing parents) if it does not exist;
 		# exist_ok avoids a race between checking and creating.
 		os.makedirs(self.path, exist_ok=True)

 	# Send the HTTP request.
 	def requests_get(self, url):
 		"""GET ``url`` (30 s timeout) and store the UTF-8 decoded body in ``self.req``."""
 		req = requests.get(url, timeout=30)
 		req.encoding = "utf-8"
 		self.req = req.text

 	# Find the image links with a regex.
 	def get_imgurl(self):
 		"""Store and print every ``"hoverURL"`` value found in ``self.req``."""
 		imgurls = re.findall('"hoverURL":"(.*?)"', self.req, re.S)
 		self.imgurls = imgurls
 		print(imgurls)

 	def get_imgurl2(self):
 		"""Store and print every ``"fromPageTitle"`` value found in ``self.req``.

 		Note that the matches overwrite ``self.imgurls``.
 		"""
 		imgurls = re.findall('"fromPageTitle":"(.*?)",', self.req, re.S)
 		self.imgurls = imgurls
 		print(imgurls)

 	def get_h(self):
 		"""Print the text following ``h=`` for each entry of ``self.imgurls``.

 		Each entry contributes the list returned by ``re.findall`` (empty when
 		the entry contains no ``h=``); the collected lists are printed,
 		followed by how many entries were processed.
 		"""
 		heights = [re.findall('h=(.*)', string, re.S) for string in self.imgurls]
 		print(heights)
 		print(len(heights))
 # Search keyword and the value substituted for the ``pn`` query parameter.
 word, i = '使徒', 3
 # Fill the keyword and the offset into the search URL.
 url = (
 	"https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={0}&pn={1}&gsm=50&ct=&ic=0&lm=-1&width=0&height=0"
 ).format(word, i)
 # Build the spider, fetch the page, then run each extraction step in turn;
 # every step prints what it found.
 Run = PicSpider(word, i)
 Run.requests_get(url)
 Run.get_imgurl()
 Run.get_imgurl2()
 Run.get_h()