# Development Software: PyCharm
# Original Project: pythonProject
# Name: RegularExpression.py
# Author: Caesar Ren
# Creation Time: 2022/5/28 20:46
"""Fetch a Baidu image-search result page and pull fields out of it with regexes."""

import os
import re
from urllib.parse import quote

# Default root directory for downloads; one sub-folder is created per search word.
DEFAULT_BASE_DIR = "E:\\Pythonprojects\\Spider\\PictureDownload\\"

# The page number is derived from the result offset by dividing by this value.
RESULTS_PER_PAGE = 20


class PicSpider:
    """Download one search-result page and extract image URLs, titles and heights.

    Attributes:
        path: Directory (with trailing separator) created for this search word.
        page: 1-based page number derived from the result offset.
        req: Text of the most recently fetched page ("" until fetched).
        imgurls: Image URLs found by ``get_imgurl``.
        titles: Source-page titles found by ``get_imgurl2``.
    """

    # Compiled once; re.S is kept from the original patterns so "." spans newlines.
    _HOVER_URL_RE = re.compile(r'"hoverURL":"(.*?)"', re.S)
    _PAGE_TITLE_RE = re.compile(r'"fromPageTitle":"(.*?)",', re.S)
    # Anchor on "?" / "&" so that e.g. "width=..." is not mistaken for "h=...",
    # and capture digits only so trailing parameters are not swallowed.
    _HEIGHT_RE = re.compile(r'[?&]h=(\d+)')

    def __init__(self, word, i, base_dir=DEFAULT_BASE_DIR):
        """Prepare the storage folder for *word*.

        Args:
            word: Search keyword; also used as the sub-folder name.
            i: Result offset (the ``pn`` value sent in the search URL).
            base_dir: Root directory under which the per-word folder is created.
        """
        # Storage path; joining with "" keeps the trailing separator.
        self.path = os.path.join(base_dir, word, "")
        # Page number; floor division keeps it an integer.
        self.page = i // RESULTS_PER_PAGE + 1
        # Create the folder (and any missing parents) if it does not exist yet.
        os.makedirs(self.path, exist_ok=True)
        # Defined up front so the extract methods work before any fetch.
        self.req = ""
        self.imgurls = []
        self.titles = []

    def requests_get(self, url):
        """Fetch *url* and store the decoded response text in ``self.req``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        # Imported here so the parsing methods stay usable without `requests`.
        import requests

        resp = requests.get(url, timeout=30)
        # Fail loudly instead of silently parsing an error page.
        resp.raise_for_status()
        resp.encoding = "utf-8"
        self.req = resp.text

    def get_imgurl(self):
        """Extract every ``hoverURL`` value from the fetched page.

        Stores the list in ``self.imgurls``, prints it and returns it.
        """
        self.imgurls = self._HOVER_URL_RE.findall(self.req)
        print(self.imgurls)
        return self.imgurls

    def get_imgurl2(self):
        """Extract every ``fromPageTitle`` value from the fetched page.

        The titles go to ``self.titles`` so that the image URLs collected by
        ``get_imgurl`` are not overwritten. Prints and returns the titles.
        """
        self.titles = self._PAGE_TITLE_RE.findall(self.req)
        print(self.titles)
        return self.titles

    def get_h(self):
        """Collect the ``h=`` query values found in each stored image URL.

        Returns:
            One list of matched digit strings per URL in ``self.imgurls``
            (an empty list for a URL with no ``h=`` parameter).
        """
        heights = [self._HEIGHT_RE.findall(link) for link in self.imgurls]
        print(heights)
        print(len(heights))
        return heights


if __name__ == "__main__":
    word = '使徒'
    i = 3
    # Percent-encode the keyword so characters such as "&" or spaces cannot
    # corrupt the query string.
    url = "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={0}&pn={1}&gsm=50&ct=&ic=0&lm=-1&width=0&height=0".format(
        quote(word), i)
    Run = PicSpider(word, i)
    Run.requests_get(url)
    Run.get_imgurl()
    Run.get_imgurl2()
    Run.get_h()