ADD file via upload

master
hnu202110040108 3 years ago
parent ec7562054c
commit a106905f57

@ -0,0 +1,56 @@
# Development Software: PyCharm
# Original Project: pythonProject
# Name: RegularExpression.py
# Author: Caesar Ren
# Creation Time: 2022/5/28 20:46
import requests
import os
import re
class PicSpider:
    """Fetch a Baidu image-search result page and extract fields with regexes.

    Typical use: construct, call ``requests_get(url)`` to download a page,
    then ``get_imgurl()`` / ``get_imgurl2()`` to pull fields out of the page
    text, and ``get_h()`` to extract the ``h=`` suffix from each stored entry.

    Attributes:
        path: Directory (with trailing separator) reserved for this keyword.
        page: 1-based page number derived from the result offset.
        req: Text of the most recently downloaded page ("" before any fetch).
        imgurls: Result of the most recent ``get_imgurl*`` call.
    """

    # Download root used when the caller does not supply ``base_dir``.
    DEFAULT_BASE_DIR = "E:\\Pythonprojects\\Spider\\PictureDownload\\"

    # Compiled once; re.S lets "." span newlines inside the page text.
    _HOVER_URL_RE = re.compile(r'"hoverURL":"(.*?)"', re.S)
    _PAGE_TITLE_RE = re.compile(r'"fromPageTitle":"(.*?)",', re.S)
    _HEIGHT_RE = re.compile(r'h=(.*)', re.S)

    def __init__(self, word, i, base_dir=None):
        """Prepare the storage directory for *word* and compute the page number.

        Args:
            word: Search keyword; also used as the sub-directory name.
            i: Result offset (the ``pn`` value of the search URL).
            base_dir: Optional download root. When omitted, the original
                hard-coded Windows location is used unchanged.
        """
        if base_dir is None:
            # Keep the historical layout byte-for-byte for existing callers.
            self.path = self.DEFAULT_BASE_DIR + word + "\\"
        else:
            # The trailing "" makes join() append the platform separator.
            self.path = os.path.join(base_dir, word, "")

        # Floor division: a page number must be an int, not 1.15.
        # The divisor 20 is presumably the number of results per page.
        self.page = i // 20 + 1

        # Safe defaults so the extract methods work before any fetch.
        self.req = ""
        self.imgurls = []

        # Create the folder, including missing parents; no error if present.
        os.makedirs(self.path, exist_ok=True)

    def requests_get(self, url):
        """Download *url* and store the body, decoded as UTF-8, in ``self.req``.

        Returns:
            The downloaded page text.
        """
        response = requests.get(url, timeout=30)
        response.encoding = "utf-8"
        self.req = response.text
        return self.req

    def get_imgurl(self):
        """Collect every ``"hoverURL"`` value from the page text.

        The matches are stored in ``self.imgurls``, printed, and returned.
        """
        self.imgurls = self._HOVER_URL_RE.findall(self.req)
        print(self.imgurls)
        return self.imgurls

    def get_imgurl2(self):
        """Collect every ``"fromPageTitle"`` value from the page text.

        NOTE: this overwrites ``self.imgurls`` (same attribute that
        ``get_imgurl`` fills); the matches are also printed and returned.
        """
        self.imgurls = self._PAGE_TITLE_RE.findall(self.req)
        print(self.imgurls)
        return self.imgurls

    def get_h(self):
        """Extract the text following ``h=`` from each entry of ``self.imgurls``.

        Returns:
            A list with one element per entry; each element is the list of
            matches for that entry (empty when the entry has no ``h=``).
            The list and its length are also printed.
        """
        heights = [self._HEIGHT_RE.findall(entry) for entry in self.imgurls]
        print(heights)
        print(len(heights))
        return heights
# Demo run: search keyword and result offset (sent as the "pn" query value).
word = '使徒'
i = 3

# Same search URL as before, assembled with an f-string instead of str.format.
url = (
    "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8"
    f"&word={word}&pn={i}"
    "&gsm=50&ct=&ic=0&lm=-1&width=0&height=0"
)

# Fetch the page, then run each extraction step in order.
Run = PicSpider(word, i)
Run.requests_get(url)
Run.get_imgurl()
Run.get_imgurl2()
Run.get_h()
Loading…
Cancel
Save