import os

import numpy as np
import requests
from bs4 import BeautifulSoup
from matplotlib import pyplot as plt
from urllib.robotparser import RobotFileParser


def can_fetch(robots_url, url):
    # Parse the site's robots.txt and check whether a generic crawler ('*')
    # is allowed to fetch the given URL.
    rp = RobotFileParser()
    rp.set_url(robots_url + "/robots.txt")
    rp.read()
    return rp.can_fetch('*', url)


def check_robots(url):
    # Allow the crawl only if robots.txt permits the URL and the page answers with 200.
    if can_fetch(url, url):
        response = requests.get(url)
        if response.status_code == 200:
            print('robots.txt allows crawling this site')
            return True
    print('robots.txt does not allow crawling this site')
    return False


def get_pictures(url, path):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'}
    response = requests.get(url, headers=headers)
    print(response.status_code)  # check the request status; 200 means the request succeeded
    os.makedirs('img', exist_ok=True)  # make sure the output directory exists
    with open('img/' + path, 'wb') as f:  # write the image bytes to disk; 'wb' opens the file in binary mode
        f.write(response.content)


def get_pictures_urls(text):
    # Scan the raw HTML for every occurrence of 'img src="' and collect the
    # quoted URL that follows each one.
    st = 'img src="'
    m = len(st)
    i = 0
    n = len(text)
    urls = []  # store the extracted URLs
    while i < n - m:
        if text[i:i + m] == st:
            i += m
            start = i
            while i < n and text[i] != '"':
                i += 1
            urls.append(text[start:i])
        else:
            i += 1
    return urls
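
# A minimal usage sketch tying the helpers above together, assuming the file
# is run as a script. The target page https://example.com and the
# picture{idx}.jpg naming scheme are illustrative placeholders, not part of
# the original code.
if __name__ == '__main__':
    page_url = 'https://example.com'  # hypothetical page to scrape
    if check_robots(page_url):
        html = requests.get(page_url).text
        for idx, img_url in enumerate(get_pictures_urls(html)):
            # get_pictures_urls returns the raw src attribute values, so this
            # assumes the page uses absolute image URLs.
            get_pictures(img_url, f'picture{idx}.jpg')  # saved under img/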