import re import requests # 1.进入江西现代职业技术学院官网“ http://www.jxxdxy.edu.cn/”, 点击首页中的“现代要闻”,页面中有“现代要闻”栏目。 url_head = 'https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen' url_tail = '.html' url_list = [] url_list.append(url_head + url_tail) for i in range(2, 21): url_list.append(url_head + '-'+str(i) + url_tail) # 2.使用python列表及循环语句构造所有页的url列表(总共20页)。 for url in url_list: print(url) headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"} res = requests.get(url_list[0], headers=headers) res.encoding = "utf-8" # 3.循环使用requests库的get方法定制请求头获取“现代要闻”所有页的网页源代码,并打印响应对象编码、响应状态码和第1页的网页源代码 print(res.text) for url in url_list: response = requests.get(url, headers=headers) print(response.status_code) print(response.encoding) texts = [] for url in url_list: response = requests.get(url, headers=headers) texts.append(response.text) my_re = '