import re import requests url_first = 'https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen' url_last = '.html' urls = [] urls.append(url_first + url_last) for i in range(2, 21): urls.append(url_first + '-'+str(i) + url_last) # for url in urls: # print(url) a = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML,like Gecko) Chrom/124.0.0.0 Safari/537.36 Edg/124.0.0.0' } # res = requests.get(urls[0], headers=a) # res.encoding = 'utf-8' # print(res.text) # for url in urls: # response = requests.get(url, headers=a) # print(response.status_code) # print(response.encoding) lists = [] for url in urls: response = requests.get(url, headers=a) lists.append(response.text) bd_re = '