"""Crawl the campus-news list pages of www.jxxdxy.edu.cn and regex-match entries.

The script requests list pages 1..20, applies three regular expressions to
each page's HTML, and prints the matches of the second pattern (entries whose
text contains the keyword 产教融合).

NOTE(review): this file was recovered from a whitespace-collapsed patch dump.
The HTML tags that originally appeared inside the three regex literals were
stripped by that extraction (list-item markup shows up as a newline followed
by "  • ", inner tags are missing entirely).  The literals below are kept
exactly as they were visible and will not match real HTML until the tags are
restored from the original zy3.py.
"""
import re
import sys

import requests

header = {
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0"
    )
}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

urls = [
    'https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-{}.html'.format(number)
    for number in range(1, 21)
]

# Pattern 1: two capture groups per list entry.
my_article1 = '\n  • .*?(.*?).*?(.*?).*?\n  • '
# Pattern 2: like pattern 1, but the first group must contain the keyword.
# The keyword was mojibake ("δΊ§ζ•™θžεˆ") in the dump; its bytes decode to 产教融合.
my_article2 = '\n  • .*?.*?<.*?>(.*?产教融合.*?).*?.*?(.*?).*?\n  • '
# Pattern 3: like pattern 1, but the second group must start with "03-"
# (presumably a month-day date -- TODO confirm against the page markup).
my_article3 = '\n  • .*?(.*?).*?(03-.*?).*?\n  • '

# Compile once instead of re-parsing the patterns for every page.
# re.S lets "." span newlines, since an entry's HTML covers several lines.
_compiled1 = re.compile(my_article1, re.S)
_compiled2 = re.compile(my_article2, re.S)
_compiled3 = re.compile(my_article3, re.S)

# Kept as a separate list because later code indexes it.
all_url = list(urls)

for url in urls:
    try:
        # stream=True was dropped: the body is read in full right away.
        response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable page should not abort the remaining pages.
        print('skipping {}: {}'.format(url, exc), file=sys.stderr)
        continue

    # NOTE(review): response.text relies on the charset requests detects from
    # the headers; if the Chinese keyword never matches, check
    # response.encoding / response.apparent_encoding.
    source = response.text

    regex1 = _compiled1.findall(source)
    regex2 = _compiled2.findall(source)
    print(regex2)
    regex3 = _compiled3.findall(source)

# Extra fetch of the first page without the custom User-Agent; its result is
# not used further in this script (it served the removed debug prints).
response1 = requests.get(all_url[0], timeout=REQUEST_TIMEOUT)
source1 = response1.text