"""Scrape the "xiaoyuanyaowen" news-list pages of www.jxxdxy.edu.cn.

Page 1 lives at ``news-list-xiaoyuanyaowen.html``; page N (N >= 2) lives at
``news-list-xiaoyuanyaowen-N.html``.  Every page is downloaded, decoded as
UTF-8 and searched with three regular expressions; the HTTP status code and
the encoding reported by ``requests`` are recorded per page.

NOTE(review): the three list-item patterns below contain no HTML tags, only
a newline, two spaces and a bullet character.  They look as if their markup
was stripped somewhere upstream -- confirm them against the live page source
before relying on the matches.
"""
import re

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/109"
}
FIRST_PAGE_URL = "https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen.html"
PAGE_URL_TEMPLATE = "https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-{page}.html"
LAST_PAGE = 20
DEFAULT_MONTH = "2024-03"
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever

# Compiled once at import time instead of once per downloaded page.
# re.S lets "." span line breaks, since the patterns cross several lines.
LIANJIE_PATTERN = re.compile("\n  • (.*?)(.*?)\n  • ", re.S)
CJRH_PATTERN = re.compile("\n  • .*?(.*?)\n  • ", re.S)


def build_page_urls(last_page=LAST_PAGE):
    """Return the list-page URLs for pages 1..last_page, in page order.

    The first page is always included; numbered pages start at 2.
    """
    urls = [FIRST_PAGE_URL]
    urls.extend(
        PAGE_URL_TEMPLATE.format(page=pn) for pn in range(2, last_page + 1)
    )
    return urls


def extract_items(html, month=DEFAULT_MONTH):
    """Run the three patterns over *html*.

    Returns a ``(lianjie, cjrh, sy)`` tuple of ``re.findall`` results:
    ``lianjie`` and ``sy`` are lists of 2-tuples (two capture groups each),
    ``cjrh`` is a list of strings (one capture group).  The second group of
    every ``sy`` match starts with *month*, which is matched literally.
    """
    sy_pattern = re.compile(
        f"\n  • (.*?)({re.escape(month)}.*?)\n  • ", re.S
    )
    return (
        LIANJIE_PATTERN.findall(html),
        CJRH_PATTERN.findall(html),
        sy_pattern.findall(html),
    )


def fetch_page(url, timeout=REQUEST_TIMEOUT):
    """Download *url* and return ``(text, status_code, encoding)``.

    The body is decoded explicitly as UTF-8 rather than with the encoding
    that ``requests`` reports; that reported encoding is returned as-is so
    the caller can record it.  The status code is returned, not checked.
    """
    # Imported here so the pure helpers above can be used (and tested)
    # on machines where the third-party package is not installed.
    import requests

    res = requests.get(url, headers=HEADERS, timeout=timeout)
    return res.content.decode("utf-8"), res.status_code, res.encoding


def scrape(urls, month=DEFAULT_MONTH):
    """Fetch every URL in *urls* and collect the matches of all pages.

    Returns ``(encoding_lists, status_lists, result, result1, result2)``.
    The first two hold one entry per page; the last three hold the
    ``lianjie``, ``cjrh`` and ``sy`` matches accumulated over all pages.
    """
    encoding_lists = []
    status_lists = []
    result = []
    result1 = []
    result2 = []
    for url in urls:
        text, status, encoding = fetch_page(url)
        encoding_lists.append(encoding)
        status_lists.append(status)
        lianjie, cjrh, sy = extract_items(text, month)
        # Extend rather than rebind, so earlier pages are not discarded.
        result.extend(lianjie)
        result1.extend(cjrh)
        result2.extend(sy)
    return encoding_lists, status_lists, result, result1, result2


if __name__ == "__main__":
    # Bound at module level so an interactive run (python -i) can inspect them.
    url_lists = build_page_urls()
    encoding_lists, status_lists, result, result1, result2 = scrape(url_lists)