From 4c38f38a005a6e36fe0c66237aa6da77506be875 Mon Sep 17 00:00:00 2001
From: phzwq3x84 <3285023794@qq.com>
Date: Sat, 11 May 2024 23:49:30 +0800
Subject: [PATCH] ADD file via upload

---
 1.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 1.py
diff --git a/1.py b/1.py
new file mode 100644
index 0000000..d7b7efd
--- /dev/null
+++ b/1.py
@@ -0,0 +1,36 @@
+import re
+
+import requests
+from lxml import etree
+# Fetch the paginated "xiaoyuanyaowen" news-list pages of www.jxxdxy.edu.cn and
+# pull (link, title/text, date) tuples out of each page with regular expressions.
+h={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/109"}
+TIMEOUT=10  # seconds; without a timeout requests.get() can block indefinitely
+# NOTE(review): `source` is never read again in this file; confirm it is still needed.
+source = requests.get("http://www.jxxdxy.edu.cn/",headers=h,timeout=TIMEOUT).text
+# Page 1 has no numeric suffix; pages 2-20 end in "-<page>.html".
+url_lists=["https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen.html"]
+for pn in range(2,21):
+    xh=f"https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-{pn}.html"
+    url_lists.append(xh)
+# The patterns are compiled once, outside the fetch loop.  Attribute values are
+# matched with [^"]* and element text with [^<]* (instead of a DOTALL ".*?") so
+# a match cannot start at one <li> and run on into a later one.
+# lianjie: every entry -> (href, link text, date)
+lianjie=re.compile(r'<li><a href="([^"]*)" title="[^"]*"><span class="fl"><img src="/statics/xdxy/jnjs/images/ty.png" alt="" style="margin-right:10px;" >([^<]*)</span><span class="fr timee">([^<]*)</span></a></li>')
+# cjrh: entries whose title attribute contains the keyword -> (href, title, date)
+cjrh=re.compile(r'<li><a href="([^"]*)" title="([^"]*产教融合[^"]*)"><span class="fl"><img src="/statics/xdxy/jnjs/images/ty.png" alt="" style="margin-right:10px;" >[^<]*</span><span class="fr timee">([^<]*)</span></a></li>')
+# sy: entries whose date starts with 2024-03 -> (href, title, link text, date)
+sy=re.compile(r'<li><a href="([^"]*)" title="([^"]*)"><span class="fl"><img src="/statics/xdxy/jnjs/images/ty.png" alt="" style="margin-right:10px;" >([^<]*)</span><span class="fr timee">(2024-03[^<]*)</span></a></li>')
+encoding_lists=[]
+status_lists=[]
+for i in url_lists:
+    res=requests.get(i,headers=h,timeout=TIMEOUT)
+    encoding_lists.append(res.encoding)
+    status_lists.append(res.status_code)
+    res_text=res.content.decode('utf-8')
+    res_dom=etree.HTML(res_text)  # parsed tree; not read by the regex matching below
+    # result/result1/result2 hold this page's matches only (overwritten per page).
+    result=lianjie.findall(res_text)
+    result1=cjrh.findall(res_text)
+    result2=sy.findall(res_text)