From 35c1d048ed3f0d43ed61c23523378bcad6ef9265 Mon Sep 17 00:00:00 2001 From: pbsz73ypc <172876621@qq.com> Date: Wed, 29 May 2024 10:59:07 +0800 Subject: [PATCH] ADD file via upload --- 正则表达式代码.txt | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 正则表达式代码.txt diff --git a/正则表达式代码.txt b/正则表达式代码.txt new file mode 100644 index 0000000..187121e --- /dev/null +++ b/正则表达式代码.txt @@ -0,0 +1,36 @@ +import re + +import requests +from lxml import etree +h={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/109"} +source = requests.get("http://www.jxxdxy.edu.cn/",headers=h).text +# print(source) +# https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen.html +# https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-2.html +# https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-3.html +url_lists=["https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen.html"] +for pn in range(2,21): + xh=f"https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-{pn}.html" + url_lists.append(xh) + # print(xh) +# print(url_lists) +encoding_lists=[] +status_lists=[] +for i in url_lists: + res=requests.get(i,headers=h) + encoding_lists.append(res.encoding) + status_lists.append(res.status_code) + res_text=res.content.decode('utf-8') + res_dom=etree.HTML(res_text) + # print(res.status_code) + # print(res_text) + # print(res_dom) + lianjie=r'