parent
4557791ffc
commit
35c1d048ed
@ -0,0 +1,36 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from lxml import etree
|
||||||
|
# Scrape the paginated "xiaoyuanyaowen" news list of www.jxxdxy.edu.cn and
# extract list entries with three regular expressions.

# Browser-like User-Agent sent with every request.
h = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/109.0.0.0 Safari/537.36 "
        "SLBrowser/9.0.3.1311 SLBChan/109"
    )
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Home page HTML.
# NOTE(review): `source` is not used anywhere else in the visible script.
source = requests.get(
    "http://www.jxxdxy.edu.cn/", headers=h, timeout=REQUEST_TIMEOUT
).text

# Page 1 has no numeric suffix; pages 2..20 follow the pattern
#   https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-<page>.html
url_lists = ["https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen.html"]
for pn in range(2, 21):
    xh = f"https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-{pn}.html"
    url_lists.append(xh)

# The patterns do not depend on the page, so they are defined once here
# instead of being rebuilt on every loop iteration.

# Every list entry: (href, text inside the "fl" span, text of the "fr timee"
# span -- apparently the publication date).
lianjie = r'<li><a href="(.*?)" title=".*?"><span class="fl"><img src="/statics/xdxy/jnjs/images/ty.png" alt="" style="margin-right:10px;" >(.*?)</span><span class="fr timee">(.*?)</span></a></li>'

# Entries whose title attribute contains the keyword: (href, title, timee).
# NOTE(review): in "产教融合+" the "+" quantifies only the last character, and
# the surrounding "[^<]+" / "[^>]+" require at least one character on each
# side of the keyword, so a title that starts or ends with it will not
# match. Confirm whether that is intended.
cjrh = (r'<li><a href="([^>]+)" title="([^<]+产教融合+[^>]+)"><span class="fl"><img src="/statics/xdxy/jnjs/images/ty.png" alt="" style="margin-right:10px;" >.*?</span><span class="fr timee">(.*?)</span></a></li>')

# Entries whose "fr timee" text starts with "2024-03":
# (href, title, text inside the "fl" span, timee).
sy = (r'<li><a href="(.*?)" title="(.*?)"><span class="fl"><img src="/statics/xdxy/jnjs/images/ty.png" alt="" style="margin-right:10px;" >(.*?)</span><span class="fr timee">(2024-03.*?)</span></a></li>')

# Per-page bookkeeping: the encoding requests detected and the HTTP status.
encoding_lists = []
status_lists = []

for i in url_lists:
    res = requests.get(i, headers=h, timeout=REQUEST_TIMEOUT)
    encoding_lists.append(res.encoding)
    status_lists.append(res.status_code)

    # Decode the raw bytes explicitly as UTF-8 rather than relying on the
    # encoding guessed by requests.
    res_text = res.content.decode('utf-8')
    # NOTE(review): the parsed DOM is not used by the visible code; all
    # extraction below is regex-based.
    res_dom = etree.HTML(res_text)

    # NOTE(review): these three names are overwritten for every page, so
    # after the loop they only hold the matches from the last page.
    result = re.findall(lianjie, res_text, re.S)
    result1 = re.findall(cjrh, res_text, re.S)
    result2 = re.findall(sy, res_text, re.S)
|
Loading…
Reference in new issue