parent
3931a0f68a
commit
a6b143a6d3
@ -0,0 +1,49 @@
|
||||
import re
|
||||
import requests
|
||||
|
||||
# Build the news-list page URLs: the first page has no numeric suffix,
# pages 2 through 20 are named "<url_first>-<n><url_last>".
url_first = 'https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen'
url_last = '.html'

urls = [url_first + url_last]
urls.extend(url_first + '-' + str(i) + url_last for i in range(2, 21))
|
||||
|
||||
# for url in urls:
#     print(url)
|
||||
# Request headers passed to requests.get for each page. The User-Agent is
# a desktop Edge 124 browser string; the product token must read
# "Chrome/" (not "Chrom/") and the KHTML comment needs its space to match
# the form real browsers send.
a = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'
    ),
}
|
||||
# res = requests.get(urls[0], headers=a)
# res.encoding = 'utf-8'

# print(res.text)
|
||||
|
||||
# for url in urls:
#     response = requests.get(url, headers=a)
#     print(response.status_code)
#     print(response.encoding)
|
||||
# Download every page in `urls` and keep its decoded body text, in the
# same order as the URLs.
lists = []
for url in urls:
    # requests applies no timeout unless one is given, so a single stalled
    # connection would otherwise hang the whole run.
    response = requests.get(url, headers=a, timeout=10)
    # NOTE(review): response.text uses the encoding requests infers from
    # the HTTP headers; confirm it matches the site's charset.
    lists.append(response.text)
|
||||
# Pattern for one list entry. Capture groups, in order: the anchor's href,
# its title attribute, and the text between `fr timee">` and the next
# `</span>`.
bd_re = r'<li><a href="(.*?)" title="(.*?)".*?fr timee">(.*?)</span>'
# Compiled once, outside the loop; re.S lets ".*?" span the line breaks
# between the anchor and the span.
bd_pattern = re.compile(bd_re, re.S)

# abc holds one list of (href, title, span text) tuples per downloaded page.
abc = []
for text in lists:
    result = bd_pattern.findall(text)
    abc.append(result)
    # Per-page debug output: the matches and how many were found.
    print(result)
    print(len(result))
|
||||
|
||||
# Filter entries whose title (second capture group) contains a keyword:
# str = '产教融合'
# for text in abc:
#     for i in text:
#         if str in i[1]:
#             print(i)
|
||||
|
||||
# Filter entries whose third capture group contains a year-month prefix:
# time = '2024-03'
# for text in abc:
#     for i in text:
#         if time in i[2]:
#             print(i)
|
Loading…
Reference in new issue