parent
83a44e2300
commit
327ff13883
@ -0,0 +1,27 @@
|
|||||||
|
"""Fetch the campus-news list pages of www.jxxdxy.edu.cn and print, for each
page, the entries whose title contains "产教融合".

The script runs top to bottom at import time and performs network requests.
"""
import re

import requests

# Browser-like User-Agent sent with every request to the site.
header = {"User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 Edg/122.0.0.0"}

# Seconds to wait on the server before giving up.  Without a timeout,
# requests will wait forever on a stalled connection.
REQUEST_TIMEOUT = 10

# News list pages 1 through 20.
urls = [
    'https://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-{}.html'.format(number)
    for number in range(1, 21)
]

# Copy of every list-page URL, in request order.
all_url = list(urls)

# The patterns do not depend on the page being processed, so they are defined
# and compiled once instead of on every loop iteration.  re.S lets ".*?" span
# line breaks inside a single <li> element.

# Captures (href, link text, span text) for each <li> entry.
my_article1 = r'<li>.*?<a href="(.*?)">(.*?)</a>.*?<span>(.*?)</span>.*?</li>'
# Captures (href, title text containing "产教融合", text of the "fr timee" span).
# NOTE(review): the pattern starts with a literal space and contains a greedy
# ".*>" which, under re.S, can run across several entries -- confirm against
# the real page markup before relying on the href group.
my_article2 = r' <li>.*?<a href="(.*?)" .*>.*?<span class="fl"><.*?>(.*?产教融合.*?).*?</span>.*?<span class="fr timee">(.*?)</span>.*?</li>'
# Captures (link text, span text beginning with "03-") for each <li> entry.
# NOTE(review): "03-" presumably selects March dates -- confirm.
my_article3 = r'<li>.*?<a href=".*?">(.*?)</a>.*?<span>(03-.*?)</span>.*?</li>'

_article1_re = re.compile(my_article1, re.S)
_article2_re = re.compile(my_article2, re.S)
_article3_re = re.compile(my_article3, re.S)

for url in urls:
    # stream=True is not used: the body is read in full via .text right away,
    # so deferring the download would gain nothing.
    response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    # NOTE(review): .text decodes with the encoding requests infers from the
    # response headers; if the Chinese titles never match, check
    # response.encoding against the page's real charset.
    source = response.text

    regex1 = _article1_re.findall(source)
    regex2 = _article2_re.findall(source)
    print(regex2)
    regex3 = _article3_re.findall(source)

# Fetch the first list page once more and keep its text; the same headers and
# timeout are used as for the requests above so the site treats it the same.
response1 = requests.get(all_url[0], headers=header, timeout=REQUEST_TIMEOUT)
source1 = response1.text
|
Loading…
Reference in new issue