import re
import requests
# Header mapping carrying a desktop Edge/Chrome User-Agent string.
# NOTE(review): presumably passed as `headers=` to `requests` calls further
# down the file; no use of `h` is visible in this section, so confirm.
h = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61'
    ),
}
# Read the locally saved HTML page; the path is relative to the current
# working directory, and the file is decoded as UTF-8.
with open('江西现代职业技术学院要闻.html', 'r', encoding='utf8') as f:
    html = f.read()

# NOTE(review): the pattern is an empty string, so findall() returns one empty
# match per position in `html` instead of any URLs. This is presumably a
# placeholder (or the pattern text was lost); supply the real expression.
url_re = r''
# re.S makes '.' match newlines too, which matters once a real pattern with
# '.' is filled in, since the page is read as a single multi-line string.
urls = re.findall(url_re, html, re.S)
urls_list_re = r'