diff --git a/xd b/xd new file mode 100644 index 0000000..4320223 --- /dev/null +++ b/xd @@ -0,0 +1,53 @@ +import re +import requests +h = { +'User-Agent': +'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.61' +} +with open('江西现代职业技术学院要闻.html','r',encoding='utf8') as f: + html = f.read() + # print(html) + +url_re = r'' +urls = re.findall(url_re,html,re.S) + +urls_list_re = r'