diff --git a/demo b/demo new file mode 100644 index 0000000..5eb493c --- /dev/null +++ b/demo @@ -0,0 +1,60 @@ +import requests +import re +head = { + 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0' +} +# Build the URLs of the 20 news-list pages (page numbers 1 through 20) +def website(): + urlList = [] + for i in range(20): + url = "http://www.jxxdxy.edu.cn/news-list-xiaoyuanyaowen-" + str(i + 1) +".html" + urlList.append(url) + return urlList + +# Print the response encoding, the response status code and the HTML source of page 1. NOTE(review): head is passed as the second positional argument, which requests.get binds to params (query string) rather than headers - confirm whether headers=head was intended; the local name re also shadows the imported re module inside this function +def qw(urlList): + re = requests.get(urlList[0], head) + re.encoding = "utf-8" + print(re.encoding) + print(re.status_code) + print(re.text) + +# Download every page in urlList and return the list of their HTML sources (decoded as utf-8) +def code(urlList): + htmlList = [] + for i in urlList: + re = requests.get(i, head) + re.encoding = "utf-8" + htmlList.append(re.text) + return htmlList + +# Get the title, time and URL +def method(htmlList): + List = [] + obtain = '