# user2
"""Scraping exercises using requests and lxml.

Lesson 1 fetches http://58921.com/alltime?page=0 and prints three cells of
every matched table row, joined with ``|``.  The practice section fetches
https://www.job001.cn/jobs?pageNo=1 and prints three fields of every matched
``div``, one per line.  Both sections also dump the raw HTML and the matched
node list, which helps while tuning the XPath expressions.
"""

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/86.0.4240.111 Safari/537.36"
    )
}

# Seconds to wait for the server; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 10

# --- Lesson 1 ---------------------------------------------------------------
MOVIE_URL = "http://58921.com/alltime?page=0"
# NOTE(review): this absolute path looks copied from browser dev tools.
# Browsers add <tbody> to the DOM even when the served HTML has none, so
# confirm the expression matches the raw response.
MOVIE_ROWS_XPATH = "/html/body/div[2]/div/div/div[3]/table/tbody/tr"
MOVIE_FIELD_XPATHS = (
    "./td[2]/text()",
    "./td[3]/a/text()",
    "./td[7]/text()",
)

# --- Practice ---------------------------------------------------------------
JOB_URL = "https://www.job001.cn/jobs?pageNo=1"
JOB_ITEMS_XPATH = "/html/body/div[17]/div[1]/div[1]/div"
# NOTE(review): these are the td-based paths from lesson 1, but the items
# selected above are <div> elements.  They most likely need to be replaced
# with paths that match the job page's markup -- verify against the page.
JOB_FIELD_XPATHS = (
    "./td[2]/text()",
    "./td[3]/a/text()",
    "./td[7]/text()",
)


def first_text(matches, default=""):
    """Return the first item of an XPath result list, or *default* if empty.

    Indexing ``[0]`` directly raises ``IndexError`` as soon as one row lacks
    the expected cell; this keeps the scrape going instead.
    """
    return matches[0] if matches else default


def fetch_html(url):
    """Download *url* with the shared headers and return the body as text.

    The response is decoded as UTF-8 regardless of what the server declares.
    """
    # Third-party import kept local so the pure helpers in this module can be
    # imported even where requests is not installed.
    import requests

    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.encoding = "utf-8"
    return response.text


def select_nodes(html, xpath):
    """Parse *html* and return the list of nodes matching *xpath*."""
    # Third-party import kept local for the same reason as in fetch_html.
    from lxml import etree

    doc = etree.HTML(html)
    if doc is None:
        # Nothing parseable came back; treat it as "no matches".
        return []
    return doc.xpath(xpath)


def extract_fields(node, field_xpaths):
    """Return the first match of each relative XPath on *node* ("" if none)."""
    return [first_text(node.xpath(path)) for path in field_xpaths]


def scrape_movies():
    """Lesson 1: print ``rank|title|time`` for every matched table row."""
    html = fetch_html(MOVIE_URL)
    print(html)

    rows = select_nodes(html, MOVIE_ROWS_XPATH)
    print(rows)

    for row in rows:
        print("|".join(extract_fields(row, MOVIE_FIELD_XPATHS)))


def scrape_jobs():
    """Practice: print the three extracted fields of every job item."""
    html = fetch_html(JOB_URL)
    print(html)

    items = select_nodes(html, JOB_ITEMS_XPATH)
    print(items)

    for item in items:
        for value in extract_fields(item, JOB_FIELD_XPATHS):
            print(value)


def main():
    """Run both exercises in their original order."""
    scrape_movies()
    scrape_jobs()


if __name__ == "__main__":
    main()