1.3 KiB

Raw Permalink Blame History

user2

# Lesson 1: download the all-time listing page from 58921.com and print one
# "rank|title|time" line for every table row matched by the XPath below.
import requests
from lxml import etree


def _first_text(node, path):
    """Return the first result of evaluating *path* on *node*, or ''.

    Indexing ``xpath(...)[0]`` directly raises IndexError when a cell has no
    matching text node, which would abort the whole loop on a single odd row.
    """
    found = node.xpath(path)
    return found[0] if found else ''


url = "http://58921.com/alltime?page=0"

# Identify as a desktop Chrome browser in the request headers.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
}

# requests applies no timeout by default; set one so an unresponsive server
# cannot hang the script indefinitely.
r = requests.get(url=url, headers=headers, timeout=10)
# Stop on 4xx/5xx responses instead of parsing an error page as the listing.
r.raise_for_status()

# Decode the response body as UTF-8 regardless of what requests guessed.
r.encoding = 'utf-8'
html = r.text

print(html)

doc = etree.HTML(html)
# NOTE(review): this absolute path includes "tbody"; it only matches if the
# served HTML itself contains a <tbody> element — confirm against the page.
trs = doc.xpath("/html/body/div[2]/div/div/div[3]/table/tbody/tr")
print(trs)

for tr in trs:
    # The 2nd, 3rd (link text) and 7th cells of each row.
    rank = _first_text(tr, './td[2]/text()')
    title = _first_text(tr, './td[3]/a/text()')
    time = _first_text(tr, './td[7]/text()')
    print(rank + '|' + title + '|' + time)

#练习 import requests url = "https://www.job001.cn/jobs?pageNo=1"