1.3 KiB
user2
# Lesson 1: download the "alltime" listing page from 58921.com and print
# three cells of every table row, separated by '|'.
import requests
from lxml import etree

url = "http://58921.com/alltime?page=0"
# Send a desktop-browser User-Agent header with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
}

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops early instead of parsing an HTTP error page.
r = requests.get(url=url, headers=headers, timeout=10)
r.raise_for_status()
r.encoding = 'utf-8'  # force r.text to be decoded as UTF-8
html = r.text
print(html)

doc = etree.HTML(html)
# NOTE(review): this absolute path looks copied from browser dev tools.
# Browsers add <tbody> while rendering, so confirm the raw HTML really
# contains it -- otherwise this query returns an empty list.
trs = doc.xpath("/html/body/div[2]/div/div/div[3]/table/tbody/tr")
print(trs)

for tr in trs:
    rank_cells = tr.xpath('./td[2]/text()')
    title_cells = tr.xpath('./td[3]/a/text()')
    time_cells = tr.xpath('./td[7]/text()')
    # Rows lacking any of the three cells (e.g. a header row) are skipped
    # rather than crashing the whole run with IndexError.
    if not (rank_cells and title_cells and time_cells):
        continue
    rank = rank_cells[0]
    title = title_cells[0]
    time = time_cells[0]
    print(f"{rank}|{title}|{time}")
# Exercise: download the first job-listing page from job001.cn and print
# three fields of every matched entry, one per line.
import requests
from lxml import etree

url = "https://www.job001.cn/jobs?pageNo=1"
# Send a desktop-browser User-Agent header with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
}

# A timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() stops early instead of parsing an HTTP error page.
r = requests.get(url=url, headers=headers, timeout=10)
r.raise_for_status()
r.encoding = 'utf-8'  # force r.text to be decoded as UTF-8
html = r.text
print(html)

doc = etree.HTML(html)
trs = doc.xpath("/html/body/div[17]/div[1]/div[1]/div")
print(trs)

# NOTE(review): the query above selects <div> elements, yet the relative
# paths below look for <td> children -- they appear to be copied from the
# table example. Confirm the selectors against this page's actual markup.
for tr in trs:
    a_cells = tr.xpath('./td[2]/text()')
    b_cells = tr.xpath('./td[3]/a/text()')
    c_cells = tr.xpath('./td[7]/text()')
    # Entries missing any of the three fields are skipped rather than
    # aborting the run with IndexError on the first mismatch.
    if not (a_cells and b_cells and c_cells):
        continue
    a = a_cells[0]
    b = b_cells[0]
    c = c_cells[0]
    print(a)
    print(b)
    print(c)