"""Two small web-scraping exercises built on ``requests`` and ``lxml``.

Recovered from the README.md change b77a23e..79af8d1 (README title:
``user2``), where the script had been collapsed into a single diff line.

* Lesson 1 downloads ``MOVIES_URL`` and prints three cells of every matched
  table row, joined with ``|``.
* The practice exercise downloads ``JOBS_URL`` and prints three values for
  every matched node, one per line.

Both exercises also print the raw HTML and the list of matched nodes, as the
original script did.
"""

# Browser-like User-Agent sent with every request.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
}

# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Per-node field paths shared by both exercises (second cell text, link text
# of the third cell, seventh cell text).
FIELD_XPATHS = ("./td[2]/text()", "./td[3]/a/text()", "./td[7]/text()")

# Lesson 1.
MOVIES_URL = "http://58921.com/alltime?page=0"
# NOTE(review): browsers add <tbody> to the DOM they display; if the served
# HTML has no <tbody> this path matches nothing -- confirm against the raw
# page source.
MOVIE_ROWS_XPATH = "/html/body/div[2]/div/div/div[3]/table/tbody/tr"

# Practice exercise.
JOBS_URL = "https://www.job001.cn/jobs?pageNo=1"
# NOTE(review): this selects <div> nodes, yet FIELD_XPATHS addresses <td>
# cells (apparently carried over from lesson 1). Adjust the field paths once
# the page structure is confirmed.
JOB_NODES_XPATH = "/html/body/div[17]/div[1]/div[1]/div"


def fetch_html(url, timeout=REQUEST_TIMEOUT):
    """Download *url* with ``HEADERS`` and return the body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait before ``requests`` gives up.

    Returns:
        The response text.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Imported here so the parsing helpers stay importable without the
    # network dependency installed.
    import requests

    response = requests.get(url=url, headers=HEADERS, timeout=timeout)
    response.encoding = "utf-8"
    return response.text


def select_nodes(html, path):
    """Parse *html* and return every node matched by the XPath *path*.

    Returns an empty list for blank input or when no document tree can be
    built, so callers can iterate over the result unconditionally.
    """
    if not html or not html.strip():
        return []

    from lxml import etree

    doc = etree.HTML(html)
    if doc is None:
        return []
    return doc.xpath(path)


def first_text(node, path, default=""):
    """Return the first result of ``node.xpath(path)``, or *default* if none.

    The original script indexed ``[0]`` directly and raised ``IndexError``
    as soon as one cell was missing or had no text.
    """
    matches = node.xpath(path)
    return matches[0] if matches else default


def extract_fields(node, paths=FIELD_XPATHS, default=""):
    """Return the first match of each XPath in *paths*, evaluated on *node*."""
    return [first_text(node, path, default) for path in paths]


def run_lesson_one(url=MOVIES_URL):
    """Lesson 1: print the page, the matched rows, then ``a|b|c`` per row."""
    html = fetch_html(url)
    print(html)

    rows = select_nodes(html, MOVIE_ROWS_XPATH)
    print(rows)
    for row in rows:
        # The original called these fields rank, title and time.
        rank, title, time_text = extract_fields(row)
        print("|".join((rank, title, time_text)))


def run_practice(url=JOBS_URL):
    """Practice: print the page, the matched nodes, then each field on its own line."""
    html = fetch_html(url)
    print(html)

    nodes = select_nodes(html, JOB_NODES_XPATH)
    print(nodes)
    for node in nodes:
        for value in extract_fields(node):
            print(value)


def main():
    """Run both exercises in their original order."""
    run_lesson_one()
    run_practice()


if __name__ == "__main__":
    main()