import time

from lxml import etree
from selenium import webdriver


class Spider:
    """Fetch pages with a Chrome WebDriver and extract values by XPath.

    Each public method loads a URL in the shared browser instance
    (``self.bro``), parses the rendered page source with lxml, and returns
    the raw result list(s) of fixed absolute XPath queries.

    The instance owns a browser process; call :meth:`close` (or use the
    instance as a context manager) when done so that process is released.
    """

    # Absolute XPath expressions used by the public methods. They are kept
    # as class constants so each expression is defined exactly once.
    _SMALL_SORT_MORE_XPATH = (
        "/html/body/div[5]/div[2]/div[3]/div/dl/dd/ul/li[@class='more']/a/@href"
    )
    _BIG_SORT_XPATH = "/html/body/div[5]/div/div[1]/div/a/@href"
    _DETAIL_HREF_XPATH = "/html/body/div[5]/div[2]/div[4]/div/dl/dd/ul/li/a/@href"
    _SORT_TITLE_XPATH = "/html/body/div[5]/div[2]/div[1]/div[1]/b/h1/text()"

    def __init__(self):
        """Start the Chrome WebDriver session used by every fetch."""
        self.bro = webdriver.Chrome()

    def __enter__(self):
        """Return the spider itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Quit the browser when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Quit the underlying WebDriver session."""
        self.bro.quit()

    @staticmethod
    def _xpath(page_source, xpath):
        """Parse *page_source* as HTML and return the matches for *xpath*.

        Returns an empty list when the parser produces no root element.
        """
        tree = etree.HTML(page_source)
        # NOTE(review): etree.HTML may yield None for an empty document in
        # some lxml versions; treat that as "no matches" instead of failing
        # with AttributeError on ``None.xpath``.
        if tree is None:
            return []
        return tree.xpath(xpath)

    def _page_source(self, url, settle_seconds=0.0):
        """Load *url* in the browser and return its page source.

        When *settle_seconds* is positive, sleep for that long after
        navigation and before the source is read.
        """
        self.bro.get(url)
        if settle_seconds > 0:
            time.sleep(settle_seconds)
        return self.bro.page_source

    def _detail_and_sort(self, url):
        """Return ``(detail_hrefs, sort_titles)`` extracted from *url*."""
        page = self._page_source(url)
        detail = self._xpath(page, self._DETAIL_HREF_XPATH)
        sort = self._xpath(page, self._SORT_TITLE_XPATH)
        return detail, sort

    def small_sort_spider(self, url):
        """Return the ``href`` values of the ``li[@class='more']`` links on *url*.

        The one-second pause happens before the page source is captured.
        Previously the sleep ran after the snapshot was already taken, so
        it could not influence the HTML that was parsed.
        """
        page = self._page_source(url, settle_seconds=1)
        return self._xpath(page, self._SMALL_SORT_MORE_XPATH)

    def big_sort_spider(self, url):
        """Return the ``href`` values matched by the big-sort XPath on *url*."""
        page = self._page_source(url)
        return self._xpath(page, self._BIG_SORT_XPATH)

    def more_medicinal_detail_href(self, url):
        """Return ``(detail_hrefs, sort_titles)`` for the page at *url*.

        ``detail_hrefs`` is the list of link ``href`` values and
        ``sort_titles`` the list of ``h1`` text nodes found by the fixed
        XPath queries.
        """
        return self._detail_and_sort(url)

    def medicinal_detail_href(self, url):
        """Return ``(detail_hrefs, sort_titles)`` for the page at *url*.

        Uses the same XPath queries as :meth:`more_medicinal_detail_href`.
        """
        return self._detail_and_sort(url)