parent
4fd68103c0
commit
6c12fe207d
Binary file not shown.
@ -0,0 +1,28 @@
|
||||
import bigSortSpider
|
||||
import spider
|
||||
|
||||
|
||||
|
||||
|
||||
class Main:
    """Walk the site index with a ``spider.Spider`` and print what it finds.

    For every link returned by ``big_sort_spider`` for the start URL, the
    page is checked for "more" links via ``small_sort_spider``. When such
    links exist each of them is scraped with ``more_medicinal_detail_href``;
    otherwise the page itself is scraped with ``medicinal_detail_href``.
    """

    def __init__(self):
        # Creating the spider is what starts the browser session.
        self.spider = spider.Spider()

    @staticmethod
    def _report(detail, sort):
        """Print one scrape result: the ``sort`` value first, then ``detail``."""
        print(sort)
        print(detail)

    def main(self):
        """Run the whole crawl, printing each (sort, detail) pair to stdout.

        Returns:
            None.
        """
        url = 'http://www.zhongyoo.com/name/'
        big_hrefs = self.spider.big_sort_spider(url)
        for href in big_hrefs:
            more_hrefs = self.spider.small_sort_spider(href)

            # Truthiness instead of ``!= []``: any empty result (empty list,
            # empty tuple, None) means "no 'more' links on this page".
            if more_hrefs:
                for more_href in more_hrefs:
                    detail, sort = self.spider.more_medicinal_detail_href(more_href)
                    self._report(detail, sort)
            else:
                detail, sort = self.spider.medicinal_detail_href(href)
                self._report(detail, sort)
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: run the crawl once, then shut the browser down.
    app = Main()
    try:
        app.main()
    finally:
        # Main.__init__ builds a Spider whose ``bro`` attribute is the
        # Selenium Chrome driver; quit it even when the crawl raises so no
        # browser process is left running.
        app.spider.bro.quit()
|
@ -0,0 +1,57 @@
|
||||
import time
|
||||
|
||||
from selenium import webdriver
|
||||
from lxml import etree
|
||||
|
||||
|
||||
|
||||
class Spider:
    """Selenium-driven scraper that extracts links and titles via XPath.

    Each public method loads a URL in the Chrome driver held in ``self.bro``,
    parses the rendered page source with ``lxml.etree.HTML`` and returns the
    result of evaluating fixed absolute XPath expressions against it.

    NOTE(review): the XPath expressions are absolute paths tied to the target
    site's current layout; what the matched nodes mean (categories, detail
    pages, ...) is inferred from the method names only -- confirm against the
    live pages.
    """

    # XPath expressions, kept in one place so the methods stay in sync.
    _BIG_SORT_XPATH = '/html/body/div[5]/div/div[1]/div/a/@href'
    _MORE_XPATH = "/html/body/div[5]/div[2]/div[3]/div/dl/dd/ul/li[@class='more']/a/@href"
    _DETAIL_XPATH = '/html/body/div[5]/div[2]/div[4]/div/dl/dd/ul/li/a/@href'
    _SORT_XPATH = '/html/body/div[5]/div[2]/div[1]/div[1]/b/h1/text()'

    def __init__(self):
        # Starts a Chrome session; call close() when finished with the spider.
        self.bro = webdriver.Chrome()

    def close(self):
        """Quit the browser session held in ``self.bro``."""
        self.bro.quit()

    def _load(self, url, settle=0):
        """Open ``url`` in the browser and return its parsed HTML tree.

        Args:
            url: Address to load.
            settle: Seconds to sleep after navigation and *before* the page
                source is read; 0 disables the pause.

        Returns:
            The element tree produced by ``etree.HTML`` for the page source.
        """
        self.bro.get(url)
        if settle:
            time.sleep(settle)
        return etree.HTML(self.bro.page_source)

    def _detail_and_sort(self, url):
        """Load ``url`` and return ``(detail, sort)`` XPath results for it."""
        tree = self._load(url)
        detail = tree.xpath(self._DETAIL_XPATH)
        sort = tree.xpath(self._SORT_XPATH)
        return detail, sort

    def small_sort_spider(self, url):
        """Return the ``href`` values of the ``li.more`` links on ``url``.

        The original code slept for one second after the page source had
        already been captured, so the pause could not influence the parsed
        content. The same one-second pause now happens before the snapshot:
        the per-call delay is unchanged, and the page gets that time to
        finish rendering before it is read.

        Returns:
            The list produced by the XPath query; empty when nothing matches.
        """
        tree = self._load(url, settle=1)
        return tree.xpath(self._MORE_XPATH)

    def big_sort_spider(self, url):
        """Return the ``href`` values matched by the top-level index XPath.

        Returns:
            The list produced by the XPath query; empty when nothing matches.
        """
        tree = self._load(url)
        return tree.xpath(self._BIG_SORT_XPATH)

    def more_medicinal_detail_href(self, url):
        """Return ``(detail, sort)`` for a page reached through a "more" link.

        Returns:
            A 2-tuple ``(detail, sort)``: the ``href`` values of the list
            links and the text nodes of the page's ``h1`` heading.
        """
        return self._detail_and_sort(url)

    def medicinal_detail_href(self, url):
        """Return ``(detail, sort)`` for a page that has no "more" links.

        Uses the same XPath queries as ``more_medicinal_detail_href``.

        Returns:
            A 2-tuple ``(detail, sort)``: the ``href`` values of the list
            links and the text nodes of the page's ``h1`` heading.
        """
        return self._detail_and_sort(url)
|
Loading…
Reference in new issue