parent
4fd68103c0
commit
6c12fe207d
Binary file not shown.
@ -0,0 +1,28 @@
|
|||||||
|
import bigSortSpider
|
||||||
|
import spider
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Main:
    """Driver that walks the medicinal-name catalogue and prints what it finds.

    All page fetching and parsing is delegated to ``self.spider``; this class
    only decides which spider method to call for each category link and
    prints the results.
    """

    def __init__(self):
        # NOTE(review): presumably spider.Spider launches a browser session on
        # construction -- confirm against the spider module.
        self.spider = spider.Spider()

    @staticmethod
    def _report(detail, sort):
        """Print the category heading(s) first, then the detail links."""
        print(sort)
        print(detail)

    def main(self):
        """Crawl every top-level category and print its heading and links.

        For each link returned by ``big_sort_spider`` the spider is asked for
        "more" links. When there are any, each one is expanded with
        ``more_medicinal_detail_href``; otherwise the category link itself is
        expanded with ``medicinal_detail_href``.

        Returns:
            None. Results are written to standard output.
        """
        url = 'http://www.zhongyoo.com/name/'
        for href in self.spider.big_sort_spider(url):
            more_hrefs = self.spider.small_sort_spider(href)
            # Truthiness covers the empty-list case the original compared
            # against with ``!= []``.
            if more_hrefs:
                for more_href in more_hrefs:
                    self._report(
                        *self.spider.more_medicinal_detail_href(more_href)
                    )
            else:
                self._report(*self.spider.medicinal_detail_href(href))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: run the crawl once, then shut the browser down.
    # The original bound the (None) result to an unused, misspelled name.
    app = Main()
    try:
        app.main()
    finally:
        # NOTE(review): assumes app.spider.bro is the Selenium WebDriver
        # created by Spider.__init__; quit() ends the browser session so no
        # browser/driver process is left running when the crawl finishes or
        # raises.
        app.spider.bro.quit()
|
@ -0,0 +1,57 @@
|
|||||||
|
import time
|
||||||
|
|
||||||
|
from selenium import webdriver
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Spider:
    """Selenium-backed page fetcher with XPath extraction helpers.

    Every public method loads a URL in the Chrome session held in
    ``self.bro``, parses the resulting page source with ``lxml.etree.HTML``
    and returns whatever the method's XPath expression(s) match.
    """

    # XPath expressions, kept in one place because the two detail methods
    # share them. They are absolute paths tied to the site's page layout.
    _BIG_SORT_XPATH = '/html/body/div[5]/div/div[1]/div/a/@href'
    _MORE_XPATH = (
        "/html/body/div[5]/div[2]/div[3]/div/dl/dd/ul/li[@class='more']/a/@href"
    )
    _DETAIL_XPATH = '/html/body/div[5]/div[2]/div[4]/div/dl/dd/ul/li/a/@href'
    _SORT_XPATH = '/html/body/div[5]/div[2]/div[1]/div[1]/b/h1/text()'

    def __init__(self):
        # Starts a Chrome browser session; call close() when finished.
        self.bro = webdriver.Chrome()

    def close(self):
        """Quit the browser session held in ``self.bro``."""
        self.bro.quit()

    def _fetch(self, url):
        """Load *url* in the browser and return the page source."""
        self.bro.get(url)
        return self.bro.page_source

    @staticmethod
    def _select(page_source, xpath):
        """Parse *page_source* as HTML and return the matches for *xpath*."""
        return etree.HTML(page_source).xpath(xpath)

    def _detail_and_sort(self, url):
        """Return ``(detail, sort)`` for *url* from a single page load."""
        page_source = self._fetch(url)
        detail = self._select(page_source, self._DETAIL_XPATH)
        sort = self._select(page_source, self._SORT_XPATH)
        return detail, sort

    def small_sort_spider(self, url):
        """Return the ``href`` values of the ``li.more`` links on *url*.

        The result is empty when the page has no such links.
        """
        page_source = self._fetch(url)
        # NOTE(review): the pause runs after the page source has already been
        # read, so it only spaces out requests and cannot wait for rendering.
        # Order kept as in the original -- confirm the intent.
        time.sleep(1)
        return self._select(page_source, self._MORE_XPATH)

    def big_sort_spider(self, url):
        """Return the ``href`` values of the top-level category links on *url*."""
        return self._select(self._fetch(url), self._BIG_SORT_XPATH)

    def more_medicinal_detail_href(self, url):
        """Return ``(detail, sort)`` for a "more" listing page.

        ``detail`` holds the ``href`` values of the listed entries and
        ``sort`` holds the text node(s) of the page's ``h1`` heading.
        """
        return self._detail_and_sort(url)

    def medicinal_detail_href(self, url):
        """Return ``(detail, sort)`` for a category page without "more" links.

        Uses the same XPath expressions as ``more_medicinal_detail_href``.
        """
        return self._detail_and_sort(url)
|
Loading…
Reference in new issue