第一次增加

main
Pengkejian 1 year ago
parent 4fd68103c0
commit 6c12fe207d

Binary file not shown.

@@ -0,0 +1,28 @@
import bigSortSpider
import spider
class Main:
    """Driver that walks the link hierarchy exposed by the spider and prints results.

    A single ``spider.Spider`` instance is created up front and reused for
    every page request made by :meth:`main`.
    """

    # Start page used when the caller does not supply one.
    DEFAULT_URL = 'http://www.zhongyoo.com/name/'

    def __init__(self):
        self.spider = spider.Spider()

    @staticmethod
    def _report(detail, sort):
        """Print one result pair: the ``sort`` value first, then ``detail``."""
        print(sort)
        print(detail)

    def main(self, url=DEFAULT_URL):
        """Crawl from *url* and print every ``(sort, detail)`` pair found.

        For each link returned by ``big_sort_spider(url)`` the spider is asked
        for follow-up links via ``small_sort_spider``.  When follow-up links
        exist, each one is resolved with ``more_medicinal_detail_href``;
        otherwise the original link is resolved directly with
        ``medicinal_detail_href``.

        Args:
            url: Page to start from.  Defaults to :attr:`DEFAULT_URL`, which
                is the address the original script always used.

        Returns:
            None.  Results are written to standard output only.
        """
        for href in self.spider.big_sort_spider(url):
            more_hrefs = self.spider.small_sort_spider(href)
            if more_hrefs:
                for more_href in more_hrefs:
                    detail, sort = self.spider.more_medicinal_detail_href(more_href)
                    self._report(detail, sort)
            else:
                # No follow-up links were found: treat the link itself as the
                # page that carries the detail list.
                detail, sort = self.spider.medicinal_detail_href(href)
                self._report(detail, sort)
if __name__ == '__main__':
    # main() only prints and returns None, so its result is not bound to a name.
    Main().main()

@@ -0,0 +1,57 @@
import time
from selenium import webdriver
from lxml import etree
class Spider:
    """Chrome-backed scraper that extracts values from pages via fixed XPaths.

    Every public method loads a URL in the shared browser session, parses the
    rendered page source with lxml and returns whatever the relevant absolute
    XPath expression matches (a list, possibly empty).

    The instance owns a browser process; call :meth:`close` when finished, or
    use the object as a context manager (``with Spider() as s: ...``).
    """

    # Absolute XPath expressions, tied to the target site's page layout.
    _BIG_SORT_XPATH = '/html/body/div[5]/div/div[1]/div/a/@href'
    _MORE_XPATH = "/html/body/div[5]/div[2]/div[3]/div/dl/dd/ul/li[@class='more']/a/@href"
    _DETAIL_XPATH = '/html/body/div[5]/div[2]/div[4]/div/dl/dd/ul/li/a/@href'
    _SORT_XPATH = '/html/body/div[5]/div[2]/div[1]/div[1]/b/h1/text()'

    def __init__(self):
        self.bro = webdriver.Chrome()

    def close(self):
        """Quit the browser session started by ``__init__``."""
        self.bro.quit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _page_source(self, url):
        """Navigate the browser to *url* and return the resulting page source."""
        self.bro.get(url)
        return self.bro.page_source

    @staticmethod
    def _select(resp, xpath):
        """Parse *resp* as HTML and return the matches for *xpath*."""
        html = etree.HTML(resp)
        if html is None:
            # lxml can hand back None when there is nothing to parse; report
            # that as "no matches" rather than failing on ``None.xpath``.
            return []
        return html.xpath(xpath)

    def _detail_and_sort(self, url):
        """Load *url* once and return ``(detail, sort)`` extracted from it."""
        resp = self._page_source(url)
        detail = self._select(resp, self._DETAIL_XPATH)
        sort = self._select(resp, self._SORT_XPATH)
        return detail, sort

    def small_sort_spider(self, url):
        """Return the ``href`` values of the ``li[@class='more']`` links on *url*.

        Returns:
            A list of matched ``href`` strings; empty when the page has no
            such link.
        """
        resp = self._page_source(url)
        # NOTE(review): this pause runs after the page source has already been
        # captured, so it spaces out requests rather than waiting for the page
        # to render - confirm that this is the intent.
        time.sleep(1)
        return self._select(resp, self._MORE_XPATH)

    def big_sort_spider(self, url):
        """Return the ``href`` values matched by the top-of-page link XPath on *url*.

        Returns:
            A list of matched ``href`` strings (presumably the top-level
            category links - verify against the site layout).
        """
        return self._select(self._page_source(url), self._BIG_SORT_XPATH)

    def more_medicinal_detail_href(self, url):
        """Return ``(detail, sort)`` for a page reached through a "more" link.

        Returns:
            A 2-tuple: ``detail`` is the list of ``href`` values from the
            page's link list, ``sort`` is the list of text nodes of the
            page's ``<h1>`` heading.
        """
        return self._detail_and_sort(url)

    def medicinal_detail_href(self, url):
        """Return ``(detail, sort)`` for a page that has no "more" link.

        The extraction is the same as in :meth:`more_medicinal_detail_href`;
        both names are kept for existing callers.

        Returns:
            A 2-tuple: ``detail`` is the list of ``href`` values from the
            page's link list, ``sort`` is the list of text nodes of the
            page's ``<h1>`` heading.
        """
        return self._detail_and_sort(url)
Loading…
Cancel
Save