# xpath/xiaoshuo.py

import scrapy

from ..items import ZhongtuwangItem

class XiaoshuoSpider(scrapy.Spider):
    """Scrape book entries from the bookschina.com 24-hour listing pages.

    The crawl starts at ``start_urls`` and then requests numbered listing
    pages built from ``base_url`` one after another, until ``max_page`` has
    been requested.
    """

    name = "xiaoshuo"
    allowed_domains = ["www.bookschina.com"]
    start_urls = ["https://www.bookschina.com/24hour/54000000/"]
    # Prefix of the numbered listing pages; the page number and a trailing
    # slash are appended to form the full URL.
    base_url = "https://www.bookschina.com/24hour/54000000/1_0_"
    # Number of the most recently requested listing page. The start URL
    # counts as page 1, so the first follow-up request is for page 2.
    page = 1
    # Highest page number that will be requested (inclusive).
    max_page = 12

    def parse(self, response):
        """Yield one item per book on the page, then request the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ZhongtuwangItem: One per book. ``name``, ``author``,
                ``publisher`` and ``price`` hold the lists returned by
                ``.extract()``; ``src`` holds a single image URL string.
            scrapy.Request: A request for the next listing page, while
                ``page`` is still below ``max_page``.
        """
        cover_urls = response.xpath(
            '//div[@class="cover"]/a/img/@data-original'
        ).extract()
        info_nodes = response.xpath("//div[@class='infor']")

        # NOTE(review): covers and info blocks are paired purely by position
        # and zip() stops at the shorter sequence, so a cover without a
        # data-original attribute would shift every later image by one.
        # Confirm the page always provides one per book.
        for info_node, cover_url in zip(info_nodes, cover_urls):
            yield ZhongtuwangItem(
                name=info_node.xpath(".//h2/a/text()").extract(),
                author=info_node.xpath(
                    ".//div[@class='author']/a/text()"
                ).extract(),
                publisher=info_node.xpath(
                    './/div[@class="publisher"]/a/text()'
                ).extract(),
                price=info_node.xpath(
                    './/div[@class="priceWrap"]/span[@class="sellPrice"]/text()'
                ).extract(),
                src=cover_url,
            )

        # int() lets max_page be overridden with a string value (for example
        # from a command-line spider argument) without breaking the
        # comparison.
        if self.page < int(self.max_page):
            self.page += 1
            next_url = "{}{}/".format(self.base_url, self.page)
            self.logger.info("Requesting next listing page: %s", next_url)
            yield scrapy.Request(url=next_url, callback=self.parse)