"""Scrapy spider for the bookschina.com listing under /24hour/54000000/."""

import scrapy

from ..items import ZhongtuwangItem


class XiaoshuoSpider(scrapy.Spider):
    """Crawl the paginated book listing and emit one item per book.

    The start URL is treated as page 1. Each parsed response schedules the
    next listing page until ``max_page`` has been requested.
    """

    name = "xiaoshuo"
    allowed_domains = ["www.bookschina.com"]
    start_urls = ["https://www.bookschina.com/24hour/54000000/"]

    # Follow-up listing pages are requested as base_url + "<page>/".
    base_url = "https://www.bookschina.com/24hour/54000000/1_0_"
    # Number of the most recently scheduled listing page.
    page = 1
    # Last listing page to request (inclusive).
    max_page = 12

    def parse(self, response):
        """Extract the books on one listing page and schedule the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ZhongtuwangItem: One item per book, with ``name``, ``author``,
                ``publisher`` and ``price`` as lists of extracted text nodes
                and ``src`` as the cover image URL string.
            scrapy.Request: A request for the next listing page, while
                ``self.page`` is below ``self.max_page``.
        """
        cover_urls = response.xpath(
            '//div[@class="cover"]/a/img/@data-original'
        ).extract()
        info_nodes = response.xpath("//div[@class='infor']")

        # Covers and info blocks are matched by position; zip() stops at the
        # shorter sequence, so make a count mismatch visible instead of
        # silently dropping or mispairing books.
        if len(cover_urls) != len(info_nodes):
            self.logger.warning(
                "Found %d cover images but %d info blocks on %s",
                len(cover_urls),
                len(info_nodes),
                response.url,
            )

        for info, cover_url in zip(info_nodes, cover_urls):
            yield ZhongtuwangItem(
                name=info.xpath(".//h2/a/text()").extract(),
                author=info.xpath(".//div[@class='author']/a/text()").extract(),
                publisher=info.xpath(
                    './/div[@class="publisher"]/a/text()'
                ).extract(),
                price=info.xpath(
                    './/div[@class="priceWrap"]/span[@class="sellPrice"]/text()'
                ).extract(),
                src=cover_url,
            )

        if self.page < self.max_page:
            self.page += 1
            url = self.base_url + str(self.page) + "/"
            self.logger.info("Scheduling listing page %d: %s", self.page, url)
            yield scrapy.Request(url=url, callback=self.parse)