You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

32 lines
1.6 KiB

import scrapy
from ..items import ZhongtuwangItem
class XiaoshuoSpider(scrapy.Spider):
    """Crawl the paginated bookschina.com listing under ``/24hour/54000000/``.

    Every listing page produces one ``ZhongtuwangItem`` per book entry, after
    which the spider schedules the following page until ``max_page`` has been
    reached.
    """

    name = "xiaoshuo"
    allowed_domains = ["www.bookschina.com"]
    start_urls = ["https://www.bookschina.com/24hour/54000000/"]
    # Follow-up pages are addressed as ``<base_url><page>/``.
    base_url = "https://www.bookschina.com/24hour/54000000/1_0_"
    # Number of the most recently scheduled listing page; crawling starts at 1.
    page = 1
    # Highest page number that will be requested.
    max_page = 12

    def parse(self, response):
        """Yield the books on one listing page, then the next-page request.

        Args:
            response: The downloaded listing page.

        Yields:
            One ``ZhongtuwangItem`` per book entry. ``name``, ``author``,
            ``publisher`` and ``price`` hold the lists returned by
            ``extract()``; ``src`` holds a single ``data-original`` attribute
            value. After the items, one ``scrapy.Request`` for the following
            page while ``self.page`` is still below ``self.max_page``.
        """
        cover_urls = response.xpath(
            '//div[@class="cover"]/a/img/@data-original'
        ).extract()
        info_blocks = response.xpath("//div[@class='infor']")

        # zip() stops at the shorter sequence, so a count mismatch would
        # silently drop entries or pair a book with the wrong cover image.
        if len(cover_urls) != len(info_blocks):
            self.logger.warning(
                "Found %d cover images but %d info blocks on %s",
                len(cover_urls),
                len(info_blocks),
                response.url,
            )

        for info, cover_url in zip(info_blocks, cover_urls):
            yield ZhongtuwangItem(
                name=info.xpath(".//h2/a/text()").extract(),
                author=info.xpath(".//div[@class='author']/a/text()").extract(),
                publisher=info.xpath(
                    './/div[@class="publisher"]/a/text()'
                ).extract(),
                price=info.xpath(
                    './/div[@class="priceWrap"]/span[@class="sellPrice"]/text()'
                ).extract(),
                src=cover_url,
            )

        # NOTE(review): assumes pagination runs once per page, after the item
        # loop, so each response schedules at most one follow-up — confirm.
        if self.page < self.max_page:
            self.page += 1
            next_url = self.base_url + str(self.page) + "/"
            self.logger.debug("Requesting next page: %s", next_url)
            yield scrapy.Request(url=next_url, callback=self.parse)