You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
32 lines
1.6 KiB
32 lines
1.6 KiB
import scrapy
|
|
|
|
from ..items import ZhongtuwangItem
|
|
|
|
class XiaoshuoSpider(scrapy.Spider):
    """Crawl the paginated book listing on www.bookschina.com.

    The crawl starts at the ``/24hour/54000000/`` listing page and then
    follows numbered pages built from ``base_url`` until ``max_page`` has
    been requested.  Every book entry found on a page is emitted as a
    ``ZhongtuwangItem``.
    """

    name = "xiaoshuo"
    allowed_domains = ["www.bookschina.com"]
    start_urls = ["https://www.bookschina.com/24hour/54000000/"]

    # Prefix of follow-up page URLs; the page number and a trailing "/" are
    # appended to it in parse().
    base_url = "https://www.bookschina.com/24hour/54000000/1_0_"
    # Number of the most recently requested page; the start URL counts as 1.
    page = 1
    # Highest page number that will be requested.
    max_page = 12

    def parse(self, response):
        """Yield one item per book on the page, then request the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ``ZhongtuwangItem`` objects with ``name``, ``author``,
            ``publisher``, ``price`` and ``src`` fields, followed by at most
            one ``scrapy.Request`` for the next listing page.
        """
        cover_urls = response.xpath(
            '//div[@class="cover"]/a/img/@data-original'
        ).extract()
        info_blocks = response.xpath("//div[@class='infor']")

        # Covers and info blocks are matched purely by position; zip() stops
        # at the shorter sequence.
        # NOTE(review): if an entry lacks a cover image the pairing would
        # shift for the rest of the page -- confirm against the page markup.
        for info, cover_url in zip(info_blocks, cover_urls):
            # Text fields stay as the lists returned by extract(), so the
            # item shape seen by downstream pipelines is unchanged.
            yield ZhongtuwangItem(
                name=info.xpath(".//h2/a/text()").extract(),
                author=info.xpath(
                    ".//div[@class='author']/a/text()"
                ).extract(),
                publisher=info.xpath(
                    './/div[@class="publisher"]/a/text()'
                ).extract(),
                price=info.xpath(
                    './/div[@class="priceWrap"]'
                    '/span[@class="sellPrice"]/text()'
                ).extract(),
                src=cover_url,
            )

        # Each parsed page schedules exactly one successor, so the counter on
        # the spider advances once per page until max_page is reached.
        if self.page < self.max_page:
            self.page = self.page + 1
            url = self.base_url + str(self.page) + "/"
            self.logger.info("Requesting next page: %s", url)
            yield scrapy.Request(url=url, callback=self.parse)
|