From 409d790c6b5484a10fcfe263c75779731671fa40 Mon Sep 17 00:00:00 2001
From: p6mtf24ic
Date: Wed, 27 Apr 2022 21:35:28 +0800
Subject: [PATCH] Delete 'jdpc.py'

---
 jdpc.py | 78 ---------------------------------------------------------
 1 file changed, 78 deletions(-)
 delete mode 100644 jdpc.py

diff --git a/jdpc.py b/jdpc.py
deleted file mode 100644
index 4fcb68f..0000000
--- a/jdpc.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import scrapy
-from jd.items import JdItem
-
-class JdpcSpider(scrapy.Spider):
-    name = 'jdpc'
-    #allowed_domains = ['www.baidu.com']
-    # start_urls = ['https://search.jd.com/Search?keyword=%E4%B9%A6%E7%B1%8D&wq=%E4%B9%A6%E7%B1%8D&pvid=7e0642e9f0f44d4daebb57808162dc47&page=1&s=1&click=0']
-    # url = 'https://search.jd.com/Search?keyword=书籍&wq=书籍&pvid=5ec1c4896438490c829018b723b2f994&page={0}&s={1}&click=0'
-    # page_num1 = 3
-    # page_num2 = 56
-    start_urls = ['https://search.jd.com/Search?keyword=%E4%B9%A6%E7%B1%8D&enc=utf-8&wq=%E4%B9%A6%E7%B1%8D&pvid=15ed5ab337ca4f00a23ea5584cb872bd']
-    url = 'https://search.jd.com/Search?keyword=书籍&wq=书籍&pvid=15ed5ab337ca4f00a23ea5584cb872bd&page={0}&s={1}&click=0'
-    page_num1 = 3
-    page_num2 = 56
-
-    def parse(self, response):
-        lis = response.xpath("//div[@id='J_goodsList']/ul/li")
-
-        for li in lis:  # XPath indices start at 1
-            price = li.xpath("./div/div[2]/strong/i/text()").extract()[0]
-            name = li.xpath("./div/div[3]/a/em/text()").extract()[0]
-            #pj = li.xpath("./div/div[5]/strong/a/text()").extract()[0]
-            print(price, name)
-            item = JdItem()
-            item['price'] = price
-            item['name'] = name
-            #item['pj'] = pj
-            yield item  # hand the item off to the pipeline
-        if self.page_num1 <= 51:
-            new_url = self.url.format(self.page_num1, self.page_num2)
-            self.page_num1 = self.page_num1 + 2
-            self.page_num2 = self.page_num2 + 60
-            yield scrapy.Request(new_url, callback=self.parse)
-
-    # def parse(self, response):
-    #     div_list = response.xpath('//ul[@class="gl-warp clearfix"]/li')
-    #     # data = []
-    #     for div in div_list:
-    #         item = JdItem()
-    #         name = div.xpath('./div[@class="gl-i-wrap"]/div[3]/a/em/text()').extract()
-    #         price = div.xpath('./div[@class="gl-i-wrap"]/div[2]/strong/i/text()').extract()
-    #         shop = div.xpath('./div[@class="gl-i-wrap"]/div[7]/span/a/text()').extract()
-    #         author = div.xpath('./div[@class="gl-i-wrap"]/div[4]/span[1]/a/text()').extract()
-    #         press = div.xpath('./div[@class="gl-i-wrap"]/div[4]/span[2]/a/text()').extract()
-    #         name = ''.join(name)
-    #         price = ''.join(price)
-    #         # shop = ''.join(shop)
-    #         author = ''.join(author)
-    #         press = ''.join(press)
-    #         print(name, price)
-    #         item["name"] = name
-    #         item["price"] = price
-    #         yield item
-
-# Data parsing: response is the response object for a successful request
-    # def parse(self, response):  # persistence via the command line
-    #     lis = response.xpath("//div[@id='J_goodsList']/ul/li")
-    #     all_data = []
-    #     for li in lis:  # XPath indices start at 1
-    #         price = li.xpath("./div/div[2]/strong/i/text()").extract()[0]
-    #         name = li.xpath("./div/div[3]/a/em/text()").extract()[0]
-    #         print(price, name)
-    #         dic = {
-    #             'price': price,
-    #             'name': name
-    #         }
-    #         all_data.append(dic)
-    #     # command-line persistence: scrapy crawl jdpc -o ./jdpc.csv
-    #     return all_data
-
-
-
-
-
-
-
-
-
--
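
Note: the deleted spider imports JdItem from jd.items, which this patch does not include. For context, a minimal sketch of what that Item class presumably looks like, inferred from the fields the spider assigns; the class name and module path come from the import statement, while the commented-out pj field is hypothetical, mirroring the spider's commented-out assignment:

    # jd/items.py -- hypothetical reconstruction, not part of this patch
    import scrapy

    class JdItem(scrapy.Item):
        price = scrapy.Field()   # product price text from the search listing
        name = scrapy.Field()    # book title text
        # pj = scrapy.Field()    # extra field used by the commented-out 'pj' assignment

With an item class like this, the pipeline receives dict-like JdItem objects, and the commented-out command "scrapy crawl jdpc -o ./jdpc.csv" would export them to CSV.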