parent
e42c7c7f96
commit
409d790c6b
@ -1,78 +0,0 @@
|
||||
import scrapy
|
||||
from jd.items import JdItem
|
||||
|
||||
class JdpcSpider(scrapy.Spider):
    """Spider that crawls JD.com search-result pages for the keyword 书籍 (books).

    Starts from the first results page, extracts the price and title of every
    listed product, yields them as ``JdItem``s (handed to the item pipeline),
    and follows pagination up to page 51.  JD encodes pagination as an odd
    ``page`` number stepping by 2 and an item offset ``s`` growing by 60.
    """

    name = 'jdpc'
    # allowed_domains intentionally unset so paginated requests are not filtered.

    start_urls = ['https://search.jd.com/Search?keyword=%E4%B9%A6%E7%B1%8D&enc=utf-8&wq=%E4%B9%A6%E7%B1%8D&pvid=15ed5ab337ca4f00a23ea5584cb872bd']
    # URL template for follow-up pages: {0} = page number, {1} = item offset.
    url = 'https://search.jd.com/Search?keyword=书籍&wq=书籍&pvid=15ed5ab337ca4f00a23ea5584cb872bd&page={0}&s={1}&click=0'
    page_num1 = 3   # next page number to request (odd series: 3, 5, 7, ...)
    page_num2 = 56  # next `s` offset (grows by 60 per page)

    def parse(self, response):
        """Parse one search-results page.

        Yields one ``JdItem`` per product found in ``#J_goodsList`` and,
        while ``page_num1 <= 51``, a follow-up ``Request`` for the next page
        that is parsed by this same callback.

        NOTE(review): mutating the class-level counters on ``self`` relies on
        Scrapy creating a single spider instance per crawl — confirm that
        assumption holds if this spider is ever run differently.
        """
        lis = response.xpath("//div[@id='J_goodsList']/ul/li")
        for li in lis:  # XPath child indices start at 1
            # extract_first() returns None instead of raising IndexError when
            # a tile (e.g. an ad/promo slot) has no price or title node.
            price = li.xpath("./div/div[2]/strong/i/text()").extract_first()
            name = li.xpath("./div/div[3]/a/em/text()").extract_first()
            if price is None or name is None:
                continue
            # pj = li.xpath("./div/div[5]/strong/a/text()").extract_first()
            print(price, name)
            item = JdItem()
            item['price'] = price
            item['name'] = name
            # item['pj'] = pj
            yield item  # hand the item to the configured pipeline

        if self.page_num1 <= 51:
            new_url = self.url.format(self.page_num1, self.page_num2)
            self.page_num1 += 2
            self.page_num2 += 60
            yield scrapy.Request(new_url, callback=self.parse)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in new issue