|
|
|
|
@ -1,60 +0,0 @@
|
|
|
|
|
import scrapy
|
|
|
|
|
from jd.items import JdItem
|
|
|
|
|
|
|
|
|
|
class JdpcSpider(scrapy.Spider):
    """Spider that scrapes book listings (price, name, shop) from JD search pages.

    Starts from a fixed search-results URL for the keyword "书籍" (books),
    yields one ``JdItem`` per real (non-advertisement) product, and follows
    pagination until JD's ``page`` parameter exceeds 199.
    """

    # Spider name used by `scrapy crawl jdpc`.
    name = 'jdpc'

    # First search-results page (keyword is the URL-encoded form of "书籍").
    start_urls = ['https://search.jd.com/Search?keyword=%E4%B9%A6%E7%B1%8D&enc=utf-8&wq=%E4%B9%A6%E7%B1%8D&pvid=15ed5ab337ca4f00a23ea5584cb872bd']

    # Template for subsequent pages; {0} fills the `page` param, {1} the `s` offset.
    url = 'https://search.jd.com/Search?keyword=书籍&wq=书籍&pvid=15ed5ab337ca4f00a23ea5584cb872bd&page={0}&s={1}&click=0'

    page_num1 = 3   # JD's `page` parameter advances in steps of 2 (odd pages)
    page_num2 = 56  # JD's `s` result-offset parameter advances in steps of 60
    count = 1       # running counter of scraped (non-ad) items

    def parse(self, response):
        """Parse one search-results page.

        Yields a ``JdItem`` for every non-advertisement product on the page,
        then yields a ``scrapy.Request`` for the next page while pagination
        remains within bounds.
        """
        # All product <li> entries on the page.
        lis = response.xpath("//div[@id='J_goodsList']/ul/li")

        for li in lis:
            # extract_first() returns None instead of raising IndexError
            # when the node is missing (e.g. on malformed/ad entries).
            price = li.xpath("./div/div[2]/strong/i/text()").extract_first()
            name = li.xpath("./div/div[3]/a/em/text()").extract_first()

            # The shop name may be split across several text nodes; join them.
            shop = ''.join(li.xpath("./div/div[6]/a/text()").extract())

            # Every page carries ads whose shop field comes back empty —
            # skip those (and any entry missing a price or name).
            if len(shop) > 2 and price is not None and name is not None:
                print(self.count, price, name, shop)

                item = JdItem()
                item['price'] = price
                item['name'] = name
                item['shop'] = shop
                # Record the counter BEFORE incrementing so the stored value
                # matches the number just printed for this item.
                item['count'] = self.count
                self.count += 1

                yield item  # hand the item to the pipeline

        # Pagination: JD uses odd `page` values (1, 3, 5, ...) and an `s`
        # offset growing by 60 per logical page.
        if self.page_num1 <= 199:
            new_url = self.url.format(self.page_num1, self.page_num2)
            self.page_num1 += 2
            self.page_num2 += 60
            # NOTE(review): the item counter must NOT be bumped here — doing
            # so once per page (as before) skews the per-item numbering.
            # Re-enter parse() on the next page.
            yield scrapy.Request(new_url, callback=self.parse)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|