Delete 'VulCrawler/spiders'

4 years ago · c20249c0cd
parent 79573409e2
commit c20249c0cd
1 changed files with 0 additions and 50 deletions
--- a/VulCrawler/spiders
+++ b/VulCrawler/spiders
@@ -1,50 +0,0 @@
-import scrapy
-from bs4 import BeautifulSoup
-from scrapy_redis.spiders import RedisSpider
-from VulCrawl.items import VulcrawlItem
-from VulCrawl.items import Vulcrawl2Item
-
-# scrapy.Spider
-class VulcrawlSpider(RedisSpider):
-    name = 'vulcrawl'
-    #start_urls = ['http://www.cnnvd.org.cn/web/vulnerability/querylist.tag?pageno=1']
-    redis_key = "Vul"
-    page = [1, 1, 1]
-
-    def parse_tow(self, response):
-        html = response.body
-        soup = BeautifulSoup(html, "lxml")
-        tr_list = soup.findAll('tr')
-        item = Vulcrawl2Item()
-        del (tr_list[0])
-        for i in tr_list:
-            td = i.findAll('td')
-            item['Vulnerability_Type'] = td[2].string.strip()
-            item['time'] = td[0].string
-            item['title'] = td[1].string.strip()
-            item['url'] = "https://wooyun.m4d3bug.com/"+td[1].find('a')['href']
-
-            yield item
-            #print(info_one)
-
-    def parse(self, response):
-        #print(response.text())
-        if self.page[0] < 10:#10000:
-            li_list = response.xpath("/html/body/div[4]/div/div[1]/div/div[2]/ul/li")
-            for i in li_list:
-                item = VulcrawlItem()
-                item['title'] = i.xpath("./div[1]/a/text()").extract()[0].strip()
-                item['Numbering'] = i.xpath("./div[1]/p/a/text()").extract()[0]
-                item['url'] = "http://www.cnnvd.org.cn/" + i.xpath("./div[1]/a/@href").extract()[0].strip()
-                item['time'] = i.xpath("./div[2]/text()").extract()[2].strip()
-                yield item
-
-            url = 'http://www.cnnvd.org.cn/web/vulnerability/querylist.tag?pageno=' + str(self.page[0])
-            self.page[0] += 1
-            yield scrapy.Request(url=url, callback=self.parse)
-        # elif self.page[1] <= 2: #4400:
-        #     url2 = "https://wooyun.m4d3bug.com/search?keywords=&&content_search_by=by_bugs&&search_by_html=False&&page=" + str(self.page[1])
-        #     self.page[1] += 1
-        #     yield scrapy.Request(url=url2, callback=self.parse_tow)
-        #     #pass
-