Update vulcrawl2.py

master
pmh9c3ri2 3 years ago
parent 53c61f24b9
commit 916216bb5b

@ -1,5 +1,5 @@
import scrapy import scrapy
from VulCrawl.items import Vulcrawl2Item from VulCrawl.items import VulcrawlItem
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from scrapy_redis.spiders import RedisCrawlSpider from scrapy_redis.spiders import RedisCrawlSpider
from scrapy_redis.spiders import RedisSpider from scrapy_redis.spiders import RedisSpider
@ -12,24 +12,25 @@ class Vulcrawl2Spider(RedisSpider):
page = 2 page = 2
def parse(self, response): def parse(self, response):
html = response.body if self.page < 10:
soup = BeautifulSoup(html, "lxml") html = response.body
tr_list = soup.findAll('tr') soup = BeautifulSoup(html, "lxml")
item = Vulcrawl2Item() tr_list = soup.findAll('tr')
del (tr_list[0]) item = VulcrawlItem()
for i in tr_list: del (tr_list[0])
info_one = {} for i in tr_list:
td = i.findAll('td') info_one = {}
# info_one['time'] = td[0].string td = i.findAll('td')
# info_one['title'] = td[1].string.strip() # info_one['time'] = td[0].string
# info_one['Vulnerability_Type'] = td[2].string.strip() # info_one['title'] = td[1].string.strip()
item['Vulnerability_Type'] = td[2].string.strip() # info_one['Vulnerability_Type'] = td[2].string.strip()
item['title'] = td[0].string item['Numbering'] = td[2].string.strip()
item['time'] = td[1].string.strip() item['time'] = td[0].string
item['url'] = "https://wooyun.m4d3bug.com/" + td[1].find('a')['href'] item['title'] = td[1].string.strip()
item['url'] = "https://wooyun.m4d3bug.com/" + td[1].find('a')['href']
yield item yield item
# print(info_one) # print(info_one)
url2 = "https://wooyun.m4d3bug.com/search?keywords=&&content_search_by=by_bugs&&search_by_html=False&&page=" + str(self.page) self.page += 1
self.page += 1 url2 = "https://wooyun.m4d3bug.com/search?keywords=&&content_search_by=by_bugs&&search_by_html=False&&page=" + str(self.page)
yield scrapy.Request(url=url2, callback=self.parse) yield scrapy.Request(url=url2, callback=self.parse)

Loading…
Cancel
Save