You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

34 lines
983 B

import re
import scrapy
class IpproxySpider(scrapy.Spider):
    """Scrape proxy addresses from the table on the kuaidaili.com free list.

    Every item yielded by :meth:`parse` is a plain dict with a single key,
    ``'ip'``, whose value is a proxy URL of the form ``http://<ip>:<port>``.
    """

    name = 'ipproxy'
    allowed_domains = ['kuaidaili.com']
    start_urls = ['http://kuaidaili.com/free']

    # Per-spider settings: route requests through the project's IP downloader
    # middleware, send items to the proxy pipeline, and set DOWNLOAD_DELAY.
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'spider.middlewares.IPDownloaderMiddleware': 543,
        },
        'ITEM_PIPELINES': {
            'spider.pipelines.IPProxyPipeline': 500,
        },
        'DOWNLOAD_DELAY': 5,
    }

    # Upper bound on the number of table rows read from one response.
    max_rows = 15

    def parse(self, response):
        """Yield one ``{'ip': 'http://<ip>:<port>'}`` dict per usable table row.

        Only the first ``max_rows`` rows of the ``#list`` table are examined.
        Rows whose first or second cell is missing or empty are skipped
        instead of aborting the whole parse.

        Args:
            response: The Scrapy response for a proxy list page.

        Yields:
            dict: A mapping with the single key ``'ip'``.
        """
        rows = response.xpath('//*[@id="list"]/table/tbody/tr')
        for row in rows[:self.max_rows]:
            # string() collapses the cell to its text content, so there is no
            # need to regex-match serialized HTML, and a missing cell becomes
            # '' rather than None.
            ip = row.xpath('string(./td[1])').extract_first(default='').strip()
            port = row.xpath('string(./td[2])').extract_first(default='').strip()
            if not ip or not port:
                self.logger.debug(
                    'Skipping row without ip/port: ip=%r port=%r', ip, port
                )
                continue

            proxy_url = 'http://' + ip + ':' + port
            self.logger.debug('Found proxy %s', proxy_url)
            yield {'ip': proxy_url}