import re

import scrapy


class IpproxySpider(scrapy.Spider):
    """Spider that collects proxy addresses from the kuaidaili.com free list.

    Every usable table row on the start page is emitted as a one-key dict of
    the form ``{'ip': 'http://<ip>:<port>'}``.
    """

    name = 'ipproxy'
    allowed_domains = ['kuaidaili.com']
    start_urls = ['http://kuaidaili.com/free']
    # Per-spider setting overrides: enable the project's downloader
    # middleware and item pipeline, and set the download delay to 5.
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'spider.middlewares.IPDownloaderMiddleware': 543,
        },
        'ITEM_PIPELINES': {
            'spider.pipelines.IPProxyPipeline': 500,
        },
        'DOWNLOAD_DELAY': 5,
    }

    def parse(self, response):
        """Yield one proxy URL item per row of the proxy table.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Yields:
            dict: ``{'ip': 'http://<ip>:<port>'}`` built from the first two
            cells of each table row. Rows whose ip or port cell is missing
            or empty are skipped instead of aborting the whole parse.
        """
        # Walk the rows that actually exist rather than assuming a fixed
        # row count, so short or empty pages do not raise.
        rows = response.xpath('//*[@id="list"]/table/tbody/tr')
        if not rows:
            self.logger.warning('No proxy rows found on %s', response.url)

        for row in rows:
            # normalize-space() returns the cell's text content with
            # surrounding whitespace trimmed, or '' when the cell is absent,
            # so no regex over the raw HTML is needed.
            ip = row.xpath('normalize-space(td[1])').extract_first()
            port = row.xpath('normalize-space(td[2])').extract_first()
            if not ip or not port:
                self.logger.debug(
                    'Skipping row with missing ip/port: ip=%r port=%r',
                    ip, port,
                )
                continue

            proxy_url = 'http://{}:{}'.format(ip, port)
            self.logger.debug('Scraped proxy %s', proxy_url)
            yield {'ip': proxy_url}