# ipproxy.py -- recovered from the git patch "ADD file via upload"
# (commit f881ba4164b21c391df2964c6a15a9020640e976); diff markers removed.
import scrapy


class IpproxySpider(scrapy.Spider):
    """Collect proxy addresses from the kuaidaili.com free-proxy table.

    For each of the first ``MAX_ROWS`` rows of the table inside the element
    with ``id="list"``, the spider reads the first cell (IP) and the second
    cell (port) and yields ``{'ip': 'http://<ip>:<port>'}``.
    """

    name = 'ipproxy'
    allowed_domains = ['kuaidaili.com']
    start_urls = ['http://kuaidaili.com/free']
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'spider.middlewares.IPDownloaderMiddleware': 543,
        },
        'ITEM_PIPELINES': {
            'spider.pipelines.IPProxyPipeline': 500,
        },
        'DOWNLOAD_DELAY': 5,
    }

    # Upper bound on the number of table rows read from one page.
    MAX_ROWS = 15

    def parse(self, response):
        """Yield one proxy item per usable table row.

        Args:
            response: The downloaded listing page.

        Yields:
            dict: ``{'ip': 'http://<ip>:<port>'}`` for every row whose first
            two cells both contain text. Rows with a missing or empty cell
            are skipped instead of aborting the whole callback.
        """
        rows = response.xpath('//*[@id="list"]/table/tbody/tr')
        for row in rows[:self.MAX_ROWS]:
            # Relative XPath: look only inside the current row, and take the
            # text node directly rather than stripping tags with a regex.
            ip = (row.xpath('./td[1]/text()').extract_first() or '').strip()
            port = (row.xpath('./td[2]/text()').extract_first() or '').strip()
            if not ip or not port:
                self.logger.debug(
                    'Skipping row without ip/port: ip=%r port=%r', ip, port
                )
                continue

            proxy_url = 'http://{}:{}'.format(ip, port)
            self.logger.debug('Found proxy %s', proxy_url)
            yield {'ip': proxy_url}