parent
b8642fcca1
commit
f881ba4164
@ -0,0 +1,33 @@
|
||||
import re
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class IpproxySpider(scrapy.Spider):
    """Collect proxy addresses from the kuaidaili.com free-proxy listing.

    Every table row found under the element with id ``list`` is converted
    into an item of the form ``{'ip': 'http://<ip>:<port>'}``, where the IP
    comes from the first cell of the row and the port from the second.
    """

    name = 'ipproxy'
    allowed_domains = ['kuaidaili.com']
    start_urls = ['http://kuaidaili.com/free']

    # Per-spider setting overrides: the downloader middleware and the item
    # pipeline used by this spider, plus a download delay of 5.
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'spider.middlewares.IPDownloaderMiddleware': 543,
        },
        'ITEM_PIPELINES': {
            'spider.pipelines.IPProxyPipeline': 500,
        },
        'DOWNLOAD_DELAY': 5,
    }

    def parse(self, response):
        """Yield one proxy item per usable row of the listing table.

        Args:
            response: The downloaded listing page.

        Yields:
            dict: ``{'ip': 'http://<ip>:<port>'}`` for each row that has
            non-empty text in both its first and second cell.
        """
        # Iterate over the rows that actually exist instead of assuming a
        # fixed row count; a short or empty table must not crash the spider.
        rows = response.xpath('//*[@id="list"]/table/tbody/tr')
        for row in rows:
            # Select the text nodes directly rather than stripping tags from
            # the serialized <td> element with a regular expression.
            ip = (row.xpath('./td[1]/text()').extract_first() or '').strip()
            port = (row.xpath('./td[2]/text()').extract_first() or '').strip()

            if not ip or not port:
                # Rows without both cells cannot form a proxy URL; skip them.
                self.logger.debug(
                    'Skipping row without ip/port: ip=%r port=%r', ip, port
                )
                continue

            proxy_url = 'http://' + ip + ':' + port
            self.logger.debug('Found proxy %s', proxy_url)
            yield {'ip': proxy_url}
|
||||
|
Loading…
Reference in new issue