You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

63 lines
1.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.edge.options import Options
import time
import json
# Scrape the free proxy listing pages on kuaidaili.com with a headless Edge
# browser and write the results to a JSON file as a list of
# {'http': 'http://<ip>:<port>'} mappings.

# Configure Edge to run in headless mode.
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")

# Start the browser.
driver = webdriver.Edge(options=options)

ip_list = []        # every scraped IP, in the order rows were read
ip_port_dict = {}   # IP -> last port seen for that IP (lossy if an IP repeats)
proxies = []        # one proxy mapping per scraped table row
a = 0               # running count of scraped proxy rows

try:
    # Listing pages 1 through 9.
    for i in range(1, 10):
        # Load the page and take the HTML as rendered by the browser.
        driver.get(f'https://www.kuaidaili.com/free/inha/{i}/')
        html = driver.page_source
        soup = BeautifulSoup(html, 'lxml')

        for row in soup.select('tbody>tr'):
            cells = row.select('td')
            # Skip rows that do not carry at least two <td> cells; the first
            # two cells are read as IP and port respectively.
            if len(cells) < 2:
                continue
            a += 1
            ip_l = cells[0].text.strip()
            port_l = cells[1].text.strip()
            ip_list.append(ip_l)
            ip_port_dict[ip_l] = port_l
            # Build the entry from this row's own port. Looking the port up
            # by IP afterwards would give every row of a repeated IP the
            # last-seen port instead of its own.
            proxies.append({'http': f'http://{ip_l}:{port_l}'})

        print(f'IP代理池正在加载第:{a}')
        # Short pause before requesting the next page (presumably to avoid
        # hitting the site too quickly).
        time.sleep(0.4)
finally:
    # Always shut the browser down, even if a page load or parse step fails,
    # so no headless Edge process is left running.
    driver.quit()
print('IP代理池加载完成')

# Write the proxy list to a JSON file.
with open('ip代理池.json', 'w', encoding='utf-8') as f:
    json.dump(proxies, f, ensure_ascii=False, indent=4)
print("写入文件完成ip代理池.json")