You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

63 lines
1.6 KiB

6 months ago
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.edge.options import Options
import time
import json
# Run Edge headless so no browser window appears during the scrape.
options = Options()
for flag in ("--headless", "--disable-gpu"):
    options.add_argument(flag)

# Launch the browser (swap to the commented line to watch it run).
driver = webdriver.Edge(options=options)
# driver = webdriver.Edge()

# Accumulators shared by the scraping loop below.
ip_list = []        # IPs in scrape order (duplicates possible — appended per row)
ip_port_dict = {}   # ip -> port mapping
proxies = []        # final {'http': 'http://ip:port'} entries
a = 0               # running count of harvested rows
# Scrape listing pages 1-9 of the free-proxy site and collect IP/port pairs.
# Selenium is used (rather than plain requests) because the table is
# rendered client-side.
for page in range(1, 10):
    driver.get(f'https://www.kuaidaili.com/free/inha/{page}/')
    # Parse the rendered page source with BeautifulSoup.
    soup = BeautifulSoup(driver.page_source, 'lxml')
    for row in soup.select('tbody>tr'):
        # Hoisted: the original queried row.select('td') three times per row.
        tds = row.select('td')
        # Only rows with at least IP and port cells are data rows.
        if len(tds) >= 2:
            a += 1
            ip_l = tds[0].text.strip()
            port_l = tds[1].text.strip()
            ip_list.append(ip_l)
            ip_port_dict[ip_l] = port_l
            print(f'IP代理池正在加载第:{a}')
            # Per-row delay to avoid hammering the site — assumed from the
            # flattened original's statement order; TODO confirm placement.
            time.sleep(0.4)
print('IP代理池加载完成')
# Turn the collected pairs into requests-style proxy dicts.
# Iterates ip_list (not the dict) so the original ordering is kept.
proxies.extend(
    {'http': f'http://{ip}:{ip_port_dict[ip]}'} for ip in ip_list
)
# Persist the proxy pool as pretty-printed UTF-8 JSON.
payload = json.dumps(proxies, ensure_ascii=False, indent=4)
with open('ip代理池.json', 'w', encoding='utf-8') as f:
    f.write(payload)
print("写入文件完成ip代理池.json")
# Plain-text export of the pool — disabled, kept for reference:
# with open('ip代理池.txt', 'w', encoding='UTF-8') as f:
# for i in proxies:
# f.write(str(i) + ',' + "\n")
# print("写入文件完成ip代理池.txt")
# Shut down the headless browser and release its resources.
driver.quit()
# time.sleep(999)