|
|
|
@ -0,0 +1,148 @@
|
|
|
|
|
#手动指定Chrome浏览器的路径
|
|
|
|
|
from selenium import webdriver
|
|
|
|
|
from selenium.webdriver import ActionChains
|
|
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
|
import ddddocr
|
|
|
|
|
import json
|
|
|
|
|
# from selenium.webdriver.chrome.service import Service
|
|
|
|
|
import time
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
def register_file(file_name):
    """Create the directory *file_name*, including missing parents.

    Despite the parameter name, the path is created as a directory.
    If something already exists at that path the call is a no-op.

    Args:
        file_name: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created for a reason other
            than the path already existing (e.g. an empty path or a
            permission problem).
    """
    try:
        os.makedirs(file_name)
    except FileExistsError:
        # The path is already there. Attempting the creation and handling
        # this error (instead of checking os.path.exists() first) closes
        # the window in which another process could create the directory
        # between the check and the makedirs() call.
        pass
|
|
|
|
|
|
|
|
|
|
# JavaScript injected into every new document so that pages reading
# ``navigator.webdriver`` get ``false`` (bypasses the site's automation check).
_HIDE_WEBDRIVER_JS = """
    Object.defineProperty(navigator, 'webdriver', {
        get: () => false
    })
"""

# The captcha image on the login form; clicking it requests a new captcha.
_CAPTCHA_IMG_CSS = '#main > div.login-main > form > fieldset > p:nth-child(3) > img'


def _hide_webdriver_flag(driver):
    """Register the navigator.webdriver override on *driver* via CDP."""
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument",
        {"source": _HIDE_WEBDRIVER_JS},
    )


def _normalize_captcha(text, corrections):
    """Replace known OCR misreads in *text* with the digits they stand for.

    Returns the corrected string, or ``None`` as soon as a character is
    neither an ASCII digit nor a key of *corrections*.
    """
    fixed = []
    for char in text:
        if '0' <= char <= '9':
            fixed.append(char)
        elif char in corrections:
            fixed.append(corrections[char])
        else:
            return None
    return ''.join(fixed)


def _solve_captcha(driver, ocr, image_path):
    """Screenshot and OCR the login captcha until a 4-digit code is read.

    After each failed attempt the captcha image is clicked to refresh it.
    The loop has no upper bound on the number of attempts.
    """
    # Misreads collected by hand over repeated runs.
    corrections = {'日': '8', 'l': '1', '引': '3'}
    while True:
        time.sleep(2)
        captcha = driver.find_element(By.CSS_SELECTOR, _CAPTCHA_IMG_CSS)
        captcha.screenshot(image_path)
        # Context manager so the handle is closed on every attempt.
        with open(image_path, 'rb') as image_file:
            image_bytes = image_file.read()
        guess = ocr.classification(image_bytes)
        time.sleep(1)
        print(guess)

        code = _normalize_captcha(guess, corrections) if len(guess) == 4 else None
        if code is not None:
            return code

        # Recognition failed: click the captcha to get a fresh image.
        time.sleep(2)
        driver.find_element(By.CSS_SELECTOR, _CAPTCHA_IMG_CSS).click()


def register_start():
    """Log in to https://www.qb5.ch/ with Selenium and replay the cookies.

    Two Chrome sessions are started with identical options:

    * ``driver`` opens the site, fills in the login form, reads the
      captcha with ddddocr and submits the form.
    * ``driver2`` is used to check the cookies: the cookies captured from
      ``driver`` are written to ``cookie_data.json``, read back, and added
      to ``driver2`` before the site is reloaded.

    Side effects: writes ``qb5.png`` and ``cookie_data.json`` into the
    relative data directory (created if missing), prints the OCR guesses
    and the cookies, keeps the second browser open for 100 seconds, and
    finally quits both browsers even if a step fails.

    Returns:
        None.
    """
    chromedriver_path = r'F:/scrapyer/chromedriver2.exe'
    site_url = 'https://www.qb5.ch/'
    captcha_png = 'end\\data\\qb5.png'
    f_path = 'end\\data\\cookie_data.json'

    # Make sure the output directory exists before the screenshot and the
    # cookie file are written into it.
    for target in (captcha_png, f_path):
        parent = os.path.dirname(target)
        if parent:
            os.makedirs(parent, exist_ok=True)

    options = webdriver.ChromeOptions()
    # Per the original author's note: the folder is named Chrome114 but the
    # binary is really version 104, which is the version this setup needs.
    options.binary_location = "F:/Chrome114/APP/Chrome-bin/chrome.exe"
    # Keep Chrome from printing useless log output.
    options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

    # Cookie file handling for Selenium.
    def save_cookies(cookie_data, encoding="utf-8"):
        """Dump *cookie_data* as JSON to the cookie file."""
        with open(f_path, "w", encoding=encoding) as f_w:
            json.dump(cookie_data, f_w)

    def load_cookies(encoding="utf-8"):
        """Return the cookies stored in the cookie file, or [] if absent."""
        if not os.path.isfile(f_path):
            # An empty list keeps cookies_login() iterable-safe.
            return []
        with open(f_path, "r", encoding=encoding) as f_r:
            return json.load(f_r)

    def cookies_login(cookies: list):
        """Replace driver2's cookies with *cookies* and refresh the page."""
        print(cookies)
        driver2.delete_all_cookies()
        for new_cookies in cookies:
            print(new_cookies)
            driver2.add_cookie(new_cookies)
        driver2.refresh()

    sessions = []
    try:
        # The chromedriver path is passed through executable_path.
        # NOTE(review): newer Selenium releases replace executable_path
        # with a Service object - confirm the pinned Selenium version
        # before upgrading.
        driver2 = webdriver.Chrome(options=options, executable_path=chromedriver_path)  # verifies the cookies
        sessions.append(driver2)
        driver = webdriver.Chrome(options=options, executable_path=chromedriver_path)  # performs the login
        sessions.append(driver)

        # Bypass the site's automation detection; without it the site shows
        # an error page once it notices Selenium.
        _hide_webdriver_flag(driver)
        _hide_webdriver_flag(driver2)

        # Open the site. Do not use a VPN: the connection gets dropped.
        # If a VPN is unavoidable, add the Chrome argument
        # '--ignore-certificate-errors' to the options.
        driver.get(site_url)

        # Open the login form and fill in the account credentials.
        # NOTE(review): credentials are hard-coded; consider moving them
        # to configuration.
        driver.find_element(By.CSS_SELECTOR, 'body > div.header > div.header-right > div > a:nth-child(2)').click()
        time.sleep(1)
        driver.find_element(By.CSS_SELECTOR, '#username').send_keys('lchnbnb1')
        driver.find_element(By.CSS_SELECTOR, '#password').send_keys('admin666')

        ocr = ddddocr.DdddOcr()  # image recognition library
        result = _solve_captcha(driver, ocr, captcha_png)
        driver.find_element(By.CSS_SELECTOR, '#main > div.login-main > form > fieldset > p:nth-child(3) > input').send_keys(result)
        time.sleep(1)

        # Submit the login form.
        driver.find_element(By.CSS_SELECTOR, '#main > div.login-main > form > fieldset > div > input.btn').click()
        time.sleep(2)

        # Capture the cookies of the logged-in session.
        cookie_data = driver.get_cookies()
        time.sleep(1)
        time.sleep(1)

        save_cookies(cookie_data)
        cookies = load_cookies()
        time.sleep(1)

        # The site must be loaded first, otherwise driver2 stays on
        # Chrome's initial "data:" page.
        driver2.get(site_url)
        cookies_login(cookies)
        time.sleep(1)
        driver2.get(site_url)
        driver2.refresh()
        # Presumably keeps the window open for manual inspection.
        time.sleep(100)
    finally:
        # Always shut the browsers down so no Chrome/chromedriver
        # processes are left behind, even when a step above raised.
        for session in sessions:
            session.quit()
|
|
|
|
|
|
|
|
|
|
# Run the login flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    register_start()
|