You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

149 lines
5.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#手动指定Chrome浏览器的路径
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import ddddocr
import json
# from selenium.webdriver.chrome.service import Service
import time
import os
def register_file(file_name):
    """Ensure the directory ``file_name`` exists, creating parents as needed.

    Despite its name, this creates a *directory* path (it delegates to
    ``os.makedirs``). Calling it for a path that already exists is a no-op.

    Args:
        file_name: Directory path to create.

    Raises:
        OSError: If the directory cannot be created for a reason other than
            the path already existing (e.g. an empty path or no permission).
    """
    try:
        # A single call instead of "check, then create": nothing can slip in
        # between an existence test and the mkdir.
        os.makedirs(file_name, exist_ok=True)
    except FileExistsError:
        # The path exists but is not a directory. The previous implementation
        # left any existing path untouched, so keep doing that.
        pass
def _launch_stealth_chrome(options, driver_path):
    """Start a Chrome session whose pages see ``navigator.webdriver`` as false.

    The override script is registered through CDP so it is evaluated on every
    new document, before the site's own scripts run.
    """
    # NOTE(review): `executable_path` is the Selenium 3-style argument; newer
    # Selenium releases expect a `Service` object instead — confirm the pinned
    # Selenium version before upgrading.
    browser = webdriver.Chrome(options=options, executable_path=driver_path)
    try:
        browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                  get: () => false
                })
            """
        })
    except Exception:
        # Do not leave a half-configured browser process behind.
        browser.quit()
        raise
    return browser


def _digits_or_none(text, corrections):
    """Return ``text`` with known OCR mistakes fixed, or None if a char is unusable.

    ASCII digits are kept, characters listed in ``corrections`` are replaced
    by their mapped digit, and any other character rejects the whole guess.
    """
    fixed = []
    for ch in text:
        if '0' <= ch <= '9':
            fixed.append(ch)
        elif ch in corrections:
            fixed.append(corrections[ch])
        else:
            return None
    return ''.join(fixed)


def _solve_captcha(driver, ocr, image_path, img_selector, corrections):
    """Screenshot and OCR the captcha until a 4-digit answer is recognised.

    After every rejected guess the captcha image is clicked to request a new
    one. There is no retry limit; the loop ends only on a usable guess.
    """
    while True:
        time.sleep(2)  # give the (new) captcha image time to render
        captcha = driver.find_element(By.CSS_SELECTOR, img_selector)
        captcha.screenshot(image_path)
        # Read the screenshot back as bytes for the OCR engine; the context
        # manager closes the handle on every iteration.
        with open(image_path, 'rb') as image_file:
            image_bytes = image_file.read()
        guess = ocr.classification(image_bytes)
        time.sleep(1)
        print(guess)
        answer = _digits_or_none(guess, corrections) if len(guess) == 4 else None
        if answer is not None:
            return answer
        # Recognition failed: click the image to refresh the captcha.
        time.sleep(2)
        driver.find_element(By.CSS_SELECTOR, img_selector).click()


def register_start(username='lchnbnb1', password='admin666'):
    """Log in to qb5.ch through Selenium, save the cookies and replay them.

    One Chrome session performs the form login (including OCR of the captcha),
    its cookies are written to ``end\\data\\cookie_data.json``, and a second
    session loads that file to check that the cookies alone restore the login.
    The second browser is then kept open for 100 seconds for inspection before
    both sessions are shut down.

    Args:
        username: Account name typed into the login form.
        password: Password typed into the login form.

    NOTE(review): the default credentials are committed in source; consider
    supplying them from outside the repository.
    """
    site_url = 'https://www.qb5.ch/'
    driver_path = r'F:/scrapyer/chromedriver2.exe'
    captcha_img = '#main > div.login-main > form > fieldset > p:nth-child(3) > img'
    captcha_input = '#main > div.login-main > form > fieldset > p:nth-child(3) > input'
    captcha_png = 'end\\data\\qb5.png'
    f_path = 'end\\data\\cookie_data.json'

    # Make sure the output directory exists before anything is written to it.
    data_dir = os.path.dirname(f_path)
    if data_dir:
        register_file(data_dir)

    options = webdriver.ChromeOptions()
    # Per the original author's note: the browser at this path is actually
    # version 104 despite the folder name (114 -> 104).
    options.binary_location = "F:/Chrome114/APP/Chrome-bin/chrome.exe"
    # Suppress the automation banner and noisy driver logging.
    options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

    # Manually curated corrections for recurring OCR misreads (wrong char -> digit).
    # NOTE(review): the first and third entries look empty in this copy of the
    # file; they may originally have been look-alike non-ASCII characters —
    # confirm against the original source.
    Error_str = ['','l','']
    Error_int = ['8','1','3']
    Error_identification = dict(zip(Error_str, Error_int))

    def save_cookies(cookie_data, encoding="utf-8"):
        """Write the Selenium cookie list to the JSON cookie file."""
        with open(f_path, "w", encoding=encoding) as f_w:
            json.dump(cookie_data, f_w)

    def load_cookies(encoding="utf-8"):
        """Return the cookie list from the JSON cookie file, or None if it is missing."""
        if not os.path.isfile(f_path):
            return None
        with open(f_path, "r", encoding=encoding) as f_r:
            return json.load(f_r)

    def cookies_login(cookies: list):
        """Replace the verification browser's cookies with ``cookies`` and reload."""
        print(cookies)
        driver2.delete_all_cookies()
        for new_cookies in cookies:
            print(new_cookies)
            driver2.add_cookie(new_cookies)
        driver2.refresh()

    browsers = []
    try:
        # driver2 only verifies that the saved cookies work; driver performs the login.
        driver2 = _launch_stealth_chrome(options, driver_path)
        browsers.append(driver2)
        driver = _launch_stealth_chrome(options, driver_path)
        browsers.append(driver)

        # Open the site. Do not use a VPN: the connection gets dropped.
        # If a VPN is unavoidable, add '--ignore-certificate-errors' to the
        # Chrome options.
        driver.get(site_url)

        # Fill in the login form. Elements are located with CSS selectors
        # (XPath would work too). If the site shows its "something went wrong"
        # page, it has detected Selenium — the navigator.webdriver override
        # applied at launch is the workaround.
        driver.find_element(By.CSS_SELECTOR, 'body > div.header > div.header-right > div > a:nth-child(2)').click()  # open the login page
        time.sleep(1)
        driver.find_element(By.CSS_SELECTOR, '#username').send_keys(username)
        driver.find_element(By.CSS_SELECTOR, '#password').send_keys(password)

        ocr = ddddocr.DdddOcr()  # image-recognition engine for the captcha
        result = _solve_captcha(driver, ocr, captcha_png, captcha_img, Error_identification)
        driver.find_element(By.CSS_SELECTOR, captcha_input).send_keys(result)
        time.sleep(1)
        driver.find_element(By.CSS_SELECTOR, '#main > div.login-main > form > fieldset > div > input.btn').click()  # submit the login form
        time.sleep(2)

        # Capture the logged-in session's cookies.
        cookie_data = driver.get_cookies()
        # For crawler tests by other team members, a single Cookie header can
        # be built by joining each cookie as 'name=value' with '; '.
        time.sleep(2)

        # Round-trip the cookies through the JSON file, then replay them.
        save_cookies(cookie_data)
        cookies = load_cookies()
        time.sleep(1)
        # A page on the target domain must be loaded first; otherwise the
        # browser is still on Chrome's initial 'data:' page.
        driver2.get(site_url)
        cookies_login(cookies)
        time.sleep(1)
        driver2.get(site_url)
        driver2.refresh()
        time.sleep(100)  # keep the window open long enough to inspect the result
    finally:
        # Always shut down whichever browsers were started, even on failure.
        for browser in browsers:
            browser.quit()
# Script entry point: run the login flow only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    register_start()