ADD file via upload

main
pjfk9mlfx 1 year ago
parent 26fa87b3cd
commit 2eaeb7fd6b

@ -0,0 +1,148 @@
# Manually specify the path to the Chrome browser binary.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import ddddocr
import json
# from selenium.webdriver.chrome.service import Service
import time
import os
def register_file(file_name):
    """Create the directory ``file_name`` (and any missing parents).

    Nothing happens when the path already exists, whether it is a
    directory or a regular file.

    Args:
        file_name: Path of the directory to create.
    """
    # EAFP: attempt the creation directly instead of checking first, so a
    # directory that appears between "check" and "create" cannot cause a crash.
    try:
        os.makedirs(file_name)
    except FileExistsError:
        # The path is already there; keep the original silent no-op behaviour.
        pass
def register_start():
    """Log in to qb5.ch through an OCR-solved captcha, then replay the cookies.

    Two Chrome sessions are started. ``driver`` performs the interactive
    login (user name, password, captcha). The cookies it receives are written
    to a JSON file, read back, and injected into ``driver2`` to check that the
    saved cookies are accepted by a fresh session.

    Both browsers are always shut down when the function exits, whether it
    finishes normally or raises.

    Returns:
        None.
    """
    site_url = 'https://www.qb5.ch/'
    captcha_img_selector = '#main > div.login-main > form > fieldset > p:nth-child(3) > img'
    captcha_png = 'end\\data\\qb5.png'
    f_path = 'end\\data\\cookie_data.json'
    chromedriver_path = r'F:/scrapyer/chromedriver2.exe'

    # Script evaluated before every document loads so that
    # navigator.webdriver reads false (works around automation detection).
    hide_webdriver_js = (
        "\n"
        "Object.defineProperty(navigator, 'webdriver', {\n"
        "get: () => false\n"
        "})\n"
    )

    options = webdriver.ChromeOptions()
    # Original author's note: the driver really needs the Chrome 104 build,
    # even though the directory name says 114.
    options.binary_location = "F:/Chrome114/APP/Chrome-bin/chrome.exe"
    # Suppress the automation banner and noisy driver logging.
    options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

    def save_cookies(cookie_data, encoding="utf-8"):
        """Write the Selenium cookie list to ``f_path`` as JSON."""
        with open(f_path, "w", encoding=encoding) as f_w:
            json.dump(cookie_data, f_w)

    def load_cookies(encoding="utf-8"):
        """Return the cookie list stored in ``f_path``, or None if it is missing."""
        if not os.path.isfile(f_path):
            return None
        with open(f_path, "r", encoding=encoding) as f_r:
            return json.load(f_r)

    # driver2 is only used at the end, to verify that the saved cookies work.
    driver2 = webdriver.Chrome(options=options, executable_path=chromedriver_path)
    driver = None
    try:
        # driver performs the actual login.
        driver = webdriver.Chrome(options=options, executable_path=chromedriver_path)

        def cookies_login(cookies: list):
            """Replace driver2's cookies with ``cookies`` and reload the page."""
            print(cookies)
            driver2.delete_all_cookies()
            for new_cookies in cookies:
                print(new_cookies)
                driver2.add_cookie(new_cookies)
            driver2.refresh()

        # Bypass the site's Selenium detection in both sessions.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": hide_webdriver_js})
        driver2.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": hide_webdriver_js})

        # Open the site. Original author's note: do not use a VPN, the
        # connection gets dropped. If a VPN is required, create the driver
        # with the '--ignore-certificate-errors' Chrome argument instead.
        driver.get(site_url)

        # Open the login form, then fill in the account and password.
        # Original author's note: if the site shows its "oops, something went
        # wrong" page, it has detected Selenium; the navigator.webdriver
        # override above is the workaround.
        driver.find_element(By.CSS_SELECTOR, 'body > div.header > div.header-right > div > a:nth-child(2)').click()
        time.sleep(1)
        # NOTE(review): credentials are hard-coded; consider loading them
        # from configuration or the environment.
        driver.find_element(By.CSS_SELECTOR, '#username').send_keys('lchnbnb1')
        driver.find_element(By.CSS_SELECTOR, '#password').send_keys('admin666')

        # Manually collected corrections for characters the OCR tends to
        # misread as letters instead of digits.
        # NOTE(review): two of the keys are empty strings, which can never
        # equal a single character; they look like characters lost in a
        # copy/paste. Confirm against the original file.
        Error_str = ['', 'l', '']
        Error_int = ['8', '1', '3']
        Error_identification = dict(zip(Error_str, Error_int))

        ocr = ddddocr.DdddOcr()  # captcha recognition model
        while True:
            time.sleep(2)
            captcha = driver.find_element(By.CSS_SELECTOR, captcha_img_selector)
            captcha.screenshot(captcha_png)
            # Read the screenshot back; the context manager closes the file
            # on every attempt instead of leaking one handle per retry.
            with open(captcha_png, 'rb') as f:
                image_bytes = f.read()
            result = ocr.classification(image_bytes)
            time.sleep(1)
            print(result)

            corrected = _normalize_captcha(result, Error_identification)
            if corrected is not None:
                result = corrected
                break

            # Recognition failed: click the image to request a new captcha.
            time.sleep(2)
            driver.find_element(By.CSS_SELECTOR, captcha_img_selector).click()

        driver.find_element(By.CSS_SELECTOR, '#main > div.login-main > form > fieldset > p:nth-child(3) > input').send_keys(result)
        time.sleep(1)
        # Submit the login form.
        driver.find_element(By.CSS_SELECTOR, '#main > div.login-main > form > fieldset > div > input.btn').click()
        time.sleep(2)

        # Grab the session cookies produced by the login.
        cookie_data = driver.get_cookies()
        time.sleep(1)
        # Cookie header string for teammates testing plain HTTP crawlers:
        # cookie_total = ''
        # for cookie_i in cookie_data:
        #     cookie_total = cookie_i['name'] + '=' + cookie_i['value'] + '; ' + cookie_total
        # cookie_total = cookie_total[:-2]
        # print(cookie_total)
        time.sleep(1)

        # Round-trip the cookies through the JSON file and replay them.
        save_cookies(cookie_data)
        cookies = load_cookies()
        time.sleep(1)
        # A page on the target domain must be loaded first; otherwise the
        # browser is still on Chrome's default "data:" page and the cookies
        # cannot be attached.
        driver2.get(site_url)
        cookies_login(cookies)
        time.sleep(1)
        driver2.get(site_url)
        driver2.refresh()
        # Keep the window open long enough to inspect the result by hand.
        time.sleep(100)
    finally:
        # Always release the browser / chromedriver processes.
        try:
            if driver is not None:
                driver.quit()
        finally:
            driver2.quit()


def _normalize_captcha(text, corrections, length=4):
    """Return ``text`` as an all-digit captcha code, or None if unusable.

    Args:
        text: Raw string returned by the OCR engine.
        corrections: Mapping from a misread character to the digit it
            stands for.
        length: Required number of characters in the captcha.

    Returns:
        The corrected string when ``text`` has exactly ``length`` characters
        and each one is an ASCII digit or a key of ``corrections``;
        otherwise None.
    """
    if len(text) != length:
        return None
    fixed = []
    for ch in text:
        if '0' <= ch <= '9':
            fixed.append(ch)
        elif ch in corrections:
            fixed.append(corrections[ch])
        else:
            return None
    return ''.join(fixed)
# Script entry point: run the login flow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    register_start()
Loading…
Cancel
Save