# Log in to qb5.ch with Selenium using a manually specified Chrome binary,
# solve the numeric captcha with ddddocr, then persist the session cookies
# and replay them in a second browser to verify they work.
import json
import os
import time

import ddddocr
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service

_SITE_URL = 'https://www.qb5.ch/'
_DATA_DIR = 'end\\data'
_CAPTCHA_PATH = 'end\\data\\qb5.png'
_CAPTCHA_IMG = '#main > div.login-main > form > fieldset > p:nth-child(3) > img'
_CAPTCHA_LENGTH = 4

# Manually curated corrections for characters the OCR repeatedly misreads
# (collected over many attempts): misread character -> intended digit.
_OCR_CORRECTIONS = {'日': '8', 'l': '1', '引': '3'}

# Injected into every new document so navigator.webdriver reads as false;
# without it the site detects Selenium and shows an error page.
_HIDE_WEBDRIVER_JS = """
    Object.defineProperty(navigator, 'webdriver', {
        get: () => false
    })
"""


def register_file(file_name):
    """Create the directory *file_name* (including parents) if it is missing."""
    if not os.path.exists(file_name):
        # exist_ok guards against another process creating it in between.
        os.makedirs(file_name, exist_ok=True)


def _normalize_captcha(text, corrections):
    """Return *text* with known OCR misreads replaced by digits.

    Returns None as soon as a character is neither an ASCII digit nor a key
    of *corrections*, meaning the recognition result cannot be trusted.
    """
    fixed = []
    for ch in text:
        if '0' <= ch <= '9':
            fixed.append(ch)
        elif ch in corrections:
            fixed.append(corrections[ch])
        else:
            return None
    return ''.join(fixed)


def _read_captcha(driver, ocr):
    """Screenshot and OCR the captcha until an all-digit code is obtained.

    On every failed attempt the captcha image is clicked, which refreshes it,
    and the loop starts over. Returns the corrected code as a string.
    """
    while True:
        time.sleep(2)
        captcha_element = driver.find_element(By.CSS_SELECTOR, _CAPTCHA_IMG)
        captcha_element.screenshot(_CAPTCHA_PATH)
        # Read the image back as bytes; the context manager closes the handle
        # on every iteration instead of leaking one per retry.
        with open(_CAPTCHA_PATH, 'rb') as image_file:
            image_bytes = image_file.read()
        result = ocr.classification(image_bytes)
        time.sleep(1)
        print(result)

        code = None
        if len(result) == _CAPTCHA_LENGTH:
            code = _normalize_captcha(result, _OCR_CORRECTIONS)
        if code is not None:
            return code

        # Recognition failed: click the image to request a fresh captcha.
        time.sleep(2)
        driver.find_element(By.CSS_SELECTOR, _CAPTCHA_IMG).click()


def register_start():
    """Log in through the browser, save the cookies and verify them.

    Two Chrome instances are started: ``driver`` performs the interactive
    login (including the captcha), ``driver2`` loads the saved cookies to
    check that they restore the session. Both are quit when the function
    finishes, whether it succeeds or raises.
    """
    # The screenshot and the cookie file are both written below this directory.
    register_file(_DATA_DIR)

    options = webdriver.ChromeOptions()
    # Actually Chrome 104: because of a driver/browser version mismatch the
    # 104 build has to be used [114 -> 104].
    options.binary_location = "F:/Chrome114/APP/Chrome-bin/chrome.exe"
    # Suppress useless log output.
    options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

    drivers = []
    try:
        # executable_path points Chrome() at the chromedriver.exe to use.
        # Used to verify that the saved cookies are correct.
        driver2 = webdriver.Chrome(options=options, executable_path=r'F:/scrapyer/chromedriver2.exe')
        drivers.append(driver2)
        # Main driver used for the interactive login.
        driver = webdriver.Chrome(options=options, executable_path=r'F:/scrapyer/chromedriver2.exe')
        drivers.append(driver)

        # Bypass the site's automation detection.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": _HIDE_WEBDRIVER_JS})
        driver2.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": _HIDE_WEBDRIVER_JS})

        # Open the page. Do not use a VPN: the connection gets dropped.
        # If a VPN is used anyway, add:
        #     chrome_options = webdriver.ChromeOptions()
        #     chrome_options.add_argument('--ignore-certificate-errors')
        #     driver = webdriver.Chrome(options=chrome_options)
        driver.get(_SITE_URL)
        # time.sleep(5)

        # Enter account and password, then click login: locate each input
        # element first (by CSS selector or XPath), then type into it.
        # If the site shows its error page, it has detected Selenium; the
        # fix is the detection bypass applied above.
        driver.find_element(By.CSS_SELECTOR, 'body > div.header > div.header-right > div > a:nth-child(2)').click()  # open the login form
        time.sleep(1)
        # NOTE(review): credentials are hard-coded in the source.
        driver.find_element(By.CSS_SELECTOR, '#username').send_keys('lchnbnb1')
        driver.find_element(By.CSS_SELECTOR, '#password').send_keys('admin666')

        ocr = ddddocr.DdddOcr()  # image recognition library
        captcha_code = _read_captcha(driver, ocr)

        driver.find_element(By.CSS_SELECTOR, '#main > div.login-main > form > fieldset > p:nth-child(3) > input').send_keys(captcha_code)
        time.sleep(1)
        driver.find_element(By.CSS_SELECTOR, '#main > div.login-main > form > fieldset > div > input.btn').click()  # submit the login form
        time.sleep(2)

        # Cookie handling.
        cookie_data = driver.get_cookies()
        time.sleep(1)
        # Cookie header string for other team members' crawler tests:
        # cookie_total = ''
        # for cookie_i in cookie_data:
        #     cookie_total = cookie_i['name'] + '=' + cookie_i['value'] + '; ' + cookie_total
        # cookie_total = cookie_total[:-2]
        # print(cookie_total)
        time.sleep(1)

        # Selenium cookie file handling.
        f_path = 'end\\data\\cookie_data.json'

        def save_cookies(cookie_data, encoding="utf-8"):
            """Write the cookie list to the JSON file at f_path."""
            with open(f_path, "w", encoding=encoding) as f_w:
                json.dump(cookie_data, f_w)

        def load_cookies(encoding="utf-8"):
            """Return the cookie list stored at f_path, or [] if it is missing."""
            if not os.path.isfile(f_path):
                # An empty list keeps cookies_login iterable instead of
                # failing on None.
                return []
            with open(f_path, "r", encoding=encoding) as f_r:
                return json.load(f_r)

        def cookies_login(cookies: list):
            """Replace driver2's cookies with *cookies* and refresh the page."""
            print(cookies)
            driver2.delete_all_cookies()
            for new_cookies in cookies:
                print(new_cookies)
                driver2.add_cookie(new_cookies)
            driver2.refresh()

        save_cookies(cookie_data)
        cookies = load_cookies()
        time.sleep(1)
        # A get() is required first, otherwise the browser stays on Chrome's
        # default "data;" start page and the cookies cannot be applied.
        driver2.get(_SITE_URL)
        cookies_login(cookies)
        time.sleep(1)
        driver2.get(_SITE_URL)
        driver2.refresh()
        # Leave the verified session open for manual inspection.
        time.sleep(100)
    finally:
        # Always shut down the browsers and their chromedriver processes.
        for started_driver in drivers:
            started_driver.quit()


if __name__ == '__main__':
    register_start()