"""Log in to https://www.qb5.ch/ with Selenium and verify the session cookies.

The script drives two Chrome instances:

* ``driver`` opens the login form, fills in the credentials, reads the
  captcha with ``ddddocr`` and submits the form;
* ``driver2`` replays the cookies captured from ``driver`` to check that they
  are sufficient to restore the logged-in session.

Reconstructed from a git diff that added ``register.py`` as a new file.
"""
import contextlib
import json
import os
import time

import ddddocr
from selenium import webdriver
from selenium.webdriver import ActionChains  # kept: may be used by code outside this view
from selenium.webdriver.common.by import By
# from selenium.webdriver.chrome.service import Service

# Manually specified path of the Chrome browser binary.
# The directory is named "Chrome114" but, per the original author, the binary
# is actually version 104 (the matching chromedriver requires 104).
_CHROME_BINARY = "F:/Chrome114/APP/Chrome-bin/chrome.exe"
_CHROMEDRIVER_PATH = r'F:/scrapyer/chromedriver2.exe'

_SITE_URL = 'https://www.qb5.ch/'
_CAPTCHA_IMAGE_PATH = 'end\\data\\qb5.png'
_COOKIE_FILE_PATH = 'end\\data\\cookie_data.json'

# CSS selectors of the page elements the script interacts with.
_LOGIN_LINK = 'body > div.header > div.header-right > div > a:nth-child(2)'
_CAPTCHA_IMG = '#main > div.login-main > form > fieldset > p:nth-child(3) > img'
_CAPTCHA_INPUT = '#main > div.login-main > form > fieldset > p:nth-child(3) > input'
_SUBMIT_BUTTON = '#main > div.login-main > form > fieldset > div > input.btn'

# Script injected before every document so that ``navigator.webdriver`` reads
# as false; the site shows an error page when it detects Selenium.
_STEALTH_JS = """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => false
            })
          """

# Manually curated OCR corrections collected over many attempts: characters
# the recogniser tends to emit in place of the digit they are mapped to.
_CAPTCHA_FIXES = str.maketrans({'日': '8', 'l': '1', '引': '3'})


def register_file(file_name):
    """Create the directory *file_name* (and any missing parents).

    Does nothing when the directory already exists.  ``exist_ok=True`` replaces
    the previous exists-then-create check, which could fail if the directory
    appeared between the two calls.

    Raises:
        FileExistsError: if *file_name* exists but is not a directory.
    """
    os.makedirs(file_name, exist_ok=True)


def _ensure_parent_dir(path):
    """Create the directory that will contain *path*, if it has one."""
    parent = os.path.dirname(path)
    if parent:
        register_file(parent)


def _normalize_captcha(raw):
    """Return *raw* as a 4-digit string, or ``None`` if it cannot be one.

    Known OCR misreads are mapped back to digits first; any remaining
    character outside ``0``-``9`` rejects the whole reading.
    """
    if len(raw) != 4:
        return None
    fixed = raw.translate(_CAPTCHA_FIXES)
    if all('0' <= ch <= '9' for ch in fixed):
        return fixed
    return None


def _new_driver(options):
    """Start a Chrome instance using the pinned chromedriver executable."""
    # NOTE(review): ``executable_path`` is deprecated in Selenium 4 in favour
    # of a ``Service`` object (see the commented-out import above).  It is kept
    # because the installed Selenium version is not visible here - confirm
    # before upgrading.
    return webdriver.Chrome(options=options, executable_path=_CHROMEDRIVER_PATH)


def _hide_webdriver_flag(driver):
    """Bypass the site's automation detection for every new document."""
    driver.execute_cdp_cmd(
        "Page.addScriptToEvaluateOnNewDocument", {"source": _STEALTH_JS}
    )


def _solve_captcha(driver, ocr):
    """Screenshot and OCR the captcha until a plausible 4-digit code is read."""
    while True:
        time.sleep(2)
        driver.find_element(By.CSS_SELECTOR, _CAPTCHA_IMG).screenshot(_CAPTCHA_IMAGE_PATH)
        # Read the image back; the ``with`` block closes the handle on every
        # attempt instead of leaking one per loop iteration.
        with open(_CAPTCHA_IMAGE_PATH, 'rb') as image_file:
            image_bytes = image_file.read()
        guess = ocr.classification(image_bytes)
        time.sleep(1)
        print(guess)

        code = _normalize_captcha(guess)
        if code is not None:
            return code

        # Recognition failed: click the image to request a fresh captcha.
        time.sleep(2)
        driver.find_element(By.CSS_SELECTOR, _CAPTCHA_IMG).click()


def _save_cookies(cookie_data, path, encoding="utf-8"):
    """Write the Selenium cookie list to *path* as JSON."""
    with open(path, "w", encoding=encoding) as f_w:
        json.dump(cookie_data, f_w)


def _load_cookies(path, encoding="utf-8"):
    """Read the cookie list stored at *path*; return ``[]`` if it is missing."""
    if not os.path.isfile(path):
        return []
    with open(path, "r", encoding=encoding) as f_r:
        return json.load(f_r)


def _cookies_login(driver, cookies):
    """Replace all cookies of *driver* with *cookies* and reload the page."""
    print(cookies)
    driver.delete_all_cookies()
    for cookie in cookies:
        print(cookie)
        driver.add_cookie(cookie)
    driver.refresh()


def register_start(username='lchnbnb1', password='admin666'):
    """Log in through the web form, then verify the cookies in a second browser.

    Args:
        username: account name typed into the login form.
        password: password typed into the login form.

    Both browsers are closed when the function returns or raises.
    """
    # NOTE(review): the default credentials live in source control; consider
    # supplying them from the environment instead.
    options = webdriver.ChromeOptions()
    options.binary_location = _CHROME_BINARY
    # Suppress the automation banner and some useless log output.
    options.add_experimental_option(
        "excludeSwitches", ['enable-automation', 'enable-logging']
    )

    # Make sure the output directory exists before anything is written to it.
    _ensure_parent_dir(_CAPTCHA_IMAGE_PATH)
    _ensure_parent_dir(_COOKIE_FILE_PATH)

    with contextlib.ExitStack() as cleanup:
        # driver2 is only used to check that the saved cookies are valid.
        driver2 = _new_driver(options)
        cleanup.callback(driver2.quit)
        # driver performs the actual form login.
        driver = _new_driver(options)
        cleanup.callback(driver.quit)

        _hide_webdriver_flag(driver)
        _hide_webdriver_flag(driver2)

        # Open the site.  Do not use a VPN: the connection gets dropped.
        # If a VPN is unavoidable, add this before creating the drivers:
        #     options.add_argument('--ignore-certificate-errors')
        driver.get(_SITE_URL)

        # Fill in the login form: locate each input (CSS selectors here; XPath
        # would work as well) and type into it.
        driver.find_element(By.CSS_SELECTOR, _LOGIN_LINK).click()  # open the login page
        time.sleep(1)
        driver.find_element(By.CSS_SELECTOR, '#username').send_keys(username)
        driver.find_element(By.CSS_SELECTOR, '#password').send_keys(password)

        ocr = ddddocr.DdddOcr()  # image recognition library
        captcha_code = _solve_captcha(driver, ocr)
        driver.find_element(By.CSS_SELECTOR, _CAPTCHA_INPUT).send_keys(captcha_code)
        time.sleep(1)

        driver.find_element(By.CSS_SELECTOR, _SUBMIT_BUTTON).click()  # submit the form
        time.sleep(2)

        # Capture the cookies of the logged-in session.
        cookie_data = driver.get_cookies()
        time.sleep(1)

        # To hand teammates a ready-made Cookie header for crawler tests:
        #     '; '.join(c['name'] + '=' + c['value'] for c in reversed(cookie_data))

        time.sleep(1)

        # Round-trip the cookies through the JSON file, then replay them.
        _save_cookies(cookie_data, _COOKIE_FILE_PATH)
        cookies = _load_cookies(_COOKIE_FILE_PATH)
        time.sleep(1)
        # A page from the site must be loaded first; otherwise Chrome stays on
        # its initial "data:" page and the cookies cannot be attached.
        driver2.get(_SITE_URL)
        _cookies_login(driver2, cookies)
        time.sleep(1)
        driver2.get(_SITE_URL)
        driver2.refresh()
        # Keep the windows open long enough to inspect the result by eye.
        time.sleep(100)


if __name__ == '__main__':
    register_start()