commit 92a8c0eed9d48edbb35b9b2c0912d3e4ad40e2a8
Author: niuniu <3307645003@qq.com>
Date:   Sun Jun 2 23:00:36 2024 +0800

    上传

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/firstpyproject.iml b/.idea/firstpyproject.iml
new file mode 100644
index 0000000..2c80e12
--- /dev/null
+++ b/.idea/firstpyproject.iml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..db7e132
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..a2496ba
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/back_image.png b/back_image.png
new file mode 100644
index 0000000..005f1f0
Binary files /dev/null and b/back_image.png differ
diff --git a/block.jpg b/block.jpg
new file mode 100644
index 0000000..3ecb75d
Binary files /dev/null and b/block.jpg differ
diff --git a/get_contend.py b/get_contend.py
new file mode 100644
index 0000000..ecec7a1
--- /dev/null
+++ b/get_contend.py
@@ -0,0 +1,63 @@
+"""Print the readable text of chapters 1-100 of one tadu.com book."""
+import base64
+import os
+
+import requests
+from lxml import etree
+
+from login import login
+
+BASE_URL = 'https://www.tadu.com'
+BOOK_ID = 988681
+REQUEST_TIMEOUT = 10  # seconds
+
+
+def get_id(num, headers=None):
+    """Return the chapter id linked from the page after chapter ``num``.
+
+    The id is the last path segment of the ``paging_left`` link on page
+    ``num + 1`` (presumably that page's "previous chapter" link).
+
+    Raises:
+        requests.HTTPError: the page request returned an error status.
+        LookupError: the page contains no ``paging_left`` link.
+    """
+    url = f'{BASE_URL}/book/{BOOK_ID}/{num + 1}/?isfirstpart=true'
+    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
+    response.raise_for_status()
+    links = etree.HTML(response.text).xpath('//a[@id="paging_left"]/@href')
+    if not links:
+        raise LookupError(f'no paging_left link found on {url}')
+    return links[0].strip().split('/')[-1]
+
+
+def encipher(text):
+    """Return ``text`` as standard Base64.
+
+    The JavaScript routine this replaces is plain Base64 over the character
+    codes of its input, so no JavaScript engine is needed.
+    """
+    return base64.b64encode(text.encode('latin-1')).decode('ascii')
+
+
+def main():
+    """Log in, then print the text of chapters 1-100."""
+    # Credentials come from the environment rather than the source file.
+    cookie = login(os.environ['TADU_USERNAME'], os.environ['TADU_PASSWORD'])
+    headers = {'Cookie': cookie}
+    for chapter_num in range(1, 101):
+        data_limit = encipher(get_id(chapter_num, headers))
+        print(data_limit)
+        url = f'{BASE_URL}/getPartContentByCodeTable/{BOOK_ID}/{chapter_num}'
+        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        content = response.json()['data']['content']
+        # Keep only paragraphs whose data-limit lacks the encoded chapter id.
+        paragraphs = etree.HTML(content).xpath(
+            f'//p[not(contains(@data-limit,"{data_limit}"))]/text()')
+        for txt in paragraphs:
+            print(txt)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/login.py b/login.py
new file mode 100644
index 0000000..f834578
--- /dev/null
+++ b/login.py
@@ -0,0 +1,217 @@
+"""Log in to tadu.com through Selenium and return the session cookies."""
+import random
+
+from selenium import webdriver
+from selenium.webdriver import ActionChains
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import cv2
+import pytesseract
+import requests
+import os
+import numpy as np
+from io import BytesIO
+from PIL import Image
+from selenium.webdriver import ChromeOptions
+
+LOGIN_URL = 'https://www.tadu.com/v3/loginpage?logintype=taduphone'
+# Browser of the login in progress: set by login(), read by crack_slider().
+driver = None
+
+
+def build_options(headless=True):
+    """Return the ChromeOptions used for the login browser."""
+    option = ChromeOptions()
+    if headless:
+        option.add_argument('--headless')
+    option.add_argument('--disable-gpu')
+    option.add_experimental_option('excludeSwitches', ['enable-automation'])
+    return option
+
+
+def get_tracks(distance):
+    """Return the drag steps for a slider that must travel ``distance`` px.
+
+    The forward steps overshoot by 20 px, accelerating for the first 3/5 of
+    the way and then braking; the fixed backward steps pull the slider back.
+    For a positive target the forward steps sum to ``round(distance + 20)``.
+
+    Returns:
+        dict with integer lists ``forward_tracks`` and ``back_tracks``.
+    """
+    target = distance + 20
+    mid = target * 3 / 5
+    t = 0.2
+    v = 0
+    current = 0
+    moved = 0  # whole pixels emitted so far
+    forward_tracks = []
+    while current < target:
+        a = 2 if current < mid else -3
+        s = v * t + 0.5 * a * (t ** 2)
+        v = v + a * t
+        if s <= 0:
+            # Braked to a stop short of the target: without this guard the
+            # position moves backwards and the loop never ends.
+            break
+        current = min(current + s, target)
+        # Emit the change in rounded position so that rounding errors do
+        # not accumulate over many small steps.
+        step = int(round(current)) - moved
+        moved += step
+        forward_tracks.append(step)
+    remainder = int(round(target)) - moved
+    if remainder > 0:
+        forward_tracks.append(remainder)
+
+    back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1]
+    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}
+
+
+def crack_slider(tracks, browser=None):
+    """Drag the captcha slider along ``tracks``.
+
+    Args:
+        tracks: dict as returned by get_tracks().
+        browser: WebDriver to act on; defaults to the module-level ``driver``.
+
+    Returns:
+        0, always.
+    """
+    if browser is None:
+        browser = driver
+    wait = WebDriverWait(browser, 5)
+
+    slider = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'tc-drag-thumb')))
+    ActionChains(browser).click_and_hold(slider).perform()  # press and hold
+
+    for track in tracks['forward_tracks']:
+        ActionChains(browser).move_by_offset(xoffset=track, yoffset=0).perform()
+
+    time.sleep(0.5)
+    for back_track in tracks['back_tracks']:
+        ActionChains(browser).move_by_offset(xoffset=back_track, yoffset=0).perform()
+    ActionChains(browser).move_by_offset(xoffset=-4, yoffset=0).perform()
+    ActionChains(browser).move_by_offset(xoffset=4, yoffset=0).perform()
+    time.sleep(0.5)
+
+    ActionChains(browser).release().perform()  # release the button
+    return 0
+
+
+def get_image(img_url, imgname):
+    """Download ``img_url`` into the file ``./imgname``.
+
+    Raises:
+        requests.HTTPError: the download returned an error status.
+    """
+    response = requests.get(img_url, stream=True, timeout=10)
+    response.raise_for_status()
+    # The with-block closes the file; no explicit flush/close is needed.
+    with open(''.join(["./", imgname]), 'wb') as f:
+        for chunk in response.iter_content(chunk_size=1024):
+            if chunk:
+                f.write(chunk)
+
+
+def get_image_offset(background_image_url, slider_image_url,
+                     rendered_width=295, debug=False):
+    """Return how far the slider must move, in on-page pixels.
+
+    Both images are downloaded to the working directory, the inverted
+    grayscale slider piece is template-matched against the grayscale
+    background, and the column of the best match is scaled from the
+    image's own width to ``rendered_width``.
+
+    Args:
+        background_image_url: URL of the captcha background image.
+        slider_image_url: URL of the slider piece image.
+        rendered_width: width in pixels at which the page displays the
+            background (295 is the value the original code assumed).
+        debug: when true, show the match in a window and wait for a key.
+
+    Raises:
+        ValueError: a downloaded image could not be decoded.
+    """
+    back_image = 'back_image.png'
+    slider_image = 'slider_image.png'
+    get_image(background_image_url, back_image)
+    get_image(slider_image_url, slider_image)
+
+    block = cv2.imread(slider_image, 0)
+    template = cv2.imread(back_image, 0)
+    if block is None or template is None:
+        raise ValueError('captcha image could not be decoded')
+    block = 255 - block
+
+    # result[row, col] scores the piece placed with its top-left corner there.
+    result = cv2.matchTemplate(template, block, cv2.TM_CCOEFF_NORMED)
+    row, col = np.unravel_index(result.argmax(), result.shape)
+    row, col = int(row), int(col)
+    offset = col * (rendered_width / template.shape[1])
+
+    if debug:
+        h, w = block.shape
+        cv2.rectangle(template, (col, row), (col + w, row + h), (7, 249, 151), 2)
+        show(template)
+
+    return offset
+
+
+def show(name):
+    """Display image ``name`` in a window until a key is pressed."""
+    cv2.imshow('Show', name)
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()
+
+
+def login(username, password, headless=True):
+    """Log in and return the cookies as a ``Cookie`` header value.
+
+    Args:
+        username: account name typed into the login form.
+        password: account password typed into the login form.
+        headless: run Chrome without a visible window.
+
+    Returns:
+        str of ``name=value`` pairs joined by ``'; '``.
+    """
+    global driver
+    driver = webdriver.Chrome(options=build_options(headless))
+    try:
+        wait = WebDriverWait(driver, 10)
+        driver.get(LOGIN_URL)
+        driver.execute_script(
+            'Object.defineProperty(navigator, "webdriver", {get: () => false,});')
+
+        wait.until(EC.presence_of_element_located((By.ID, 'phoneAccountSwitch'))).click()
+        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'accountInput'))).send_keys(username)
+        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'accountPass'))).send_keys(password)
+        wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'accountLogin_bt'))).click()
+
+        wait.until(EC.frame_to_be_available_and_switch_to_it((By.ID, 'tcaptcha_iframe')))
+        # Wait for the image URLs themselves instead of sleeping a fixed 10 s.
+        background_url = wait.until(
+            lambda d: d.find_element(By.ID, 'slideBg').get_attribute('src'))
+        slider_url = wait.until(
+            lambda d: d.find_element(By.ID, 'slideBlock').get_attribute('src'))
+
+        offset = get_image_offset(background_url, slider_url)
+        # crack_slider() presses the thumb itself, so it is not pressed here.
+        crack_slider(get_tracks(offset), driver)
+        time.sleep(2)
+
+        # Cookies are read from the current browsing context, so leave the
+        # captcha iframe before asking for them.
+        driver.switch_to.default_content()
+        return '; '.join(
+            f'{cookie["name"]}={cookie["value"]}' for cookie in driver.get_cookies())
+    finally:
+        driver.quit()
+        driver = None
+
+
+if __name__ == '__main__':
+    print(login(os.environ['TADU_USERNAME'], os.environ['TADU_PASSWORD']))
diff --git a/slider_image.png b/slider_image.png
new file mode 100644
index 0000000..68e096a
Binary files /dev/null and b/slider_image.png differ
diff --git a/template.jpg b/template.jpg
new file mode 100644
index 0000000..cc921e9
Binary files /dev/null and b/template.jpg differ