commit 92a8c0eed9d48edbb35b9b2c0912d3e4ad40e2a8
Author: niuniu <3307645003@qq.com>
Date: Sun Jun 2 23:00:36 2024 +0800
上传
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/firstpyproject.iml b/.idea/firstpyproject.iml
new file mode 100644
index 0000000..2c80e12
--- /dev/null
+++ b/.idea/firstpyproject.iml
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..db7e132
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..a2496ba
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/back_image.png b/back_image.png
new file mode 100644
index 0000000..005f1f0
Binary files /dev/null and b/back_image.png differ
diff --git a/block.jpg b/block.jpg
new file mode 100644
index 0000000..3ecb75d
Binary files /dev/null and b/block.jpg differ
diff --git a/get_contend.py b/get_contend.py
new file mode 100644
index 0000000..ecec7a1
--- /dev/null
+++ b/get_contend.py
@@ -0,0 +1,61 @@
+import requests
+import json
+from lxml import etree
+import js2py
+import login
+# NOTE(review): account credentials are hard-coded in source control - move them to environment variables.
+USERNAME = "td188310339"
+PASSWORD = "Aa123456"
+# `login` is a module, not a function: importing it runs the browser login and leaves the cookie string in `login.cookies`.
+COOKIE = login.cookies
+HEADERS = {'Cookie': COOKIE}  # the request header is named `Cookie`; a `Cookies` header is not read as cookies
+BASE_URL = 'https://www.tadu.com/'
+def get_id(num):
+    """Return the last path segment of the `paging_left` link on the page of part `num + 1` of book 988681."""
+    page = etree.HTML(requests.get(f'{BASE_URL}book/988681/{num + 1}/?isfirstpart=true', headers=HEADERS).text)
+    return page.xpath('//a[@id="paging_left"]/@href')[0].strip().split('/')[-1]
+# JavaScript taken over unchanged: `encipher` Base64-encodes a string by character code.
+js_code = """
+ function encipher(e) {
+ var a = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+ , t = "="
+ , o = function(e) {
+ var o, s = "", i = e.length;
+ for (o = 0; i - 2 > o; o += 3)
+ s += a[e.charCodeAt(o) >> 2],
+ s += a[((3 & e.charCodeAt(o)) << 4) + (e.charCodeAt(o + 1) >> 4)],
+ s += a[((15 & e.charCodeAt(o + 1)) << 2) + (e.charCodeAt(o + 2) >> 6)],
+ s += a[63 & e.charCodeAt(o + 2)];
+ return i % 3 && (o = i - i % 3,
+ s += a[e.charCodeAt(o) >> 2],
+ i % 3 == 2 ? (s += a[((3 & e.charCodeAt(o)) << 4) + (e.charCodeAt(o + 1) >> 4)],
+ s += a[(15 & e.charCodeAt(o + 1)) << 2],
+ s += t) : (s += a[(3 & e.charCodeAt(o)) << 4],
+ s += t + t)),
+ s
+ };
+ return o(e)
+ }
+ """
+# Compile once: the function is the same for every chapter.
+encipher = js2py.eval_js(js_code)
+
+
+for chapter_num in range(1, 101):
+    # Paragraphs whose data-limit attribute contains this encoded id are skipped below.
+    chapter_id = get_id(chapter_num)
+    dataLimit = encipher(chapter_id)
+    print(dataLimit)
+
+    # Fetch the chapter JSON with the login cookie attached and pull out its HTML content.
+    res = requests.get(f'{BASE_URL}getPartContentByCodeTable/988681/{chapter_num}', headers=HEADERS)
+    text = json.loads(res.text)
+    text = text['data']['content']
+    html = etree.HTML(text)
+
+    # `contains` is the XPath 1.0 string function; only text of the remaining <p> elements is kept.
+    texts = html.xpath(f'//p[not(contains(@data-limit,"{dataLimit}"))]/text()')
+
+    # Print the chapter text one fragment per line.
+    for txt in texts:
+        print(txt)
\ No newline at end of file
diff --git a/login.py b/login.py
new file mode 100644
index 0000000..f834578
--- /dev/null
+++ b/login.py
@@ -0,0 +1,196 @@
+import random
+
+from selenium import webdriver
+from selenium.webdriver import ActionChains
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+import time
+import cv2
+import pytesseract
+import requests
+import os
+import numpy as np
+from io import BytesIO
+from PIL import Image
+from selenium.webdriver import ChromeOptions
+# Build the Chrome options before creating the driver: options only take effect when passed to the constructor.
+option = ChromeOptions()
+# Hide the browser window.
+option.add_argument('--headless')
+option.add_argument('--disable-gpu')
+# Drop the automation switch and keep the browser open after the script ends.
+option.add_experimental_option('excludeSwitches', ['enable-automation'])
+option.add_experimental_option("detach", True)
+driver = webdriver.Chrome(options=option)
+
+# Open the login page.
+driver.get('https://www.tadu.com/v3/loginpage?logintype=taduphone')
+
+def get_tracks(distance):
+    """Build a drag track: accelerate over the first 3/5, then brake; returns forward and back pixel steps."""
+    distance += 20  # overshoot on purpose; back_tracks below pull the slider back by 19 px
+    v, t, current, moved = 0, 0.2, 0, 0
+    mid = distance * 3 / 5
+    forward_tracks = []
+    while current < distance:
+        a = 2 if current < mid else -3
+        # Clamp step and speed so braking can never stall or reverse the motion (which would loop forever).
+        s = max(v * t + 0.5 * a * (t ** 2), 0.1)
+        v = max(v + a * t, 0)
+        current += s
+        # Emit the change of the rounded cumulative position so rounding errors do not add up.
+        step = round(min(current, distance)) - moved
+        moved += step
+        forward_tracks.append(step)
+
+    back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1]
+    return {'forward_tracks': forward_tracks, 'back_tracks': back_tracks}
+
+
+def crack_slider(tracks):
+ """Press the captcha slider, replay the forward then the back offsets from `tracks`, release it; returns 0."""
+ wait = WebDriverWait(driver, 5)
+ slider = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'tc-drag-thumb')))
+ ActionChains(driver).click_and_hold(slider).perform() # press and hold the left mouse button
+ # NOTE(review): the nesting of the loop bodies below cannot be recovered from this flattened diff - confirm.
+ for track in tracks['forward_tracks']:
+ ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
+
+ time.sleep(0.5)
+ for back_tracks in tracks['back_tracks']:
+ ActionChains(driver).move_by_offset(xoffset=back_tracks, yoffset=0).perform()
+ print("1212313")
+ ActionChains(driver).move_by_offset(xoffset=-4, yoffset=0).perform()
+ ActionChains(driver).move_by_offset(xoffset=4, yoffset=0).perform()
+ time.sleep(0.5)
+
+ ActionChains(driver).release().perform() # release the left button
+ return 0
+
+
+# Download an image into the working directory.
+def get_image(img_url, imgname):
+    """Stream `img_url` into the file `./<imgname>`; raises `requests.HTTPError` on an error response."""
+    imgName = ''.join(["./", imgname])
+    # The `with` blocks release the connection and close the file even when an error occurs.
+    with requests.get(img_url, stream=True, timeout=10) as image:
+        image.raise_for_status()
+        with open(imgName, 'wb') as f:
+            for chunk in image.iter_content(chunk_size=1024):  # write 1024 bytes at a time
+                if chunk:
+                    f.write(chunk)
+
+def get_image_offset(background_image_url, slider_image_url):
+    """Locate the slider piece inside the captcha background and return the horizontal drag offset.
+
+    Both images are downloaded with `get_image`, compared with OpenCV template matching, and the
+    column of the best match is multiplied by 295 / 680. The matched region is drawn on the
+    background and displayed through `show`, which waits for a key press.
+
+    Args:
+        background_image_url: URL of the captcha background image.
+        slider_image_url: URL of the slider piece image.
+
+    Returns:
+        The best-match column multiplied by 295 / 680.
+
+    Side effects: writes back_image.png, slider_image.png, block.jpg and template.jpg.
+    """
+    bg_path = 'back_image.png'
+    piece_path = 'slider_image.png'
+    get_image(background_image_url, bg_path)
+    get_image(slider_image_url, piece_path)
+
+    # Flag 0 loads each image as single-channel grayscale.
+    piece = cv2.imread(piece_path, 0)
+    background = cv2.imread(bg_path, 0)
+    piece_w, piece_h = piece.shape[::-1]
+
+    # Store the grayscale versions under their working file names.
+    piece_jpg = 'block.jpg'
+    background_jpg = 'template.jpg'
+    cv2.imwrite(piece_jpg, piece)
+    cv2.imwrite(background_jpg, background)
+
+    # Re-read the piece, invert its intensities and store it again.
+    piece = cv2.cvtColor(cv2.imread(piece_jpg), cv2.COLOR_RGB2GRAY)
+    cv2.imwrite(piece_jpg, abs(255 - piece))
+
+    # Matching runs on both files as re-read from disk.
+    piece = cv2.imread(piece_jpg)
+    background = cv2.imread(background_jpg)
+
+    # `scores` holds one match score per candidate position; take the position of the maximum.
+    scores = cv2.matchTemplate(piece, background, cv2.TM_CCOEFF_NORMED)
+    row, col = np.unravel_index(scores.argmax(), scores.shape)
+    print(row, col)
+
+    # The downloaded image and the image shown on the page differ in size, so rescale the column.
+    # NOTE(review): the original note quotes 280*158 shown vs 680*390 downloaded, yet the factor is 295 / 680 - confirm.
+    offset = col * (295 / 680)
+
+    # Outline the match: image, top-left corner, bottom-right corner, colour, line width.
+    cv2.rectangle(background, (col, row), (col + piece_w, row + piece_h), (7, 249, 151), 2)
+    show(background)
+    return offset
+# Display an image.
+def show(name):
+    """Show image `name` in a window titled 'Show' until a key is pressed, then close all OpenCV windows."""
+    window_title = 'Show'
+    cv2.imshow(window_title, name)
+    cv2.waitKey(0)  # 0 = wait indefinitely for a key press
+    cv2.destroyAllWindows()
+
+
+# Anti-detection settings.
+# NOTE(review): `option` is never handed to a driver in these lines - confirm the settings reach the running browser.
+option.add_experimental_option('excludeSwitches', ['enable-automation'])
+option.add_experimental_option("detach", True)
+# Make navigator.webdriver report false on the current page, then print what the page sees.
+script = 'Object.defineProperty(navigator, "webdriver", {get: () => false,});'
+driver.execute_script(script)
+js = 'return window.navigator.webdriver'
+print(driver.execute_script(js))  # shows the webdriver detection result in the terminal
+
+
+def _wait_for(by, value):
+    """Wait up to 10 seconds for the element located by (by, value) to be present and return it."""
+    return WebDriverWait(driver, 10).until(EC.presence_of_element_located((by, value)))
+
+
+# Switch to account/password login, fill in the credentials and submit.
+account_switch = _wait_for(By.ID, 'phoneAccountSwitch')
+account_switch.click()
+username_input = _wait_for(By.CLASS_NAME, 'accountInput')
+password_input = _wait_for(By.CLASS_NAME, 'accountPass')
+username_input.send_keys('td188310339')  # user name
+password_input.send_keys('Aa123456')  # password
+submit = _wait_for(By.CLASS_NAME, 'accountLogin_bt')
+submit.click()
+
+# Enter the captcha iframe, then pause for 10 seconds.
+frame = _wait_for(By.ID, 'tcaptcha_iframe')
+driver.switch_to.frame(frame)
+time.sleep(10)
+
+# Read the image URLs of the background and of the slider piece.
+img = _wait_for(By.ID, 'slideBg')
+image_url = img.get_attribute('src')
+img2 = _wait_for(By.ID, 'slideBlock')
+image2_url = img2.get_attribute('src')
+
+x = get_image_offset(image_url, image2_url)
+print(x)
+# Press the drag handle, then replay the generated track.
+drag_button = _wait_for(By.ID, 'tcaptcha_drag_thumb')
+ActionChains(driver).click_and_hold(drag_button).perform()
+time.sleep(0.5)
+time.sleep(0.01)
+crack_slider(get_tracks(x))
+
+# After a short pause, join the browser cookies into one "name=value;" string.
+time.sleep(2)
+cookies = driver.get_cookies()
+cookies = ''.join([f'{cookie["name"]}={cookie["value"]};' for cookie in cookies])
+print(cookies)
diff --git a/slider_image.png b/slider_image.png
new file mode 100644
index 0000000..68e096a
Binary files /dev/null and b/slider_image.png differ
diff --git a/template.jpg b/template.jpg
new file mode 100644
index 0000000..cc921e9
Binary files /dev/null and b/template.jpg differ