master
commit
92a8c0eed9
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.12 (firstpyproject)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (firstpyproject)" project-jdk-type="Python SDK" />
|
||||
</project>
|
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/firstpyproject.iml" filepath="$PROJECT_DIR$/.idea/firstpyproject.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
After Width: | Height: | Size: 18 KiB |
@ -0,0 +1,61 @@
|
||||
import requests
|
||||
import json
|
||||
from lxml import etree
|
||||
import js2py
|
||||
import login
|
||||
# Account used to obtain a session cookie for the site.
# NOTE(review): credentials are hard-coded in source control; consider loading
# them from the environment or an untracked config file instead.
USERNAME = "td188310339"
PASSWORD = "Aa123456"

# NOTE(review): `login` is bound by `import login`, i.e. it names a module.
# Calling a module object raises TypeError, so this probably needs to be
# `login.<function>(...)` or `from login import login` -- confirm against
# the login module, which is not visible from this file.
COOKIE = login(USERNAME, PASSWORD)

# Headers sent with every request. The HTTP request header that carries
# cookies is "Cookie" (singular); a "Cookies" header is not recognised by
# servers, so the session would never be presented.
# Presumably COOKIE is a ready-made "name=value; ..." string -- TODO confirm.
HEADERS = {
    'Cookie': COOKIE,
}

# Site root; note the trailing slash, paths are appended directly to it.
BASE_URL = 'https://www.tadu.com/'
|
||||
def get_id(num):
    """Return the last path segment of the "paging_left" link on page ``num + 1``.

    Fetches ``book/988681/<num + 1>/?isfirstpart=true`` below ``BASE_URL``
    using ``HEADERS``, parses the response as HTML and reads the ``href`` of
    the ``<a id="paging_left">`` element.

    Presumably that link points at the previous part, so the returned segment
    is the id of part ``num`` -- verify against the site markup.

    Args:
        num: Part number; the page requested is ``num + 1``.

    Returns:
        The text after the final ``/`` of the stripped ``href`` value.

    Raises:
        IndexError: If the page contains no ``<a id="paging_left">`` link.
    """
    # BASE_URL already ends with "/", so no extra slash goes before "book"
    # (the previous form produced "https://www.tadu.com//book/...").
    url = f'{BASE_URL}book/988681/{num + 1}/?isfirstpart=true'
    text = requests.get(url, headers=HEADERS).text
    html = etree.HTML(text)
    hrefs = html.xpath('//a[@id="paging_left"]/@href')
    return hrefs[0].strip().split('/')[-1]
|
||||
# JavaScript source of the `encipher` routine, evaluated through js2py.
# It walks the input three character codes at a time, emits four characters
# from the A-Z a-z 0-9 + / alphabet per group and pads the tail with "=",
# i.e. it looks like plain Base64 encoding of the character codes.
js_code = """
function encipher(e) {
var a = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
, t = "="
, o = function(e) {
var o, s = "", i = e.length;
for (o = 0; i - 2 > o; o += 3)
s += a[e.charCodeAt(o) >> 2],
s += a[((3 & e.charCodeAt(o)) << 4) + (e.charCodeAt(o + 1) >> 4)],
s += a[((15 & e.charCodeAt(o + 1)) << 2) + (e.charCodeAt(o + 2) >> 6)],
s += a[63 & e.charCodeAt(o + 2)];
return i % 3 && (o = i - i % 3,
s += a[e.charCodeAt(o) >> 2],
i % 3 == 2 ? (s += a[((3 & e.charCodeAt(o)) << 4) + (e.charCodeAt(o + 1) >> 4)],
s += a[(15 & e.charCodeAt(o + 1)) << 2],
s += t) : (s += a[(3 & e.charCodeAt(o)) << 4],
s += t + t)),
s
};
return o(e)
}
"""

# The script is identical for every chapter, so evaluate it once up front
# instead of re-running js2py on each loop iteration.
encipher = js2py.eval_js(js_code)

# Print the text of parts 1..100 of book 988681.
for chapter_num in range(1, 101):
    # Look up the id for the part being fetched (previously always part 1)
    # and encipher it; the result is matched against `data-limit` below.
    chapter_id = get_id(chapter_num)
    dataLimit = encipher(chapter_id)
    print(dataLimit)

    # `headers` must be a keyword argument of requests.get; it used to sit
    # inside the f-string, which corrupted the URL and sent no headers.
    res = requests.get(
        f'{BASE_URL}getPartContentByCodeTable/988681/{chapter_num}',
        headers=HEADERS,
    )

    # The endpoint answers with JSON whose data.content field is an HTML
    # fragment.
    text = json.loads(res.text)['data']['content']
    html = etree.HTML(text)

    # Keep the text of every <p> whose data-limit attribute does not contain
    # the enciphered id (presumably the matching ones are decoy paragraphs --
    # TODO confirm). The XPath function is spelled `contains`.
    texts = html.xpath(f'//p[not(contains(@data-limit,"{dataLimit}"))]/text()')

    for txt in texts:
        print(txt)
|
After Width: | Height: | Size: 5.4 KiB |
After Width: | Height: | Size: 31 KiB |
Loading…
Reference in new issue