You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

185 lines
6.5 KiB

import asyncio
import json
import os
import pickle
from hashlib import md5
from random import random
import httpx
from loguru import logger
from .captcha import recognize
class Tree:
    """Node of a task tree.

    ``task`` holds whatever payload the caller passes in (the code in this
    module passes task dicts) and ``children`` is the list of child nodes,
    which callers fill in by appending to it.
    """

    def __init__(self, task):
        """Create a node for ``task`` with no children yet."""
        self.task = task  # payload, stored as-is
        self.children = []  # a fresh list per node; never shared between nodes

    def __repr__(self):
        # Show only the child count so printing a deep tree stays readable.
        return f'{type(self).__name__}(task={self.task!r}, children={len(self.children)})'
class Spider(httpx.AsyncClient):
    """Async client for the ismartlearning.cn endpoints used by this module.

    Subclasses ``httpx.AsyncClient``; every method issues its requests through
    ``self.get`` / ``self.post``.
    """

    # Headers sent with the ticket-authenticated API calls
    # (``get_paper`` and ``book_info``).
    _API_HEADERS = {
        'Origin': 'http://me.ismartlearning.cn',
        'Referer': 'http://me.ismartlearning.cn/',
        'X-Requested-With': 'XMLHttpRequest',
        'Accept-Encoding': 'gzip, deflate'
    }

    def __init__(self):
        """Create the client with httpx's default settings."""
        super().__init__()

    async def login(self, username, password):
        """Log in with a username and password.

        Downloads a captcha image, hands its bytes to ``recognize`` and posts
        the credentials together with the recognized code.

        Args:
            username: Account name, sent as-is.
            password: Plain-text password. Only a digest is sent:
                ``md5(md5(password).hexdigest() + salt).hexdigest()``.

        Returns:
            The ``result`` object of the login response.

        Raises:
            AssertionError: If ``result['code']`` is not ``-26``.
        """
        logger.info('正在获取验证码...')
        # NOTE(review): the random query string is presumably a cache buster.
        captcha = await self.get(f'http://sso.ismartlearning.cn/captcha.html?{random()}')
        code = recognize(captcha.content)
        token = md5(password.encode()).hexdigest()
        response = await self.post(
            'http://sso.ismartlearning.cn/v2/tickets-v2',
            data={
                'username': username,
                'password': md5(token.encode() + b'fa&s*l%$k!fq$k!ld@fjlk').hexdigest(),
                'captcha': code
            },
            headers={
                'X-Requested-With': 'XMLHttpRequest',
                'Origin': 'http://me.ismartlearning.cn',
                'Referer': 'http://me.ismartlearning.cn/'
            }
        )
        info = response.json()
        logger.debug(info['result'])
        # NOTE(review): -26 is the only code accepted as success here;
        # confirm against the service before changing it.
        if info['result']['code'] != -26:
            raise AssertionError(f'[!] 登录失败: {info["result"]["msg"]}')
        return info['result']

    async def get_courses(self):
        """Fetch the user's course list.

        Returns:
            The ``list`` entry of the response's ``data`` object.
        """
        logger.info('正在获取课程列表...')
        response = await self.post(
            'https://school.ismartlearning.cn/client/course/list-of-student?status=1',
            data={
                'pager.currentPage': 1,
                # Large page size so a single request is made for the listing.
                'pager.pageSize': 32767
            }
        )
        return response.json()['data']['list']

    async def get_books(self, course):
        """Fetch the book list of a course.

        Args:
            course: Mapping with a ``courseId`` key (e.g. an item returned by
                ``get_courses``).

        Returns:
            The ``data`` entry of the response.
        """
        logger.info('正在获取书籍列表...')
        # This request is required; without it the following one fails.
        await self.get_courses()
        response = await self.post(
            'http://school.ismartlearning.cn/client/course/textbook/list-of-student',
            data={'courseId': course['courseId']}
        )
        return response.json()['data']

    @staticmethod
    def _merge_tasks(tasks):
        """Reorganize a flat task list into a tree.

        Each task becomes a ``Tree`` node keyed by its ``id``. A task with a
        ``parent_id`` is attached to that parent's node; a task without one is
        attached to a synthetic root. Tasks whose parent is not in the list are
        dropped with a warning.

        Args:
            tasks: Non-empty sequence of task dicts, each with an ``id`` key;
                the first one must also carry ``book_id``.

        Returns:
            The root ``Tree``; its ``task`` holds ``book_id`` and
            ``unitStudyPercent``.

        Raises:
            IndexError: If ``tasks`` is empty.
        """
        id_record = {task['id']: Tree(task) for task in tasks}
        root = Tree({
            'book_id': tasks[0]['book_id'],
            'unitStudyPercent': 0
        })
        for node in id_record.values():
            if 'parent_id' not in node.task:
                root.children.append(node)
                continue
            if (parent_id := node.task['parent_id']) in id_record:
                id_record[parent_id].children.append(node)
            else:
                name_prefix = f'{node.task["name"]} ' if 'name' in node.task else ''
                node_name = f'{name_prefix}[id:{node.task["id"]}]'
                logger.warning(f'任务已忽略(父节点不存在):{node_name}')
        return root

    async def get_tasks(self, book, tree=False):
        """Fetch the task list of a book.

        Args:
            book: Mapping with ``bookId``, ``bookType`` and ``courseId`` keys.
            tree: When true, return the tasks merged into a ``Tree``;
                otherwise return the flat list from the response.

        Returns:
            The response's ``data`` entry, or the root ``Tree`` built from it.
        """
        logger.info('正在获取任务列表...')
        chapters_url = 'http://school.ismartlearning.cn/client/course/textbook/chapters'
        # NOTE(review): a bare request to the same endpoint precedes the real
        # one; presumably the service requires it. Kept as in the original.
        await self.post(chapters_url)
        response = await self.post(
            chapters_url,
            data={key: book[key] for key in ('bookId', 'bookType', 'courseId')}
        )
        tasks = response.json()['data']
        return self._merge_tasks(tasks) if tree else tasks

    async def _service_ticket(self, service):
        """Request a ticket from the SSO endpoint for the ``service`` URL."""
        response = await self.post(
            'http://sso.ismartlearning.cn/v1/serviceTicket',
            data={'service': service}
        )
        return response.json()['data']['serverTicket']

    async def get_paper(self, paper_id):
        """Fetch task-point information (including questions and answers).

        Args:
            paper_id: Value sent as ``paperId``.

        Returns:
            The ``data`` entry of the paper-info response.
        """
        paper_url = 'http://xot-api.ismartlearning.cn/client/textbook/paperinfo'
        ticket = await self._service_ticket(paper_url)
        logger.debug(f'Ticket: {ticket}')
        response = await self.post(
            paper_url,
            data={'paperId': paper_id},
            headers=self._API_HEADERS,
            params={'ticket': ticket}
        )
        return response.json()['data']

    async def download_tree(self, root):
        """Download the paper of every task in the tree into ``.cache/books``.

        The tree itself is pickled to ``Tree.pck`` in the book's cache
        directory. Every node whose task has a ``paperId`` gets a
        ``<paperId>.json`` file holding the ``get_paper`` result plus the task
        under the ``task`` key. Papers whose file already exists are skipped.

        Args:
            root: Root ``Tree`` whose ``task`` has a ``book_id`` key.
        """
        book_dir = f'.cache/books/{root.task["book_id"]}'

        async def download(task):
            paper_id = task['paperId']
            filepath = f'{book_dir}/{paper_id}.json'
            if os.path.exists(filepath):
                return
            async with limit:  # keep concurrency from getting too high
                result = await self.get_paper(paper_id)
                result['task'] = task  # also store the task alongside the paper
                # Write to a temp file and rename it into place, so an
                # interrupted run never leaves a truncated JSON file that the
                # existence check above would treat as already downloaded.
                tmp_path = f'{filepath}.tmp'
                with open(tmp_path, 'w', encoding='utf-8') as file:
                    json.dump(result, file)
                os.replace(tmp_path, filepath)

        def dfs(src):
            if 'paperId' in (task := src.task):
                logger.info(f'添加任务:{task["name"]}')
                tasks.append(download(task))
            for child in src.children:
                dfs(child)

        logger.info('开始下载试题及答案...')
        os.makedirs(book_dir, exist_ok=True)
        with open(f'{book_dir}/Tree.pck', 'wb') as fp:
            pickle.dump(root, fp)
        tasks, limit = [], asyncio.Semaphore(4)
        dfs(root)
        await asyncio.gather(*tasks)
        logger.info('下载完成.')

    async def get_user(self):
        """Fetch the student-info response and return its decoded JSON body."""
        response = await self.post(
            'https://school.ismartlearning.cn/client/user/student-info'
        )
        return response.json()

    async def book_info(self, book_id):
        """Fetch information about a book.

        Args:
            book_id: Value sent as ``bookId`` (``bookType`` is always ``0``).

        Returns:
            The ``data`` entry of the book-info response.
        """
        info_url = 'http://book-api.ismartlearning.cn/client/v2/book/info'
        ticket = await self._service_ticket(info_url)
        response = await self.post(
            info_url,
            headers=self._API_HEADERS,
            params={'ticket': ticket},
            data={
                'bookId': book_id,
                'bookType': 0
            }
        )
        return response.json()['data']