You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

185 lines
6.5 KiB

3 years ago
import asyncio
import json
import os
import pickle
from hashlib import md5
from random import random
import httpx
from loguru import logger
from .captcha import recognize
class Tree:
def __init__(self, task):
self.task = task
self.children = []
class Spider(httpx.AsyncClient):
def __init__(self):
super().__init__()
async def login(self, username, password): # 账号密码登录
logger.info('正在获取验证码...')
result = await self.get(f'http://sso.ismartlearning.cn/captcha.html?{random()}')
code = recognize(result.content)
token = md5(password.encode()).hexdigest()
info = (await self.post(
'http://sso.ismartlearning.cn/v2/tickets-v2',
data={
'username': username,
'password': md5(token.encode() + b'fa&s*l%$k!fq$k!ld@fjlk').hexdigest(),
'captcha': code
},
headers={
'X-Requested-With': 'XMLHttpRequest',
'Origin': 'http://me.ismartlearning.cn',
'Referer': 'http://me.ismartlearning.cn/'
}
)).json()
logger.debug(info['result'])
if info['result']['code'] != -26:
raise AssertionError(f'[!] 登录失败: {info["result"]["msg"]}')
return info['result']
async def get_courses(self): # 获取用户课程列表
logger.info('正在获取课程列表...')
courses = (await self.post(
'https://school.ismartlearning.cn/client/course/list-of-student?status=1',
data={
'pager.currentPage': 1,
'pager.pageSize': 32767
}
)).json()['data']
return courses['list']
async def get_books(self, course): # 获取某课程的书籍列表
logger.info('正在获取书籍列表...')
await self.get_courses() # 必须有这个请求,否则后面会报错
books = (await self.post(
'http://school.ismartlearning.cn/client/course/textbook/list-of-student',
data={
'courseId': course['courseId']
}
)).json()['data']
return books
@staticmethod
def _merge_tasks(tasks): # 将任务列表重组成树形结构
id_record = {task['id']: Tree(task) for task in tasks}
root = Tree({
'book_id': tasks[0]['book_id'],
'unitStudyPercent': 0
})
for task_id in id_record:
node = id_record[task_id]
node_name = (f'{node.task["name"]} ' if 'name' in node.task else '') + f'[id:{node.task["id"]}]'
if 'parent_id' in node.task:
if (parent_id := node.task['parent_id']) in id_record:
id_record[parent_id].children.append(node)
else:
logger.warning(f'任务已忽略(父节点不存在):{node_name}')
else:
root.children.append(node)
return root
async def get_tasks(self, book, tree=False): # 获取某书籍的任务列表
logger.info('正在获取任务列表...')
await self.post('http://school.ismartlearning.cn/client/course/textbook/chapters')
tasks = (await self.post(
'http://school.ismartlearning.cn/client/course/textbook/chapters',
data={key: book[key] for key in ('bookId', 'bookType', 'courseId')}
)).json()['data']
if tree:
return self._merge_tasks(tasks)
else:
return tasks
async def get_paper(self, paper_id): # 获取任务点信息(包括题目和答案)
ticket = (await self.post(
'http://sso.ismartlearning.cn/v1/serviceTicket',
data={
'service': 'http://xot-api.ismartlearning.cn/client/textbook/paperinfo'
}
)).json()['data']['serverTicket']
logger.debug(f'Ticket: {ticket}')
paper_info = (await self.post(
'http://xot-api.ismartlearning.cn/client/textbook/paperinfo',
data={
'paperId': paper_id
},
headers={
'Origin': 'http://me.ismartlearning.cn',
'Referer': 'http://me.ismartlearning.cn/',
'X-Requested-With': 'XMLHttpRequest',
'Accept-Encoding': 'gzip, deflate'
},
params={
'ticket': ticket
}
)).json()['data']
return paper_info
async def download_tree(self, root):
async def download(task):
paper_id = task['paperId']
filepath = f'.cache/books/{root.task["book_id"]}/{paper_id}.json'
if os.path.exists(filepath):
return
async with limit: # 防止并发过高
result = await self.get_paper(paper_id)
result['task'] = task # 继续存入 Task
with open(filepath, 'w') as file:
json.dump(result, file)
def dfs(src):
if 'paperId' in (task := src.task):
logger.info(f'添加任务:{task["name"]}')
tasks.append(download(task))
for child in src.children:
dfs(child)
logger.info('开始下载试题及答案...')
os.makedirs(f'.cache/books/{root.task["book_id"]}', exist_ok=True)
with open(f'.cache/books/{root.task["book_id"]}/Tree.pck', 'wb') as fp:
pickle.dump(root, fp)
tasks, limit = [], asyncio.Semaphore(4)
dfs(root)
await asyncio.gather(*tasks)
logger.info('下载完成.')
async def get_user(self):
return (await self.post(
'https://school.ismartlearning.cn/client/user/student-info')
).json()
async def book_info(self, book_id):
ticket = (await self.post(
'http://sso.ismartlearning.cn/v1/serviceTicket',
data={
'service': 'http://book-api.ismartlearning.cn/client/v2/book/info'
}
)).json()['data']['serverTicket']
book_info = (await self.post(
'http://book-api.ismartlearning.cn/client/v2/book/info',
headers={
'Origin': 'http://me.ismartlearning.cn',
'Referer': 'http://me.ismartlearning.cn/',
'X-Requested-With': 'XMLHttpRequest',
'Accept-Encoding': 'gzip, deflate'
},
params={
'ticket': ticket
},
data={
'bookId': book_id,
'bookType': 0
}
)).json()
return book_info['data']