From b85fe66bcb17ac68c2a0c01d11eb83ad9b5f89ad Mon Sep 17 00:00:00 2001 From: Mufanc <47652878+Mufanc@users.noreply.github.com> Date: Sun, 26 Sep 2021 01:14:37 +0800 Subject: [PATCH] Initial Commit --- .gitignore | 2 + .idea/.gitignore | 8 + .idea/iSmartAuto2.iml | 11 ++ .idea/inspectionProfiles/Project_Default.xml | 105 ++++++++++ .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + automaton/__init__.py | 2 + automaton/captcha/__init__.py | 1 + automaton/captcha/captcha.py | 38 ++++ automaton/captcha/models/0.png | Bin 0 -> 426 bytes automaton/captcha/models/1.png | Bin 0 -> 410 bytes automaton/captcha/models/2.png | Bin 0 -> 429 bytes automaton/captcha/models/3.png | Bin 0 -> 404 bytes automaton/captcha/models/4.png | Bin 0 -> 395 bytes automaton/captcha/models/5.png | Bin 0 -> 390 bytes automaton/captcha/models/6.png | Bin 0 -> 403 bytes automaton/captcha/models/7.png | Bin 0 -> 414 bytes automaton/captcha/models/8.png | Bin 0 -> 417 bytes automaton/captcha/models/9.png | Bin 0 -> 422 bytes automaton/devtools.py | 77 ++++++++ automaton/ismart.py | 122 ++++++++++++ automaton/markdown/__init__.py | 1 + automaton/markdown/formatter.py | 34 ++++ automaton/markdown/generator.py | 133 +++++++++++++ automaton/markdown/md.py | 59 ++++++ automaton/spider.py | 184 ++++++++++++++++++ configs.py | 9 + configs.yml | 13 ++ main.py | 14 ++ 31 files changed, 837 insertions(+) create mode 100644 .gitignore create mode 100644 .idea/.gitignore create mode 100644 .idea/iSmartAuto2.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 automaton/__init__.py create mode 100644 automaton/captcha/__init__.py create mode 100644 automaton/captcha/captcha.py create mode 100644 automaton/captcha/models/0.png create mode 100644 automaton/captcha/models/1.png create mode 100644 automaton/captcha/models/2.png create mode 100644 automaton/captcha/models/3.png create mode 100644 automaton/captcha/models/4.png create mode 100644 automaton/captcha/models/5.png create mode 100644 automaton/captcha/models/6.png create mode 100644 automaton/captcha/models/7.png create mode 100644 automaton/captcha/models/8.png create mode 100644 automaton/captcha/models/9.png create mode 100644 automaton/devtools.py create mode 100644 automaton/ismart.py create mode 100644 automaton/markdown/__init__.py create mode 100644 automaton/markdown/formatter.py create mode 100644 automaton/markdown/generator.py create mode 100644 automaton/markdown/md.py create mode 100644 automaton/spider.py create mode 100644 configs.py create mode 100644 configs.yml create mode 100644 main.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5fcb02f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/.cache/* +/venv/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/iSmartAuto2.iml b/.idea/iSmartAuto2.iml new file mode 100644 index 0000000..f61c21b --- /dev/null +++ b/.idea/iSmartAuto2.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..a52339a --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,105 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..c76173c --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..9c51603 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/automaton/__init__.py b/automaton/__init__.py new file mode 100644 index 0000000..4211d10 --- /dev/null +++ b/automaton/__init__.py @@ -0,0 +1,2 @@ +from .ismart import finish +from .ismart import export diff --git a/automaton/captcha/__init__.py b/automaton/captcha/__init__.py new file mode 100644 index 0000000..cc96be8 --- /dev/null +++ b/automaton/captcha/__init__.py @@ -0,0 +1 @@ +from .captcha import recognize diff --git a/automaton/captcha/captcha.py b/automaton/captcha/captcha.py new file mode 100644 index 0000000..65c65a1 --- /dev/null +++ b/automaton/captcha/captcha.py @@ -0,0 +1,38 @@ +from os import path + +import cv2 +import numpy as np +from loguru import logger +from numpy import average, dot, linalg + +base_path = path.join(path.split(__file__)[0], 'models') + + +def similarity(img_1, img_2): + images = [img_1, img_2] + vectors = [] + norms = [] + for image in images: + vector = [average(pixels) for pixels in image] + vectors.append(vector) + norms.append(linalg.norm(vector, 2)) + a, b = vectors + a_norm, b_norm = norms + return dot(a / a_norm, b / b_norm) + + +def recognize(img_content: bytes): + img = cv2.imdecode(np.asarray(bytearray(img_content), dtype=np.uint8), cv2.IMREAD_COLOR) + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + img = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY)[1] + models = [cv2.imread(path.join(base_path, f'{i}.png')) for i in range(10)] + code = '' + for i in range(4): + code += sorted( + [(f'{j}', similarity(img[4:24, 9 + i * 15:24 + i * 15], std)) for j, std in enumerate(models)], + key=lambda x: x[1] + )[-1][0] + logger.info(f'识别结果:{code}') + if len(code) != 4: + logger.warning('验证码长度不是 4 位') + return code diff --git a/automaton/captcha/models/0.png b/automaton/captcha/models/0.png new file mode 100644 index 0000000000000000000000000000000000000000..bd3709b0205907935dddea6e683bdf5ca762418e GIT binary patch literal 426 zcmV;b0agBqP)2$~ zL5^EV5JSO7=9hK<(>7YFBxg{77miR{psgeziS6fkc9*}BNCI@unHgYr=Sl)dvc0=p zZ2`3fdc9t+Br|i)Nn-o^{l4Gtwm@68-QAh7-B!{$$2M@zd7cN5Wc!?>B$9xcNzx0D z)XTQ6wrUI5ZYybbCGoH4d3G0268)6~*xi}o+TEGa1*okguD0i#=Xu&Dkz_l`%y6l# zq}S{9{eF{3N&?sh+5$7v7EoJBfbGxcGc#OmB_#p={Q0xHfRemkuOz!G3D{jpwvzz9 z+RluU*aq4HNo)g2Y;#Eh+6s^aBmvq=N@AN!S1+}d1hh?Jdv|9BNFvELFf%_tKRV~U z-*4de_jhK1-IbK&c^*L5?&^PSB>`=LB(}NQ+6E+jzu%crTS>N)v;~sbPBJr+fFxW> zqKm67*UTi5^!a?A=K*$CN!q^O?{g0Sl9UA4-I)Q@3y@^Ht?eXjcXwuhwvyWZ2c*7I U>ttDzS^xk507*qoM6N<$f&gvDCIA2c literal 0 HcmV?d00001 diff --git a/automaton/captcha/models/1.png b/automaton/captcha/models/1.png new file mode 100644 index 0000000000000000000000000000000000000000..f6cfa532e1283656087fa4baf01da8f8aae0c7d5 GIT binary patch literal 410 zcmV;L0cHM)P)3SF z0cs>c5CcI;)$9NNce<2!3@jKN;pchIIc<})O#;}~%Qnz9i6jHNJ2R31zw`^RyOL(c zK-(meY)hJ%-IX*my9?+AxUD4Hz|2Ux_a?Ee*X||(BpKMbfd2#Z z%kJ7vVtZzG7trh8YvA5{&XIKQJ?BWWJu|xt?C#9$?#vj_>)w0L0VE~S3((iS*TC-1 zj9$DxpHExcZFwb;WI(TbZ(cx>nc3a8ws}eN^E~GqNr3IPNo-3>@;pxxNo|uDkkmGb zUP%Dk_ueFu+9r|Ywzee!lKj#OkObJ)%YY=n_UZ8A*WcwliZ}((c;US6c)60{jKEwJoV_62QQ{cV=u$vMtGWTm8~Di2>W^ zoO^GQw)*3Fo^y^Q@9xjf&+g95?n;`O-L(xQ0owBV2ko$4ue1i}LjV8(07*qoM6N<$ Ef+jb|_y7O^ literal 0 HcmV?d00001 diff --git a/automaton/captcha/models/2.png b/automaton/captcha/models/2.png new file mode 100644 index 0000000000000000000000000000000000000000..8a995f8d35f0e85ab959574dd79d017aec71cb7c GIT binary patch literal 429 zcmV;e0aE^nP)3?V zL5dhb38(nq^fZ3?(;lJ094iP0T+ss80*X~vo zfZeUCBqY0gcO~(h^E}V)Ruur-l5iy<34o*|+ss80KoS5+BmtP2?k0&V36tQmEeXG7 zrn>>~%EcrAl904b5&%g6wyR1~659Z{yOIFROn2i-Lh^Z@nUQqPkpzG2e? z0cs>c5CcI;HOBvcuDxB#I~oQYVc_%mJkMiWk}p87B)cnVcMV7al1Tdc`uco6Gqbxh z(-x2f?5^9|29f~!V|SnDNy5v#l1SR!nb9A%CE0FkK(F1M8NGIQW_)oO&;{7u-I+1;7ZwY$&r%*^frk__mzyEC)9wv+hcYMaDucXwuNO9E_5 znwcc$oZU?#2{53SZC$p3-Ay8?Z4$tMB*2$mN!r>z=OoePwvucEl5Fep1=w!8yEEf+ z&df;K-I=mh=zNY+JRCDdC&|0000W~ literal 0 HcmV?d00001 diff --git a/automaton/captcha/models/4.png b/automaton/captcha/models/4.png new file mode 100644 index 0000000000000000000000000000000000000000..f5da1e9d3be7ad34c09fa677aebcf4aa09957362 GIT binary patch literal 395 zcmV;60d)R}P)2$~ z0d89{5JSO7N=WX1+w3Wn_L~s4D4M^f@ z3rGTN&rDk-ku)>D07)~W%XThFfRfD2-W$+$&H-$%wUPkm9NU1}0!h|dl4eE{(AG8} zDGAV4NhH~p^m@H)1LvH*x9zrTt)HJC{nb`il9>V4T3nL2+H#4jm!u@N+u8;s<E!R0`@7-2XTibx`nE}o@dds!e;%Y0Y z?cO^H(3TJNl9a?Y*Uap_CH3N(8MWn?B)0e7NqTX$)mvMxB)XpG`TqXqY73lm_TGS! pa7hAYX02trt?k$A1=wyY=?|&SJ-d~tyLA8n002ovPDHLkV1g?eyP*I8 literal 0 HcmV?d00001 diff --git a/automaton/captcha/models/5.png b/automaton/captcha/models/5.png new file mode 100644 index 0000000000000000000000000000000000000000..4fa56baf5685c3878efb68e541fcae0177bcf127 GIT binary patch literal 390 zcmV;10eSw3P)2e? z0g59*5Cg%KYV802cei2{Bh2Co=0r~U2SJ(cY(G^Y`5joe@Ot_l7P1SNb1FP&QUK(NdVhiy4nJ5 zlh{@-NnG#yemnB(V*stt76Q k*P)2e? z0g7We6a>Mnr{8Yh|Fi~6mAeQUCL(HEU}n^|Yb_uN|CpI`PTB%V05yNqnQ05mOj{r- z*E#2To+K)uww)P3{r&y@e!o%M<`1B9sX!8-?aV~wO2Xxp1ZX=mfC^;qz1B(s%*;7Q zZM)WLJ2QZq1c(Z(wb}yboSES|=d88b0_s{TY7#D$D+%yC&)z#pTR@GPfZb3=O+o58X$>R+nG_@&P2e? z0cssc5JSNy^%&p(wod-Ev|+%oRZ1cWkOW9-YrBu6w!k^(-mA7rY)i7;hewj_-Idf9 z(C*r9D`|IU21sH%N!yu8^8NjN?^Pd3No@1$-h0mBvE3F>A4xpBJ2P!10ZDA1b9T3_ z{{H>@_4T#8GXo^C4eTy3Gm?Nl>LUqg3$)eK*0!W1JZj66#5PY`Nw(VpNdW!R7HBIe zi6kD|JZ*s_wza#qfqU;chhOKMeQfjWuI|0ICGoTcW~RRYNp1B{TOf&Tpsj6SW`2Ht z&N*!*C9$m}l6H4y07*PaBmqfm18voo=idAM{hefHKA#UDiKlInnNgmZ0VK`L?zZK* z_nvcrB(~KSXnW4-{{U@mx0S@Bwm@6kJZ<@v#5PY7eLkOajwC=*5+I2rKoZbaQWEvC z4V-iS{Q09NiEUtaXXf+ye0_cC-h0lmod=Kv?C#9)w3XEMKem=()^F#h2LJ#707*qo IM6N<$f&kdU&j0`b literal 0 HcmV?d00001 diff --git a/automaton/captcha/models/8.png b/automaton/captcha/models/8.png new file mode 100644 index 0000000000000000000000000000000000000000..30e73e5a5ec5104165f3848091e3840f02b62916 GIT binary patch literal 417 zcmV;S0bc%zP)2e? z0cylR3-+m#5|-_+udkUo=iK+rBr{W0l9B*O(%t8rBwScYXq#k`sv>C{Kvi{j5=kWUU=jdH z07$B;nZc^6nMs0Wo3@ik0zeXr%l0|vzVDf-Dt;uvA}J|}ZIS>a2e? z0gBi_5CcKA#{U0%@4O==ECfwFPXq)%85jImg%c zeMxAW1h6d$uzk+i-I*p}4xd7dPaXbb2{0_bWBw3TEVXiE~?y4vpU%(MmC z0!g0dk<^y9Gb1U9q+XJMbIx3CCD~?oZ3B|t_x-NtdA{#^-xqkE=bWR9wz_P$wcX2h z65B}t+rPiR?5^#$z|16(WIIV)Nx;k``M!^~yE`+xo8*08NlCaQ%}f$WZRsTmkaW&@ zo<~w!E?vFYUE7j&7qGp%w%ghUB(;@v&Y|t@&P-cC{{bY~-d$b%C24m7NxM5UfTZVn z&N*!*C9#bp+evIE0VK7ZnV+AZnQ1F2$;{AJ(##|Q+DdAh#CBUrfTSdU0NF!i+#Huk QYXATM07*qoM6N<$f^jCrq5uE@ literal 0 HcmV?d00001 diff --git a/automaton/devtools.py b/automaton/devtools.py new file mode 100644 index 0000000..22c9f7a --- /dev/null +++ b/automaton/devtools.py @@ -0,0 +1,77 @@ +import asyncio +import ctypes +import json +import re + +import httpx +import websockets +from loguru import logger + +from configs import configs + +_default_port = configs['browser']['port'] +_executable = configs['browser']['executable'] +_args = configs['browser']['args'] + + +class Browser(object): + @classmethod + def connect(cls): + return cls(_default_port) + + @classmethod + def launch(cls): + ctypes.windll.shell32.ShellExecuteW( + None, 'runas', _executable, + ' '.join([f'--remote-debugging-port={_default_port}', *_args]), + None, 1 + ) + return cls(_default_port) + + def __init__(self, dev_port): + self.port = dev_port + + async def wait_for_book(self): # 等待「教材学习」页面 + async with httpx.AsyncClient() as client: + while True: + logger.info('等待「教材学习」页面...') + try: + pages = (await client.get(f'http://127.0.0.1:{self.port}/json')).json() + for page in pages: + if re.match(r'.*me.ismartlearning.cn/center/student/course/bookLearn\.html.*', page['url']): + return Page(page['url'], page['webSocketDebuggerUrl']) + await asyncio.sleep(2) # 这样写跟套 finally 有区别 + except httpx.ConnectError: + await asyncio.sleep(2) + + +class Page(object): + def __init__(self, url, dev_url): + self.id = 0 + self.url, self.dev_url = url, dev_url + + async def send(self, command, params): + async with websockets.connect(self.dev_url) as devtools: + await devtools.send(json.dumps({ + 'id': self.id, + 'method': command, + 'params': params + })) + self.id += 1 + return json.loads(await devtools.recv()) + + async def eval(self, script): + result = await self.send( + 'Runtime.evaluate', { + 'expression': script, + 'awaitPromise': True + } + ) + return result['result'] + + async def submit(self, book_id, chapter_id, task_id, score, seconds, percent, user_id): + model = 'NetBrowser.submitTask("%s", "%s", "%s", 0, "%d", %d, %d, "%s");' + result = f'%7B%22studentid%22:{user_id},%22testInfo%22:%7B%22answerdata%22:%22%22,%22markdatao%22:%22%22%7D%7D' + return await self.eval( + model % (book_id, chapter_id, task_id, score, seconds, percent, result) + ) diff --git a/automaton/ismart.py b/automaton/ismart.py new file mode 100644 index 0000000..2b3d9f9 --- /dev/null +++ b/automaton/ismart.py @@ -0,0 +1,122 @@ +import json +import os +import pickle +import urllib.parse as parser + +from bs4 import BeautifulSoup +from loguru import logger +from random import random, randint + +from configs import configs +from .devtools import Browser +from .markdown import generate_md +from .spider import Spider + +random_args = { # 不同题型对应的随机时长和分数范围 + '1': { # 单选题 + 'time': (20, 60), # 完成时长 / 秒 + 'score': 1 # 得分 (归一化, 向上至满分) + }, + '2': { # 多选题 + 'time': (40, 120), + 'score': 0.9 + }, + '3': { # 判断题 + 'time': (20, 50), + 'score': 1 + }, + '4': { # 填空题 + 'time': (60, 180), + 'score': 1 + }, + '6': { # 连线题 + 'time': (60, 180), + 'score': 0.8 + }, + '8': { # 匹配题 + 'time': (30, 90), + 'score': 1 + }, + '9': { # 口语跟读 + 'time': (15, 30), + 'score': 0.8 + }, + '10': { # 短文改错 + 'time': (120, 180), + 'score': 0.7 + }, + '11': { # 选词填空 + 'time': (30, 90), + 'score': 0.9 + }, +} + + +def _random_progress(paper): + paper = BeautifulSoup(paper, 'lxml-xml') + questions = paper.select('element[knowledge]:has(> question_type)') + if questions: + total_score = 0 + my_score, my_time = 0, 0 + for que in questions: + qt_type = que.select_one('question_type').text + qt_score = int(que.select_one('question_score').text) + total_score += qt_score + + rate = 1 - (1 - random_args[qt_type]['score']) * random() + my_score += qt_score * rate + my_time += randint(*random_args[qt_type]['time']) + return int(100 * my_score / total_score), my_time + return 100, 5 + + +async def export(): # 导出某书籍的答案 + browser = Browser.connect() + page = await browser.wait_for_book() + params = dict(parser.parse_qsl(parser.urlsplit(page.url).query)) + # noinspection PyTypeChecker + book_id, course_id = params['bookId'], params['courseId'] + if not os.path.exists(f'.cache/books/{book_id}'): + async with Spider() as spider: + await spider.login(**configs['user']) + book = await spider.book_info(book_id) + book['courseId'] = course_id + tasks = await spider.get_tasks(book, tree=True) + await spider.download_tree(tasks) + with open(f'.cache/books/{book_id}/Tree.pck', 'rb') as fp: + generate_md(pickle.load(fp)) + + +async def finish(): # 直接完成某书籍的任务 + browser = Browser.connect() + page = await browser.wait_for_book() + params = dict(parser.parse_qsl(parser.urlsplit(page.url).query)) + # noinspection PyTypeChecker + book_id, course_id = params['bookId'], params['courseId'] + async with Spider() as spider: + await spider.login(**configs['user']) + if not os.path.exists(f'.cache/books/{book_id}'): + book = await spider.book_info(book_id) + book['courseId'] = course_id + tasks = await spider.get_tasks(book, tree=True) + await spider.download_tree(tasks) + user_id = (await spider.get_user())['data']['uid'] + logger.info('正在提交任务...') + for file in os.listdir(f'.cache/books/{book_id}'): + paper_id, ext = os.path.splitext(file) + if ext != '.json': + continue + + with open(f'.cache/books/{book_id}/{file}') as fp: + data = json.load(fp) + task = data['task'] + paper = data['paperData'] + score, time = _random_progress(paper) + result = await page.submit(book_id, task['chapterId'], task['id'], score, time, 100, user_id) + if result['wasThrown'] or not result['result']['value']: + logger.warning(f'任务 {task["name"]} [paperId: {paper_id}] 可能提交失败,请留意最终结果!') + logger.info('全部提交完成!') + + +async def finish_all(): # Todo: 全刷了? + pass diff --git a/automaton/markdown/__init__.py b/automaton/markdown/__init__.py new file mode 100644 index 0000000..5d77c46 --- /dev/null +++ b/automaton/markdown/__init__.py @@ -0,0 +1 @@ +from .md import generate_md diff --git a/automaton/markdown/formatter.py b/automaton/markdown/formatter.py new file mode 100644 index 0000000..e052814 --- /dev/null +++ b/automaton/markdown/formatter.py @@ -0,0 +1,34 @@ +import re + + +class Formatter: + @staticmethod + def fix_img(text): # 处理 标签 + return re.sub('', '「暂不支持图片显示澳」', text) + + @staticmethod + def rm_lgt(text): # 处理括号对 + return re.sub('<.+?>', '', text) + + @staticmethod + def fix_uline(text): # 处理下划线 + return re.sub('_{3,}', lambda mch: '\\_' * len(mch.group()), text) + + @staticmethod + def rm_head(text): # 处理数字标号 + return re.sub(r'^(?:\d+(?:\.| +\b))+\d+ ', '', text) + + @staticmethod + def fix_lf(text): # 处理换行 + text = re.sub('
', '\n\n', text) + return re.sub('

(.+?)

', lambda mch: mch.group(1) + '\n\n', text) + + @staticmethod + def fix_space(text): + return re.sub('(?: )+', ' ', text) + + +def fix(text, func_ptrs): + for func in func_ptrs: + text = getattr(Formatter, func)(text) + return text diff --git a/automaton/markdown/generator.py b/automaton/markdown/generator.py new file mode 100644 index 0000000..68815e5 --- /dev/null +++ b/automaton/markdown/generator.py @@ -0,0 +1,133 @@ +""" +不同 question type 对应的解析方法 +传入两个参数 ( question, answer, output ), 将输出行依次 append 到 output 队列中 +""" + +import re + +from .formatter import fix + + +class Generators: + @staticmethod + def type_1(que, ans, output): # 单选题 + # 提取题目内容 + question = que.select_one("question_text").text + question = fix(question, ('rm_lgt', 'fix_uline', 'fix_space')) + output.append(f'* **{question}**\n') + # 提取答案 + ans_id = que.attrs['id'] + corrects = set(ans.select_one(f'[id="{ans_id}"] > answers').text) + # 生成对应 Markdown + options = que.select('options > *') + for opt in options: + opt_id = opt.attrs['id'] + answer_text = fix(opt.text, ('rm_lgt', 'fix_space')) + if opt_id in corrects: # 高亮正确答案 + output.append(f'

  {opt_id}. {answer_text}

\n') + else: + output.append(f'  {opt_id}. {answer_text}\n') + + @staticmethod + def type_2(*args): # 多选题 + return Generators.type_1(*args) + + @staticmethod + def type_3(que, ans, output): # 判断题 + question = que.select_one("question_text").text + question = fix(question, ('rm_lgt', 'fix_uline', 'fix_space')) + output.append(f'* **{question}**\n') + # 提取答案 + ans_id = que.attrs['id'] + correct = ans.select_one(f'[id="{ans_id}"] > answers').text + # 生成对应 Markdown + output.append(f'* 答案:「**{correct}**」\n') + + @staticmethod + def type_4(que, ans, output): # 填空题 + # 提取题目内容 + question = que.select_one('question_text').text + question = re.sub('
', '\n', question) + question = fix(question, ('rm_lgt', 'fix_uline', 'fix_space')) + # 提取答案 + ans_id = que.attrs['id'] + corrects = ans.select(f'[id="{ans_id}"] answers > answer') + # 执行替换 + for ans in corrects: + question = question.replace( + '{{' + ans.attrs['id'] + '}}', + f' [{ans.text}] ' + ) + output.append(question + '\n') + + @staticmethod + def type_6(que, ans, output): # 连线题 + # 提取题目内容 + question = que.select_one('question_text').text + question = fix(question, ('rm_lgt', 'fix_uline', 'fix_space')) + output.append(f'* **{question}**\n') + # 提取答案 + options = que.select('options > *') + pairs = {} + for opt in options: + opt_id = opt.attrs['id'] + if opt_id not in pairs: + pairs[opt_id] = [0, 0] + flag = int(opt.attrs['flag']) + pairs[opt_id][flag - 1] = opt.text + output.append('| Part-A | Part-B |') + output.append('| :- | :- |') + for gp_id in pairs: + left = fix(pairs[gp_id][0], ('fix_img', 'rm_lgt', 'fix_uline', 'fix_space')).replace('|', '\\|') + right = fix(pairs[gp_id][1], ('fix_img', 'rm_lgt', 'fix_uline', 'fix_space')).replace('|', '\\|') + output.append(f'| {left} | {right} |') + output.append('') + + @staticmethod + def type_8(que, ans, output): # 匹配题 + # 提取题目内容 + question = que.select_one('question_text').text + question = fix(question, ('rm_lgt', 'fix_uline')) + # 提取答案 + ans_id = que.attrs['id'] + corrects = ans.select(f'[id="{ans_id}"] answers > answer') + # 执行替换 + question = fix(question, ('fix_lf', 'rm_lgt', 'fix_space')) + for ans in corrects: + question = question.replace( + '{{' + ans.attrs['id'] + '}}', + f' {ans.text} ' + ) + output.append(question + '\n') + + @staticmethod + def type_9(que, ans, output): # 口语跟读 + output.append('「口语跟读」\n') + + @staticmethod + def type_10(que, ans, output): # 短文改错 + output.append('* **短文改错**') + ans_id = que.attrs['id'] + corrects = ans.select(f'[id="{ans_id}"] answers > answer') + for i, ans in enumerate(corrects): + desc = re.sub('(?<=[A-Za-z0-9])(?=[\u4e00-\u9fa5])', ' ', ans.attrs['desc']) + desc = re.sub('(?<=[\u4e00-\u9fa5])(?=[A-Za-z0-9])', ' ', desc) + output.append(f'{i + 1}. {desc}\n') + output.append('') + + @staticmethod + def type_11(que, ans, output): # 选词填空 + # 提取题目内容 + question = que.select_one('question_text').text + question = fix(question, ('fix_uline', 'fix_lf', 'rm_lgt', 'fix_space')) + options = {opt.attrs['id']: opt.text for opt in que.select('options > option[flag="2"]')} + # 提取答案 + ans_id = que.attrs['id'] + corrects = ans.select(f'[id="{ans_id}"] answers > answer') + # 执行替换 + for ans in corrects: + question = question.replace( + '{{' + ans.attrs['id'] + '}}', + f' {options[ans.text]} ' + ) + output.append(question + '\n') diff --git a/automaton/markdown/md.py b/automaton/markdown/md.py new file mode 100644 index 0000000..3d7c9de --- /dev/null +++ b/automaton/markdown/md.py @@ -0,0 +1,59 @@ +import json +from collections import deque + +from bs4 import BeautifulSoup +from loguru import logger + +from .formatter import fix +from .generator import Generators + +_output = deque() + + +# 解码题目与答案 xml +def decode(que, ans, qt_type): + getattr(Generators, f'type_{qt_type}')(que, ans, _output) + + +# 生成每个 paper 的答案 +def unescape(node, book_id): + paper_id = node.task['paperId'] + with open(f'.cache/books/{book_id}/{paper_id}.json', 'r') as fp: + task = json.load(fp) + paper = BeautifulSoup(task['paperData'], 'lxml-xml') + answer = BeautifulSoup(task['answerData'], 'lxml-xml') + questions = paper.select('element[knowledge]:has(> question_type)') + if questions: + for que in questions: + qt_type = int(que.select_one('question_type').text) + decode(que, answer, qt_type) + return True + return False + + +# 深搜创建目录树 +def dfs(node, book_id, depth=2): + if title := node.task['name']: + logger.info(f'{". " * (depth - 1)}{title}') + title = fix(title, ('rm_head',)) + _output.append(f'{"#" * depth} {title}\n') + flag = False + if 'paperId' in node.task: + flag = unescape(node, book_id) + for ch in node.children: + if dfs(ch, book_id, depth + 1): + flag = True + if not flag: + _output.pop() + return flag + + +def generate_md(root): # 生成答案 + book_id = root.task['book_id'] + for ch in root.children: + dfs(ch, book_id) + with open('.cache/answer.md', 'w', encoding='utf-8') as file: + while len(_output): + line = _output.popleft() + file.write(line + '\n') + logger.info('Done.') diff --git a/automaton/spider.py b/automaton/spider.py new file mode 100644 index 0000000..46a24ed --- /dev/null +++ b/automaton/spider.py @@ -0,0 +1,184 @@ +import asyncio +import json +import os +import pickle +from hashlib import md5 +from random import random + +import httpx +from loguru import logger + +from .captcha import recognize + + +class Tree: + def __init__(self, task): + self.task = task + self.children = [] + + +class Spider(httpx.AsyncClient): + def __init__(self): + super().__init__() + + async def login(self, username, password): # 账号密码登录 + logger.info('正在获取验证码...') + result = await self.get(f'http://sso.ismartlearning.cn/captcha.html?{random()}') + code = recognize(result.content) + token = md5(password.encode()).hexdigest() + info = (await self.post( + 'http://sso.ismartlearning.cn/v2/tickets-v2', + data={ + 'username': username, + 'password': md5(token.encode() + b'fa&s*l%$k!fq$k!ld@fjlk').hexdigest(), + 'captcha': code + }, + headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'Origin': 'http://me.ismartlearning.cn', + 'Referer': 'http://me.ismartlearning.cn/' + } + )).json() + logger.debug(info['result']) + + if info['result']['code'] != -26: + raise AssertionError(f'[!] 登录失败: {info["result"]["msg"]}') + return info['result'] + + async def get_courses(self): # 获取用户课程列表 + logger.info('正在获取课程列表...') + courses = (await self.post( + 'https://school.ismartlearning.cn/client/course/list-of-student?status=1', + data={ + 'pager.currentPage': 1, + 'pager.pageSize': 32767 + } + )).json()['data'] + return courses['list'] + + async def get_books(self, course): # 获取某课程的书籍列表 + logger.info('正在获取书籍列表...') + await self.get_courses() # 必须有这个请求,否则后面会报错 + books = (await self.post( + 'http://school.ismartlearning.cn/client/course/textbook/list-of-student', + data={ + 'courseId': course['courseId'] + } + )).json()['data'] + return books + + @staticmethod + def _merge_tasks(tasks): # 将任务列表重组成树形结构 + id_record = {task['id']: Tree(task) for task in tasks} + root = Tree({ + 'book_id': tasks[0]['book_id'], + 'unitStudyPercent': 0 + }) + + for task_id in id_record: + node = id_record[task_id] + node_name = (f'{node.task["name"]} ' if 'name' in node.task else '') + f'[id:{node.task["id"]}]' + if 'parent_id' in node.task: + if (parent_id := node.task['parent_id']) in id_record: + id_record[parent_id].children.append(node) + else: + logger.warning(f'任务已忽略(父节点不存在):{node_name}') + else: + root.children.append(node) + + return root + + async def get_tasks(self, book, tree=False): # 获取某书籍的任务列表 + logger.info('正在获取任务列表...') + await self.post('http://school.ismartlearning.cn/client/course/textbook/chapters') + tasks = (await self.post( + 'http://school.ismartlearning.cn/client/course/textbook/chapters', + data={key: book[key] for key in ('bookId', 'bookType', 'courseId')} + )).json()['data'] + if tree: + return self._merge_tasks(tasks) + else: + return tasks + + async def get_paper(self, paper_id): # 获取任务点信息(包括题目和答案) + ticket = (await self.post( + 'http://sso.ismartlearning.cn/v1/serviceTicket', + data={ + 'service': 'http://xot-api.ismartlearning.cn/client/textbook/paperinfo' + } + )).json()['data']['serverTicket'] + logger.debug(f'Ticket: {ticket}') + paper_info = (await self.post( + 'http://xot-api.ismartlearning.cn/client/textbook/paperinfo', + data={ + 'paperId': paper_id + }, + headers={ + 'Origin': 'http://me.ismartlearning.cn', + 'Referer': 'http://me.ismartlearning.cn/', + 'X-Requested-With': 'XMLHttpRequest', + 'Accept-Encoding': 'gzip, deflate' + }, + params={ + 'ticket': ticket + } + )).json()['data'] + return paper_info + + async def download_tree(self, root): + async def download(task): + paper_id = task['paperId'] + filepath = f'.cache/books/{root.task["book_id"]}/{paper_id}.json' + if os.path.exists(filepath): + return + async with limit: # 防止并发过高 + result = await self.get_paper(paper_id) + result['task'] = task # 继续存入 Task + with open(filepath, 'w') as file: + json.dump(result, file) + + def dfs(src): + if 'paperId' in (task := src.task): + logger.info(f'添加任务:{task["name"]}') + tasks.append(download(task)) + for child in src.children: + dfs(child) + + logger.info('开始下载试题及答案...') + os.makedirs(f'.cache/books/{root.task["book_id"]}', exist_ok=True) + with open(f'.cache/books/{root.task["book_id"]}/Tree.pck', 'wb') as fp: + pickle.dump(root, fp) + tasks, limit = [], asyncio.Semaphore(4) + dfs(root) + await asyncio.gather(*tasks) + logger.info('下载完成.') + + async def get_user(self): + return (await self.post( + 'https://school.ismartlearning.cn/client/user/student-info') + ).json() + + async def book_info(self, book_id): + ticket = (await self.post( + 'http://sso.ismartlearning.cn/v1/serviceTicket', + data={ + 'service': 'http://book-api.ismartlearning.cn/client/v2/book/info' + } + )).json()['data']['serverTicket'] + book_info = (await self.post( + 'http://book-api.ismartlearning.cn/client/v2/book/info', + headers={ + 'Origin': 'http://me.ismartlearning.cn', + 'Referer': 'http://me.ismartlearning.cn/', + 'X-Requested-With': 'XMLHttpRequest', + 'Accept-Encoding': 'gzip, deflate' + }, + params={ + 'ticket': ticket + }, + data={ + 'bookId': book_id, + 'bookType': 0 + } + )).json() + return book_info['data'] diff --git a/configs.py b/configs.py new file mode 100644 index 0000000..cdbd37f --- /dev/null +++ b/configs.py @@ -0,0 +1,9 @@ +import yaml + + +with open('configs.yml', 'r', encoding='utf-8') as _fp: + configs = yaml.safe_load(_fp) + +if __name__ == '__main__': + import json + print(json.dumps(configs, indent=4)) diff --git a/configs.yml b/configs.yml new file mode 100644 index 0000000..dc95ad5 --- /dev/null +++ b/configs.yml @@ -0,0 +1,13 @@ +# Todo: 每次 commit 之前务必清除账号密码 + +# iSmart 客户端相关配置 +browser: + executable: Z:\iSmart\client\iSmart.exe # 客户端可执行文件的路径 + args: # 启动 iSmart 客户端时额外提供的参数 + - --disable-web-security + port: 9222 # devTools 调试端口 + +# 用户相关配置(务必保持账号密码与 iSmart 中已登录的相同) +user: + username: <用户名> # 手机号 + password: <密码> # 密码 diff --git a/main.py b/main.py new file mode 100644 index 0000000..cf7e695 --- /dev/null +++ b/main.py @@ -0,0 +1,14 @@ +import asyncio +from automaton import finish + + +async def main(): + await finish() + + +if __name__ == '__main__': + loop = asyncio.new_event_loop() + try: + loop.run_until_complete(main()) + finally: + loop.close()