# Reconstructed from git patch f95df9b93857c03900defe911489d24f68ad24df
# (author: JesterHey, 2023-12-04, subject: "爬虫相关" / "crawler related"),
# which created get_params.py.
'''
This module collects the parameters of an assignment:
1. the number of levels
2. for every level:
    a. the task description
    b. the programming requirements
    c. all the code shown in the editor
3. the course id and the practice (shixun) id
4. everything is written to a local JSON file
'''

# Import the required modules
import getpass
import json
import os
import re
import time

import requests
from lxml import etree
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Configuration
opt = Options()
# Keep the browser window open after the script finishes.
opt.add_experimental_option('detach', True)
# Raw string: the backslashes in a Windows path must not be read as escapes.
# NOTE(review): this path is not handed to the driver anywhere in this script.
chrome_driver = r'D:\ChromeDownload\chromedriver-win64\chromedriver-win64'

# XPath of the link that expands the level list and of the control closing it.
LEVEL_LIST_OPEN_XPATH = '//*[@id="task-left-panel"]/div[1]/a[1]'
LEVEL_LIST_CLOSE_XPATH = '//*[@id="task-left-panel"]/div[3]/div[1]'

# TODO: the release version needs a proper UI for these inputs.
# Credentials must never live in the source file: they are read from the
# environment and, when absent, asked for interactively (password not echoed).
url = os.environ.get(
    'EDUCODER_URL',
    'https://www.educoder.net/tasks/27V4D95N/1191515/vmxpzae734bj?coursesId=27V4D95N',
)
user_name = os.environ.get('EDUCODER_USER') or input('请输入用户名:')
password = os.environ.get('EDUCODER_PASSWORD') or getpass.getpass('请输入密码:')

# The variable is called `safari` for historical reasons; it drives Chrome.
safari = Chrome(options=opt)
safari.get(url)
time.sleep(5)  # wait for the page to load

# Simulate the login
safari.find_element(By.ID, 'login').send_keys(user_name)
password_box = safari.find_element(By.ID, 'password')
password_box.send_keys(password)
password_box.send_keys(Keys.ENTER)
time.sleep(4)

# Collect the cookies and the User-Agent of the logged-in browser session
Cookie = safari.get_cookies()
User_Agent = safari.execute_script('return navigator.userAgent')
# Look the two session cookies up by name; the order of the list returned by
# get_cookies() is not something to rely on.
cookie_values = {item['name']: item['value'] for item in Cookie}
try:
    cookie = (
        f'autologin_trustie={cookie_values["autologin_trustie"]}; '
        f'_educoder_session={cookie_values["_educoder_session"]}'
    )
except KeyError as err:
    raise RuntimeError(
        f'session cookie {err} not found, the login probably failed'
    ) from err

# Determine the number of levels
# Expand the level list
safari.find_element(By.XPATH, LEVEL_LIST_OPEN_XPATH).click()
time.sleep(2)
# The number of levels equals the number of elements whose class is
# "flex-container challenge-title space-between"
html = etree.HTML(safari.page_source)
task_num = int(html.xpath(
    'count(//*[@class="flex-container challenge-title space-between"])'
))
# Close the level list
safari.find_element(By.XPATH, LEVEL_LIST_CLOSE_XPATH).click()

# Each level is reached through a link of the level list, for example:
# /html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[3]/div[1]/a
# /html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[4]/div[1]/a

# NOTE(review): the HTML markup inside these two patterns was lost in the copy
# this file was reconstructed from; only the heading text and an unnamed
# `(?P.*?)` group survived, which is not a valid regex. The patterns below
# capture everything between the named heading and the next heading.
# Verify them against the markup of the live page.
obj1 = re.compile(
    r'<h[1-6][^>]*>\s*任务描述\s*</h[1-6]>(?P<describe>.*?)<h[1-6][^>]*>',
    re.S,
)
obj2 = re.compile(
    r'<h[1-6][^>]*>\s*编程要求\s*</h[1-6]>(?P<require>.*?)<h[1-6][^>]*>',
    re.S,
)

# Parameters of all levels, keyed by challenge id; dumped to JSON at the end.
total = {}
# Stays None until one id request succeeds; it names the output file.
shixun_id = None

# TODO: apparently only practice (shixun) assignments expose these panels,
# other kinds such as programming assignments do not; check the assignment
# type (e.g. by probing LEVEL_LIST_OPEN_XPATH) before scraping.
try:
    for level in range(1, task_num + 1):
        # Open the level list and jump to the level
        safari.find_element(By.XPATH, LEVEL_LIST_OPEN_XPATH).click()
        time.sleep(2)
        safari.find_element(
            By.XPATH,
            f'/html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[{level}]/div[1]/a',
        ).click()
        time.sleep(4)

        # The task identifier is the last path segment of the current URL,
        # without the query string. The id request also needs the Cookie,
        # User-Agent and Referer headers of the logged-in session.
        cur_url = safari.current_url
        identity = cur_url.split('/')[-1].split('?')[0]
        id_url = f'https://data.educoder.net/api/tasks/{identity}.json?'
        headers = {
            'Cookie': cookie,
            'User-Agent': User_Agent,
            'Referer': cur_url,
        }
        try:
            response = requests.get(url=id_url, headers=headers, timeout=10)
            challenge = response.json()['challenge']
            challenge_id = challenge['id']
            shixun_id = challenge['shixun_id']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            print('获取课程id失败')
            # Without an id there is no key to store this level under; going
            # on would reuse the id of the previous level and overwrite it.
            continue

        # Task description and programming requirements (when present)
        page_source = safari.page_source
        describe = obj1.findall(page_source)
        require = obj2.findall(page_source)

        # The editor renders every code line as a div with class "view-line";
        # join their texts with newlines to rebuild the code.
        code_lines = safari.find_elements(By.CLASS_NAME, 'view-line')
        code = '\n'.join(line.text for line in code_lines)

        # Store the parameters of this level in the aggregate dict
        total[challenge_id] = {
            'describe': describe[0] if describe else '',
            'require': require[0] if require else '',
            'code': code,
            # Marks whether the user confirmed the answer as correct.
            'verified': False,
        }
except Exception as err:
    # Best effort: keep whatever was collected so far, but let
    # KeyboardInterrupt / SystemExit propagate.
    print('获取参数失败')
    print(repr(err))

# Write the parameters to a local JSON file named after shixun_id; every key
# is a challenge id and every value holds the parameters of that level.
# TODO: this data is meant to be uploaded to the cloud; the file is currently
# overwritten on every run, the write logic still has to be reworked.
if shixun_id is None:
    print('未获取到实训id,未写入文件')
else:
    with open(f'{shixun_id}.json', 'w', encoding='utf-8') as f:
        json.dump(total, f, ensure_ascii=False, indent=4)