From 3c06e34ce9bc5398fd41754d0612aceeff3bc23f Mon Sep 17 00:00:00 2001 From: JesterHey <144512889+JesterHey@users.noreply.github.com> Date: Tue, 5 Dec 2023 15:10:18 +0800 Subject: [PATCH] Add files via upload --- cloud.py | 21 ++--- get_answer.py | 3 + get_params.py | 208 +++++++++++++++++++++++++++++--------------------- main.py | 7 ++ 4 files changed, 142 insertions(+), 97 deletions(-) create mode 100644 main.py diff --git a/cloud.py b/cloud.py index 500bf01..07fc978 100644 --- a/cloud.py +++ b/cloud.py @@ -6,7 +6,8 @@ import get_answer #获取本地json文件 def get_json(file): return [i for i in os.listdir(file) if i.endswith('.json')] -json_name = get_json(os.getcwd())[0] +#获取json文件名 +json_name = get_json('./')[0] # 阿里云 OSS 配置 access_key_id = 'LTAI5t927vdUFZa9NRnWfrL3' access_key_secret = 'FbXoJUqe545eZhWFvADvGcFwatsGAx' @@ -15,13 +16,13 @@ endpoint = 'oss-cn-shenzhen.aliyuncs.com' # 创建Bucket对象,所有Object相关的接口都可以通过Bucket对象来进行 bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name) # 判断文件是否存在 -exist = bucket.object_exists(json_name) +def is_exist(name): + return bucket.object_exists(name) + +exist = is_exist(json_name) # 如果文件存在,则下载到本地并覆盖本地文件 -if exist: - bucket.get_object_to_file(json_name, json_name) -else: #否则调用OpenAI API获得答案,并上传到云服务器 - new_data = get_answer.new_data - #将new_data变为json格式写入本地并上传到云端 - with open(json_name, 'w', encoding='utf-8') as f: - json.dump(new_data, f, ensure_ascii=False, indent=4) - bucket.put_object_to_file(json_name,json_name) \ No newline at end of file +def download(name): + bucket.get_object_to_file(name, name) + +def upload(name): + bucket.put_object_from_file(name, name) \ No newline at end of file diff --git a/get_answer.py b/get_answer.py index 8a1732b..376eaee 100644 --- a/get_answer.py +++ b/get_answer.py @@ -18,6 +18,8 @@ file = os.getcwd() ''' 后续准备与云服务器连接,先判断当前json是否已在云服务器上,如果在,则直接调用, 节省调用API的时间和资费,否则,调用API,获得答案,并将答案存入云服务器 + +12.4晚更新: 阿里云服务器申请成功! ''' json_name = get_json(file)[0] @@ -78,6 +80,7 @@ def get_answer_from_api(jsonfile:dict,client:AsyncOpenAI,promot:str) -> dict: # 运行主函数 + #issue:由于异步获得的答案顺序不确定,需要处理 return asyncio.run(main(data=data)) new_data = get_answer_from_api(jsonfile=data,client=client,promot=promot) print(new_data) diff --git a/get_params.py b/get_params.py index 3d979c1..f4aff53 100644 --- a/get_params.py +++ b/get_params.py @@ -19,6 +19,7 @@ import re from lxml import etree import time import requests +from cloud import is_exist,download,upload #配置参数 opt = Options() opt.add_experimental_option('detach', True) @@ -30,92 +31,125 @@ chrome_driver = 'D:\ChromeDownload\chromedriver-win64\chromedriver-win64' url = 'https://www.educoder.net/tasks/27V4D95N/1191515/vmxpzae734bj?coursesId=27V4D95N' user_name = 'hnu202311020126' password = 'hzy123456' -safari = Chrome() -safari.get(url) -safari.implicitly_wait(10) -#模拟登录 -safari.find_element(By.ID, 'login').send_keys(user_name) -safari.find_element(By.ID, 'password').send_keys(password) -safari.find_element(By.ID, 'password').send_keys(Keys.ENTER) -time.sleep(2) -#获取cookie,User-Agent -Cookie = safari.get_cookies() -User_Agent = safari.execute_script('return navigator.userAgent') -cookie = f'autologin_trustie={Cookie[1]["value"]}; _educoder_session={Cookie[0]["value"]}' -#获取关卡数 -#点击展开关卡页面 -safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[1]/a[1]').click() -time.sleep(2) -#关卡数量由 class = "flex-container challenge-title space-between" 的元素数量决定 -htmltxt = safari.page_source -html = etree.HTML(htmltxt) -task_num = html.xpath('count(//*[@class="flex-container challenge-title space-between"])') -task_num = int(task_num) -#关闭关卡页面 -safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[3]/div[1]').click() -#对于每一关,获取参数 -#每一关的参数由以下元素组成: -''' -/html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[3]/div[1]/a -/html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[4]/div[1]/a -''' -obj1 = re.compile(r'

任务描述

(?P.*?)

',re.S) -obj2 = re.compile(r'

编程要求

(?P.*?)

',re.S) -#初始化一个总的json文件,名称为课程的id,这一部分等会实现 -total = {} -#另外,目前好像只有实训作业有这些参数,其他的作业例如编程作业就没有,所以先判断一下是否为实训作业 -# def is_practice(): -# try: -# safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[1]/a[1]') -# return True -# except BaseException: -# return False -i=1 -try: - while i <= task_num: - safari.implicitly_wait(10) - safari.find_element(By.XPATH, '//*[@id="task-left-panel"]/div[1]/a[1]').click() - safari.implicitly_wait(10) - safari.find_element(By.XPATH,f'/html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[{i}]/div[1]/a').click() - time.sleep(3) - #获取课程id -> 根据url中?前面的,最后一个/后面的那部分参数构造请求,同时,似乎还需要用到cookie,User-Agent和Referer参数,这些统一用selenium在登陆后获取并组装成headers - #获取cookie,User-Agent和Referer - cur_url=Referer = safari.current_url - identity = cur_url.split('/')[-1].split('?')[0] - id_url = f'https://data.educoder.net/api/tasks/{identity}.json?' - #获取课程id - headers = { - 'Cookie':cookie, - 'User-Agent':User_Agent, - 'Referer':Referer - } +# 另外,目前好像只有实训作业有这些参数,其他的作业例如编程作业就没有,所以先判断一下是否为实训作业,可以通过用户输入的url判断 +# 主要是看educoder.net/后面是否有tasks,如果有,则是实训作业,否则,不是实训作业 +def is_practice(url): + obj=re.compile(r'www.educoder.net/tasks') + if obj.search(url): + return True + else: + return False +if is_practice(url): + #构造selenium对象 + safari = Chrome() + safari.get(url) + #模拟登录 + safari.implicitly_wait(10) + safari.find_element(By.ID, 'login').send_keys(user_name) + safari.find_element(By.ID, 'password').send_keys(password) + safari.find_element(By.ID, 'password').send_keys(Keys.ENTER) + time.sleep(2) + #获取cookie,User-Agent + Cookie = safari.get_cookies() + User_Agent = safari.execute_script('return navigator.userAgent') + cookie = f'autologin_trustie={Cookie[1]["value"]}; _educoder_session={Cookie[0]["value"]}' + #先获取到shixun_id便于先判断云端文件是否存在 + cur_url = url + identity = cur_url.split('/')[-1].split('?')[0] + id_url = f'https://data.educoder.net/api/tasks/{identity}.json?' + headers = { + 'Cookie':cookie, + 'User-Agent':User_Agent, + 'Referer':cur_url + } + response = requests.get(url=id_url, headers=headers) + shixun_id = dict(response.json())['challenge']['shixun_id'] + #判断云端文件是否存在 + exist = is_exist(f'{shixun_id}.json') + if exist: #存在,则跳转到云端下载并终止本程序 + print('云端文件已存在,正在下载') + download(f'{shixun_id}.json') + safari.close() + exit() + else: #不存在,则继续执行本程序 + print('云端文件不存在,开始获取参数') + #获取关卡数 + #点击展开关卡页面 + safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[1]/a[1]').click() + time.sleep(2) + #关卡数量由 class = "flex-container challenge-title space-between" 的元素数量决定 + htmltxt = safari.page_source + html = etree.HTML(htmltxt) + task_num = html.xpath('count(//*[@class="flex-container challenge-title space-between"])') + task_num = int(task_num) + #关闭关卡页面 + safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[3]/div[1]').click() + #对于每一关,获取参数 + #每一关的参数由以下元素组成: + ''' + /html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[3]/div[1]/a + /html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[4]/div[1]/a + ''' + obj1 = re.compile(r'

任务描述

(?P.*?)

',re.S) + obj2 = re.compile(r'

编程要求

(?P.*?)

',re.S) + #初始化一个总的json文件,名称为课程的id + total = {} + #另外,目前好像只有实训作业有这些参数,其他的作业例如编程作业就没有,所以先判断一下是否为实训作业 + # def is_practice(): + # try: + # safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[1]/a[1]') + # return True + # except BaseException: + # return False + i=1 try: - response = requests.get(url=id_url,headers=headers) - challenge_id = dict(response.json())['challenge']['id'] - shixun_id = dict(response.json())['challenge']['shixun_id'] + while i <= task_num: + safari.implicitly_wait(10) + safari.find_element(By.XPATH, '//*[@id="task-left-panel"]/div[1]/a[1]').click() + safari.implicitly_wait(10) + safari.find_element(By.XPATH,f'/html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[{i}]/div[1]/a').click() + time.sleep(3) + #获取课程id -> 根据url中?前面的,最后一个/后面的那部分参数构造请求,同时,似乎还需要用到cookie,User-Agent和Referer参数,这些统一用selenium在登陆后获取并组装成headers + #获取cookie,User-Agent和Referer + cur_url=Referer = safari.current_url + identity = cur_url.split('/')[-1].split('?')[0] + id_url = f'https://data.educoder.net/api/tasks/{identity}.json?' + #获取课程id + headers = { + 'Cookie':cookie, + 'User-Agent':User_Agent, + 'Referer':Referer + } + try: + response = requests.get(url=id_url,headers=headers) + challenge_id = dict(response.json())['challenge']['id'] + shixun_id = dict(response.json())['challenge']['shixun_id'] + except BaseException: + print('获取课程id失败') + #获取任务描述(如果存在的话) + page_source = safari.page_source + describe = obj1.findall(page_source) + #获取编程要求(如果存在的话) + require = obj2.findall(page_source) + #获取编辑器中的代码,由于代码都是class = "view-line"的div,先找到所有class = "view-line"的div,获取其中的所有文本,再把不同行的代码用\n连接起来 + code = safari.find_elements(By.CLASS_NAME,'view-line') + code = '\n'.join([i.text for i in code]).lstrip('\n') + #把参数存入字典,再转换为json格式 + task = { + 'describe':describe[0] if len(describe) != 0 else '', + 'require':require[0] if len(require) != 0 else '', + 'code':code, + 'verified': False #这个参数是用来标记答案是否被用户认证为正确答案的,初始值为False + } + #把每一关的参数存入总的字典中 + total[challenge_id] = task + #去往下一关 + i += 1 except BaseException: - print('获取课程id失败') - #获取任务描述(如果存在的话) - page_source = safari.page_source - describe = obj1.findall(page_source) - #获取编程要求(如果存在的话) - require = obj2.findall(page_source) - #获取编辑器中的代码,由于代码都是class = "view-line"的div,先找到所有class = "view-line"的div,获取其中的所有文本,再把不同行的代码用\n连接起来 - code = safari.find_elements(By.CLASS_NAME,'view-line') - code = '\n'.join([i.text for i in code]).lstrip('\n') - #把参数存入字典,再转换为json格式 - task = { - 'describe':describe[0] if len(describe) != 0 else '', - 'require':require[0] if len(require) != 0 else '', - 'code':code, - 'verified': False #这个参数是用来标记答案是否被用户认证为正确答案的,初始值为False - } - #把每一关的参数存入总的字典中 - total[challenge_id] = task - #去往下一关 - i += 1 -except BaseException: - print('获取参数失败') -#把参数写入本地json文件中,文件名字与实shixun_name相同键为course_id,值为一个列表,列表中每个元素为一个字典,字典中包含每一关的参数 -with open(f'{shixun_id}.json','w',encoding='utf-8') as f: - json.dump(total,f,ensure_ascii=False,indent=4) + print('获取参数失败') + #把参数写入本地json文件中,文件名字与shixun_name相同键为course_id,值为一个列表,列表中每个元素为一个字典,字典中包含每一关的参数 + with open(f'{shixun_id}.json','w',encoding='utf-8') as f: + json.dump(total,f,ensure_ascii=False,indent=4) +else: + print('这不是一个实训作业') + exit() diff --git a/main.py b/main.py new file mode 100644 index 0000000..d54365a --- /dev/null +++ b/main.py @@ -0,0 +1,7 @@ +''' +主程序:整合各个模块 +1、ui文件调用相应ui模块 +2、get_params.py获取参数 +3、get_answer.py获取答案 +4、cloud.py将json文件存入云端 +''' \ No newline at end of file