更新爬虫逻辑

修复了爬取一些作业时反复重启的bug,优化了爬虫逻辑
master
JesterHey 2 years ago committed by GitHub
parent 56ce67bbef
commit 2edb968773
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -37,4 +37,4 @@ def delete(name):
if is_exist(name):
bucket.delete_object(name)
if __name__ == '__main__':
print('测试')
print('测试部分')

@ -32,6 +32,7 @@ def get_programmingjson(file:str) -> list:
if i.endswith('.json') and i.startswith('pro'):
jsonfiles.append(i)
return jsonfiles
'''
与云服务器连接先判断当前json是否已在云服务器上如果在则直接调用
节省调用API的时间和资费否则调用API获得答案并将答案存入云服务器
@ -205,7 +206,6 @@ def get_programming_answer_from_api(jsonfile:list,client:AsyncOpenAI,promot:str)
if __name__ == '__main__':
promot=''
ans = get_shixunanswer_from_api(load_json_data(get_shixunjson(os.getcwd())[0]),client=client,promot=promot)
ans = get_programming_answer_from_api(jsonfile=get_programmingjson(os.getcwd()),client=client,promot=promot)
print(ans)
rewrite_shixun_json(get_shixunjson(os.getcwd())[0],ans)
#new_data = get_programming_answer_from_api(get_programmingjson(os.getcwd()),client=client,promot=promot2)
rewrite_programming_json(json_names=get_programmingjson(os.getcwd()),new_data=ans)

@ -25,7 +25,7 @@ from cloud import is_exist,download
#配置参数
opt = Options()
opt.add_experimental_option('detach', True)
#opt.add_argument('--headless')
opt.add_argument('--headless')
platf = platform.platform()
def is_practice(url:str) -> bool:
obj=re.compile(r'www.educoder.net/tasks')
@ -103,17 +103,13 @@ def get_parameters(url: str, user_name: str, password: str,retry:int=2):
if exist:
try:
print('云端文件已存在,正在下载')
download(f'{shixun_id}.json')
download(f'{shixun_id}_{language}.json')
# 检测本地文件是否下载完成
while True:
try:
if os.path.exists(f'{shixun_id}_{language}.json'):
print('下载完成')
print(f'{shixun_id}.json下载完成')
# 关闭浏览器
safari.quit()
return
break
except Exception as e:
print(e)
except Exception as e:
print(e)
else: #不存在,则继续执行本程序
@ -130,6 +126,7 @@ def get_parameters(url: str, user_name: str, password: str,retry:int=2):
#获取关卡数量
print(resp2.json())
task_num = len(resp2.json())
print(f'关卡数量为{task_num}')
resp2.close()
#回到第一关
time.sleep(2)
@ -194,11 +191,8 @@ def get_parameters(url: str, user_name: str, password: str,retry:int=2):
'language':
language if language != None else ''
}
print(f'{i}关的参数为{task}')
#把每一关的参数存入总的字典中
total[challenge_id] = task
print('当前参数为')
print(total)
#去往下一关
safari.implicitly_wait(10)
if i <= 1:

@ -1,3 +1,4 @@
'''
主程序整合各个模块
1ui文件调用相应ui模块
@ -5,6 +6,7 @@
3get_answer.py获取答案
4cloud.py将json文件存入云端
'''
# 生成图形化界面,引导用户登陆并输入实训网址
# 调用get_params.py获取参数这一步同时隐含了云端获取答案的过程
# 如果云端答案不存在则调用get_answer.py获取答案并展示给用户
@ -85,16 +87,15 @@ else:
os.remove('apis.json')
# 函数读取当前目录下的所有json文件
if ispractice:
JSS = get_shixunjson(os.getcwd())[0]
JSS = get_shixunjson(os.getcwd())
# 构建txt文件
transToTxt(JSS)
# 展示txt文件
# 判断云端是否存在答案json如果不存在则上传
print('答案获取完毕,开始展示')
print_txt(get_all_txt_file(os.getcwd()))
if not is_exist(JSS):
if not is_exist(JSS[0]):
print('开始上传答案到云端,请勿关闭程序')
upload(JSS)
upload(JSS[0])
print('上传完毕')
else:
JSS = get_programmingjson(os.getcwd())

Loading…
Cancel
Save