|
|
|
@ -45,7 +45,7 @@ def is_programming(url:str) -> bool:
|
|
|
|
|
# 另外,目前好像只有实训作业有这些参数,其他的作业例如编程作业就没有,所以先判断一下是否为实训作业,可以通过用户输入的url判断
|
|
|
|
|
# 主要是看educoder.net/后面是否有tasks,如果有,则是实训作业,否则,不是实训作业
|
|
|
|
|
#为方便main.py调用,将判断函数写入函数中,以下部分封装为函数
|
|
|
|
|
def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
def get_parameters(url: str, user_name: str, password: str):
|
|
|
|
|
'''
|
|
|
|
|
用于获得实训作业的参数
|
|
|
|
|
url:实训网址
|
|
|
|
@ -67,7 +67,8 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
time.sleep(2)
|
|
|
|
|
#判断是否登录成功
|
|
|
|
|
try:
|
|
|
|
|
safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[1]/a[1]')
|
|
|
|
|
safari.find_element(By.XPATH,
|
|
|
|
|
'//*[@id="task-left-panel"]/div[1]/a[1]')
|
|
|
|
|
except BaseException:
|
|
|
|
|
print('登录失败 请检查输入信息是否正确')
|
|
|
|
|
# 关闭浏览器
|
|
|
|
@ -86,14 +87,15 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
identity = cur_url.split('/')[-1].split('?')[0]
|
|
|
|
|
id_url = f'https://data.educoder.net/api/tasks/{identity}.json?'
|
|
|
|
|
headers = {
|
|
|
|
|
'Cookie':cookie,
|
|
|
|
|
'User-Agent':User_Agent,
|
|
|
|
|
'Referer':cur_url
|
|
|
|
|
'Cookie': cookie,
|
|
|
|
|
'User-Agent': User_Agent,
|
|
|
|
|
'Referer': cur_url
|
|
|
|
|
}
|
|
|
|
|
response = requests.get(url=id_url, headers=headers)
|
|
|
|
|
shixun_id = dict(response.json())['challenge']['shixun_id']
|
|
|
|
|
language = requests.get(f'https://data.educoder.net/api/tasks/{identity}/rep_content.json',headers=headers).json()['content']['language']
|
|
|
|
|
#判断云端文件是否存在
|
|
|
|
|
exist = is_exist(f'{shixun_id}.json')
|
|
|
|
|
exist = is_exist(f'{shixun_id}_{language}.json')
|
|
|
|
|
if exist:
|
|
|
|
|
try:
|
|
|
|
|
print('云端文件已存在,正在下载')
|
|
|
|
@ -101,7 +103,7 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
# 检测本地文件是否下载完成
|
|
|
|
|
while True:
|
|
|
|
|
try:
|
|
|
|
|
if os.path.exists(f'{shixun_id}.json'):
|
|
|
|
|
if os.path.exists(f'{shixun_id}_{language}.json'):
|
|
|
|
|
print('下载完成')
|
|
|
|
|
safari.quit()
|
|
|
|
|
return
|
|
|
|
@ -110,46 +112,54 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
print(e)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
print(e)
|
|
|
|
|
else:#不存在,则继续执行本程序
|
|
|
|
|
else: #不存在,则继续执行本程序
|
|
|
|
|
print('云端文件不存在,正在爬取')
|
|
|
|
|
#获取关卡数
|
|
|
|
|
#点击展开关卡页面
|
|
|
|
|
time.sleep(3)
|
|
|
|
|
safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[1]/a[1]').click()
|
|
|
|
|
safari.find_element(
|
|
|
|
|
By.XPATH, '//*[@id="task-left-panel"]/div[1]/a[1]').click()
|
|
|
|
|
#关卡数量由 class = "flex-container challenge-title space-between" 的元素数量决定
|
|
|
|
|
time.sleep(3)
|
|
|
|
|
htmltxt = safari.page_source
|
|
|
|
|
html = etree.HTML(htmltxt)
|
|
|
|
|
task_num = html.xpath('count(//*[@class="flex-container challenge-title space-between"])')
|
|
|
|
|
task_num = html.xpath(
|
|
|
|
|
'count(//*[@class="flex-container challenge-title space-between"])'
|
|
|
|
|
)
|
|
|
|
|
task_num = int(task_num)
|
|
|
|
|
print(f'关卡数量为{task_num}')
|
|
|
|
|
#回到第一关
|
|
|
|
|
time.sleep(3)
|
|
|
|
|
safari.find_element(By.XPATH,'//*[@id="task-left-panel"]/div[3]/div[3]/div/div/div/div/div[1]/div[1]/a').click()
|
|
|
|
|
safari.find_element(
|
|
|
|
|
By.XPATH,
|
|
|
|
|
'//*[@id="task-left-panel"]/div[3]/div[3]/div/div/div/div/div[1]/div[1]/a'
|
|
|
|
|
).click()
|
|
|
|
|
#对于每一关,获取参数
|
|
|
|
|
#每一关的参数由以下元素组成:
|
|
|
|
|
'''
|
|
|
|
|
/html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[3]/div[1]/a
|
|
|
|
|
/html/body/div[1]/div/div/div/div[2]/section[1]/div[3]/div[3]/div/div/div/div/div[4]/div[1]/a
|
|
|
|
|
'''
|
|
|
|
|
obj1 = re.compile(r'<h3 id="任务描述">任务描述</h3><p>(?P<describe>.*?)</p>',re.S)
|
|
|
|
|
obj2 = re.compile(r'<h3 id="编程要求">编程要求</h3><p>(?P<require>.*?)</p>',re.S)
|
|
|
|
|
obj1 = re.compile(
|
|
|
|
|
r'<h3 id="任务描述">任务描述</h3><p>(?P<describe>.*?)</p>', re.S)
|
|
|
|
|
obj2 = re.compile(
|
|
|
|
|
r'<h3 id="编程要求">编程要求</h3><p>(?P<require>.*?)</p>', re.S)
|
|
|
|
|
#初始化一个字典,用于存放所有关卡的参数
|
|
|
|
|
total = {}
|
|
|
|
|
i=1
|
|
|
|
|
i = 1
|
|
|
|
|
try:
|
|
|
|
|
while i <= task_num:
|
|
|
|
|
cur_url=Referer = safari.current_url
|
|
|
|
|
cur_url = Referer = safari.current_url
|
|
|
|
|
identity = cur_url.split('/')[-1].split('?')[0]
|
|
|
|
|
id_url = f'https://data.educoder.net/api/tasks/{identity}.json?'
|
|
|
|
|
#获取课程id
|
|
|
|
|
headers = {
|
|
|
|
|
'Cookie':cookie,
|
|
|
|
|
'User-Agent':User_Agent,
|
|
|
|
|
'Referer':Referer
|
|
|
|
|
'Cookie': cookie,
|
|
|
|
|
'User-Agent': User_Agent,
|
|
|
|
|
'Referer': Referer
|
|
|
|
|
}
|
|
|
|
|
try:
|
|
|
|
|
response = requests.get(url=id_url,headers=headers)
|
|
|
|
|
response = requests.get(url=id_url, headers=headers)
|
|
|
|
|
challenge_id = dict(response.json())['challenge']['id']
|
|
|
|
|
shixun_id = dict(response.json())['challenge']['shixun_id']
|
|
|
|
|
except BaseException:
|
|
|
|
@ -162,14 +172,25 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
require = obj2.findall(page_source)
|
|
|
|
|
#获取编辑器中的代码,采用requests抓取https://data.educoder.net/api/tasks/{identity}/rep_content.json中的content中的content
|
|
|
|
|
# 然后然后,这个content是一个base64编码的字符串,需要解码
|
|
|
|
|
code = requests.get(f'https://data.educoder.net/api/tasks/{identity}/rep_content.json',headers=headers).json()['content']['content']
|
|
|
|
|
code = requests.get(
|
|
|
|
|
f'https://data.educoder.net/api/tasks/{identity}/rep_content.json',
|
|
|
|
|
headers=headers).json()['content']['content']
|
|
|
|
|
#把参数存入字典,再转换为json格式
|
|
|
|
|
task = {
|
|
|
|
|
'describe':describe[0] if len(describe) != 0 else '',
|
|
|
|
|
'require':require[0] if len(require) != 0 else '',
|
|
|
|
|
'code':code,
|
|
|
|
|
'verified': False, #这个参数是用来标记答案是否被用户认证为正确答案的,初始值为False
|
|
|
|
|
'last_modified': time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) #这个参数是用来标记答案最后一次被修改的时间,初始值为当前时间
|
|
|
|
|
'describe':
|
|
|
|
|
describe[0] if len(describe) != 0 else '',
|
|
|
|
|
'require':
|
|
|
|
|
require[0] if len(require) != 0 else '',
|
|
|
|
|
'code':
|
|
|
|
|
code,
|
|
|
|
|
'verified':
|
|
|
|
|
False, #这个参数是用来标记答案是否被用户认证为正确答案的,初始值为False
|
|
|
|
|
'last_modified':
|
|
|
|
|
time.strftime(
|
|
|
|
|
'%Y-%m-%d %H:%M:%S', time.localtime(
|
|
|
|
|
time.time())), #这个参数是用来标记答案最后一次被修改的时间,初始值为当前时间
|
|
|
|
|
'language':
|
|
|
|
|
language if language != None else ''
|
|
|
|
|
}
|
|
|
|
|
#把每一关的参数存入总的字典中
|
|
|
|
|
total[challenge_id] = task
|
|
|
|
@ -178,7 +199,7 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
if i == 1:
|
|
|
|
|
i += 1
|
|
|
|
|
safari.find_element(By.XPATH,f'//*[@id="task-right-panel"]/div[4]/div/div[2]/a').click()
|
|
|
|
|
elif i<task_num:
|
|
|
|
|
elif i < task_num:
|
|
|
|
|
i += 1
|
|
|
|
|
safari.find_element(By.XPATH,f'//*[@id="task-right-panel"]/div[4]/div/div[2]/a[2]').click()
|
|
|
|
|
else:
|
|
|
|
@ -188,7 +209,6 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
print(f'{i}关参数获取参数失败')
|
|
|
|
|
#判断爬取到的代码是否存在空值或者键的数量是否与关卡数量相等,如果不相等,则说明爬取失败,需要重新爬取
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# if len(total) != task_num:
|
|
|
|
|
# print('参数爬取失败,正在重新爬取')
|
|
|
|
|
# get_parameters(url,user_name,password)
|
|
|
|
@ -198,17 +218,16 @@ def get_parameters(url:str,user_name:str,password:str):
|
|
|
|
|
# print('参数爬取失败,正在重新爬取')
|
|
|
|
|
# get_parameters(url,user_name,password)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#把参数写入本地json文件中,文件名字与shixun_name相同键为course_id,值为一个列表,列表中每个元素为一个字典,字典中包含每一关的参数
|
|
|
|
|
with open(f'{shixun_id}.json','w',encoding='utf-8') as f:
|
|
|
|
|
json.dump(total,f,ensure_ascii=False,indent=4)
|
|
|
|
|
with open(f'{shixun_id}_{language}.json', 'w', encoding='utf-8') as f:
|
|
|
|
|
json.dump(total, f, ensure_ascii=False, indent=4)
|
|
|
|
|
print('参数爬取完成')
|
|
|
|
|
#关闭浏览器
|
|
|
|
|
safari.quit()
|
|
|
|
|
else:
|
|
|
|
|
print('这不是一个实训作业')
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_parameters_of_programming(url:str,user_name:str,password:str):
|
|
|
|
|
'''
|
|
|
|
|
用于获得编程作业的参数
|
|
|
|
@ -222,7 +241,7 @@ def get_parameters_of_programming(url:str,user_name:str,password:str):
|
|
|
|
|
# 检查是否为编程作业
|
|
|
|
|
if is_programming(url):
|
|
|
|
|
# 构造selenium对象
|
|
|
|
|
#构造selenium对象
|
|
|
|
|
#构造selenium对象
|
|
|
|
|
safari = Chrome(options=opt)
|
|
|
|
|
safari.get(url)
|
|
|
|
|
#模拟登录
|
|
|
|
@ -231,8 +250,8 @@ def get_parameters_of_programming(url:str,user_name:str,password:str):
|
|
|
|
|
safari.find_element(By.ID, 'password').send_keys(password)
|
|
|
|
|
safari.find_element(By.ID, 'password').send_keys(Keys.ENTER)
|
|
|
|
|
time.sleep(2)
|
|
|
|
|
#判断是否登录成功
|
|
|
|
|
try:
|
|
|
|
|
#判断是否登录成功
|
|
|
|
|
try:
|
|
|
|
|
safari.find_element(By.XPATH,'//*[@id="root"]/div/div/div/div/div/div/div/section[1]/div/div[4]')
|
|
|
|
|
except BaseException:
|
|
|
|
|
print('登录失败 请检查输入信息是否正确')
|
|
|
|
@ -275,8 +294,8 @@ def get_parameters_of_programming(url:str,user_name:str,password:str):
|
|
|
|
|
break
|
|
|
|
|
except:
|
|
|
|
|
continue
|
|
|
|
|
i = 1
|
|
|
|
|
while i<=task_num:
|
|
|
|
|
i = 1
|
|
|
|
|
while i<=task_num:
|
|
|
|
|
# 对于每一关,获取参数
|
|
|
|
|
cur_url = safari.current_url
|
|
|
|
|
identity = cur_url.split('/')[-1].split('?')[0]
|
|
|
|
@ -287,12 +306,13 @@ def get_parameters_of_programming(url:str,user_name:str,password:str):
|
|
|
|
|
total = {}
|
|
|
|
|
# 题目id
|
|
|
|
|
pro_id = problem_data['hack']['id']
|
|
|
|
|
language = problem_data['hack']['language']
|
|
|
|
|
# 获取id后,判断云端是否存在该文件,如果存在,则跳过,如果不存在,则继续执行本程序
|
|
|
|
|
exist = is_exist(f'pro_{pro_id}.json')
|
|
|
|
|
exist = is_exist(f'pro_{pro_id}_{language}.json')
|
|
|
|
|
if exist:
|
|
|
|
|
print('云端文件已存在,正在下载')
|
|
|
|
|
download(f'pro_{pro_id}.json')
|
|
|
|
|
print(f'pro_{pro_id}.json下载完成')
|
|
|
|
|
download(f'pro_{pro_id}_{language}.json')
|
|
|
|
|
print(f'pro_{pro_id}_{language}.json下载完成')
|
|
|
|
|
continue
|
|
|
|
|
else:
|
|
|
|
|
print('云端文件不存在,正在爬取')
|
|
|
|
@ -305,10 +325,11 @@ def get_parameters_of_programming(url:str,user_name:str,password:str):
|
|
|
|
|
'describe':describe,
|
|
|
|
|
'code':code,
|
|
|
|
|
'verified': False, #这个参数是用来标记答案是否被用户认证为正确答案的,初始值为False(暂时没啥用)
|
|
|
|
|
'last_modified': time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) #这个参数是用来标记答案最后一次被修改的时间,初始值为当前时间
|
|
|
|
|
'last_modified': time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())), #这个参数是用来标记答案最后一次被修改的时间,初始值为当前时间
|
|
|
|
|
'language':language if language != None else ''
|
|
|
|
|
}
|
|
|
|
|
# 写入本地json,命名为pro_pro_id.json
|
|
|
|
|
with open(f'pro_{pro_id}.json','w',encoding='utf-8') as f:
|
|
|
|
|
with open(f'pro_{pro_id}_{language}.json','w',encoding='utf-8') as f:
|
|
|
|
|
json.dump(total,f,ensure_ascii=False)
|
|
|
|
|
print(f'{pro_id}完成')
|
|
|
|
|
# 去往下一关
|
|
|
|
@ -322,10 +343,10 @@ def get_parameters_of_programming(url:str,user_name:str,password:str):
|
|
|
|
|
safari.quit()
|
|
|
|
|
else:
|
|
|
|
|
print('不是编程作业')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: scrape a single programming assignment end to end.
    #
    # Credentials are taken from the environment (falling back to an
    # interactive prompt) instead of being hard-coded, so that real account
    # details are never committed to the repository.
    # NOTE(review): the password that used to be written here should be
    # considered leaked and rotated.
    import getpass
    import os

    url = os.environ.get(
        'EDUCODER_URL',
        'https://www.educoder.net/myproblems/9kwnlzvcegsa?type=1')
    user_name = os.environ.get('EDUCODER_USER') or input('user name: ')
    # getpass keeps the password from being echoed to the terminal.
    password = (os.environ.get('EDUCODER_PASSWORD')
                or getpass.getpass('password: '))

    get_parameters_of_programming(url, user_name, password)
    print('结束测试')
|
|
|
|
|