You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
103 lines
4.7 KiB
103 lines
4.7 KiB
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import os
|
|
import json
|
|
|
|
MODEL_MAPPING = {
|
|
'search': {
|
|
"gpt-5": "search-1",
|
|
"gpt-5-mini": "search-2",
|
|
"Qwen/Qwen3-32B": "search-3"
|
|
},
|
|
'enhance_reasoning': {
|
|
"gpt-5": "reasoner-1",
|
|
"gpt-5-mini": "reasoner-2",
|
|
"Qwen/Qwen2.5-Coder-32B-Instruct": "reasoner-3"
|
|
},
|
|
'answer': {
|
|
"Qwen/Qwen2.5-Math-72B-Instruct": "answer-math-1",
|
|
"Qwen/Qwen2.5-Math-7B-Instruct": "answer-math-2",
|
|
"gpt-5": "answer-1",
|
|
"gpt-5-mini": "answer-2",
|
|
"meta-llama/Llama-3.3-70B-Instruct": "answer-3",
|
|
"Qwen/Qwen3-32B": "answer-4"
|
|
}
|
|
}
|
|
|
|
task_id = '66f5e796acadd55c11fb11f5'
|
|
output_path = f'example.json'
|
|
output_dir = 'sft_data'
|
|
with open('evaluation/hle.jsonl') as f:
|
|
lines = f.readlines()
|
|
id2example = {}
|
|
for l in lines:
|
|
e = json.loads(l)
|
|
id2example[e['id']] = e
|
|
with open(output_path) as f:
|
|
results_data = json.load(f)
|
|
problem = id2example[task_id]['question']
|
|
messages = [
|
|
{"role": "system", "content": "You are good at using tools.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>\n{\"type\": \"function\", \"function\": {\"name\": \"code_interpreter\", \"description\": \"python executor to execute code and return outputs\", \"parameters\": {\"properties\": {\"code\": {\"description\": \"The code to execute\", \"type\": \"string\"}}, \"required\": [\"code\"], \"title\": \"parameters\", \"type\": \"object\"}}}\n{\"type\": \"function\", \"function\": {\"name\": \"search\", \"description\": \"Search for missing information\", \"parameters\": {\"properties\": {\"query\": {\"description\": \"The query used to search missing information\", \"type\": \"string\"}}, \"required\": [\"query\"], \"title\": \"parameters\", \"type\": \"object\"}}}\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call>"},
|
|
{"role": "user", "content": f"Problem: {problem}"}
|
|
]
|
|
documents = []
|
|
for i in range(100):
|
|
if not f"turn_{i}_response" in results_data["tool_responses"]:
|
|
continue
|
|
model_response = results_data["all_tool_calls"][i][0][1]
|
|
cur_dict = results_data["tool_responses"][f"turn_{i}_response"][0]
|
|
|
|
if "search_results_data" in cur_dict:
|
|
query = cur_dict['query']
|
|
context = ''
|
|
for d in cur_dict["search_results_data"]:
|
|
if not d in documents:
|
|
documents.append(d)
|
|
context += d+'\n\n'
|
|
tool_call_content = '<tool_call>{"name": "search", "arguments": {"query": "QUERY_TO_REPLACE"}}</tool_call>'
|
|
tool_call_content = tool_call_content.replace("QUERY_TO_REPLACE",query)
|
|
messages.append({
|
|
'role': 'assistant',
|
|
'content': model_response+tool_call_content
|
|
})
|
|
messages.append({
|
|
'role': 'user',
|
|
'content': "Search results:\n"+context
|
|
})
|
|
elif "generated_code" in cur_dict and "exec_result" in cur_dict:
|
|
tool_call_content = '<tool_call>{"name": "code_interpreter", "arguments": {"query": "CODE_TO_REPLACE"}}</tool_call>'
|
|
tool_call_content = tool_call_content.replace("CODE_TO_REPLACE",cur_dict["generated_code"])
|
|
messages.append({
|
|
'role': 'assistant',
|
|
'content': model_response+tool_call_content
|
|
})
|
|
messages.append({
|
|
'role': 'user',
|
|
'content': 'Execution results:\n'+cur_dict["exec_result"]
|
|
})
|
|
elif 'answer_response' in cur_dict:
|
|
messages.append({
|
|
'role': 'assistant',
|
|
'content': model_response+cur_dict['answer_response']
|
|
})
|
|
|
|
data_idx = 0
|
|
if not os.path.isdir(output_dir):
|
|
os.makedirs(output_dir,exist_ok=True)
|
|
for i in range(3,len(messages)+1,2):
|
|
data_idx += 1
|
|
with open(os.path.join(output_dir,f"{data_idx}.json"),'w') as f:
|
|
json.dump(messages[:i],f,indent=2) |