# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Convert one recorded tool-use trajectory into chat-format SFT samples.

The script looks up the question for ``task_id`` in ``evaluation/hle.jsonl``,
reads the recorded trajectory from ``output_path``, rebuilds the conversation
as a list of chat messages, and writes every conversation prefix that ends on
an assistant turn to ``output_dir`` as ``1.json``, ``2.json``, ...
"""

import os
import json

# Per-role mapping from model name to an alias string.
# Not referenced elsewhere in this file.
MODEL_MAPPING = {
    'search': {
        "gpt-5": "search-1",
        "gpt-5-mini": "search-2",
        "Qwen/Qwen3-32B": "search-3"
    },
    'enhance_reasoning': {
        "gpt-5": "reasoner-1",
        "gpt-5-mini": "reasoner-2",
        "Qwen/Qwen2.5-Coder-32B-Instruct": "reasoner-3"
    },
    'answer': {
        "Qwen/Qwen2.5-Math-72B-Instruct": "answer-math-1",
        "Qwen/Qwen2.5-Math-7B-Instruct": "answer-math-2",
        "gpt-5": "answer-1",
        "gpt-5-mini": "answer-2",
        "meta-llama/Llama-3.3-70B-Instruct": "answer-3",
        "Qwen/Qwen3-32B": "answer-4"
    }
}

task_id = '66f5e796acadd55c11fb11f5'
output_path = 'example.json'
output_dir = 'sft_data'

# System prompt placed at the start of every generated conversation.
# NOTE(review): the text refers to "XML tags" but contains none -- confirm
# against the intended prompt template before training on this data.
SYSTEM_PROMPT = (
    "You are good at using tools.\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\n"
    "You are provided with function signatures within XML tags:\n\n"
    "{\"type\": \"function\", \"function\": {\"name\": \"code_interpreter\", \"description\": \"python executor to execute code and return outputs\", \"parameters\": {\"properties\": {\"code\": {\"description\": \"The code to execute\", \"type\": \"string\"}}, \"required\": [\"code\"], \"title\": \"parameters\", \"type\": \"object\"}}}\n"
    "{\"type\": \"function\", \"function\": {\"name\": \"search\", \"description\": \"Search for missing information\", \"parameters\": {\"properties\": {\"query\": {\"description\": \"The query used to search missing information\", \"type\": \"string\"}}, \"required\": [\"query\"], \"title\": \"parameters\", \"type\": \"object\"}}}\n\n\n"
    "For each function call, return a json object with function name and arguments within XML tags:\n\n"
    "{\"name\": , \"arguments\": }\n"
)


def load_examples(path):
    """Read a JSONL file and return a dict mapping each record's 'id' to it.

    Blank lines are skipped so a trailing newline does not break parsing.
    """
    id2example = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            example = json.loads(line)
            id2example[example['id']] = example
    return id2example


def _format_tool_call(name, arguments):
    """Serialize a tool call as a JSON object string.

    json.dumps escapes quotes, backslashes and newlines in the arguments, so
    the emitted call is always valid JSON; for plain strings the output is
    identical to the previous hand-written template.
    """
    return json.dumps({"name": name, "arguments": arguments}, ensure_ascii=False)


def build_messages(problem, results_data, max_turns=100):
    """Rebuild the chat transcript for one recorded trajectory.

    Args:
        problem: Question text, placed in the first user message.
        results_data: Parsed trajectory. ``results_data["tool_responses"]``
            maps ``"turn_<i>_response"`` to a list whose first item describes
            the turn, and ``results_data["all_tool_calls"][i][0][1]`` is the
            model text that is prepended to the assistant message of turn i.
        max_turns: Turn indices ``0 .. max_turns - 1`` are inspected; indices
            without a recorded response are skipped.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts starting with the
        system prompt and the problem statement.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Problem: {problem}"},
    ]
    tool_responses = results_data["tool_responses"]
    # Documents already shown in an earlier search turn are not repeated.
    seen_documents = set()

    for i in range(max_turns):
        response_key = f"turn_{i}_response"
        if response_key not in tool_responses:
            continue
        model_response = results_data["all_tool_calls"][i][0][1]
        turn = tool_responses[response_key][0]

        if "search_results_data" in turn:
            new_documents = []
            for document in turn["search_results_data"]:
                if document not in seen_documents:
                    seen_documents.add(document)
                    new_documents.append(document)
            context = ''.join(document + '\n\n' for document in new_documents)
            tool_call = _format_tool_call("search", {"query": turn['query']})
            messages.append({'role': 'assistant', 'content': model_response + tool_call})
            messages.append({'role': 'user', 'content': "Search results:\n" + context})
        elif "generated_code" in turn and "exec_result" in turn:
            # The code_interpreter schema in SYSTEM_PROMPT names its argument
            # "code", so the call must use that key rather than "query".
            tool_call = _format_tool_call("code_interpreter", {"code": turn["generated_code"]})
            messages.append({'role': 'assistant', 'content': model_response + tool_call})
            messages.append({'role': 'user', 'content': 'Execution results:\n' + turn["exec_result"]})
        elif 'answer_response' in turn:
            messages.append({'role': 'assistant', 'content': model_response + turn['answer_response']})

    return messages


def write_sft_samples(messages, out_dir):
    """Write each conversation prefix of length 3, 5, 7, ... as ``<n>.json``.

    With the layout produced by build_messages (system, user, then
    assistant/user pairs) every such prefix ends on an assistant message.
    Returns the number of files written.
    """
    os.makedirs(out_dir, exist_ok=True)
    data_idx = 0
    for end in range(3, len(messages) + 1, 2):
        data_idx += 1
        with open(os.path.join(out_dir, f"{data_idx}.json"), 'w', encoding='utf-8') as f:
            json.dump(messages[:end], f, indent=2)
    return data_idx


def main():
    """Load the inputs, rebuild the conversation and write the SFT samples."""
    id2example = load_examples('evaluation/hle.jsonl')
    with open(output_path, encoding='utf-8') as f:
        results_data = json.load(f)
    problem = id2example[task_id]['question']
    messages = build_messages(problem, results_data)
    write_sft_samples(messages, output_dir)


if __name__ == "__main__":
    main()