"""Extract aircraft call signs from ATC transcripts with two Ollama models.

Recovered from git patch 69f76d04d16d42fdad74a01557334a33267c93b7
(2025-05-08), whose subject translates to "Added call-sign extraction; the
results still need refinement".

The same prompt is sent to two locally served models in parallel and their
JSON answers are merged into a single ``{call_sign: transcript}`` mapping.
"""
import concurrent.futures
import json
import re
from typing import Any, Dict, Tuple


# ==================== Core processing ====================
class CallSignExtractor:
    """Ask two Ollama models for call signs and merge their answers.

    Attributes:
        model1: Name of the primary model; its answer wins on conflicts.
        model2: Name of the secondary model.
        prompt_template: ``str.format`` template with one ``{raw_text}``
            placeholder. Literal braces in it are doubled on purpose.
    """

    # Matches a "<think>...</think>" preamble so it can be removed before
    # looking for the JSON object in a model reply.
    _THINK_BLOCK = re.compile(r"<think>.*?</think>", re.DOTALL)

    def __init__(self, model1="deepseek-r1:8b", model2="qwen2"):
        self.model1 = model1
        self.model2 = model2
        self.prompt_template = """你是一个航空通信专家,请从以下对话中提取**所有**飞机呼号,请严格按照以下要求操作:
1. 从对话文本中提取所有出现的航空器呼号(格式示例:lufthansa seven three nine)
2. 每个呼号对应完整的原始对话文本
3. 多个呼号需分别列出
4. 严格保持如下JSON格式:{{"<呼号1>": "完整原始文本","<呼号2>": "完整原始文本"}}
5. 不添加任何额外信息

示例输入:
{{""PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two
PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one"
"}}


示例输出:
{{"aero flot one two two": "PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two","malaysian two one":"PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one"}}

现在请处理以下文本:
{raw_text}"""

    def _call_model(self, model_name: str, prompt: str) -> str:
        """Send ``prompt`` to one Ollama model and return its text reply.

        Best effort: any failure is printed and reported as an empty string
        so that the other model's answer can still be used.

        Args:
            model_name: Name of the Ollama model to query.
            prompt: Fully formatted prompt text.

        Returns:
            The ``"response"`` field of the reply, or ``""`` on any error.
        """
        try:
            # Imported here so the parsing/merging logic stays importable
            # when the ollama client is not installed; a missing client is
            # then reported through the same error path as a failed call.
            import ollama

            response = ollama.generate(model=model_name, prompt=prompt)
            print(response["response"])
            return response['response']
        except Exception as e:
            print(f"模型调用错误 ({model_name}): {e}")
            return ""

    def _dual_model_inference(self, prompt: str) -> Tuple[str, str]:
        """Run both models concurrently on the same prompt.

        Args:
            prompt: Fully formatted prompt text.

        Returns:
            ``(reply_from_model1, reply_from_model2)``.
        """
        # Exactly two tasks are submitted, so two workers are enough.
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            future1 = executor.submit(self._call_model, self.model1, prompt)
            future2 = executor.submit(self._call_model, self.model2, prompt)
            return future1.result(), future2.result()

    @classmethod
    def _parse_response(cls, text: str) -> Dict[str, Any]:
        """Parse a model reply into a dict, tolerating common wrappers.

        The reply is tried as-is, then with any ``<think>...</think>`` block
        removed, then reduced to the span from the first ``{`` to the last
        ``}`` (which drops Markdown fences and surrounding chatter).

        Args:
            text: Raw reply text; may be empty.

        Returns:
            The decoded JSON object, or ``{}`` when no JSON object can be
            recovered (including when the reply is valid JSON but not an
            object).
        """
        if not text or not text.strip():
            return {}

        cleaned = cls._THINK_BLOCK.sub("", text).strip()
        candidates = [text, cleaned]
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if 0 <= start < end:
            candidates.append(cleaned[start:end + 1])

        for candidate in candidates:
            try:
                # strict=False accepts raw newlines inside string values,
                # which the prompt's own example output contains.
                parsed = json.loads(candidate, strict=False)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed
        return {}

    def _merge_results(self, result1: str, result2: str) -> Dict[str, str]:
        """Merge the two models' replies into one mapping.

        Every call sign found by either model is kept. When both models
        report the same call sign, the value from model 1 is used.

        Args:
            result1: Raw reply from model 1.
            result2: Raw reply from model 2.

        Returns:
            Merged ``{call_sign: transcript}`` mapping; ``{}`` if neither
            reply could be parsed.
        """
        merged = dict(self._parse_response(result1))
        for call_sign, transcript in self._parse_response(result2).items():
            # setdefault never overwrites, so model 1 keeps priority.
            merged.setdefault(call_sign, transcript)
        return merged

    def extract_call_signs(self, raw_text: str) -> Dict[str, str]:
        """Run the full pipeline: format prompt, query models, merge.

        Args:
            raw_text: Transcript text to analyse.

        Returns:
            ``{call_sign: transcript}`` mapping; ``{}`` for blank input
            (no model is called in that case).
        """
        if not raw_text or not raw_text.strip():
            return {}
        prompt = self.prompt_template.format(raw_text=raw_text)
        result1, result2 = self._dual_model_inference(prompt)
        return self._merge_results(result1, result2)


# ==================== Data preprocessing ====================
def preprocess_data(input_data: Dict[str, str]) -> str:
    """Return the ``"text"`` field of a record (example implementation).

    Args:
        input_data: Record dict; only the ``"text"`` key is read.

    Returns:
        The transcript text, or ``""`` when the key is missing or ``None``.
    """
    # More preprocessing steps can be added here for real-world use.
    return input_data.get("text") or ""


# ==================== Main flow ====================
def main():
    """Run the extractor on a built-in sample record and print the result."""
    # Sample input record.
    input_data = {
        "id": "ACCU-0H4CW3",
        "text": """PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two
PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one"""
    }

    # Preprocess: drop the id and keep only the text field.
    raw_text = preprocess_data(input_data)
    print(raw_text)

    # Build the extractor with its default models.
    extractor = CallSignExtractor()

    # Run the extraction.
    result = extractor.extract_call_signs(raw_text)

    # Pretty-print; ensure_ascii=False keeps non-ASCII text readable.
    print(json.dumps(result, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()