|
|
|
@ -0,0 +1,109 @@
|
|
|
|
|
import ollama
|
|
|
|
|
import concurrent.futures
|
|
|
|
|
import json
|
|
|
|
|
from typing import Dict, Any, Tuple
|
|
|
|
|
|
|
|
|
|
# ==================== 核心处理模块 ====================
|
|
|
|
|
class CallSignExtractor:
    """Extract aircraft call signs from a transcript using two Ollama models.

    The same prompt is sent to both models in parallel; each reply is parsed
    as a JSON object mapping call sign -> original dialogue text, and the two
    objects are merged (model 1 wins when both report the same call sign).
    """

    def __init__(self, model1: str = "deepseek-r1:8b", model2: str = "qwen2"):
        """Store the two model names and the prompt template.

        Args:
            model1: Name of the primary model; its entries win on conflict.
            model2: Name of the secondary model.
        """
        self.model1 = model1
        self.model2 = model2
        # Runtime prompt text. Literal braces are doubled because the template
        # is filled in with str.format(raw_text=...).
        self.prompt_template = """你是一个航空通信专家,请从以下对话中提取**所有**飞机呼号,请严格按照以下要求操作:
1. 从对话文本中提取所有出现的航空器呼号(格式示例:lufthansa seven three nine)
2. 每个呼号对应完整的原始对话文本
3. 多个呼号需分别列出
4. 严格保持如下JSON格式:{{"<呼号1>": "完整原始文本","<呼号2>": "完整原始文本"}}
5. 不添加任何额外信息

示例输入:
{{""PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two
PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one"
"}}


示例输出:
{{"aero flot one two two": "PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two","malaysian two one":"PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one"}}

现在请处理以下文本:
{raw_text}"""

    def _call_model(self, model_name: str, prompt: str) -> str:
        """Send ``prompt`` to the named Ollama model and return its reply text.

        The reply is also printed. Any exception raised by the call is
        reported on stdout and turned into an empty string, so one failing
        model does not abort the whole extraction.
        """
        try:
            response = ollama.generate(model=model_name, prompt=prompt)
            text = response["response"]
        except Exception as e:  # deliberate best-effort boundary
            print(f"模型调用错误 ({model_name}): {e}")
            return ""
        print(text)
        return text

    def _dual_model_inference(self, prompt: str) -> Tuple[str, str]:
        """Run both models on ``prompt`` concurrently.

        Returns:
            ``(reply_from_model1, reply_from_model2)``.
        """
        # Exactly two tasks are submitted, so two workers are enough.
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            future1 = executor.submit(self._call_model, self.model1, prompt)
            future2 = executor.submit(self._call_model, self.model2, prompt)
            return future1.result(), future2.result()

    @staticmethod
    def _parse_model_output(raw: str) -> Dict[str, str]:
        """Parse one model reply into a dict, returning ``{}`` on any failure.

        Handles replies where the JSON object is surrounded by other text
        (for example a ``<think>...</think>`` preamble or Markdown code
        fences) by parsing only the span from the first ``{`` to the last
        ``}`` after the final ``</think>`` tag.
        """
        if not raw or not raw.strip():
            return {}

        text = raw
        # Drop an optional reasoning preamble; braces inside it would
        # otherwise be mistaken for the start of the JSON object.
        think_close = "</think>"
        cut = text.rfind(think_close)
        if cut != -1:
            text = text[cut + len(think_close):]

        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            return {}

        try:
            # strict=False accepts raw newlines inside string values, which
            # the prompt's own example output contains.
            parsed = json.loads(text[start:end + 1], strict=False)
        except json.JSONDecodeError:
            return {}

        # Only a JSON object can be merged; anything else counts as empty.
        return parsed if isinstance(parsed, dict) else {}

    def _merge_results(self, result1: str, result2: str) -> Dict[str, str]:
        """Merge the two model replies into a single call-sign mapping.

        Strategy: keep every call sign reported by either model; when both
        report the same call sign, the entry from model 1 (``result1``) wins.
        Replies that cannot be parsed contribute nothing.
        """
        json1 = self._parse_model_output(result1)
        json2 = self._parse_model_output(result2)

        merged = dict(json1)
        for call_sign, dialogue in json2.items():
            # setdefault only adds keys model 1 did not already provide.
            merged.setdefault(call_sign, dialogue)
        return merged

    def extract_call_signs(self, raw_text: str) -> Dict[str, str]:
        """Run the full pipeline: build the prompt, query both models, merge."""
        prompt = self.prompt_template.format(raw_text=raw_text)
        result1, result2 = self._dual_model_inference(prompt)
        return self._merge_results(result1, result2)
|
|
|
|
|
|
|
|
|
|
# ==================== 数据预处理模块 ====================
|
|
|
|
|
def preprocess_data(input_data: Dict[str, str]) -> str:
    """Return the transcript text from an input record (sample implementation).

    Only the ``"text"`` field is used; every other key is ignored.

    Args:
        input_data: Record expected to carry the transcript under ``"text"``.

    Returns:
        The value stored under ``"text"``, or an empty string when the key is
        missing or its value is ``None``.
    """
    # More preprocessing steps can be added here for real workloads.
    text = input_data.get("text", "")
    # An explicit None would otherwise leak out despite the declared str return.
    return "" if text is None else text
|
|
|
|
|
|
|
|
|
|
# ==================== 主执行流程 ====================
|
|
|
|
|
def main():
    """Run the extractor on a built-in sample transcript and print the result.

    Prints the preprocessed transcript first, then the merged call-sign
    mapping as indented JSON.
    """
    # Sample input record.
    input_data = {
        "id": "ACCU-0H4CW3",
        "text": """PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two
PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one""",
    }

    # Preprocessing drops the id and keeps only the "text" field.
    raw_text = preprocess_data(input_data)
    print(raw_text)

    # Build the extractor with its default model pair.
    extractor = CallSignExtractor()

    # Query both models and merge their answers.
    result = extractor.extract_call_signs(raw_text)

    # ensure_ascii=False keeps any non-ASCII characters readable in the output.
    print(json.dumps(result, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()
|