添加了呼号提取功能,但效果还待完善

main
ay0w55p 4 months ago
parent 3d3b4f0bfc
commit 69f76d04d1

@ -0,0 +1,109 @@
import ollama
import concurrent.futures
import json
from typing import Dict, Any, Tuple
# ==================== 核心处理模块 ====================
class CallSignExtractor:
    """Extract aircraft call signs from ATC dialogue text.

    Runs the same prompt against two local Ollama models in parallel and
    merges their JSON outputs, keeping every call sign found by either
    model. On conflicting keys the primary model (model1) wins.
    """

    def __init__(self, model1: str = "deepseek-r1:8b", model2: str = "qwen2") -> None:
        """
        Args:
            model1: Primary Ollama model name (its answers win on merge conflicts).
            model2: Secondary Ollama model name.
        """
        self.model1 = model1
        self.model2 = model2
        # Few-shot prompt instructing the model to return strict JSON mapping
        # each call sign to the full original dialogue text. Doubled braces
        # survive str.format(); only {raw_text} is substituted.
        self.prompt_template = """你是一个航空通信专家,请从以下对话中提取**所有**飞机呼号,请严格按照以下要求操作:
1. 从对话文本中提取所有出现的航空器呼号格式示例lufthansa seven three nine
2. 每个呼号对应完整的原始对话文本
3. 多个呼号需分别列出
4. 严格保持如下JSON格式{{"<呼号1>": "完整原始文本","<呼号2>": "完整原始文本"}}
5. 不添加任何额外信息
示例输入
{{""PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two
PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one"
"}}
示例输出
{{"aero flot one two two": "PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two","malaysian two one":"PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one"}}
现在请处理以下文本
{raw_text}"""

    def _call_model(self, model_name: str, prompt: str) -> str:
        """Call one Ollama model and return its raw text response.

        Returns "" on any error so one failing model degrades the pipeline
        gracefully instead of crashing it.
        """
        try:
            response = ollama.generate(model=model_name, prompt=prompt)
            # NOTE: removed leftover debug print of the raw response here.
            return response["response"]
        except Exception as e:
            # Best-effort: report and continue with an empty result.
            print(f"模型调用错误 ({model_name}): {e}")
            return ""

    def _dual_model_inference(self, prompt: str) -> Tuple[str, str]:
        """Query both models concurrently on the same prompt.

        Model calls are I/O-bound, so a thread pool overlaps the waits.
        Returns (model1_response, model2_response).
        """
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future1 = executor.submit(self._call_model, self.model1, prompt)
            future2 = executor.submit(self._call_model, self.model2, prompt)
            return future1.result(), future2.result()

    @staticmethod
    def _parse_json(raw: str) -> Dict[str, str]:
        """Parse a model response as JSON; return {} for empty or invalid output."""
        try:
            return json.loads(raw) if raw.strip() else {}
        except json.JSONDecodeError:
            return {}

    def _merge_results(self, result1: str, result2: str) -> Dict[str, str]:
        """Merge both models' raw JSON outputs into one dict.

        Keeps every call sign found by either model; on conflicting keys the
        value from model1 (result1) wins.
        """
        json1 = self._parse_json(result1)
        json2 = self._parse_json(result2)
        # BUG FIX: the original wrote {**json1, **json2}, letting model2
        # override model1 even though the stated policy is model1 priority.
        # Spread json1 last so its entries win on key conflicts.
        return {**json2, **json1}

    def extract_call_signs(self, raw_text: str) -> Dict[str, str]:
        """Run the full pipeline: build prompt, query both models, merge.

        Args:
            raw_text: The raw ATC dialogue transcript.
        Returns:
            Mapping of call sign -> full original dialogue text (as produced
            by the models; empty dict if both calls fail or return bad JSON).
        """
        prompt = self.prompt_template.format(raw_text=raw_text)
        result1, result2 = self._dual_model_inference(prompt)
        return self._merge_results(result1, result2)
# ==================== 数据预处理模块 ====================
def preprocess_data(input_data: Dict[str, str]) -> str:
    """Pull the raw dialogue text out of an input record.

    Drops all metadata fields (such as "id") and returns only the "text"
    value, or "" when the record has no "text" key. Placeholder for richer
    preprocessing later.
    """
    try:
        return input_data["text"]
    except KeyError:
        return ""
# ==================== 主执行流程 ====================
def main() -> None:
    """Demo driver: run call-sign extraction on one hard-coded sample record."""
    sample_record = {
        "id": "ACCU-0H4CW3",
        "text": """PI : radar aero flot one two two good afternoon passing flight level two
PI : eight climb flight level two eight zero
AT : aero flot one two two hello radar contact proceed to pemur climb to flight level three one zero
PI : climbing level three one zero cleared to direct pemur aero flot one two two
PIAT : praha good afternoon malaysian two one flight level three three zero squawking zero six four seven malaysian two one hello radar contact maintain flight level three three zero
PI : level three three zero malaysian two one""",
    }

    # Preprocess: strip the id, keep only the "text" field.
    raw_text = preprocess_data(sample_record)
    print(raw_text)

    # Run the dual-model extraction and pretty-print the merged result;
    # ensure_ascii=False keeps any non-ASCII output human-readable.
    extracted = CallSignExtractor().extract_call_signs(raw_text)
    print(json.dumps(extracted, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()
Loading…
Cancel
Save