Compare commits

...

4 Commits

Author SHA1 Message Date
pnxf6w84l bdf1d7d1d4 111
5 months ago
pnxf6w84l 13498da39d 111
5 months ago
pnxf6w84l 7519f2fc66 111
5 months ago
pnxf6w84l e3e292ec54 111
5 months ago

@@ -0,0 +1,12 @@
from utils.nlp_utils import CallsignExtractor
# Sample utterances for a quick manual check of the extractor.
extractor = CallsignExtractor()
sample_texts = [
    "Jade23 requesting descent to FL180",
    "Tower, Snowbird inbound with info Q",
    "Speedbird 123 cleared direct to WP",
]
for utterance in sample_texts:
    # Resolve the callsign first so the print line stays a plain formatter.
    callsign = extractor.extract(utterance)
    print(f"文本: {utterance} → 呼号: {callsign}")

@@ -1,3 +1,4 @@
import pandas as pd
from sqlalchemy import create_engine, text

@@ -0,0 +1,31 @@
import re
from typing import Optional

from transformers import pipeline
class CallsignExtractor:
    """Extract an aircraft callsign from a radio-transcript string.

    Regular expressions are tried first because they are cheap; only when
    none of them matches is the lazily created NER pipeline consulted and
    its first organisation (ORG) entity returned.
    """

    def __init__(self):
        # The NER pipeline is created on first use, so regex-only workloads
        # never pay for loading the model.
        self._ner_model = None
        # Tried in order; the first pattern that matches anywhere in the
        # text wins. Flight levels ("FL180", "FL 180") have the same shape
        # as an airline-code callsign, so the all-caps patterns refuse any
        # token that starts with "FL<digit>".
        # NOTE(review): the second pattern accepts any capitalised word, so
        # a leading facility name such as "Tower" is still returned as the
        # callsign -- confirm whether that is intended.
        self._patterns = [
            re.compile(r"\b(?!FL\d)[A-Z]{2,}\d{2,4}\b"),  # e.g. AA1234
            re.compile(r"\b[A-Z][a-z]+(?:\s?\d+)?\b"),  # e.g. Jade23, Speedbird 123
            re.compile(r"\b(?!FL\s?\d)[A-Z]+\s?\d{3,4}\b"),  # e.g. SPEED 123
        ]

    @property
    def ner_model(self):
        """Return the NER pipeline, creating it on first access."""
        if self._ner_model is None:
            self._ner_model = pipeline("ner", model="dslim/bert-base-NER")
        return self._ner_model

    def extract(self, text: str) -> Optional[str]:
        """Return the callsign found in ``text``, or ``None`` if there is none.

        Args:
            text: One transcript utterance.

        Returns:
            The text matched by the first regex pattern that hits; otherwise
            the first ORG entity reported by the NER pipeline; otherwise
            ``None``. Empty input returns ``None`` without touching the model.
        """
        if not text:
            return None

        # Fast path: regular expressions.
        for pattern in self._patterns:
            match = pattern.search(text)
            if match:
                return match.group()

        # Slow path: fall back to the NER model.
        return self._first_org_entity(self.ner_model(text))

    @staticmethod
    def _first_org_entity(entities: list) -> Optional[str]:
        """Join the tokens of the first ORG entity in pipeline output.

        Unaggregated output labels tokens "B-ORG" / "I-ORG" and splits words
        into "##"-prefixed WordPiece continuations; aggregated output uses
        the key ``entity_group`` with the bare label "ORG". Both are handled.
        """
        pieces = []
        prev_index = None
        for ent in entities:
            label = ent.get("entity") or ent.get("entity_group") or ""
            if not label.endswith("ORG"):
                if pieces:
                    break  # the first ORG entity has ended
                continue

            word = ent["word"]
            index = ent.get("index")
            is_subword = word.startswith("##")
            continues = is_subword or label.startswith("I-")
            adjacent = (
                prev_index is None or index is None or index == prev_index + 1
            )
            if pieces and not (continues and adjacent):
                break  # a second, separate ORG entity starts here

            if is_subword:
                word = word[2:]
            if is_subword and pieces:
                pieces[-1] += word  # glue the WordPiece onto the previous token
            else:
                pieces.append(word)
            prev_index = index

        return " ".join(pieces) if pieces else None
Loading…
Cancel
Save