Compare commits
1 Commits
bdf1d7d1d4
...
eed3c58da2
Author | SHA1 | Date |
---|---|---|
|
eed3c58da2 | 5 months ago |
@ -1,12 +0,0 @@
|
||||
from utils.nlp_utils import CallsignExtractor
|
||||
|
||||
# Sample radio transmissions for a quick manual check of the extractor.
sample_texts = [
    "Jade23 requesting descent to FL180",
    "Tower, Snowbird inbound with info Q",
    "Speedbird 123 cleared direct to WP",
]
extractor = CallsignExtractor()

# Print every transmission next to the callsign extracted from it.
for text in sample_texts:
    print(f"文本: {text} → 呼号: {extractor.extract(text)}")
|
Binary file not shown.
Binary file not shown.
@ -1,31 +0,0 @@
|
||||
import re
from typing import Optional

from transformers import pipeline
|
||||
|
||||
class CallsignExtractor:
    """Extract an aircraft callsign from the text of a radio transmission.

    Extraction is attempted in three stages, cheapest first:

    1. Digit-bearing designators (``AA1234``, ``Jade23``, ``Speedbird 123``,
       ``SPEED 123``). The leftmost such token in the text wins; tokens that
       are flight levels (``FL180``) are skipped.
    2. A bare capitalised word (``Snowbird``), skipping words that address an
       ATC facility (``Tower``, ``Ground``, ...).
    3. The first organisation entity reported by a BERT NER model, which is
       only loaded the first time this stage is reached.

    The regex stages are heuristics: any capitalised word that is not a known
    facility name is accepted in stage 2.
    """

    # Capitalised words that address an ATC facility rather than an aircraft.
    _FACILITY_WORDS = frozenset({
        "Tower", "Ground", "Approach", "Departure", "Center", "Centre",
        "Control", "Radar", "Delivery", "Clearance",
    })

    # A flight level such as "FL180" looks like a designator but is an altitude.
    _FLIGHT_LEVEL = re.compile(r"FL\s?\d{2,3}")

    def __init__(self):
        # The NER model is loaded lazily to keep memory usage down.
        self._ner_model = None
        # Designator patterns; every one of them requires trailing digits.
        self._patterns = [
            r"\b[A-Z]{2,}\d{2,4}\b",       # e.g. AA1234
            r"\b[A-Z][a-z]+\s?\d{1,4}\b",  # e.g. Jade23, Speedbird 123
            r"\b[A-Z]+\s?\d{3,4}\b",       # e.g. SPEED 123
        ]
        # Fallback: a capitalised word, optionally followed by digits.
        self._name_pattern = r"\b[A-Z][a-z]+\d*\b"

    @property
    def ner_model(self):
        """Return the NER pipeline, creating it on first access."""
        if self._ner_model is None:
            self._ner_model = pipeline("ner", model="dslim/bert-base-NER")
        return self._ner_model

    def extract(self, text: str) -> Optional[str]:
        """Return the callsign found in *text*, or ``None`` if there is none.

        Args:
            text: The transcribed transmission.

        Returns:
            The matched callsign exactly as it appears in *text* (regex
            stages) or as reconstructed from the NER tokens, or ``None``
            when no stage finds a candidate.
        """
        if not text:
            # Nothing to search; avoid loading the model for empty input.
            return None

        # Regex stages first: they are much cheaper than running BERT.
        designator = self._leftmost_designator(text)
        if designator is not None:
            return designator

        name = self._first_telephony_name(text)
        if name is not None:
            return name

        # Only fall back to the NER model when both regex stages fail.
        return self._first_org_entity(self.ner_model(text))

    def _leftmost_designator(self, text):
        """Return the earliest digit-bearing designator that is not a flight level."""
        best = None
        for pattern in self._patterns:
            for match in re.finditer(pattern, text):
                token = match.group()
                if self._FLIGHT_LEVEL.fullmatch(token):
                    continue
                # Earlier start wins; on a tie the longer token wins.
                key = (match.start(), -len(token))
                if best is None or key < best[0]:
                    best = (key, token)
                # finditer scans left to right, so later matches of this
                # pattern cannot start earlier than this one.
                break
        return best[1] if best is not None else None

    def _first_telephony_name(self, text):
        """Return the first capitalised word that is not an ATC facility name."""
        for match in re.finditer(self._name_pattern, text):
            word = match.group()
            if word not in self._FACILITY_WORDS:
                return word
        return None

    @staticmethod
    def _first_org_entity(entities):
        """Return the first ORG entity from NER output, merging word pieces.

        Handles both per-token output (``entity`` holding IOB tags such as
        ``B-ORG``/``I-ORG`` and ``word`` possibly being a ``##`` word piece)
        and aggregated output (``entity_group`` holding ``ORG``).
        """
        pieces = []
        last_index = None
        for ent in entities:
            if "entity_group" in ent:
                # Aggregated output: each item is already a whole entity.
                if pieces:
                    break
                if ent["entity_group"] == "ORG":
                    return ent["word"]
                continue

            prefix, _, tag = ent.get("entity", "").rpartition("-")
            index = ent.get("index")
            if tag != "ORG":
                if pieces:
                    break
                continue
            if pieces:
                # A new "B-" tag, or a gap in token positions, starts a
                # different entity; stop at the end of the first one.
                if prefix == "B":
                    break
                if index is not None and last_index is not None and index != last_index + 1:
                    break

            word = ent["word"]
            if word.startswith("##"):
                # Word-piece continuation: glue it onto the previous piece.
                if pieces:
                    pieces[-1] += word[2:]
                else:
                    pieces.append(word[2:])
            else:
                pieces.append(word)
            last_index = index

        return " ".join(pieces) if pieces else None
|
Loading…
Reference in new issue