Compare commits
4 Commits
eed3c58da2
...
bdf1d7d1d4
Author | SHA1 | Date |
---|---|---|
|
bdf1d7d1d4 | 5 months ago |
|
13498da39d | 5 months ago |
|
7519f2fc66 | 5 months ago |
|
e3e292ec54 | 5 months ago |
@ -0,0 +1,12 @@
|
|||||||
|
from utils.nlp_utils import CallsignExtractor
|
||||||
|
|
||||||
|
# Demo: run the extractor over a few sample radio transmissions.
sample_texts = [
    "Jade23 requesting descent to FL180",
    "Tower, Snowbird inbound with info Q",
    "Speedbird 123 cleared direct to WP",
]

# A single extractor instance is reused for every sample.
extractor = CallsignExtractor()

# Print each transcript next to whatever the extractor returns for it.
for text in sample_texts:
    print(f"文本: {text} → 呼号: {extractor.extract(text)}")
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,31 @@
|
|||||||
|
import re
from typing import Optional

from transformers import pipeline
|
||||||
|
|
||||||
|
class CallsignExtractor:
    """Extract a callsign-like token from a radio-transmission transcript.

    Extraction is two-staged: a short list of regular expressions is tried
    first, and only when none of them matches is the (much slower) BERT NER
    pipeline consulted for an organisation entity.
    """

    def __init__(self):
        # The NER pipeline is created lazily to reduce memory usage.
        self._ner_model = None
        # Tried in this order against the whole text; the first pattern that
        # matches anywhere wins.
        self._patterns = [
            r"\b[A-Z]{2,}\d{2,4}\b",  # e.g. AA1234
            r"\b[A-Z][a-z]+\d*\b",  # e.g. Jade23
            r"\b[A-Z]+\s?\d{3,4}\b",  # e.g. SPEED 123
        ]

    @property
    def ner_model(self):
        """Return the NER pipeline, building it on first access."""
        if self._ner_model is None:
            self._ner_model = pipeline("ner", model="dslim/bert-base-NER")
        return self._ner_model

    @staticmethod
    def _entity_label(entity) -> str:
        """Return the entity type of one NER result without its BIO prefix.

        Handles both the per-token form (``{"entity": "B-ORG"}``) and the
        aggregated form (``{"entity_group": "ORG"}``).
        """
        label = entity.get("entity_group") or entity.get("entity") or ""
        return label.rsplit("-", 1)[-1]

    def extract(self, text: str) -> Optional[str]:
        """Return the first callsign candidate found in *text*.

        Args:
            text: Transcript to search.

        Returns:
            The matched substring from the first regex pattern that matches,
            otherwise the first organisation entity reported by the NER
            model, otherwise ``None``. Empty input yields ``None`` without
            loading the model.
        """
        if not text:
            return None

        # Try the regular expressions first (faster than the model).
        # NOTE(review): patterns are applied in priority order over the whole
        # text, so a token like "FL180" (first pattern) is returned even when
        # a "Jade23"-style token appears earlier -- confirm this is intended.
        for pattern in self._patterns:
            match = re.search(pattern, text)
            if match:
                return match.group()

        # Fall back to BERT only when every regex failed.
        entities = list(self.ner_model(text))
        for index, entity in enumerate(entities):
            # Labels arrive as "B-ORG"/"I-ORG" (or "ORG" when aggregated), so
            # compare the type with the BIO prefix stripped.
            if self._entity_label(entity) != "ORG":
                continue
            word = entity["word"]
            # Re-attach "##" word-piece continuations of the same token.
            for follower in entities[index + 1:]:
                piece = follower["word"]
                if not piece.startswith("##"):
                    break
                word += piece[2:]
            return word
        return None
|
Loading…
Reference in new issue