pnxf6w84l 5 months ago
parent 7519f2fc66
commit 13498da39d

@ -0,0 +1,31 @@
import re
from typing import Optional

from transformers import pipeline
class CallsignExtractor:
    """Extract a callsign-like token from free text.

    A short list of regular expressions is tried first because it is cheap;
    a BERT NER model is loaded lazily and consulted only when none of the
    patterns match.
    """

    def __init__(self):
        # The model is created on first access of ``ner_model`` so that
        # building an extractor stays cheap and regex-only use never loads it.
        self._ner_model = None
        # Tried in order; the first pattern that matches anywhere in the
        # text wins, even if a later pattern would match earlier in the text.
        # NOTE(review): the second pattern also matches any capitalised word
        # without digits (e.g. "Hello"), so the NER fallback only runs for
        # text that none of these patterns match -- confirm this is intended.
        self._patterns = [
            r"\b[A-Z]{2,}\d{2,4}\b",  # e.g. AA1234
            r"\b[A-Z][a-z]+\d*\b",  # e.g. Jade23
            r"\b[A-Z]+\s?\d{3,4}\b",  # e.g. SPEED 123
        ]

    @property
    def ner_model(self):
        """Return the NER pipeline, creating it on first access."""
        if self._ner_model is None:
            # Aggregation merges word pieces back into whole entities and
            # reports the label under "entity_group" without a B-/I- prefix,
            # so ``extract`` returns a complete word rather than a fragment.
            self._ner_model = pipeline(
                "ner",
                model="dslim/bert-base-NER",
                aggregation_strategy="simple",
            )
        return self._ner_model

    @staticmethod
    def _entity_label(entity):
        """Return the bare entity type (e.g. ``"ORG"``) of one NER result.

        Accepts both aggregated results (label under ``"entity_group"``) and
        raw token results (label under ``"entity"``, e.g. ``"B-ORG"``).
        """
        label = entity.get("entity_group") or entity.get("entity") or ""
        # Drop an optional BIO prefix: "B-ORG" -> "ORG", "ORG" -> "ORG".
        return label.split("-", 1)[-1]

    def extract(self, text: str) -> Optional[str]:
        """Return the first callsign candidate found in *text*.

        Args:
            text: The text to search.

        Returns:
            The first regex match if any pattern matches; otherwise the word
            of the first entity the NER model labels as an organisation;
            ``None`` when *text* is empty or nothing is found.
        """
        # Nothing to search; also avoids loading the model for empty input.
        if not text:
            return None

        # Regex first (fast path).
        for pattern in self._patterns:
            match = re.search(pattern, text)
            if match:
                return match.group()

        # Regex failed: fall back to the NER model.
        for entity in self.ner_model(text):
            if self._entity_label(entity) == "ORG":
                return entity["word"]
        return None
Loading…
Cancel
Save