Update elasticsearch_backend.py

hjn_branch
plqo32bax 5 months ago
parent 35c74dab0a
commit 63a0941d54

@ -1,183 +1,144 @@
# 导入Django字符串处理工具及Elasticsearch相关依赖
from django.utils.encoding import force_str
from elasticsearch_dsl import Q
# 导入Haystack搜索引擎基础类和工具
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
from haystack.forms import ModelSearchForm
from haystack.models import SearchResult
from haystack.utils import log as logging
# 导入博客相关的ES文档类、管理类和模型
from blog.documents import ArticleDocument, ArticleDocumentManager
from blog.models import Article
# Module-level logger obtained through Haystack's logging helper.
logger = logging.getLogger(__name__)
# 自定义Elasticsearch搜索后端继承Haystack基础搜索后端
class ElasticSearchBackend(BaseSearchBackend):
    """Haystack search backend that indexes and queries blog articles.

    All index maintenance is delegated to ``ArticleDocumentManager``; queries
    are executed through ``ArticleDocument.search()``.
    """

    def __init__(self, connection_alias, **connection_options):
        """Initialise the base backend and the article document manager."""
        super().__init__(connection_alias, **connection_options)
        self.manager = ArticleDocumentManager()
        # Enable spelling suggestions.
        self.include_spelling = True

    def _get_models(self, iterable):
        """Convert model instances into Elasticsearch documents.

        When ``iterable`` is empty, or its first element is falsy (for example
        ``[None]``), every ``Article`` is converted instead.
        """
        models = iterable if (iterable and iterable[0]) else Article.objects.all()
        return self.manager.convert_to_doc(models)

    def _create(self, models):
        """Create the index and rebuild it from the documents for ``models``."""
        self.manager.create_index()
        self.manager.rebuild(self._get_models(models))

    def _delete(self, models):
        """Call ``delete()`` on every item of ``models`` and return ``True``."""
        for m in models:
            m.delete()
        return True

    def _rebuild(self, models):
        """Update the indexed documents for ``models`` (all articles if falsy)."""
        models = models if models else Article.objects.all()
        self.manager.update_docs(self.manager.convert_to_doc(models))

    def update(self, index, iterable, commit=True):
        """Update the documents for the objects in ``iterable``.

        ``index`` and ``commit`` are accepted for Haystack API compatibility
        and are not used here.
        """
        self.manager.update_docs(self._get_models(iterable))

    def remove(self, obj_or_string):
        """Delete the document(s) corresponding to ``obj_or_string``.

        Because of the fallback in ``_get_models``, a falsy argument selects
        the documents of every article.
        """
        self._delete(self._get_models([obj_or_string]))

    def clear(self, models=None, commit=True):
        """Clear the index by removing the documents of every article.

        ``models`` and ``commit`` are accepted for Haystack API compatibility
        and are not used here.
        """
        self.remove(None)

    @staticmethod
    def get_suggestion(query: str) -> str:
        """Return the suggested search terms for ``query``.

        For each term of the suggester response the first suggested option is
        used; when a term has no options the original term text is kept. The
        terms are joined with single spaces.
        """
        search = ArticleDocument.search() \
            .query("match", body=query) \
            .suggest('suggest_search', query, term={'field': 'body'}) \
            .execute()

        keywords = [
            suggest["options"][0]["text"] if suggest["options"] else suggest["text"]
            for suggest in search.suggest.suggest_search
        ]
        return ' '.join(keywords)

    @log_query
    def search(self, query_string, **kwargs):
        """Run a search and return the result dictionary Haystack expects.

        Reads ``start_offset`` and ``end_offset`` from ``kwargs`` to select
        the slice of results to fetch.

        Returns a dict with the keys ``results`` (list of ``SearchResult``),
        ``hits`` (total hit count), ``facets`` (always empty) and
        ``spelling_suggestion`` (``None`` when the query was used unchanged).
        """
        logger.info('search query_string:' + query_string)

        start_offset = kwargs.get('start_offset')
        end_offset = kwargs.get('end_offset')

        # Use the suggested terms only when the form enabled ``is_suggest``.
        if getattr(self, "is_suggest", None):
            suggestion = self.get_suggestion(query_string)
        else:
            suggestion = query_string

        # Match against body or title, with a 70% minimum-should-match.
        q = Q('bool',
              should=[Q('match', body=suggestion), Q('match', title=suggestion)],
              minimum_should_match="70%")

        # Restrict to status 'p' / type 'a' and skip fetching ``_source``:
        # only the id and score of each hit are needed.
        search = ArticleDocument.search() \
            .query('bool', filter=[q]) \
            .filter('term', status='p') \
            .filter('term', type='a') \
            .source(False)[start_offset: end_offset]

        results = search.execute()

        # Elasticsearch 7+ reports the total as an object carrying a ``value``
        # attribute, older versions as a plain integer; normalise to the int.
        total = results['hits'].total
        hits = getattr(total, 'value', total)

        # Wrap every raw hit in Haystack's SearchResult.
        raw_results = [
            SearchResult('blog', 'Article', raw_result['_id'], raw_result['_score'])
            for raw_result in results['hits']['hits']
        ]

        spelling_suggestion = None if query_string == suggestion else suggestion

        return {
            'results': raw_results,
            'hits': hits,
            'facets': {},
            'spelling_suggestion': spelling_suggestion,
        }
# 自定义搜索查询类继承Haystack基础查询类
class ElasticSearchQuery(BaseSearchQuery):
    """Haystack search query used together with ``ElasticSearchBackend``."""

    def _convert_datetime(self, date):
        """Format ``date`` as ``YYYYmmddHHMMSS``.

        Objects without an ``hour`` attribute (plain dates) get ``000000`` as
        their time part.
        """
        fmt = '%Y%m%d%H%M%S' if hasattr(date, 'hour') else '%Y%m%d000000'
        return force_str(date.strftime(fmt))

    def clean(self, query_fragment):
        """Sanitise user input before it is handed to the backend.

        The fragment is split on whitespace. Words listed in the backend's
        ``RESERVED_WORDS`` are lower-cased, and any word containing one of
        the backend's ``RESERVED_CHARACTERS`` is wrapped in single quotes.
        The cleaned words are joined with single spaces.
        """
        cleaned_words = []

        for word in query_fragment.split():
            if word in self.backend.RESERVED_WORDS:
                word = word.lower()

            # Quote the whole word once if it contains any reserved character.
            if any(char in word for char in self.backend.RESERVED_CHARACTERS):
                word = f"'{word}'"

            cleaned_words.append(word)

        return ' '.join(cleaned_words)

    def build_query_fragment(self, field, filter_type, value):
        """Return the raw query string of ``value``.

        ``field`` and ``filter_type`` are ignored.
        """
        return value.query_string

    def get_count(self):
        """Return the number of fetched results, or 0 when there are none."""
        # Fetch once and reuse the result instead of calling get_results twice.
        results = self.get_results()
        return len(results) if results else 0

    def get_spelling_suggestion(self, preferred_query=None):
        """Return the stored spelling suggestion; ``preferred_query`` is ignored."""
        return self._spelling_suggestion

    def build_params(self, spelling_query=None):
        """Build the search parameters by delegating to the base class."""
        return super().build_params(spelling_query=spelling_query)
# 自定义搜索表单继承Haystack模型搜索表单
class ElasticSearchModelSearchForm(ModelSearchForm):
    """Model search form that lets the request switch search suggestions on or off."""

    def search(self):
        """Set the backend's ``is_suggest`` flag, then run the standard search.

        Suggestions are enabled unless the submitted ``is_suggest`` value is
        exactly ``"no"`` (a missing value therefore enables them).
        """
        self.searchqueryset.query.backend.is_suggest = self.data.get("is_suggest") != "no"
        return super().search()
# 自定义搜索引擎引擎类,指定后端和查询类
class ElasticSearchEngine(BaseEngine):
    """Haystack engine that pairs the custom backend with its query class."""

    backend = ElasticSearchBackend
    query = ElasticSearchQuery
Loading…
Cancel
Save