|
|
|
@ -8,55 +8,124 @@ from haystack.utils import log as logging
|
|
|
|
from blog.documents import ArticleDocument, ArticleDocumentManager
|
|
|
|
from blog.documents import ArticleDocument, ArticleDocumentManager
|
|
|
|
from blog.models import Article
|
|
|
|
from blog.models import Article
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 获取当前模块的日志记录器,用于记录搜索相关的日志信息
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchBackend(BaseSearchBackend):
|
|
|
|
class ElasticSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
自定义 Elasticsearch 搜索后端,继承自 Haystack 的 BaseSearchBackend。
|
|
|
|
|
|
|
|
负责与 Elasticsearch 交互,实现索引的创建、更新、删除、查询等操作。
|
|
|
|
|
|
|
|
使用自定义的 ArticleDocumentManager 来管理文档索引。
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, connection_alias, **connection_options):
    """Initialise the backend and attach an article document manager.

    Args:
        connection_alias: Connection alias, forwarded unchanged to the
            parent initialiser.
        **connection_options: Extra connection options, also forwarded
            unchanged to the parent initialiser.
    """
    super(ElasticSearchBackend, self).__init__(
        connection_alias, **connection_options)
    # Every index operation of this backend goes through this manager.
    self.manager = ArticleDocumentManager()
    # Set unconditionally; presumably read by Haystack to turn spelling
    # suggestions on -- TODO confirm against the Haystack base backend.
    self.include_spelling = True
|
|
|
|
|
|
|
|
|
|
|
|
def _get_models(self, iterable):
    """Convert model instances into index documents.

    Args:
        iterable: Indexable collection of model instances. When it is
            empty, or when its first element is falsy (for example
            ``[None]``), all ``Article`` objects are used instead.

    Returns:
        Whatever ``self.manager.convert_to_doc`` returns for the chosen
        models.
    """
    # The first-element check is what lets ``remove(None)`` (which passes
    # ``[None]``) fall back to "every article".
    chosen = iterable if iterable and iterable[0] else Article.objects.all()
    return self.manager.convert_to_doc(chosen)
|
|
|
|
|
|
|
|
|
|
|
|
def _create(self, models):
    """Create the index and then fill it with documents.

    Args:
        models: Passed straight to ``_get_models`` to obtain the
            documents that are handed to the manager's ``rebuild``.
    """
    # Order matters: the index is created before any document is built
    # or written.
    self.manager.create_index()
    documents = self._get_models(models)
    self.manager.rebuild(documents)
|
|
|
|
|
|
|
|
|
|
|
|
def _delete(self, models):
    """Delete each given document by calling its own ``delete`` method.

    Args:
        models: Iterable of objects exposing a ``delete()`` method.

    Returns:
        bool: Always ``True``; no failure is detected or reported here.
    """
    for document in models:
        document.delete()
    return True
|
|
|
|
|
|
|
|
|
|
|
|
def _rebuild(self, models):
    """Re-index the given models, or every article when none are given.

    Args:
        models: Models to re-index. A falsy value (``None``, empty
            collection) selects all ``Article`` objects instead.
    """
    selected = models or Article.objects.all()
    documents = self.manager.convert_to_doc(selected)
    # Documents are written with the manager's update call rather than
    # its ``rebuild`` call.
    self.manager.update_docs(documents)
|
|
|
|
|
|
|
|
|
|
|
|
def update(self, index, iterable, commit=True):
    """Write the documents for the given models into the index.

    Args:
        index: Accepted for interface compatibility; not used here.
        iterable: Model instances, converted through ``_get_models``.
        commit: Accepted for interface compatibility; not used here.
    """
    documents = self._get_models(iterable)
    self.manager.update_docs(documents)
|
|
|
|
|
|
|
|
|
|
|
|
def remove(self, obj_or_string):
    """Remove the document(s) matching one object from the index.

    Args:
        obj_or_string: The object to remove. It is wrapped in a
            one-element list and resolved through ``_get_models``, so a
            falsy value such as ``None`` resolves to every article.
    """
    documents = self._get_models([obj_or_string])
    self._delete(documents)
|
|
|
|
|
|
|
|
|
|
|
|
def clear(self, models=None, commit=True):
    """Clear the index by delegating to ``remove(None)``.

    Args:
        models: Accepted for interface compatibility; not used here, so
            the call is never restricted to particular models.
        commit: Accepted for interface compatibility; not used here.
    """
    # ``None`` is the value ``remove`` resolves to "every article".
    self.remove(None)
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
@staticmethod
|
|
|
|
def get_suggestion(query: str) -> str:
|
|
|
|
def get_suggestion(query: str) -> str:
|
|
|
|
"""获取推荐词, 如果没有找到添加原搜索词"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
根据用户输入的查询词,获取拼写建议(搜索推荐)。
|
|
|
|
|
|
|
|
使用 Elasticsearch 的 term suggester 功能。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
参数:
|
|
|
|
|
|
|
|
query (str): 用户输入的原始搜索词
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
返回:
|
|
|
|
|
|
|
|
str: 推荐的搜索词(多个词用空格连接)
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
# 执行搜索并获取建议
|
|
|
|
search = ArticleDocument.search() \
|
|
|
|
search = ArticleDocument.search() \
|
|
|
|
.query("match", body=query) \
|
|
|
|
.query("match", body=query) \
|
|
|
|
.suggest('suggest_search', query, term={'field': 'body'}) \
|
|
|
|
.suggest('suggest_search', query, term={'field': 'body'}) \
|
|
|
|
@ -65,29 +134,44 @@ class ElasticSearchBackend(BaseSearchBackend):
|
|
|
|
keywords = []
|
|
|
|
keywords = []
|
|
|
|
for suggest in search.suggest.suggest_search:
|
|
|
|
for suggest in search.suggest.suggest_search:
|
|
|
|
if suggest["options"]:
|
|
|
|
if suggest["options"]:
|
|
|
|
keywords.append(suggest["options"][0]["text"])
|
|
|
|
keywords.append(suggest["options"][0]["text"]) # 使用第一个建议
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
keywords.append(suggest["text"])
|
|
|
|
keywords.append(suggest["text"]) # 无建议则使用原词
|
|
|
|
|
|
|
|
|
|
|
|
return ' '.join(keywords)
|
|
|
|
return ' '.join(keywords)
|
|
|
|
|
|
|
|
|
|
|
|
@log_query
|
|
|
|
@log_query
|
|
|
|
def search(self, query_string, **kwargs):
|
|
|
|
def search(self, query_string, **kwargs):
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
执行搜索查询。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
参数:
|
|
|
|
|
|
|
|
query_string: 用户输入的搜索关键词
|
|
|
|
|
|
|
|
**kwargs: 其他搜索参数(如分页偏移量)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
返回:
|
|
|
|
|
|
|
|
dict: 包含搜索结果、命中数、拼写建议等信息的字典
|
|
|
|
|
|
|
|
"""
|
|
|
|
logger.info('search query_string:' + query_string)
|
|
|
|
logger.info('search query_string:' + query_string)
|
|
|
|
|
|
|
|
|
|
|
|
start_offset = kwargs.get('start_offset')
|
|
|
|
start_offset = kwargs.get('start_offset')
|
|
|
|
end_offset = kwargs.get('end_offset')
|
|
|
|
end_offset = kwargs.get('end_offset')
|
|
|
|
|
|
|
|
|
|
|
|
# 推荐词搜索
|
|
|
|
# 判断是否启用拼写建议
|
|
|
|
if getattr(self, "is_suggest", None):
|
|
|
|
if getattr(self, "is_suggest", None):
|
|
|
|
suggestion = self.get_suggestion(query_string)
|
|
|
|
suggestion = self.get_suggestion(query_string)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
suggestion = query_string
|
|
|
|
suggestion = query_string
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 构建布尔查询:在 body 和 title 字段中匹配,至少 70% 的 should 条件匹配
|
|
|
|
q = Q('bool',
|
|
|
|
q = Q('bool',
|
|
|
|
should=[Q('match', body=suggestion), Q('match', title=suggestion)],
|
|
|
|
should=[Q('match', body=suggestion), Q('match', title=suggestion)],
|
|
|
|
minimum_should_match="70%")
|
|
|
|
minimum_should_match="70%")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 构建搜索请求:
|
|
|
|
|
|
|
|
# - 查询条件:q
|
|
|
|
|
|
|
|
# - 过滤:status='p'(已发布),type='a'(文章)
|
|
|
|
|
|
|
|
# - 不返回源数据(source=False)
|
|
|
|
|
|
|
|
# - 分页:[start_offset: end_offset]
|
|
|
|
search = ArticleDocument.search() \
|
|
|
|
search = ArticleDocument.search() \
|
|
|
|
.query('bool', filter=[q]) \
|
|
|
|
.query('bool', filter=[q]) \
|
|
|
|
.filter('term', status='p') \
|
|
|
|
.filter('term', status='p') \
|
|
|
|
@ -95,13 +179,14 @@ class ElasticSearchBackend(BaseSearchBackend):
|
|
|
|
.source(False)[start_offset: end_offset]
|
|
|
|
.source(False)[start_offset: end_offset]
|
|
|
|
|
|
|
|
|
|
|
|
results = search.execute()
|
|
|
|
results = search.execute()
|
|
|
|
hits = results['hits'].total
|
|
|
|
hits = results['hits'].total # 总命中数
|
|
|
|
raw_results = []
|
|
|
|
raw_results = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 将 Elasticsearch 返回的结果转换为 Haystack 的 SearchResult 对象
|
|
|
|
for raw_result in results['hits']['hits']:
|
|
|
|
for raw_result in results['hits']['hits']:
|
|
|
|
app_label = 'blog'
|
|
|
|
app_label = 'blog'
|
|
|
|
model_name = 'Article'
|
|
|
|
model_name = 'Article'
|
|
|
|
additional_fields = {}
|
|
|
|
additional_fields = {}
|
|
|
|
|
|
|
|
|
|
|
|
result_class = SearchResult
|
|
|
|
result_class = SearchResult
|
|
|
|
|
|
|
|
|
|
|
|
result = result_class(
|
|
|
|
result = result_class(
|
|
|
|
@ -111,19 +196,35 @@ class ElasticSearchBackend(BaseSearchBackend):
|
|
|
|
raw_result['_score'],
|
|
|
|
raw_result['_score'],
|
|
|
|
**additional_fields)
|
|
|
|
**additional_fields)
|
|
|
|
raw_results.append(result)
|
|
|
|
raw_results.append(result)
|
|
|
|
facets = {}
|
|
|
|
|
|
|
|
|
|
|
|
facets = {} # 聚合结果(当前未使用)
|
|
|
|
|
|
|
|
# 如果建议词与原词不同,则提供拼写建议
|
|
|
|
spelling_suggestion = None if query_string == suggestion else suggestion
|
|
|
|
spelling_suggestion = None if query_string == suggestion else suggestion
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
return {
|
|
|
|
'results': raw_results,
|
|
|
|
'results': raw_results, # 搜索结果列表
|
|
|
|
'hits': hits,
|
|
|
|
'hits': hits, # 总命中数
|
|
|
|
'facets': facets,
|
|
|
|
'facets': facets, # 聚合信息
|
|
|
|
'spelling_suggestion': spelling_suggestion,
|
|
|
|
'spelling_suggestion': spelling_suggestion, # 拼写建议
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchQuery(BaseSearchQuery):
|
|
|
|
class ElasticSearchQuery(BaseSearchQuery):
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
自定义搜索查询类,继承自 Haystack 的 BaseSearchQuery。
|
|
|
|
|
|
|
|
负责构建和处理搜索查询语句。
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def _convert_datetime(self, date):
|
|
|
|
def _convert_datetime(self, date):
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
将日期时间对象转换为字符串格式,用于索引查询。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
参数:
|
|
|
|
|
|
|
|
date: 日期或 datetime 对象
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
返回:
|
|
|
|
|
|
|
|
str: 格式化后的字符串(YYYYMMDDHHMMSS 或 YYYYMMDD000000)
|
|
|
|
|
|
|
|
"""
|
|
|
|
if hasattr(date, 'hour'):
|
|
|
|
if hasattr(date, 'hour'):
|
|
|
|
return force_str(date.strftime('%Y%m%d%H%M%S'))
|
|
|
|
return force_str(date.strftime('%Y%m%d%H%M%S'))
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
@ -131,12 +232,14 @@ class ElasticSearchQuery(BaseSearchQuery):
|
|
|
|
|
|
|
|
|
|
|
|
def clean(self, query_fragment):
|
|
|
|
def clean(self, query_fragment):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Provides a mechanism for sanitizing user input before presenting the
|
|
|
|
清理用户输入的查询片段,防止特殊字符引发语法错误。
|
|
|
|
value to the backend.
|
|
|
|
将包含保留字符的词用引号包围。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
参数:
|
|
|
|
|
|
|
|
query_fragment: 查询片段字符串
|
|
|
|
|
|
|
|
|
|
|
|
Whoosh 1.X differs here in that you can no longer use a backslash
|
|
|
|
返回:
|
|
|
|
to escape reserved characters. Instead, the whole word should be
|
|
|
|
str: 清理后的查询字符串
|
|
|
|
quoted.
|
|
|
|
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
words = query_fragment.split()
|
|
|
|
words = query_fragment.split()
|
|
|
|
cleaned_words = []
|
|
|
|
cleaned_words = []
|
|
|
|
@ -155,29 +258,79 @@ class ElasticSearchQuery(BaseSearchQuery):
|
|
|
|
return ' '.join(cleaned_words)
|
|
|
|
return ' '.join(cleaned_words)
|
|
|
|
|
|
|
|
|
|
|
|
def build_query_fragment(self, field, filter_type, value):
    """Return the raw query string carried by ``value``.

    Args:
        field: Accepted for interface compatibility; not used here.
        filter_type: Accepted for interface compatibility; not used here.
        value: Object exposing a ``query_string`` attribute.

    Returns:
        The ``query_string`` attribute of ``value``, unchanged.
    """
    return value.query_string
|
|
|
|
|
|
|
|
|
|
|
|
def get_count(self):
    """Return how many results ``get_results`` yields.

    Returns:
        int: ``len`` of the results, or 0 when the results are falsy
        (``None`` or empty).
    """
    found = self.get_results()
    if not found:
        return 0
    return len(found)
|
|
|
|
|
|
|
|
|
|
|
|
def get_spelling_suggestion(self, preferred_query=None):
    """Return the spelling suggestion stored on this query.

    Args:
        preferred_query: Accepted for interface compatibility; not used
            here.

    Returns:
        The current value of ``self._spelling_suggestion``; no query is
        run to compute it.
    """
    return self._spelling_suggestion
|
|
|
|
|
|
|
|
|
|
|
|
def build_params(self, spelling_query=None):
    """Build the keyword arguments handed to the backend.

    Args:
        spelling_query: Forwarded unchanged to the parent implementation.

    Returns:
        The parent implementation's result, without modification.
    """
    return super(ElasticSearchQuery, self).build_params(
        spelling_query=spelling_query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchModelSearchForm(ModelSearchForm):
    """Search form that switches suggestion-based search on or off."""

    def search(self):
        """Set the backend's ``is_suggest`` flag from the form data, then search.

        The flag is ``True`` unless the submitted ``is_suggest`` value is
        exactly ``"no"``; a missing value therefore enables suggestions.

        Returns:
            Whatever the parent form's ``search`` returns.
        """
        # NOTE(review): the flag is stored on the backend object reached
        # through the queryset; if that backend is shared between requests
        # this is shared mutable state -- confirm.
        backend = self.searchqueryset.query.backend
        backend.is_suggest = self.data.get("is_suggest") != "no"
        return super().search()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchEngine(BaseEngine):
    """Engine that pairs the custom backend with the custom query class."""

    backend = ElasticSearchBackend  # backend class used by this engine
    query = ElasticSearchQuery  # query class used by this engine
    # form = ElasticSearchModelSearchForm  # left disabled in the source; the
    # original note says the custom form was the intended one -- TODO confirm
|