|
|
|
|
@ -0,0 +1,316 @@
|
|
|
|
|
# 导入必要模块
|
|
|
|
|
from django.utils.encoding import force_str # 用于将数据转换为字符串(兼容Python 2/3)
|
|
|
|
|
from elasticsearch_dsl import Q # Elasticsearch DSL的查询构建工具
|
|
|
|
|
from haystack.backends import ( # Haystack搜索框架的基础类
|
|
|
|
|
BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
|
|
|
|
|
)
|
|
|
|
|
from haystack.forms import ModelSearchForm # Haystack默认的模型搜索表单
|
|
|
|
|
from haystack.models import SearchResult # Haystack的搜索结果封装类
|
|
|
|
|
from haystack.utils import log as logging # Haystack的日志工具
|
|
|
|
|
|
|
|
|
|
# 导入项目内部模块
|
|
|
|
|
from blog.documents import ArticleDocument, ArticleDocumentManager # 文章的Elasticsearch文档定义及管理器
|
|
|
|
|
from blog.models import Article # 博客文章模型
|
|
|
|
|
|
|
|
|
|
# 创建当前模块的日志记录器,用于记录搜索相关日志
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchBackend(BaseSearchBackend):
    """Haystack search backend for the blog's Elasticsearch article index.

    Index maintenance (create / rebuild / update) is delegated to
    ``ArticleDocumentManager``; searches and spelling suggestions are built
    with ``elasticsearch_dsl`` against ``ArticleDocument``.
    """

    def __init__(self, connection_alias, **connection_options):
        """Initialise the backend.

        :param connection_alias: Haystack connection alias, forwarded to the
            base class.
        :param connection_options: extra connection options, forwarded to the
            base class.
        """
        super().__init__(connection_alias, **connection_options)
        # All index operations go through this manager.
        self.manager = ArticleDocumentManager()
        self.include_spelling = True

    def _get_models(self, iterable):
        """Convert model instances into Elasticsearch documents.

        :param iterable: indexable collection of model instances. When it is
            empty, or its first element is falsy (``clear`` ends up passing
            ``[None]`` through ``remove``), *every* ``Article`` is used
            instead -- no status filter is applied here.
        :return: the result of ``ArticleDocumentManager.convert_to_doc``.
        """
        models = iterable if iterable and iterable[0] else Article.objects.all()
        return self.manager.convert_to_doc(models)

    def _create(self, models):
        """Create the index and load it with documents for ``models``.

        :param models: model instances to index; see ``_get_models`` for the
            fallback applied when this is empty.
        """
        # NOTE(review): presumably a no-op when the index already exists --
        # confirm against ArticleDocumentManager.create_index.
        self.manager.create_index()
        docs = self._get_models(models)
        self.manager.rebuild(docs)

    def _delete(self, models):
        """Call ``delete()`` on every item of ``models``.

        :param models: iterable of objects exposing ``delete()`` (the callers
            in this class pass the documents produced by ``_get_models``).
        :return: always ``True``.
        """
        for m in models:
            m.delete()
        return True

    def _rebuild(self, models):
        """Push documents for ``models`` to the index via ``update_docs``.

        :param models: model instances to update; when falsy, all articles
            are used.
        """
        models = models or Article.objects.all()
        docs = self._get_models(models)
        self.manager.update_docs(docs)

    def update(self, index, iterable, commit=True):
        """Haystack interface: update the index for the given instances.

        :param index: unused by this implementation.
        :param iterable: model instances to (re)index; an empty value falls
            back to all articles (see ``_get_models``).
        :param commit: unused by this implementation.
        """
        docs = self._get_models(iterable)
        self.manager.update_docs(docs)

    def remove(self, obj_or_string):
        """Haystack interface: remove one object from the index.

        :param obj_or_string: the object to remove. A falsy value makes
            ``_get_models`` select every article, so all of their documents
            are deleted.
        """
        docs = self._get_models([obj_or_string])
        self._delete(docs)

    def clear(self, models=None, commit=True):
        """Haystack interface: clear the index.

        :param models: ignored; the documents of all articles are removed.
        :param commit: unused by this implementation.
        """
        # remove(None) triggers the "all articles" fallback in _get_models.
        self.remove(None)

    @staticmethod
    def get_suggestion(query: str) -> str:
        """Return a corrected version of ``query`` using a term suggester.

        A ``match`` query on ``body`` is executed together with a term
        suggester on the same field. For each suggestion entry the first
        option is taken when there is one, otherwise the entry's original
        text is kept.

        :param query: the search text entered by the user.
        :return: the chosen words joined with single spaces.
        """
        response = (
            ArticleDocument.search()
            .query("match", body=query)
            .suggest('suggest_search', query, term={'field': 'body'})
            .execute()
        )
        return ' '.join(
            entry["options"][0]["text"] if entry["options"] else entry["text"]
            for entry in response.suggest.suggest_search
        )

    @log_query
    def search(self, query_string, **kwargs):
        """Run a search and return the result dictionary.

        :param query_string: the search text entered by the user.
        :param kwargs: extra parameters; only ``start_offset`` (default 0)
            and ``end_offset`` (default 10) are read here.
        :return: dict with ``results`` (list of ``SearchResult``), ``hits``
            (total hit count), ``facets`` (always empty) and
            ``spelling_suggestion`` (``None`` when no different suggestion
            was produced).
        """
        # Lazy %-formatting: same message, but no TypeError for non-str input.
        logger.info('search query_string:%s', query_string)

        start_offset = kwargs.get('start_offset', 0)
        end_offset = kwargs.get('end_offset', 10)

        # ``is_suggest`` is set on the backend by ElasticSearchModelSearchForm;
        # when it is absent or falsy the original text is searched as-is.
        if getattr(self, "is_suggest", None):
            suggestion = self.get_suggestion(query_string)
        else:
            suggestion = query_string

        # Match the (possibly corrected) text against body or title.
        text_query = Q(
            'bool',
            should=[Q('match', body=suggestion), Q('match', title=suggestion)],
            minimum_should_match="70%",
        )

        # Restrict to status 'p' and type 'a', skip ``_source`` and apply
        # the requested slice.
        # NOTE(review): the text query is placed in filter context, so
        # ``_score`` presumably carries no relevance ranking -- confirm this
        # is intended.
        search = (
            ArticleDocument.search()
            .query('bool', filter=[text_query])
            .filter('term', status='p')
            .filter('term', type='a')
            .source(False)[start_offset:end_offset]
        )

        results = search.execute()

        # Elasticsearch 7+ reports ``hits.total`` as an object with a
        # ``value`` field, older versions as a plain integer; normalise to
        # the integer that Haystack expects.
        total = results['hits'].total
        hits = getattr(total, 'value', total)

        # Every hit is an Article of the ``blog`` app.
        raw_results = [
            SearchResult('blog', 'Article', hit['_id'], hit['_score'])
            for hit in results['hits']['hits']
        ]

        # Only report a suggestion when it differs from what was typed.
        spelling_suggestion = None if query_string == suggestion else suggestion

        return {
            'results': raw_results,
            'hits': hits,
            'facets': {},
            'spelling_suggestion': spelling_suggestion,
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchQuery(BaseSearchQuery):
    """Haystack ``BaseSearchQuery`` subclass used with ``ElasticSearchBackend``.

    Provides query-string cleaning, date formatting and thin overrides of the
    Haystack query hooks.
    """

    def _convert_datetime(self, date):
        """Format a date or datetime as a ``YYYYmmddHHMMSS`` string.

        :param date: object supporting ``strftime``; values without an
            ``hour`` attribute get ``000000`` as their time part.
        :return: the formatted string, passed through ``force_str``.
        """
        pattern = '%Y%m%d%H%M%S' if hasattr(date, 'hour') else '%Y%m%d000000'
        return force_str(date.strftime(pattern))

    def clean(self, query_fragment):
        """Neutralise reserved words and characters in user input.

        :param query_fragment: raw query text; it is split on whitespace.
        :return: the processed words joined with single spaces.
        """
        def _clean_word(token):
            # Reserved words are lower-cased.
            if token in self.backend.RESERVED_WORDS:
                token = token.lower()
            # A word containing any reserved character is wrapped in single
            # quotes (once, regardless of how many such characters it has).
            if any(ch in token for ch in self.backend.RESERVED_CHARACTERS):
                token = "'%s'" % token
            return token

        return ' '.join(_clean_word(token) for token in query_fragment.split())

    def build_query_fragment(self, field, filter_type, value):
        """Return the query string carried by ``value``.

        :param field: unused.
        :param filter_type: unused.
        :param value: object exposing a ``query_string`` attribute.
        :return: ``value.query_string``.
        """
        return value.query_string

    def get_count(self):
        """Return the number of results from ``get_results()``.

        :return: ``len`` of the results, or 0 when they are empty or falsy.
        """
        found = self.get_results()
        if not found:
            return 0
        return len(found)

    def get_spelling_suggestion(self, preferred_query=None):
        """Return the stored spelling suggestion.

        :param preferred_query: unused.
        :return: the current value of ``self._spelling_suggestion``.
        """
        return self._spelling_suggestion

    def build_params(self, spelling_query=None):
        """Build the search parameters by deferring to the base class.

        :param spelling_query: forwarded unchanged to the parent method.
        :return: the parameters produced by the parent implementation.
        """
        return super().build_params(spelling_query=spelling_query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchModelSearchForm(ModelSearchForm):
    """``ModelSearchForm`` that lets the request switch spelling suggestions off."""

    def search(self):
        """Run the search after configuring suggestion behaviour.

        The ``is_suggest`` form value is read from ``self.data``; suggestions
        stay enabled unless it equals ``"no"``. The flag is stored on the
        backend of the form's search query set before the parent search runs.

        :return: the result of the parent class's ``search()``.
        """
        suggest_enabled = self.data.get("is_suggest") != "no"
        self.searchqueryset.query.backend.is_suggest = suggest_enabled
        return super().search()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ElasticSearchEngine(BaseEngine):
    """Haystack engine that pairs the backend and query classes of this module."""

    # Backend class used by this engine.
    backend = ElasticSearchBackend
    # Query class used by this engine.
    query = ElasticSearchQuery
|