"""Haystack search engine for blog articles backed by elasticsearch-dsl documents.

The module wires three Haystack extension points together:

* ``ElasticSearchBackend`` - indexes ``Article`` objects through
  ``ArticleDocumentManager`` and runs searches against ``ArticleDocument``.
* ``ElasticSearchQuery`` - query builder that hands the raw query string to the
  backend.
* ``ElasticSearchModelSearchForm`` - search form that switches suggestion-based
  searching on or off per request.
"""
# Provenance: src/elasticsearch_backend.py, introduced by commit e415eba8
# ("ADD file via upload").

from django.utils.encoding import force_str
from elasticsearch_dsl import Q
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
from haystack.forms import ModelSearchForm
from haystack.models import SearchResult
from haystack.utils import log as logging

# Project-specific Elasticsearch document and model.
from blog.documents import ArticleDocument, ArticleDocumentManager
from blog.models import Article

logger = logging.getLogger(__name__)


class ElasticSearchBackend(BaseSearchBackend):
    """Search backend that stores and queries ``Article`` documents."""

    # Set by ElasticSearchModelSearchForm.search(). Declared here so the flag
    # always exists; False keeps the "no suggestion" behaviour the code had when
    # the attribute was missing.
    is_suggest = False

    def __init__(self, connection_alias, **connection_options):
        """Initialise the backend and its document manager.

        Args:
            connection_alias: Haystack connection alias, forwarded to the base class.
            **connection_options: Connection settings, forwarded to the base class.
        """
        super().__init__(connection_alias, **connection_options)
        self.manager = ArticleDocumentManager()  # converts/updates article documents
        self.include_spelling = True  # spelling suggestions are enabled

    def _get_models(self, iterable):
        """Convert model instances into Elasticsearch documents.

        Args:
            iterable: Indexable, sized collection of models. When it is empty or
                its first element is falsy (``remove(None)`` passes ``[None]``),
                every ``Article`` is used instead.

        Returns:
            Whatever ``ArticleDocumentManager.convert_to_doc`` returns for the
            selected models.
        """
        models = iterable if iterable and iterable[0] else Article.objects.all()
        return self.manager.convert_to_doc(models)

    def _create(self, models):
        """Create the index, then rebuild it from *models* (or from all articles)."""
        self.manager.create_index()
        docs = self._get_models(models)
        self.manager.rebuild(docs)

    def _delete(self, models):
        """Call ``delete()`` on every item of *models* and return ``True``.

        Callers in this module pass the documents produced by ``_get_models``.
        """
        for doc in models:
            doc.delete()
        return True

    def _rebuild(self, models):
        """Update the documents of *models*, defaulting to every ``Article``."""
        models = models or Article.objects.all()
        docs = self.manager.convert_to_doc(models)
        self.manager.update_docs(docs)

    def update(self, index, iterable, commit=True):
        """Update the documents that correspond to *iterable*.

        ``index`` and ``commit`` are accepted for interface compatibility and are
        not used by this implementation.
        """
        docs = self._get_models(iterable)
        self.manager.update_docs(docs)

    def remove(self, obj_or_string):
        """Delete the document of *obj_or_string*.

        A falsy argument (such as ``None``) selects every ``Article`` because of
        the fallback in ``_get_models``.
        NOTE(review): the value is handed to ``convert_to_doc`` unchanged, so a
        string identifier presumably is not supported - confirm against the
        document manager.
        """
        docs = self._get_models([obj_or_string])
        self._delete(docs)

    def clear(self, models=None, commit=True):
        """Delete the documents of all articles returned by ``Article.objects.all()``.

        ``models`` and ``commit`` are accepted for interface compatibility and are
        not used by this implementation.
        """
        self.remove(None)

    @staticmethod
    def get_suggestion(query: str) -> str:
        """Return *query* with each term replaced by its first suggested term.

        A term suggester on the ``body`` field is executed together with a
        ``match`` query on ``body``. For every suggestion entry the text of the
        first option is used; entries without options keep their original text.

        Args:
            query: The search string entered by the user.

        Returns:
            The resulting terms joined with single spaces.
        """
        response = (
            ArticleDocument.search()
            .query("match", body=query)
            .suggest('suggest_search', query, term={'field': 'body'})
            .execute()
        )

        keywords = [
            entry["options"][0]["text"] if entry["options"] else entry["text"]
            for entry in response.suggest.suggest_search
        ]
        return ' '.join(keywords)

    @log_query
    def search(self, query_string, **kwargs):
        """Run a search and return the result dictionary Haystack consumes.

        Args:
            query_string: Text to search for in article titles and bodies.
            **kwargs: Only ``start_offset`` and ``end_offset`` are read; they
                slice the result window. All other options are ignored.

        Returns:
            dict with the keys ``results`` (list of ``SearchResult``), ``hits``
            (total number of matches as an int), ``facets`` (always empty) and
            ``spelling_suggestion`` (the suggested query when it differs from
            *query_string*, otherwise ``None``).
        """
        # Lazy %-formatting: same message text, but no TypeError for non-str input.
        logger.info('search query_string:%s', query_string)

        if not query_string:
            # Nothing to match against; skip both Elasticsearch round trips.
            return {
                'results': [],
                'hits': 0,
                'facets': {},
                'spelling_suggestion': None,
            }

        start_offset = kwargs.get('start_offset')
        end_offset = kwargs.get('end_offset')

        # Suggestion search: replace the query by its suggested terms when enabled.
        suggestion = self.get_suggestion(query_string) if self.is_suggest else query_string

        # Match against body or title with minimum_should_match set to "70%".
        text_query = Q(
            'bool',
            should=[Q('match', body=suggestion), Q('match', title=suggestion)],
            minimum_should_match="70%",
        )

        # Restrict to status 'p' and type 'a' (published articles, according to
        # the original comment). source(False): hits carry metadata only.
        search = (
            ArticleDocument.search()
            .query('bool', filter=[text_query])
            .filter('term', status='p')
            .filter('term', type='a')
            .source(False)[start_offset:end_offset]
        )

        results = search.execute()

        # Elasticsearch 7+ reports the total as an object with a ``value`` field,
        # older versions as a plain integer; normalise both to an int.
        total = results['hits'].total
        hits = getattr(total, 'value', total)

        found = [
            SearchResult('blog', 'Article', hit['_id'], hit['_score'])
            for hit in results['hits']['hits']
        ]

        # Only report a spelling suggestion when it differs from the user's query.
        spelling_suggestion = None if query_string == suggestion else suggestion

        return {
            'results': found,
            'hits': hits,
            'facets': {},
            'spelling_suggestion': spelling_suggestion,
        }


class ElasticSearchQuery(BaseSearchQuery):
    """Query builder used together with ``ElasticSearchBackend``."""

    def _convert_datetime(self, date):
        """Format *date* as ``YYYYMMDDHHMMSS``.

        Objects without an ``hour`` attribute (plain dates) get ``000000`` as
        their time part.
        """
        pattern = '%Y%m%d%H%M%S' if hasattr(date, 'hour') else '%Y%m%d000000'
        return force_str(date.strftime(pattern))

    def clean(self, query_fragment):
        """Sanitise a user-supplied query fragment.

        Words listed in the backend's ``RESERVED_WORDS`` are lower-cased, and any
        word containing one of the backend's ``RESERVED_CHARACTERS`` is wrapped
        in single quotes instead of being backslash-escaped.

        NOTE(review): the original docstring attributes the quote-instead-of-
        escape strategy to Whoosh 1.x; whether it is meaningful for
        Elasticsearch has not been verified.

        Args:
            query_fragment: Raw text entered by the user.

        Returns:
            The cleaned words joined with single spaces.
        """
        reserved_words = self.backend.RESERVED_WORDS
        reserved_chars = self.backend.RESERVED_CHARACTERS
        cleaned_words = []

        for word in query_fragment.split():
            if word in reserved_words:
                word = word.lower()

            if any(char in word for char in reserved_chars):
                word = "'%s'" % word

            cleaned_words.append(word)

        return ' '.join(cleaned_words)

    def build_query_fragment(self, field, filter_type, value):
        """Return the ``query_string`` of *value*; *field* and *filter_type* are unused."""
        return value.query_string

    def get_count(self):
        """Return the number of results returned by ``get_results()``, or 0 if none."""
        results = self.get_results()
        return len(results) if results else 0

    def get_spelling_suggestion(self, preferred_query=None):
        """Return the stored spelling suggestion; *preferred_query* is unused."""
        return self._spelling_suggestion

    def build_params(self, spelling_query=None):
        """Return the base class search parameters unchanged."""
        return super().build_params(spelling_query=spelling_query)


class ElasticSearchModelSearchForm(ModelSearchForm):
    """Model search form that lets the request toggle suggestion search."""

    def search(self):
        """Run the search, enabling suggestions unless ``is_suggest`` equals ``"no"``.

        NOTE(review): the flag is stored on the backend object; if that object is
        shared between concurrent requests the setting could leak - confirm.
        """
        backend = self.searchqueryset.query.backend
        backend.is_suggest = self.data.get("is_suggest") != "no"
        return super().search()


class ElasticSearchEngine(BaseEngine):
    """Haystack engine that pairs the backend with its query class."""

    backend = ElasticSearchBackend
    query = ElasticSearchQuery