ADD file via upload

yyb_branch
pup2vei6r 4 months ago
parent 7c3d3da5b8
commit e415eba80f

@ -0,0 +1,217 @@
#yyb Import the required modules and classes
from django.utils.encoding import force_str
from elasticsearch_dsl import Q
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
from haystack.forms import ModelSearchForm
from haystack.models import SearchResult
from haystack.utils import log as logging
#yyb Import the project's Elasticsearch document classes and the Article model
from blog.documents import ArticleDocument, ArticleDocumentManager
from blog.models import Article
#yyb Module-level logger, obtained through haystack's `log` helper (imported above as `logging`)
logger = logging.getLogger(__name__)
class ElasticSearchBackend(BaseSearchBackend):
    """Haystack search backend that indexes and queries articles in Elasticsearch."""

    def __init__(self, connection_alias, **connection_options):
        """Initialise the backend.

        Args:
            connection_alias: Passed through unchanged to the base backend.
            **connection_options: Passed through unchanged to the base backend.
        """
        super().__init__(connection_alias, **connection_options)
        # Helper that converts models to documents and manages the index.
        self.manager = ArticleDocumentManager()
        # Spelling suggestions are always switched on for this backend.
        self.include_spelling = True

    def _get_models(self, iterable):
        """Convert model instances into Elasticsearch documents.

        Args:
            iterable: Indexable collection of models. When it is empty, or its
                first element is falsy, every ``Article`` is used instead.

        Returns:
            Whatever ``ArticleDocumentManager.convert_to_doc`` returns for the
            selected models.
        """
        if iterable and iterable[0]:
            selected = iterable
        else:
            selected = Article.objects.all()
        return self.manager.convert_to_doc(selected)

    def _create(self, models):
        """Create the index, then rebuild it from ``models`` (or all articles)."""
        self.manager.create_index()
        self.manager.rebuild(self._get_models(models))

    def _delete(self, models):
        """Call ``delete()`` on every item of ``models`` and return ``True``."""
        for document in models:
            document.delete()
        return True

    def _rebuild(self, models):
        """Update the indexed documents for ``models``.

        A falsy ``models`` means "all articles".
        """
        if not models:
            models = Article.objects.all()
        self.manager.update_docs(self.manager.convert_to_doc(models))

    def update(self, index, iterable, commit=True):
        """Update the documents for the given models.

        ``index`` and ``commit`` are accepted for interface compatibility and
        are not used by this implementation.
        """
        documents = self._get_models(iterable)
        self.manager.update_docs(documents)

    def remove(self, obj_or_string):
        """Delete the document(s) derived from ``obj_or_string``.

        A falsy argument makes ``_get_models`` fall back to every article.
        """
        self._delete(self._get_models([obj_or_string]))

    def clear(self, models=None, commit=True):
        """Clear the index by removing the documents of all articles.

        ``models`` and ``commit`` are ignored; the call is always
        ``remove(None)``.
        """
        self.remove(None)

    @staticmethod
    def get_suggestion(query: str) -> str:
        """Return the suggested search text for ``query``.

        For each entry of the ``suggest_search`` suggester the first option is
        used when one exists, otherwise the entry's original text is kept.
        The pieces are joined with single spaces.
        """
        request = ArticleDocument.search()
        request = request.query("match", body=query)
        request = request.suggest('suggest_search', query, term={'field': 'body'})
        response = request.execute()

        return ' '.join(
            entry["options"][0]["text"] if entry["options"] else entry["text"]
            for entry in response.suggest.suggest_search
        )

    @log_query  # decorator that records the query
    def search(self, query_string, **kwargs):
        """Run a search and return the result mapping.

        Args:
            query_string: Text entered by the user.
            **kwargs: Only ``start_offset`` and ``end_offset`` are read; they
                slice the result window.

        Returns:
            dict with the keys ``results`` (list of ``SearchResult``),
            ``hits`` (total reported by the response), ``facets`` (always an
            empty dict) and ``spelling_suggestion`` (the suggested text when it
            differs from ``query_string``, otherwise ``None``).
        """
        logger.info('search query_string:' + query_string)

        # Paging window requested by the caller.
        start_offset = kwargs.get('start_offset')
        end_offset = kwargs.get('end_offset')

        # ``is_suggest`` is assigned from outside (the search form in this
        # module sets it); when it is absent or falsy the raw query is used.
        suggestion = query_string
        if getattr(self, "is_suggest", None):
            suggestion = self.get_suggestion(query_string)

        # Match against body or title, with minimum_should_match set to 70%.
        text_match = Q(
            'bool',
            should=[Q('match', body=suggestion), Q('match', title=suggestion)],
            minimum_should_match="70%",
        )

        # Per the original note: keep only published (status 'p') entries of
        # type 'a' (article). ``source(False)`` skips the document source so
        # that only metadata comes back.
        request = ArticleDocument.search().query('bool', filter=[text_match])
        request = request.filter('term', status='p').filter('term', type='a')
        request = request.source(False)[start_offset: end_offset]
        response = request.execute()

        hit_section = response['hits']
        total_hits = hit_section.total

        # Every hit is reported as a blog.Article result: (id, score).
        found = [
            SearchResult('blog', 'Article', hit['_id'], hit['_score'])
            for hit in hit_section['hits']
        ]

        # Only expose a spelling suggestion when it changed the query.
        spelling_suggestion = suggestion if suggestion != query_string else None

        return {
            'results': found,
            'hits': total_hits,
            'facets': {},
            'spelling_suggestion': spelling_suggestion,
        }
class ElasticSearchQuery(BaseSearchQuery):
    """Query builder used together with the Elasticsearch backend."""

    def _convert_datetime(self, date):
        """Format a date or datetime as a 14-digit string.

        Values that have an ``hour`` attribute are rendered with their time
        part (``%Y%m%d%H%M%S``); all others get ``000000`` as the time part.
        """
        pattern = '%Y%m%d%H%M%S' if hasattr(date, 'hour') else '%Y%m%d000000'
        return force_str(date.strftime(pattern))

    def clean(self, query_fragment):
        """Sanitise a user-supplied query fragment.

        The fragment is split on whitespace. Words listed in the backend's
        ``RESERVED_WORDS`` are lower-cased, and any word containing one of the
        backend's ``RESERVED_CHARACTERS`` is wrapped in single quotes. The
        words are then re-joined with single spaces.

        Note carried over from the original docstring: Whoosh 1.X differs
        here in that reserved characters are no longer backslash-escaped;
        the whole word should be quoted instead.
        """
        def sanitise(token):
            # Neutralise reserved words by lower-casing them.
            if token in self.backend.RESERVED_WORDS:
                token = token.lower()
            # Quote the word once if it holds any reserved character.
            if any(ch in token for ch in self.backend.RESERVED_CHARACTERS):
                token = "'%s'" % token
            return token

        return ' '.join(sanitise(token) for token in query_fragment.split())

    def build_query_fragment(self, field, filter_type, value):
        """Return ``value.query_string``; ``field`` and ``filter_type`` are unused."""
        return value.query_string

    def get_count(self):
        """Return the number of results from ``get_results()``, or 0 if it is falsy."""
        found = self.get_results()
        if not found:
            return 0
        return len(found)

    def get_spelling_suggestion(self, preferred_query=None):
        """Return the stored spelling suggestion; ``preferred_query`` is unused."""
        return self._spelling_suggestion

    def build_params(self, spelling_query=None):
        """Return the parameters built by the parent class, unchanged."""
        return super().build_params(spelling_query=spelling_query)
class ElasticSearchModelSearchForm(ModelSearchForm):
    """Model search form that toggles suggestion-based searching on the backend."""

    def search(self):
        """Run the search via the parent form.

        Before delegating, the backend's ``is_suggest`` flag is set: it is
        true unless the submitted ``is_suggest`` value is exactly ``"no"``.
        """
        use_suggestion = self.data.get("is_suggest") != "no"
        self.searchqueryset.query.backend.is_suggest = use_suggestion
        return super().search()
class ElasticSearchEngine(BaseEngine):
    """Haystack engine that pairs the Elasticsearch query and backend classes."""

    # Query class used by this engine.
    query = ElasticSearchQuery
    # Backend class used by this engine.
    backend = ElasticSearchBackend
Loading…
Cancel
Save