|
|
|
|
@ -1,7 +1,9 @@
|
|
|
|
|
# encoding: utf-8
|
|
|
|
|
|
|
|
|
|
# Handle minimum requirement.
|
|
|
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
|
|
|
|
|
|
# Handle minimum requirement.
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
@ -9,6 +11,7 @@ import shutil
|
|
|
|
|
import threading
|
|
|
|
|
import warnings
|
|
|
|
|
|
|
|
|
|
# Handle minimum requirement.
|
|
|
|
|
import six
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
from django.core.exceptions import ImproperlyConfigured
|
|
|
|
|
@ -34,6 +37,7 @@ from whoosh.qparser import QueryParser
|
|
|
|
|
from whoosh.searching import ResultsPage
|
|
|
|
|
from whoosh.writing import AsyncWriter
|
|
|
|
|
|
|
|
|
|
#psr: Try to import the whoosh library; if that fails, raise a missing-dependency exception.
|
|
|
|
|
try:
|
|
|
|
|
import whoosh
|
|
|
|
|
except ImportError:
|
|
|
|
|
@ -41,29 +45,33 @@ except ImportError:
|
|
|
|
|
"The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.")
|
|
|
|
|
|
|
|
|
|
# Handle minimum requirement.
|
|
|
|
|
#psr: Check the whoosh version requirement: it must be 2.5.0 or greater.
|
|
|
|
|
# Refuse to load when the installed whoosh exposes no ``__version__`` at all
# or reports a version below 2.5.0.
if not (hasattr(whoosh, '__version__') and whoosh.__version__ >= (2, 5, 0)):
    raise MissingDependency(
        "The 'whoosh' backend requires version 2.5.0 or greater.")
|
|
|
|
|
|
|
|
|
|
# Bubble up the correct error.
|
|
|
|
|
|
|
|
|
|
#psr: Pattern for timestamps of the form "YYYY-MM-DDTHH:MM:SS", optionally
# followed by a dot, 3-6 fractional digits and an optional "Z".
# A raw string is used so that "\d" and "\." reach the regex engine verbatim
# instead of relying on Python passing unknown string escapes through.
DATETIME_REGEX = re.compile(
    r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})'
    r'T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')

#psr: Thread-local storage object; its RAM_STORE slot starts out as None.
LOCALS = threading.local()
LOCALS.RAM_STORE = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#psr: Custom HTML formatter class, used to highlight search results.
|
|
|
|
|
class WhooshHtmlFormatter(HtmlFormatter):
    """
    A pared-down alternative to whoosh's own HtmlFormatter.

    It exists so that results look the same regardless of backend: the
    Solr, Xapian and Elasticsearch backends use this formatting as well.
    """
    #psr: HTML template format used by this formatter.
    template = '<%(tag)s>%(t)s</%(tag)s>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#psr: Whoosh search backend class, derived from BaseSearchBackend.
|
|
|
|
|
class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
# Word reserved by Whoosh for special use.
|
|
|
|
|
#psr: Keywords reserved by Whoosh.
|
|
|
|
|
RESERVED_WORDS = (
|
|
|
|
|
'AND',
|
|
|
|
|
'NOT',
|
|
|
|
|
@ -74,35 +82,45 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
# Characters reserved by Whoosh for special use.
|
|
|
|
|
# The '\\' must come first, so as not to overwrite the other slash
|
|
|
|
|
# replacements.
|
|
|
|
|
#psr: Characters reserved by Whoosh.
|
|
|
|
|
RESERVED_CHARACTERS = (
|
|
|
|
|
'\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
|
|
|
|
|
'[', ']', '^', '"', '~', '*', '?', ':', '.',
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
#psr: Initialise the Whoosh search backend.
|
|
|
|
|
def __init__(self, connection_alias, **connection_options):
    """
    Configure the backend from the connection settings.

    :param connection_alias: Name of the connection this backend serves;
        forwarded to the base class and used in the error message below.
    :param connection_options: Connection settings. The keys read here
        are ``POST_LIMIT``, ``PATH`` and ``STORAGE``.
    :raises ImproperlyConfigured: If file storage is selected but no
        ``PATH`` was supplied.
    """
    super(WhooshSearchBackend, self).__init__(
        connection_alias, **connection_options)

    # Flipped to True by setup() once the index has been prepared.
    self.setup_complete = False

    # ``connection_options`` is a dict of keyword arguments, so the value
    # must be fetched with ``.get()``. ``getattr()`` looks for an
    # *attribute* named POST_LIMIT, never finds one, and therefore always
    # fell back to the default, silently ignoring the configured limit.
    self.post_limit = connection_options.get(
        'POST_LIMIT', 128 * 1024 * 1024)

    # Location of the index files (only required for file storage).
    self.path = connection_options.get('PATH')

    # File storage is the default; any other STORAGE value disables it.
    self.use_file_storage = (
        connection_options.get('STORAGE', 'file') == 'file')

    if self.use_file_storage and not self.path:
        raise ImproperlyConfigured(
            "You must specify a 'PATH' in your settings for connection '%s'." %
            connection_alias)

    self.log = logging.getLogger('haystack')
|
|
|
|
|
|
|
|
|
|
#psr: Set up the Whoosh index environment.
|
|
|
|
|
def setup(self):
|
|
|
|
|
"""
|
|
|
|
|
Defers loading until needed.
|
|
|
|
|
@ -111,15 +129,18 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
new_index = False
|
|
|
|
|
|
|
|
|
|
# Make sure the index is there.
|
|
|
|
|
#psr:创建索引目录如果不存在
|
|
|
|
|
if self.use_file_storage and not os.path.exists(self.path):
|
|
|
|
|
os.makedirs(self.path)
|
|
|
|
|
new_index = True
|
|
|
|
|
|
|
|
|
|
#psr:检查索引目录写入权限
|
|
|
|
|
if self.use_file_storage and not os.access(self.path, os.W_OK):
|
|
|
|
|
raise IOError(
|
|
|
|
|
"The path to your Whoosh index '%s' is not writable for the current user/group." %
|
|
|
|
|
self.path)
|
|
|
|
|
|
|
|
|
|
#psr:根据配置选择存储方式
|
|
|
|
|
if self.use_file_storage:
|
|
|
|
|
self.storage = FileStorage(self.path)
|
|
|
|
|
else:
|
|
|
|
|
@ -130,10 +151,12 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
self.storage = LOCALS.RAM_STORE
|
|
|
|
|
|
|
|
|
|
#psr:构建索引模式和解析器
|
|
|
|
|
self.content_field_name, self.schema = self.build_schema(
|
|
|
|
|
connections[self.connection_alias].get_unified_index().all_searchfields())
|
|
|
|
|
self.parser = QueryParser(self.content_field_name, schema=self.schema)
|
|
|
|
|
|
|
|
|
|
#psr:创建或打开索引
|
|
|
|
|
if new_index is True:
|
|
|
|
|
self.index = self.storage.create_index(self.schema)
|
|
|
|
|
else:
|
|
|
|
|
@ -142,9 +165,12 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
except index.EmptyIndexError:
|
|
|
|
|
self.index = self.storage.create_index(self.schema)
|
|
|
|
|
|
|
|
|
|
#psr:标记设置完成
|
|
|
|
|
self.setup_complete = True
|
|
|
|
|
|
|
|
|
|
#psr: Build the Whoosh index schema.
|
|
|
|
|
def build_schema(self, fields):
|
|
|
|
|
#psr:定义基本模式字段
|
|
|
|
|
schema_fields = {
|
|
|
|
|
ID: WHOOSH_ID(stored=True, unique=True),
|
|
|
|
|
DJANGO_CT: WHOOSH_ID(stored=True),
|
|
|
|
|
@ -152,11 +178,14 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
}
|
|
|
|
|
# Grab the number of keys that are hard-coded into Haystack.
|
|
|
|
|
# We'll use this to (possibly) fail slightly more gracefully later.
|
|
|
|
|
#psr:获取初始键数量
|
|
|
|
|
initial_key_count = len(schema_fields)
|
|
|
|
|
content_field_name = ''
|
|
|
|
|
|
|
|
|
|
#psr:根据字段类型构建相应的模式字段
|
|
|
|
|
for field_name, field_class in fields.items():
|
|
|
|
|
if field_class.is_multivalued:
|
|
|
|
|
#psr:处理多值字段
|
|
|
|
|
if field_class.indexed is False:
|
|
|
|
|
schema_fields[field_class.index_fieldname] = IDLIST(
|
|
|
|
|
stored=True, field_boost=field_class.boost)
|
|
|
|
|
@ -164,48 +193,62 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
schema_fields[field_class.index_fieldname] = KEYWORD(
|
|
|
|
|
stored=True, commas=True, scorable=True, field_boost=field_class.boost)
|
|
|
|
|
elif field_class.field_type in ['date', 'datetime']:
|
|
|
|
|
#psr:处理日期时间字段
|
|
|
|
|
schema_fields[field_class.index_fieldname] = DATETIME(
|
|
|
|
|
stored=field_class.stored, sortable=True)
|
|
|
|
|
elif field_class.field_type == 'integer':
|
|
|
|
|
#psr:处理整数字段
|
|
|
|
|
schema_fields[field_class.index_fieldname] = NUMERIC(
|
|
|
|
|
stored=field_class.stored, numtype=int, field_boost=field_class.boost)
|
|
|
|
|
elif field_class.field_type == 'float':
|
|
|
|
|
#psr:处理浮点数字段
|
|
|
|
|
schema_fields[field_class.index_fieldname] = NUMERIC(
|
|
|
|
|
stored=field_class.stored, numtype=float, field_boost=field_class.boost)
|
|
|
|
|
elif field_class.field_type == 'boolean':
|
|
|
|
|
#psr:处理布尔字段
|
|
|
|
|
# Field boost isn't supported on BOOLEAN as of 1.8.2.
|
|
|
|
|
schema_fields[field_class.index_fieldname] = BOOLEAN(
|
|
|
|
|
stored=field_class.stored)
|
|
|
|
|
elif field_class.field_type == 'ngram':
|
|
|
|
|
#psr:处理ngram字段
|
|
|
|
|
schema_fields[field_class.index_fieldname] = NGRAM(
|
|
|
|
|
minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
|
|
|
|
|
elif field_class.field_type == 'edge_ngram':
|
|
|
|
|
#psr:处理边缘ngram字段
|
|
|
|
|
schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start',
|
|
|
|
|
stored=field_class.stored,
|
|
|
|
|
field_boost=field_class.boost)
|
|
|
|
|
else:
|
|
|
|
|
#psr:处理文本字段,使用中文分析器
|
|
|
|
|
# schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
|
|
|
|
|
schema_fields[field_class.index_fieldname] = TEXT(
|
|
|
|
|
stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)
|
|
|
|
|
#psr:处理文档字段
|
|
|
|
|
if field_class.document is True:
|
|
|
|
|
content_field_name = field_class.index_fieldname
|
|
|
|
|
schema_fields[field_class.index_fieldname].spelling = True
|
|
|
|
|
|
|
|
|
|
# Fail more gracefully than relying on the backend to die if no fields
|
|
|
|
|
# are found.
|
|
|
|
|
#psr:验证是否有字段被添加到模式中
|
|
|
|
|
if len(schema_fields) <= initial_key_count:
|
|
|
|
|
raise SearchBackendError(
|
|
|
|
|
"No fields were found in any search_indexes. Please correct this before attempting to search.")
|
|
|
|
|
|
|
|
|
|
#psr:返回内容字段名和构建好的模式
|
|
|
|
|
return (content_field_name, Schema(**schema_fields))
|
|
|
|
|
|
|
|
|
|
#psr: Update documents in the index.
|
|
|
|
|
def update(self, index, iterable, commit=True):
|
|
|
|
|
#psr:如果设置未完成则先进行设置
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
|
|
|
|
|
#psr:刷新索引并创建异步写入器
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
writer = AsyncWriter(self.index)
|
|
|
|
|
|
|
|
|
|
#psr:遍历可迭代对象准备并更新文档
|
|
|
|
|
for obj in iterable:
|
|
|
|
|
try:
|
|
|
|
|
doc = index.full_prepare(obj)
|
|
|
|
|
@ -214,22 +257,27 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
else:
|
|
|
|
|
# Really make sure it's unicode, because Whoosh won't have it any
|
|
|
|
|
# other way.
|
|
|
|
|
#psr:确保文档内容都是unicode格式
|
|
|
|
|
for key in doc:
|
|
|
|
|
doc[key] = self._from_python(doc[key])
|
|
|
|
|
|
|
|
|
|
# Document boosts aren't supported in Whoosh 2.5.0+.
|
|
|
|
|
#psr:删除文档提升字段(Whoosh 2.5.0+不支持)
|
|
|
|
|
if 'boost' in doc:
|
|
|
|
|
del doc['boost']
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
#psr:更新文档
|
|
|
|
|
writer.update_document(**doc)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
#psr:处理异常情况
|
|
|
|
|
if not self.silently_fail:
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
# We'll log the object identifier but won't include the actual object
|
|
|
|
|
# to avoid the possibility of that generating encoding errors while
|
|
|
|
|
# processing the log message:
|
|
|
|
|
#psr:记录错误日志
|
|
|
|
|
self.log.error(
|
|
|
|
|
u"%s while preparing object for update" %
|
|
|
|
|
e.__class__.__name__,
|
|
|
|
|
@ -239,43 +287,54 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
"index": index,
|
|
|
|
|
"object": get_identifier(obj)}})
|
|
|
|
|
|
|
|
|
|
#psr:如果有数据则提交更改
|
|
|
|
|
if len(iterable) > 0:
|
|
|
|
|
# For now, commit no matter what, as we run into locking issues
|
|
|
|
|
# otherwise.
|
|
|
|
|
writer.commit()
|
|
|
|
|
|
|
|
|
|
#psr: Remove an object from the index.
|
|
|
|
|
def remove(self, obj_or_string, commit=True):
    """
    Delete the document belonging to ``obj_or_string`` from the index.

    :param obj_or_string: Value handed to ``get_identifier`` to obtain the
        identifier of the document to delete.
    :param commit: Not referenced by this method.

    Any exception raised while deleting is re-raised unless
    ``self.silently_fail`` is set, in which case it is logged instead.
    """
    if not self.setup_complete:
        self.setup()

    # Work against an up-to-date view of the index.
    self.index = self.index.refresh()
    whoosh_id = get_identifier(obj_or_string)

    try:
        # Delete by a query matching the ID field against the identifier.
        # The format expression is written exactly once here.
        self.index.delete_by_query(
            q=self.parser.parse(u'%s:"%s"' % (ID, whoosh_id)))
    except Exception as e:
        if not self.silently_fail:
            raise

        self.log.error(
            "Failed to remove document '%s' from Whoosh: %s",
            whoosh_id,
            e,
            exc_info=True)
|
|
|
|
|
|
|
|
|
|
#psr: Clear the index.
|
|
|
|
|
def clear(self, models=None, commit=True):
|
|
|
|
|
#psr:如果设置未完成则先进行设置
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
|
|
|
|
|
#psr:刷新索引
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
|
|
|
|
|
#psr:验证模型参数
|
|
|
|
|
if models is not None:
|
|
|
|
|
assert isinstance(models, (list, tuple))
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
#psr:根据参数决定是删除整个索引还是特定模型
|
|
|
|
|
if models is None:
|
|
|
|
|
self.delete_index()
|
|
|
|
|
else:
|
|
|
|
|
@ -283,16 +342,17 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
for model in models:
|
|
|
|
|
models_to_delete.append(
|
|
|
|
|
u"%s:%s" %
|
|
|
|
|
(DJANGO_CT, get_model_ct(model)))
|
|
|
|
|
u"%s:%s" % (DJANGO_CT, get_model_ct(model)))
|
|
|
|
|
|
|
|
|
|
self.index.delete_by_query(
|
|
|
|
|
q=self.parser.parse(
|
|
|
|
|
u" OR ".join(models_to_delete)))
|
|
|
|
|
except Exception as e:
|
|
|
|
|
#psr:处理异常情况
|
|
|
|
|
if not self.silently_fail:
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
#psr:记录错误日志
|
|
|
|
|
if models is not None:
|
|
|
|
|
self.log.error(
|
|
|
|
|
"Failed to clear Whoosh index of models '%s': %s",
|
|
|
|
|
@ -303,31 +363,34 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.log.error(
|
|
|
|
|
"Failed to clear Whoosh index: %s", e, exc_info=True)
|
|
|
|
|
|
|
|
|
|
#psr: Delete the entire index.
|
|
|
|
|
def delete_index(self):
    """
    Wipe the whole index and build a fresh one.

    Per the Whoosh mailing list, when everything is being removed it is
    much more efficient to delete the index files than to delete the
    documents one by one.
    """
    if not self.use_file_storage:
        # Non-file storage: let the storage object clean itself.
        self.storage.clean()
    elif os.path.exists(self.path):
        # File storage: remove the index directory, if it is there.
        shutil.rmtree(self.path)

    # Recreate everything.
    self.setup()
|
|
|
|
|
|
|
|
|
|
#psr: Optimize the index.
|
|
|
|
|
def optimize(self):
    """
    Refresh the index handle and call its ``optimize()`` method.

    Runs ``setup()`` first if the backend has not been set up yet.
    """
    if not self.setup_complete:
        self.setup()

    # Store the refreshed handle before optimizing, so that ``self.index``
    # is updated even if ``optimize()`` fails.
    refreshed = self.index.refresh()
    self.index = refreshed
    refreshed.optimize()
|
|
|
|
|
|
|
|
|
|
#psr: Calculate the pagination parameters.
|
|
|
|
|
def calculate_page(self, start_offset=0, end_offset=None):
|
|
|
|
|
# Prevent against Whoosh throwing an error. Requires an end_offset
|
|
|
|
|
# greater than 0.
|
|
|
|
|
#psr:防止Whoosh抛出错误,确保结束偏移量大于0
|
|
|
|
|
if end_offset is not None and end_offset <= 0:
|
|
|
|
|
end_offset = 1
|
|
|
|
|
|
|
|
|
|
# Determine the page.
|
|
|
|
|
#psr:确定页码
|
|
|
|
|
page_num = 0
|
|
|
|
|
|
|
|
|
|
if end_offset is None:
|
|
|
|
|
@ -341,10 +404,11 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
if page_length and page_length > 0:
|
|
|
|
|
page_num = int(start_offset / page_length)
|
|
|
|
|
|
|
|
|
|
# Increment because Whoosh uses 1-based page numbers.
|
|
|
|
|
#psr:增加页码因为Whoosh使用1基页码
|
|
|
|
|
page_num += 1
|
|
|
|
|
return page_num, page_length
|
|
|
|
|
|
|
|
|
|
#psr: Execute a search query.
|
|
|
|
|
@log_query
|
|
|
|
|
def search(
|
|
|
|
|
self,
|
|
|
|
|
@ -366,10 +430,11 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
limit_to_registered_models=None,
|
|
|
|
|
result_class=None,
|
|
|
|
|
**kwargs):
|
|
|
|
|
#psr:如果设置未完成则先进行设置
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
|
|
|
|
|
# A zero length query should return no results.
|
|
|
|
|
#psr:处理零长度查询
|
|
|
|
|
if len(query_string) == 0:
|
|
|
|
|
return {
|
|
|
|
|
'results': [],
|
|
|
|
|
@ -378,8 +443,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
query_string = force_str(query_string)
|
|
|
|
|
|
|
|
|
|
# A one-character query (non-wildcard) gets nabbed by a stopwords
|
|
|
|
|
# filter and should yield zero results.
|
|
|
|
|
#psr:处理单字符查询
|
|
|
|
|
if len(query_string) <= 1 and query_string != u'*':
|
|
|
|
|
return {
|
|
|
|
|
'results': [],
|
|
|
|
|
@ -388,6 +452,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
reverse = False
|
|
|
|
|
|
|
|
|
|
#psr:处理排序参数
|
|
|
|
|
if sort_by is not None:
|
|
|
|
|
# Determine if we need to reverse the results and if Whoosh can
|
|
|
|
|
# handle what it's being asked to sort by. Reversing is an
|
|
|
|
|
@ -417,6 +482,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
sort_by = sort_by_list[0]
|
|
|
|
|
|
|
|
|
|
#psr:处理分面搜索警告
|
|
|
|
|
if facets is not None:
|
|
|
|
|
warnings.warn(
|
|
|
|
|
"Whoosh does not handle faceting.",
|
|
|
|
|
@ -438,6 +504,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
narrowed_results = None
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
|
|
|
|
|
#psr:处理模型限制参数
|
|
|
|
|
if limit_to_registered_models is None:
|
|
|
|
|
limit_to_registered_models = getattr(
|
|
|
|
|
settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
|
|
|
|
|
@ -451,6 +518,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
else:
|
|
|
|
|
model_choices = []
|
|
|
|
|
|
|
|
|
|
#psr:构建模型选择查询
|
|
|
|
|
if len(model_choices) > 0:
|
|
|
|
|
if narrow_queries is None:
|
|
|
|
|
narrow_queries = set()
|
|
|
|
|
@ -460,6 +528,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
narrow_searcher = None
|
|
|
|
|
|
|
|
|
|
#psr:处理窄化查询
|
|
|
|
|
if narrow_queries is not None:
|
|
|
|
|
# Potentially expensive? I don't see another way to do it in
|
|
|
|
|
# Whoosh...
|
|
|
|
|
@ -482,6 +551,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
|
|
|
|
|
#psr:执行实际搜索
|
|
|
|
|
if self.index.doc_count():
|
|
|
|
|
searcher = self.index.searcher()
|
|
|
|
|
parsed_query = self.parser.parse(query_string)
|
|
|
|
|
@ -496,6 +566,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
page_num, page_length = self.calculate_page(
|
|
|
|
|
start_offset, end_offset)
|
|
|
|
|
|
|
|
|
|
#psr:设置搜索参数
|
|
|
|
|
search_kwargs = {
|
|
|
|
|
'pagelen': page_length,
|
|
|
|
|
'sortedby': sort_by,
|
|
|
|
|
@ -507,12 +578,14 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
search_kwargs['filter'] = narrowed_results
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
#psr:执行搜索页面查询
|
|
|
|
|
raw_page = searcher.search_page(
|
|
|
|
|
parsed_query,
|
|
|
|
|
page_num,
|
|
|
|
|
**search_kwargs
|
|
|
|
|
)
|
|
|
|
|
except ValueError:
|
|
|
|
|
#psr:处理数值错误异常
|
|
|
|
|
if not self.silently_fail:
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
@ -531,6 +604,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'spelling_suggestion': None,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#psr:处理搜索结果
|
|
|
|
|
results = self._process_results(
|
|
|
|
|
raw_page,
|
|
|
|
|
highlight=highlight,
|
|
|
|
|
@ -544,6 +618,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
else:
|
|
|
|
|
#psr:处理空索引情况
|
|
|
|
|
if self.include_spelling:
|
|
|
|
|
if spelling_query:
|
|
|
|
|
spelling_suggestion = self.create_spelling_suggestion(
|
|
|
|
|
@ -560,6 +635,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'spelling_suggestion': spelling_suggestion,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#psr: Search for similar documents.
|
|
|
|
|
def more_like_this(
|
|
|
|
|
self,
|
|
|
|
|
model_instance,
|
|
|
|
|
@ -570,6 +646,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
limit_to_registered_models=None,
|
|
|
|
|
result_class=None,
|
|
|
|
|
**kwargs):
|
|
|
|
|
#psr:如果设置未完成则先进行设置
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
|
|
|
|
|
@ -582,6 +659,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
narrowed_results = None
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
|
|
|
|
|
#psr:处理模型限制参数
|
|
|
|
|
if limit_to_registered_models is None:
|
|
|
|
|
limit_to_registered_models = getattr(
|
|
|
|
|
settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
|
|
|
|
|
@ -595,6 +673,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
else:
|
|
|
|
|
model_choices = []
|
|
|
|
|
|
|
|
|
|
#psr:构建模型选择查询
|
|
|
|
|
if len(model_choices) > 0:
|
|
|
|
|
if narrow_queries is None:
|
|
|
|
|
narrow_queries = set()
|
|
|
|
|
@ -602,11 +681,13 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
narrow_queries.add(' OR '.join(
|
|
|
|
|
['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
|
|
|
|
|
|
|
|
|
|
#psr:添加附加查询字符串
|
|
|
|
|
if additional_query_string and additional_query_string != '*':
|
|
|
|
|
narrow_queries.add(additional_query_string)
|
|
|
|
|
|
|
|
|
|
narrow_searcher = None
|
|
|
|
|
|
|
|
|
|
#psr:处理窄化查询
|
|
|
|
|
if narrow_queries is not None:
|
|
|
|
|
# Potentially expensive? I don't see another way to do it in
|
|
|
|
|
# Whoosh...
|
|
|
|
|
@ -627,11 +708,13 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
else:
|
|
|
|
|
narrowed_results = recent_narrowed_results
|
|
|
|
|
|
|
|
|
|
#psr:计算分页参数
|
|
|
|
|
page_num, page_length = self.calculate_page(start_offset, end_offset)
|
|
|
|
|
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
raw_results = EmptyResults()
|
|
|
|
|
|
|
|
|
|
#psr:执行相似文档搜索
|
|
|
|
|
if self.index.doc_count():
|
|
|
|
|
query = "%s:%s" % (ID, get_identifier(model_instance))
|
|
|
|
|
searcher = self.index.searcher()
|
|
|
|
|
@ -647,8 +730,10 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
raw_results.filter(narrowed_results)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
#psr:创建结果页面
|
|
|
|
|
raw_page = ResultsPage(raw_results, page_num, page_length)
|
|
|
|
|
except ValueError:
|
|
|
|
|
#psr:处理数值错误异常
|
|
|
|
|
if not self.silently_fail:
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
@ -667,6 +752,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'spelling_suggestion': None,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#psr:处理搜索结果
|
|
|
|
|
results = self._process_results(raw_page, result_class=result_class)
|
|
|
|
|
searcher.close()
|
|
|
|
|
|
|
|
|
|
@ -675,6 +761,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
#psr: Process the search results.
|
|
|
|
|
def _process_results(
|
|
|
|
|
self,
|
|
|
|
|
raw_page,
|
|
|
|
|
@ -689,6 +776,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
# can cause pagination failures.
|
|
|
|
|
hits = len(raw_page)
|
|
|
|
|
|
|
|
|
|
#psr:设置结果类
|
|
|
|
|
if result_class is None:
|
|
|
|
|
result_class = SearchResult
|
|
|
|
|
|
|
|
|
|
@ -697,6 +785,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
unified_index = connections[self.connection_alias].get_unified_index()
|
|
|
|
|
indexed_models = unified_index.get_indexed_models()
|
|
|
|
|
|
|
|
|
|
#psr:遍历原始结果处理每条记录
|
|
|
|
|
for doc_offset, raw_result in enumerate(raw_page):
|
|
|
|
|
score = raw_page.score(doc_offset) or 0
|
|
|
|
|
app_label, model_name = raw_result[DJANGO_CT].split('.')
|
|
|
|
|
@ -704,6 +793,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
model = haystack_get_model(app_label, model_name)
|
|
|
|
|
|
|
|
|
|
if model and model in indexed_models:
|
|
|
|
|
#psr:处理字段数据
|
|
|
|
|
for key, value in raw_result.items():
|
|
|
|
|
index = unified_index.get_index(model)
|
|
|
|
|
string_key = str(key)
|
|
|
|
|
@ -723,9 +813,11 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
else:
|
|
|
|
|
additional_fields[string_key] = self._to_python(value)
|
|
|
|
|
|
|
|
|
|
#psr:删除不需要的字段
|
|
|
|
|
del (additional_fields[DJANGO_CT])
|
|
|
|
|
del (additional_fields[DJANGO_ID])
|
|
|
|
|
|
|
|
|
|
#psr:处理高亮显示
|
|
|
|
|
if highlight:
|
|
|
|
|
sa = StemmingAnalyzer()
|
|
|
|
|
formatter = WhooshHtmlFormatter('em')
|
|
|
|
|
@ -742,6 +834,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.content_field_name: [whoosh_result],
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#psr:创建搜索结果对象
|
|
|
|
|
result = result_class(
|
|
|
|
|
app_label,
|
|
|
|
|
model_name,
|
|
|
|
|
@ -752,6 +845,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
else:
|
|
|
|
|
hits -= 1
|
|
|
|
|
|
|
|
|
|
#psr:处理拼写建议
|
|
|
|
|
if self.include_spelling:
|
|
|
|
|
if spelling_query:
|
|
|
|
|
spelling_suggestion = self.create_spelling_suggestion(
|
|
|
|
|
@ -760,6 +854,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
spelling_suggestion = self.create_spelling_suggestion(
|
|
|
|
|
query_string)
|
|
|
|
|
|
|
|
|
|
#psr:返回处理后的结果
|
|
|
|
|
return {
|
|
|
|
|
'results': results,
|
|
|
|
|
'hits': hits,
|
|
|
|
|
@ -767,6 +862,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'spelling_suggestion': spelling_suggestion,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#psr: Create a spelling suggestion.
|
|
|
|
|
def create_spelling_suggestion(self, query_string):
|
|
|
|
|
spelling_suggestion = None
|
|
|
|
|
reader = self.index.reader()
|
|
|
|
|
@ -787,6 +883,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
query_words = cleaned_query.split()
|
|
|
|
|
suggested_words = []
|
|
|
|
|
|
|
|
|
|
#psr:为每个查询词生成建议
|
|
|
|
|
for word in query_words:
|
|
|
|
|
suggestions = corrector.suggest(word, limit=1)
|
|
|
|
|
|
|
|
|
|
@ -796,6 +893,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
spelling_suggestion = ' '.join(suggested_words)
|
|
|
|
|
return spelling_suggestion
|
|
|
|
|
|
|
|
|
|
#psr: Convert a Python value to a string for Whoosh.
|
|
|
|
|
def _from_python(self, value):
|
|
|
|
|
"""
|
|
|
|
|
Converts Python values to a string for Whoosh.
|
|
|
|
|
@ -819,6 +917,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
value = force_str(value)
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
#psr: Convert a Whoosh value to a native Python value.
|
|
|
|
|
def _to_python(self, value):
|
|
|
|
|
"""
|
|
|
|
|
Converts values from Whoosh to native Python values.
|
|
|
|
|
@ -870,13 +969,16 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#psr: Whoosh search query class, derived from BaseSearchQuery.
|
|
|
|
|
class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
#psr: Convert a date/datetime value to its string format.
|
|
|
|
|
def _convert_datetime(self, date):
    """
    Render ``date`` as a 14-digit ``YYYYMMDDHHMMSS`` string.

    Values exposing an ``hour`` attribute are formatted with their own
    time of day; anything else gets ``000000`` as the time part.
    """
    pattern = '%Y%m%d%H%M%S' if hasattr(date, 'hour') else '%Y%m%d000000'
    return force_str(date.strftime(pattern))
|
|
|
|
|
|
|
|
|
|
#psr: Clean a query fragment.
|
|
|
|
|
def clean(self, query_fragment):
|
|
|
|
|
"""
|
|
|
|
|
Provides a mechanism for sanitizing user input before presenting the
|
|
|
|
|
@ -902,11 +1004,13 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
|
|
|
|
|
return ' '.join(cleaned_words)
|
|
|
|
|
|
|
|
|
|
#psr: Build a query fragment.
|
|
|
|
|
def build_query_fragment(self, field, filter_type, value):
|
|
|
|
|
from haystack import connections
|
|
|
|
|
query_frag = ''
|
|
|
|
|
is_datetime = False
|
|
|
|
|
|
|
|
|
|
#psr:处理不同类型的值
|
|
|
|
|
if not hasattr(value, 'input_type_name'):
|
|
|
|
|
# Handle when we've got a ``ValuesListQuerySet``...
|
|
|
|
|
if hasattr(value, 'values_list'):
|
|
|
|
|
@ -936,6 +1040,7 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
index_fieldname = u'%s:' % connections[self._using].get_unified_index(
|
|
|
|
|
).get_index_fieldname(field)
|
|
|
|
|
|
|
|
|
|
#psr:定义过滤类型映射
|
|
|
|
|
filter_types = {
|
|
|
|
|
'content': '%s',
|
|
|
|
|
'contains': '*%s*',
|
|
|
|
|
@ -949,6 +1054,7 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
'fuzzy': u'%s~',
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#psr:根据不同过滤类型构建查询片段
|
|
|
|
|
if value.post_process is False:
|
|
|
|
|
query_frag = prepared_value
|
|
|
|
|
else:
|
|
|
|
|
@ -1026,19 +1132,15 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
|
|
|
|
|
query_frag = filter_types[filter_type] % prepared_value
|
|
|
|
|
|
|
|
|
|
#psr:格式化查询片段
|
|
|
|
|
if len(query_frag) and not isinstance(value, Raw):
|
|
|
|
|
if not query_frag.startswith('(') and not query_frag.endswith(')'):
|
|
|
|
|
query_frag = "(%s)" % query_frag
|
|
|
|
|
|
|
|
|
|
return u"%s%s" % (index_fieldname, query_frag)
|
|
|
|
|
|
|
|
|
|
# if not filter_type in ('in', 'range'):
|
|
|
|
|
# # 'in' is a bit of a special case, as we don't want to
|
|
|
|
|
# # convert a valid list/tuple to string. Defer handling it
|
|
|
|
|
# # until later...
|
|
|
|
|
# value = self.backend._from_python(value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#psr: Whoosh search engine class, derived from BaseEngine.
|
|
|
|
|
class WhooshEngine(BaseEngine):
    """Engine that pairs the Whoosh backend with the Whoosh query class."""
    #psr: Backend and query classes used by this engine.
    backend = WhooshSearchBackend
    query = WhooshSearchQuery
|
|
|
|
|
|