|
|
|
|
@ -40,6 +40,7 @@ except ImportError:
|
|
|
|
|
raise MissingDependency(
|
|
|
|
|
"The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.")
|
|
|
|
|
|
|
|
|
|
# szy: Handle the minimum version requirement.
|
|
|
|
|
# Handle minimum requirement.
|
|
|
|
|
if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
|
|
|
|
|
raise MissingDependency(
|
|
|
|
|
@ -47,12 +48,13 @@ if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
|
|
|
|
|
|
|
|
|
|
# Bubble up the correct error.
|
|
|
|
|
|
|
|
|
|
# szy: Datetime regular expression used to parse date strings.
# Accepts ``YYYY-MM-DDTHH:MM:SS`` with an optional fractional-seconds suffix
# of 3 to 6 digits, which may itself be followed by ``Z``. The pattern is a
# raw string so that ``\d`` and ``\.`` reach the regex engine verbatim instead
# of relying on Python passing unknown escape sequences through (which emits
# an invalid-escape-sequence warning on newer interpreters).
DATETIME_REGEX = re.compile(
    r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')

# Thread-local namespace. NOTE: the assignment below only sets RAM_STORE for
# the thread that imports this module; any other thread sees no such attribute
# until it assigns one itself, so readers should not assume it exists.
# NOTE(review): presumably caches the in-memory Whoosh storage - confirm
# against the code that reads it.
LOCALS = threading.local()
LOCALS.RAM_STORE = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Custom Whoosh HTML formatter, used to keep highlighting results consistent across backends.
|
|
|
|
|
class WhooshHtmlFormatter(HtmlFormatter):
|
|
|
|
|
"""
|
|
|
|
|
This is a HtmlFormatter simpler than the whoosh.HtmlFormatter.
|
|
|
|
|
@ -62,6 +64,7 @@ class WhooshHtmlFormatter(HtmlFormatter):
|
|
|
|
|
template = '<%(tag)s>%(t)s</%(tag)s>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Main class of the Whoosh search backend.
|
|
|
|
|
class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
# Word reserved by Whoosh for special use.
|
|
|
|
|
RESERVED_WORDS = (
|
|
|
|
|
@ -79,6 +82,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'[', ']', '^', '"', '~', '*', '?', ':', '.',
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# szy: Initialize the Whoosh backend.
|
|
|
|
|
def __init__(self, connection_alias, **connection_options):
|
|
|
|
|
super(
|
|
|
|
|
WhooshSearchBackend,
|
|
|
|
|
@ -103,6 +107,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
self.log = logging.getLogger('haystack')
|
|
|
|
|
|
|
|
|
|
# szy: Set up the Whoosh index and configuration.
|
|
|
|
|
def setup(self):
|
|
|
|
|
"""
|
|
|
|
|
Defers loading until needed.
|
|
|
|
|
@ -110,6 +115,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
from haystack import connections
|
|
|
|
|
new_index = False
|
|
|
|
|
|
|
|
|
|
# szy: Ensure the index directory exists; create it if it does not.
|
|
|
|
|
# Make sure the index is there.
|
|
|
|
|
if self.use_file_storage and not os.path.exists(self.path):
|
|
|
|
|
os.makedirs(self.path)
|
|
|
|
|
@ -120,6 +126,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
"The path to your Whoosh index '%s' is not writable for the current user/group." %
|
|
|
|
|
self.path)
|
|
|
|
|
|
|
|
|
|
# szy: Choose file storage or in-memory storage according to the configuration.
|
|
|
|
|
if self.use_file_storage:
|
|
|
|
|
self.storage = FileStorage(self.path)
|
|
|
|
|
else:
|
|
|
|
|
@ -134,6 +141,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
connections[self.connection_alias].get_unified_index().all_searchfields())
|
|
|
|
|
self.parser = QueryParser(self.content_field_name, schema=self.schema)
|
|
|
|
|
|
|
|
|
|
# szy: Create or open the index.
|
|
|
|
|
if new_index is True:
|
|
|
|
|
self.index = self.storage.create_index(self.schema)
|
|
|
|
|
else:
|
|
|
|
|
@ -144,6 +152,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
self.setup_complete = True
|
|
|
|
|
|
|
|
|
|
# szy: Build the Whoosh schema, defining the field types.
|
|
|
|
|
def build_schema(self, fields):
|
|
|
|
|
schema_fields = {
|
|
|
|
|
ID: WHOOSH_ID(stored=True, unique=True),
|
|
|
|
|
@ -199,6 +208,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
return (content_field_name, Schema(**schema_fields))
|
|
|
|
|
|
|
|
|
|
# szy: Update the index.
|
|
|
|
|
def update(self, index, iterable, commit=True):
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
@ -206,6 +216,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
writer = AsyncWriter(self.index)
|
|
|
|
|
|
|
|
|
|
# szy: Iterate over the objects and update the index.
|
|
|
|
|
for obj in iterable:
|
|
|
|
|
try:
|
|
|
|
|
doc = index.full_prepare(obj)
|
|
|
|
|
@ -244,6 +255,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
# otherwise.
|
|
|
|
|
writer.commit()
|
|
|
|
|
|
|
|
|
|
# szy: Remove an object from the index.
|
|
|
|
|
def remove(self, obj_or_string, commit=True):
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
@ -266,6 +278,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
e,
|
|
|
|
|
exc_info=True)
|
|
|
|
|
|
|
|
|
|
# szy: Clear the index.
|
|
|
|
|
def clear(self, models=None, commit=True):
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
@ -303,6 +316,8 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.log.error(
|
|
|
|
|
"Failed to clear Whoosh index: %s", e, exc_info=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Delete the entire index.
|
|
|
|
|
def delete_index(self):
|
|
|
|
|
# Per the Whoosh mailing list, if wiping out everything from the index,
|
|
|
|
|
# it's much more efficient to simply delete the index files.
|
|
|
|
|
@ -311,9 +326,11 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
elif not self.use_file_storage:
|
|
|
|
|
self.storage.clean()
|
|
|
|
|
|
|
|
|
|
# szy: Recreate everything.
|
|
|
|
|
# Recreate everything.
|
|
|
|
|
self.setup()
|
|
|
|
|
|
|
|
|
|
# szy: Optimize the index.
|
|
|
|
|
def optimize(self):
|
|
|
|
|
if not self.setup_complete:
|
|
|
|
|
self.setup()
|
|
|
|
|
@ -321,12 +338,14 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
self.index.optimize()
|
|
|
|
|
|
|
|
|
|
# szy: Calculate the pagination information.
|
|
|
|
|
def calculate_page(self, start_offset=0, end_offset=None):
|
|
|
|
|
# Prevent against Whoosh throwing an error. Requires an end_offset
|
|
|
|
|
# greater than 0.
|
|
|
|
|
if end_offset is not None and end_offset <= 0:
|
|
|
|
|
end_offset = 1
|
|
|
|
|
|
|
|
|
|
# szy: Determine the page number.
|
|
|
|
|
# Determine the page.
|
|
|
|
|
page_num = 0
|
|
|
|
|
|
|
|
|
|
@ -345,6 +364,8 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
page_num += 1
|
|
|
|
|
return page_num, page_length
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Execute the search query.
|
|
|
|
|
@log_query
|
|
|
|
|
def search(
|
|
|
|
|
self,
|
|
|
|
|
@ -388,6 +409,8 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
reverse = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Handle sorting.
|
|
|
|
|
if sort_by is not None:
|
|
|
|
|
# Determine if we need to reverse the results and if Whoosh can
|
|
|
|
|
# handle what it's being asked to sort by. Reversing is an
|
|
|
|
|
@ -560,6 +583,8 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'spelling_suggestion': spelling_suggestion,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Implement the "more like this" feature.
|
|
|
|
|
def more_like_this(
|
|
|
|
|
self,
|
|
|
|
|
model_instance,
|
|
|
|
|
@ -675,6 +700,8 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Process the raw search results, converting them to Haystack format.
|
|
|
|
|
def _process_results(
|
|
|
|
|
self,
|
|
|
|
|
raw_page,
|
|
|
|
|
@ -767,6 +794,8 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'spelling_suggestion': spelling_suggestion,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Create a spelling suggestion.
|
|
|
|
|
def create_spelling_suggestion(self, query_string):
|
|
|
|
|
spelling_suggestion = None
|
|
|
|
|
reader = self.index.reader()
|
|
|
|
|
@ -819,6 +848,8 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
value = force_str(value)
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Convert Whoosh values to native Python values.
|
|
|
|
|
def _to_python(self, value):
|
|
|
|
|
"""
|
|
|
|
|
Converts values from Whoosh to native Python values.
|
|
|
|
|
@ -870,6 +901,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Whoosh search query class.
|
|
|
|
|
class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
def _convert_datetime(self, date):
|
|
|
|
|
if hasattr(date, 'hour'):
|
|
|
|
|
@ -877,6 +909,8 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
else:
|
|
|
|
|
return force_str(date.strftime('%Y%m%d000000'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Clean the query fragment, handling reserved words and special characters.
|
|
|
|
|
def clean(self, query_fragment):
|
|
|
|
|
"""
|
|
|
|
|
Provides a mechanism for sanitizing user input before presenting the
|
|
|
|
|
@ -902,6 +936,8 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
|
|
|
|
|
return ' '.join(cleaned_words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Build the query fragment.
|
|
|
|
|
def build_query_fragment(self, field, filter_type, value):
|
|
|
|
|
from haystack import connections
|
|
|
|
|
query_frag = ''
|
|
|
|
|
@ -1039,6 +1075,7 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
# value = self.backend._from_python(value)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# szy: Whoosh search engine class.
class WhooshEngine(BaseEngine):
    """Engine definition that pairs the Whoosh backend with its query class.

    ``backend`` names the search backend class and ``query`` names the
    search query class defined in this module.
    """

    backend = WhooshSearchBackend
    query = WhooshSearchQuery
|
|
|
|
|
|