|
|
|
|
@ -51,7 +51,7 @@ DATETIME_REGEX = re.compile(
|
|
|
|
|
LOCALS = threading.local()
|
|
|
|
|
LOCALS.RAM_STORE = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
class WhooshHtmlFormatter(HtmlFormatter):
|
|
|
|
|
"""
|
|
|
|
|
自定义高亮格式化器,使用简单标签包裹匹配关键词。
|
|
|
|
|
@ -59,7 +59,7 @@ class WhooshHtmlFormatter(HtmlFormatter):
|
|
|
|
|
"""
|
|
|
|
|
template = '<%(tag)s>%(t)s</%(tag)s>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
"""
|
|
|
|
|
Whoosh 搜索后端实现类,负责与 Whoosh 引擎交互,执行索引、搜索、删除等操作。
|
|
|
|
|
@ -79,7 +79,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
|
|
|
|
|
'[', ']', '^', '"', '~', '*', '?', ':', '.',
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def __init__(self, connection_alias, **connection_options):
|
|
|
|
|
"""
|
|
|
|
|
初始化 Whoosh 后端连接。
|
|
|
|
|
@ -100,7 +100,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
"You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)
|
|
|
|
|
|
|
|
|
|
self.log = logging.getLogger('haystack') # 日志记录器
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def setup(self):
|
|
|
|
|
"""
|
|
|
|
|
初始化索引环境:创建目录、构建 schema、打开或创建索引。
|
|
|
|
|
@ -143,7 +143,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.index = self.storage.create_index(self.schema)
|
|
|
|
|
|
|
|
|
|
self.setup_complete = True
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def build_schema(self, fields):
|
|
|
|
|
"""
|
|
|
|
|
根据 Django 模型字段定义构建 Whoosh 的 Schema(索引结构)。
|
|
|
|
|
@ -188,7 +188,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")
|
|
|
|
|
|
|
|
|
|
return (content_field_name, Schema(**schema_fields))
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def update(self, index, iterable, commit=True):
|
|
|
|
|
"""
|
|
|
|
|
更新索引文档(添加或更新)。
|
|
|
|
|
@ -224,7 +224,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
|
|
|
|
|
if len(iterable) > 0:
|
|
|
|
|
writer.commit() # 提交写入
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def remove(self, obj_or_string, commit=True):
|
|
|
|
|
"""
|
|
|
|
|
从索引中删除一个文档。
|
|
|
|
|
@ -243,7 +243,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
if not self.silently_fail:
|
|
|
|
|
raise
|
|
|
|
|
self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e, exc_info=True)
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def clear(self, models=None, commit=True):
|
|
|
|
|
"""
|
|
|
|
|
清空索引。可指定模型或清空全部。
|
|
|
|
|
@ -273,7 +273,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.log.error("Failed to clear Whoosh index of models '%s': %s", ','.join(models_to_delete), e, exc_info=True)
|
|
|
|
|
else:
|
|
|
|
|
self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True)
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def delete_index(self):
|
|
|
|
|
"""
|
|
|
|
|
彻底删除索引目录并重建。
|
|
|
|
|
@ -283,7 +283,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
elif not self.use_file_storage:
|
|
|
|
|
self.storage.clean()
|
|
|
|
|
self.setup() # 重新初始化
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def optimize(self):
|
|
|
|
|
"""
|
|
|
|
|
优化索引(合并段),提升搜索性能。
|
|
|
|
|
@ -292,7 +292,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
self.setup()
|
|
|
|
|
self.index = self.index.refresh()
|
|
|
|
|
self.index.optimize()
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def calculate_page(self, start_offset=0, end_offset=None):
|
|
|
|
|
"""
|
|
|
|
|
计算分页参数(页码和每页数量),适配 Whoosh 的分页机制。
|
|
|
|
|
@ -307,7 +307,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
page_length = end_offset - start_offset
|
|
|
|
|
page_num = int(start_offset / page_length) + 1 # Whoosh 页码从 1 开始
|
|
|
|
|
return page_num, page_length
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
@log_query
|
|
|
|
|
def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False,
|
|
|
|
|
facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None,
|
|
|
|
|
@ -432,7 +432,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
spelling_suggestion = self.create_spelling_suggestion(spelling_query or query_string) \
|
|
|
|
|
if self.include_spelling else None
|
|
|
|
|
return {'results': [], 'hits': 0, 'spelling_suggestion': spelling_suggestion}
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None,
|
|
|
|
|
models=None, limit_to_registered_models=None, result_class=None, **kwargs):
|
|
|
|
|
"""
|
|
|
|
|
@ -501,7 +501,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
narrow_searcher.close()
|
|
|
|
|
return results
|
|
|
|
|
return {'results': [], 'hits': 0}
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def _process_results(self, raw_page, highlight=False, query_string='', spelling_query=None, result_class=None):
|
|
|
|
|
"""
|
|
|
|
|
处理原始搜索结果,转换为 SearchResult 对象列表。
|
|
|
|
|
@ -561,7 +561,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
'facets': facets,
|
|
|
|
|
'spelling_suggestion': spelling_suggestion,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def create_spelling_suggestion(self, query_string):
|
|
|
|
|
"""
|
|
|
|
|
生成拼写纠错建议。
|
|
|
|
|
@ -584,7 +584,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
if suggestions:
|
|
|
|
|
suggested_words.append(suggestions[0])
|
|
|
|
|
return ' '.join(suggested_words)
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def _from_python(self, value):
|
|
|
|
|
"""
|
|
|
|
|
将 Python 值转换为 Whoosh 可索引的字符串。
|
|
|
|
|
@ -601,7 +601,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
else:
|
|
|
|
|
value = force_str(value)
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def _to_python(self, value):
|
|
|
|
|
"""
|
|
|
|
|
将 Whoosh 存储的值转换回 Python 类型。
|
|
|
|
|
@ -625,7 +625,7 @@ class WhooshSearchBackend(BaseSearchBackend):
|
|
|
|
|
pass
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
"""
|
|
|
|
|
Whoosh 查询构建器,负责将 Django 查询语法转换为 Whoosh 查询字符串。
|
|
|
|
|
@ -637,7 +637,7 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
return force_str(date.strftime('%Y%m%d%H%M%S'))
|
|
|
|
|
else:
|
|
|
|
|
return force_str(date.strftime('%Y%m%d000000'))
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def clean(self, query_fragment):
|
|
|
|
|
"""
|
|
|
|
|
清理用户输入的查询片段,避免保留字和字符引发语法错误。
|
|
|
|
|
@ -653,7 +653,7 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
break
|
|
|
|
|
cleaned_words.append(word)
|
|
|
|
|
return ' '.join(cleaned_words)
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
def build_query_fragment(self, field, filter_type, value):
|
|
|
|
|
"""
|
|
|
|
|
构建单个查询片段,如 "title:django" 或 "pub_date:[20200101 TO 20201231]"
|
|
|
|
|
@ -729,7 +729,7 @@ class WhooshSearchQuery(BaseSearchQuery):
|
|
|
|
|
|
|
|
|
|
return u"%s%s" % (index_fieldname, query_frag)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#wwc
|
|
|
|
|
class WhooshEngine(BaseEngine):
|
|
|
|
|
"""
|
|
|
|
|
Haystack 引擎注册类,绑定 Backend 和 Query 类。
|
|
|
|
|
|