@@ -1,6 +1,16 @@
 # encoding: utf-8

 from __future__ import absolute_import, division, print_function, unicode_literals
+from whoosh.writing import AsyncWriter
+from whoosh.searching import ResultsPage
+from whoosh.qparser import QueryParser
+from whoosh.highlight import ContextFragmenter, HtmlFormatter
+from whoosh.highlight import highlight as whoosh_highlight
+from whoosh.filedb.filestore import FileStorage, RamStorage
+from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
+from whoosh.fields import ID as WHOOSH_ID
+from whoosh.analysis import StemmingAnalyzer
+from whoosh import index
+from jieba.analyse import ChineseAnalyzer
 import json
 import os
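
The hunk above moves the Whoosh imports to the top of the module and pulls in jieba's ChineseAnalyzer, which is the point of this backend: the default StemmingAnalyzer cannot segment Chinese text (there are no spaces between words), while jieba ships a Whoosh-compatible analyzer that can. A minimal sketch of what that import provides, assuming jieba is installed (the sample sentence is illustrative, not from the patch):

# Sketch: jieba's ChineseAnalyzer is a drop-in Whoosh analyzer that
# segments Chinese text into indexable tokens.
from jieba.analyse import ChineseAnalyzer

analyzer = ChineseAnalyzer()
for token in analyzer(u"我来到北京清华大学"):
    print(token.text)  # e.g. 来到 / 北京 / 清华 / 大学 ...
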
@@ -32,19 +42,10 @@ except ImportError:

 # Handle minimum requirement.
 if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
-    raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.")
+    raise MissingDependency(
+        "The 'whoosh' backend requires version 2.5.0 or greater.")

 # Bubble up the correct error.
-from whoosh import index
-from whoosh.analysis import StemmingAnalyzer
-from whoosh.fields import ID as WHOOSH_ID
-from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
-from whoosh.filedb.filestore import FileStorage, RamStorage
-from whoosh.highlight import highlight as whoosh_highlight
-from whoosh.highlight import ContextFragmenter, HtmlFormatter
-from whoosh.qparser import QueryParser
-from whoosh.searching import ResultsPage
-from whoosh.writing import AsyncWriter

 DATETIME_REGEX = re.compile(
     '^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
@@ -71,17 +72,25 @@ class WhooshSearchBackend(BaseSearchBackend):
     )

     # Characters reserved by Whoosh for special use.
-    # The '\\' must come first, so as not to overwrite the other slash replacements.
+    # The '\\' must come first, so as not to overwrite the other slash
+    # replacements.
     RESERVED_CHARACTERS = (
         '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
         '[', ']', '^', '"', '~', '*', '?', ':', '.',
     )

     def __init__(self, connection_alias, **connection_options):
-        super(WhooshSearchBackend, self).__init__(connection_alias, **connection_options)
+        super(
+            WhooshSearchBackend,
+            self).__init__(
+            connection_alias,
+            **connection_options)
         self.setup_complete = False
         self.use_file_storage = True
-        self.post_limit = getattr(connection_options, 'POST_LIMIT', 128 * 1024 * 1024)
+        self.post_limit = getattr(
+            connection_options,
+            'POST_LIMIT',
+            128 * 1024 * 1024)
        self.path = connection_options.get('PATH')

        if connection_options.get('STORAGE', 'file') != 'file':
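
RESERVED_CHARACTERS feeds the query-escaping logic elsewhere in the backend, which is why the ordering comment matters: each character is escaped in sequence, and escaping '\\' later would re-escape the backslashes introduced for the other characters. A rough sketch of that sequential replacement (a hypothetical helper, not the backend's actual method):

# Sketch: escape reserved characters one by one; '\\' must go first.
def escape_query(query, reserved=('\\', '+', '-', '!', '(', ')')):
    for char in reserved:
        query = query.replace(char, '\\%s' % char)
    return query

print(escape_query('(1+1)'))  # prints \(1\+1\)
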
@@ -89,7 +98,8 @@ class WhooshSearchBackend(BaseSearchBackend):

         if self.use_file_storage and not self.path:
             raise ImproperlyConfigured(
-                "You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)
+                "You must specify a 'PATH' in your settings for connection '%s'." %
+                connection_alias)

         self.log = logging.getLogger('haystack')

@@ -106,7 +116,9 @@ class WhooshSearchBackend(BaseSearchBackend):
             new_index = True

         if self.use_file_storage and not os.access(self.path, os.W_OK):
-            raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
+            raise IOError(
+                "The path to your Whoosh index '%s' is not writable for the current user/group." %
+                self.path)

         if self.use_file_storage:
             self.storage = FileStorage(self.path)
@@ -146,32 +158,35 @@ class WhooshSearchBackend(BaseSearchBackend):
         for field_name, field_class in fields.items():
             if field_class.is_multivalued:
                 if field_class.indexed is False:
-                    schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
+                    schema_fields[field_class.index_fieldname] = IDLIST(
+                        stored=True, field_boost=field_class.boost)
                 else:
-                    schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True,
-                                                                         field_boost=field_class.boost)
+                    schema_fields[field_class.index_fieldname] = KEYWORD(
+                        stored=True, commas=True, scorable=True, field_boost=field_class.boost)
             elif field_class.field_type in ['date', 'datetime']:
-                schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
+                schema_fields[field_class.index_fieldname] = DATETIME(
+                    stored=field_class.stored, sortable=True)
             elif field_class.field_type == 'integer':
-                schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int,
-                                                                     field_boost=field_class.boost)
+                schema_fields[field_class.index_fieldname] = NUMERIC(
+                    stored=field_class.stored, numtype=int, field_boost=field_class.boost)
             elif field_class.field_type == 'float':
-                schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float,
-                                                                     field_boost=field_class.boost)
+                schema_fields[field_class.index_fieldname] = NUMERIC(
+                    stored=field_class.stored, numtype=float, field_boost=field_class.boost)
             elif field_class.field_type == 'boolean':
                 # Field boost isn't supported on BOOLEAN as of 1.8.2.
-                schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
+                schema_fields[field_class.index_fieldname] = BOOLEAN(
+                    stored=field_class.stored)
             elif field_class.field_type == 'ngram':
-                schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored,
-                                                                   field_boost=field_class.boost)
+                schema_fields[field_class.index_fieldname] = NGRAM(
+                    minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
             elif field_class.field_type == 'edge_ngram':
                 schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start',
                                                                         stored=field_class.stored,
                                                                         field_boost=field_class.boost)
             else:
                 # schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
-                schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=ChineseAnalyzer(),
-                                                                  field_boost=field_class.boost, sortable=True)
+                schema_fields[field_class.index_fieldname] = TEXT(
+                    stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)
             if field_class.document is True:
                 content_field_name = field_class.index_fieldname
                 schema_fields[field_class.index_fieldname].spelling = True
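
The `else:` branch above is the one functional change in build_schema: every plain text field is analyzed by jieba instead of the English stemmer. Roughly, the schema it produces looks like the following standalone sketch (field names are hypothetical):

# Sketch: approximately what build_schema yields for a document field.
from jieba.analyse import ChineseAnalyzer
from whoosh.fields import ID as WHOOSH_ID, Schema, TEXT

schema = Schema(
    id=WHOOSH_ID(stored=True, unique=True),
    text=TEXT(stored=True, analyzer=ChineseAnalyzer(), sortable=True),
)
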
@@ -215,12 +230,18 @@ class WhooshSearchBackend(BaseSearchBackend):
                 # We'll log the object identifier but won't include the actual object
                 # to avoid the possibility of that generating encoding errors while
                 # processing the log message:
-                self.log.error(u"%s while preparing object for update" % e.__class__.__name__,
-                               exc_info=True, extra={"data": {"index": index,
-                                                              "object": get_identifier(obj)}})
+                self.log.error(
+                    u"%s while preparing object for update" %
+                    e.__class__.__name__,
+                    exc_info=True,
+                    extra={
+                        "data": {
+                            "index": index,
+                            "object": get_identifier(obj)}})

         if len(iterable) > 0:
-            # For now, commit no matter what, as we run into locking issues otherwise.
+            # For now, commit no matter what, as we run into locking issues
+            # otherwise.
             writer.commit()

     def remove(self, obj_or_string, commit=True):
@@ -231,12 +252,19 @@ class WhooshSearchBackend(BaseSearchBackend):
         whoosh_id = get_identifier(obj_or_string)

         try:
-            self.index.delete_by_query(q=self.parser.parse(u'%s:"%s"' % (ID, whoosh_id)))
+            self.index.delete_by_query(
+                q=self.parser.parse(
+                    u'%s:"%s"' %
+                    (ID, whoosh_id)))
         except Exception as e:
             if not self.silently_fail:
                 raise

-            self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e, exc_info=True)
+            self.log.error(
+                "Failed to remove document '%s' from Whoosh: %s",
+                whoosh_id,
+                e,
+                exc_info=True)

     def clear(self, models=None, commit=True):
         if not self.setup_complete:
@@ -254,18 +282,26 @@ class WhooshSearchBackend(BaseSearchBackend):
                 models_to_delete = []

                 for model in models:
-                    models_to_delete.append(u"%s:%s" % (DJANGO_CT, get_model_ct(model)))
+                    models_to_delete.append(
+                        u"%s:%s" %
+                        (DJANGO_CT, get_model_ct(model)))

-                self.index.delete_by_query(q=self.parser.parse(u" OR ".join(models_to_delete)))
+                self.index.delete_by_query(
+                    q=self.parser.parse(
+                        u" OR ".join(models_to_delete)))
         except Exception as e:
             if not self.silently_fail:
                 raise

             if models is not None:
-                self.log.error("Failed to clear Whoosh index of models '%s': %s", ','.join(models_to_delete),
-                               e, exc_info=True)
+                self.log.error(
+                    "Failed to clear Whoosh index of models '%s': %s",
+                    ','.join(models_to_delete),
+                    e,
+                    exc_info=True)
             else:
-                self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True)
+                self.log.error(
+                    "Failed to clear Whoosh index: %s", e, exc_info=True)

     def delete_index(self):
         # Per the Whoosh mailing list, if wiping out everything from the index,
@@ -288,7 +324,7 @@ class WhooshSearchBackend(BaseSearchBackend):
     def calculate_page(self, start_offset=0, end_offset=None):
         # Prevent against Whoosh throwing an error. Requires an end_offset
         # greater than 0.
-        if not end_offset is None and end_offset <= 0:
+        if end_offset is not None and end_offset <= 0:
            end_offset = 1

        # Determine the page.
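
For reference, the page arithmetic that follows this guard (unchanged by the patch, so not shown here) converts Haystack's result offsets into Whoosh's 1-based page numbers; assuming it matches stock Haystack, the conversion is:

# Sketch: offsets 20..40 become page 2 with a page length of 20.
start_offset, end_offset = 20, 40
page_length = end_offset - start_offset
page_num = (start_offset // page_length) + 1
print(page_num, page_length)  # 2 20
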
@@ -310,11 +346,26 @@ class WhooshSearchBackend(BaseSearchBackend):
         return page_num, page_length

     @log_query
-    def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
-               fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
-               narrow_queries=None, spelling_query=None, within=None,
-               dwithin=None, distance_point=None, models=None,
-               limit_to_registered_models=None, result_class=None, **kwargs):
+    def search(
+            self,
+            query_string,
+            sort_by=None,
+            start_offset=0,
+            end_offset=None,
+            fields='',
+            highlight=False,
+            facets=None,
+            date_facets=None,
+            query_facets=None,
+            narrow_queries=None,
+            spelling_query=None,
+            within=None,
+            dwithin=None,
+            distance_point=None,
+            models=None,
+            limit_to_registered_models=None,
+            result_class=None,
+            **kwargs):
         if not self.setup_complete:
             self.setup()

@@ -367,19 +418,29 @@ class WhooshSearchBackend(BaseSearchBackend):
             sort_by = sort_by_list[0]

         if facets is not None:
-            warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2)
+            warnings.warn(
+                "Whoosh does not handle faceting.",
+                Warning,
+                stacklevel=2)

         if date_facets is not None:
-            warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2)
+            warnings.warn(
+                "Whoosh does not handle date faceting.",
+                Warning,
+                stacklevel=2)

         if query_facets is not None:
-            warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2)
+            warnings.warn(
+                "Whoosh does not handle query faceting.",
+                Warning,
+                stacklevel=2)

         narrowed_results = None
         self.index = self.index.refresh()

         if limit_to_registered_models is None:
-            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+            limit_to_registered_models = getattr(
+                settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

         if models and len(models):
             model_choices = sorted(get_model_ct(model) for model in models)
@@ -394,17 +455,19 @@ class WhooshSearchBackend(BaseSearchBackend):
             if narrow_queries is None:
                 narrow_queries = set()

-            narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
+            narrow_queries.add(' OR '.join(
+                ['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))

         narrow_searcher = None

         if narrow_queries is not None:
-            # Potentially expensive? I don't see another way to do it in Whoosh...
+            # Potentially expensive? I don't see another way to do it in
+            # Whoosh...
             narrow_searcher = self.index.searcher()

             for nq in narrow_queries:
-                recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
-                                                                 limit=None)
+                recent_narrowed_results = narrow_searcher.search(
+                    self.parser.parse(force_text(nq)), limit=None)

                 if len(recent_narrowed_results) <= 0:
                     return {
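
The narrowing string built from DJANGO_CT is an ordinary Whoosh query that restricts results to the requested models; a small standalone sketch of what gets handed to the parser (the model labels are hypothetical):

# Sketch: one content-type term per model, OR'd together.
DJANGO_CT = 'django_ct'
model_choices = ['auth.user', 'blog.post']
print(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
# django_ct:auth.user OR django_ct:blog.post
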
@@ -430,7 +493,8 @@ class WhooshSearchBackend(BaseSearchBackend):
                         'hits': 0,
                     }

-        page_num, page_length = self.calculate_page(start_offset, end_offset)
+        page_num, page_length = self.calculate_page(
+            start_offset, end_offset)

         search_kwargs = {
             'pagelen': page_length,
@@ -467,8 +531,12 @@ class WhooshSearchBackend(BaseSearchBackend):
                 'spelling_suggestion': None,
             }

-        results = self._process_results(raw_page, highlight=highlight, query_string=query_string,
-                                        spelling_query=spelling_query, result_class=result_class)
+        results = self._process_results(
+            raw_page,
+            highlight=highlight,
+            query_string=query_string,
+            spelling_query=spelling_query,
+            result_class=result_class)
         searcher.close()

         if hasattr(narrow_searcher, 'close'):
@@ -478,9 +546,11 @@ class WhooshSearchBackend(BaseSearchBackend):
         else:
             if self.include_spelling:
                 if spelling_query:
-                    spelling_suggestion = self.create_spelling_suggestion(spelling_query)
+                    spelling_suggestion = self.create_spelling_suggestion(
+                        spelling_query)
                 else:
-                    spelling_suggestion = self.create_spelling_suggestion(query_string)
+                    spelling_suggestion = self.create_spelling_suggestion(
+                        query_string)
             else:
                 spelling_suggestion = None

@@ -490,9 +560,16 @@ class WhooshSearchBackend(BaseSearchBackend):
             'spelling_suggestion': spelling_suggestion,
         }

-    def more_like_this(self, model_instance, additional_query_string=None,
-                       start_offset=0, end_offset=None, models=None,
-                       limit_to_registered_models=None, result_class=None, **kwargs):
+    def more_like_this(
+            self,
+            model_instance,
+            additional_query_string=None,
+            start_offset=0,
+            end_offset=None,
+            models=None,
+            limit_to_registered_models=None,
+            result_class=None,
+            **kwargs):
         if not self.setup_complete:
             self.setup()

@@ -506,7 +583,8 @@ class WhooshSearchBackend(BaseSearchBackend):
         self.index = self.index.refresh()

         if limit_to_registered_models is None:
-            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+            limit_to_registered_models = getattr(
+                settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)

         if models and len(models):
             model_choices = sorted(get_model_ct(model) for model in models)
@@ -521,7 +599,8 @@ class WhooshSearchBackend(BaseSearchBackend):
             if narrow_queries is None:
                 narrow_queries = set()

-            narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
+            narrow_queries.add(' OR '.join(
+                ['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))

         if additional_query_string and additional_query_string != '*':
             narrow_queries.add(additional_query_string)
@@ -529,12 +608,13 @@ class WhooshSearchBackend(BaseSearchBackend):
         narrow_searcher = None

         if narrow_queries is not None:
-            # Potentially expensive? I don't see another way to do it in Whoosh...
+            # Potentially expensive? I don't see another way to do it in
+            # Whoosh...
             narrow_searcher = self.index.searcher()

             for nq in narrow_queries:
-                recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
-                                                                 limit=None)
+                recent_narrowed_results = narrow_searcher.search(
+                    self.parser.parse(force_text(nq)), limit=None)

                 if len(recent_narrowed_results) <= 0:
                     return {
@@ -559,7 +639,8 @@ class WhooshSearchBackend(BaseSearchBackend):
         results = searcher.search(parsed_query)

         if len(results):
-            raw_results = results[0].more_like_this(field_name, top=end_offset)
+            raw_results = results[0].more_like_this(
+                field_name, top=end_offset)

         # Handle the case where the results have been narrowed.
         if narrowed_results is not None and hasattr(raw_results, 'filter'):
@@ -594,7 +675,13 @@ class WhooshSearchBackend(BaseSearchBackend):

         return results

-    def _process_results(self, raw_page, highlight=False, query_string='', spelling_query=None, result_class=None):
+    def _process_results(
+            self,
+            raw_page,
+            highlight=False,
+            query_string='',
+            spelling_query=None,
+            result_class=None):
         from haystack import connections
         results = []

@@ -621,15 +708,18 @@ class WhooshSearchBackend(BaseSearchBackend):
                     index = unified_index.get_index(model)
                     string_key = str(key)

-                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+                    if string_key in index.fields and hasattr(
+                            index.fields[string_key], 'convert'):
                         # Special-cased due to the nature of KEYWORD fields.
                         if index.fields[string_key].is_multivalued:
                             if value is None or len(value) == 0:
                                 additional_fields[string_key] = []
                             else:
-                                additional_fields[string_key] = value.split(',')
+                                additional_fields[string_key] = value.split(
+                                    ',')
                         else:
-                            additional_fields[string_key] = index.fields[string_key].convert(value)
+                            additional_fields[string_key] = index.fields[string_key].convert(
+                                value)
                     else:
                         additional_fields[string_key] = self._to_python(value)

@@ -652,16 +742,23 @@ class WhooshSearchBackend(BaseSearchBackend):
                     self.content_field_name: [whoosh_result],
                 }

-                result = result_class(app_label, model_name, raw_result[DJANGO_ID], score, **additional_fields)
+                result = result_class(
+                    app_label,
+                    model_name,
+                    raw_result[DJANGO_ID],
+                    score,
+                    **additional_fields)
                 results.append(result)
             else:
                 hits -= 1

         if self.include_spelling:
             if spelling_query:
-                spelling_suggestion = self.create_spelling_suggestion(spelling_query)
+                spelling_suggestion = self.create_spelling_suggestion(
+                    spelling_query)
             else:
-                spelling_suggestion = self.create_spelling_suggestion(query_string)
+                spelling_suggestion = self.create_spelling_suggestion(
+                    query_string)

         return {
             'results': results,
@@ -742,17 +839,30 @@ class WhooshSearchBackend(BaseSearchBackend):
                 for dk, dv in date_values.items():
                     date_values[dk] = int(dv)

-                return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'],
-                                date_values['minute'], date_values['second'])
+                return datetime(
+                    date_values['year'],
+                    date_values['month'],
+                    date_values['day'],
+                    date_values['hour'],
+                    date_values['minute'],
+                    date_values['second'])

         try:
             # Attempt to use json to load the values.
             converted_value = json.loads(value)

             # Try to handle most built-in types.
-            if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)):
+            if isinstance(
+                    converted_value,
+                    (list,
+                     tuple,
+                     set,
+                     dict,
+                     six.integer_types,
+                     float,
+                     complex)):
                 return converted_value
-        except:
+        except BaseException:
            # If it fails (SyntaxError or its ilk) or we don't trust it,
            # continue on.
            pass
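
_to_python falls back to json.loads for values that are not booleans or datetime strings, keeping anything JSON cannot parse as a plain string; a quick sketch of the behaviour this hunk reformats:

# Sketch: stored values round-trip through JSON where possible.
import json

for raw in ('[1, 2, 3]', '{"a": 1}', '3.14', 'not json'):
    try:
        print(repr(json.loads(raw)))
    except ValueError:
        print('left as string: %r' % raw)
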
@@ -823,7 +933,8 @@ class WhooshSearchQuery(BaseSearchQuery):
         if field == 'content':
             index_fieldname = ''
         else:
-            index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+            index_fieldname = u'%s:' % connections[self._using].get_unified_index(
+            ).get_index_fieldname(field)

         filter_types = {
             'content': '%s',
@@ -841,23 +952,32 @@ class WhooshSearchQuery(BaseSearchQuery):
         if value.post_process is False:
             query_frag = prepared_value
         else:
-            if filter_type in ['content', 'contains', 'startswith', 'endswith', 'fuzzy']:
+            if filter_type in [
+                    'content',
+                    'contains',
+                    'startswith',
+                    'endswith',
+                    'fuzzy']:
                 if value.input_type_name == 'exact':
                     query_frag = prepared_value
                 else:
-                    # Iterate over terms & incorportate the converted form of each into the query.
+                    # Iterate over terms & incorportate the converted form of
+                    # each into the query.
                     terms = []

                     if isinstance(prepared_value, six.string_types):
                         possible_values = prepared_value.split(' ')
                     else:
                         if is_datetime is True:
-                            prepared_value = self._convert_datetime(prepared_value)
+                            prepared_value = self._convert_datetime(
+                                prepared_value)

                         possible_values = [prepared_value]

                     for possible_value in possible_values:
-                        terms.append(filter_types[filter_type] % self.backend._from_python(possible_value))
+                        terms.append(
+                            filter_types[filter_type] %
+                            self.backend._from_python(possible_value))

                     if len(terms) == 1:
                         query_frag = terms[0]
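
The loop above formats each whitespace-separated term through the template registered for the active filter; a sketch with a hypothetical template set:

# Sketch: each term is pushed through the filter's printf-style template.
filter_types = {'content': '%s', 'startswith': '%s*', 'endswith': '*%s'}
print([filter_types['startswith'] % t for t in 'py whoosh'.split(' ')])
# ['py*', 'whoosh*']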