添加搜索功能

10 years ago · 13c937bb95
parent f243ff4228
commit 13c937bb95
13 changed files with 1122 additions and 23 deletions
--- a/.gitignore
+++ b/.gitignore
@ -66,4 +66,5 @@ target/

 migrations/
 !migrations/__init__.py
-collectedstatic/
+collectedstatic/
+DjangoBlog/whoosh_index/
--- a/DjangoBlog/settings.py
+++ b/DjangoBlog/settings.py
@ -37,6 +37,7 @@ INSTALLED_APPS = [
    'django.contrib.staticfiles',
    'django.contrib.sitemaps',
    'pagedown',
+    'haystack',
    'blog',
    'accounts',
    'comments',
@ -131,6 +132,15 @@ USE_TZ = True
 SITE_ROOT = os.path.dirname(os.path.abspath(__file__))
 SITE_ROOT = os.path.abspath(os.path.join(SITE_ROOT, '../'))

+HAYSTACK_CONNECTIONS = {
+    'default': {
+        'ENGINE': 'DjangoBlog.whoosh_cn_backend.WhooshEngine',
+        'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'),
+    },
+}
+# 自动更新搜索索引
+HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
+
 STATIC_ROOT = os.path.join(SITE_ROOT, 'collectedstatic')

 STATIC_URL = '/static/'
--- a/DjangoBlog/urls.py
+++ b/DjangoBlog/urls.py
@ -37,5 +37,6 @@ urlpatterns = [
    url(r'', include('oauth.urls', namespace='oauth', app_name='oauth')),
    url(r'^sitemap\.xml$', sitemap, {'sitemaps': sitemaps},
        name='django.contrib.sitemaps.views.sitemap'),
-    url(r'^feed/$', DjangoBlogFeed())
+    url(r'^feed/$', DjangoBlogFeed()),
+    url(r'^search', include('haystack.urls'),name='search'),
 ]
--- a/DjangoBlog/whoosh_cn_backend.py
+++ b/DjangoBlog/whoosh_cn_backend.py
@ -0,0 +1,925 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+from jieba.analyse import ChineseAnalyzer
+import json
+import os
+import re
+import shutil
+import threading
+import warnings
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.utils import six
+from django.utils.datetime_safe import datetime
+from django.utils.encoding import force_text
+
+from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
+from haystack.constants import DJANGO_CT, DJANGO_ID, ID
+from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
+from haystack.inputs import Clean, Exact, PythonData, Raw
+from haystack.models import SearchResult
+from haystack.utils import log as logging
+from haystack.utils import get_identifier, get_model_ct
+from haystack.utils.app_loading import haystack_get_model
+
+try:
+    import whoosh
+except ImportError:
+    raise MissingDependency(
+        "The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.")
+
+# Handle minimum requirement.
+if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
+    raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.")
+
+# Bubble up the correct error.
+from whoosh import index
+from whoosh.analysis import StemmingAnalyzer
+from whoosh.fields import ID as WHOOSH_ID
+from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
+from whoosh.filedb.filestore import FileStorage, RamStorage
+from whoosh.highlight import highlight as whoosh_highlight
+from whoosh.highlight import ContextFragmenter, HtmlFormatter
+from whoosh.qparser import QueryParser
+from whoosh.searching import ResultsPage
+from whoosh.writing import AsyncWriter
+
+DATETIME_REGEX = re.compile(
+    '^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
+LOCALS = threading.local()
+LOCALS.RAM_STORE = None
+
+
+class WhooshHtmlFormatter(HtmlFormatter):
+    """
+    This is a HtmlFormatter simpler than the whoosh.HtmlFormatter.
+    We use it to have consistent results across backends. Specifically,
+    Solr, Xapian and Elasticsearch are using this formatting.
+    """
+    template = '<%(tag)s>%(t)s</%(tag)s>'
+
+
+class WhooshSearchBackend(BaseSearchBackend):
+    # Word reserved by Whoosh for special use.
+    RESERVED_WORDS = (
+        'AND',
+        'NOT',
+        'OR',
+        'TO',
+    )
+
+    # Characters reserved by Whoosh for special use.
+    # The '\\' must come first, so as not to overwrite the other slash replacements.
+    RESERVED_CHARACTERS = (
+        '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
+        '[', ']', '^', '"', '~', '*', '?', ':', '.',
+    )
+
+    def __init__(self, connection_alias, **connection_options):
+        super(WhooshSearchBackend, self).__init__(connection_alias, **connection_options)
+        self.setup_complete = False
+        self.use_file_storage = True
+        self.post_limit = getattr(connection_options, 'POST_LIMIT', 128 * 1024 * 1024)
+        self.path = connection_options.get('PATH')
+
+        if connection_options.get('STORAGE', 'file') != 'file':
+            self.use_file_storage = False
+
+        if self.use_file_storage and not self.path:
+            raise ImproperlyConfigured(
+                "You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)
+
+        self.log = logging.getLogger('haystack')
+
+    def setup(self):
+        """
+        Defers loading until needed.
+        """
+        from haystack import connections
+        new_index = False
+
+        # Make sure the index is there.
+        if self.use_file_storage and not os.path.exists(self.path):
+            os.makedirs(self.path)
+            new_index = True
+
+        if self.use_file_storage and not os.access(self.path, os.W_OK):
+            raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
+
+        if self.use_file_storage:
+            self.storage = FileStorage(self.path)
+        else:
+            global LOCALS
+
+            if getattr(LOCALS, 'RAM_STORE', None) is None:
+                LOCALS.RAM_STORE = RamStorage()
+
+            self.storage = LOCALS.RAM_STORE
+
+        self.content_field_name, self.schema = self.build_schema(
+            connections[self.connection_alias].get_unified_index().all_searchfields())
+        self.parser = QueryParser(self.content_field_name, schema=self.schema)
+
+        if new_index is True:
+            self.index = self.storage.create_index(self.schema)
+        else:
+            try:
+                self.index = self.storage.open_index(schema=self.schema)
+            except index.EmptyIndexError:
+                self.index = self.storage.create_index(self.schema)
+
+        self.setup_complete = True
+
+    def build_schema(self, fields):
+        schema_fields = {
+            ID: WHOOSH_ID(stored=True, unique=True),
+            DJANGO_CT: WHOOSH_ID(stored=True),
+            DJANGO_ID: WHOOSH_ID(stored=True),
+        }
+        # Grab the number of keys that are hard-coded into Haystack.
+        # We'll use this to (possibly) fail slightly more gracefully later.
+        initial_key_count = len(schema_fields)
+        content_field_name = ''
+
+        for field_name, field_class in fields.items():
+            if field_class.is_multivalued:
+                if field_class.indexed is False:
+                    schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
+                else:
+                    schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True,
+                                                                         field_boost=field_class.boost)
+            elif field_class.field_type in ['date', 'datetime']:
+                schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
+            elif field_class.field_type == 'integer':
+                schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int,
+                                                                     field_boost=field_class.boost)
+            elif field_class.field_type == 'float':
+                schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float,
+                                                                     field_boost=field_class.boost)
+            elif field_class.field_type == 'boolean':
+                # Field boost isn't supported on BOOLEAN as of 1.8.2.
+                schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
+            elif field_class.field_type == 'ngram':
+                schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored,
+                                                                   field_boost=field_class.boost)
+            elif field_class.field_type == 'edge_ngram':
+                schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start',
+                                                                        stored=field_class.stored,
+                                                                        field_boost=field_class.boost)
+            else:
+                # schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
+                schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=ChineseAnalyzer(),
+                                                                  field_boost=field_class.boost, sortable=True)
+            if field_class.document is True:
+                content_field_name = field_class.index_fieldname
+                schema_fields[field_class.index_fieldname].spelling = True
+
+        # Fail more gracefully than relying on the backend to die if no fields
+        # are found.
+        if len(schema_fields) <= initial_key_count:
+            raise SearchBackendError(
+                "No fields were found in any search_indexes. Please correct this before attempting to search.")
+
+        return (content_field_name, Schema(**schema_fields))
+
+    def update(self, index, iterable, commit=True):
+        if not self.setup_complete:
+            self.setup()
+
+        self.index = self.index.refresh()
+        writer = AsyncWriter(self.index)
+
+        for obj in iterable:
+            try:
+                doc = index.full_prepare(obj)
+            except SkipDocument:
+                self.log.debug(u"Indexing for object `%s` skipped", obj)
+            else:
+                # Really make sure it's unicode, because Whoosh won't have it any
+                # other way.
+                for key in doc:
+                    doc[key] = self._from_python(doc[key])
+
+                # Document boosts aren't supported in Whoosh 2.5.0+.
+                if 'boost' in doc:
+                    del doc['boost']
+
+                try:
+                    writer.update_document(**doc)
+                except Exception as e:
+                    if not self.silently_fail:
+                        raise
+
+                    # We'll log the object identifier but won't include the actual object
+                    # to avoid the possibility of that generating encoding errors while
+                    # processing the log message:
+                    self.log.error(u"%s while preparing object for update" % e.__class__.__name__,
+                                   exc_info=True, extra={"data": {"index": index,
+                                                                  "object": get_identifier(obj)}})
+
+        if len(iterable) > 0:
+            # For now, commit no matter what, as we run into locking issues otherwise.
+            writer.commit()
+
+    def remove(self, obj_or_string, commit=True):
+        if not self.setup_complete:
+            self.setup()
+
+        self.index = self.index.refresh()
+        whoosh_id = get_identifier(obj_or_string)
+
+        try:
+            self.index.delete_by_query(q=self.parser.parse(u'%s:"%s"' % (ID, whoosh_id)))
+        except Exception as e:
+            if not self.silently_fail:
+                raise
+
+            self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e, exc_info=True)
+
+    def clear(self, models=None, commit=True):
+        if not self.setup_complete:
+            self.setup()
+
+        self.index = self.index.refresh()
+
+        if models is not None:
+            assert isinstance(models, (list, tuple))
+
+        try:
+            if models is None:
+                self.delete_index()
+            else:
+                models_to_delete = []
+
+                for model in models:
+                    models_to_delete.append(u"%s:%s" % (DJANGO_CT, get_model_ct(model)))
+
+                self.index.delete_by_query(q=self.parser.parse(u" OR ".join(models_to_delete)))
+        except Exception as e:
+            if not self.silently_fail:
+                raise
+
+            if models is not None:
+                self.log.error("Failed to clear Whoosh index of models '%s': %s", ','.join(models_to_delete),
+                               e, exc_info=True)
+            else:
+                self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True)
+
+    def delete_index(self):
+        # Per the Whoosh mailing list, if wiping out everything from the index,
+        # it's much more efficient to simply delete the index files.
+        if self.use_file_storage and os.path.exists(self.path):
+            shutil.rmtree(self.path)
+        elif not self.use_file_storage:
+            self.storage.clean()
+
+        # Recreate everything.
+        self.setup()
+
+    def optimize(self):
+        if not self.setup_complete:
+            self.setup()
+
+        self.index = self.index.refresh()
+        self.index.optimize()
+
+    def calculate_page(self, start_offset=0, end_offset=None):
+        # Prevent against Whoosh throwing an error. Requires an end_offset
+        # greater than 0.
+        if not end_offset is None and end_offset <= 0:
+            end_offset = 1
+
+        # Determine the page.
+        page_num = 0
+
+        if end_offset is None:
+            end_offset = 1000000
+
+        if start_offset is None:
+            start_offset = 0
+
+        page_length = end_offset - start_offset
+
+        if page_length and page_length > 0:
+            page_num = int(start_offset / page_length)
+
+        # Increment because Whoosh uses 1-based page numbers.
+        page_num += 1
+        return page_num, page_length
+
+    @log_query
+    def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+               fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
+               narrow_queries=None, spelling_query=None, within=None,
+               dwithin=None, distance_point=None, models=None,
+               limit_to_registered_models=None, result_class=None, **kwargs):
+        if not self.setup_complete:
+            self.setup()
+
+        # A zero length query should return no results.
+        if len(query_string) == 0:
+            return {
+                'results': [],
+                'hits': 0,
+            }
+
+        query_string = force_text(query_string)
+
+        # A one-character query (non-wildcard) gets nabbed by a stopwords
+        # filter and should yield zero results.
+        if len(query_string) <= 1 and query_string != u'*':
+            return {
+                'results': [],
+                'hits': 0,
+            }
+
+        reverse = False
+
+        if sort_by is not None:
+            # Determine if we need to reverse the results and if Whoosh can
+            # handle what it's being asked to sort by. Reversing is an
+            # all-or-nothing action, unfortunately.
+            sort_by_list = []
+            reverse_counter = 0
+
+            for order_by in sort_by:
+                if order_by.startswith('-'):
+                    reverse_counter += 1
+
+            if reverse_counter and reverse_counter != len(sort_by):
+                raise SearchBackendError("Whoosh requires all order_by fields"
+                                         " to use the same sort direction")
+
+            for order_by in sort_by:
+                if order_by.startswith('-'):
+                    sort_by_list.append(order_by[1:])
+
+                    if len(sort_by_list) == 1:
+                        reverse = True
+                else:
+                    sort_by_list.append(order_by)
+
+                    if len(sort_by_list) == 1:
+                        reverse = False
+
+            sort_by = sort_by_list[0]
+
+        if facets is not None:
+            warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2)
+
+        if date_facets is not None:
+            warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2)
+
+        if query_facets is not None:
+            warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2)
+
+        narrowed_results = None
+        self.index = self.index.refresh()
+
+        if limit_to_registered_models is None:
+            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+        if models and len(models):
+            model_choices = sorted(get_model_ct(model) for model in models)
+        elif limit_to_registered_models:
+            # Using narrow queries, limit the results to only models handled
+            # with the current routers.
+            model_choices = self.build_models_list()
+        else:
+            model_choices = []
+
+        if len(model_choices) > 0:
+            if narrow_queries is None:
+                narrow_queries = set()
+
+            narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
+
+        narrow_searcher = None
+
+        if narrow_queries is not None:
+            # Potentially expensive? I don't see another way to do it in Whoosh...
+            narrow_searcher = self.index.searcher()
+
+            for nq in narrow_queries:
+                recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
+                                                                 limit=None)
+
+                if len(recent_narrowed_results) <= 0:
+                    return {
+                        'results': [],
+                        'hits': 0,
+                    }
+
+                if narrowed_results:
+                    narrowed_results.filter(recent_narrowed_results)
+                else:
+                    narrowed_results = recent_narrowed_results
+
+        self.index = self.index.refresh()
+
+        if self.index.doc_count():
+            searcher = self.index.searcher()
+            parsed_query = self.parser.parse(query_string)
+
+            # In the event of an invalid/stopworded query, recover gracefully.
+            if parsed_query is None:
+                return {
+                    'results': [],
+                    'hits': 0,
+                }
+
+            page_num, page_length = self.calculate_page(start_offset, end_offset)
+
+            search_kwargs = {
+                'pagelen': page_length,
+                'sortedby': sort_by,
+                'reverse': reverse,
+            }
+
+            # Handle the case where the results have been narrowed.
+            if narrowed_results is not None:
+                search_kwargs['filter'] = narrowed_results
+
+            try:
+                raw_page = searcher.search_page(
+                    parsed_query,
+                    page_num,
+                    **search_kwargs
+                )
+            except ValueError:
+                if not self.silently_fail:
+                    raise
+
+                return {
+                    'results': [],
+                    'hits': 0,
+                    'spelling_suggestion': None,
+                }
+
+            # Because as of Whoosh 2.5.1, it will return the wrong page of
+            # results if you request something too high. :(
+            if raw_page.pagenum < page_num:
+                return {
+                    'results': [],
+                    'hits': 0,
+                    'spelling_suggestion': None,
+                }
+
+            results = self._process_results(raw_page, highlight=highlight, query_string=query_string,
+                                            spelling_query=spelling_query, result_class=result_class)
+            searcher.close()
+
+            if hasattr(narrow_searcher, 'close'):
+                narrow_searcher.close()
+
+            return results
+        else:
+            if self.include_spelling:
+                if spelling_query:
+                    spelling_suggestion = self.create_spelling_suggestion(spelling_query)
+                else:
+                    spelling_suggestion = self.create_spelling_suggestion(query_string)
+            else:
+                spelling_suggestion = None
+
+            return {
+                'results': [],
+                'hits': 0,
+                'spelling_suggestion': spelling_suggestion,
+            }
+
+    def more_like_this(self, model_instance, additional_query_string=None,
+                       start_offset=0, end_offset=None, models=None,
+                       limit_to_registered_models=None, result_class=None, **kwargs):
+        if not self.setup_complete:
+            self.setup()
+
+        # Deferred models will have a different class ("RealClass_Deferred_fieldname")
+        # which won't be in our registry:
+        model_klass = model_instance._meta.concrete_model
+
+        field_name = self.content_field_name
+        narrow_queries = set()
+        narrowed_results = None
+        self.index = self.index.refresh()
+
+        if limit_to_registered_models is None:
+            limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+        if models and len(models):
+            model_choices = sorted(get_model_ct(model) for model in models)
+        elif limit_to_registered_models:
+            # Using narrow queries, limit the results to only models handled
+            # with the current routers.
+            model_choices = self.build_models_list()
+        else:
+            model_choices = []
+
+        if len(model_choices) > 0:
+            if narrow_queries is None:
+                narrow_queries = set()
+
+            narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
+
+        if additional_query_string and additional_query_string != '*':
+            narrow_queries.add(additional_query_string)
+
+        narrow_searcher = None
+
+        if narrow_queries is not None:
+            # Potentially expensive? I don't see another way to do it in Whoosh...
+            narrow_searcher = self.index.searcher()
+
+            for nq in narrow_queries:
+                recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
+                                                                 limit=None)
+
+                if len(recent_narrowed_results) <= 0:
+                    return {
+                        'results': [],
+                        'hits': 0,
+                    }
+
+                if narrowed_results:
+                    narrowed_results.filter(recent_narrowed_results)
+                else:
+                    narrowed_results = recent_narrowed_results
+
+        page_num, page_length = self.calculate_page(start_offset, end_offset)
+
+        self.index = self.index.refresh()
+        raw_results = EmptyResults()
+
+        if self.index.doc_count():
+            query = "%s:%s" % (ID, get_identifier(model_instance))
+            searcher = self.index.searcher()
+            parsed_query = self.parser.parse(query)
+            results = searcher.search(parsed_query)
+
+            if len(results):
+                raw_results = results[0].more_like_this(field_name, top=end_offset)
+
+            # Handle the case where the results have been narrowed.
+            if narrowed_results is not None and hasattr(raw_results, 'filter'):
+                raw_results.filter(narrowed_results)
+
+        try:
+            raw_page = ResultsPage(raw_results, page_num, page_length)
+        except ValueError:
+            if not self.silently_fail:
+                raise
+
+            return {
+                'results': [],
+                'hits': 0,
+                'spelling_suggestion': None,
+            }
+
+        # Because as of Whoosh 2.5.1, it will return the wrong page of
+        # results if you request something too high. :(
+        if raw_page.pagenum < page_num:
+            return {
+                'results': [],
+                'hits': 0,
+                'spelling_suggestion': None,
+            }
+
+        results = self._process_results(raw_page, result_class=result_class)
+        searcher.close()
+
+        if hasattr(narrow_searcher, 'close'):
+            narrow_searcher.close()
+
+        return results
+
+    def _process_results(self, raw_page, highlight=False, query_string='', spelling_query=None, result_class=None):
+        from haystack import connections
+        results = []
+
+        # It's important to grab the hits first before slicing. Otherwise, this
+        # can cause pagination failures.
+        hits = len(raw_page)
+
+        if result_class is None:
+            result_class = SearchResult
+
+        facets = {}
+        spelling_suggestion = None
+        unified_index = connections[self.connection_alias].get_unified_index()
+        indexed_models = unified_index.get_indexed_models()
+
+        for doc_offset, raw_result in enumerate(raw_page):
+            score = raw_page.score(doc_offset) or 0
+            app_label, model_name = raw_result[DJANGO_CT].split('.')
+            additional_fields = {}
+            model = haystack_get_model(app_label, model_name)
+
+            if model and model in indexed_models:
+                for key, value in raw_result.items():
+                    index = unified_index.get_index(model)
+                    string_key = str(key)
+
+                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+                        # Special-cased due to the nature of KEYWORD fields.
+                        if index.fields[string_key].is_multivalued:
+                            if value is None or len(value) is 0:
+                                additional_fields[string_key] = []
+                            else:
+                                additional_fields[string_key] = value.split(',')
+                        else:
+                            additional_fields[string_key] = index.fields[string_key].convert(value)
+                    else:
+                        additional_fields[string_key] = self._to_python(value)
+
+                del (additional_fields[DJANGO_CT])
+                del (additional_fields[DJANGO_ID])
+
+                if highlight:
+                    sa = StemmingAnalyzer()
+                    formatter = WhooshHtmlFormatter('em')
+                    terms = [token.text for token in sa(query_string)]
+
+                    whoosh_result = whoosh_highlight(
+                        additional_fields.get(self.content_field_name),
+                        terms,
+                        sa,
+                        ContextFragmenter(),
+                        formatter
+                    )
+                    additional_fields['highlighted'] = {
+                        self.content_field_name: [whoosh_result],
+                    }
+
+                result = result_class(app_label, model_name, raw_result[DJANGO_ID], score, **additional_fields)
+                results.append(result)
+            else:
+                hits -= 1
+
+        if self.include_spelling:
+            if spelling_query:
+                spelling_suggestion = self.create_spelling_suggestion(spelling_query)
+            else:
+                spelling_suggestion = self.create_spelling_suggestion(query_string)
+
+        return {
+            'results': results,
+            'hits': hits,
+            'facets': facets,
+            'spelling_suggestion': spelling_suggestion,
+        }
+
+    def create_spelling_suggestion(self, query_string):
+        spelling_suggestion = None
+        reader = self.index.reader()
+        corrector = reader.corrector(self.content_field_name)
+        cleaned_query = force_text(query_string)
+
+        if not query_string:
+            return spelling_suggestion
+
+        # Clean the string.
+        for rev_word in self.RESERVED_WORDS:
+            cleaned_query = cleaned_query.replace(rev_word, '')
+
+        for rev_char in self.RESERVED_CHARACTERS:
+            cleaned_query = cleaned_query.replace(rev_char, '')
+
+        # Break it down.
+        query_words = cleaned_query.split()
+        suggested_words = []
+
+        for word in query_words:
+            suggestions = corrector.suggest(word, limit=1)
+
+            if len(suggestions) > 0:
+                suggested_words.append(suggestions[0])
+
+        spelling_suggestion = ' '.join(suggested_words)
+        return spelling_suggestion
+
+    def _from_python(self, value):
+        """
+        Converts Python values to a string for Whoosh.
+
+        Code courtesy of pysolr.
+        """
+        if hasattr(value, 'strftime'):
+            if not hasattr(value, 'hour'):
+                value = datetime(value.year, value.month, value.day, 0, 0, 0)
+        elif isinstance(value, bool):
+            if value:
+                value = 'true'
+            else:
+                value = 'false'
+        elif isinstance(value, (list, tuple)):
+            value = u','.join([force_text(v) for v in value])
+        elif isinstance(value, (six.integer_types, float)):
+            # Leave it alone.
+            pass
+        else:
+            value = force_text(value)
+        return value
+
+    def _to_python(self, value):
+        """
+        Converts values from Whoosh to native Python values.
+
+        A port of the same method in pysolr, as they deal with data the same way.
+        """
+        if value == 'true':
+            return True
+        elif value == 'false':
+            return False
+
+        if value and isinstance(value, six.string_types):
+            possible_datetime = DATETIME_REGEX.search(value)
+
+            if possible_datetime:
+                date_values = possible_datetime.groupdict()
+
+                for dk, dv in date_values.items():
+                    date_values[dk] = int(dv)
+
+                return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'],
+                                date_values['minute'], date_values['second'])
+
+        try:
+            # Attempt to use json to load the values.
+            converted_value = json.loads(value)
+
+            # Try to handle most built-in types.
+            if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)):
+                return converted_value
+        except:
+            # If it fails (SyntaxError or its ilk) or we don't trust it,
+            # continue on.
+            pass
+
+        return value
+
+
+class WhooshSearchQuery(BaseSearchQuery):
+    def _convert_datetime(self, date):
+        if hasattr(date, 'hour'):
+            return force_text(date.strftime('%Y%m%d%H%M%S'))
+        else:
+            return force_text(date.strftime('%Y%m%d000000'))
+
+    def clean(self, query_fragment):
+        """
+        Provides a mechanism for sanitizing user input before presenting the
+        value to the backend.
+
+        Whoosh 1.X differs here in that you can no longer use a backslash
+        to escape reserved characters. Instead, the whole word should be
+        quoted.
+        """
+        words = query_fragment.split()
+        cleaned_words = []
+
+        for word in words:
+            if word in self.backend.RESERVED_WORDS:
+                word = word.replace(word, word.lower())
+
+            for char in self.backend.RESERVED_CHARACTERS:
+                if char in word:
+                    word = "'%s'" % word
+                    break
+
+            cleaned_words.append(word)
+
+        return ' '.join(cleaned_words)
+
+    def build_query_fragment(self, field, filter_type, value):
+        from haystack import connections
+        query_frag = ''
+        is_datetime = False
+
+        if not hasattr(value, 'input_type_name'):
+            # Handle when we've got a ``ValuesListQuerySet``...
+            if hasattr(value, 'values_list'):
+                value = list(value)
+
+            if hasattr(value, 'strftime'):
+                is_datetime = True
+
+            if isinstance(value, six.string_types) and value != ' ':
+                # It's not an ``InputType``. Assume ``Clean``.
+                value = Clean(value)
+            else:
+                value = PythonData(value)
+
+        # Prepare the query using the InputType.
+        prepared_value = value.prepare(self)
+
+        if not isinstance(prepared_value, (set, list, tuple)):
+            # Then convert whatever we get back to what pysolr wants if needed.
+            prepared_value = self.backend._from_python(prepared_value)
+
+        # 'content' is a special reserved word, much like 'pk' in
+        # Django's ORM layer. It indicates 'no special field'.
+        if field == 'content':
+            index_fieldname = ''
+        else:
+            index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+
+        filter_types = {
+            'content': '%s',
+            'contains': '*%s*',
+            'endswith': "*%s",
+            'startswith': "%s*",
+            'exact': '%s',
+            'gt': "{%s to}",
+            'gte': "[%s to]",
+            'lt': "{to %s}",
+            'lte': "[to %s]",
+            'fuzzy': u'%s~',
+        }
+
+        if value.post_process is False:
+            query_frag = prepared_value
+        else:
+            if filter_type in ['content', 'contains', 'startswith', 'endswith', 'fuzzy']:
+                if value.input_type_name == 'exact':
+                    query_frag = prepared_value
+                else:
+                    # Iterate over terms & incorportate the converted form of each into the query.
+                    terms = []
+
+                    if isinstance(prepared_value, six.string_types):
+                        possible_values = prepared_value.split(' ')
+                    else:
+                        if is_datetime is True:
+                            prepared_value = self._convert_datetime(prepared_value)
+
+                        possible_values = [prepared_value]
+
+                    for possible_value in possible_values:
+                        terms.append(filter_types[filter_type] % self.backend._from_python(possible_value))
+
+                    if len(terms) == 1:
+                        query_frag = terms[0]
+                    else:
+                        query_frag = u"(%s)" % " AND ".join(terms)
+            elif filter_type == 'in':
+                in_options = []
+
+                for possible_value in prepared_value:
+                    is_datetime = False
+
+                    if hasattr(possible_value, 'strftime'):
+                        is_datetime = True
+
+                    pv = self.backend._from_python(possible_value)
+
+                    if is_datetime is True:
+                        pv = self._convert_datetime(pv)
+
+                    if isinstance(pv, six.string_types) and not is_datetime:
+                        in_options.append('"%s"' % pv)
+                    else:
+                        in_options.append('%s' % pv)
+
+                query_frag = "(%s)" % " OR ".join(in_options)
+            elif filter_type == 'range':
+                start = self.backend._from_python(prepared_value[0])
+                end = self.backend._from_python(prepared_value[1])
+
+                if hasattr(prepared_value[0], 'strftime'):
+                    start = self._convert_datetime(start)
+
+                if hasattr(prepared_value[1], 'strftime'):
+                    end = self._convert_datetime(end)
+
+                query_frag = u"[%s to %s]" % (start, end)
+            elif filter_type == 'exact':
+                if value.input_type_name == 'exact':
+                    query_frag = prepared_value
+                else:
+                    prepared_value = Exact(prepared_value).prepare(self)
+                    query_frag = filter_types[filter_type] % prepared_value
+            else:
+                if is_datetime is True:
+                    prepared_value = self._convert_datetime(prepared_value)
+
+                query_frag = filter_types[filter_type] % prepared_value
+
+        if len(query_frag) and not isinstance(value, Raw):
+            if not query_frag.startswith('(') and not query_frag.endswith(')'):
+                query_frag = "(%s)" % query_frag
+
+        return u"%s%s" % (index_fieldname, query_frag)
+
+
+        # if not filter_type in ('in', 'range'):
+        #     # 'in' is a bit of a special case, as we don't want to
+        #     # convert a valid list/tuple to string. Defer handling it
+        #     # until later...
+        #     value = self.backend._from_python(value)
+
+
+class WhooshEngine(BaseEngine):
+    backend = WhooshSearchBackend
+    query = WhooshSearchQuery
--- a/blog/forms.py
+++ b/blog/forms.py
@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version: ??
+@author: liangliangyy
+@license: MIT Licence 
+@contact: liangliangyy@gmail.com
+@site: https://www.lylinux.org/
+@software: PyCharm
+@file: forms.py
+@time: 2017/1/7 上午12:36
+"""
+
+from haystack.forms import SearchForm
+from django import forms
+from blog.models import Article, Category
+
+
+class BlogSearchForm(SearchForm):
+    querydata = forms.CharField(required=True)
+
+    def search(self):
+        datas = super(BlogSearchForm, self).search()
+        if not self.is_valid():
+            return self.no_query_found()
+
+        if self.cleaned_data['querydata']:
+            print(self.cleaned_data['querydata'])
+        return datas
--- a/blog/models.py
+++ b/blog/models.py
@ -32,7 +32,7 @@ class Article(models.Model):
    author = models.ForeignKey(settings.AUTH_USER_MODEL, verbose_name='作者', on_delete=models.CASCADE)

    category = models.ForeignKey('Category', verbose_name='分类', on_delete=models.CASCADE, blank=True, null=True)
-    tags = models.ManyToManyField('Tag', verbose_name='标签集合', blank=True, null=True)
+    tags = models.ManyToManyField('Tag', verbose_name='标签集合', blank=True)

    slug = models.SlugField(default='no-slug', max_length=60, blank=True)
    wordpress_id = models.IntegerField()
--- a/blog/search_indexes.py
+++ b/blog/search_indexes.py
@ -0,0 +1,41 @@
+#!/usr/bin/env python
+# encoding: utf-8
+
+
+"""
+@version: ??
+@author: liangliangyy
+@license: MIT Licence 
+@contact: liangliangyy@gmail.com
+@site: https://www.lylinux.org/
+@software: PyCharm
+@file: search_indexes.py
+@time: 2017/1/7 上午12:44
+"""
+from haystack import indexes
+from django.conf import settings
+from blog.models import Article, Category, Tag
+
+
+class ArticleIndex(indexes.SearchIndex, indexes.Indexable):
+    # title = indexes.CharField(document=True, use_template=True)
+    text = indexes.CharField(document=True, use_template=True)
+
+
+    def get_model(self):
+        return Article
+
+    def index_queryset(self, using=None):
+        return self.get_model().objects.filter(status='p')
+
+
+"""
+class CategoryIndex(indexes.SearchIndex, indexes.Indexable):
+    name = indexes.CharField(document=True, use_template=True)
+
+    def get_model(self):
+        return Article
+
+    def index_queryset(self, using=None):
+        return self.get_model().objects.filter(status='p')
+"""
--- a/blog/urls.py
+++ b/blog/urls.py
@ -16,6 +16,9 @@
 from django.conf.urls import url
 from django.views.decorators.cache import cache_page
 from . import views
+from haystack.forms import ModelSearchForm
+from haystack.query import SearchQuerySet
+from haystack.views import SearchView

 urlpatterns = [
    # url(r'^$', cache_page(60 * 15)(views.IndexView.as_view()), name='index'),
@ -32,5 +35,7 @@ urlpatterns = [
    url(r'^author/(?P<author_name>\w+).html$', views.AuthorDetailView.as_view(), name='author_detail'),
    url(r'^tag/(?P<tag_name>.+).html$', views.TagDetailView.as_view(), name='tag_detail'),
    url(r'(?P<wordpress_slug>\w+).html$', views.handle_wordpress_post),
-    url(r'^test$', views.test)
+    url(r'^test$', views.test),
+    # url(r'^search/?$', views.BlogSearchView.as_view(), name='search_view'),
+
 ]
--- a/blog/views.py
+++ b/blog/views.py
@ -16,6 +16,8 @@ from blog.wordpress_helper import wordpress_helper
 from django import http
 from django.http import HttpResponse
 from abc import ABCMeta, abstractmethod
+from haystack.generic_views import SearchView
+from blog.forms import BlogSearchForm

 """
 class SeoProcessor():
@ -199,6 +201,31 @@ class TagDetailView(ArticleListView):
        return super(TagDetailView, self).get_context_data(**kwargs)


+"""
+class BlogSearchView(SearchView):
+    form_class = BlogSearchForm
+    template_name = 'blog/articledetail.html'
+    model = Article
+    # template_name属性用于指定使用哪个模板进行渲染
+    template_name = 'blog/index.html'
+
+    # context_object_name属性用于给上下文变量取名（在模板中使用该名字）
+    context_object_name = 'article_list'
+
+    def get_queryset(self):
+        queryset = super(BlogSearchView, self).get_queryset()
+        # further filter queryset based on some set of criteria
+        # return queryset.filter(pub_date__gte=date(2015, 1, 1))
+        return queryset
+
+    def get_context_data(self, **kwargs):
+        tag_name = 'search'
+        kwargs['page_type'] = 'search'
+        kwargs['tag_name'] = tag_name
+        return super(BlogSearchView, self).get_context_data(**kwargs)
+"""
+
+
 def test(requests):
    post = Article.objects.all()
    import re
--- a/requirements.txt
+++ b/requirements.txt
@ -1,17 +1,14 @@
-Django==1.10.3
-django-autoslug==1.9.3
-django-autoslug-field==0.2.3
-django-pagedown==0.1.1
-django-registration==2.2
-django-uuslug==1.1.8
-inflect==0.2.5
-markdown2==2.3.1
-Pillow==3.4.2
-PyMySQL==0.7.9
-python-memcached==1.58
-python-slugify==1.2.1
-requests==2.12.1
-six==1.10.0
-sqlacodegen==1.1.6
-SQLAlchemy==1.1.4
-Unidecode==0.4.19
+Django
+django-autoslug
+django-haystack
+django-pagedown
+django-uuslug
+jieba
+markdown2
+Pillow
+PyMySQL
+python-memcached
+python-slugify
+requests
+Unidecode
+Whoosh
--- a/templates/blog/tags/sidebar.html
+++ b/templates/blog/tags/sidebar.html
@ -1,9 +1,9 @@
 <div id="secondary" class="widget-area" role="complementary">
    <aside id="search-2" class="widget widget_search">
-        <form role="search" method="get" id="searchform" class="searchform" action="https://www.lylinux.org/">
+        <form role="search" method="get" id="searchform" class="searchform" action="/search">
            <div>
                <label class="screen-reader-text" for="s">搜索：</label>
-                <input type="text" value="" name="s" id="s"/>
+                <input type="text" value="" name="q" id="q"/>
                <input type="submit" id="searchsubmit" value="搜索"/>
            </div>
        </form>
--- a/templates/search/indexes/blog/article_text.txt
+++ b/templates/search/indexes/blog/article_text.txt
@ -0,0 +1,3 @@
+{{ object.title }}
+{{ object.author.username }}
+{{ object.body }}
--- a/templates/search/search.html
+++ b/templates/search/search.html
@ -0,0 +1,58 @@
+{% extends 'base.html' %}
+{% load blog_tags %}
+{% block header %}
+    <title>{{ SITE_NAME }} | {{ SITE_DESCRIPTION }}</title>
+    <meta name="description" content="{{ SITE_SEO_DESCRIPTION }}"/>
+    <meta name="keywords" content="linux,apache,mysql,服务器,ubuntu,shell,java,web,javaweb,wordpress,csharp,.net,asp"/>
+    <meta property="og:type" content="blog"/>
+    <meta property="og:title" content="{{ SITE_NAME }}"/>
+    <meta property="og:description" content="{{ SITE_DESCRIPTION }}"/>
+    <meta property="og:url" content="{{ SITE_BASE_URL }}"/>
+    <meta property="og:site_name" content="{{ SITE_NAME }}"/>
+{% endblock %}
+{% block content %}
+    <div id="primary" class="site-content">
+        <div id="content" role="main">
+            {% if query %}
+                <header class="archive-header">
+
+                    <h2 class="archive-title"> 搜索：<span style="color: red">{{ query }}</span></h2>
+                </header><!-- .archive-header -->
+            {% endif %}
+            {% if query and page.object_list %}
+                {% for article in page.object_list %}
+                    {% load_article_detail article.object True user %}
+                {% endfor %}
+                {% if page.has_previous or page.has_next %}
+                    <nav id="nav-below" class="navigation" role="navigation">
+                        <h3 class="assistive-text">文章导航</h3>
+                        {% if page.has_previous %}
+                            <div class="nav-previous"><a
+                                    href="?q={{ query }}&amp;page={{ page.previous_page_number }}"><span
+                                    class="meta-nav">&larr;</span> 早期文章</a></div>
+                        {% endif %}
+                        {% if page.has_next %}
+                            <div class="nav-next"><a href="?q={{ query }}&amp;page={{ page.next_page_number }}">较新文章
+                                <span
+                                        class="meta-nav">→</span></a>
+                            </div>
+                        {% endif %}
+                    </nav><!-- .navigation -->
+
+                {% endif %}
+            {% else %}
+                <header class="archive-header">
+
+                    <h1 class="archive-title">哎呀，关键字：<span>{{ query }}</span>没有找到结果，要不换个词再试试？</h1>
+                </header><!-- .archive-header -->
+            {% endif %}
+        </div><!-- #content -->
+    </div><!-- #primary -->
+{% endblock %}
+
+
+{% block sidebar %}
+    {% load_sidebar request.user %}
+{% endblock %}
+
+