diff --git a/DjangoBlog/elasticsearch_backend.py b/DjangoBlog/elasticsearch_backend.py index 8d34bc8..67f12b9 100644 --- a/DjangoBlog/elasticsearch_backend.py +++ b/DjangoBlog/elasticsearch_backend.py @@ -10,72 +10,55 @@ @file: elasticsearch_backend.py @time: 2019-04-13 11:46 """ + import logging import re -import json - -from datetime import datetime, timedelta - -from django.conf import settings -from django.core.exceptions import ImproperlyConfigured -from django.utils import six -from django.utils.datetime_safe import datetime from django.utils.encoding import force_text from elasticsearch_dsl import Q from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query -from haystack.constants import DJANGO_CT, DJANGO_ID, ID -from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument -from haystack.inputs import Clean, Exact, PythonData, Raw from haystack.models import SearchResult from haystack.utils import log as logging -from haystack.utils import get_identifier, get_model_ct -from haystack.utils.app_loading import haystack_get_model -from django_elasticsearch_dsl.registries import registry from blog.models import Article -from blog.documents import ArticleDocument +from blog.documents import ArticleDocument, ArticleDocumentManager logger = logging.getLogger(__name__) -DATETIME_REGEX = re.compile( - '^(?P\d{4})-(?P\d{2})-(?P\d{2})T(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d{3,6}Z?)?$') - class ElasticSearchBackend(BaseSearchBackend): + def __init__(self, connection_alias, **connection_options): + super(ElasticSearchBackend, self).__init__(connection_alias, **connection_options) + self.manager = ArticleDocumentManager() + self._rebuild(None) - def _get_models(self): - models = registry.get_models() - return set(models) + def _get_models(self, iterable): + models = iterable if iterable else Article.objects.all() + docs = self.manager.convert_to_doc(models) + return docs def _create(self, models): - for index in 
registry.get_indices(models): - index.create() - - def _populate(self, models): - for doc in registry.get_documents(models): - qs = doc().get_queryset() - doc().update(qs) + self.manager.create_index() + docs = self._get_models(models) + self.manager.update_docs(docs) def _delete(self, models): - for index in registry.get_indices(models): - index.delete(ignore=404) + for m in models: + m.delete() return True def _rebuild(self, models): - if not self._delete(models): - return - - self._create(models) - self._populate(models) + models = models if models else Article.objects.all() + docs = self.manager.convert_to_doc(models) + self.manager.update_docs(docs) def update(self, index, iterable, commit=True): - models = self._get_models() - # self._rebuild(models) + models = self._get_models(iterable) + self.manager.update_docs(models) def remove(self, obj_or_string): - models = self._get_models() + models = self._get_models([obj_or_string]) self._delete(models) def clear(self, models=None, commit=True): @@ -124,66 +107,6 @@ class ElasticSearchBackend(BaseSearchBackend): 'spelling_suggestion': spelling_suggestion, } - def _from_python(self, value): - """ - Converts Python values to a string for Whoosh. - - Code courtesy of pysolr. - """ - if hasattr(value, 'strftime'): - if not hasattr(value, 'hour'): - value = datetime(value.year, value.month, value.day, 0, 0, 0) - elif isinstance(value, bool): - if value: - value = 'true' - else: - value = 'false' - elif isinstance(value, (list, tuple)): - value = u','.join([force_text(v) for v in value]) - elif isinstance(value, (six.integer_types, float)): - # Leave it alone. - pass - else: - value = force_text(value) - return value - - def _to_python(self, value): - """ - Converts values from Whoosh to native Python values. - - A port of the same method in pysolr, as they deal with data the same way. 
- """ - if value == 'true': - return True - elif value == 'false': - return False - - if value and isinstance(value, six.string_types): - possible_datetime = DATETIME_REGEX.search(value) - - if possible_datetime: - date_values = possible_datetime.groupdict() - - for dk, dv in date_values.items(): - date_values[dk] = int(dv) - - return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'], - date_values['minute'], date_values['second']) - - try: - # Attempt to use json to load the values. - converted_value = json.loads(value) - - # Try to handle most built-in types. - if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)): - return converted_value - except: - # If it fails (SyntaxError or its ilk) or we don't trust it, - # continue on. - pass - - return value - class ElasticSearchQuery(BaseSearchQuery): def _convert_datetime(self, date): diff --git a/blog/documents.py b/blog/documents.py index 3219587..699d9bc 100644 --- a/blog/documents.py +++ b/blog/documents.py @@ -10,54 +10,122 @@ @file: documents.py @time: 2019-04-05 13:05 """ - -from django_elasticsearch_dsl import DocType, Index, fields +import time from blog.models import Article, Category, Tag -from accounts.models import BlogUser - -blog = Index('blog') -blog.settings( - number_of_shards=1, - number_of_replicas=0 -) - - -@blog.doc_type -class ArticleDocument(DocType): - body = fields.TextField(attr='body_to_string', analyzer='ik_max_word') - title = fields.TextField(analyzer='ik_max_word') - author = fields.ObjectField(properties={ - 'nickname': fields.TextField(analyzer='ik_max_word'), - 'id': fields.IntegerField() +from elasticsearch_dsl import Document, Date, Integer, Keyword, Text, Object, Boolean + +from django.conf import settings + +ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL') + +from elasticsearch_dsl.connections import connections + +if ELASTICSEARCH_ENABLED: + 
connections.create_connection(hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']]) + + +class ElapsedTimeDocument(Document): + url = Text() + time_taken = Integer() + log_datetime = Date() + type = Text(analyzer='ik_max_word') + + class Index: + name = 'performance' + settings = { + "number_of_shards": 1, + "number_of_replicas": 0 + } + + class Meta: + doc_type = 'ElapsedTime' + + +class ElaspedTimeDocumentManager(): + + @staticmethod + def create(url, time_taken, log_datetime, type): + if not hasattr(ElaspedTimeDocumentManager, 'mapping_created'): + ElapsedTimeDocument.init() + setattr(ElaspedTimeDocumentManager, 'mapping_created', True) + doc = ElapsedTimeDocument(meta={'id': int(round(time.time() * 1000))}, url=url, time_taken=time_taken, + log_datetime=log_datetime, type=type) + doc.save() + + +class ArticleDocument(Document): + body = Text(analyzer='ik_max_word') + title = Text(analyzer='ik_max_word') + author = Object(properties={ + 'nickname': Text(analyzer='ik_max_word'), + 'id': Integer() }) - category = fields.ObjectField(properties={ - 'name': fields.TextField(analyzer='ik_max_word'), - 'id': fields.IntegerField() + category = Object(properties={ + 'name': Text(analyzer='ik_max_word'), + 'id': Integer() }) - tags = fields.ObjectField(properties={ - 'name': fields.TextField(analyzer='ik_max_word'), - 'id': fields.IntegerField() + tags = Object(properties={ + 'name': Text(analyzer='ik_max_word'), + 'id': Integer() }) - # def get_instances_from_related(self, related_instance): - # if isinstance(related_instance, BlogUser): - # return related_instance - # elif isinstance(related_instance, Category): - # pass + pub_time = Date() + status = Text() + comment_status = Text() + type = Text() + views = Integer() + article_order = Integer() + + class Index: + name = 'blog' + settings = { + "number_of_shards": 1, + "number_of_replicas": 0 + } class Meta: - model = Article - fields = [ - 'pub_time', - 'status', - 'comment_status', - 'type', - 'views', - 
'article_order', - - ] - # related_models = [Category, Tag, BlogUser] doc_type = 'Article' - auto_refresh = False - ignore_signals = True + +class ArticleDocumentManager(): + + def __init__(self): + + ArticleDocument.init() + + def create_index(self): + ArticleDocument.init() + + def deleate_index(self): + from elasticsearch import Elasticsearch + es = Elasticsearch() + es.indices.delete(index='blog', ignore=[400, 404]) + + def convert_to_doc(self, articles): + return [ArticleDocument(meta={'id': article.id}, body=article.body, title=article.title, + author={ + 'nickname': article.author.username, + 'id': article.author.id + }, + category={ + 'name': article.category.name, + 'id': article.category.id + }, + tags=[{'name': t.name, 'id': t.id} for t in article.tags.all()], + pub_time=article.pub_time, + status=article.status, + comment_status=article.comment_status, + type=article.type, + views=article.views, + article_order=article.article_order + ) for article in articles] + + def rebuild(self, articles=None): + articles = articles if articles else Article.objects.all() + docs = self.convert_to_doc(articles) + for doc in docs: + doc.save() + + def update_docs(self, docs): + for doc in docs: + doc.save() diff --git a/blog/management/commands/build_index.py b/blog/management/commands/build_index.py new file mode 100644 index 0000000..1beaa40 --- /dev/null +++ b/blog/management/commands/build_index.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +@version: ?? 
+@author: liangliangyy +@license: MIT Licence +@contact: liangliangyy@gmail.com +@site: https://www.lylinux.net/ +@software: PyCharm +@file: build_index.py +@time: 2019-04-20 20:39 +""" + +from blog.documents import ArticleDocument, ArticleDocumentManager + +from django.core.management.base import BaseCommand +from blog.models import Article + + +# TODO 参数化 +class Command(BaseCommand): + help = 'build search index' + + def handle(self, *args, **options): + manager = ArticleDocumentManager() + manager.deleate_index() + manager.rebuild() diff --git a/blog/middleware.py b/blog/middleware.py index 3241fc4..f24ed98 100644 --- a/blog/middleware.py +++ b/blog/middleware.py @@ -12,10 +12,11 @@ @file: middleware.py @time: 2017/1/19 上午12:36 """ - +import datetime import time from ipware.ip import get_real_ip from DjangoBlog.utils import cache +from blog.documents import ELASTICSEARCH_ENABLED, ElaspedTimeDocumentManager class OnlineMiddleware(object): @@ -31,5 +32,12 @@ class OnlineMiddleware(object): return response cast_time = time.time() - start_time + if ELASTICSEARCH_ENABLED: + time_taken = round((cast_time) * 1000, 2) + url = request.path + from django.utils import timezone + + ElaspedTimeDocumentManager.create(url=url, time_taken=time_taken, log_datetime=timezone.now(), + type='blog') response.content = response.content.replace(b'', str.encode(str(cast_time)[:5])) return response diff --git a/blog/models.py b/blog/models.py index 1ac9854..4c8b7df 100644 --- a/blog/models.py +++ b/blog/models.py @@ -12,6 +12,7 @@ from DjangoBlog.utils import cache_decorator, cache from django.utils.functional import cached_property from django.utils.timezone import now from mdeditor.fields import MDTextField +from django.db.models.signals import post_save logger = logging.getLogger(__name__) @@ -34,7 +35,12 @@ class BaseModel(models.Model): if getattr(self, 'slug') == 'no-slug' or not self.id: slug = getattr(self, 'title') if 'title' in self.__dict__ else getattr(self, 'name') 
setattr(self, 'slug', slugify(slug)) - super().save(*args, **kwargs) + is_update_views = isinstance(self, Article) and 'update_fields' in kwargs and kwargs['update_fields'] == [ + 'views'] + if is_update_views: + Article.objects.filter(pk=self.pk).update(views=self.views) + else: + super().save(*args, **kwargs) # is_update_views = 'update_fields' in kwargs and len(kwargs['update_fields']) == 1 and kwargs['update_fields'][ # 0] == 'views' # from DjangoBlog.blog_signals import article_save_signal diff --git a/blog/search_indexes.py b/blog/search_indexes.py index f48ae32..ed80e65 100644 --- a/blog/search_indexes.py +++ b/blog/search_indexes.py @@ -19,7 +19,6 @@ from blog.models import Article, Category, Tag class ArticleIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) - author = indexes.CharField(model_attr='author') def get_model(self): return Article diff --git a/requirements.txt b/requirements.txt index 6daa56a..5d87135 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,13 +12,12 @@ django-appconf==1.0.3 django-autoslug==1.9.4 django-compressor==2.2 django-debug-toolbar==1.11 -django-elasticsearch-dsl==0.5.1 django-haystack==2.8.1 django-ipware==2.1.0 django-mdeditor==0.1.13 django-uuslug==1.1.8 elasticsearch==6.3.1 -elasticsearch-dsl==6.1.0 +elasticsearch-dsl==6.3.1 idna==2.8 ipaddress==1.0.22 isort==4.3.15