Add Elasticsearch instrumentation, optimize Elasticsearch search

sh_branch
liangliangyy 7 years ago
parent 914ac0484b
commit b5c255c2c0

@@ -10,72 +10,55 @@
@file: elasticsearch_backend.py
@time: 2019-04-13 11:46
"""
import logging
import re
import json
from datetime import datetime, timedelta
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.utils import six
from django.utils.datetime_safe import datetime
from django.utils.encoding import force_text
from elasticsearch_dsl import Q
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
from haystack.constants import DJANGO_CT, DJANGO_ID, ID
from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
from haystack.inputs import Clean, Exact, PythonData, Raw
from haystack.models import SearchResult
from haystack.utils import log as logging
from haystack.utils import get_identifier, get_model_ct
from haystack.utils.app_loading import haystack_get_model
from django_elasticsearch_dsl.registries import registry
from blog.models import Article
from blog.documents import ArticleDocument
from blog.documents import ArticleDocument, ArticleDocumentManager
logger = logging.getLogger(__name__)
DATETIME_REGEX = re.compile(
'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
class ElasticSearchBackend(BaseSearchBackend):
def __init__(self, connection_alias, **connection_options):
super(ElasticSearchBackend, self).__init__(connection_alias, **connection_options)
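# Index maintenance is delegated to ArticleDocumentManager; _rebuild(None)
# re-creates the article index from the database when the backend is set up.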
self.manager = ArticleDocumentManager()
self._rebuild(None)
def _get_models(self):
models = registry.get_models()
return set(models)
def _get_models(self, iterable):
models = iterable if iterable else Article.objects.all()
docs = self.manager.convert_to_doc(models)
return docs
def _create(self, models):
for index in registry.get_indices(models):
index.create()
def _populate(self, models):
for doc in registry.get_documents(models):
qs = doc().get_queryset()
doc().update(qs)
self.manager.create_index()
docs = self._get_models(models)
self.manager.rebuild(docs)
def _delete(self, models):
for index in registry.get_indices(models):
index.delete(ignore=404)
for m in models:
m.delete()
return True
def _rebuild(self, models):
if not self._delete(models):
return
self._create(models)
self._populate(models)
models = models if models else Article.objects.all()
docs = self.manager.convert_to_doc(models)
self.manager.update_docs(docs)
def update(self, index, iterable, commit=True):
models = self._get_models()
# self._rebuild(models)
models = self._get_models(iterable)
self.manager.update_docs(models)
def remove(self, obj_or_string):
models = self._get_models()
models = self._get_models([obj_or_string])
self._delete(models)
def clear(self, models=None, commit=True):
@@ -124,66 +107,6 @@ class ElasticSearchBackend(BaseSearchBackend):
'spelling_suggestion': spelling_suggestion,
}
def _from_python(self, value):
"""
Converts Python values to a string for Whoosh.
Code courtesy of pysolr.
"""
if hasattr(value, 'strftime'):
if not hasattr(value, 'hour'):
value = datetime(value.year, value.month, value.day, 0, 0, 0)
elif isinstance(value, bool):
if value:
value = 'true'
else:
value = 'false'
elif isinstance(value, (list, tuple)):
value = u','.join([force_text(v) for v in value])
elif isinstance(value, (six.integer_types, float)):
# Leave it alone.
pass
else:
value = force_text(value)
return value
def _to_python(self, value):
"""
Converts values from Whoosh to native Python values.
A port of the same method in pysolr, as they deal with data the same way.
"""
if value == 'true':
return True
elif value == 'false':
return False
if value and isinstance(value, six.string_types):
possible_datetime = DATETIME_REGEX.search(value)
if possible_datetime:
date_values = possible_datetime.groupdict()
for dk, dv in date_values.items():
date_values[dk] = int(dv)
return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'],
date_values['minute'], date_values['second'])
try:
# Attempt to use json to load the values.
converted_value = json.loads(value)
# Try to handle most built-in types.
if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)):
return converted_value
except:
# If it fails (SyntaxError or its ilk) or we don't trust it,
# continue on.
pass
return value
class ElasticSearchQuery(BaseSearchQuery):
def _convert_datetime(self, date):

@@ -10,54 +10,122 @@
@file: documents.py
@time: 2019-04-05 13:05
"""
from django_elasticsearch_dsl import DocType, Index, fields
import time
from blog.models import Article, Category, Tag
from accounts.models import BlogUser
blog = Index('blog')
blog.settings(
number_of_shards=1,
number_of_replicas=0
)
@blog.doc_type
class ArticleDocument(DocType):
body = fields.TextField(attr='body_to_string', analyzer='ik_max_word')
title = fields.TextField(analyzer='ik_max_word')
author = fields.ObjectField(properties={
'nickname': fields.TextField(analyzer='ik_max_word'),
'id': fields.IntegerField()
from elasticsearch_dsl import Document, Date, Integer, Keyword, Text, Object, Boolean
from django.conf import settings
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')
from elasticsearch_dsl.connections import connections
if ELASTICSEARCH_ENABLED:
connections.create_connection(hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
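# ElapsedTimeDocument stores per-request timing samples (the instrumentation
# added in this commit) in a dedicated 'performance' index.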
class ElapsedTimeDocument(Document):
url = Text()
time_taken = Integer()
log_datetime = Date()
type = Text(analyzer='ik_max_word')
class Index:
name = 'performance'
settings = {
"number_of_shards": 1,
"number_of_replicas": 0
}
class Meta:
doc_type = 'ElapsedTime'
class ElapsedTimeDocumentManager:
@staticmethod
def create(url, time_taken, log_datetime, type):
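# Create the index mapping lazily, the first time a timing record is written.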
if not hasattr(ElapsedTimeDocumentManager, 'mapping_created'):
ElapsedTimeDocument.init()
setattr(ElapsedTimeDocumentManager, 'mapping_created', True)
doc = ElapsedTimeDocument(meta={'id': int(round(time.time() * 1000))}, url=url, time_taken=time_taken,
log_datetime=log_datetime, type=type)
doc.save()
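# ArticleDocument mirrors the Article model; it is kept up to date explicitly
# through ArticleDocumentManager rather than through model signals
# (ignore_signals is set in Meta below).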
class ArticleDocument(Document):
body = Text(analyzer='ik_max_word')
title = Text(analyzer='ik_max_word')
author = Object(properties={
'nickname': Text(analyzer='ik_max_word'),
'id': Integer()
})
category = fields.ObjectField(properties={
'name': fields.TextField(analyzer='ik_max_word'),
'id': fields.IntegerField()
category = Object(properties={
'name': Text(analyzer='ik_max_word'),
'id': Integer()
})
tags = fields.ObjectField(properties={
'name': fields.TextField(analyzer='ik_max_word'),
'id': fields.IntegerField()
tags = Object(properties={
'name': Text(analyzer='ik_max_word'),
'id': Integer()
})
# def get_instances_from_related(self, related_instance):
# if isinstance(related_instance, BlogUser):
# return related_instance
# elif isinstance(related_instance, Category):
# pass
pub_time = Date()
status = Text()
comment_status = Text()
type = Text()
views = Integer()
article_order = Integer()
class Index:
name = 'blog'
settings = {
"number_of_shards": 1,
"number_of_replicas": 0
}
class Meta:
model = Article
fields = [
'pub_time',
'status',
'comment_status',
'type',
'views',
'article_order',
]
# related_models = [Category, Tag, BlogUser]
doc_type = 'Article'
auto_refresh = False
ignore_signals = True
class ArticleDocumentManager():
def __init__(self):
ArticleDocument.init()
def create_index(self):
ArticleDocument.init()
def delete_index(self):
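# Drop the whole 'blog' index; 400 and 404 errors are ignored, so this also
# works when the index does not exist yet.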
from elasticsearch import Elasticsearch
es = Elasticsearch()
es.indices.delete(index='blog', ignore=[400, 404])
def convert_to_doc(self, articles):
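# Manually map Article model instances to ArticleDocument instances, one per article.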
return [ArticleDocument(meta={'id': article.id}, body=article.body, title=article.title,
author={
'nickname': article.author.username,
'id': article.author.id
},
category={
'name': article.category.name,
'id': article.category.id
},
tags=[{'name': t.name, 'id': t.id} for t in article.tags.all()],
pub_time=article.pub_time,
status=article.status,
comment_status=article.comment_status,
type=article.type,
views=article.views,
article_order=article.article_order
) for article in articles]
def rebuild(self, articles=None):
articles = articles if articles else Article.objects.all()
docs = self.convert_to_doc(articles)
for doc in docs:
doc.save()
def update_docs(self, docs):
for doc in docs:
doc.save()

@@ -0,0 +1,27 @@
#!/usr/bin/env python
# encoding: utf-8
"""
@version: ??
@author: liangliangyy
@license: MIT Licence
@contact: liangliangyy@gmail.com
@site: https://www.lylinux.net/
@software: PyCharm
@file: build_index.py
@time: 2019-04-20 20:39
"""
from blog.documents import ArticleDocument, ArticleDocumentManager
from django.core.management.base import BaseCommand
from blog.models import Article
# TODO: parameterize
class Command(BaseCommand):
help = 'build search index'
def handle(self, *args, **options):
manager = ArticleDocumentManager()
manager.delete_index()
manager.rebuild()
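# Assumed invocation, given the file lives under an app's management/commands/
# directory: python manage.py build_index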

@@ -12,10 +12,11 @@
@file: middleware.py
@time: 2017/1/19 12:36 AM
"""
import datetime
import time
from ipware.ip import get_real_ip
from DjangoBlog.utils import cache
from blog.documents import ELASTICSEARCH_ENABLED, ElapsedTimeDocumentManager
class OnlineMiddleware(object):
@@ -31,5 +32,12 @@ class OnlineMiddleware(object):
return response
cast_time = time.time() - start_time
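# With Elasticsearch enabled, store the request's elapsed time as a timing document.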
if ELASTICSEARCH_ENABLED:
time_taken = round((cast_time) * 1000, 2)
url = request.path
from django.utils import timezone
ElapsedTimeDocumentManager.create(url=url, time_taken=time_taken, log_datetime=timezone.now(),
type='blog')
response.content = response.content.replace(b'<!!LOAD_TIMES!!>', str.encode(str(cast_time)[:5]))
return response

@@ -12,6 +12,7 @@ from DjangoBlog.utils import cache_decorator, cache
from django.utils.functional import cached_property
from django.utils.timezone import now
from mdeditor.fields import MDTextField
from django.db.models.signals import post_save
logger = logging.getLogger(__name__)
@@ -34,7 +35,12 @@ class BaseModel(models.Model):
if getattr(self, 'slug') == 'no-slug' or not self.id:
slug = getattr(self, 'title') if 'title' in self.__dict__ else getattr(self, 'name')
setattr(self, 'slug', slugify(slug))
super().save(*args, **kwargs)
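# When only the view counter changes, write it with a queryset update() instead of a
# full save(), so model signals are not fired for every page view.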
is_update_views = isinstance(self, Article) and 'update_fields' in kwargs and kwargs['update_fields'] == [
'views']
if is_update_views:
Article.objects.filter(pk=self.pk).update(views=self.views)
else:
super().save(*args, **kwargs)
# is_update_views = 'update_fields' in kwargs and len(kwargs['update_fields']) == 1 and kwargs['update_fields'][
# 0] == 'views'
# from DjangoBlog.blog_signals import article_save_signal

@@ -19,7 +19,6 @@ from blog.models import Article, Category, Tag
class ArticleIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True)
author = indexes.CharField(model_attr='author')
def get_model(self):
return Article

@@ -12,13 +12,12 @@ django-appconf==1.0.3
django-autoslug==1.9.4
django-compressor==2.2
django-debug-toolbar==1.11
django-elasticsearch-dsl==0.5.1
django-haystack==2.8.1
django-ipware==2.1.0
django-mdeditor==0.1.13
django-uuslug==1.1.8
elasticsearch==6.3.1
elasticsearch-dsl==6.1.0
elasticsearch-dsl==6.3.1
idna==2.8
ipaddress==1.0.22
isort==4.3.15
