# tentest/doc/DjangoBlog/blog/documents.py

import time

import elasticsearch.client
from django.conf import settings
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
from elasticsearch_dsl.connections import connections

from blog.models import Article

# Elasticsearch support is switched on only when the Django settings define
# an ELASTICSEARCH_DSL entry.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

if ELASTICSEARCH_ENABLED:
    from elasticsearch import Elasticsearch
    from elasticsearch.client import IngestClient

    _es_hosts = settings.ELASTICSEARCH_DSL['default']['hosts']

    # Register the elasticsearch_dsl connection from the configured hosts.
    connections.create_connection(hosts=[_es_hosts])

    # A separate low-level client, used here to reach the ingest API.
    es = Elasticsearch(_es_hosts)
    c = IngestClient(es)
    try:
        # Probe for the 'geoip' ingest pipeline.
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        # The pipeline is missing: create it so that the "ip" field of a
        # document is run through the geoip processor.
        c.put_pipeline('geoip', body='''{
              "description" : "Add geoip info",
              "processors" : [
                {
                  "geoip" : {
                    "field" : "ip"
                  }
                }
              ]
            }''')


class GeoIp(InnerDoc):
    """
    Inner document describing the geographic location of an IP address.

    Used as the type of the ``geoip`` object field on ElapsedTimeDocument.
    """
    continent_name = Keyword()
    country_iso_code = Keyword()
    country_name = Keyword()
    # Coordinates of the location, declared with the GeoPoint field type.
    location = GeoPoint()


class UserAgentBrowser(InnerDoc):
    """
    Inner document holding a browser's family and version.

    Both values are declared as Keyword fields.
    """
    Family = Keyword()
    Version = Keyword()


class UserAgentOS(UserAgentBrowser):
    """
    Inner document holding an operating system's family and version.

    Subclasses UserAgentBrowser without adding anything, so it declares the
    same ``Family`` and ``Version`` fields.
    """
    pass


class UserAgentDevice(InnerDoc):
    """
    Inner document holding a device's family, brand and model.

    All three values are declared as Keyword fields.
    """
    Family = Keyword()
    Brand = Keyword()
    Model = Keyword()


class UserAgent(InnerDoc):
    """
    Inner document holding the complete user-agent information of a request.

    Groups the browser, operating system and device sub-documents together
    with the raw user-agent string and a bot flag.
    """
    # The three sub-documents are optional (required=False).
    browser = Object(UserAgentBrowser, required=False)
    os = Object(UserAgentOS, required=False)
    device = Object(UserAgentDevice, required=False)
    # Raw user-agent string.
    string = Text()
    # Whether the user agent was identified as a bot.
    is_bot = Boolean()


class ElapsedTimeDocument(Document):
    """
    Performance-monitoring document for a single page request.

    Records the requested URL, the time the request took, the log timestamp
    and the client IP, plus optional geo-location and user-agent details.
    Documents are stored in the 'performance' index.
    """
    url = Keyword()
    # Request duration; ElaspedTimeDocumentManager.create documents the unit
    # as milliseconds.
    time_taken = Long()
    log_datetime = Date()
    # Client IP address; this is the field the 'geoip' ingest pipeline
    # defined in this module reads.
    ip = Keyword()
    # Optional geo-location data. NOTE(review): presumably filled in by the
    # 'geoip' ingest pipeline rather than by application code - confirm.
    geoip = Object(GeoIp, required=False)
    useragent = Object(UserAgent, required=False)

    class Index:
        # Index name and creation settings: a single shard and no replicas.
        name = 'performance'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'ElapsedTime'


class ElaspedTimeDocumentManager:
    """
    Manage the 'performance' index that stores request timing records.

    Provides static helpers to create the index on demand, to drop it, and
    to store one ElapsedTimeDocument per measured request.

    Index checks and deletions go through the default elasticsearch_dsl
    connection (registered at import time when ELASTICSEARCH_DSL is
    configured) instead of constructing a new Elasticsearch client on every
    call.

    NOTE: the class name is misspelled ("Elasped"); it is left unchanged so
    existing imports keep working.
    """

    @staticmethod
    def build_index():
        """
        Create the 'performance' index if it does not exist yet.

        The existence check is done on the shared default connection, which
        is the same connection ``ElapsedTimeDocument.init()`` uses.
        """
        client = connections.get_connection()
        if not client.indices.exists(index="performance"):
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """
        Delete the 'performance' index.

        HTTP 400 and 404 responses are ignored, so deleting an index that
        does not exist is not an error.
        """
        client = connections.get_connection()
        client.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """
        Build and save one performance record.

        Args:
            url (str): URL that was requested.
            time_taken (int): Request duration in milliseconds.
            log_datetime (datetime): Time the record was logged.
            useragent (object): Parsed user agent exposing ``browser``,
                ``os`` and ``device`` sub-objects plus ``ua_string`` and
                ``is_bot``.
            ip (str): IP address of the visitor.
        """
        # Make sure the target index exists before writing to it.
        ElaspedTimeDocumentManager.build_index()

        ua = UserAgent()
        ua.browser = UserAgentBrowser()
        ua.browser.Family = useragent.browser.family
        ua.browser.Version = useragent.browser.version_string

        ua.os = UserAgentOS()
        ua.os.Family = useragent.os.family
        ua.os.Version = useragent.os.version_string

        ua.device = UserAgentDevice()
        ua.device.Family = useragent.device.family
        ua.device.Brand = useragent.device.brand
        ua.device.Model = useragent.device.model
        ua.string = useragent.ua_string
        ua.is_bot = useragent.is_bot

        # The document id is the current time in whole milliseconds.
        # NOTE(review): two records created within the same millisecond get
        # the same id, so the later one presumably overwrites the earlier -
        # confirm whether that is acceptable.
        doc_id = int(round(time.time() * 1000))
        doc = ElapsedTimeDocument(
            meta={'id': doc_id},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua, ip=ip)
        # Index through the 'geoip' ingest pipeline.
        doc.save(pipeline="geoip")


class ArticleDocument(Document):
    """
    Search document for a blog article, stored in the 'blog' index.

    Holds the article text together with author, category, tag, status and
    counter fields so that articles can be searched through Elasticsearch.
    """
    # Text fields are indexed with the 'ik_max_word' analyzer and searched
    # with 'ik_smart'. NOTE(review): presumably both come from the IK analysis
    # plugin, which would then have to be installed on the cluster - confirm.
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # Declared as a single Object, although
    # ArticleDocumentManager.convert_to_doc supplies a list of tag dicts.
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    pub_time = Date()
    status = Text()
    comment_status = Text()
    type = Text()
    views = Integer()
    article_order = Integer()

    class Index:
        # Index name and creation settings: a single shard and no replicas.
        name = 'blog'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'Article'


class ArticleDocumentManager:
    """
    Manage the 'blog' article index.

    Offers helpers to create and delete the index, to turn Article model
    objects into ArticleDocument instances, and to (re)index documents.
    """

    def __init__(self):
        """
        Set up the manager; the article index is initialised right away.
        """
        self.create_index()

    def create_index(self):
        """
        Initialise the ArticleDocument index and its mapping.
        """
        ArticleDocument.init()

    def delete_index(self):
        """
        Delete the 'blog' index, ignoring HTTP 400 and 404 responses.
        """
        from elasticsearch import Elasticsearch
        client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        client.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """
        Convert Article model objects into ArticleDocument objects.

        Args:
            articles (iterable): Article model objects to convert.

        Returns:
            list: One ArticleDocument per article, in input order. The
            document id is the article id.
        """
        documents = []
        for article in articles:
            fields = {
                'meta': {'id': article.id},
                'body': article.body,
                'title': article.title,
                'author': {
                    'nickname': article.author.username,
                    'id': article.author.id,
                },
                'category': {
                    'name': article.category.name,
                    'id': article.category.id,
                },
                'tags': [
                    {'name': tag.name, 'id': tag.id}
                    for tag in article.tags.all()
                ],
                'pub_time': article.pub_time,
                'status': article.status,
                'comment_status': article.comment_status,
                'type': article.type,
                'views': article.views,
                'article_order': article.article_order,
            }
            documents.append(ArticleDocument(**fields))
        return documents

    def rebuild(self, articles=None):
        """
        Rebuild the article index.

        Args:
            articles (iterable, optional): Articles to index. When omitted,
                or when the value is falsy (e.g. an empty list), every
                Article in the database is indexed instead.
        """
        ArticleDocument.init()
        selected = articles or Article.objects.all()
        for document in self.convert_to_doc(selected):
            document.save()

    def update_docs(self, docs):
        """
        Save each of the given documents, one at a time.

        Args:
            docs (iterable): ArticleDocument objects to save.
        """
        for document in docs:
            document.save()