# DjangoBlog/documents.py

#zf：导入时间模块，用于生成时间戳
import time

#zf：导入elasticsearch客户端模块
import elasticsearch.client
#zf：导入Django配置模块
from django.conf import settings
#zf：从elasticsearch_dsl导入各种字段类型和文档类
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
#zf：从elasticsearch_dsl.connections导入连接管理器
from elasticsearch_dsl.connections import connections

#zf：从blog.models导入Article模型
from blog.models import Article

# Elasticsearch support is considered enabled when the Django settings
# define an ELASTICSEARCH_DSL entry.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

if ELASTICSEARCH_ENABLED:
    # Client classes are only needed when Elasticsearch is actually in use.
    from elasticsearch import Elasticsearch
    from elasticsearch.client import IngestClient

    # Register the elasticsearch_dsl connection used by the Document classes.
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']],
    )

    # Low-level client plus an ingest client for managing pipelines.
    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
    c = IngestClient(es)

    try:
        # Probe for an existing pipeline named 'geoip'.
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        # No such pipeline yet: register one whose geoip processor reads
        # the document's "ip" field.
        c.put_pipeline('geoip', body='''{
              "description" : "Add geoip info",
              "processors" : [
                {
                  "geoip" : {
                    "field" : "ip"
                  }
                }
              ]
            }''')


# Inner document holding geo-location attributes.
# NOTE(review): presumably filled in by the 'geoip' ingest pipeline from the
# document's "ip" field — confirm against the Elasticsearch pipeline output.
class GeoIp(InnerDoc):
    # Continent name
    continent_name = Keyword()
    # ISO country code
    country_iso_code = Keyword()
    # Country name
    country_name = Keyword()
    # Geographic coordinates
    location = GeoPoint()


# Inner document describing the browser part of a user agent.
class UserAgentBrowser(InnerDoc):
    # Browser family
    Family = Keyword()
    # Browser version
    Version = Keyword()


# Inner document describing the operating system part of a user agent.
# It inherits from UserAgentBrowser and adds nothing of its own, so it
# carries the same Family/Version fields.
class UserAgentOS(UserAgentBrowser):
    pass


# Inner document describing the device part of a user agent.
class UserAgentDevice(InnerDoc):
    # Device family
    Family = Keyword()
    # Device brand
    Brand = Keyword()
    # Device model
    Model = Keyword()


# Inner document grouping everything recorded about a client's user agent.
class UserAgent(InnerDoc):
    # Browser details (optional)
    browser = Object(UserAgentBrowser, required=False)
    # Operating system details (optional)
    os = Object(UserAgentOS, required=False)
    # Device details (optional)
    device = Object(UserAgentDevice, required=False)
    # Full User-Agent string
    string = Text()
    # Whether the client is flagged as a bot
    is_bot = Boolean()


# Document recording the time taken for one URL, together with details about
# the client (IP, geo-location, user agent). Stored in the 'performance' index.
class ElapsedTimeDocument(Document):
    # URL that was requested
    url = Keyword()
    # Elapsed time (milliseconds according to the original author's note;
    # the unit is not enforced by this mapping)
    time_taken = Long()
    # Timestamp of the log record
    log_datetime = Date()
    # Client IP address
    ip = Keyword()
    # Geo-location details (optional)
    geoip = Object(GeoIp, required=False)
    # User-agent details (optional)
    useragent = Object(UserAgent, required=False)

    # Index configuration
    class Index:
        # Index name
        name = 'performance'
        settings = {
            # Number of shards
            "number_of_shards": 1,
            # Number of replicas
            "number_of_replicas": 0
        }

    # Document metadata
    class Meta:
        # Document type
        doc_type = 'ElapsedTime'


# Manager for the 'performance' index and its ElapsedTimeDocument records.
class ElaspedTimeDocumentManager:
    """Static helpers to create/delete the ``performance`` index and log timings.

    Index-level calls reuse the ``default`` elasticsearch_dsl connection (the
    same one ``ElapsedTimeDocument.init()`` and ``save()`` rely on) instead of
    building a new ``Elasticsearch`` client, and with it a new connection
    pool, on every call.
    """

    @staticmethod
    def build_index():
        """Initialise the ``performance`` index unless it already exists."""
        client = connections.get_connection()
        if not client.indices.exists(index="performance"):
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the ``performance`` index, ignoring HTTP 400 and 404 replies."""
        client = connections.get_connection()
        client.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def _build_useragent(useragent):
        """Copy the fields of a parsed user agent into a ``UserAgent`` inner doc.

        Args:
            useragent: Object exposing ``browser`` and ``os`` (each with
                ``family`` and ``version_string``), ``device`` (``family``,
                ``brand``, ``model``), ``ua_string`` and ``is_bot``.

        Returns:
            A populated ``UserAgent`` instance.
        """
        browser = UserAgentBrowser()
        browser.Family = useragent.browser.family
        browser.Version = useragent.browser.version_string

        operating_system = UserAgentOS()
        operating_system.Family = useragent.os.family
        operating_system.Version = useragent.os.version_string

        device = UserAgentDevice()
        device.Family = useragent.device.family
        device.Brand = useragent.device.brand
        device.Model = useragent.device.model

        ua = UserAgent()
        ua.browser = browser
        ua.os = operating_system
        ua.device = device
        ua.string = useragent.ua_string
        ua.is_bot = useragent.is_bot
        return ua

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Store one timing record, routed through the ``geoip`` pipeline.

        Args:
            url: URL the timing belongs to.
            time_taken: Elapsed time value to record.
            log_datetime: Timestamp of the record.
            useragent: Parsed user-agent object (see ``_build_useragent``).
            ip: Client IP address; the ``geoip`` pipeline reads this field.
        """
        # Make sure the index (and its mapping) exists before writing.
        ElaspedTimeDocumentManager.build_index()

        # The current time in milliseconds doubles as the document id.
        # NOTE(review): two records created within the same millisecond get
        # the same id, so the later save would presumably overwrite the
        # earlier one — confirm whether that is acceptable.
        doc_id = int(round(time.time() * 1000))

        doc = ElapsedTimeDocument(
            meta={'id': doc_id},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ElaspedTimeDocumentManager._build_useragent(useragent),
            ip=ip)
        # Save through the 'geoip' ingest pipeline.
        doc.save(pipeline="geoip")


# Document holding the searchable representation of an article.
# Stored in the 'blog' index.
class ArticleDocument(Document):
    # Article body: indexed with the ik_max_word analyzer, searched with ik_smart
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    # Article title: indexed with the ik_max_word analyzer, searched with ik_smart
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    # Author information
    author = Object(properties={
        # Nickname
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        # ID
        'id': Integer()
    })
    # Category information
    category = Object(properties={
        # Category name
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        # ID
        'id': Integer()
    })
    # Tag information
    tags = Object(properties={
        # Tag name
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        # ID
        'id': Integer()
    })

    # Publication time
    pub_time = Date()
    # Article status
    status = Text()
    # Comment status
    comment_status = Text()
    # Article type
    type = Text()
    # View count
    views = Integer()
    # Article ordering value
    article_order = Integer()

    # Index configuration
    class Index:
        # Index name
        name = 'blog'
        settings = {
            # Number of shards
            "number_of_shards": 1,
            # Number of replicas
            "number_of_replicas": 0
        }

    # Document metadata
    class Meta:
        # Document type
        doc_type = 'Article'


# Manager for the 'blog' index and its ArticleDocument records.
class ArticleDocumentManager():
    """Create, delete and populate the ``blog`` index from articles."""

    def __init__(self):
        """Ensure the index is initialised as soon as a manager is created."""
        self.create_index()

    def create_index(self):
        """Initialise the ``blog`` index via ``ArticleDocument.init()``."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the ``blog`` index, ignoring HTTP 400 and 404 replies.

        Reuses the ``default`` elasticsearch_dsl connection (already required
        by ``create_index``) rather than building a throwaway client.
        """
        client = connections.get_connection()
        client.indices.delete(index='blog', ignore=[400, 404])

    @staticmethod
    def _article_to_doc(article):
        """Build the ``ArticleDocument`` for a single article object."""
        return ArticleDocument(
            # The article id doubles as the document id.
            meta={'id': article.id},
            body=article.body,
            title=article.title,
            author={
                # NOTE(review): the 'nickname' field is fed from
                # author.username — confirm this is intended.
                'nickname': article.author.username,
                'id': article.author.id,
            },
            category={
                'name': article.category.name,
                'id': article.category.id,
            },
            tags=[{'name': tag.name, 'id': tag.id}
                  for tag in article.tags.all()],
            pub_time=article.pub_time,
            status=article.status,
            comment_status=article.comment_status,
            type=article.type,
            views=article.views,
            article_order=article.article_order)

    def convert_to_doc(self, articles):
        """Convert an iterable of articles into a list of ``ArticleDocument``.

        Args:
            articles: Iterable of article objects exposing ``id``, ``body``,
                ``title``, ``author``, ``category``, ``tags``, ``pub_time``,
                ``status``, ``comment_status``, ``type``, ``views`` and
                ``article_order``.

        Returns:
            A list with one unsaved ``ArticleDocument`` per article.
        """
        return [self._article_to_doc(article) for article in articles]

    def rebuild(self, articles=None):
        """Initialise the index and (re)index the given articles.

        Args:
            articles: Articles to index. Only when omitted (``None``) are all
                articles loaded; an explicitly empty collection indexes
                nothing instead of silently falling back to every article.
        """
        self.create_index()
        if articles is None:
            articles = Article.objects.all()
        self.update_docs(self.convert_to_doc(articles))

    def update_docs(self, docs):
        """Save every document in ``docs`` to Elasticsearch."""
        for doc in docs:
            doc.save()