为documents.py添加代码注释，完善项目文档可读性

4 months ago · 5bae016d7a
parent d9b1d026c9
commit 5bae016d7a
1 changed files with 131 additions and 77 deletions
--- a/djangoblog/src/DjangoBlog-master/DjangoBlog-master/blog/documents.py
+++ b/djangoblog/src/DjangoBlog-master/DjangoBlog-master/blog/documents.py
@@ -1,213 +1,267 @@
+# 导入时间处理模块
 import time

+# 导入Elasticsearch客户端相关模块
 import elasticsearch.client
+# 导入Django配置模块
 from django.conf import settings
+# 导入Elasticsearch DSL相关组件，用于定义文档结构
 from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
 from elasticsearch_dsl.connections import connections

+# 导入博客文章模型，用于数据同步
 from blog.models import Article

+# 检查是否启用Elasticsearch（通过判断配置中是否存在ELASTICSEARCH_DSL）
 ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

+# When Elasticsearch is enabled, set up the connection and the ingest pipeline at import time
 if ELASTICSEARCH_ENABLED:
+    # Register the default elasticsearch_dsl connection using the hosts from the Django settings
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
+    # Build a low-level Elasticsearch client against the same hosts
    from elasticsearch import Elasticsearch

    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
+
+    # IngestClient exposes the ingest-pipeline API of that client
    from elasticsearch.client import IngestClient

    c = IngestClient(es)
+
+    # Look up the 'geoip' pipeline and create it only when the lookup reports it missing
    try:
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
+        # Create the 'geoip' pipeline (geoip processor on the "ip" field). The body is sent as JSON, so it must not contain comments.
        c.put_pipeline('geoip', body='''{
              "description" : "Add geoip info",
              "processors" : [
                {
                  "geoip" : {
                    "field" : "ip"
                  }
                }
              ]
            }''')


+# 定义地理位置信息内部文档（嵌套在主文档中）
 class GeoIp(InnerDoc):
-    continent_name = Keyword()
-    country_iso_code = Keyword()
-    country_name = Keyword()
-    location = GeoPoint()
+    continent_name = Keyword()  # 大洲名称（ Keyword类型：不分词，适合精确查询）
+    country_iso_code = Keyword()  # 国家ISO代码（如CN、US）
+    country_name = Keyword()  # 国家名称
+    location = GeoPoint()  # 经纬度坐标（Elasticsearch地理点类型）


+# 定义用户代理浏览器信息内部文档
 class UserAgentBrowser(InnerDoc):
-    Family = Keyword()
-    Version = Keyword()
+    Family = Keyword()  # 浏览器家族（如Chrome、Firefox）
+    Version = Keyword()  # 浏览器版本


+# 定义用户代理操作系统信息内部文档（继承浏览器结构，字段相同）
 class UserAgentOS(UserAgentBrowser):
    pass


+# 定义用户代理设备信息内部文档
 class UserAgentDevice(InnerDoc):
-    Family = Keyword()
-    Brand = Keyword()
-    Model = Keyword()
+    Family = Keyword()  # 设备家族（如iPhone、Windows）
+    Brand = Keyword()  # 设备品牌（如Apple、Samsung）
+    Model = Keyword()  # 设备型号（如iPhone 13）


+# 定义用户代理整体信息内部文档（整合浏览器、系统、设备信息）
 class UserAgent(InnerDoc):
-    browser = Object(UserAgentBrowser, required=False)
-    os = Object(UserAgentOS, required=False)
-    device = Object(UserAgentDevice, required=False)
-    string = Text()
-    is_bot = Boolean()
+    browser = Object(UserAgentBrowser, required=False)  # 浏览器信息（可选）
+    os = Object(UserAgentOS, required=False)  # 操作系统信息（可选）
+    device = Object(UserAgentDevice, required=False)  # 设备信息（可选）
+    string = Text()  # 原始用户代理字符串（如"Mozilla/5.0..."）
+    is_bot = Boolean()  # 是否为爬虫机器人


+# 定义性能日志文档（记录访问性能数据）
 class ElapsedTimeDocument(Document):
-    url = Keyword()
-    time_taken = Long()
-    log_datetime = Date()
-    ip = Keyword()
-    geoip = Object(GeoIp, required=False)
-    useragent = Object(UserAgent, required=False)
-
+    url = Keyword()  # 访问的URL（精确匹配）
+    time_taken = Long()  # 页面加载耗时（毫秒）
+    log_datetime = Date()  # 日志记录时间
+    ip = Keyword()  # 访问者IP地址
+    geoip = Object(GeoIp, required=False)  # 地理位置信息（由geoip管道生成）
+    useragent = Object(UserAgent, required=False)  # 用户代理信息
+
+    # 索引配置
    class Index:
-        name = 'performance'
+        name = 'performance'  # 索引名称：performance（性能日志）
        settings = {
-            "number_of_shards": 1,
-            "number_of_replicas": 0
+            "number_of_shards": 1,  # 主分片数量
+            "number_of_replicas": 0  # 副本分片数量（单节点环境设为0）
        }

+    # 文档类型配置（Elasticsearch 7+后逐渐废弃，但DSL仍保留兼容）
    class Meta:
        doc_type = 'ElapsedTime'


+# 性能日志文档管理器（处理索引创建、删除、数据写入）
 class ElaspedTimeDocumentManager:
    @staticmethod
    def build_index():
+        """创建performance索引（如果不存在）"""
        from elasticsearch import Elasticsearch
+        # 连接Elasticsearch
        client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
+        # 检查索引是否存在
        res = client.indices.exists(index="performance")
        if not res:
+            # 初始化索引（根据ElapsedTimeDocument的定义创建映射）
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
+        """删除performance索引"""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
+        # Ignore 404 (index does not exist) and 400 (bad request) responses instead of raising
        es.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
+        """创建一条性能日志记录并写入Elasticsearch"""
+        # 确保索引存在
        ElaspedTimeDocumentManager.build_index()
+
+        # 构建用户代理信息对象
        ua = UserAgent()
        ua.browser = UserAgentBrowser()
-        ua.browser.Family = useragent.browser.family
-        ua.browser.Version = useragent.browser.version_string
+        ua.browser.Family = useragent.browser.family  # 浏览器家族
+        ua.browser.Version = useragent.browser.version_string  # 浏览器版本

        ua.os = UserAgentOS()
-        ua.os.Family = useragent.os.family
-        ua.os.Version = useragent.os.version_string
+        ua.os.Family = useragent.os.family  # 操作系统家族
+        ua.os.Version = useragent.os.version_string  # 操作系统版本

        ua.device = UserAgentDevice()
-        ua.device.Family = useragent.device.family
-        ua.device.Brand = useragent.device.brand
-        ua.device.Model = useragent.device.model
-        ua.string = useragent.ua_string
-        ua.is_bot = useragent.is_bot
+        ua.device.Family = useragent.device.family  # 设备家族
+        ua.device.Brand = useragent.device.brand  # 设备品牌
+        ua.device.Model = useragent.device.model  # 设备型号
+        ua.string = useragent.ua_string  # 原始用户代理字符串
+        ua.is_bot = useragent.is_bot  # 是否为爬虫

+        # 构建性能日志文档
        doc = ElapsedTimeDocument(
            meta={
-                'id': int(
-                    round(
-                        time.time() *
-                        1000))
+                # 用当前时间戳（毫秒）作为文档ID
+                'id': int(round(time.time() * 1000))
            },
-            url=url,
-            time_taken=time_taken,
-            log_datetime=log_datetime,
-            useragent=ua, ip=ip)
+            url=url,  # 访问URL
+            time_taken=time_taken,  # 耗时
+            log_datetime=log_datetime,  # 日志时间
+            useragent=ua,  # 用户代理信息
+            ip=ip  # IP地址
+        )
+        # 保存文档时应用geoip管道（自动解析IP对应的地理位置）
        doc.save(pipeline="geoip")


+# 定义文章文档（用于博客文章的搜索索引）
 class ArticleDocument(Document):
+    # 文章内容（使用ik分词器：max_word最大化分词，smart智能分词）
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
+    # 文章标题（同上分词配置）
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
+    # 作者信息（嵌套对象）
    author = Object(properties={
-        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
-        'id': Integer()
+        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # 作者昵称
+        'id': Integer()  # 作者ID
    })
+    # 分类信息（嵌套对象）
    category = Object(properties={
-        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
-        'id': Integer()
+        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # 分类名称
+        'id': Integer()  # 分类ID
    })
+    # 标签信息（嵌套对象列表）
    tags = Object(properties={
-        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
-        'id': Integer()
+        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # 标签名称
+        'id': Integer()  # 标签ID
    })
-
-    pub_time = Date()
-    status = Text()
-    comment_status = Text()
-    type = Text()
-    views = Integer()
-    article_order = Integer()
-
+    pub_time = Date()  # 发布时间
+    status = Text()  # 文章状态（如发布、草稿）
+    comment_status = Text()  # 评论状态（如开启、关闭）
+    type = Text()  # 文章类型（如原创、转载）
+    views = Integer()  # 浏览量
+    article_order = Integer()  # 文章排序权重
+
+    # 索引配置
    class Index:
-        name = 'blog'
+        name = 'blog'  # 索引名称：blog（博客文章）
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

+    # 文档类型配置
    class Meta:
        doc_type = 'Article'


+# 文章文档管理器（处理文章索引的创建、更新、重建）
 class ArticleDocumentManager():

    def __init__(self):
+        """初始化时创建索引（如果不存在）"""
        self.create_index()

    def create_index(self):
+        """创建blog索引（根据ArticleDocument定义初始化映射）"""
        ArticleDocument.init()

    def delete_index(self):
+        """删除blog索引"""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        es.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
+        """将Django模型对象列表转换为ArticleDocument列表"""
        return [
            ArticleDocument(
-                meta={
-                    'id': article.id},
-                body=article.body,
-                title=article.title,
+                meta={'id': article.id},  # 用文章ID作为文档ID
+                body=article.body,  # 文章内容
+                title=article.title,  # 文章标题
                author={
-                    'nickname': article.author.username,
-                    'id': article.author.id},
+                    'nickname': article.author.username,  # 作者用户名
+                    'id': article.author.id  # 作者ID
+                },
                category={
-                    'name': article.category.name,
-                    'id': article.category.id},
-                tags=[
-                    {
-                        'name': t.name,
-                        'id': t.id} for t in article.tags.all()],
-                pub_time=article.pub_time,
-                status=article.status,
-                comment_status=article.comment_status,
-                type=article.type,
-                views=article.views,
-                article_order=article.article_order) for article in articles]
+                    'name': article.category.name,  # 分类名称
+                    'id': article.category.id  # 分类ID
+                },
+                # 转换标签列表（多对多关系）
+                tags=[{'name': t.name, 'id': t.id} for t in article.tags.all()],
+                pub_time=article.pub_time,  # 发布时间
+                status=article.status,  # 文章状态
+                comment_status=article.comment_status,  # 评论状态
+                type=article.type,  # 文章类型
+                views=article.views,  # 浏览量
+                article_order=article.article_order  # 排序权重
+            ) for article in articles
+        ]

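    def rebuild(self, articles=None):
+        """Re-index articles: ensure the index exists, then save the given articles (all articles when none are given or the list is empty); the index is not deleted first."""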
+        # 初始化索引结构
        ArticleDocument.init()
+        # 如果未指定文章，则同步所有文章
        articles = articles if articles else Article.objects.all()
+        # 转换模型为文档对象
        docs = self.convert_to_doc(articles)
+        # Save the documents one at a time (one doc.save() call per document, not a bulk request)
        for doc in docs:
            doc.save()

    def update_docs(self, docs):
+        """Save every document in docs individually via doc.save() (no bulk request is issued)."""
        for doc in docs:
-            doc.save()
+            doc.save()