第五周注释

4 months ago · d50f0e026d
parent 98fb2e733b
commit d50f0e026d
5 changed files with 152 additions and 86 deletions
--- a/.idea/djq.iml
+++ b/.idea/djq.iml
@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module version="4">
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.12 (DjangoBlog-master)" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (DjangoBlog-master)" project-jdk-type="Python SDK" />
+</project>
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
--- a/src/DjangoBlog-master(1)/DjangoBlog-master/blog/documents.py
+++ b/src/DjangoBlog-master(1)/DjangoBlog-master/blog/documents.py
@ -1,213 +1,253 @@
-import time
-
-import elasticsearch.client
-from django.conf import settings
+import time  # 用于生成时间戳作为文档ID
+import elasticsearch.client  # Elasticsearch客户端工具
+from django.conf import settings  # 导入Django项目配置
+# 导入Elasticsearch DSL相关模块，用于定义文档结构和字段类型
 from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
-from elasticsearch_dsl.connections import connections
+from elasticsearch_dsl.connections import connections  # 用于创建Elasticsearch连接

-from blog.models import Article
+from blog.models import Article  # 导入Django博客文章模型

+# Elasticsearch support is enabled only when the Django settings define ELASTICSEARCH_DSL
 ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

 if ELASTICSEARCH_ENABLED:
+    # Register the elasticsearch_dsl connection using the hosts from the Django settings
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
-    from elasticsearch import Elasticsearch
+    from elasticsearch import Elasticsearch  # low-level Elasticsearch client

+    # Low-level client, used below to manage ingest pipelines
    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
-    from elasticsearch.client import IngestClient
+    from elasticsearch.client import IngestClient  # client for the ingest (pipeline) API

    c = IngestClient(es)
    try:
+        # Look up the 'geoip' ingest pipeline; NotFoundError below means it does not exist yet
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
+        # Create it: a geoip processor reading the 'ip' field. Keep notes out of the body string, '#' text there is not valid JSON.
        c.put_pipeline('geoip', body='''{
              "description" : "Add geoip info",
              "processors" : [
                {
                  "geoip" : {
                    "field" : "ip"
                  }
                }
              ]
            }''')


+# Inner document holding geo-location data for an IP address (used as ElapsedTimeDocument.geoip)
 class GeoIp(InnerDoc):
-    continent_name = Keyword()
-    country_iso_code = Keyword()
-    country_name = Keyword()
-    location = GeoPoint()
+    continent_name = Keyword()  # continent name (Keyword: indexed as one un-analyzed term)
+    country_iso_code = Keyword()  # country ISO code
+    country_name = Keyword()  # country name
+    location = GeoPoint()  # geographic coordinates (Elasticsearch geo point type)


+# Inner document for the browser part of a user agent (used as UserAgent.browser)
 class UserAgentBrowser(InnerDoc):
-    Family = Keyword()
-    Version = Keyword()
+    Family = Keyword()  # browser family name
+    Version = Keyword()  # browser version string


+# Inner document for the operating-system part of a user agent; reuses the browser field layout
 class UserAgentOS(UserAgentBrowser):
-    pass
+    pass  # inherits Family and Version from UserAgentBrowser


+# Inner document for the device part of a user agent (used as UserAgent.device)
 class UserAgentDevice(InnerDoc):
-    Family = Keyword()
-    Brand = Keyword()
-    Model = Keyword()
+    Family = Keyword()  # device family name
+    Brand = Keyword()  # device brand
+    Model = Keyword()  # device model


+# Inner document holding the complete user-agent information (used as ElapsedTimeDocument.useragent)
 class UserAgent(InnerDoc):
-    browser = Object(UserAgentBrowser, required=False)
-    os = Object(UserAgentOS, required=False)
-    device = Object(UserAgentDevice, required=False)
-    string = Text()
-    is_bot = Boolean()
+    browser = Object(UserAgentBrowser, required=False)  # browser details (optional)
+    os = Object(UserAgentOS, required=False)  # operating-system details (optional)
+    device = Object(UserAgentDevice, required=False)  # device details (optional)
+    string = Text()  # raw User-Agent string
+    is_bot = Boolean()  # whether the client was identified as a bot


+# Elasticsearch document for one timing record: URL, elapsed time, timestamp, client IP and user agent
 class ElapsedTimeDocument(Document):
-    url = Keyword()
-    time_taken = Long()
-    log_datetime = Date()
-    ip = Keyword()
-    geoip = Object(GeoIp, required=False)
-    useragent = Object(UserAgent, required=False)
+    url = Keyword()  # URL the record refers to (exact-match keyword)
+    time_taken = Long()  # elapsed time (the unit is not established in this file)
+    log_datetime = Date()  # time the record was logged
+    ip = Keyword()  # client IP address
+    geoip = Object(GeoIp, required=False)  # geo-location data (optional); documents are saved through the 'geoip' pipeline
+    useragent = Object(UserAgent, required=False)  # user-agent information (optional)

    class Index:
-        name = 'performance'
+        name = 'performance'  # index that stores these timing records
        settings = {
-            "number_of_shards": 1,
-            "number_of_replicas": 0
+            "number_of_shards": 1,  # number of primary shards
+            "number_of_replicas": 0  # number of replica shards (none)
        }

    class Meta:
-        doc_type = 'ElapsedTime'
+        doc_type = 'ElapsedTime'  # document type name


+# Manager for ElapsedTimeDocument: creates and deletes the 'performance' index and inserts records
 class ElaspedTimeDocumentManager:
    @staticmethod
    def build_index():
+        """Create the 'performance' index if it does not exist yet."""
        from elasticsearch import Elasticsearch
        client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
+        # Check whether the index already exists
        res = client.indices.exists(index="performance")
        if not res:
+            # Create the index and its mapping from the ElapsedTimeDocument definition
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
+        """Delete the 'performance' index."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
+        # Do not raise on HTTP 400 (bad request) or 404 (index not found) responses
        es.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
+        """Build one ElapsedTimeDocument from the given values and save it through the 'geoip' pipeline."""
+        # Make sure the index exists before writing
        ElaspedTimeDocumentManager.build_index()
+
+        # Copy the parsed user-agent attributes into the nested UserAgent document
        ua = UserAgent()
        ua.browser = UserAgentBrowser()
-        ua.browser.Family = useragent.browser.family
-        ua.browser.Version = useragent.browser.version_string
+        ua.browser.Family = useragent.browser.family  # browser family
+        ua.browser.Version = useragent.browser.version_string  # browser version

        ua.os = UserAgentOS()
-        ua.os.Family = useragent.os.family
-        ua.os.Version = useragent.os.version_string
+        ua.os.Family = useragent.os.family  # operating-system family
+        ua.os.Version = useragent.os.version_string  # operating-system version

        ua.device = UserAgentDevice()
-        ua.device.Family = useragent.device.family
-        ua.device.Brand = useragent.device.brand
-        ua.device.Model = useragent.device.model
-        ua.string = useragent.ua_string
-        ua.is_bot = useragent.is_bot
+        ua.device.Family = useragent.device.family  # device family
+        ua.device.Brand = useragent.device.brand  # device brand
+        ua.device.Model = useragent.device.model  # device model
+        ua.string = useragent.ua_string  # raw User-Agent string
+        ua.is_bot = useragent.is_bot  # bot flag

+        # Assemble the timing document
        doc = ElapsedTimeDocument(
            meta={
-                'id': int(
-                    round(
-                        time.time() *
-                        1000))
+                # Current time in milliseconds as the document id. NOTE(review): two records created in the same millisecond would share an id - confirm this is acceptable
+                'id': int(round(time.time() * 1000))
            },
-            url=url,
-            time_taken=time_taken,
-            log_datetime=log_datetime,
-            useragent=ua, ip=ip)
+            url=url,  # URL the record refers to
+            time_taken=time_taken,  # elapsed time
+            log_datetime=log_datetime,  # log timestamp
+            useragent=ua,  # user-agent information
+            ip=ip  # client IP
+        )
+        # Save through the 'geoip' ingest pipeline, which processes the document's 'ip' field
        doc.save(pipeline="geoip")


+# Elasticsearch document holding blog article data for full-text search
 class ArticleDocument(Document):
+    # Article body: indexed with the 'ik_max_word' analyzer, searched with 'ik_smart'
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
+    # Article title: same analyzers as the body
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
+    # Author (object field)
    author = Object(properties={
-        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
-        'id': Integer()
+        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # author display name
+        'id': Integer()  # author id
    })
+    # Category (object field)
    category = Object(properties={
-        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
-        'id': Integer()
+        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # category name
+        'id': Integer()  # category id
    })
+    # Tags (object field; ArticleDocumentManager.convert_to_doc supplies a list of name/id dicts)
    tags = Object(properties={
-        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
-        'id': Integer()
+        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # tag name
+        'id': Integer()  # tag id
    })

-    pub_time = Date()
-    status = Text()
-    comment_status = Text()
-    type = Text()
-    views = Integer()
-    article_order = Integer()
+    pub_time = Date()  # publication time
+    status = Text()  # article status, copied from Article.status
+    comment_status = Text()  # comment status, copied from Article.comment_status
+    type = Text()  # article type, copied from Article.type (values are defined by the model - verify there)
+    views = Integer()  # view count
+    article_order = Integer()  # ordering value, copied from Article.article_order

    class Index:
-        name = 'blog'
+        name = 'blog'  # index that stores the article documents
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
-        doc_type = 'Article'
+        doc_type = 'Article'  # document type name


+# Manager for ArticleDocument: creates and deletes the 'blog' index and syncs articles into it
 class ArticleDocumentManager():

    def __init__(self):
+        """Call create_index() so the 'blog' index is initialised on construction."""
        self.create_index()

    def create_index(self):
+        """Initialise the 'blog' index from the ArticleDocument definition."""
        ArticleDocument.init()

    def delete_index(self):
+        """Delete the 'blog' index; HTTP 400 and 404 responses are ignored."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        es.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
+        """Convert an iterable of Article model objects into a list of ArticleDocument objects."""
        return [
            ArticleDocument(
-                meta={
-                    'id': article.id},
-                body=article.body,
-                title=article.title,
+                meta={'id': article.id},  # the article id doubles as the document id
+                body=article.body,  # article body
+                title=article.title,  # article title
                author={
-                    'nickname': article.author.username,
-                    'id': article.author.id},
+                    'nickname': article.author.username,  # author username
+                    'id': article.author.id  # author id
+                },
                category={
-                    'name': article.category.name,
-                    'id': article.category.id},
-                tags=[
-                    {
-                        'name': t.name,
-                        'id': t.id} for t in article.tags.all()],
-                pub_time=article.pub_time,
-                status=article.status,
-                comment_status=article.comment_status,
-                type=article.type,
-                views=article.views,
-                article_order=article.article_order) for article in articles]
+                    'name': article.category.name,  # category name
+                    'id': article.category.id  # category id
+                },
+                # One name/id dict per tag returned by article.tags.all()
+                tags=[{'name': t.name, 'id': t.id} for t in article.tags.all()],
+                pub_time=article.pub_time,  # publication time
+                status=article.status,  # article status
+                comment_status=article.comment_status,  # comment status
+                type=article.type,  # article type
+                views=article.views,  # view count
+                article_order=article.article_order  # ordering value
+            ) for article in articles
+        ]

    def rebuild(self, articles=None):
-        ArticleDocument.init()
+        """Sync articles into the 'blog' index; every Article is synced when none are given."""
+        ArticleDocument.init()  # initialise the index and mapping first
+        # Fall back to all articles when `articles` is None or empty
        articles = articles if articles else Article.objects.all()
+        # Convert the model objects into documents
        docs = self.convert_to_doc(articles)
+        # Save the documents one by one
        for doc in docs:
            doc.save()

    def update_docs(self, docs):
+        """Save each of the given documents, one by one."""
        for doc in docs:
-            doc.save()
+            doc.save()