"""Elasticsearch document definitions and index helpers for the blog app.

Two indices are described here:

* ``performance`` -- one document per logged request (``ElapsedTimeDocument``),
  saved through the ``geoip`` ingest pipeline.
* ``blog`` -- one document per ``Article`` (``ArticleDocument``).

When Django settings define ``ELASTICSEARCH_DSL``, importing this module opens
the default connection and makes sure the ``geoip`` ingest pipeline exists.
"""
import time

import elasticsearch.client
import elasticsearch.exceptions
from django.conf import settings
from elasticsearch import Elasticsearch
from elasticsearch.client import IngestClient
from elasticsearch_dsl import (
    Boolean,
    Date,
    Document,
    GeoPoint,
    InnerDoc,
    Integer,
    Keyword,
    Long,
    Object,
    Text,
)
from elasticsearch_dsl.connections import connections

from blog.models import Article

# Elasticsearch support is considered enabled as soon as the Django settings
# expose an ``ELASTICSEARCH_DSL`` attribute.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

# Request body for the 'geoip' ingest pipeline: a single geoip processor that
# reads the document's 'ip' field.  This string is sent verbatim, so it must
# stay strict JSON -- JSON has no comment syntax, keep all notes out here.
_GEOIP_PIPELINE_BODY = '''{
    "description" : "Add geoip info",
    "processors" : [
        {
            "geoip" : {
                "field" : "ip"
            }
        }
    ]
}'''

if ELASTICSEARCH_ENABLED:
    # Default elasticsearch_dsl connection, used by Document.init()/save().
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])

    # Low-level client plus its ingest API wrapper, used only for the
    # pipeline bootstrap below.
    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
    c = IngestClient(es)
    try:
        # Probe for the pipeline; a missing pipeline raises NotFoundError.
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        c.put_pipeline('geoip', body=_GEOIP_PIPELINE_BODY)


class GeoIp(InnerDoc):
    """Geographic information about an IP address.

    Nested under ``ElapsedTimeDocument.geoip``; presumably populated by the
    'geoip' ingest pipeline rather than by this module -- nothing here
    assigns these fields.
    """
    continent_name = Keyword()
    country_iso_code = Keyword()
    country_name = Keyword()
    location = GeoPoint()


class UserAgentBrowser(InnerDoc):
    """Browser part of a parsed User-Agent: family name and version string."""
    Family = Keyword()
    Version = Keyword()


class UserAgentOS(UserAgentBrowser):
    """Operating-system part of a parsed User-Agent.

    Reuses the ``Family``/``Version`` fields of ``UserAgentBrowser``.
    """
    pass


class UserAgentDevice(InnerDoc):
    """Device part of a parsed User-Agent: family, brand and model."""
    Family = Keyword()
    Brand = Keyword()
    Model = Keyword()


class UserAgent(InnerDoc):
    """Parsed User-Agent, nested under ``ElapsedTimeDocument.useragent``."""
    browser = Object(UserAgentBrowser, required=False)
    os = Object(UserAgentOS, required=False)
    device = Object(UserAgentDevice, required=False)
    string = Text()  # raw User-Agent header value
    is_bot = Boolean()


class ElapsedTimeDocument(Document):
    """One timing record for a request, stored in the 'performance' index."""
    url = Keyword()
    time_taken = Long()  # unit is decided by the caller; not fixed here
    log_datetime = Date()
    ip = Keyword()
    geoip = Object(GeoIp, required=False)
    useragent = Object(UserAgent, required=False)

    class Index:
        name = 'performance'
        settings = {
            "number_of_shards": 1,
            # No replica shards -- presumably a single-node deployment.
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'ElapsedTime'


class ElaspedTimeDocumentManager:
    """Static helpers that create, drop and fill the 'performance' index.

    The class name keeps its historical spelling because callers import it
    under this name.
    """

    @staticmethod
    def build_index():
        """Create the 'performance' index if it does not exist yet."""
        client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        if not client.indices.exists(index="performance"):
            # init() creates the index with the mapping declared on
            # ElapsedTimeDocument.
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the 'performance' index, tolerating its absence."""
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        # 404 = index not found, 400 = bad request; neither is raised.
        es.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Build one ``ElapsedTimeDocument`` and save it via the 'geoip' pipeline.

        Args:
            url: Request URL to record.
            time_taken: Elapsed time for the request.
            log_datetime: Timestamp stored in ``log_datetime``.
            useragent: Parsed user-agent object exposing ``browser.family``,
                ``browser.version_string``, ``os.family``,
                ``os.version_string``, ``device.family``, ``device.brand``,
                ``device.model``, ``ua_string`` and ``is_bot``.
            ip: Client IP address; the pipeline's geoip processor reads it.
        """
        ElaspedTimeDocumentManager.build_index()

        # Copy the parsed user-agent into the nested document structure.
        ua = UserAgent()
        ua.browser = UserAgentBrowser()
        ua.browser.Family = useragent.browser.family
        ua.browser.Version = useragent.browser.version_string

        ua.os = UserAgentOS()
        ua.os.Family = useragent.os.family
        ua.os.Version = useragent.os.version_string

        ua.device = UserAgentDevice()
        ua.device.Family = useragent.device.family
        ua.device.Brand = useragent.device.brand
        ua.device.Model = useragent.device.model
        ua.string = useragent.ua_string
        ua.is_bot = useragent.is_bot

        doc = ElapsedTimeDocument(
            meta={
                # Document id is the current time in whole milliseconds.
                # NOTE(review): two records created within the same
                # millisecond share an id, so the later save replaces the
                # earlier document.
                'id': int(round(time.time() * 1000))
            },
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua,
            ip=ip,
        )
        doc.save(pipeline="geoip")


class ArticleDocument(Document):
    """Search document for a blog article, stored in the 'blog' index.

    Text fields use the ``ik_max_word`` / ``ik_smart`` analyzers, which are
    provided by the IK analysis plugin (assumed to be installed on the
    cluster -- TODO confirm).
    """
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # convert_to_doc() fills this with a list of {'name', 'id'} dicts.
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    pub_time = Date()
    status = Text()
    comment_status = Text()
    type = Text()
    views = Integer()
    article_order = Integer()

    class Index:
        name = 'blog'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'Article'


class ArticleDocumentManager:
    """Create, drop and populate the 'blog' index from ``Article`` rows."""

    def __init__(self):
        """Ensure the 'blog' index exists as soon as a manager is built."""
        self.create_index()

    def create_index(self):
        """Initialise the 'blog' index from ``ArticleDocument``'s mapping."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the 'blog' index, tolerating its absence."""
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        # 404 = index not found, 400 = bad request; neither is raised.
        es.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """Map an iterable of ``Article`` objects to ``ArticleDocument`` objects.

        Each document uses the article's primary key as its id.  The author's
        ``username`` is stored under the ``nickname`` key.

        Returns:
            A list of unsaved ``ArticleDocument`` instances.
        """
        return [
            ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    'nickname': article.author.username,
                    'id': article.author.id
                },
                category={
                    'name': article.category.name,
                    'id': article.category.id
                },
                tags=[{'name': t.name, 'id': t.id}
                      for t in article.tags.all()],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order,
            )
            for article in articles
        ]

    def rebuild(self, articles=None):
        """Index the given articles, or every article when none are given.

        Args:
            articles: Iterable of ``Article`` objects.  Any falsy value
                (``None`` or an empty collection) falls back to
                ``Article.objects.all()``.
        """
        ArticleDocument.init()
        articles = articles if articles else Article.objects.all()
        for doc in self.convert_to_doc(articles):
            doc.save()

    def update_docs(self, docs):
        """Save each already-built document in ``docs``, one request apiece."""
        for doc in docs:
            doc.save()