diff --git a/.idea/djq.iml b/.idea/djq.iml
new file mode 100644
index 0000000..ec63674
--- /dev/null
+++ b/.idea/djq.iml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..4f107a0
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/DjangoBlog-master(1)/DjangoBlog-master/blog/documents.py b/src/DjangoBlog-master(1)/DjangoBlog-master/blog/documents.py
index 0f1db7b..2c07654 100644
--- a/src/DjangoBlog-master(1)/DjangoBlog-master/blog/documents.py
+++ b/src/DjangoBlog-master(1)/DjangoBlog-master/blog/documents.py
@@ -1,213 +1,253 @@
-import time
-
-import elasticsearch.client
-from django.conf import settings
+import time # 用于生成时间戳作为文档ID
+import elasticsearch.client # Elasticsearch客户端工具
+from django.conf import settings # 导入Django项目配置
+# 导入Elasticsearch DSL相关模块,用于定义文档结构和字段类型
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
-from elasticsearch_dsl.connections import connections
+from elasticsearch_dsl.connections import connections # 用于创建Elasticsearch连接
-from blog.models import Article
+from blog.models import Article # 导入Django博客文章模型
+# Elasticsearch support is enabled only when Django settings define ELASTICSEARCH_DSL.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')
if ELASTICSEARCH_ENABLED:
+ # Create the elasticsearch_dsl connection using the hosts from Django settings.
connections.create_connection(
hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
- from elasticsearch import Elasticsearch
+ from elasticsearch import Elasticsearch # low-level Elasticsearch client class
+ # Build a low-level client against the same hosts.
es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
- from elasticsearch.client import IngestClient
+ from elasticsearch.client import IngestClient # client for the ingest (pipeline) API
c = IngestClient(es)
try:
+ # Probe for an ingest pipeline named 'geoip'; NotFoundError below means it is missing.
c.get_pipeline('geoip')
except elasticsearch.exceptions.NotFoundError:
+ # Missing: create it with one geoip processor reading the 'ip' field. The body is JSON, so keep '#' notes out of the literal.
c.put_pipeline('geoip', body='''{
"description" : "Add geoip info",
"processors" : [
{
"geoip" : {
"field" : "ip"
}
}
]
}''')
+# Inner document holding geo-location fields for an IP address (used as the type of ElapsedTimeDocument.geoip).
class GeoIp(InnerDoc):
- continent_name = Keyword()
- country_iso_code = Keyword()
- country_name = Keyword()
- location = GeoPoint()
+ continent_name = Keyword() # continent name, stored as a keyword field
+ country_iso_code = Keyword() # country ISO code (presumably values such as CN or US)
+ country_name = Keyword() # country name
+ location = GeoPoint() # geo point field (coordinates)
+# Inner document holding the browser part of a parsed User-Agent (used as the type of UserAgent.browser).
class UserAgentBrowser(InnerDoc):
- Family = Keyword()
- Version = Keyword()
+ Family = Keyword() # browser family (presumably values such as Chrome or Firefox)
+ Version = Keyword() # browser version string
+# Inner document holding the operating-system part of a parsed User-Agent; reuses the browser field layout.
class UserAgentOS(UserAgentBrowser):
- pass
+ pass # no extra fields: Family and Version are inherited from UserAgentBrowser
+# Inner document holding the device part of a parsed User-Agent (used as the type of UserAgent.device).
class UserAgentDevice(InnerDoc):
- Family = Keyword()
- Brand = Keyword()
- Model = Keyword()
+ Family = Keyword() # device family
+ Brand = Keyword() # device brand
+ Model = Keyword() # device model
+# Inner document holding the full parsed User-Agent (used as the type of ElapsedTimeDocument.useragent).
class UserAgent(InnerDoc):
- browser = Object(UserAgentBrowser, required=False)
- os = Object(UserAgentOS, required=False)
- device = Object(UserAgentDevice, required=False)
- string = Text()
- is_bot = Boolean()
+ browser = Object(UserAgentBrowser, required=False) # browser info (optional)
+ os = Object(UserAgentOS, required=False) # operating-system info (optional)
+ device = Object(UserAgentDevice, required=False) # device info (optional)
+ string = Text() # raw User-Agent string
+ is_bot = Boolean() # whether the client was flagged as a bot
+# Elasticsearch document recording how long a request took, stored in the 'performance' index.
class ElapsedTimeDocument(Document):
- url = Keyword()
- time_taken = Long()
- log_datetime = Date()
- ip = Keyword()
- geoip = Object(GeoIp, required=False)
- useragent = Object(UserAgent, required=False)
+ url = Keyword() # request URL, stored as a keyword field
+ time_taken = Long() # elapsed time (unit is not shown here; confirm against the caller)
+ log_datetime = Date() # when the entry was logged
+ ip = Keyword() # client IP address
+ geoip = Object(GeoIp, required=False) # geo-location info (optional; presumably filled by the 'geoip' ingest pipeline)
+ useragent = Object(UserAgent, required=False) # parsed User-Agent info (optional)
class Index:
- name = 'performance'
+ name = 'performance' # index name
settings = {
- "number_of_shards": 1,
- "number_of_replicas": 0
+ "number_of_shards": 1, # number of primary shards
+ "number_of_replicas": 0 # number of replica shards
}
class Meta:
- doc_type = 'ElapsedTime'
+ doc_type = 'ElapsedTime' # document type name
+# Manager for ElapsedTimeDocument: creates/deletes the 'performance' index and inserts documents.
class ElaspedTimeDocumentManager:
@staticmethod
def build_index():
+ """Create the 'performance' index if it does not already exist."""
from elasticsearch import Elasticsearch
client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
+ # Ask Elasticsearch whether the index exists.
res = client.indices.exists(index="performance")
if not res:
+ # Create the index and its mapping from the ElapsedTimeDocument definition.
ElapsedTimeDocument.init()
@staticmethod
def delete_index():
+ """Delete the 'performance' index."""
from elasticsearch import Elasticsearch
es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
+ # Tolerate HTTP 400 (bad request) and 404 (index not found) instead of raising.
es.indices.delete(index='performance', ignore=[400, 404])
@staticmethod
def create(url, time_taken, log_datetime, useragent, ip):
+ """Build one ElapsedTimeDocument from the arguments and save it through the 'geoip' pipeline."""
+ # Make sure the index exists before writing.
ElaspedTimeDocumentManager.build_index()
+
+ # Copy the parsed user-agent attributes into a nested UserAgent document.
ua = UserAgent()
ua.browser = UserAgentBrowser()
- ua.browser.Family = useragent.browser.family
- ua.browser.Version = useragent.browser.version_string
+ ua.browser.Family = useragent.browser.family # browser family
+ ua.browser.Version = useragent.browser.version_string # browser version
ua.os = UserAgentOS()
- ua.os.Family = useragent.os.family
- ua.os.Version = useragent.os.version_string
+ ua.os.Family = useragent.os.family # operating-system family
+ ua.os.Version = useragent.os.version_string # operating-system version
ua.device = UserAgentDevice()
- ua.device.Family = useragent.device.family
- ua.device.Brand = useragent.device.brand
- ua.device.Model = useragent.device.model
- ua.string = useragent.ua_string
- ua.is_bot = useragent.is_bot
+ ua.device.Family = useragent.device.family # device family
+ ua.device.Brand = useragent.device.brand # device brand
+ ua.device.Model = useragent.device.model # device model
+ ua.string = useragent.ua_string # raw User-Agent string
+ ua.is_bot = useragent.is_bot # bot flag
+ # Assemble the performance log document.
doc = ElapsedTimeDocument(
meta={
- 'id': int(
- round(
- time.time() *
- 1000))
+ # Millisecond timestamp as the document ID. NOTE(review): two calls in the same millisecond get the same ID.
+ 'id': int(round(time.time() * 1000))
},
- url=url,
- time_taken=time_taken,
- log_datetime=log_datetime,
- useragent=ua, ip=ip)
+ url=url, # request URL
+ time_taken=time_taken, # elapsed time
+ log_datetime=log_datetime, # log timestamp
+ useragent=ua, # nested user-agent document built above
+ ip=ip # client IP
+ )
+ # Save through the 'geoip' ingest pipeline (its processor reads the 'ip' field).
doc.save(pipeline="geoip")
+# Elasticsearch document for a blog article, stored in the 'blog' index.
class ArticleDocument(Document):
+ # Article body: indexed with 'ik_max_word', searched with 'ik_smart' (presumably the IK analysis plugin; confirm it is installed).
body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
+ # Article title, using the same analyzer pair as body.
title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
+ # Author, as an object field with nickname and id.
author = Object(properties={
- 'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
- 'id': Integer()
+ 'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'), # author display name
+ 'id': Integer() # author ID
})
+ # Category, as an object field with name and id.
category = Object(properties={
- 'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
- 'id': Integer()
+ 'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'), # category name
+ 'id': Integer() # category ID
})
+ # Tags, as an object field with name and id (ArticleDocumentManager.convert_to_doc passes a list of such dicts).
tags = Object(properties={
- 'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
- 'id': Integer()
+ 'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'), # tag name
+ 'id': Integer() # tag ID
})
- pub_time = Date()
- status = Text()
- comment_status = Text()
- type = Text()
- views = Integer()
- article_order = Integer()
+ pub_time = Date() # publication time
+ status = Text() # article status (allowed values are defined on the Article model, not shown here)
+ comment_status = Text() # comment status (allowed values are defined on the Article model, not shown here)
+ type = Text() # article type (allowed values are defined on the Article model, not shown here)
+ views = Integer() # view count
+ article_order = Integer() # ordering value copied from the Article model
class Index:
- name = 'blog'
+ name = 'blog' # index name
settings = {
"number_of_shards": 1,
"number_of_replicas": 0
}
class Meta:
- doc_type = 'Article'
+ doc_type = 'Article' # document type name
+# Manager for ArticleDocument: creates/deletes the 'blog' index and syncs articles into it.
class ArticleDocumentManager():
def __init__(self):
+ """Call create_index() so the 'blog' index is initialised on construction."""
self.create_index()
def create_index(self):
+ """Initialise the 'blog' index from the ArticleDocument definition."""
ArticleDocument.init()
def delete_index(self):
+ """Delete the 'blog' index, ignoring HTTP 400 and 404 responses."""
from elasticsearch import Elasticsearch
es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
es.indices.delete(index='blog', ignore=[400, 404])
def convert_to_doc(self, articles):
+ """Convert an iterable of Article model objects into a list of ArticleDocument objects."""
return [
ArticleDocument(
- meta={
- 'id': article.id},
- body=article.body,
- title=article.title,
+ meta={'id': article.id}, # the article's id doubles as the document ID
+ body=article.body, # article body
+ title=article.title, # article title
author={
- 'nickname': article.author.username,
- 'id': article.author.id},
+ 'nickname': article.author.username, # note: filled from username, not a nickname attribute
+ 'id': article.author.id # author ID
+ },
category={
- 'name': article.category.name,
- 'id': article.category.id},
- tags=[
- {
- 'name': t.name,
- 'id': t.id} for t in article.tags.all()],
- pub_time=article.pub_time,
- status=article.status,
- comment_status=article.comment_status,
- type=article.type,
- views=article.views,
- article_order=article.article_order) for article in articles]
+ 'name': article.category.name, # category name
+ 'id': article.category.id # category ID
+ },
+ # One dict per tag, read from the article's tags relation.
+ tags=[{'name': t.name, 'id': t.id} for t in article.tags.all()],
+ pub_time=article.pub_time, # publication time
+ status=article.status, # article status
+ comment_status=article.comment_status, # comment status
+ type=article.type, # article type
+ views=article.views, # view count
+ article_order=article.article_order # ordering value
+ ) for article in articles
+ ]
def rebuild(self, articles=None):
- ArticleDocument.init()
+ """Sync articles into the 'blog' index; a falsy `articles` (None or empty) means every Article."""
+ ArticleDocument.init() # initialise the index/mapping before writing
+ # Fall back to all articles when none were supplied.
articles = articles if articles else Article.objects.all()
+ # Convert the model objects into documents.
docs = self.convert_to_doc(articles)
+ # Save the documents one at a time (one save() call each, not a bulk request).
for doc in docs:
doc.save()
def update_docs(self, docs):
+ """Save each document in `docs`, one save() call per document."""
for doc in docs:
- doc.save()
+ doc.save()
\ No newline at end of file