第五周注释

pull/25/head
djq 4 months ago
parent 98fb2e733b
commit d50f0e026d

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<module version="4">
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.12 (DjangoBlog-master)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (DjangoBlog-master)" project-jdk-type="Python SDK" />
</project>

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

@ -1,213 +1,253 @@
import time
import elasticsearch.client
from django.conf import settings
import time # 用于生成时间戳作为文档ID
import elasticsearch.client # Elasticsearch客户端工具
from django.conf import settings # 导入Django项目配置
# 导入Elasticsearch DSL相关模块用于定义文档结构和字段类型
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl.connections import connections # 用于创建Elasticsearch连接
from blog.models import Article
from blog.models import Article # 导入Django博客文章模型
# Elasticsearch integration is enabled only when the Django settings define
# ELASTICSEARCH_DSL.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

if ELASTICSEARCH_ENABLED:
    # Register the elasticsearch_dsl connection using the hosts configured in
    # the Django settings.
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
    from elasticsearch import Elasticsearch

    # Low-level client; used below only to manage ingest pipelines.
    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
    from elasticsearch.client import IngestClient

    c = IngestClient(es)
    try:
        # Probe for the 'geoip' ingest pipeline; the except branch below
        # handles the case where it does not exist yet.
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        # Create the pipeline: one geoip processor that reads the 'ip' field.
        # NOTE: the body is transmitted as JSON, which has no comment syntax.
        # Keep annotations here, never inside the string literal, otherwise
        # the request body is no longer valid JSON.
        c.put_pipeline('geoip', body='''{
              "description" : "Add geoip info",
              "processors" : [
                {
                  "geoip" : {
                    "field" : "ip"
                  }
                }
              ]
            }''')
class GeoIp(InnerDoc):
    """Inner document with the geographic attributes of an IP address.

    Used as the type of ``ElapsedTimeDocument.geoip``. All name/code fields
    are ``Keyword`` (stored as a single un-analyzed term).
    """
    continent_name = Keyword()
    country_iso_code = Keyword()
    country_name = Keyword()
    location = GeoPoint()  # geographic point (latitude/longitude)
class UserAgentBrowser(InnerDoc):
    """Inner document describing the browser part of a parsed User-Agent.

    Used as the type of ``UserAgent.browser`` and as the base class of
    ``UserAgentOS``.
    """
    Family = Keyword()   # browser family name
    Version = Keyword()  # browser version string
class UserAgentOS(UserAgentBrowser):
    """Inner document describing the operating system of a parsed User-Agent.

    Adds nothing of its own: it inherits the ``Family`` and ``Version``
    fields from ``UserAgentBrowser``.
    """
class UserAgentDevice(InnerDoc):
    """Inner document describing the device part of a parsed User-Agent.

    Used as the type of ``UserAgent.device``.
    """
    Family = Keyword()  # device family name
    Brand = Keyword()   # device brand
    Model = Keyword()   # device model
class UserAgent(InnerDoc):
    """Inner document holding a parsed User-Agent.

    Used as the type of ``ElapsedTimeDocument.useragent``. The three nested
    objects are declared with ``required=False``.
    """
    browser = Object(UserAgentBrowser, required=False)
    os = Object(UserAgentOS, required=False)
    device = Object(UserAgentDevice, required=False)
    string = Text()      # raw User-Agent header value
    is_bot = Boolean()   # bot flag copied from the parsed user agent
class ElapsedTimeDocument(Document):
    """Elasticsearch document recording the elapsed time of one request.

    Stored in the 'performance' index. Instances are built and saved by
    ``ElaspedTimeDocumentManager.create``.
    """
    url = Keyword()
    # Elapsed time as supplied by the caller.
    # NOTE(review): unit is not fixed here -- presumably milliseconds; confirm.
    time_taken = Long()
    log_datetime = Date()
    ip = Keyword()
    # Never assigned by this module's create(); presumably filled in by the
    # 'geoip' ingest pipeline that create() applies on save -- TODO confirm.
    geoip = Object(GeoIp, required=False)
    useragent = Object(UserAgent, required=False)

    class Index:
        name = 'performance'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'ElapsedTime'
class ElaspedTimeDocumentManager:
    """Static helpers managing the 'performance' index and its documents.

    The class name keeps its historical spelling ("Elasped") because callers
    import it under that name.
    """

    @staticmethod
    def build_index():
        """Create the 'performance' index if it does not exist yet."""
        from elasticsearch import Elasticsearch
        client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        res = client.indices.exists(index="performance")
        if not res:
            # Create the index from the ElapsedTimeDocument definition.
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the 'performance' index, tolerating 400/404 responses."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        # ignore=[400, 404]: do not raise on HTTP 400 (bad request) or
        # HTTP 404 (index not found), so deleting a missing index is a no-op.
        es.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Build one ElapsedTimeDocument and save it through the 'geoip' pipeline.

        Args:
            url: Request URL to record.
            time_taken: Elapsed time value to record.
            log_datetime: Timestamp of the log entry.
            useragent: Parsed user-agent object exposing ``browser``, ``os``
                and ``device`` sub-objects plus ``ua_string`` and ``is_bot``
                (the attributes read below).
            ip: Client IP address.
        """
        # Make sure the index exists before writing to it.
        ElaspedTimeDocumentManager.build_index()

        # Copy the parsed user-agent attributes into the inner documents.
        ua = UserAgent()
        ua.browser = UserAgentBrowser()
        ua.browser.Family = useragent.browser.family
        ua.browser.Version = useragent.browser.version_string

        ua.os = UserAgentOS()
        ua.os.Family = useragent.os.family
        ua.os.Version = useragent.os.version_string

        ua.device = UserAgentDevice()
        ua.device.Family = useragent.device.family
        ua.device.Brand = useragent.device.brand
        ua.device.Model = useragent.device.model
        ua.string = useragent.ua_string
        ua.is_bot = useragent.is_bot

        doc = ElapsedTimeDocument(
            meta={
                # Document id is the current time in whole milliseconds.
                # NOTE(review): this is NOT guaranteed unique -- two calls in
                # the same millisecond get the same id, and the later save
                # would presumably replace the earlier document.
                'id': int(round(time.time() * 1000))
            },
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua,
            ip=ip
        )
        # Save through the 'geoip' ingest pipeline (it processes the 'ip' field).
        doc.save(pipeline="geoip")
class ArticleDocument(Document):
    """Elasticsearch document mirroring a blog ``Article`` for search.

    Stored in the 'blog' index. Text fields that are meant to be searched are
    indexed with the 'ik_max_word' analyzer and queried with 'ik_smart'
    (analyzer names must be available on the cluster; presumably provided by
    the IK analysis plugin -- confirm against the deployment).
    """
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    # Author as an object with nickname and id.
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # Category as an object with name and id.
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # Tag objects with name and id; ArticleDocumentManager.convert_to_doc
    # supplies a list of such objects for this field.
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    pub_time = Date()
    status = Text()
    comment_status = Text()
    type = Text()
    views = Integer()
    article_order = Integer()

    class Index:
        name = 'blog'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'Article'
class ArticleDocumentManager():
    """Creates, deletes and populates the 'blog' index of ArticleDocument."""

    def __init__(self):
        """Ensure the index exists as soon as a manager is constructed."""
        self.create_index()

    def create_index(self):
        """Initialise the index from the ArticleDocument definition."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the 'blog' index, tolerating 400/404 responses."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        # ignore=[400, 404]: do not raise when the index is already missing.
        es.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """Convert an iterable of Article objects into ArticleDocument objects.

        Args:
            articles: Iterable of Article model instances.

        Returns:
            A list with one ArticleDocument per article, using the article's
            id as the document id.
        """
        return [
            ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    'nickname': article.author.username,
                    'id': article.author.id
                },
                category={
                    'name': article.category.name,
                    'id': article.category.id
                },
                # One {name, id} entry per related tag.
                tags=[{'name': t.name, 'id': t.id} for t in article.tags.all()],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order
            ) for article in articles
        ]

    def rebuild(self, articles=None):
        """Index the given articles, or every article when none are given.

        The index is initialised first; it is not deleted, so existing
        documents are only replaced where ids match.

        Args:
            articles: Optional iterable of Article instances. Any falsy value
                (``None`` or an empty collection) selects all articles.
        """
        ArticleDocument.init()
        articles = articles if articles else Article.objects.all()
        docs = self.convert_to_doc(articles)
        # Documents are saved one at a time.
        for doc in docs:
            doc.save()

    def update_docs(self, docs):
        """Save each document in ``docs`` exactly once."""
        for doc in docs:
            doc.save()
Loading…
Cancel
Save