为documents.py添加代码注释,完善项目文档可读性

yxy_branch
严欣怡 4 months ago
parent d9b1d026c9
commit 5bae016d7a

@ -1,213 +1,267 @@
# 导入时间处理模块
import time
# 导入Elasticsearch客户端相关模块
import elasticsearch.client
# 导入Django配置模块
from django.conf import settings
# 导入Elasticsearch DSL相关组件用于定义文档结构
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
from elasticsearch_dsl.connections import connections
# 导入博客文章模型,用于数据同步
from blog.models import Article
# 检查是否启用Elasticsearch通过判断配置中是否存在ELASTICSEARCH_DSL
# Elasticsearch support is considered enabled when the Django settings
# define ELASTICSEARCH_DSL.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

if ELASTICSEARCH_ENABLED:
    # Register the default elasticsearch_dsl connection using the configured hosts.
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])

    # Low-level client, used below to manage the ingest pipeline.
    from elasticsearch import Elasticsearch
    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])

    from elasticsearch.client import IngestClient
    c = IngestClient(es)

    # Create the 'geoip' ingest pipeline only when it does not exist yet.
    try:
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        # NOTE: this string is sent verbatim as the request body, so it must
        # remain valid JSON -- do not put '#' comments inside the literal.
        # The pipeline runs the geoip processor on the "ip" field.
        c.put_pipeline('geoip', body='''{
              "description" : "Add geoip info",
              "processors" : [
                {
                  "geoip" : {
                    "field" : "ip"
                  }
                }
              ]
            }''')
# 定义地理位置信息内部文档(嵌套在主文档中)
class GeoIp(InnerDoc):
    """Inner document holding the geographic fields of a log entry."""

    continent_name = Keyword()  # continent name; Keyword fields are matched exactly
    country_iso_code = Keyword()  # country ISO code, e.g. CN, US
    country_name = Keyword()  # country name
    location = GeoPoint()  # coordinates stored as an Elasticsearch geo point
# 定义用户代理浏览器信息内部文档
class UserAgentBrowser(InnerDoc):
    """Inner document describing the browser part of a user agent."""

    Family = Keyword()  # browser family, e.g. Chrome, Firefox
    Version = Keyword()  # browser version string
# 定义用户代理操作系统信息内部文档(继承浏览器结构,字段相同)
class UserAgentOS(UserAgentBrowser):
    """Inner document for the operating system of a user agent.

    Adds nothing of its own: it reuses the ``Family`` and ``Version`` fields
    inherited from ``UserAgentBrowser``.
    """
# 定义用户代理设备信息内部文档
class UserAgentDevice(InnerDoc):
    """Inner document describing the device part of a user agent."""

    Family = Keyword()  # device family
    Brand = Keyword()  # device brand, e.g. Apple, Samsung
    Model = Keyword()  # device model
# 定义用户代理整体信息内部文档(整合浏览器、系统、设备信息)
class UserAgent(InnerDoc):
    """Inner document combining browser, OS and device details of a client."""

    browser = Object(UserAgentBrowser, required=False)  # optional browser details
    os = Object(UserAgentOS, required=False)  # optional operating-system details
    device = Object(UserAgentDevice, required=False)  # optional device details
    string = Text()  # raw user-agent string
    is_bot = Boolean()  # whether the client was flagged as a bot
# 定义性能日志文档(记录访问性能数据)
class ElapsedTimeDocument(Document):
    """Elasticsearch document for one request-timing log entry.

    Entries are stored in the ``performance`` index.
    """

    url = Keyword()  # requested URL, matched exactly
    time_taken = Long()  # elapsed time; presumably milliseconds -- confirm with the caller
    log_datetime = Date()  # when the entry was logged
    ip = Keyword()  # client IP address
    # Geographic details; presumably filled in by the 'geoip' ingest pipeline
    # that ElaspedTimeDocumentManager.create() applies on save.
    geoip = Object(GeoIp, required=False)
    useragent = Object(UserAgent, required=False)  # optional client details

    class Index:
        # Index name and settings: one primary shard, no replicas.
        name = 'performance'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'ElapsedTime'
# 性能日志文档管理器(处理索引创建、删除、数据写入)
class ElaspedTimeDocumentManager:
    """Helpers to create and delete the ``performance`` index and write entries to it."""

    @staticmethod
    def build_index():
        """Create the ``performance`` index if it does not exist yet."""
        from elasticsearch import Elasticsearch
        client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        res = client.indices.exists(index="performance")
        if not res:
            # init() creates the index with the mapping declared on the document.
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the ``performance`` index; HTTP 400 and 404 responses are ignored."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        # ignore=[400, 404]: do not raise on these statuses (e.g. index missing).
        es.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Build one timing log entry and save it through the ``geoip`` pipeline.

        Args:
            url: Requested URL.
            time_taken: Elapsed time for the request.
            log_datetime: Timestamp of the log entry.
            useragent: Parsed user-agent object exposing ``browser``, ``os`` and
                ``device`` (each with ``family``; browser/os also
                ``version_string``; device also ``brand`` and ``model``),
                plus ``ua_string`` and ``is_bot``.
            ip: Client IP address; the ``geoip`` pipeline reads this field.
        """
        # Make sure the index exists before writing.
        ElaspedTimeDocumentManager.build_index()

        # Copy the parsed user-agent details into the nested document objects.
        ua = UserAgent()

        ua.browser = UserAgentBrowser()
        ua.browser.Family = useragent.browser.family
        ua.browser.Version = useragent.browser.version_string

        ua.os = UserAgentOS()
        ua.os.Family = useragent.os.family
        ua.os.Version = useragent.os.version_string

        ua.device = UserAgentDevice()
        ua.device.Family = useragent.device.family
        ua.device.Brand = useragent.device.brand
        ua.device.Model = useragent.device.model

        ua.string = useragent.ua_string
        ua.is_bot = useragent.is_bot

        doc = ElapsedTimeDocument(
            # Current time in milliseconds is used as the document id.
            meta={'id': int(round(time.time() * 1000))},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua,
            ip=ip,
        )
        # Saving through the 'geoip' ingest pipeline lets Elasticsearch process the ip field.
        doc.save(pipeline="geoip")
# 定义文章文档(用于博客文章的搜索索引)
class ArticleDocument(Document):
    """Elasticsearch document used to index blog articles for search.

    Text fields are indexed with the ``ik_max_word`` analyzer and searched with
    ``ik_smart``. Documents are stored in the ``blog`` index.
    """

    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')  # article content
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')  # article title

    # Author as a nested object: nickname plus numeric id.
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    # Category as a nested object: name plus numeric id.
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    # Tag objects (name plus numeric id); ArticleDocumentManager.convert_to_doc
    # passes a list of them.
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    pub_time = Date()  # publication time
    status = Text()  # article status
    comment_status = Text()  # comment status
    type = Text()  # article type
    views = Integer()  # view count
    article_order = Integer()  # ordering weight

    class Index:
        # Index name and settings: one primary shard, no replicas.
        name = 'blog'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'Article'
# 文章文档管理器(处理文章索引的创建、更新、重建)
class ArticleDocumentManager():
    """Manage the ``blog`` index: create, delete, rebuild and update documents."""

    def __init__(self):
        """Initialise the index as soon as the manager is constructed."""
        self.create_index()

    def create_index(self):
        """Initialise the ``blog`` index from the ``ArticleDocument`` definition."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the ``blog`` index; HTTP 400 and 404 responses are ignored."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        es.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """Convert an iterable of ``Article`` model objects into ``ArticleDocument`` objects.

        Args:
            articles: Iterable of article model instances.

        Returns:
            A list with one ``ArticleDocument`` per article, using the article
            id as the document id.
        """
        return [
            ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    # The author's username is stored as the nickname.
                    'nickname': article.author.username,
                    'id': article.author.id,
                },
                category={
                    'name': article.category.name,
                    'id': article.category.id,
                },
                # One {'name', 'id'} dict per related tag.
                tags=[{'name': t.name, 'id': t.id} for t in article.tags.all()],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order,
            )
            for article in articles
        ]

    def rebuild(self, articles=None):
        """Re-index articles.

        Args:
            articles: Articles to index. When omitted or falsy (including an
                empty collection), every ``Article`` is indexed.
        """
        ArticleDocument.init()
        articles = articles if articles else Article.objects.all()
        docs = self.convert_to_doc(articles)
        # Documents are saved one by one.
        for doc in docs:
            doc.save()

    def update_docs(self, docs):
        """Save each document in ``docs`` exactly once.

        Args:
            docs: Iterable of objects exposing ``save()``.
        """
        for doc in docs:
            doc.save()
Loading…
Cancel
Save