Update documents.py

master
p36kxhw2t 1 month ago
parent 13c0bf2e3b
commit 69f39a9c0c

@ -7,21 +7,21 @@ from elasticsearch_dsl.connections import connections
from blog.models import Article
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL') #ZNY 检查是否配置了Elasticsearch
if ELASTICSEARCH_ENABLED:
connections.create_connection(
hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
from elasticsearch import Elasticsearch
if ELASTICSEARCH_ENABLED: #ZNY 如果Elasticsearch已启用
connections.create_connection( #ZNY 创建Elasticsearch连接
hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']]) #ZNY 从配置中获取主机地址
from elasticsearch import Elasticsearch #ZNY 导入Elasticsearch客户端
es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
from elasticsearch.client import IngestClient
es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts']) #ZNY 创建Elasticsearch实例
from elasticsearch.client import IngestClient #ZNY 导入Ingest客户端
c = IngestClient(es)
c = IngestClient(es) #ZNY 创建Ingest客户端实例
try:
c.get_pipeline('geoip')
except elasticsearch.exceptions.NotFoundError:
c.put_pipeline('geoip', body='''{
c.get_pipeline('geoip') #ZNY 尝试获取geoip管道
except elasticsearch.exceptions.NotFoundError: #ZNY 如果geoip管道不存在
c.put_pipeline('geoip', body='''{ #ZNY 创建geoip管道
"description" : "Add geoip info",
"processors" : [
{
@ -30,184 +30,184 @@ if ELASTICSEARCH_ENABLED:
}
}
]
}''')
}''') #ZNY 定义geoip处理管道配置
class GeoIp(InnerDoc):
    """Inner document holding the geo-IP fields stored with a log record."""

    # Name of the continent.
    continent_name = Keyword()
    # ISO code of the country.
    country_iso_code = Keyword()
    # Name of the country.
    country_name = Keyword()
    # Geographic coordinates.
    location = GeoPoint()
class UserAgentBrowser(InnerDoc):
    """Inner document describing the browser part of a user agent."""

    # Browser family.
    Family = Keyword()
    # Browser version.
    Version = Keyword()
class UserAgentOS(UserAgentBrowser):
    """Operating-system part of a user agent.

    Declares no fields of its own; it reuses ``Family`` and ``Version``
    inherited from ``UserAgentBrowser``.
    """
class UserAgentDevice(InnerDoc):
    """Inner document describing the device part of a user agent."""

    # Device family.
    Family = Keyword()
    # Device brand.
    Brand = Keyword()
    # Device model.
    Model = Keyword()
class UserAgent(InnerDoc):
    """Inner document grouping everything recorded about a user agent."""

    # Optional nested objects for the browser, operating system and device.
    browser = Object(UserAgentBrowser, required=False)
    os = Object(UserAgentOS, required=False)
    device = Object(UserAgentDevice, required=False)
    # Raw user-agent string.
    string = Text()
    # Whether the user agent is a bot.
    is_bot = Boolean()
class ElapsedTimeDocument(Document):
    """Document recording the time taken by a request.

    Stored in the ``performance`` index.
    """

    # Requested URL.
    url = Keyword()
    # Elapsed time (the original comment states milliseconds).
    time_taken = Long()
    # Timestamp of the log entry.
    log_datetime = Date()
    # IP address.
    ip = Keyword()
    # Optional geo-IP information.
    geoip = Object(GeoIp, required=False)
    # Optional user-agent information.
    useragent = Object(UserAgent, required=False)

    class Index:
        # Index name and its shard/replica settings.
        name = 'performance'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'ElapsedTime'
class ElaspedTimeDocumentManager:
    """Static helpers that manage the ``performance`` index and its records."""

    @staticmethod
    def build_index():
        """Initialise the ``performance`` index unless it already exists."""
        from elasticsearch import Elasticsearch
        hosts = settings.ELASTICSEARCH_DSL['default']['hosts']
        index_exists = Elasticsearch(hosts).indices.exists(index="performance")
        if index_exists:
            return
        ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the ``performance`` index, ignoring 400 and 404 responses."""
        from elasticsearch import Elasticsearch
        hosts = settings.ELASTICSEARCH_DSL['default']['hosts']
        client = Elasticsearch(hosts)
        client.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Store one elapsed-time record and run it through the ``geoip`` pipeline.

        Args:
            url: Requested URL.
            time_taken: Elapsed time of the request.
            log_datetime: Timestamp of the log entry.
            useragent: Parsed user agent exposing ``browser``, ``os``,
                ``device``, ``ua_string`` and ``is_bot``.
            ip: Client IP address.
        """
        # Make sure the index exists before writing to it.
        ElaspedTimeDocumentManager.build_index()

        browser_info = UserAgentBrowser()
        browser_info.Family = useragent.browser.family
        browser_info.Version = useragent.browser.version_string

        os_info = UserAgentOS()
        os_info.Family = useragent.os.family
        os_info.Version = useragent.os.version_string

        device_info = UserAgentDevice()
        device_info.Family = useragent.device.family
        device_info.Brand = useragent.device.brand
        device_info.Model = useragent.device.model

        agent_info = UserAgent()
        agent_info.browser = browser_info
        agent_info.os = os_info
        agent_info.device = device_info
        agent_info.string = useragent.ua_string
        agent_info.is_bot = useragent.is_bot

        # The document id is the current time in whole milliseconds.
        # NOTE(review): two records created within the same millisecond
        # would share an id -- confirm whether that is acceptable.
        record_id = int(round(time.time() * 1000))
        record = ElapsedTimeDocument(
            meta={'id': record_id},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=agent_info,
            ip=ip,
        )
        record.save(pipeline="geoip")
class ArticleDocument(Document):
    """Search document for a blog article, stored in the ``blog`` index.

    Text fields that carry analyzers use ``ik_max_word`` for indexing and
    ``ik_smart`` for searching.
    """

    # Article body and title.
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    # Author: nickname plus numeric id.
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # Category: name plus numeric id.
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # Tags: name plus numeric id.
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    # Publication time.
    pub_time = Date()
    # Article status, comment status and article type.
    status = Text()
    comment_status = Text()
    type = Text()
    # View count and ordering value.
    views = Integer()
    article_order = Integer()

    class Index:
        # Index name and its shard/replica settings.
        name = 'blog'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'Article'
class ArticleDocumentManager():
    """Manage the ``blog`` index and the article documents stored in it."""

    def __init__(self):
        """Initialise the article index as soon as the manager is created."""
        self.create_index()

    def create_index(self):
        """Initialise the index backing ``ArticleDocument``."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the ``blog`` index, ignoring 400 and 404 responses."""
        from elasticsearch import Elasticsearch
        hosts = settings.ELASTICSEARCH_DSL['default']['hosts']
        client = Elasticsearch(hosts)
        client.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """Build one ``ArticleDocument`` per article.

        Args:
            articles: Iterable of article objects.

        Returns:
            A list of unsaved ``ArticleDocument`` instances, in input order.
        """
        documents = []
        for article in articles:
            # The article's own id is reused as the document id.
            document = ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    'nickname': article.author.username,
                    'id': article.author.id,
                },
                category={
                    'name': article.category.name,
                    'id': article.category.id,
                },
                tags=[
                    {'name': tag.name, 'id': tag.id}
                    for tag in article.tags.all()
                ],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order,
            )
            documents.append(document)
        return documents

    def rebuild(self, articles=None):
        """Re-initialise the index and save a document for each article.

        Args:
            articles: Articles to index; when falsy, every ``Article`` is used.
        """
        ArticleDocument.init()
        # NOTE(review): an empty ``articles`` is falsy, so it also falls
        # back to indexing every article -- confirm that is intended.
        selected = articles or Article.objects.all()
        for document in self.convert_to_doc(selected):
            document.save()

    def update_docs(self, docs):
        """Save every document in ``docs``."""
        for document in docs:
            document.save()

Loading…
Cancel
Save