|
|
|
|
@ -7,20 +7,26 @@ from elasticsearch_dsl.connections import connections
|
|
|
|
|
|
|
|
|
|
from blog.models import Article
|
|
|
|
|
|
|
|
|
|
# 检查是否启用了Elasticsearch配置
|
|
|
|
|
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')
|
|
|
|
|
|
|
|
|
|
if ELASTICSEARCH_ENABLED:
|
|
|
|
|
# 创建Elasticsearch连接
|
|
|
|
|
connections.create_connection(
|
|
|
|
|
hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
|
|
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
|
|
|
|
|
|
# 初始化Elasticsearch客户端
|
|
|
|
|
es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
|
|
|
|
|
from elasticsearch.client import IngestClient
|
|
|
|
|
|
|
|
|
|
# 创建Ingest管道客户端,用于处理数据预处理
|
|
|
|
|
c = IngestClient(es)
|
|
|
|
|
try:
|
|
|
|
|
# 检查是否已存在geoip管道
|
|
|
|
|
c.get_pipeline('geoip')
|
|
|
|
|
except elasticsearch.exceptions.NotFoundError:
|
|
|
|
|
# 如果不存在,创建geoip管道,用于根据IP地址添加地理位置信息
|
|
|
|
|
c.put_pipeline('geoip', body='''{
|
|
|
|
|
"description" : "Add geoip info",
|
|
|
|
|
"processors" : [
|
|
|
|
|
@ -32,29 +38,29 @@ if ELASTICSEARCH_ENABLED:
|
|
|
|
|
]
|
|
|
|
|
}''')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Inner document class for geographic location information.
class GeoIp(InnerDoc):
    """Nested geo-location record embedded in other documents.

    Three keyword fields carry the continent and country identification,
    and ``location`` is a geo-point.
    NOTE(review): presumably filled in by the ``geoip`` ingest pipeline
    rather than by application code -- confirm against the pipeline body.
    """

    # Exact-match (keyword) fields.
    continent_name = Keyword()
    country_iso_code = Keyword()
    country_name = Keyword()

    # Geo-point field.
    location = GeoPoint()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Inner document class for the browser part of a user agent.
class UserAgentBrowser(InnerDoc):
    """Browser details of a user agent: two keyword fields.

    The capitalised attribute names are the field names stored in the
    index, so they must be kept exactly as written.
    """

    Family = Keyword()   # browser family, stored as a keyword
    Version = Keyword()  # browser version, stored as a keyword
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Inner document class for the operating-system part of a user agent.
class UserAgentOS(UserAgentBrowser):
    """Operating-system details of a user agent.

    Declares no fields of its own; it reuses the ``Family`` and ``Version``
    keyword fields inherited from ``UserAgentBrowser``.
    """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Inner document class for the device part of a user agent.
class UserAgentDevice(InnerDoc):
    """Device details of a user agent: three keyword fields.

    The capitalised attribute names are the field names stored in the
    index, so they must be kept exactly as written.
    """

    Family = Keyword()  # device family, stored as a keyword
    Brand = Keyword()   # device brand, stored as a keyword
    Model = Keyword()   # device model, stored as a keyword
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 用户代理完整信息内部文档类
|
|
|
|
|
class UserAgent(InnerDoc):
|
|
|
|
|
browser = Object(UserAgentBrowser, required=False)
|
|
|
|
|
os = Object(UserAgentOS, required=False)
|
|
|
|
|
@ -62,33 +68,35 @@ class UserAgent(InnerDoc):
|
|
|
|
|
string = Text()
|
|
|
|
|
is_bot = Boolean()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Response-time document class - records performance monitoring data.
class ElapsedTimeDocument(Document):
    """Document describing one timed request, stored in the ``performance`` index.

    Each field is declared exactly once. The captured source listed every
    declaration twice (the pre- and post-change lines of a diff), which
    left the ``settings`` dict literal syntactically invalid.
    """

    url = Keyword()          # requested URL
    time_taken = Long()      # elapsed time (milliseconds, per the original annotation)
    log_datetime = Date()    # time the entry was logged
    ip = Keyword()           # IP address
    geoip = Object(GeoIp, required=False)          # optional geo-location sub-document
    useragent = Object(UserAgent, required=False)  # optional user-agent sub-document

    class Index:
        name = 'performance'  # index name
        settings = {
            "number_of_shards": 1,    # shard count
            "number_of_replicas": 0,  # replica count
        }

    class Meta:
        doc_type = 'ElapsedTime'  # document type
|
|
|
|
|
|
|
|
|
|
# 响应时间文档管理器 - 提供对响应时间索引的操作方法
|
|
|
|
|
class ElaspedTimeDocumentManager:
|
|
|
|
|
@staticmethod
def build_index():
    """Create the ``performance`` index when Elasticsearch does not have it yet.

    A new client is built from ``settings.ELASTICSEARCH_DSL['default']['hosts']``
    on every call. If the index already exists nothing else happens;
    otherwise ``ElapsedTimeDocument.init()`` is invoked to set it up.
    """
    from elasticsearch import Elasticsearch

    es_client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
    # Nothing to do when the index is already present.
    if es_client.indices.exists(index="performance"):
        return
    # Index is missing: initialise it from the document definition.
    ElapsedTimeDocument.init()
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
@ -99,7 +107,9 @@ class ElaspedTimeDocumentManager:
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def create(url, time_taken, log_datetime, useragent, ip):
|
|
|
|
|
# 确保索引存在
|
|
|
|
|
ElaspedTimeDocumentManager.build_index()
|
|
|
|
|
# 构建用户代理信息对象
|
|
|
|
|
ua = UserAgent()
|
|
|
|
|
ua.browser = UserAgentBrowser()
|
|
|
|
|
ua.browser.Family = useragent.browser.family
|
|
|
|
|
@ -115,7 +125,7 @@ class ElaspedTimeDocumentManager:
|
|
|
|
|
ua.device.Model = useragent.device.model
|
|
|
|
|
ua.string = useragent.ua_string
|
|
|
|
|
ua.is_bot = useragent.is_bot
|
|
|
|
|
|
|
|
|
|
# 创建文档对象,使用当前时间戳作为ID
|
|
|
|
|
doc = ElapsedTimeDocument(
|
|
|
|
|
meta={
|
|
|
|
|
'id': int(
|
|
|
|
|
@ -127,43 +137,44 @@ class ElaspedTimeDocumentManager:
|
|
|
|
|
time_taken=time_taken,
|
|
|
|
|
log_datetime=log_datetime,
|
|
|
|
|
useragent=ua, ip=ip)
|
|
|
|
|
# 保存文档,使用geoip管道处理IP地址
|
|
|
|
|
doc.save(pipeline="geoip")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Article document class - used for blog article search.
class ArticleDocument(Document):
    """Search document for a blog article, stored in the ``blog`` index.

    Each field is declared exactly once. The captured source listed every
    declaration twice (the pre- and post-change lines of a diff), which
    left the ``properties={...}`` and ``settings`` literals syntactically
    invalid. Text fields meant for full-text search are indexed with the
    ``ik_max_word`` analyzer and searched with ``ik_smart`` (the IK Chinese
    analyzer, per the original annotations).
    """

    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')   # article body
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')  # article title
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # author nickname
        'id': Integer()  # author id
    })
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # category name
        'id': Integer()  # category id
    })
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),  # tag name
        'id': Integer()  # tag id
    })

    pub_time = Date()          # publication time
    status = Text()            # article status
    comment_status = Text()    # comment status
    type = Text()              # article type (field name is part of the mapping; do not rename)
    views = Integer()          # view count
    article_order = Integer()  # article ordering value

    class Index:
        name = 'blog'  # index name
        settings = {
            "number_of_shards": 1,   # shard count
            "number_of_replicas": 0  # replica count
        }

    class Meta:
        doc_type = 'Article'  # document type
|
|
|
|
|
|
|
|
|
|
# 文章文档管理器 - 提供对文章索引的操作方法
|
|
|
|
|
class ArticleDocumentManager():
|
|
|
|
|
|
|
|
|
|
def __init__(self):
|
|
|
|
|
@ -181,7 +192,7 @@ class ArticleDocumentManager():
|
|
|
|
|
return [
|
|
|
|
|
ArticleDocument(
|
|
|
|
|
meta={
|
|
|
|
|
'id': article.id},
|
|
|
|
|
'id': article.id}, # 使用文章ID作为文档ID
|
|
|
|
|
body=article.body,
|
|
|
|
|
title=article.title,
|
|
|
|
|
author={
|
|
|
|
|
@ -193,7 +204,7 @@ class ArticleDocumentManager():
|
|
|
|
|
tags=[
|
|
|
|
|
{
|
|
|
|
|
'name': t.name,
|
|
|
|
|
'id': t.id} for t in article.tags.all()],
|
|
|
|
|
'id': t.id} for t in article.tags.all()],# 处理多对多标签关系
|
|
|
|
|
pub_time=article.pub_time,
|
|
|
|
|
status=article.status,
|
|
|
|
|
comment_status=article.comment_status,
|
|
|
|
|
@ -202,9 +213,13 @@ class ArticleDocumentManager():
|
|
|
|
|
article_order=article.article_order) for article in articles]
|
|
|
|
|
|
|
|
|
|
def rebuild(self, articles=None):
    """Initialise the article index and save a document for each article.

    Args:
        articles: Articles to index. Any falsy value (``None`` or an empty
            collection) makes the method fall back to
            ``Article.objects.all()``.

    Each document produced by ``self.convert_to_doc`` is saved one by one
    with its own ``save()`` call.
    """
    # Initialise the index from the document definition.
    ArticleDocument.init()

    # No articles supplied: index every article.
    if not articles:
        articles = Article.objects.all()

    # Convert to documents and persist them to Elasticsearch one at a time.
    for document in self.convert_to_doc(articles):
        document.save()
|
|
|
|
|
|
|
|
|
|
|