|
|
|
|
@ -1,213 +1,253 @@
|
|
|
|
|
import time
|
|
|
|
|
|
|
|
|
|
import elasticsearch.client
|
|
|
|
|
from django.conf import settings
|
|
|
|
|
import time # 用于生成时间戳作为文档ID
|
|
|
|
|
import elasticsearch.client # Elasticsearch客户端工具
|
|
|
|
|
from django.conf import settings # 导入Django项目配置
|
|
|
|
|
# 导入Elasticsearch DSL相关模块,用于定义文档结构和字段类型
|
|
|
|
|
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
|
|
|
|
|
from elasticsearch_dsl.connections import connections
|
|
|
|
|
from elasticsearch_dsl.connections import connections # 用于创建Elasticsearch连接
|
|
|
|
|
|
|
|
|
|
from blog.models import Article
|
|
|
|
|
from blog.models import Article # 导入Django博客文章模型
|
|
|
|
|
|
|
|
|
|
# 检查是否启用了Elasticsearch(通过判断配置中是否有ELASTICSEARCH_DSL)
|
|
|
|
|
# Elasticsearch support is considered enabled when the Django settings define
# an ELASTICSEARCH_DSL entry.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

if ELASTICSEARCH_ENABLED:
    # Register the default elasticsearch_dsl connection using the hosts value
    # from the Django settings.
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
    from elasticsearch import Elasticsearch

    # Low-level client, used here only to manage the ingest pipeline.
    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
    from elasticsearch.client import IngestClient

    c = IngestClient(es)
    try:
        # Look up the 'geoip' ingest pipeline; a missing pipeline raises
        # NotFoundError, which is handled below.
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        # Create the pipeline: a single geoip processor that reads the
        # document's 'ip' field. The body is sent as JSON, so it must not
        # contain '#' comments -- JSON has no comment syntax.
        c.put_pipeline('geoip', body='''{
              "description" : "Add geoip info",
              "processors" : [
                {
                  "geoip" : {
                    "field" : "ip"
                  }
                }
              ]
            }''')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 内部文档类:存储IP地址解析后的地理位置信息(嵌套在ElapsedTimeDocument中)
|
|
|
|
|
class GeoIp(InnerDoc):
    """Inner document describing the geographic location of an IP address."""

    continent_name = Keyword()  # continent name (keyword: exact match, not analyzed)
    country_iso_code = Keyword()  # country ISO code, e.g. CN or US
    country_name = Keyword()  # country name
    location = GeoPoint()  # coordinates, stored as an Elasticsearch geo point
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 内部文档类:存储用户代理中的浏览器信息(嵌套在UserAgent中)
|
|
|
|
|
class UserAgentBrowser(InnerDoc):
    """Inner document with the browser part of a parsed User-Agent."""

    # Field names are capitalized in the index mapping; keep them as-is.
    Family = Keyword()  # browser family, e.g. Chrome or Firefox
    Version = Keyword()  # browser version string
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 内部文档类:存储用户代理中的操作系统信息(继承浏览器信息结构)
|
|
|
|
|
class UserAgentOS(UserAgentBrowser):
    """Inner document with the operating-system part of a parsed User-Agent.

    Adds no fields of its own: it inherits ``Family`` and ``Version`` from
    ``UserAgentBrowser``.
    """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 内部文档类:存储用户代理中的设备信息(嵌套在UserAgent中)
|
|
|
|
|
class UserAgentDevice(InnerDoc):
    """Inner document with the device part of a parsed User-Agent."""

    Family = Keyword()  # device family
    Brand = Keyword()  # device brand, e.g. Apple or Samsung
    Model = Keyword()  # device model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 内部文档类:存储用户代理(User-Agent)完整信息(嵌套在ElapsedTimeDocument中)
|
|
|
|
|
class UserAgent(InnerDoc):
    """Inner document holding a complete parsed User-Agent."""

    browser = Object(UserAgentBrowser, required=False)  # browser info (optional)
    os = Object(UserAgentOS, required=False)  # operating-system info (optional)
    device = Object(UserAgentDevice, required=False)  # device info (optional)
    string = Text()  # raw User-Agent header string
    is_bot = Boolean()  # whether the client was identified as a bot
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Elasticsearch文档类:记录性能耗时信息(如接口响应时间)
|
|
|
|
|
class ElapsedTimeDocument(Document):
    """Elasticsearch document recording how long a request took."""

    url = Keyword()  # request URL (exact match)
    time_taken = Long()  # elapsed time; presumably milliseconds -- confirm with caller
    log_datetime = Date()  # when the entry was logged
    ip = Keyword()  # client IP address
    geoip = Object(GeoIp, required=False)  # geolocation info (optional)
    useragent = Object(UserAgent, required=False)  # parsed User-Agent (optional)

    class Index:
        name = 'performance'  # index that stores the performance entries
        settings = {
            "number_of_shards": 1,  # primary shards
            "number_of_replicas": 0,  # no replica shards
        }

    class Meta:
        doc_type = 'ElapsedTime'  # document type name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 管理类:处理ElapsedTimeDocument的索引创建、删除和数据插入
|
|
|
|
|
class ElaspedTimeDocumentManager:
    """Create, delete and populate the ``performance`` index.

    All methods are static; the class only serves as a namespace.
    """

    @staticmethod
    def build_index():
        """Create the ``performance`` index if it does not exist yet."""
        from elasticsearch import Elasticsearch
        client = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        if not client.indices.exists(index="performance"):
            # Creates the index and mapping from ElapsedTimeDocument.
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the ``performance`` index."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        # Do not raise on HTTP 404 (index does not exist) or 400 (bad request).
        es.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Build one performance entry and save it to Elasticsearch.

        Args:
            url: Request URL.
            time_taken: Elapsed time for the request.
            log_datetime: Timestamp of the entry.
            useragent: Parsed user agent; must expose ``browser``, ``os`` and
                ``device`` (each with ``family``; ``browser``/``os`` also
                ``version_string``; ``device`` also ``brand`` and ``model``),
                plus ``ua_string`` and ``is_bot``.
            ip: Client IP address; the ``geoip`` pipeline reads this field.
        """
        # Make sure the index exists before writing to it.
        ElaspedTimeDocumentManager.build_index()

        # Copy the parsed user agent into the nested document structure.
        ua = UserAgent()
        ua.browser = UserAgentBrowser()
        ua.browser.Family = useragent.browser.family
        ua.browser.Version = useragent.browser.version_string

        ua.os = UserAgentOS()
        ua.os.Family = useragent.os.family
        ua.os.Version = useragent.os.version_string

        ua.device = UserAgentDevice()
        ua.device.Family = useragent.device.family
        ua.device.Brand = useragent.device.brand
        ua.device.Model = useragent.device.model
        ua.string = useragent.ua_string
        ua.is_bot = useragent.is_bot

        # The document id is the current time in whole milliseconds.
        # NOTE(review): two entries created within the same millisecond get
        # the same id, so the later save overwrites the earlier one.
        doc_id = int(round(time.time() * 1000))
        doc = ElapsedTimeDocument(
            meta={'id': doc_id},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua,
            ip=ip,
        )
        # Index through the 'geoip' ingest pipeline.
        doc.save(pipeline="geoip")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Elasticsearch文档类:存储博客文章信息(用于全文搜索)
|
|
|
|
|
class ArticleDocument(Document):
    """Elasticsearch document for a blog article, used for full-text search.

    Text fields are indexed with the ``ik_max_word`` analyzer and searched
    with ``ik_smart``; both names are passed through to Elasticsearch as-is.
    """

    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')  # article content
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')  # article title
    # Author, as a nested object.
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer(),
    })
    # Category, as a nested object.
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer(),
    })
    # Tags; each tag has the same name/id shape.
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer(),
    })

    pub_time = Date()  # publication time
    status = Text()  # article status
    comment_status = Text()  # comment status
    type = Text()  # article type
    views = Integer()  # view count
    article_order = Integer()  # ordering weight

    class Index:
        name = 'blog'  # index that stores the blog articles
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    class Meta:
        doc_type = 'Article'  # document type name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 管理类:处理ArticleDocument的索引创建、删除、数据同步
|
|
|
|
|
class ArticleDocumentManager():
    """Create, delete and synchronize the ``blog`` index."""

    def __init__(self):
        """Initialize the ``blog`` index as soon as a manager is created."""
        self.create_index()

    def create_index(self):
        """Initialize the ``blog`` index from the ArticleDocument definition."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the ``blog`` index."""
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        # Do not raise on HTTP 404 (index does not exist) or 400 (bad request).
        es.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """Convert Article model objects into ArticleDocument instances.

        Args:
            articles: Iterable of Article objects.

        Returns:
            A list with one ArticleDocument per article, using the article id
            as the document id.
        """
        return [
            ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    'nickname': article.author.username,
                    'id': article.author.id,
                },
                category={
                    'name': article.category.name,
                    'id': article.category.id,
                },
                # One entry per tag in the article's ``tags`` relation.
                tags=[{'name': t.name, 'id': t.id}
                      for t in article.tags.all()],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order,
            )
            for article in articles
        ]

    def rebuild(self, articles=None):
        """Synchronize articles into the ``blog`` index.

        Args:
            articles: Articles to index. When omitted or empty (falsy), every
                Article in the database is indexed.
        """
        ArticleDocument.init()
        # A falsy value (None or an empty collection) falls back to all articles.
        articles = articles or Article.objects.all()
        for doc in self.convert_to_doc(articles):
            doc.save()

    def update_docs(self, docs):
        """Save each document in ``docs`` exactly once."""
        for doc in docs:
            doc.save()
|