You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
tentest/doc/DjangoBlog/blog/documents.py

323 lines
8.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import time
import elasticsearch.client
from django.conf import settings
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
from elasticsearch_dsl.connections import connections
from blog.models import Article
# mk: Elasticsearch support is switched on only when the Django settings
# define ELASTICSEARCH_DSL.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

if ELASTICSEARCH_ENABLED:
    from elasticsearch import Elasticsearch
    from elasticsearch.client import IngestClient

    # Read the configured hosts once; every client below uses the same value.
    _es_hosts = settings.ELASTICSEARCH_DSL['default']['hosts']

    # mk: register the default elasticsearch_dsl connection
    connections.create_connection(hosts=[_es_hosts])

    es = Elasticsearch(_es_hosts)
    c = IngestClient(es)
    try:
        # mk: look up the 'geoip' ingest pipeline; it is created below
        # when the lookup reports that it does not exist
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        c.put_pipeline('geoip', body='''{
"description" : "Add geoip info",
"processors" : [
{
"geoip" : {
"field" : "ip"
}
}
]
}''')
class GeoIp(InnerDoc):
    """
    mk:
    Geographic location sub-document.
    Holds the location information that corresponds to an IP address.
    """
    # Continent and country identifiers are stored as exact-match keywords.
    continent_name = Keyword()
    country_iso_code = Keyword()
    country_name = Keyword()
    # Coordinates of the location, stored as a geo point.
    location = GeoPoint()
class UserAgentBrowser(InnerDoc):
    """
    mk:
    Browser part of a user agent.
    Stores the browser family and its version.
    """
    # The capitalised attribute names are the field names used in the mapping.
    Family = Keyword()
    Version = Keyword()
class UserAgentOS(UserAgentBrowser):
    """
    mk:
    Operating-system part of a user agent.
    Inherits from UserAgentBrowser, so it stores the same Family and
    Version fields, here describing the operating system.
    """
    pass
class UserAgentDevice(InnerDoc):
    """
    mk:
    Device part of a user agent.
    Stores the device family, brand and model.
    """
    Family = Keyword()
    Brand = Keyword()
    Model = Keyword()
class UserAgent(InnerDoc):
    """
    mk:
    Complete user-agent sub-document.
    Groups the browser, operating-system and device sub-documents together
    with the raw user-agent string and a bot flag.
    """
    # Nested parts; each one is optional.
    browser = Object(UserAgentBrowser, required=False)
    os = Object(UserAgentOS, required=False)
    device = Object(UserAgentDevice, required=False)
    # Raw user-agent string, stored as full text.
    string = Text()
    # Whether the user agent was identified as a bot.
    is_bot = Boolean()
class ElapsedTimeDocument(Document):
    """
    mk:
    Performance-monitoring document.
    Records one page access: the URL, how long the request took, when it
    was logged, the visitor IP, and optional geo and user-agent details.
    """
    url = Keyword()
    # Request duration; the manager's create() documents it as milliseconds.
    time_taken = Long()
    log_datetime = Date()
    ip = Keyword()
    # Optional sub-documents.
    geoip = Object(GeoIp, required=False)
    useragent = Object(UserAgent, required=False)

    class Index:
        # Documents of this type live in the 'performance' index.
        name = 'performance'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'ElapsedTime'
class ElaspedTimeDocumentManager:
    """
    mk:
    Manager for the performance-monitoring documents.
    Provides helpers to create the 'performance' index, delete it, and
    store a single timing record in it.

    NOTE(review): the class name is misspelled ("Elasped"); it is kept as is
    because it is the public name — presumably imported elsewhere, confirm
    before renaming.
    """

    @staticmethod
    def build_index():
        """
        mk:
        Make sure the 'performance' index exists.
        Asks Elasticsearch whether the index is present and initialises it
        from ElapsedTimeDocument only when it is missing.
        """
        from elasticsearch import Elasticsearch
        es_client = Elasticsearch(
            settings.ELASTICSEARCH_DSL['default']['hosts'])
        if es_client.indices.exists(index="performance"):
            return
        ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """
        mk:
        Delete the 'performance' index.
        HTTP 400 and 404 responses from the delete call are ignored.
        """
        from elasticsearch import Elasticsearch
        hosts = settings.ELASTICSEARCH_DSL['default']['hosts']
        Elasticsearch(hosts).indices.delete(
            index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """
        mk:
        Build one performance record and save it.

        Args:
            url (str): URL that was requested.
            time_taken (int): request duration (milliseconds).
            log_datetime (datetime): time the entry was logged.
            useragent (object): parsed user agent exposing ``browser``,
                ``os`` and ``device`` parts plus ``ua_string`` and ``is_bot``.
            ip (str): IP address of the visitor.
        """
        ElaspedTimeDocumentManager.build_index()

        # Copy the parsed user-agent parts into their sub-documents.
        browser_doc = UserAgentBrowser(
            Family=useragent.browser.family,
            Version=useragent.browser.version_string,
        )
        os_doc = UserAgentOS(
            Family=useragent.os.family,
            Version=useragent.os.version_string,
        )
        device_doc = UserAgentDevice(
            Family=useragent.device.family,
            Brand=useragent.device.brand,
            Model=useragent.device.model,
        )
        ua = UserAgent(
            browser=browser_doc,
            os=os_doc,
            device=device_doc,
            string=useragent.ua_string,
            is_bot=useragent.is_bot,
        )

        # The current Unix time in milliseconds is used as the document id.
        # NOTE(review): two records created within the same millisecond would
        # share an id — confirm whether that can happen in deployment.
        doc_id = int(round(time.time() * 1000))
        record = ElapsedTimeDocument(
            meta={'id': doc_id},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua,
            ip=ip,
        )
        # Save through the 'geoip' ingest pipeline.
        record.save(pipeline="geoip")
def _ik_text():
    """Return a new Text field indexed with ik_max_word and searched with ik_smart."""
    # A fresh field object is built on every call so no two mapping entries
    # share one instance.
    return Text(analyzer='ik_max_word', search_analyzer='ik_smart')


class ArticleDocument(Document):
    """
    mk:
    Search document for a blog article.
    Describes how an article (text, author, category, tags and bookkeeping
    fields) is indexed in the 'blog' index.
    """
    # Full-text content.
    body = _ik_text()
    title = _ik_text()

    # Related objects are stored as small name/id sub-objects.
    author = Object(properties={
        'nickname': _ik_text(),
        'id': Integer(),
    })
    category = Object(properties={
        'name': _ik_text(),
        'id': Integer(),
    })
    tags = Object(properties={
        'name': _ik_text(),
        'id': Integer(),
    })

    # Publication and bookkeeping fields.
    pub_time = Date()
    status = Text()
    comment_status = Text()
    type = Text()
    views = Integer()
    article_order = Integer()

    class Index:
        name = 'blog'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    class Meta:
        doc_type = 'Article'
class ArticleDocumentManager:
    """
    mk:
    Manager for the article search index.
    Provides creation, deletion, rebuilding and updating of the 'blog'
    index built from ArticleDocument.
    """

    def __init__(self):
        """
        mk:
        Initialise the manager.
        The index is created as part of construction.
        """
        self.create_index()

    def create_index(self):
        """
        mk:
        Create the article index.
        Initialises the index structure declared by ArticleDocument.
        """
        ArticleDocument.init()

    def delete_index(self):
        """
        mk:
        Delete the article index.
        Removes the index named 'blog'; HTTP 400 and 404 responses from the
        delete call are ignored.
        """
        from elasticsearch import Elasticsearch
        es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
        es.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """
        mk:
        Convert Article model objects into search documents.

        Args:
            articles (iterable): Article model objects.

        Returns:
            list: one ArticleDocument per article, in input order. Each
            document uses the article's id as its document id.
        """
        return [
            ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    'nickname': article.author.username,
                    'id': article.author.id,
                },
                category={
                    'name': article.category.name,
                    'id': article.category.id,
                },
                tags=[
                    {'name': t.name, 'id': t.id}
                    for t in article.tags.all()
                ],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order,
            )
            for article in articles
        ]

    def rebuild(self, articles=None):
        """
        mk:
        Rebuild the article index.

        Args:
            articles (iterable, optional): articles to (re)index. When None,
                every Article is indexed. An explicitly empty collection
                indexes nothing.
        """
        # The index may have been deleted since construction, so make sure
        # it exists before saving.
        self.create_index()
        # Only None means "everything". A truthiness test would also treat
        # an empty list/queryset as "everything" and reindex all articles.
        if articles is None:
            articles = Article.objects.all()
        self.update_docs(self.convert_to_doc(articles))

    def update_docs(self, docs):
        """
        mk:
        Save a batch of documents.

        Args:
            docs (iterable): ArticleDocument objects; each one is saved
                individually, in order.
        """
        for doc in docs:
            doc.save()