"""Elasticsearch document definitions and index managers.

Defines two indices:

* ``performance`` -- one ``ElapsedTimeDocument`` per recorded request, written
  through the ``geoip`` ingest pipeline.
* ``blog`` -- one ``ArticleDocument`` per ``blog.models.Article``.
"""
import time

import elasticsearch.client
from django.conf import settings
from elasticsearch import Elasticsearch
from elasticsearch.client import IngestClient
from elasticsearch_dsl import (
    Boolean,
    Date,
    Document,
    GeoPoint,
    InnerDoc,
    Integer,
    Keyword,
    Long,
    Object,
    Text,
)
from elasticsearch_dsl.connections import connections

from blog.models import Article

# True when the Django settings define an ELASTICSEARCH_DSL entry.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')


def _new_client():
    """Return a new Elasticsearch client for the configured default hosts."""
    return Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])


if ELASTICSEARCH_ENABLED:
    # Register the default elasticsearch_dsl connection.
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])

    es = _new_client()
    c = IngestClient(es)
    try:
        # Look up the 'geoip' ingest pipeline; create it when it is missing.
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        c.put_pipeline(
            'geoip',
            # Adjacent literals concatenate to the exact original JSON text.
            body=(
                '{ "description" : "Add geoip info", '
                '"processors" : [ { "geoip" : { "field" : "ip" } } ] }'
            ),
        )


class GeoIp(InnerDoc):
    """Geographic location information associated with an IP address."""

    continent_name = Keyword()
    country_iso_code = Keyword()
    country_name = Keyword()
    location = GeoPoint()


class UserAgentBrowser(InnerDoc):
    """Family and version of the browser reported by a user agent."""

    Family = Keyword()
    Version = Keyword()


class UserAgentOS(UserAgentBrowser):
    """Family and version of the operating system of a user agent.

    Inherits both fields from ``UserAgentBrowser``.
    """


class UserAgentDevice(InnerDoc):
    """Family, brand and model of the device reported by a user agent."""

    Family = Keyword()
    Brand = Keyword()
    Model = Keyword()


class UserAgent(InnerDoc):
    """Complete user-agent record: browser, OS, device, raw string, bot flag."""

    browser = Object(UserAgentBrowser, required=False)
    os = Object(UserAgentOS, required=False)
    device = Object(UserAgentDevice, required=False)
    string = Text()
    is_bot = Boolean()


class ElapsedTimeDocument(Document):
    """Performance record for one request, stored in the 'performance' index.

    Holds the URL, the time taken, the log timestamp, the client IP and the
    optional ``geoip`` / ``useragent`` sub-objects.
    """

    url = Keyword()
    time_taken = Long()
    log_datetime = Date()
    ip = Keyword()
    geoip = Object(GeoIp, required=False)
    useragent = Object(UserAgent, required=False)

    class Index:
        name = 'performance'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'ElapsedTime'


class ElaspedTimeDocumentManager:
    """Create, delete and populate the 'performance' index.

    The class name keeps its historical spelling for existing callers; see
    the ``ElapsedTimeDocumentManager`` alias defined after the class.
    """

    @staticmethod
    def build_index():
        """Initialise the 'performance' index when it does not exist yet."""
        client = _new_client()
        if not client.indices.exists(index="performance"):
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the 'performance' index, ignoring 400 and 404 responses."""
        _new_client().indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Build and save one performance record.

        Args:
            url (str): URL that was requested.
            time_taken (int): Request duration (milliseconds).
            log_datetime (datetime): Timestamp of the log entry.
            useragent (object): Parsed user agent exposing ``browser``,
                ``os`` and ``device`` sub-objects plus ``ua_string`` and
                ``is_bot``.
            ip (str): IP address of the visitor.
        """
        # Runs an index-existence check against the cluster on every call.
        ElaspedTimeDocumentManager.build_index()

        ua = UserAgent(
            browser=UserAgentBrowser(
                Family=useragent.browser.family,
                Version=useragent.browser.version_string,
            ),
            os=UserAgentOS(
                Family=useragent.os.family,
                Version=useragent.os.version_string,
            ),
            device=UserAgentDevice(
                Family=useragent.device.family,
                Brand=useragent.device.brand,
                Model=useragent.device.model,
            ),
            string=useragent.ua_string,
            is_bot=useragent.is_bot,
        )

        # NOTE(review): the id is the current wall-clock time in milliseconds,
        # so two records created within the same millisecond share an id.
        # Confirm whether that is acceptable before changing the id scheme.
        doc_id = int(round(time.time() * 1000))
        doc = ElapsedTimeDocument(
            meta={'id': doc_id},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua,
            ip=ip,
        )
        # Saved through the 'geoip' ingest pipeline.
        doc.save(pipeline="geoip")


# Correctly spelled alias; the original misspelled name remains available.
ElapsedTimeDocumentManager = ElaspedTimeDocumentManager


class ArticleDocument(Document):
    """Search document for a blog article, stored in the 'blog' index."""

    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    pub_time = Date()
    status = Text()
    comment_status = Text()
    type = Text()
    views = Integer()
    article_order = Integer()

    class Index:
        name = 'blog'
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        doc_type = 'Article'


class ArticleDocumentManager():
    """Create, delete, rebuild and update the 'blog' article index."""

    def __init__(self):
        """Initialise the manager and create the article index."""
        self.create_index()

    def create_index(self):
        """Initialise the ``ArticleDocument`` index."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the 'blog' index, ignoring 400 and 404 responses."""
        _new_client().indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """Convert Article model objects into ``ArticleDocument`` objects.

        Args:
            articles (iterable): Article model instances.

        Returns:
            list: One ``ArticleDocument`` per article, using the article id
            as the document id.
        """
        docs = []
        for article in articles:
            tags = [{'name': t.name, 'id': t.id} for t in article.tags.all()]
            docs.append(ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    'nickname': article.author.username,
                    'id': article.author.id},
                category={
                    'name': article.category.name,
                    'id': article.category.id},
                tags=tags,
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order))
        return docs

    def rebuild(self, articles=None):
        """Re-index articles into the 'blog' index.

        Args:
            articles (iterable, optional): Articles to index. Any falsy
                value (``None`` or an empty collection) indexes every
                article returned by ``Article.objects.all()``.
        """
        ArticleDocument.init()
        # NOTE(review): truthiness test kept on purpose; an empty collection
        # therefore re-indexes all articles, exactly as before.
        articles = articles if articles else Article.objects.all()
        for doc in self.convert_to_doc(articles):
            doc.save()

    def update_docs(self, docs):
        """Save each document in ``docs``.

        Args:
            docs (iterable): ``ArticleDocument`` objects to save.
        """
        for doc in docs:
            doc.save()