"""Elasticsearch document definitions and index managers.

Defines two indices:

* ``performance`` - per-request timing records (``ElapsedTimeDocument``),
  saved through the ``geoip`` ingest pipeline.
* ``blog`` - article data for full-text search (``ArticleDocument``).
"""
import time

import elasticsearch.client
import elasticsearch.exceptions
from django.conf import settings
from elasticsearch_dsl import Document, InnerDoc, Date, Integer, Long, Text, Object, GeoPoint, Keyword, Boolean
from elasticsearch_dsl.connections import connections

from blog.models import Article

# Elasticsearch support is enabled only when the Django settings define
# ELASTICSEARCH_DSL.
ELASTICSEARCH_ENABLED = hasattr(settings, 'ELASTICSEARCH_DSL')

if ELASTICSEARCH_ENABLED:
    # Register the default elasticsearch_dsl connection from Django settings.
    connections.create_connection(
        hosts=[settings.ELASTICSEARCH_DSL['default']['hosts']])
    from elasticsearch import Elasticsearch

    es = Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])
    from elasticsearch.client import IngestClient

    # Ingest client used to manage pipelines.
    c = IngestClient(es)
    try:
        c.get_pipeline('geoip')
    except elasticsearch.exceptions.NotFoundError:
        # The pipeline does not exist yet: create one that adds geoip
        # information derived from the "ip" field.
        c.put_pipeline('geoip', body='''{ "description" : "Add geoip info", "processors" : [ { "geoip" : { "field" : "ip" } } ] }''')


def _create_client():
    """Return a new Elasticsearch client for the configured default hosts."""
    from elasticsearch import Elasticsearch
    return Elasticsearch(settings.ELASTICSEARCH_DSL['default']['hosts'])


class GeoIp(InnerDoc):
    """Inner document holding geolocation information for an IP address."""

    # Continent name.
    continent_name = Keyword()
    # Country ISO code.
    country_iso_code = Keyword()
    # Country name.
    country_name = Keyword()
    # Geographic coordinates (latitude / longitude).
    location = GeoPoint()


class UserAgentBrowser(InnerDoc):
    """Inner document holding the browser part of a User-Agent."""

    # Browser family (e.g. Chrome, Firefox).
    Family = Keyword()
    # Browser version.
    Version = Keyword()


class UserAgentOS(UserAgentBrowser):
    """Inner document for the operating system; same fields as the browser."""


class UserAgentDevice(InnerDoc):
    """Inner document holding the device part of a User-Agent."""

    # Device family (e.g. iPhone, Android).
    Family = Keyword()
    # Device brand (e.g. Apple, Samsung).
    Brand = Keyword()
    # Device model (e.g. iPhone 12).
    Model = Keyword()


class UserAgent(InnerDoc):
    """Inner document holding the complete User-Agent information."""

    # Nested browser information.
    browser = Object(UserAgentBrowser, required=False)
    # Nested operating system information.
    os = Object(UserAgentOS, required=False)
    # Nested device information.
    device = Object(UserAgentDevice, required=False)
    # Raw User-Agent string.
    string = Text()
    # Whether the client is a crawler or bot.
    is_bot = Boolean()


class ElapsedTimeDocument(Document):
    """Elasticsearch document storing page performance data."""

    # Requested URL.
    url = Keyword()
    # Time taken by the request (milliseconds).
    time_taken = Long()
    # Time the record was logged.
    log_datetime = Date()
    # Client IP address.
    ip = Keyword()
    # Nested IP geolocation information.
    geoip = Object(GeoIp, required=False)
    # Nested User-Agent information.
    useragent = Object(UserAgent, required=False)

    class Index:
        # Index name.
        name = 'performance'
        # Shard and replica counts for the index.
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        # Document type.
        doc_type = 'ElapsedTime'


class ElaspedTimeDocumentManager:
    """Helper for creating, deleting and populating the 'performance' index."""

    @staticmethod
    def build_index():
        """Create the 'performance' index if it does not exist yet."""
        client = _create_client()
        if not client.indices.exists(index="performance"):
            ElapsedTimeDocument.init()

    @staticmethod
    def delete_index():
        """Delete the 'performance' index, ignoring 400 and 404 responses."""
        client = _create_client()
        client.indices.delete(index='performance', ignore=[400, 404])

    @staticmethod
    def create(url, time_taken, log_datetime, useragent, ip):
        """Store one timing record in the 'performance' index.

        Args:
            url: Requested URL.
            time_taken: Time taken by the request.
            log_datetime: Time the record was logged.
            useragent: Parsed user agent. Must expose ``browser`` and ``os``
                (each with ``family`` and ``version_string``), ``device``
                (with ``family``, ``brand`` and ``model``), ``ua_string``
                and ``is_bot``.
            ip: Client IP address; processed by the 'geoip' pipeline on save.
        """
        # Make sure the index exists before writing to it.
        ElaspedTimeDocumentManager.build_index()

        # Build the nested UserAgent inner document.
        ua = UserAgent()
        ua.browser = UserAgentBrowser()
        ua.browser.Family = useragent.browser.family
        ua.browser.Version = useragent.browser.version_string

        ua.os = UserAgentOS()
        ua.os.Family = useragent.os.family
        ua.os.Version = useragent.os.version_string

        ua.device = UserAgentDevice()
        ua.device.Family = useragent.device.family
        ua.device.Brand = useragent.device.brand
        ua.device.Model = useragent.device.model
        ua.string = useragent.ua_string
        ua.is_bot = useragent.is_bot

        # The document ID is the current time in milliseconds.
        # NOTE(review): two records created within the same millisecond
        # would get the same ID - confirm whether that is acceptable.
        doc_id = int(round(time.time() * 1000))
        doc = ElapsedTimeDocument(
            meta={'id': doc_id},
            url=url,
            time_taken=time_taken,
            log_datetime=log_datetime,
            useragent=ua,
            ip=ip)
        # Save through the 'geoip' pipeline so the IP address is processed.
        doc.save(pipeline="geoip")


class ArticleDocument(Document):
    """Elasticsearch document storing blog articles for full-text search."""

    # Article body, indexed and searched with the ik analyzers.
    body = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    # Article title, indexed and searched with the ik analyzers.
    title = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    # Author information (object field).
    author = Object(properties={
        'nickname': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # Category information (object field).
    category = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })
    # Tag information (object field).
    tags = Object(properties={
        'name': Text(analyzer='ik_max_word', search_analyzer='ik_smart'),
        'id': Integer()
    })

    # Publication time.
    pub_time = Date()
    # Article status.
    status = Text()
    # Comment status.
    comment_status = Text()
    # Article type.
    type = Text()
    # View count.
    views = Integer()
    # Article ordering weight.
    article_order = Integer()

    class Index:
        # Index name.
        name = 'blog'
        # Shard and replica counts for the index.
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0
        }

    class Meta:
        # Document type.
        doc_type = 'Article'


class ArticleDocumentManager:
    """Helper for creating, deleting and populating the 'blog' index."""

    def __init__(self):
        # Initialise the index as soon as a manager is constructed.
        self.create_index()

    def create_index(self):
        """Initialise the 'blog' index via ``ArticleDocument.init()``."""
        ArticleDocument.init()

    def delete_index(self):
        """Delete the 'blog' index, ignoring 400 and 404 responses."""
        client = _create_client()
        client.indices.delete(index='blog', ignore=[400, 404])

    def convert_to_doc(self, articles):
        """Convert an iterable of ``Article`` objects to ``ArticleDocument``s.

        Args:
            articles: Iterable of ``Article`` model instances.

        Returns:
            A list with one ``ArticleDocument`` per article, using the
            article's ``id`` as the document ID.
        """
        return [
            ArticleDocument(
                meta={'id': article.id},
                body=article.body,
                title=article.title,
                author={
                    'nickname': article.author.username,
                    'id': article.author.id},
                category={
                    'name': article.category.name,
                    'id': article.category.id},
                tags=[
                    {'name': t.name, 'id': t.id}
                    for t in article.tags.all()],
                pub_time=article.pub_time,
                status=article.status,
                comment_status=article.comment_status,
                type=article.type,
                views=article.views,
                article_order=article.article_order)
            for article in articles]

    def rebuild(self, articles=None):
        """Rebuild the 'blog' index.

        Args:
            articles: Articles to index. When omitted (``None``) all
                articles are indexed. An explicitly passed empty collection
                indexes nothing rather than falling back to every article.
        """
        ArticleDocument.init()
        if articles is None:
            articles = Article.objects.all()
        self.update_docs(self.convert_to_doc(articles))

    def update_docs(self, docs):
        """Save every document in ``docs``."""
        for doc in docs:
            doc.save()