周俊杰 1 month ago
commit 3b69ce6a1a

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

@ -1 +1,16 @@
"""Package entry point of the ``djangoblog`` Django application.

Declares the dotted path of the application's configuration class through
the module-level ``default_app_config`` name.

NOTE(review): recent Django versions discover a single ``AppConfig``
subclass automatically and deprecate ``default_app_config`` -- confirm
whether this assignment is still needed for the Django version in use.
"""

# Dotted path of the configuration class for this package.
default_app_config = 'djangoblog.apps.DjangoblogAppConfig'

@ -1,8 +1,22 @@
"""
DjangoBlog 后台管理站点配置模块
本模块定义了自定义的Django后台管理站点用于统一管理博客系统的所有数据模型
通过自定义AdminSite类实现了权限控制界面定制和模型注册的集中管理
主要功能
- 自定义后台管理站点外观和权限
- 集中注册所有应用的模型到统一后台
- 提供超级用户专属的管理界面
- 集成日志记录用户管理内容管理等功能
"""
from django.contrib.admin import AdminSite from django.contrib.admin import AdminSite
from django.contrib.admin.models import LogEntry from django.contrib.admin.models import LogEntry
from django.contrib.sites.admin import SiteAdmin from django.contrib.sites.admin import SiteAdmin
from django.contrib.sites.models import Site from django.contrib.sites.models import Site
# 导入各应用的Admin配置类和模型类
from accounts.admin import * from accounts.admin import *
from blog.admin import * from blog.admin import *
from blog.models import * from blog.models import *
@ -18,15 +32,43 @@ from servermanager.models import *
class DjangoBlogAdminSite(AdminSite):
    """Admin site for DjangoBlog that is restricted to superusers.

    Subclasses Django's ``AdminSite``, overrides the header and title texts
    and replaces the permission check with a superuser-only test.
    """

    # Text shown in the header of the admin pages.
    site_header = 'djangoblog administration'
    # Text used for the browser tab title of the admin pages.
    site_title = 'djangoblog site admin'

    def __init__(self, name='admin'):
        """Create the admin site.

        Args:
            name (str): Site name forwarded to ``AdminSite.__init__``;
                defaults to ``'admin'``.
        """
        super().__init__(name)

    def has_permission(self, request):
        """Tell whether the requesting user may use this admin site.

        Args:
            request: Incoming HTTP request; only ``request.user`` is read.

        Returns:
            The value of ``request.user.is_superuser``.
        """
        return request.user.is_superuser
# 注释掉的URL扩展方法 - 预留用于添加自定义管理视图
# def get_urls(self): # def get_urls(self):
# urls = super().get_urls() # urls = super().get_urls()
# from django.urls import path # from django.urls import path
@ -38,27 +80,36 @@ class DjangoBlogAdminSite(AdminSite):
# return urls + my_urls # return urls + my_urls
# 创建自定义后台管理站点的实例
admin_site = DjangoBlogAdminSite(name='admin') admin_site = DjangoBlogAdminSite(name='admin')
admin_site.register(Article, ArticlelAdmin) # 注册博客相关模型到后台管理
admin_site.register(Category, CategoryAdmin) admin_site.register(Article, ArticlelAdmin) # 文章模型
admin_site.register(Tag, TagAdmin) admin_site.register(Category, CategoryAdmin) # 分类模型
admin_site.register(Links, LinksAdmin) admin_site.register(Tag, TagAdmin) # 标签模型
admin_site.register(SideBar, SideBarAdmin) admin_site.register(Links, LinksAdmin) # 友情链接模型
admin_site.register(BlogSettings, BlogSettingsAdmin) admin_site.register(SideBar, SideBarAdmin) # 侧边栏模型
admin_site.register(BlogSettings, BlogSettingsAdmin) # 博客设置模型
admin_site.register(commands, CommandsAdmin) # 注册服务器管理相关模型
admin_site.register(EmailSendLog, EmailSendLogAdmin) admin_site.register(commands, CommandsAdmin) # 命令模型
admin_site.register(EmailSendLog, EmailSendLogAdmin) # 邮件发送日志模型
admin_site.register(BlogUser, BlogUserAdmin) # 注册用户管理模型
admin_site.register(BlogUser, BlogUserAdmin) # 博客用户模型
admin_site.register(Comment, CommentAdmin) # 注册评论管理模型
admin_site.register(Comment, CommentAdmin) # 评论模型
admin_site.register(OAuthUser, OAuthUserAdmin) # 注册OAuth认证相关模型
admin_site.register(OAuthConfig, OAuthConfigAdmin) admin_site.register(OAuthUser, OAuthUserAdmin) # OAuth用户模型
admin_site.register(OAuthConfig, OAuthConfigAdmin) # OAuth配置模型
admin_site.register(OwnTrackLog, OwnTrackLogsAdmin) # 注册位置追踪相关模型
admin_site.register(OwnTrackLog, OwnTrackLogsAdmin) # 位置追踪日志模型
admin_site.register(Site, SiteAdmin) # 注册Django内置站点模型
admin_site.register(Site, SiteAdmin) # 站点模型
admin_site.register(LogEntry, LogEntryAdmin) # 注册日志记录模型
admin_site.register(LogEntry, LogEntryAdmin) # 管理员操作日志模型

@ -1,11 +1,49 @@
"""
DjangoBlog 应用配置模块
本模块定义了DjangoBlog应用的核心配置类负责应用启动时的初始化工作
主要功能包括应用元数据配置和插件系统的自动加载
关键功能
- 配置Django应用的默认设置
- 在应用准备就绪时自动加载插件系统
- 确保插件在Django启动过程中正确初始化
"""
from django.apps import AppConfig from django.apps import AppConfig
class DjangoblogAppConfig(AppConfig):
    """Application configuration for the ``djangoblog`` app.

    Sets the default primary-key field type and the application path, and
    loads the plugin system from ``ready()``.
    """

    # Default type for automatically created primary keys.
    default_auto_field = 'django.db.models.BigAutoField'
    # Python path of the application this configuration belongs to.
    name = 'djangoblog'

    def ready(self):
        """Run the parent ``ready()`` and then load the plugins.

        Steps:
            1. Call ``super().ready()``.
            2. Import ``load_plugins`` from ``.plugin_manage.loader``.
            3. Call ``load_plugins()`` once.
        """
        super().ready()

        # Imported here instead of at module level; the original comment
        # gives avoiding circular imports as the reason.
        from .plugin_manage.loader import load_plugins

        load_plugins()

@ -1,3 +1,17 @@
"""
DjangoBlog 信号处理模块
本模块定义了DjangoBlog系统的所有信号处理函数用于在特定事件发生时执行相应的操作
通过Django的信号机制实现了模块间的解耦和事件驱动的编程模式
主要功能
- 邮件发送信号处理
- OAuth用户登录信号处理
- 模型保存后的回调处理
- 用户登录/登出事件处理
- 缓存管理和搜索引擎通知
"""
import _thread import _thread
import logging import logging
@ -9,6 +23,7 @@ from django.core.mail import EmailMultiAlternatives
from django.db.models.signals import post_save from django.db.models.signals import post_save
from django.dispatch import receiver from django.dispatch import receiver
# 导入应用相关模块
from comments.models import Comment from comments.models import Comment
from comments.utils import send_comment_email from comments.utils import send_comment_email
from djangoblog.spider_notify import SpiderNotify from djangoblog.spider_notify import SpiderNotify
@ -16,51 +31,88 @@ from djangoblog.utils import cache, expire_view_cache, delete_sidebar_cache, del
from djangoblog.utils import get_current_site from djangoblog.utils import get_current_site
from oauth.models import OAuthUser from oauth.models import OAuthUser
# 初始化模块级日志器
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# 定义自定义信号
# OAuth用户登录信号传递用户ID参数
oauth_user_login_signal = django.dispatch.Signal(['id']) oauth_user_login_signal = django.dispatch.Signal(['id'])
# 邮件发送信号,传递收件人、标题和内容参数
send_email_signal = django.dispatch.Signal( send_email_signal = django.dispatch.Signal(
['emailto', 'title', 'content']) ['emailto', 'title', 'content'])
@receiver(send_email_signal) @receiver(send_email_signal)
def send_email_signal_handler(sender, **kwargs): def send_email_signal_handler(sender, **kwargs):
emailto = kwargs['emailto'] """
title = kwargs['title'] 邮件发送信号处理函数
content = kwargs['content']
当收到send_email_signal信号时异步发送HTML格式邮件并记录发送日志
Args:
sender: 信号发送者
**kwargs: 包含emailto, title, content等参数
"""
# 从信号参数中提取邮件信息
emailto = kwargs['emailto'] # 收件人列表
title = kwargs['title'] # 邮件标题
content = kwargs['content'] # 邮件内容
# 创建邮件消息对象设置HTML格式
msg = EmailMultiAlternatives( msg = EmailMultiAlternatives(
title, title,
content, content,
from_email=settings.DEFAULT_FROM_EMAIL, from_email=settings.DEFAULT_FROM_EMAIL, # 使用配置的默认发件人
to=emailto) to=emailto)
msg.content_subtype = "html" msg.content_subtype = "html" # 设置内容类型为HTML
# 导入邮件日志模型并创建日志记录
from servermanager.models import EmailSendLog from servermanager.models import EmailSendLog
log = EmailSendLog() log = EmailSendLog()
log.title = title log.title = title
log.content = content log.content = content
log.emailto = ','.join(emailto) log.emailto = ','.join(emailto) # 将收件人列表转换为字符串存储
try: try:
# 尝试发送邮件send()方法返回发送成功的邮件数量
result = msg.send() result = msg.send()
log.send_result = result > 0 log.send_result = result > 0 # 记录发送结果(成功/失败)
except Exception as e: except Exception as e:
# 记录邮件发送异常信息
logger.error(f"失败邮箱号: {emailto}, {e}") logger.error(f"失败邮箱号: {emailto}, {e}")
log.send_result = False log.send_result = False
# 保存邮件发送日志记录
log.save() log.save()
@receiver(oauth_user_login_signal) @receiver(oauth_user_login_signal)
def oauth_user_login_signal_handler(sender, **kwargs): def oauth_user_login_signal_handler(sender, **kwargs):
"""
OAuth用户登录信号处理函数
处理第三方登录用户的头像保存和缓存清理
Args:
sender: 信号发送者
**kwargs: 包含用户ID参数
"""
# 从信号参数中获取用户ID
id = kwargs['id'] id = kwargs['id']
# 根据ID获取OAuth用户对象
oauthuser = OAuthUser.objects.get(id=id) oauthuser = OAuthUser.objects.get(id=id)
# 获取当前站点域名
site = get_current_site().domain site = get_current_site().domain
# 检查用户头像是否需要下载保存(非本站图片)
if oauthuser.picture and not oauthuser.picture.find(site) >= 0: if oauthuser.picture and not oauthuser.picture.find(site) >= 0:
# 导入头像保存工具函数
from djangoblog.utils import save_user_avatar from djangoblog.utils import save_user_avatar
# 下载并保存用户头像更新头像URL
oauthuser.picture = save_user_avatar(oauthuser.picture) oauthuser.picture = save_user_avatar(oauthuser.picture)
oauthuser.save() oauthuser.save()
# 清理侧边栏缓存,确保显示最新用户信息
delete_sidebar_cache() delete_sidebar_cache()
@ -73,42 +125,79 @@ def model_post_save_callback(
using, using,
update_fields, update_fields,
**kwargs): **kwargs):
clearcache = False """
模型保存后回调信号处理函数
监听所有模型的post_save信号执行相应的缓存清理和通知操作
Args:
sender: 保存的模型类
instance: 保存的模型实例
created: 是否为新建记录
raw: 是否为原始保存
using: 使用的数据库别名
update_fields: 更新的字段集合
**kwargs: 其他参数
"""
clearcache = False # 标记是否需要清理整个缓存
# 跳过管理员操作日志的保存处理
if isinstance(instance, LogEntry): if isinstance(instance, LogEntry):
return return
# 检查实例是否有get_full_url方法通常是文章等可访问的模型
if 'get_full_url' in dir(instance): if 'get_full_url' in dir(instance):
# 判断是否为仅更新浏览量字段
is_update_views = update_fields == {'views'} is_update_views = update_fields == {'views'}
# 非测试环境且非仅更新浏览量时,通知搜索引擎
if not settings.TESTING and not is_update_views: if not settings.TESTING and not is_update_views:
try: try:
# 获取实例的完整URL并通知百度搜索引擎
notify_url = instance.get_full_url() notify_url = instance.get_full_url()
SpiderNotify.baidu_notify([notify_url]) SpiderNotify.baidu_notify([notify_url])
except Exception as ex: except Exception as ex:
logger.error("notify sipder", ex) logger.error("notify sipder", ex)
# 非仅更新浏览量时标记需要清理缓存
if not is_update_views: if not is_update_views:
clearcache = True clearcache = True
# 处理评论保存的特殊逻辑
if isinstance(instance, Comment): if isinstance(instance, Comment):
# 只处理已启用的评论
if instance.is_enable: if instance.is_enable:
# 获取评论所属文章的URL路径
path = instance.article.get_absolute_url() path = instance.article.get_absolute_url()
site = get_current_site().domain site = get_current_site().domain
# 处理端口号(如果有)
if site.find(':') > 0: if site.find(':') > 0:
site = site[0:site.find(':')] site = site[0:site.find(':')]
# 使文章详情页缓存失效
expire_view_cache( expire_view_cache(
path, path,
servername=site, servername=site,
serverport=80, serverport=80,
key_prefix='blogdetail') key_prefix='blogdetail')
# 清理SEO处理器缓存
if cache.get('seo_processor'): if cache.get('seo_processor'):
cache.delete('seo_processor') cache.delete('seo_processor')
# 清理文章评论缓存
comment_cache_key = 'article_comments_{id}'.format( comment_cache_key = 'article_comments_{id}'.format(
id=instance.article.id) id=instance.article.id)
cache.delete(comment_cache_key) cache.delete(comment_cache_key)
# 清理侧边栏缓存和评论视图缓存
delete_sidebar_cache() delete_sidebar_cache()
delete_view_cache('article_comments', [str(instance.article.pk)]) delete_view_cache('article_comments', [str(instance.article.pk)])
# 在新线程中发送评论通知邮件(避免阻塞主线程)
_thread.start_new_thread(send_comment_email, (instance,)) _thread.start_new_thread(send_comment_email, (instance,))
# 如果需要清理整个缓存(文章等主要内容更新时)
if clearcache: if clearcache:
cache.clear() cache.clear()
@ -116,7 +205,20 @@ def model_post_save_callback(
@receiver(user_logged_in) @receiver(user_logged_in)
@receiver(user_logged_out) @receiver(user_logged_out)
def user_auth_callback(sender, request, user, **kwargs): def user_auth_callback(sender, request, user, **kwargs):
"""
用户登录/登出信号处理函数
处理用户认证状态变化时的缓存清理操作
Args:
sender: 信号发送者
request: HTTP请求对象
user: 用户对象
**kwargs: 其他参数
"""
# 确保用户对象存在且有用户名
if user and user.username: if user and user.username:
logger.info(user) logger.info(user) # 记录用户认证日志
delete_sidebar_cache() delete_sidebar_cache() # 清理侧边栏缓存
# cache.clear() # 注释掉的完整缓存清理(可根据需要启用)
# cache.clear()

@ -1,3 +1,17 @@
"""
Elasticsearch 搜索引擎集成模块
本模块提供了Django Haystack与Elasticsearch的深度集成实现了博客文章的全文搜索功能
包含自定义的后端查询类搜索表单和引擎配置支持智能推荐和高效检索
主要功能
- Elasticsearch文档的索引管理
- 高级布尔查询和过滤
- 搜索词智能推荐
- 搜索结果的高亮和评分
- 与Django Haystack框架的无缝集成
"""
from django.utils.encoding import force_str from django.utils.encoding import force_str
from elasticsearch_dsl import Q from elasticsearch_dsl import Q
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
@ -5,98 +19,218 @@ from haystack.forms import ModelSearchForm
from haystack.models import SearchResult from haystack.models import SearchResult
from haystack.utils import log as logging from haystack.utils import log as logging
# 导入博客相关的文档定义和管理器
from blog.documents import ArticleDocument, ArticleDocumentManager from blog.documents import ArticleDocument, ArticleDocumentManager
from blog.models import Article from blog.models import Article
# 初始化模块级日志器
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ElasticSearchBackend(BaseSearchBackend): class ElasticSearchBackend(BaseSearchBackend):
"""
Elasticsearch 搜索后端实现
继承自Haystack的BaseSearchBackend提供与Elasticsearch的交互功能
负责索引创建文档更新搜索执行和推荐词生成等核心操作
"""
def __init__(self, connection_alias, **connection_options): def __init__(self, connection_alias, **connection_options):
"""
初始化Elasticsearch后端
Args:
connection_alias: 连接别名
**connection_options: 连接配置选项
"""
super( super(
ElasticSearchBackend, ElasticSearchBackend,
self).__init__( self).__init__(
connection_alias, connection_alias,
**connection_options) **connection_options)
# 初始化文章文档管理器
self.manager = ArticleDocumentManager() self.manager = ArticleDocumentManager()
# 启用拼写建议功能
self.include_spelling = True self.include_spelling = True
def _get_models(self, iterable): def _get_models(self, iterable):
"""
获取模型并转换为文档
Args:
iterable: 模型实例集合
Returns:
list: 转换后的文档对象列表
"""
# 如果提供了模型集合则使用,否则获取所有文章
models = iterable if iterable and iterable[0] else Article.objects.all() models = iterable if iterable and iterable[0] else Article.objects.all()
# 将Django模型转换为Elasticsearch文档
docs = self.manager.convert_to_doc(models) docs = self.manager.convert_to_doc(models)
return docs return docs
def _create(self, models): def _create(self, models):
"""
创建索引并添加文档
Args:
models: 要创建索引的模型集合
"""
# 创建Elasticsearch索引
self.manager.create_index() self.manager.create_index()
# 获取并转换模型为文档
docs = self._get_models(models) docs = self._get_models(models)
# 重建索引(添加所有文档)
self.manager.rebuild(docs) self.manager.rebuild(docs)
def _delete(self, models): def _delete(self, models):
"""
删除文档
Args:
models: 要删除的模型集合
Returns:
bool: 删除操作结果
"""
# 遍历并删除每个模型对应的文档
for m in models: for m in models:
m.delete() m.delete()
return True return True
def _rebuild(self, models): def _rebuild(self, models):
"""
重建索引
Args:
models: 要重建索引的模型集合
"""
# 获取所有文章或指定模型集合
models = models if models else Article.objects.all() models = models if models else Article.objects.all()
# 转换模型为文档
docs = self.manager.convert_to_doc(models) docs = self.manager.convert_to_doc(models)
# 更新文档到索引
self.manager.update_docs(docs) self.manager.update_docs(docs)
def update(self, index, iterable, commit=True): def update(self, index, iterable, commit=True):
"""
更新索引文档
Args:
index: 索引名称
iterable: 要更新的模型集合
commit: 是否立即提交Elasticsearch自动提交此参数保留
"""
# 获取模型并转换为文档
models = self._get_models(iterable) models = self._get_models(iterable)
# 更新文档到索引
self.manager.update_docs(models) self.manager.update_docs(models)
def remove(self, obj_or_string): def remove(self, obj_or_string):
"""
移除单个文档
Args:
obj_or_string: 要移除的模型对象或标识
"""
# 获取要删除的模型文档
models = self._get_models([obj_or_string]) models = self._get_models([obj_or_string])
# 执行删除操作
self._delete(models) self._delete(models)
def clear(self, models=None, commit=True): def clear(self, models=None, commit=True):
"""
清空索引
Args:
models: 要清空的模型集合保留参数
commit: 是否立即提交保留参数
"""
# 移除所有文档传入None表示清空
self.remove(None) self.remove(None)
@staticmethod @staticmethod
def get_suggestion(query: str) -> str: def get_suggestion(query: str) -> str:
"""获取推荐词, 如果没有找到添加原搜索词""" """
获取搜索推荐词
使用Elasticsearch的suggest功能提供搜索词建议
如果没有找到合适的建议词则返回原搜索词
Args:
query (str): 原始搜索词
Returns:
str: 处理后的推荐搜索词
"""
# 构建搜索请求包含suggest功能
search = ArticleDocument.search() \ search = ArticleDocument.search() \
.query("match", body=query) \ .query("match", body=query) \
.suggest('suggest_search', query, term={'field': 'body'}) \ .suggest('suggest_search', query, term={'field': 'body'}) \
.execute() .execute()
keywords = [] keywords = []
# 处理suggest结果
for suggest in search.suggest.suggest_search: for suggest in search.suggest.suggest_search:
if suggest["options"]: if suggest["options"]:
# 使用推荐词
keywords.append(suggest["options"][0]["text"]) keywords.append(suggest["options"][0]["text"])
else: else:
# 没有推荐词时使用原词
keywords.append(suggest["text"]) keywords.append(suggest["text"])
# 将推荐词列表合并为字符串返回
return ' '.join(keywords) return ' '.join(keywords)
@log_query @log_query
def search(self, query_string, **kwargs): def search(self, query_string, **kwargs):
"""
执行搜索查询
核心搜索方法处理查询字符串并返回匹配的结果
支持分页过滤和拼写建议
Args:
query_string: 搜索查询字符串
**kwargs: 其他搜索参数分页偏移等
Returns:
dict: 包含搜索结果命中数分面信息和拼写建议的字典
"""
# 记录搜索查询日志
logger.info('search query_string:' + query_string) logger.info('search query_string:' + query_string)
# 获取分页参数
start_offset = kwargs.get('start_offset') start_offset = kwargs.get('start_offset')
end_offset = kwargs.get('end_offset') end_offset = kwargs.get('end_offset')
# 推荐词搜索 # 根据是否启用建议搜索,获取处理后的搜索词
if getattr(self, "is_suggest", None): if getattr(self, "is_suggest", None):
# 获取推荐搜索词
suggestion = self.get_suggestion(query_string) suggestion = self.get_suggestion(query_string)
else: else:
# 使用原搜索词
suggestion = query_string suggestion = query_string
# 构建布尔查询标题或正文匹配设置最小匹配度70%
q = Q('bool', q = Q('bool',
should=[Q('match', body=suggestion), Q('match', title=suggestion)], should=[Q('match', body=suggestion), Q('match', title=suggestion)],
minimum_should_match="70%") minimum_should_match="70%")
# 构建搜索请求:添加状态和类型过滤,设置分页
search = ArticleDocument.search() \ search = ArticleDocument.search() \
.query('bool', filter=[q]) \ .query('bool', filter=[q]) \
.filter('term', status='p') \ .filter('term', status='p') \
.filter('term', type='a') \ .filter('term', type='a') \
.source(False)[start_offset: end_offset] .source(False)[start_offset: end_offset]
# 执行搜索
results = search.execute() results = search.execute()
# 获取总命中数
hits = results['hits'].total hits = results['hits'].total
raw_results = [] raw_results = []
# 处理搜索结果转换为Haystack的SearchResult格式
for raw_result in results['hits']['hits']: for raw_result in results['hits']['hits']:
app_label = 'blog' app_label = 'blog'
model_name = 'Article' model_name = 'Article'
@ -104,47 +238,75 @@ class ElasticSearchBackend(BaseSearchBackend):
result_class = SearchResult result_class = SearchResult
# 创建SearchResult对象
result = result_class( result = result_class(
app_label, app_label,
model_name, model_name,
raw_result['_id'], raw_result['_id'], # 文档ID
raw_result['_score'], raw_result['_score'], # 匹配分数
**additional_fields) **additional_fields)
raw_results.append(result) raw_results.append(result)
# 分面信息(当前未使用)
facets = {} facets = {}
# 拼写建议:如果推荐词与原词不同则返回推荐词
spelling_suggestion = None if query_string == suggestion else suggestion spelling_suggestion = None if query_string == suggestion else suggestion
# 返回标准格式的搜索结果
return { return {
'results': raw_results, 'results': raw_results, # 搜索结果列表
'hits': hits, 'hits': hits, # 总命中数
'facets': facets, 'facets': facets, # 分面信息
'spelling_suggestion': spelling_suggestion, 'spelling_suggestion': spelling_suggestion, # 拼写建议
} }
class ElasticSearchQuery(BaseSearchQuery): class ElasticSearchQuery(BaseSearchQuery):
"""
Elasticsearch 查询构建器
继承自Haystack的BaseSearchQuery负责构建Elasticsearch查询
处理查询字符串的清理和参数构建
"""
def _convert_datetime(self, date): def _convert_datetime(self, date):
"""
转换日期时间格式
Args:
date: 日期时间对象
Returns:
str: 格式化后的日期时间字符串
"""
if hasattr(date, 'hour'): if hasattr(date, 'hour'):
# 包含时间的完整日期时间格式
return force_str(date.strftime('%Y%m%d%H%M%S')) return force_str(date.strftime('%Y%m%d%H%M%S'))
else: else:
# 仅日期格式,时间部分补零
return force_str(date.strftime('%Y%m%d000000')) return force_str(date.strftime('%Y%m%d000000'))
def clean(self, query_fragment): def clean(self, query_fragment):
""" """
Provides a mechanism for sanitizing user input before presenting the 清理查询片段
value to the backend.
对用户输入的查询词进行清理和转义处理防止注入攻击
Whoosh 1.X differs here in that you can no longer use a backslash Args:
to escape reserved characters. Instead, the whole word should be query_fragment: 原始查询片段
quoted.
Returns:
str: 清理后的查询字符串
""" """
words = query_fragment.split() words = query_fragment.split()
cleaned_words = [] cleaned_words = []
for word in words: for word in words:
# 处理保留字(转为小写)
if word in self.backend.RESERVED_WORDS: if word in self.backend.RESERVED_WORDS:
word = word.replace(word, word.lower()) word = word.replace(word, word.lower())
# 处理保留字符(用引号包围)
for char in self.backend.RESERVED_CHARACTERS: for char in self.backend.RESERVED_CHARACTERS:
if char in word: if char in word:
word = "'%s'" % word word = "'%s'" % word
@ -155,29 +317,86 @@ class ElasticSearchQuery(BaseSearchQuery):
return ' '.join(cleaned_words) return ' '.join(cleaned_words)
def build_query_fragment(self, field, filter_type, value): def build_query_fragment(self, field, filter_type, value):
"""
构建查询片段
Args:
field: 字段名
filter_type: 过滤器类型
value: 字段值
Returns:
str: 查询片段字符串
"""
return value.query_string return value.query_string
def get_count(self): def get_count(self):
"""
获取搜索结果数量
Returns:
int: 搜索结果数量
"""
results = self.get_results() results = self.get_results()
return len(results) if results else 0 return len(results) if results else 0
def get_spelling_suggestion(self, preferred_query=None): def get_spelling_suggestion(self, preferred_query=None):
"""
获取拼写建议
Args:
preferred_query: 优先查询词
Returns:
str: 拼写建议
"""
return self._spelling_suggestion return self._spelling_suggestion
def build_params(self, spelling_query=None): def build_params(self, spelling_query=None):
"""
构建搜索参数
Args:
spelling_query: 拼写查询词
Returns:
dict: 搜索参数字典
"""
kwargs = super(ElasticSearchQuery, self).build_params(spelling_query=spelling_query) kwargs = super(ElasticSearchQuery, self).build_params(spelling_query=spelling_query)
return kwargs return kwargs
class ElasticSearchModelSearchForm(ModelSearchForm):
    """Haystack ``ModelSearchForm`` that can switch suggestion search on or off."""

    def search(self):
        """Set the backend's ``is_suggest`` flag, then run the parent search.

        The flag is true unless the submitted form data contains the key
        ``is_suggest`` with the literal value ``"no"``.

        Returns:
            Whatever the parent ``search()`` returns.
        """
        suggest_enabled = self.data.get("is_suggest") != "no"
        self.searchqueryset.query.backend.is_suggest = suggest_enabled
        return super().search()
class ElasticSearchEngine(BaseEngine):
    """Haystack engine that combines the custom backend and query classes."""

    # Backend class used by this engine.
    backend = ElasticSearchBackend
    # Query class used by this engine.
    query = ElasticSearchQuery

@ -1,3 +1,16 @@
"""
RSS订阅源生成模块
本模块提供了DjangoBlog的RSS订阅功能基于Django的Feed框架实现
生成符合RSS 2.0标准的订阅源包含文章标题内容作者信息等
主要功能
- 生成博客文章的RSS订阅源
- 支持Markdown格式的内容渲染
- 提供作者信息和版权声明
- 符合RSS 2.0标准规范
"""
from django.contrib.auth import get_user_model from django.contrib.auth import get_user_model
from django.contrib.syndication.views import Feed from django.contrib.syndication.views import Feed
from django.utils import timezone from django.utils import timezone
@ -8,33 +21,72 @@ from djangoblog.utils import CommonMarkdown
class DjangoBlogFeed(Feed):
    """RSS feed listing the most recently published blog articles."""

    # Feed generator class used to render the feed.
    feed_type = Rss201rev2Feed
    # Feed description text.
    description = '大巧无工,重剑无锋.'
    # Feed title text.
    title = "且听风吟 大巧无工,重剑无锋. "
    # URL of the feed.
    link = "/feed/"

    def author_name(self):
        """Return the nickname of the first user, or ``None`` if there is none."""
        # ``first()`` yields None on an empty table; guard before reading attributes.
        first_user = get_user_model().objects.first()
        return first_user.nickname if first_user is not None else None

    def author_link(self):
        """Return the first user's absolute URL, or ``None`` if there is no user."""
        first_user = get_user_model().objects.first()
        return first_user.get_absolute_url() if first_user is not None else None

    def items(self):
        """Return the five newest articles with ``type='a'`` and ``status='p'``.

        Articles are ordered by ``pub_time`` descending.
        """
        return Article.objects.filter(type='a', status='p').order_by('-pub_time')[:5]

    def item_title(self, item):
        """Return the title of a feed item."""
        return item.title

    def item_description(self, item):
        """Return the item's body passed through ``CommonMarkdown.get_markdown``."""
        return CommonMarkdown.get_markdown(item.body)

    def feed_copyright(self):
        """Return the copyright notice containing the current year."""
        now = timezone.now()
        return "Copyright© {year} 且听风吟".format(year=now.year)

    def item_link(self, item):
        """Return the item's absolute URL."""
        return item.get_absolute_url()

    def item_guid(self, item):
        """Return ``None``; no explicit identifier is supplied for the item.

        NOTE(review): the original is a bare ``return`` -- confirm that
        omitting an explicit identifier is intended.
        """
        return None

@ -1,3 +1,16 @@
"""
管理员操作日志后台管理模块
本模块提供了Django管理员操作日志的自定义后台管理界面
用于查看和追踪管理员在后台的所有操作记录包括增删改等操作
主要功能
- 自定义日志列表显示格式
- 提供对象和用户的超链接跳转
- 权限控制和操作限制
- 搜索和过滤功能
"""
from django.contrib import admin from django.contrib import admin
from django.contrib.admin.models import DELETION from django.contrib.admin.models import DELETION
from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.models import ContentType
@ -9,45 +22,73 @@ from django.utils.translation import gettext_lazy as _
class LogEntryAdmin(admin.ModelAdmin): class LogEntryAdmin(admin.ModelAdmin):
"""
管理员操作日志后台管理类
自定义Django默认的LogEntry模型管理界面提供更好的用户体验和功能
"""
# 列表页过滤器配置 - 按内容类型过滤
list_filter = [ list_filter = [
'content_type' 'content_type'
] ]
# 搜索字段配置 - 支持按对象表示和变更消息搜索
search_fields = [ search_fields = [
'object_repr', 'object_repr',
'change_message' 'change_message'
] ]
# 列表页可点击的字段 - 操作时间作为链接
list_display_links = [ list_display_links = [
'action_time', 'action_time',
'get_change_message', 'get_change_message',
] ]
# 列表页显示的字段
list_display = [ list_display = [
'action_time', 'action_time', # 操作时间
'user_link', 'user_link', # 操作用户(带链接)
'content_type', 'content_type', # 内容类型
'object_link', 'object_link', # 操作对象(带链接)
'get_change_message', 'get_change_message', # 变更消息
] ]
def has_add_permission(self, request): def has_add_permission(self, request):
"""
禁用添加权限 - 日志记录只能由系统自动创建
"""
return False return False
def has_change_permission(self, request, obj=None): def has_change_permission(self, request, obj=None):
"""
控制修改权限 - 只允许超级用户或具有特定权限的用户查看
"""
return ( return (
request.user.is_superuser or request.user.is_superuser or
request.user.has_perm('admin.change_logentry') request.user.has_perm('admin.change_logentry')
) and request.method != 'POST' ) and request.method != 'POST'
def has_delete_permission(self, request, obj=None): def has_delete_permission(self, request, obj=None):
"""
禁用删除权限 - 防止误删重要的操作日志
"""
return False return False
def object_link(self, obj): def object_link(self, obj):
"""
生成操作对象的超链接
对于非删除操作尝试生成指向对象编辑页面的链接
如果是删除操作或无法生成链接则返回纯文本表示
"""
# 转义对象表示字符串防止XSS攻击
object_link = escape(obj.object_repr) object_link = escape(obj.object_repr)
content_type = obj.content_type content_type = obj.content_type
# 对于非删除操作且内容类型存在的情况,尝试生成链接
if obj.action_flag != DELETION and content_type is not None: if obj.action_flag != DELETION and content_type is not None:
# try returning an actual link instead of object repr string # 尝试返回实际链接而不是对象表示字符串
try: try:
url = reverse( url = reverse(
'admin:{}_{}_change'.format(content_type.app_label, 'admin:{}_{}_change'.format(content_type.app_label,
@ -56,17 +97,26 @@ class LogEntryAdmin(admin.ModelAdmin):
) )
object_link = '<a href="{}">{}</a>'.format(url, object_link) object_link = '<a href="{}">{}</a>'.format(url, object_link)
except NoReverseMatch: except NoReverseMatch:
# 如果无法生成反向URL保持原样
pass pass
return mark_safe(object_link) return mark_safe(object_link)
# 设置对象链接的排序字段和显示名称
object_link.admin_order_field = 'object_repr' object_link.admin_order_field = 'object_repr'
object_link.short_description = _('object') object_link.short_description = _('object')
def user_link(self, obj): def user_link(self, obj):
"""
生成操作用户的超链接
尝试生成指向用户编辑页面的链接如果无法生成则返回纯文本
"""
# 获取用户模型的内容类型
content_type = ContentType.objects.get_for_model(type(obj.user)) content_type = ContentType.objects.get_for_model(type(obj.user))
# 转义用户表示字符串
user_link = escape(force_str(obj.user)) user_link = escape(force_str(obj.user))
try: try:
# try returning an actual link instead of object repr string # 尝试返回实际链接而不是用户表示字符串
url = reverse( url = reverse(
'admin:{}_{}_change'.format(content_type.app_label, 'admin:{}_{}_change'.format(content_type.app_label,
content_type.model), content_type.model),
@ -74,18 +124,26 @@ class LogEntryAdmin(admin.ModelAdmin):
) )
user_link = '<a href="{}">{}</a>'.format(url, user_link) user_link = '<a href="{}">{}</a>'.format(url, user_link)
except NoReverseMatch: except NoReverseMatch:
# 如果无法生成反向URL保持原样
pass pass
return mark_safe(user_link) return mark_safe(user_link)
# 设置用户链接的排序字段和显示名称
user_link.admin_order_field = 'user' user_link.admin_order_field = 'user'
user_link.short_description = _('user') user_link.short_description = _('user')
def get_queryset(self, request): def get_queryset(self, request):
"""
优化查询集 - 预取关联的内容类型数据
"""
queryset = super(LogEntryAdmin, self).get_queryset(request) queryset = super(LogEntryAdmin, self).get_queryset(request)
return queryset.prefetch_related('content_type') return queryset.prefetch_related('content_type')
def get_actions(self, request): def get_actions(self, request):
"""
移除批量删除操作 - 防止误删日志记录
"""
actions = super(LogEntryAdmin, self).get_actions(request) actions = super(LogEntryAdmin, self).get_actions(request)
if 'delete_selected' in actions: if 'delete_selected' in actions:
del actions['delete_selected'] del actions['delete_selected']
return actions return actions

@ -1,41 +1,93 @@
"""
插件系统基础模块
本模块提供了插件系统的基础框架定义了所有插件的基类BasePlugin
实现了插件的元数据管理初始化流程钩子注册和插件信息获取等核心功能
主要功能
- 插件元数据定义和验证
- 标准化的插件初始化流程
- 钩子注册机制
- 插件信息统一管理
"""
import logging import logging
# 初始化模块级日志器,用于记录插件相关操作
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class BasePlugin:
    """Base class that concrete plugins derive from.

    Subclasses must set the three metadata attributes below to truthy
    values; construction validates them, then runs ``init_plugin()`` and
    ``register_hooks()`` in that order.
    """

    # Plugin metadata -- subclasses are expected to override all three.
    PLUGIN_NAME = None  # plugin name
    PLUGIN_DESCRIPTION = None  # description of what the plugin does
    PLUGIN_VERSION = None  # plugin version

    def __init__(self):
        """Validate the metadata, then initialise the plugin and register hooks.

        Raises:
            ValueError: If any of ``PLUGIN_NAME``, ``PLUGIN_DESCRIPTION`` or
                ``PLUGIN_VERSION`` is falsy.
        """
        if not all([self.PLUGIN_NAME, self.PLUGIN_DESCRIPTION, self.PLUGIN_VERSION]):
            raise ValueError("Plugin metadata (PLUGIN_NAME, PLUGIN_DESCRIPTION, PLUGIN_VERSION) must be defined.")

        self.init_plugin()
        self.register_hooks()

    def init_plugin(self):
        """Plugin initialisation step; subclasses may override it.

        The base implementation only logs that the plugin was initialised.
        """
        # Lazy %-style arguments: same message text as before.
        logger.info('%s initialized.', self.PLUGIN_NAME)

    def register_hooks(self):
        """Hook registration step; subclasses may override it.

        The base implementation does nothing.
        """
        pass

    def get_plugin_info(self):
        """Return the plugin metadata.

        Returns:
            dict: Keys ``'name'``, ``'description'`` and ``'version'`` mapped
            to the corresponding class attributes.
        """
        return {
            'name': self.PLUGIN_NAME,
            'description': self.PLUGIN_DESCRIPTION,
            'version': self.PLUGIN_VERSION,
        }

@ -1,7 +1,26 @@
"""
钩子事件常量定义模块
本模块定义了文章相关的钩子事件常量用于在插件系统中标识不同的事件类型
这些常量作为事件触发器名称用于在特定时机执行注册的钩子函数
主要用途
- 统一管理事件名称常量
- 提供类型安全的钩子标识
- 便于在插件系统中注册和触发事件
"""
# Hook names for article events.
ARTICLE_DETAIL_LOAD = 'article_detail_load'  # article detail event
ARTICLE_CREATE = 'article_create'            # article create event
ARTICLE_UPDATE = 'article_update'            # article update event
ARTICLE_DELETE = 'article_delete'            # article delete event

# Hook name used for processing article content.
ARTICLE_CONTENT_HOOK_NAME = "the_content"

@ -1,44 +1,116 @@
"""
钩子管理系统模块
本模块提供了完整的钩子Hook管理机制支持两种类型的钩子
1. Action Hook动作钩子按顺序执行注册的回调函数不返回值
2. Filter Hook过滤器钩子对输入值进行链式处理返回处理后的值
主要功能
- 钩子回调函数的注册管理
- 动作钩子的顺序执行
- 过滤器钩子的链式处理
- 完善的错误处理和日志记录
"""
import logging import logging
# Module-level logger for hook registration and dispatch.
logger = logging.getLogger(__name__)

# Global hook registry: {hook_name: [callback1, callback2, ...]}
_hooks = {}


def _callback_name(callback):
    """Return a printable name for *callback*, even when it has no ``__name__``."""
    # functools.partial objects and callable instances have no __name__.
    return getattr(callback, '__name__', None) or repr(callback)


def register(hook_name: str, callback: callable):
    """
    Register a callback under a hook name.

    Several callbacks may be registered under the same name; they are
    stored, and later invoked, in registration order.

    Args:
        hook_name (str): name of the hook.
        callback (callable): the callable to register.
    """
    _hooks.setdefault(hook_name, []).append(callback)
    logger.debug("Registered hook '%s' with callback '%s'", hook_name, _callback_name(callback))


def run_action(hook_name: str, *args, **kwargs):
    """
    Run an action hook.

    Calls every callback registered under *hook_name* in registration
    order and discards their return values. An exception raised by one
    callback is logged and does not stop the remaining callbacks.

    Args:
        hook_name (str): name of the hook to run.
        *args: positional arguments passed to each callback.
        **kwargs: keyword arguments passed to each callback.
    """
    if hook_name not in _hooks:
        return

    logger.debug("Running action hook '%s'", hook_name)
    for callback in _hooks[hook_name]:
        try:
            callback(*args, **kwargs)
        except Exception as e:
            # Name lookup must not raise here, or the remaining callbacks
            # would be skipped.
            logger.error(
                "Error running action hook '%s' callback '%s': %s",
                hook_name, _callback_name(callback), e,
                exc_info=True)


def apply_filters(hook_name: str, value, *args, **kwargs):
    """
    Run a filter hook.

    Passes *value* through every callback registered under *hook_name*;
    each callback's return value becomes the input of the next one. A
    callback that raises is logged and skipped, leaving the value as it
    was before that callback.

    Args:
        hook_name (str): name of the filter hook to apply.
        value: the initial value.
        *args: extra positional arguments passed to each callback.
        **kwargs: extra keyword arguments passed to each callback.

    Returns:
        The value after all callbacks have run, or *value* unchanged when
        nothing is registered under *hook_name*.
    """
    if hook_name in _hooks:
        logger.debug("Applying filter hook '%s'", hook_name)
        for callback in _hooks[hook_name]:
            try:
                value = callback(value, *args, **kwargs)
            except Exception as e:
                logger.error(
                    "Error applying filter hook '%s' callback '%s': %s",
                    hook_name, _callback_name(callback), e,
                    exc_info=True)
    return value

@ -1,19 +1,54 @@
"""
插件动态加载模块
本模块提供了插件系统的动态加载功能负责在Django应用启动时自动加载和初始化已激活的插件
通过扫描插件目录并导入插件模块实现插件的热插拔管理
主要功能
- 动态扫描插件目录
- 按配置加载激活的插件
- 插件模块的导入和初始化
- 加载状态的日志记录
"""
import os import os
import logging import logging
from django.conf import settings from django.conf import settings
# 初始化模块级日志器,用于记录插件加载过程
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def load_plugins():
    """
    Import every plugin listed in ``settings.ACTIVE_PLUGINS``.

    For each name, ``<settings.PLUGINS_DIR>/<name>/plugin.py`` must exist;
    the module ``plugins.<name>.plugin`` is then imported, which runs its
    module-level code. A configured plugin that cannot be found is skipped
    with a warning, and an ``ImportError`` during import is logged without
    stopping the remaining plugins.
    """
    import importlib  # local import keeps this block self-contained

    for plugin_name in settings.ACTIVE_PLUGINS:
        plugin_path = os.path.join(settings.PLUGINS_DIR, plugin_name)
        entry_point = os.path.join(plugin_path, 'plugin.py')

        if not (os.path.isdir(plugin_path) and os.path.exists(entry_point)):
            # Previously skipped silently, which hid misconfigured names.
            logger.warning("Skipping plugin '%s': %s not found", plugin_name, entry_point)
            continue

        try:
            importlib.import_module(f'plugins.{plugin_name}.plugin')
            logger.info(f"Successfully loaded plugin: {plugin_name}")
        except ImportError as e:
            logger.error(f"Failed to import plugin: {plugin_name}", exc_info=e)

@ -1,14 +1,20 @@
""" """
Django settings for djangoblog project. DjangoBlog 项目配置文件
Generated by 'django-admin startproject' using Django 1.10.2. 本模块包含DjangoBlog项目的所有配置设置包括数据库应用中间件国际化缓存邮件等
根据Django 1.10+的配置规范组织支持开发和生产环境的不同配置
For more information on this file, see
https://docs.djangoproject.com/en/1.10/topics/settings/ 主要配置类别
- 基础路径和密钥配置
For the full list of settings and their values, see - 应用和中间件配置
https://docs.djangoproject.com/en/1.10/ref/settings/ - 数据库和缓存配置
- 国际化设置
- 静态文件和媒体文件配置
- 邮件和日志配置
- 安全相关配置
- 搜索和插件系统配置
""" """
import os import os
import sys import sys
from pathlib import Path from pathlib import Path
@ -17,34 +23,46 @@ from django.utils.translation import gettext_lazy as _
def env_to_bool(env, default):
    """
    Read an environment variable as a boolean.

    Args:
        env: name of the environment variable.
        default: value returned when the variable is not set.

    Returns:
        ``default`` when the variable is unset; otherwise True when its
        value, ignoring case and surrounding whitespace, is one of
        ``true``, ``1``, ``yes`` or ``on``, and False for anything else.
    """
    str_val = os.environ.get(env)
    if str_val is None:
        return default
    # Accept common spellings; the original matched only the exact 'True'.
    return str_val.strip().lower() in ('true', '1', 'yes', 'on')
# Build paths inside the project like this: BASE_DIR / 'subdir'. # 构建项目基础路径 - 使用pathlib现代路径处理方式
BASE_DIR = Path(__file__).resolve().parent.parent BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production # 安全密钥配置 - 生产环境必须从环境变量获取
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = os.environ.get( SECRET_KEY = os.environ.get(
'DJANGO_SECRET_KEY') or 'n9ceqv38)#&mwuat@(mjb_p%em$e8$qyr#fw9ot!=ba6lijx-6' 'DJANGO_SECRET_KEY') or 'n9ceqv38)#&mwuat@(mjb_p%em$e8$qyr#fw9ot!=ba6lijx-6'
# SECURITY WARNING: don't run with debug turned on in production!
# 调试模式开关 - 生产环境必须关闭
DEBUG = env_to_bool('DJANGO_DEBUG', True) DEBUG = env_to_bool('DJANGO_DEBUG', True)
# DEBUG = False
# 测试模式标识 - 根据命令行参数判断是否为测试环境
TESTING = len(sys.argv) > 1 and sys.argv[1] == 'test' TESTING = len(sys.argv) > 1 and sys.argv[1] == 'test'
# ALLOWED_HOSTS = [] # 允许的主机名配置 - 生产环境需要具体指定
ALLOWED_HOSTS = ['*', '127.0.0.1', 'example.com'] ALLOWED_HOSTS = ['*', '127.0.0.1', 'example.com']
# django 4.0新增配置
CSRF_TRUSTED_ORIGINS = ['http://example.com']
# Application definition
# Django 4.0新增CSRF信任源配置
CSRF_TRUSTED_ORIGINS = ['http://example.com']
# 已安装应用列表 - 定义项目使用的所有Django应用
INSTALLED_APPS = [ INSTALLED_APPS = [
# 'django.contrib.admin', # 使用简化的Admin配置
'django.contrib.admin.apps.SimpleAdminConfig', 'django.contrib.admin.apps.SimpleAdminConfig',
# Django核心功能应用
'django.contrib.auth', 'django.contrib.auth',
'django.contrib.contenttypes', 'django.contrib.contenttypes',
'django.contrib.sessions', 'django.contrib.sessions',
@ -52,37 +70,54 @@ INSTALLED_APPS = [
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'django.contrib.sites', 'django.contrib.sites',
'django.contrib.sitemaps', 'django.contrib.sitemaps',
'mdeditor', # 第三方应用
'haystack', 'mdeditor', # Markdown编辑器
'blog', 'haystack', # 搜索框架
'accounts', 'compressor', # 静态文件压缩
'comments', # 项目自定义应用
'oauth', 'blog', # 博客核心功能
'servermanager', 'accounts', # 用户账户管理
'owntracks', 'comments', # 评论系统
'compressor', 'oauth', # OAuth认证
'djangoblog' 'servermanager', # 服务器管理
'owntracks', # 位置追踪
'djangoblog' # 项目主应用
] ]
# 中间件配置 - 定义请求处理管道
MIDDLEWARE = [ MIDDLEWARE = [
# 安全相关中间件
'django.middleware.security.SecurityMiddleware', 'django.middleware.security.SecurityMiddleware',
# 会话管理中间件
'django.contrib.sessions.middleware.SessionMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware',
# 国际化中间件
'django.middleware.locale.LocaleMiddleware', 'django.middleware.locale.LocaleMiddleware',
# Gzip压缩中间件
'django.middleware.gzip.GZipMiddleware', 'django.middleware.gzip.GZipMiddleware',
# 缓存中间件(注释状态)
# 'django.middleware.cache.UpdateCacheMiddleware', # 'django.middleware.cache.UpdateCacheMiddleware',
# 通用中间件
'django.middleware.common.CommonMiddleware', 'django.middleware.common.CommonMiddleware',
# 缓存中间件(注释状态)
# 'django.middleware.cache.FetchFromCacheMiddleware', # 'django.middleware.cache.FetchFromCacheMiddleware',
# CSRF保护中间件
'django.middleware.csrf.CsrfViewMiddleware', 'django.middleware.csrf.CsrfViewMiddleware',
# 认证中间件
'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware',
# 消息中间件
'django.contrib.messages.middleware.MessageMiddleware', 'django.contrib.messages.middleware.MessageMiddleware',
# 点击劫持保护中间件
'django.middleware.clickjacking.XFrameOptionsMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware',
# 条件GET中间件
'django.middleware.http.ConditionalGetMiddleware', 'django.middleware.http.ConditionalGetMiddleware',
# 自定义在线用户中间件
'blog.middleware.OnlineMiddleware' 'blog.middleware.OnlineMiddleware'
] ]
# 根URL配置
ROOT_URLCONF = 'djangoblog.urls' ROOT_URLCONF = 'djangoblog.urls'
# 模板配置
TEMPLATES = [ TEMPLATES = [
{ {
'BACKEND': 'django.template.backends.django.DjangoTemplates', 'BACKEND': 'django.template.backends.django.DjangoTemplates',
@ -94,18 +129,17 @@ TEMPLATES = [
'django.template.context_processors.request', 'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth', 'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages', 'django.contrib.messages.context_processors.messages',
# 自定义SEO处理器
'blog.context_processors.seo_processor' 'blog.context_processors.seo_processor'
], ],
}, },
}, },
] ]
# WSGI应用配置
WSGI_APPLICATION = 'djangoblog.wsgi.application' WSGI_APPLICATION = 'djangoblog.wsgi.application'
# Database # 数据库配置 - 使用MySQL作为默认数据库
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
DATABASES = { DATABASES = {
'default': { 'default': {
'ENGINE': 'django.db.backends.mysql', 'ENGINE': 'django.db.backends.mysql',
@ -117,9 +151,7 @@ DATABASES = {
} }
} }
# Password validation # 密码验证器配置
# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [ AUTH_PASSWORD_VALIDATORS = [
{ {
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
@ -135,62 +167,75 @@ AUTH_PASSWORD_VALIDATORS = [
}, },
] ]
# 国际化配置
LANGUAGES = ( LANGUAGES = (
('en', _('English')), ('en', _('English')),
('zh-hans', _('Simplified Chinese')), ('zh-hans', _('Simplified Chinese')),
('zh-hant', _('Traditional Chinese')), ('zh-hant', _('Traditional Chinese')),
) )
# 本地化文件路径
LOCALE_PATHS = ( LOCALE_PATHS = (
os.path.join(BASE_DIR, 'locale'), os.path.join(BASE_DIR, 'locale'),
) )
# 默认语言代码
LANGUAGE_CODE = 'zh-hans' LANGUAGE_CODE = 'zh-hans'
# 时区配置
TIME_ZONE = 'Asia/Shanghai' TIME_ZONE = 'Asia/Shanghai'
# 国际化开关
USE_I18N = True USE_I18N = True
# 本地化开关
USE_L10N = True USE_L10N = True
# 时区支持开关
USE_TZ = False USE_TZ = False
# Static files (CSS, JavaScript, Images) # Haystack搜索配置
# https://docs.djangoproject.com/en/1.10/howto/static-files/
HAYSTACK_CONNECTIONS = { HAYSTACK_CONNECTIONS = {
'default': { 'default': {
'ENGINE': 'djangoblog.whoosh_cn_backend.WhooshEngine', 'ENGINE': 'djangoblog.whoosh_cn_backend.WhooshEngine',
'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'), 'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'),
}, },
} }
# Automatically update searching index
# 实时更新搜索索引
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
# Allow user login with username and password
# 认证后端配置 - 支持邮箱或用户名登录
AUTHENTICATION_BACKENDS = [ AUTHENTICATION_BACKENDS = [
'accounts.user_login_backend.EmailOrUsernameModelBackend'] 'accounts.user_login_backend.EmailOrUsernameModelBackend']
# 静态文件配置
STATIC_ROOT = os.path.join(BASE_DIR, 'collectedstatic') STATIC_ROOT = os.path.join(BASE_DIR, 'collectedstatic')
STATIC_URL = '/static/' STATIC_URL = '/static/'
STATICFILES = os.path.join(BASE_DIR, 'static') STATICFILES = os.path.join(BASE_DIR, 'static')
# 自定义用户模型
AUTH_USER_MODEL = 'accounts.BlogUser' AUTH_USER_MODEL = 'accounts.BlogUser'
# 登录URL
LOGIN_URL = '/login/' LOGIN_URL = '/login/'
# 时间格式配置
TIME_FORMAT = '%Y-%m-%d %H:%M:%S' TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
DATE_TIME_FORMAT = '%Y-%m-%d' DATE_TIME_FORMAT = '%Y-%m-%d'
# bootstrap color styles # Bootstrap颜色类型
BOOTSTRAP_COLOR_TYPES = [ BOOTSTRAP_COLOR_TYPES = [
'default', 'primary', 'success', 'info', 'warning', 'danger' 'default', 'primary', 'success', 'info', 'warning', 'danger'
] ]
# paginate # 分页配置
PAGINATE_BY = 10 PAGINATE_BY = 10
# http cache timeout
# HTTP缓存超时时间
CACHE_CONTROL_MAX_AGE = 2592000 CACHE_CONTROL_MAX_AGE = 2592000
# cache setting
# 缓存配置 - 默认使用本地内存缓存
CACHES = { CACHES = {
'default': { 'default': {
'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
@ -198,7 +243,8 @@ CACHES = {
'LOCATION': 'unique-snowflake', 'LOCATION': 'unique-snowflake',
} }
} }
# 使用redis作为缓存
# 如果配置了Redis环境变量则使用Redis缓存
if os.environ.get("DJANGO_REDIS_URL"): if os.environ.get("DJANGO_REDIS_URL"):
CACHES = { CACHES = {
'default': { 'default': {
@ -207,11 +253,14 @@ if os.environ.get("DJANGO_REDIS_URL"):
} }
} }
# 站点ID
SITE_ID = 1 SITE_ID = 1
# 百度站长平台通知URL
BAIDU_NOTIFY_URL = os.environ.get('DJANGO_BAIDU_NOTIFY_URL') \ BAIDU_NOTIFY_URL = os.environ.get('DJANGO_BAIDU_NOTIFY_URL') \
or 'http://data.zz.baidu.com/urls?site=https://www.lylinux.net&token=1uAOGrMsUm5syDGn' or 'http://data.zz.baidu.com/urls?site=https://www.lylinux.net&token=1uAOGrMsUm5syDGn'
# Email: # 邮件配置
EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend'
EMAIL_USE_TLS = env_to_bool('DJANGO_EMAIL_TLS', False) EMAIL_USE_TLS = env_to_bool('DJANGO_EMAIL_TLS', False)
EMAIL_USE_SSL = env_to_bool('DJANGO_EMAIL_SSL', True) EMAIL_USE_SSL = env_to_bool('DJANGO_EMAIL_SSL', True)
@ -221,12 +270,15 @@ EMAIL_HOST_USER = os.environ.get('DJANGO_EMAIL_USER')
EMAIL_HOST_PASSWORD = os.environ.get('DJANGO_EMAIL_PASSWORD') EMAIL_HOST_PASSWORD = os.environ.get('DJANGO_EMAIL_PASSWORD')
DEFAULT_FROM_EMAIL = EMAIL_HOST_USER DEFAULT_FROM_EMAIL = EMAIL_HOST_USER
SERVER_EMAIL = EMAIL_HOST_USER SERVER_EMAIL = EMAIL_HOST_USER
# Setting debug=false did NOT handle except email notifications
# 管理员邮箱配置 - 用于错误报告
ADMINS = [('admin', os.environ.get('DJANGO_ADMIN_EMAIL') or 'admin@admin.com')] ADMINS = [('admin', os.environ.get('DJANGO_ADMIN_EMAIL') or 'admin@admin.com')]
# WX ADMIN password(Two times md5)
# 微信管理员密码两次MD5加密
WXADMIN = os.environ.get( WXADMIN = os.environ.get(
'DJANGO_WXADMIN_PASSWORD') or '995F03AC401D6CABABAEF756FC4D43C7' 'DJANGO_WXADMIN_PASSWORD') or '995F03AC401D6CABABAEF756FC4D43C7'
# 日志配置
LOG_PATH = os.path.join(BASE_DIR, 'logs') LOG_PATH = os.path.join(BASE_DIR, 'logs')
if not os.path.exists(LOG_PATH): if not os.path.exists(LOG_PATH):
os.makedirs(LOG_PATH, exist_ok=True) os.makedirs(LOG_PATH, exist_ok=True)
@ -292,36 +344,41 @@ LOGGING = {
} }
} }
# 静态文件查找器配置
STATICFILES_FINDERS = ( STATICFILES_FINDERS = (
'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.FileSystemFinder',
'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder',
# other # 压缩器查找器
'compressor.finders.CompressorFinder', 'compressor.finders.CompressorFinder',
) )
# 静态文件压缩配置
COMPRESS_ENABLED = True COMPRESS_ENABLED = True
# COMPRESS_OFFLINE = True # COMPRESS_OFFLINE = True
COMPRESS_CSS_FILTERS = [ COMPRESS_CSS_FILTERS = [
# creates absolute urls from relative ones # 从相对URL创建绝对URL
'compressor.filters.css_default.CssAbsoluteFilter', 'compressor.filters.css_default.CssAbsoluteFilter',
# css minimizer # CSS压缩过滤器
'compressor.filters.cssmin.CSSMinFilter' 'compressor.filters.cssmin.CSSMinFilter'
] ]
COMPRESS_JS_FILTERS = [ COMPRESS_JS_FILTERS = [
'compressor.filters.jsmin.JSMinFilter' 'compressor.filters.jsmin.JSMinFilter'
] ]
# 媒体文件配置
MEDIA_ROOT = os.path.join(BASE_DIR, 'uploads') MEDIA_ROOT = os.path.join(BASE_DIR, 'uploads')
MEDIA_URL = '/media/' MEDIA_URL = '/media/'
# 框架选项配置
X_FRAME_OPTIONS = 'SAMEORIGIN' X_FRAME_OPTIONS = 'SAMEORIGIN'
# 安全头部配置 - 防XSS和其他攻击 # 安全头部配置
SECURE_BROWSER_XSS_FILTER = True SECURE_BROWSER_XSS_FILTER = True
SECURE_CONTENT_TYPE_NOSNIFF = True SECURE_CONTENT_TYPE_NOSNIFF = True
SECURE_REFERRER_POLICY = 'strict-origin-when-cross-origin' SECURE_REFERRER_POLICY = 'strict-origin-when-cross-origin'
# 内容安全策略 (CSP) - 防XSS攻击 # 内容安全策略配置
CSP_DEFAULT_SRC = ["'self'"] CSP_DEFAULT_SRC = ["'self'"]
CSP_SCRIPT_SRC = ["'self'", "'unsafe-inline'", "cdn.mathjax.org", "*.googleapis.com"] CSP_SCRIPT_SRC = ["'self'", "'unsafe-inline'", "cdn.mathjax.org", "*.googleapis.com"]
CSP_STYLE_SRC = ["'self'", "'unsafe-inline'", "*.googleapis.com", "*.gstatic.com"] CSP_STYLE_SRC = ["'self'", "'unsafe-inline'", "*.googleapis.com", "*.gstatic.com"]
@ -331,8 +388,10 @@ CSP_CONNECT_SRC = ["'self'"]
CSP_FRAME_SRC = ["'none'"] CSP_FRAME_SRC = ["'none'"]
CSP_OBJECT_SRC = ["'none'"] CSP_OBJECT_SRC = ["'none'"]
# 默认自增主键字段类型
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
# Elasticsearch配置如果配置了环境变量
if os.environ.get('DJANGO_ELASTICSEARCH_HOST'): if os.environ.get('DJANGO_ELASTICSEARCH_HOST'):
ELASTICSEARCH_DSL = { ELASTICSEARCH_DSL = {
'default': { 'default': {
@ -345,7 +404,7 @@ if os.environ.get('DJANGO_ELASTICSEARCH_HOST'):
}, },
} }
# Plugin System # 插件系统配置
PLUGINS_DIR = BASE_DIR / 'plugins' PLUGINS_DIR = BASE_DIR / 'plugins'
ACTIVE_PLUGINS = [ ACTIVE_PLUGINS = [
'article_copyright', 'article_copyright',
@ -354,5 +413,4 @@ ACTIVE_PLUGINS = [
'view_count', 'view_count',
'seo_optimizer', 'seo_optimizer',
'image_lazy_loading', 'image_lazy_loading',
] ]

@ -1,3 +1,16 @@
"""
站点地图生成模块
本模块定义了DjangoBlog的站点地图(Sitemap)配置用于生成搜索引擎友好的XML站点地图
包含静态页面文章分类标签和用户页面的站点地图配置
主要功能
- 生成符合搜索引擎标准的XML站点地图
- 为不同类型的内容设置不同的更新频率和优先级
- 提供最后修改时间信息
- 帮助搜索引擎更好地索引网站内容
"""
from django.contrib.sitemaps import Sitemap from django.contrib.sitemaps import Sitemap
from django.urls import reverse from django.urls import reverse
@ -5,55 +18,166 @@ from blog.models import Article, Category, Tag
class StaticViewSitemap(Sitemap):
    """Sitemap section for static, named-URL pages (currently the blog index)."""

    changefreq = 'daily'
    priority = 0.5

    def items(self):
        """Return the URL names to include in this section."""
        url_names = ['blog:index']
        return url_names

    def location(self, item):
        """
        Resolve a URL name to its path.

        Args:
            item: a URL name as returned by ``items()``.

        Returns:
            The path produced by ``reverse(item)``.
        """
        return reverse(item)
class ArticleSiteMap(Sitemap):
    """Sitemap section listing articles whose status is 'p'."""

    priority = "0.6"
    changefreq = "monthly"

    def items(self):
        """Return the queryset of articles with ``status='p'``."""
        published = Article.objects.filter(status='p')
        return published

    def lastmod(self, obj):
        """
        Return the article's last modification time.

        Args:
            obj: an article from ``items()``.
        """
        return obj.last_modify_time
class CategorySiteMap(Sitemap):
    """Sitemap section listing every category."""

    # The sitemap protocol only defines lowercase changefreq values;
    # the original "Weekly" is not one of them.
    changefreq = "weekly"
    priority = "0.6"

    def items(self):
        """Return the queryset of all categories."""
        return Category.objects.all()

    def lastmod(self, obj):
        """
        Return the category's last modification time.

        Args:
            obj: a category from ``items()``.
        """
        return obj.last_modify_time
class TagSiteMap(Sitemap):
    """Sitemap section listing every tag."""

    # The sitemap protocol only defines lowercase changefreq values;
    # the original "Weekly" is not one of them.
    changefreq = "weekly"
    priority = "0.3"

    def items(self):
        """Return the queryset of all tags."""
        return Tag.objects.all()

    def lastmod(self, obj):
        """
        Return the tag's last modification time.

        Args:
            obj: a tag from ``items()``.
        """
        return obj.last_modify_time
class UserSiteMap(Sitemap):
    """Sitemap section listing the authors of all articles."""

    # The sitemap protocol only defines lowercase changefreq values;
    # the original "Weekly" is not one of them.
    changefreq = "weekly"
    priority = "0.3"

    def items(self):
        """
        Return the distinct authors of all articles.

        Returns:
            list: each author once. The order comes from a set and is
            therefore not guaranteed.
        """
        authors = {article.author for article in Article.objects.all()}
        return list(authors)

    def lastmod(self, obj):
        """
        Return the author's ``date_joined`` value.

        Args:
            obj: an author from ``items()``.
        """
        return obj.date_joined

@ -1,21 +1,65 @@
"""
搜索引擎蜘蛛通知模块
本模块提供了向搜索引擎主动推送URL更新的功能主要用于通知搜索引擎及时抓取网站内容更新
目前主要支持百度搜索引擎的URL推送接口
主要功能
- 向百度站长平台推送URL更新
- 批量推送URL列表
- 错误处理和日志记录
- 统一的推送接口封装
"""
import logging import logging
import requests import requests
from django.conf import settings from django.conf import settings
# 初始化模块级日志器
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class SpiderNotify():
    """Static helpers that push updated URLs to a search engine."""

    @staticmethod
    def baidu_notify(urls):
        """
        POST URLs to the endpoint configured in ``settings.BAIDU_NOTIFY_URL``.

        The URLs are sent newline-separated. The response body is logged;
        any exception is logged and swallowed, so this never raises.

        Args:
            urls: a single URL string or an iterable of URL strings.
        """
        # A bare string is itself iterable: joining it would put every
        # character on its own line, so wrap it first.
        if isinstance(urls, str):
            urls = [urls]
        try:
            data = '\n'.join(urls)
            # requests has no default timeout; without one a stalled
            # endpoint would block the caller indefinitely.
            result = requests.post(settings.BAIDU_NOTIFY_URL, data=data, timeout=10)
            logger.info(result.text)
        except Exception as e:
            # Best-effort notification: log and carry on.
            logger.error(e)

    @staticmethod
    def notify(url):
        """
        Push one URL, or a list of URLs, to the search engine.

        Args:
            url: a single URL string or a list of URL strings.
        """
        SpiderNotify.baidu_notify(url)

@ -1,15 +1,51 @@
"""
DjangoBlog 单元测试模块
本模块包含DjangoBlog项目的单元测试用例用于验证工具函数和核心功能的正确性
基于Django的TestCase框架确保代码质量和功能稳定性
测试功能
- 加密工具函数测试
- Markdown渲染功能测试
- 字典转换URL参数测试
"""
from django.test import TestCase from django.test import TestCase
from djangoblog.utils import * from djangoblog.utils import *
class DjangoBlogTest(TestCase): class DjangoBlogTest(TestCase):
"""
DjangoBlog 核心功能测试类
继承自Django的TestCase提供项目核心功能的自动化测试
"""
def setUp(self): def setUp(self):
"""
测试前置设置方法
在每个测试方法执行前运行用于初始化测试环境
当前测试用例无需特殊设置保留空实现
"""
pass pass
def test_utils(self): def test_utils(self):
"""
工具函数综合测试方法
测试工具模块中的多个核心功能
1. SHA256加密功能
2. Markdown文本渲染功能
3. 字典转URL参数字符串功能
"""
# 测试SHA256加密功能
md5 = get_sha256('test') md5 = get_sha256('test')
# 验证加密结果不为空
self.assertIsNotNone(md5) self.assertIsNotNone(md5)
# 测试Markdown渲染功能
c = CommonMarkdown.get_markdown(''' c = CommonMarkdown.get_markdown('''
# Title1 # Title1
@ -23,10 +59,14 @@ class DjangoBlogTest(TestCase):
''') ''')
# 验证Markdown渲染结果不为空
self.assertIsNotNone(c) self.assertIsNotNone(c)
# 测试字典转URL参数功能
d = { d = {
'd': 'key1', 'd': 'key1',
'd2': 'key2' 'd2': 'key2'
} }
data = parse_dict_to_url(d) data = parse_dict_to_url(d)
self.assertIsNotNone(data) # 验证转换结果不为空
self.assertIsNotNone(data)

@ -1,18 +1,19 @@
"""djangoblog URL Configuration """
DjangoBlog 项目URL配置模块
本模块定义了DjangoBlog项目的所有URL路由配置包括管理后台博客评论用户认证等功能的URL映射
采用Django 1.10+的URL配置方式支持国际化路由和静态文件服务
The `urlpatterns` list routes URLs to views. For more information please see: 主要路由分组
https://docs.djangoproject.com/en/1.10/topics/http/urls/ - 国际化路由配置
Examples: - 管理后台路由
Function views - 博客应用路由
1. Add an import: from my_app import views - 第三方应用路由
2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') - 站点地图和订阅源
Class-based views - 搜索功能路由
1. Add an import: from other_app.views import Home - 静态文件服务
2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.conf.urls import url, include
2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls'))
""" """
from django.conf import settings from django.conf import settings
from django.conf.urls.i18n import i18n_patterns from django.conf.urls.i18n import i18n_patterns
from django.conf.urls.static import static from django.conf.urls.static import static
@ -23,12 +24,14 @@ from haystack.views import search_view_factory
from django.http import JsonResponse from django.http import JsonResponse
import time import time
# 导入项目自定义模块
from blog.views import EsSearchView from blog.views import EsSearchView
from djangoblog.admin_site import admin_site from djangoblog.admin_site import admin_site
from djangoblog.elasticsearch_backend import ElasticSearchModelSearchForm from djangoblog.elasticsearch_backend import ElasticSearchModelSearchForm
from djangoblog.feeds import DjangoBlogFeed from djangoblog.feeds import DjangoBlogFeed
from djangoblog.sitemap import ArticleSiteMap, CategorySiteMap, StaticViewSitemap, TagSiteMap, UserSiteMap from djangoblog.sitemap import ArticleSiteMap, CategorySiteMap, StaticViewSitemap, TagSiteMap, UserSiteMap
# 站点地图配置字典 - 定义不同类型内容的站点地图
sitemaps = { sitemaps = {
'blog': ArticleSiteMap, 'blog': ArticleSiteMap,
@ -38,6 +41,7 @@ sitemaps = {
'static': StaticViewSitemap 'static': StaticViewSitemap
} }
# 自定义错误处理视图配置
handler404 = 'blog.views.page_not_found_view'
handler500 = 'blog.views.server_error_view'
# Django looks up the name `handler403`; the original `handle403` was never
# read, so the custom 403 view was not used.
# NOTE(review): confirm permission_denied_view accepts (request, exception).
handler403 = 'blog.views.permission_denied_view'
# Misspelled name kept as an alias in case other code references it.
handle403 = handler403
@ -53,10 +57,12 @@ def health_check(request):
'timestamp': time.time() 'timestamp': time.time()
}) })
# 基础URL模式配置 - 不包含语言前缀的URL
urlpatterns = [ urlpatterns = [
path('i18n/', include('django.conf.urls.i18n')), path('i18n/', include('django.conf.urls.i18n')),
path('health/', health_check, name='health_check'), path('health/', health_check, name='health_check'),
] ]
# 国际化URL模式配置 - 自动添加语言前缀的URL
urlpatterns += i18n_patterns( urlpatterns += i18n_patterns(
re_path(r'^admin/', admin_site.urls), re_path(r'^admin/', admin_site.urls),
re_path(r'', include('blog.urls', namespace='blog')), re_path(r'', include('blog.urls', namespace='blog')),
@ -73,6 +79,7 @@ urlpatterns += i18n_patterns(
re_path(r'', include('servermanager.urls', namespace='servermanager')), re_path(r'', include('servermanager.urls', namespace='servermanager')),
re_path(r'', include('owntracks.urls', namespace='owntracks')) re_path(r'', include('owntracks.urls', namespace='owntracks'))
, prefix_default_language=False) + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) , prefix_default_language=False) + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)
# 开发环境媒体文件服务配置
if settings.DEBUG: if settings.DEBUG:
urlpatterns += static(settings.MEDIA_URL, urlpatterns += static(settings.MEDIA_URL,
document_root=settings.MEDIA_ROOT) document_root=settings.MEDIA_ROOT)

@ -1,6 +1,20 @@
#!/usr/bin/env python #!/usr/bin/env python
# encoding: utf-8 # encoding: utf-8
"""
DjangoBlog 通用工具函数模块
本模块提供了DjangoBlog项目的各种通用工具函数包括缓存装饰器Markdown处理
邮件发送安全过滤等核心功能这些工具函数在整个项目中广泛使用
主要功能
- 缓存管理和装饰器
- Markdown文本处理和转换
- 电子邮件发送功能
- 安全HTML过滤和XSS防护
- 随机码生成和URL处理
- 用户头像下载和管理
"""
import logging import logging
import os import os
@ -17,33 +31,65 @@ from django.contrib.sites.models import Site
from django.core.cache import cache from django.core.cache import cache
from django.templatetags.static import static from django.templatetags.static import static
# 初始化模块级日志器
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def get_max_articleid_commentid():
    """
    Return the primary keys of the latest article and the latest comment.

    Returns:
        tuple: ``(Article.objects.latest().pk, Comment.objects.latest().pk)``.
    """
    # Imported here rather than at module level, as in the original.
    from blog.models import Article
    from comments.models import Comment

    latest_article_pk = Article.objects.latest().pk
    latest_comment_pk = Comment.objects.latest().pk
    return (latest_article_pk, latest_comment_pk)
def get_sha256(str):
    """
    Return the SHA-256 hex digest of a string.

    Args:
        str: the text to hash; it is encoded as UTF-8 first.

    Returns:
        The digest as a hexadecimal string.
    """
    return sha256(str.encode('utf-8')).hexdigest()
def cache_decorator(expiration=3 * 60): def cache_decorator(expiration=3 * 60):
"""
缓存装饰器
为函数添加缓存功能减少重复计算和数据库查询
Args:
expiration: 缓存过期时间默认3分钟
Returns:
function: 装饰后的函数
"""
def wrapper(func): def wrapper(func):
def news(*args, **kwargs): def news(*args, **kwargs):
try: try:
# 尝试从视图类获取缓存键
view = args[0] view = args[0]
key = view.get_cache_key() key = view.get_cache_key()
except: except:
key = None key = None
if not key: if not key:
# 生成基于函数参数的唯一缓存键
unique_str = repr((func, args, kwargs)) unique_str = repr((func, args, kwargs))
m = sha256(unique_str.encode('utf-8')) m = sha256(unique_str.encode('utf-8'))
key = m.hexdigest() key = m.hexdigest()
# 尝试从缓存获取值
value = cache.get(key) value = cache.get(key)
if value is not None: if value is not None:
# logger.info('cache_decorator get cache:%s key:%s' % (func.__name__, key)) # logger.info('cache_decorator get cache:%s key:%s' % (func.__name__, key))
@ -52,6 +98,7 @@ def cache_decorator(expiration=3 * 60):
else: else:
return value return value
else: else:
# 缓存未命中,执行函数并设置缓存
logger.debug( logger.debug(
'cache_decorator set cache:%s key:%s' % 'cache_decorator set cache:%s key:%s' %
(func.__name__, key)) (func.__name__, key))
@ -70,19 +117,27 @@ def cache_decorator(expiration=3 * 60):
def expire_view_cache(path, servername, serverport, key_prefix=None): def expire_view_cache(path, servername, serverport, key_prefix=None):
''' '''
刷新视图缓存 刷新视图缓存
:param path:url路径
:param servername:host 使指定路径的视图缓存失效确保内容更新后及时反映
:param serverport:端口
:param key_prefix:前缀 Args:
:return:是否成功 path: URL路径
servername: 主机名
serverport: 端口号
key_prefix: 缓存键前缀
Returns:
bool: 是否成功删除缓存
''' '''
from django.http import HttpRequest from django.http import HttpRequest
from django.utils.cache import get_cache_key from django.utils.cache import get_cache_key
# 创建模拟请求对象用于生成缓存键
request = HttpRequest() request = HttpRequest()
request.META = {'SERVER_NAME': servername, 'SERVER_PORT': serverport} request.META = {'SERVER_NAME': servername, 'SERVER_PORT': serverport}
request.path = path request.path = path
# 获取缓存键并删除对应缓存
key = get_cache_key(request, key_prefix=key_prefix, cache=cache) key = get_cache_key(request, key_prefix=key_prefix, cache=cache)
if key: if key:
logger.info('expire_view_cache:get key:{path}'.format(path=path)) logger.info('expire_view_cache:get key:{path}'.format(path=path))
@ -94,19 +149,43 @@ def expire_view_cache(path, servername, serverport, key_prefix=None):
@cache_decorator()
def get_current_site():
    """
    Return the current Django site.

    The lookup is delegated to ``Site.objects.get_current()``; the function
    is wrapped by ``cache_decorator()`` using that decorator's default
    expiration.

    Returns:
        Site: The object returned by ``Site.objects.get_current()``.
    """
    return Site.objects.get_current()
class CommonMarkdown: class CommonMarkdown:
"""
Markdown处理工具类
提供Markdown文本到HTML的转换功能支持代码高亮和目录生成
"""
@staticmethod @staticmethod
def _convert_markdown(value): def _convert_markdown(value):
"""
内部Markdown转换方法
Args:
value: Markdown格式文本
Returns:
tuple: (转换后的HTML内容, 生成的目录)
"""
# 配置Markdown扩展
md = markdown.Markdown( md = markdown.Markdown(
extensions=[ extensions=[
'extra', 'extra', # 额外语法支持
'codehilite', 'codehilite', # 代码高亮
'toc', 'toc', # 目录生成
'tables', 'tables', # 表格支持
] ]
) )
body = md.convert(value) body = md.convert(value)
@ -115,16 +194,44 @@ class CommonMarkdown:
@staticmethod
def get_markdown_with_toc(value):
    """
    Convert Markdown text and return the body together with its table of contents.

    Args:
        value: Markdown source text.

    Returns:
        tuple: The ``(body, toc)`` pair produced by
        ``CommonMarkdown._convert_markdown``.
    """
    body, toc = CommonMarkdown._convert_markdown(value)
    return body, toc
@staticmethod
def get_markdown(value):
    """
    Convert Markdown text and return only the body.

    Args:
        value: Markdown source text.

    Returns:
        The first element of the pair produced by
        ``CommonMarkdown._convert_markdown``; the table of contents is
        discarded.
    """
    body, _toc = CommonMarkdown._convert_markdown(value)
    return body
def send_email(emailto, title, content): def send_email(emailto, title, content):
"""
发送电子邮件
通过信号机制异步发送邮件
Args:
emailto: 收件人邮箱地址
title: 邮件标题
content: 邮件内容
"""
from djangoblog.blog_signals import send_email_signal from djangoblog.blog_signals import send_email_signal
send_email_signal.send( send_email_signal.send(
send_email.__class__, send_email.__class__,
@ -139,6 +246,15 @@ def generate_code() -> str:
def parse_dict_to_url(dict): def parse_dict_to_url(dict):
"""
将字典转换为URL参数字符串
Args:
dict: 参数字典
Returns:
str: URL参数字符串
"""
from urllib.parse import quote from urllib.parse import quote
url = '&'.join(['{}={}'.format(quote(k, safe='/'), quote(v, safe='/')) url = '&'.join(['{}={}'.format(quote(k, safe='/'), quote(v, safe='/'))
for k, v in dict.items()]) for k, v in dict.items()])
@ -146,11 +262,21 @@ def parse_dict_to_url(dict):
def get_blog_setting(): def get_blog_setting():
"""
获取博客设置
返回博客的全局设置信息带缓存功能
如果设置不存在则创建默认设置
Returns:
BlogSettings: 博客设置对象
"""
value = cache.get('get_blog_setting') value = cache.get('get_blog_setting')
if value: if value:
return value return value
else: else:
from blog.models import BlogSettings from blog.models import BlogSettings
# 如果不存在设置记录,创建默认设置
if not BlogSettings.objects.count(): if not BlogSettings.objects.count():
setting = BlogSettings() setting = BlogSettings()
setting.site_name = 'djangoblog' setting.site_name = 'djangoblog'
@ -176,32 +302,48 @@ def get_blog_setting():
def save_user_avatar(url):
    """
    Download a remote avatar and store it in the static ``avatar`` directory.

    The image is fetched with a 2 second timeout and written under
    ``settings.STATICFILES/avatar`` with a random UUID-based file name.

    Args:
        url: Remote URL of the avatar image.

    Returns:
        str: Static URL of the saved file. The static URL of the default
        avatar (``blog/img/avatar.png``) is returned when the server does
        not answer with HTTP 200 or when any exception is raised.
    """
    from urllib.parse import urlparse

    default_avatar = 'blog/img/avatar.png'
    logger.info(url)
    try:
        basedir = os.path.join(settings.STATICFILES, 'avatar')
        rsp = requests.get(url, timeout=2)
        if rsp.status_code != 200:
            # A non-200 answer used to fall off the end of the function and
            # return None; fall back to the default avatar instead.
            return static(default_avatar)

        os.makedirs(basedir, exist_ok=True)

        # Inspect only the path component so that a query string such as
        # "avatar.png?s=64" does not hide the real extension.
        ext = os.path.splitext(urlparse(url).path)[1].lower()
        if ext not in ('.jpg', '.jpeg', '.png', '.gif'):
            ext = '.jpg'

        # A random name avoids collisions between users.
        save_filename = uuid.uuid4().hex + ext
        save_path = os.path.join(basedir, save_filename)
        logger.info('保存用户头像:' + save_path)
        with open(save_path, 'wb') as file:
            file.write(rsp.content)
        return static('avatar/' + save_filename)
    except Exception as e:
        # Best effort: a failed download must not break the caller.
        logger.error(e)
        return static(default_avatar)
def delete_sidebar_cache(): def delete_sidebar_cache():
"""
删除侧边栏缓存
清理所有侧边栏相关的缓存数据
"""
from blog.models import LinkShowType from blog.models import LinkShowType
keys = ["sidebar" + x for x in LinkShowType.values] keys = ["sidebar" + x for x in LinkShowType.values]
for k in keys: for k in keys:
@ -210,12 +352,27 @@ def delete_sidebar_cache():
def delete_view_cache(prefix, keys):
    """
    Delete one cached template fragment.

    Args:
        prefix: Fragment name handed to ``make_template_fragment_key``.
        keys: Second argument handed to ``make_template_fragment_key``
            (the values the fragment varies on).
    """
    from django.core.cache.utils import make_template_fragment_key

    cache.delete(make_template_fragment_key(prefix, keys))
def get_resource_url(): def get_resource_url():
"""
获取资源URL基础路径
Returns:
str: 静态资源基础URL
"""
if settings.STATIC_URL: if settings.STATIC_URL:
return settings.STATIC_URL return settings.STATIC_URL
else: else:
@ -223,6 +380,7 @@ def get_resource_url():
return 'http://' + site.domain + '/static/' return 'http://' + site.domain + '/static/'
# Whitelist of HTML tags; passed as ``tags=`` to bleach in sanitize_html().
ALLOWED_TAGS = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', 'h1',
                'h2', 'p', 'span', 'div']
@ -235,6 +393,7 @@ ALLOWED_CLASSES = [
's1', 'ss', 'bp', 'vc', 'vg', 'vi', 'il' 's1', 'ss', 'bp', 'vc', 'vg', 'vi', 'il'
] ]
def class_filter(tag, name, value): def class_filter(tag, name, value):
"""自定义class属性过滤器""" """自定义class属性过滤器"""
if name == 'class': if name == 'class':
@ -243,10 +402,11 @@ def class_filter(tag, name, value):
return ' '.join(allowed_classes) if allowed_classes else False return ' '.join(allowed_classes) if allowed_classes else False
return value return value
# 安全的属性白名单 # 安全的属性白名单
ALLOWED_ATTRIBUTES = { ALLOWED_ATTRIBUTES = {
'a': ['href', 'title'], 'a': ['href', 'title'],
'abbr': ['title'], 'abbr': ['title'],
'acronym': ['title'], 'acronym': ['title'],
'span': class_filter, 'span': class_filter,
'div': class_filter, 'div': class_filter,
@ -257,16 +417,24 @@ ALLOWED_ATTRIBUTES = {
# Whitelist of URL schemes; keeps dangerous schemes such as javascript: out.
ALLOWED_PROTOCOLS = ['http', 'https', 'mailto']
def sanitize_html(html):
    """
    Clean HTML with bleach using the module-level whitelists.

    Filtering is whitelist based (tags, attributes and URL schemes) and is
    intended to prevent XSS.

    Args:
        html: HTML content to clean.

    Returns:
        The value returned by ``bleach.clean`` for the given input.
    """
    return bleach.clean(
        html,
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        protocols=ALLOWED_PROTOCOLS,  # restrict the allowed URL schemes
        strip=True,  # remove disallowed tags instead of escaping them
        strip_comments=True,  # remove HTML comments
    )

@ -1,5 +1,19 @@
# encoding: utf-8 # encoding: utf-8
"""
Whoosh中文搜索后端模块
本模块提供了基于Whoosh搜索引擎的中文全文搜索功能专门针对Django Haystack框架进行定制
集成了jieba中文分词器支持中文文本的高效索引和搜索
主要特性
- 中文分词支持使用jieba
- 高性能索引和搜索
- 拼写建议和查询高亮
- 多字段类型支持文本数字日期等
- 与Django Haystack框架深度集成
"""
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
import json import json
@ -40,30 +54,39 @@ except ImportError:
raise MissingDependency( raise MissingDependency(
"The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.") "The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.")
# Handle minimum requirement: refuse to load with Whoosh older than 2.5.0
# (or with a build that does not expose ``__version__``).
if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
    raise MissingDependency(
        "The 'whoosh' backend requires version 2.5.0 or greater.")
# Pattern for datetime strings of the form YYYY-MM-DDTHH:MM:SS with an
# optional fractional part (3-6 digits, optionally followed by "Z").
# Written as a raw string so "\d" and "\." are not treated as (invalid)
# string escapes; the pattern text itself is unchanged.
DATETIME_REGEX = re.compile(
    r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
# Thread-local holder for the in-memory index storage (read back as
# LOCALS.RAM_STORE when file storage is not used).
# NOTE(review): this assignment only sets the attribute for the importing
# thread - confirm other threads initialise RAM_STORE before reading it.
LOCALS = threading.local()
LOCALS.RAM_STORE = None
class WhooshHtmlFormatter(HtmlFormatter):
    """
    A HtmlFormatter simpler than the one shipped with Whoosh.

    It is used to get consistent highlighting results across backends;
    Solr, Xapian and Elasticsearch use this formatting.
    """
    # Wrap the highlighted text in a bare <tag>...</tag> pair.
    template = '<%(tag)s>%(t)s</%(tag)s>'
class WhooshSearchBackend(BaseSearchBackend): class WhooshSearchBackend(BaseSearchBackend):
# Word reserved by Whoosh for special use. """
Whoosh搜索后端实现
继承自Haystack的BaseSearchBackend提供Whoosh搜索引擎的核心功能
支持文件存储和内存存储两种方式
"""
# Whoosh保留关键字
RESERVED_WORDS = ( RESERVED_WORDS = (
'AND', 'AND',
'NOT', 'NOT',
@ -71,15 +94,20 @@ class WhooshSearchBackend(BaseSearchBackend):
'TO', 'TO',
) )
# Characters reserved by Whoosh for special use. # Whoosh保留字符
# The '\\' must come first, so as not to overwrite the other slash
# replacements.
RESERVED_CHARACTERS = ( RESERVED_CHARACTERS = (
'\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
'[', ']', '^', '"', '~', '*', '?', ':', '.', '[', ']', '^', '"', '~', '*', '?', ':', '.',
) )
def __init__(self, connection_alias, **connection_options): def __init__(self, connection_alias, **connection_options):
"""
初始化Whoosh搜索后端
Args:
connection_alias: 连接别名
**connection_options: 连接配置选项
"""
super( super(
WhooshSearchBackend, WhooshSearchBackend,
self).__init__( self).__init__(
@ -93,9 +121,11 @@ class WhooshSearchBackend(BaseSearchBackend):
128 * 1024 * 1024) 128 * 1024 * 1024)
self.path = connection_options.get('PATH') self.path = connection_options.get('PATH')
# 检查存储类型
if connection_options.get('STORAGE', 'file') != 'file': if connection_options.get('STORAGE', 'file') != 'file':
self.use_file_storage = False self.use_file_storage = False
# 文件存储必须指定路径
if self.use_file_storage and not self.path: if self.use_file_storage and not self.path:
raise ImproperlyConfigured( raise ImproperlyConfigured(
"You must specify a 'PATH' in your settings for connection '%s'." % "You must specify a 'PATH' in your settings for connection '%s'." %
@ -105,21 +135,26 @@ class WhooshSearchBackend(BaseSearchBackend):
def setup(self): def setup(self):
""" """
Defers loading until needed. 初始化设置
延迟加载在需要时进行初始化
创建或打开索引构建schema
""" """
from haystack import connections from haystack import connections
new_index = False new_index = False
# Make sure the index is there. # 确保索引目录存在
if self.use_file_storage and not os.path.exists(self.path): if self.use_file_storage and not os.path.exists(self.path):
os.makedirs(self.path) os.makedirs(self.path)
new_index = True new_index = True
# 检查目录写入权限
if self.use_file_storage and not os.access(self.path, os.W_OK): if self.use_file_storage and not os.access(self.path, os.W_OK):
raise IOError( raise IOError(
"The path to your Whoosh index '%s' is not writable for the current user/group." % "The path to your Whoosh index '%s' is not writable for the current user/group." %
self.path) self.path)
# 初始化存储
if self.use_file_storage: if self.use_file_storage:
self.storage = FileStorage(self.path) self.storage = FileStorage(self.path)
else: else:
@ -130,10 +165,12 @@ class WhooshSearchBackend(BaseSearchBackend):
self.storage = LOCALS.RAM_STORE self.storage = LOCALS.RAM_STORE
# 构建schema和解析器
self.content_field_name, self.schema = self.build_schema( self.content_field_name, self.schema = self.build_schema(
connections[self.connection_alias].get_unified_index().all_searchfields()) connections[self.connection_alias].get_unified_index().all_searchfields())
self.parser = QueryParser(self.content_field_name, schema=self.schema) self.parser = QueryParser(self.content_field_name, schema=self.schema)
# 创建或打开索引
if new_index is True: if new_index is True:
self.index = self.storage.create_index(self.schema) self.index = self.storage.create_index(self.schema)
else: else:
@ -145,18 +182,30 @@ class WhooshSearchBackend(BaseSearchBackend):
self.setup_complete = True self.setup_complete = True
def build_schema(self, fields): def build_schema(self, fields):
"""
构建Whoosh schema
根据字段定义创建Whoosh索引schema
Args:
fields: 字段定义字典
Returns:
tuple: (内容字段名, schema对象)
"""
# 基础字段
schema_fields = { schema_fields = {
ID: WHOOSH_ID(stored=True, unique=True), ID: WHOOSH_ID(stored=True, unique=True),
DJANGO_CT: WHOOSH_ID(stored=True), DJANGO_CT: WHOOSH_ID(stored=True),
DJANGO_ID: WHOOSH_ID(stored=True), DJANGO_ID: WHOOSH_ID(stored=True),
} }
# Grab the number of keys that are hard-coded into Haystack.
# We'll use this to (possibly) fail slightly more gracefully later.
initial_key_count = len(schema_fields) initial_key_count = len(schema_fields)
content_field_name = '' content_field_name = ''
# 处理每个字段
for field_name, field_class in fields.items(): for field_name, field_class in fields.items():
if field_class.is_multivalued: if field_class.is_multivalued:
# 多值字段
if field_class.indexed is False: if field_class.indexed is False:
schema_fields[field_class.index_fieldname] = IDLIST( schema_fields[field_class.index_fieldname] = IDLIST(
stored=True, field_boost=field_class.boost) stored=True, field_boost=field_class.boost)
@ -164,35 +213,42 @@ class WhooshSearchBackend(BaseSearchBackend):
schema_fields[field_class.index_fieldname] = KEYWORD( schema_fields[field_class.index_fieldname] = KEYWORD(
stored=True, commas=True, scorable=True, field_boost=field_class.boost) stored=True, commas=True, scorable=True, field_boost=field_class.boost)
elif field_class.field_type in ['date', 'datetime']: elif field_class.field_type in ['date', 'datetime']:
# 日期时间字段
schema_fields[field_class.index_fieldname] = DATETIME( schema_fields[field_class.index_fieldname] = DATETIME(
stored=field_class.stored, sortable=True) stored=field_class.stored, sortable=True)
elif field_class.field_type == 'integer': elif field_class.field_type == 'integer':
# 整数字段
schema_fields[field_class.index_fieldname] = NUMERIC( schema_fields[field_class.index_fieldname] = NUMERIC(
stored=field_class.stored, numtype=int, field_boost=field_class.boost) stored=field_class.stored, numtype=int, field_boost=field_class.boost)
elif field_class.field_type == 'float': elif field_class.field_type == 'float':
# 浮点数字段
schema_fields[field_class.index_fieldname] = NUMERIC( schema_fields[field_class.index_fieldname] = NUMERIC(
stored=field_class.stored, numtype=float, field_boost=field_class.boost) stored=field_class.stored, numtype=float, field_boost=field_class.boost)
elif field_class.field_type == 'boolean': elif field_class.field_type == 'boolean':
# Field boost isn't supported on BOOLEAN as of 1.8.2. # 布尔字段
schema_fields[field_class.index_fieldname] = BOOLEAN( schema_fields[field_class.index_fieldname] = BOOLEAN(
stored=field_class.stored) stored=field_class.stored)
elif field_class.field_type == 'ngram': elif field_class.field_type == 'ngram':
# N-gram字段
schema_fields[field_class.index_fieldname] = NGRAM( schema_fields[field_class.index_fieldname] = NGRAM(
minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost) minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
elif field_class.field_type == 'edge_ngram': elif field_class.field_type == 'edge_ngram':
# 边缘N-gram字段
schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start',
stored=field_class.stored, stored=field_class.stored,
field_boost=field_class.boost) field_boost=field_class.boost)
else: else:
# 文本字段 - 使用中文分析器
# schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True) # schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
schema_fields[field_class.index_fieldname] = TEXT( schema_fields[field_class.index_fieldname] = TEXT(
stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True) stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)
# 标记内容字段
if field_class.document is True: if field_class.document is True:
content_field_name = field_class.index_fieldname content_field_name = field_class.index_fieldname
schema_fields[field_class.index_fieldname].spelling = True schema_fields[field_class.index_fieldname].spelling = True
# Fail more gracefully than relying on the backend to die if no fields # 检查是否有有效字段
# are found.
if len(schema_fields) <= initial_key_count: if len(schema_fields) <= initial_key_count:
raise SearchBackendError( raise SearchBackendError(
"No fields were found in any search_indexes. Please correct this before attempting to search.") "No fields were found in any search_indexes. Please correct this before attempting to search.")
@ -200,6 +256,14 @@ class WhooshSearchBackend(BaseSearchBackend):
return (content_field_name, Schema(**schema_fields)) return (content_field_name, Schema(**schema_fields))
def update(self, index, iterable, commit=True): def update(self, index, iterable, commit=True):
"""
更新索引
Args:
index: 搜索索引
iterable: 可迭代对象
commit: 是否提交更改
"""
if not self.setup_complete: if not self.setup_complete:
self.setup() self.setup()
@ -212,12 +276,11 @@ class WhooshSearchBackend(BaseSearchBackend):
except SkipDocument: except SkipDocument:
self.log.debug(u"Indexing for object `%s` skipped", obj) self.log.debug(u"Indexing for object `%s` skipped", obj)
else: else:
# Really make sure it's unicode, because Whoosh won't have it any # 确保所有值为unicode
# other way.
for key in doc: for key in doc:
doc[key] = self._from_python(doc[key]) doc[key] = self._from_python(doc[key])
# Document boosts aren't supported in Whoosh 2.5.0+. # Whoosh 2.5.0+不支持文档boost
if 'boost' in doc: if 'boost' in doc:
del doc['boost'] del doc['boost']
@ -227,9 +290,6 @@ class WhooshSearchBackend(BaseSearchBackend):
if not self.silently_fail: if not self.silently_fail:
raise raise
# We'll log the object identifier but won't include the actual object
# to avoid the possibility of that generating encoding errors while
# processing the log message:
self.log.error( self.log.error(
u"%s while preparing object for update" % u"%s while preparing object for update" %
e.__class__.__name__, e.__class__.__name__,
@ -239,12 +299,18 @@ class WhooshSearchBackend(BaseSearchBackend):
"index": index, "index": index,
"object": get_identifier(obj)}}) "object": get_identifier(obj)}})
# 提交更改
if len(iterable) > 0: if len(iterable) > 0:
# For now, commit no matter what, as we run into locking issues
# otherwise.
writer.commit() writer.commit()
def remove(self, obj_or_string, commit=True): def remove(self, obj_or_string, commit=True):
"""
移除文档
Args:
obj_or_string: 对象或标识符
commit: 是否提交更改
"""
if not self.setup_complete: if not self.setup_complete:
self.setup() self.setup()
@ -267,6 +333,13 @@ class WhooshSearchBackend(BaseSearchBackend):
exc_info=True) exc_info=True)
def clear(self, models=None, commit=True): def clear(self, models=None, commit=True):
"""
清空索引
Args:
models: 要清空的模型列表
commit: 是否提交更改
"""
if not self.setup_complete: if not self.setup_complete:
self.setup() self.setup()
@ -304,17 +377,27 @@ class WhooshSearchBackend(BaseSearchBackend):
"Failed to clear Whoosh index: %s", e, exc_info=True) "Failed to clear Whoosh index: %s", e, exc_info=True)
def delete_index(self):
    """
    Remove the whole index and set the backend up again.

    With file storage the index directory is removed from disk; otherwise
    ``self.storage.clean()`` is called. ``self.setup()`` then recreates
    everything.
    """
    # Per the Whoosh mailing list, if wiping out everything from the index,
    # it's much more efficient to simply delete the index files.
    if self.use_file_storage and os.path.exists(self.path):
        shutil.rmtree(self.path)
    elif not self.use_file_storage:
        self.storage.clean()

    # Recreate everything.
    self.setup()
def optimize(self): def optimize(self):
"""
优化索引
提高搜索性能
"""
if not self.setup_complete: if not self.setup_complete:
self.setup() self.setup()
@ -322,12 +405,21 @@ class WhooshSearchBackend(BaseSearchBackend):
self.index.optimize() self.index.optimize()
def calculate_page(self, start_offset=0, end_offset=None): def calculate_page(self, start_offset=0, end_offset=None):
# Prevent against Whoosh throwing an error. Requires an end_offset """
# greater than 0. 计算分页参数
Args:
start_offset: 起始偏移量
end_offset: 结束偏移量
Returns:
tuple: (页码, 页大小)
"""
# 防止Whoosh错误
if end_offset is not None and end_offset <= 0: if end_offset is not None and end_offset <= 0:
end_offset = 1 end_offset = 1
# Determine the page. # 确定页码
page_num = 0 page_num = 0
if end_offset is None: if end_offset is None:
@ -341,7 +433,7 @@ class WhooshSearchBackend(BaseSearchBackend):
if page_length and page_length > 0: if page_length and page_length > 0:
page_num = int(start_offset / page_length) page_num = int(start_offset / page_length)
# Increment because Whoosh uses 1-based page numbers. # Whoosh使用1-based页码
page_num += 1 page_num += 1
return page_num, page_length return page_num, page_length
@ -366,10 +458,15 @@ class WhooshSearchBackend(BaseSearchBackend):
limit_to_registered_models=None, limit_to_registered_models=None,
result_class=None, result_class=None,
**kwargs): **kwargs):
"""
执行搜索查询
核心搜索方法处理各种搜索参数和选项
"""
if not self.setup_complete: if not self.setup_complete:
self.setup() self.setup()
# A zero length query should return no results. # 空查询返回无结果
if len(query_string) == 0: if len(query_string) == 0:
return { return {
'results': [], 'results': [],
@ -378,8 +475,7 @@ class WhooshSearchBackend(BaseSearchBackend):
query_string = force_str(query_string) query_string = force_str(query_string)
# A one-character query (non-wildcard) gets nabbed by a stopwords # 单字符查询(非通配符)返回无结果
# filter and should yield zero results.
if len(query_string) <= 1 and query_string != u'*': if len(query_string) <= 1 and query_string != u'*':
return { return {
'results': [], 'results': [],
@ -388,10 +484,8 @@ class WhooshSearchBackend(BaseSearchBackend):
reverse = False reverse = False
# 处理排序
if sort_by is not None: if sort_by is not None:
# Determine if we need to reverse the results and if Whoosh can
# handle what it's being asked to sort by. Reversing is an
# all-or-nothing action, unfortunately.
sort_by_list = [] sort_by_list = []
reverse_counter = 0 reverse_counter = 0
@ -399,6 +493,7 @@ class WhooshSearchBackend(BaseSearchBackend):
if order_by.startswith('-'): if order_by.startswith('-'):
reverse_counter += 1 reverse_counter += 1
# Whoosh要求所有排序字段方向一致
if reverse_counter and reverse_counter != len(sort_by): if reverse_counter and reverse_counter != len(sort_by):
raise SearchBackendError("Whoosh requires all order_by fields" raise SearchBackendError("Whoosh requires all order_by fields"
" to use the same sort direction") " to use the same sort direction")
@ -406,17 +501,16 @@ class WhooshSearchBackend(BaseSearchBackend):
for order_by in sort_by: for order_by in sort_by:
if order_by.startswith('-'): if order_by.startswith('-'):
sort_by_list.append(order_by[1:]) sort_by_list.append(order_by[1:])
if len(sort_by_list) == 1: if len(sort_by_list) == 1:
reverse = True reverse = True
else: else:
sort_by_list.append(order_by) sort_by_list.append(order_by)
if len(sort_by_list) == 1: if len(sort_by_list) == 1:
reverse = False reverse = False
sort_by = sort_by_list[0] sort_by = sort_by_list[0]
# Whoosh不支持facet功能
if facets is not None: if facets is not None:
warnings.warn( warnings.warn(
"Whoosh does not handle faceting.", "Whoosh does not handle faceting.",
@ -438,6 +532,7 @@ class WhooshSearchBackend(BaseSearchBackend):
narrowed_results = None narrowed_results = None
self.index = self.index.refresh() self.index = self.index.refresh()
# 模型限制处理
if limit_to_registered_models is None: if limit_to_registered_models is None:
limit_to_registered_models = getattr( limit_to_registered_models = getattr(
settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
@ -445,12 +540,11 @@ class WhooshSearchBackend(BaseSearchBackend):
if models and len(models): if models and len(models):
model_choices = sorted(get_model_ct(model) for model in models) model_choices = sorted(get_model_ct(model) for model in models)
elif limit_to_registered_models: elif limit_to_registered_models:
# Using narrow queries, limit the results to only models handled
# with the current routers.
model_choices = self.build_models_list() model_choices = self.build_models_list()
else: else:
model_choices = [] model_choices = []
# 构建窄查询
if len(model_choices) > 0: if len(model_choices) > 0:
if narrow_queries is None: if narrow_queries is None:
narrow_queries = set() narrow_queries = set()
@ -460,9 +554,8 @@ class WhooshSearchBackend(BaseSearchBackend):
narrow_searcher = None narrow_searcher = None
# 处理窄查询
if narrow_queries is not None: if narrow_queries is not None:
# Potentially expensive? I don't see another way to do it in
# Whoosh...
narrow_searcher = self.index.searcher() narrow_searcher = self.index.searcher()
for nq in narrow_queries: for nq in narrow_queries:
@ -482,11 +575,12 @@ class WhooshSearchBackend(BaseSearchBackend):
self.index = self.index.refresh() self.index = self.index.refresh()
# 执行搜索
if self.index.doc_count(): if self.index.doc_count():
searcher = self.index.searcher() searcher = self.index.searcher()
parsed_query = self.parser.parse(query_string) parsed_query = self.parser.parse(query_string)
# In the event of an invalid/stopworded query, recover gracefully. # 处理无效查询
if parsed_query is None: if parsed_query is None:
return { return {
'results': [], 'results': [],
@ -502,7 +596,7 @@ class WhooshSearchBackend(BaseSearchBackend):
'reverse': reverse, 'reverse': reverse,
} }
# Handle the case where the results have been narrowed. # 应用窄查询过滤
if narrowed_results is not None: if narrowed_results is not None:
search_kwargs['filter'] = narrowed_results search_kwargs['filter'] = narrowed_results
@ -522,8 +616,7 @@ class WhooshSearchBackend(BaseSearchBackend):
'spelling_suggestion': None, 'spelling_suggestion': None,
} }
# Because as of Whoosh 2.5.1, it will return the wrong page of # 检查页码有效性
# results if you request something too high. :(
if raw_page.pagenum < page_num: if raw_page.pagenum < page_num:
return { return {
'results': [], 'results': [],
@ -531,6 +624,7 @@ class WhooshSearchBackend(BaseSearchBackend):
'spelling_suggestion': None, 'spelling_suggestion': None,
} }
# 处理搜索结果
results = self._process_results( results = self._process_results(
raw_page, raw_page,
highlight=highlight, highlight=highlight,
@ -544,6 +638,7 @@ class WhooshSearchBackend(BaseSearchBackend):
return results return results
else: else:
# 无文档时的处理
if self.include_spelling: if self.include_spelling:
if spelling_query: if spelling_query:
spelling_suggestion = self.create_spelling_suggestion( spelling_suggestion = self.create_spelling_suggestion(
@ -570,18 +665,21 @@ class WhooshSearchBackend(BaseSearchBackend):
limit_to_registered_models=None, limit_to_registered_models=None,
result_class=None, result_class=None,
**kwargs): **kwargs):
"""
查找相似文档
基于给定模型实例查找相似内容
"""
if not self.setup_complete: if not self.setup_complete:
self.setup() self.setup()
# Deferred models will have a different class ("RealClass_Deferred_fieldname")
# which won't be in our registry:
model_klass = model_instance._meta.concrete_model model_klass = model_instance._meta.concrete_model
field_name = self.content_field_name field_name = self.content_field_name
narrow_queries = set() narrow_queries = set()
narrowed_results = None narrowed_results = None
self.index = self.index.refresh() self.index = self.index.refresh()
# 模型限制处理
if limit_to_registered_models is None: if limit_to_registered_models is None:
limit_to_registered_models = getattr( limit_to_registered_models = getattr(
settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
@ -589,12 +687,11 @@ class WhooshSearchBackend(BaseSearchBackend):
if models and len(models): if models and len(models):
model_choices = sorted(get_model_ct(model) for model in models) model_choices = sorted(get_model_ct(model) for model in models)
elif limit_to_registered_models: elif limit_to_registered_models:
# Using narrow queries, limit the results to only models handled
# with the current routers.
model_choices = self.build_models_list() model_choices = self.build_models_list()
else: else:
model_choices = [] model_choices = []
# 构建查询
if len(model_choices) > 0: if len(model_choices) > 0:
if narrow_queries is None: if narrow_queries is None:
narrow_queries = set() narrow_queries = set()
@ -607,9 +704,8 @@ class WhooshSearchBackend(BaseSearchBackend):
narrow_searcher = None narrow_searcher = None
# 处理窄查询
if narrow_queries is not None: if narrow_queries is not None:
# Potentially expensive? I don't see another way to do it in
# Whoosh...
narrow_searcher = self.index.searcher() narrow_searcher = self.index.searcher()
for nq in narrow_queries: for nq in narrow_queries:
@ -632,6 +728,7 @@ class WhooshSearchBackend(BaseSearchBackend):
self.index = self.index.refresh() self.index = self.index.refresh()
raw_results = EmptyResults() raw_results = EmptyResults()
# 执行相似文档搜索
if self.index.doc_count(): if self.index.doc_count():
query = "%s:%s" % (ID, get_identifier(model_instance)) query = "%s:%s" % (ID, get_identifier(model_instance))
searcher = self.index.searcher() searcher = self.index.searcher()
@ -642,7 +739,7 @@ class WhooshSearchBackend(BaseSearchBackend):
raw_results = results[0].more_like_this( raw_results = results[0].more_like_this(
field_name, top=end_offset) field_name, top=end_offset)
# Handle the case where the results have been narrowed. # 应用窄查询过滤
if narrowed_results is not None and hasattr(raw_results, 'filter'): if narrowed_results is not None and hasattr(raw_results, 'filter'):
raw_results.filter(narrowed_results) raw_results.filter(narrowed_results)
@ -658,8 +755,7 @@ class WhooshSearchBackend(BaseSearchBackend):
'spelling_suggestion': None, 'spelling_suggestion': None,
} }
# Because as of Whoosh 2.5.1, it will return the wrong page of # 检查页码有效性
# results if you request something too high. :(
if raw_page.pagenum < page_num: if raw_page.pagenum < page_num:
return { return {
'results': [], 'results': [],
@ -667,6 +763,7 @@ class WhooshSearchBackend(BaseSearchBackend):
'spelling_suggestion': None, 'spelling_suggestion': None,
} }
# 处理结果
results = self._process_results(raw_page, result_class=result_class) results = self._process_results(raw_page, result_class=result_class)
searcher.close() searcher.close()
@ -682,11 +779,15 @@ class WhooshSearchBackend(BaseSearchBackend):
query_string='', query_string='',
spelling_query=None, spelling_query=None,
result_class=None): result_class=None):
"""
处理搜索结果
将Whoosh原始结果转换为Haystack格式
"""
from haystack import connections from haystack import connections
results = [] results = []
# It's important to grab the hits first before slicing. Otherwise, this # 获取命中数
# can cause pagination failures.
hits = len(raw_page) hits = len(raw_page)
if result_class is None: if result_class is None:
@ -697,6 +798,7 @@ class WhooshSearchBackend(BaseSearchBackend):
unified_index = connections[self.connection_alias].get_unified_index() unified_index = connections[self.connection_alias].get_unified_index()
indexed_models = unified_index.get_indexed_models() indexed_models = unified_index.get_indexed_models()
# 处理每个结果
for doc_offset, raw_result in enumerate(raw_page): for doc_offset, raw_result in enumerate(raw_page):
score = raw_page.score(doc_offset) or 0 score = raw_page.score(doc_offset) or 0
app_label, model_name = raw_result[DJANGO_CT].split('.') app_label, model_name = raw_result[DJANGO_CT].split('.')
@ -704,13 +806,14 @@ class WhooshSearchBackend(BaseSearchBackend):
model = haystack_get_model(app_label, model_name) model = haystack_get_model(app_label, model_name)
if model and model in indexed_models: if model and model in indexed_models:
# 处理字段值
for key, value in raw_result.items(): for key, value in raw_result.items():
index = unified_index.get_index(model) index = unified_index.get_index(model)
string_key = str(key) string_key = str(key)
if string_key in index.fields and hasattr( if string_key in index.fields and hasattr(
index.fields[string_key], 'convert'): index.fields[string_key], 'convert'):
# Special-cased due to the nature of KEYWORD fields. # 多值字段特殊处理
if index.fields[string_key].is_multivalued: if index.fields[string_key].is_multivalued:
if value is None or len(value) == 0: if value is None or len(value) == 0:
additional_fields[string_key] = [] additional_fields[string_key] = []
@ -723,9 +826,11 @@ class WhooshSearchBackend(BaseSearchBackend):
else: else:
additional_fields[string_key] = self._to_python(value) additional_fields[string_key] = self._to_python(value)
# 移除系统字段
del (additional_fields[DJANGO_CT]) del (additional_fields[DJANGO_CT])
del (additional_fields[DJANGO_ID]) del (additional_fields[DJANGO_ID])
# 高亮处理
if highlight: if highlight:
sa = StemmingAnalyzer() sa = StemmingAnalyzer()
formatter = WhooshHtmlFormatter('em') formatter = WhooshHtmlFormatter('em')
@ -742,6 +847,7 @@ class WhooshSearchBackend(BaseSearchBackend):
self.content_field_name: [whoosh_result], self.content_field_name: [whoosh_result],
} }
# 创建结果对象
result = result_class( result = result_class(
app_label, app_label,
model_name, model_name,
@ -752,6 +858,7 @@ class WhooshSearchBackend(BaseSearchBackend):
else: else:
hits -= 1 hits -= 1
# 拼写建议
if self.include_spelling: if self.include_spelling:
if spelling_query: if spelling_query:
spelling_suggestion = self.create_spelling_suggestion( spelling_suggestion = self.create_spelling_suggestion(
@ -768,6 +875,15 @@ class WhooshSearchBackend(BaseSearchBackend):
} }
def create_spelling_suggestion(self, query_string): def create_spelling_suggestion(self, query_string):
"""
创建拼写建议
Args:
query_string: 查询字符串
Returns:
str: 拼写建议
"""
spelling_suggestion = None spelling_suggestion = None
reader = self.index.reader() reader = self.index.reader()
corrector = reader.corrector(self.content_field_name) corrector = reader.corrector(self.content_field_name)
@ -776,14 +892,14 @@ class WhooshSearchBackend(BaseSearchBackend):
if not query_string: if not query_string:
return spelling_suggestion return spelling_suggestion
# Clean the string. # 清理查询字符串
for rev_word in self.RESERVED_WORDS: for rev_word in self.RESERVED_WORDS:
cleaned_query = cleaned_query.replace(rev_word, '') cleaned_query = cleaned_query.replace(rev_word, '')
for rev_char in self.RESERVED_CHARACTERS: for rev_char in self.RESERVED_CHARACTERS:
cleaned_query = cleaned_query.replace(rev_char, '') cleaned_query = cleaned_query.replace(rev_char, '')
# Break it down. # 分词并获取建议
query_words = cleaned_query.split() query_words = cleaned_query.split()
suggested_words = [] suggested_words = []
@ -798,22 +914,29 @@ class WhooshSearchBackend(BaseSearchBackend):
def _from_python(self, value): def _from_python(self, value):
""" """
Converts Python values to a string for Whoosh. Python值转换为Whoosh字符串
Code courtesy of pysolr. Args:
value: Python值
Returns:
str: Whoosh格式字符串
""" """
if hasattr(value, 'strftime'): if hasattr(value, 'strftime'):
# 日期时间处理
if not hasattr(value, 'hour'): if not hasattr(value, 'hour'):
value = datetime(value.year, value.month, value.day, 0, 0, 0) value = datetime(value.year, value.month, value.day, 0, 0, 0)
elif isinstance(value, bool): elif isinstance(value, bool):
# 布尔值处理
if value: if value:
value = 'true' value = 'true'
else: else:
value = 'false' value = 'false'
elif isinstance(value, (list, tuple)): elif isinstance(value, (list, tuple)):
# 列表元组处理
value = u','.join([force_str(v) for v in value]) value = u','.join([force_str(v) for v in value])
elif isinstance(value, (six.integer_types, float)): elif isinstance(value, (six.integer_types, float)):
# Leave it alone. # 数字类型保持原样
pass pass
else: else:
value = force_str(value) value = force_str(value)
@ -821,15 +944,20 @@ class WhooshSearchBackend(BaseSearchBackend):
def _to_python(self, value): def _to_python(self, value):
""" """
Converts values from Whoosh to native Python values. Whoosh值转换为Python值
Args:
value: Whoosh值
A port of the same method in pysolr, as they deal with data the same way. Returns:
object: Python值
""" """
if value == 'true': if value == 'true':
return True return True
elif value == 'false': elif value == 'false':
return False return False
# 日期时间解析
if value and isinstance(value, six.string_types): if value and isinstance(value, six.string_types):
possible_datetime = DATETIME_REGEX.search(value) possible_datetime = DATETIME_REGEX.search(value)
@ -847,11 +975,10 @@ class WhooshSearchBackend(BaseSearchBackend):
date_values['minute'], date_values['minute'],
date_values['second']) date_values['second'])
# JSON解析尝试
try: try:
# Attempt to use json to load the values.
converted_value = json.loads(value) converted_value = json.loads(value)
# Try to handle most built-in types.
if isinstance( if isinstance(
converted_value, converted_value,
(list, (list,
@ -863,15 +990,28 @@ class WhooshSearchBackend(BaseSearchBackend):
complex)): complex)):
return converted_value return converted_value
except BaseException: except BaseException:
# If it fails (SyntaxError or its ilk) or we don't trust it,
# continue on.
pass pass
return value return value
class WhooshSearchQuery(BaseSearchQuery): class WhooshSearchQuery(BaseSearchQuery):
"""
Whoosh搜索查询构建器
负责构建Whoosh搜索引擎的查询语句
"""
def _convert_datetime(self, date): def _convert_datetime(self, date):
"""
日期时间转换
Args:
date: 日期时间对象
Returns:
str: 格式化字符串
"""
if hasattr(date, 'hour'): if hasattr(date, 'hour'):
return force_str(date.strftime('%Y%m%d%H%M%S')) return force_str(date.strftime('%Y%m%d%H%M%S'))
else: else:
@ -879,20 +1019,25 @@ class WhooshSearchQuery(BaseSearchQuery):
def clean(self, query_fragment): def clean(self, query_fragment):
""" """
Provides a mechanism for sanitizing user input before presenting the 清理查询片段
value to the backend.
对用户输入进行清理和转义处理
Whoosh 1.X differs here in that you can no longer use a backslash Args:
to escape reserved characters. Instead, the whole word should be query_fragment: 查询片段
quoted.
Returns:
str: 清理后的查询字符串
""" """
words = query_fragment.split() words = query_fragment.split()
cleaned_words = [] cleaned_words = []
for word in words: for word in words:
# 保留字转为小写
if word in self.backend.RESERVED_WORDS: if word in self.backend.RESERVED_WORDS:
word = word.replace(word, word.lower()) word = word.replace(word, word.lower())
# 保留字符用引号包围
for char in self.backend.RESERVED_CHARACTERS: for char in self.backend.RESERVED_CHARACTERS:
if char in word: if char in word:
word = "'%s'" % word word = "'%s'" % word
@ -903,12 +1048,23 @@ class WhooshSearchQuery(BaseSearchQuery):
return ' '.join(cleaned_words) return ' '.join(cleaned_words)
def build_query_fragment(self, field, filter_type, value): def build_query_fragment(self, field, filter_type, value):
"""
构建查询片段
Args:
field: 字段名
filter_type: 过滤器类型
value: 字段值
Returns:
str: 查询片段
"""
from haystack import connections from haystack import connections
query_frag = '' query_frag = ''
is_datetime = False is_datetime = False
# 值类型处理
if not hasattr(value, 'input_type_name'): if not hasattr(value, 'input_type_name'):
# Handle when we've got a ``ValuesListQuerySet``...
if hasattr(value, 'values_list'): if hasattr(value, 'values_list'):
value = list(value) value = list(value)
@ -916,26 +1072,24 @@ class WhooshSearchQuery(BaseSearchQuery):
is_datetime = True is_datetime = True
if isinstance(value, six.string_types) and value != ' ': if isinstance(value, six.string_types) and value != ' ':
# It's not an ``InputType``. Assume ``Clean``.
value = Clean(value) value = Clean(value)
else: else:
value = PythonData(value) value = PythonData(value)
# Prepare the query using the InputType. # 准备值
prepared_value = value.prepare(self) prepared_value = value.prepare(self)
if not isinstance(prepared_value, (set, list, tuple)): if not isinstance(prepared_value, (set, list, tuple)):
# Then convert whatever we get back to what pysolr wants if needed.
prepared_value = self.backend._from_python(prepared_value) prepared_value = self.backend._from_python(prepared_value)
# 'content' is a special reserved word, much like 'pk' in # 字段名处理
# Django's ORM layer. It indicates 'no special field'.
if field == 'content': if field == 'content':
index_fieldname = '' index_fieldname = ''
else: else:
index_fieldname = u'%s:' % connections[self._using].get_unified_index( index_fieldname = u'%s:' % connections[self._using].get_unified_index(
).get_index_fieldname(field) ).get_index_fieldname(field)
# 过滤器类型映射
filter_types = { filter_types = {
'content': '%s', 'content': '%s',
'contains': '*%s*', 'contains': '*%s*',
@ -949,6 +1103,7 @@ class WhooshSearchQuery(BaseSearchQuery):
'fuzzy': u'%s~', 'fuzzy': u'%s~',
} }
# 查询片段构建
if value.post_process is False: if value.post_process is False:
query_frag = prepared_value query_frag = prepared_value
else: else:
@ -961,8 +1116,6 @@ class WhooshSearchQuery(BaseSearchQuery):
if value.input_type_name == 'exact': if value.input_type_name == 'exact':
query_frag = prepared_value query_frag = prepared_value
else: else:
# Iterate over terms & incorportate the converted form of
# each into the query.
terms = [] terms = []
if isinstance(prepared_value, six.string_types): if isinstance(prepared_value, six.string_types):
@ -1026,19 +1179,19 @@ class WhooshSearchQuery(BaseSearchQuery):
query_frag = filter_types[filter_type] % prepared_value query_frag = filter_types[filter_type] % prepared_value
# 添加括号
if len(query_frag) and not isinstance(value, Raw): if len(query_frag) and not isinstance(value, Raw):
if not query_frag.startswith('(') and not query_frag.endswith(')'): if not query_frag.startswith('(') and not query_frag.endswith(')'):
query_frag = "(%s)" % query_frag query_frag = "(%s)" % query_frag
return u"%s%s" % (index_fieldname, query_frag) return u"%s%s" % (index_fieldname, query_frag)
# if not filter_type in ('in', 'range'):
# # 'in' is a bit of a special case, as we don't want to
# # convert a valid list/tuple to string. Defer handling it
# # until later...
# value = self.backend._from_python(value)
class WhooshEngine(BaseEngine): class WhooshEngine(BaseEngine):
"""
Whoosh搜索引擎配置
配置Haystack使用Whoosh作为搜索后端
"""
backend = WhooshSearchBackend backend = WhooshSearchBackend
query = WhooshSearchQuery query = WhooshSearchQuery

@ -1,16 +1,25 @@
""" """
WSGI config for djangoblog project. DjangoBlog WSGI 配置模块
It exposes the WSGI callable as a module-level variable named ``application``. 本模块定义了DjangoBlog项目的WSGIWeb Server Gateway Interface配置
用于将Django应用部署到支持WSGI的Web服务器如ApacheNginx + uWSGI等
For more information on this file, see WSGI是Python Web应用与Web服务器之间的标准接口确保应用能够在生产环境中正确运行
https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
主要功能
- 设置Django环境变量
- 创建WSGI应用实例
- 提供Web服务器与Django应用之间的桥梁
""" """
import os import os
from django.core.wsgi import get_wsgi_application from django.core.wsgi import get_wsgi_application
# 设置Django设置模块的环境变量
# 告诉Django使用哪个配置文件这里设置为'djangoblog.settings'
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "djangoblog.settings") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "djangoblog.settings")
application = get_wsgi_application() # 创建WSGI应用实例
# 这是Web服务器将调用的入口点用于处理HTTP请求
application = get_wsgi_application()

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save