diff --git a/djangoblog/admin_site.py b/djangoblog/admin_site.py
index f120405..f1d9f9d 100644
--- a/djangoblog/admin_site.py
+++ b/djangoblog/admin_site.py
@@ -1,13 +1,20 @@
+#gq:
+# 从 Django 内置的 admin 模块导入 AdminSite 基类
from django.contrib.admin import AdminSite
+# 导入 LogEntry 模型,用于记录管理员操作日志
from django.contrib.admin.models import LogEntry
+# 导入 Site 模型及其默认的 Admin 配置
from django.contrib.sites.admin import SiteAdmin
from django.contrib.sites.models import Site
+# 批量导入各个自定义 App 的 Admin 配置和模型
+# 这种星号(*)导入方式在项目规模较小时很方便,但大型项目中可能影响代码可读性
from accounts.admin import *
from blog.admin import *
from blog.models import *
from comments.admin import *
from comments.models import *
+# 导入自定义的 LogEntryAdmin,用于自定义操作日志的后台显示
from djangoblog.logentryadmin import LogEntryAdmin
from oauth.admin import *
from oauth.models import *
@@ -18,28 +25,59 @@ from servermanager.models import *
class DjangoBlogAdminSite(AdminSite):
+ """
+ Custom admin site for the project, subclassing Django's AdminSite.
+ Overrides the site titles and the access-permission check.
+ """
+ # Header text for the admin pages
site_header = 'djangoblog administration'
+ # Title text used for the admin pages' browser title
site_title = 'djangoblog site admin'
def __init__(self, name='admin'):
+ """
+ Initialise the admin site.
+ :param name: site instance name, defaults to 'admin'; passed straight to AdminSite.__init__.
+ """
super().__init__(name)
def has_permission(self, request):
+ """
+ Decide whether the request may access this admin site.
+ Only the requesting user's is_superuser flag is consulted.
+ :param request: the current HTTP request
+ :return: the value of request.user.is_superuser
+ """
+ # Only superusers may enter this admin site;
+ # stricter than AdminSite's stock check (active staff user).
return request.user.is_superuser
# def get_urls(self):
+ # """
+ # (Disabled) Override get_urls to add custom URL routes.
+ # Kept as an example of adding a custom view to the admin site.
+ # """
+ # # Start from the parent's URL list
# urls = super().get_urls()
# from django.urls import path
+ # # Custom view function (refresh_memcache) imported from blog.views
# from blog.views import refresh_memcache
#
+ # # Custom URL patterns
# my_urls = [
+ # # self.admin_view() wraps the view so the admin permission check applies
# path('refresh/', self.admin_view(refresh_memcache), name="refresh"),
# ]
+ # # Return the combined URL list
# return urls + my_urls
-
+# 创建一个自定义 Admin 站点的实例
+# 这个实例将被用于注册所有的模型
admin_site = DjangoBlogAdminSite(name='admin')
+# --- 开始注册各个 App 的模型到自定义的 admin_site ---
+
+# 注册 blog App 的模型
admin_site.register(Article, ArticlelAdmin)
admin_site.register(Category, CategoryAdmin)
admin_site.register(Tag, TagAdmin)
@@ -47,18 +85,25 @@ admin_site.register(Links, LinksAdmin)
admin_site.register(SideBar, SideBarAdmin)
admin_site.register(BlogSettings, BlogSettingsAdmin)
+# 注册 servermanager App 的模型
admin_site.register(commands, CommandsAdmin)
admin_site.register(EmailSendLog, EmailSendLogAdmin)
+# 注册 accounts App 的模型
admin_site.register(BlogUser, BlogUserAdmin)
+# 注册 comments App 的模型
admin_site.register(Comment, CommentAdmin)
+# 注册 oauth App 的模型
admin_site.register(OAuthUser, OAuthUserAdmin)
admin_site.register(OAuthConfig, OAuthConfigAdmin)
+# 注册 owntracks App 的模型
admin_site.register(OwnTrackLog, OwnTrackLogsAdmin)
+# 注册 Django 内置的 Site 模型
admin_site.register(Site, SiteAdmin)
-admin_site.register(LogEntry, LogEntryAdmin)
+# 注册 Django 内置的 LogEntry 模型,使用自定义的 LogEntryAdmin 配置
+admin_site.register(LogEntry, LogEntryAdmin)
\ No newline at end of file
diff --git a/djangoblog/apps.py b/djangoblog/apps.py
index d29e318..cb37b47 100644
--- a/djangoblog/apps.py
+++ b/djangoblog/apps.py
@@ -1,11 +1,14 @@
+#gq:
from django.apps import AppConfig
class DjangoblogAppConfig(AppConfig):
+ # Default primary-key field type for models: BigAutoField
default_auto_field = 'django.db.models.BigAutoField'
+ # Application name (presumably matches the INSTALLED_APPS entry; confirm in settings.py)
name = 'djangoblog'
def ready(self):
- super().ready()
- # Import and load plugins here
+ super().ready() # run the parent class's ready() first
+ # Load plugins once the app is ready
from .plugin_manage.loader import load_plugins
- load_plugins()
\ No newline at end of file
+ load_plugins()
\ No newline at end of file
diff --git a/djangoblog/blog_signals.py b/djangoblog/blog_signals.py
index 393f441..712757b 100644
--- a/djangoblog/blog_signals.py
+++ b/djangoblog/blog_signals.py
@@ -1,3 +1,4 @@
+#gq:
import _thread
import logging
@@ -18,13 +19,15 @@ from oauth.models import OAuthUser
logger = logging.getLogger(__name__)
+# 自定义信号:OAuth用户登录成功后触发
oauth_user_login_signal = django.dispatch.Signal(['id'])
-send_email_signal = django.dispatch.Signal(
- ['emailto', 'title', 'content'])
+# 自定义信号:需要发送邮件时触发
+send_email_signal = django.dispatch.Signal(['emailto', 'title', 'content'])
@receiver(send_email_signal)
def send_email_signal_handler(sender, **kwargs):
+ """发送HTML邮件并记录发送日志"""
emailto = kwargs['emailto']
title = kwargs['title']
content = kwargs['content']
@@ -53,9 +56,11 @@ def send_email_signal_handler(sender, **kwargs):
@receiver(oauth_user_login_signal)
def oauth_user_login_signal_handler(sender, **kwargs):
+ """OAuth用户登录后,处理头像本地化并清理侧边栏缓存"""
id = kwargs['id']
oauthuser = OAuthUser.objects.get(id=id)
site = get_current_site().domain
+ # 如果头像是外部链接,则下载本地化
if oauthuser.picture and not oauthuser.picture.find(site) >= 0:
from djangoblog.utils import save_user_avatar
oauthuser.picture = save_user_avatar(oauthuser.picture)
@@ -73,20 +78,31 @@ def model_post_save_callback(
using,
update_fields,
**kwargs):
+ """
+ 模型保存后触发:
+ 1. 对有get_full_url方法的模型(如文章),通知搜索引擎。
+ 2. 对评论,清理相关缓存并异步发送邮件通知。
+ """
clearcache = False
+ # 忽略Admin日志
if isinstance(instance, LogEntry):
return
+
+ # 处理内容模型(如文章)
if 'get_full_url' in dir(instance):
is_update_views = update_fields == {'views'}
+ # 非测试环境且非仅更新浏览量,则通知搜索引擎
if not settings.TESTING and not is_update_views:
try:
notify_url = instance.get_full_url()
SpiderNotify.baidu_notify([notify_url])
except Exception as ex:
logger.error("notify sipder", ex)
+ # 非仅更新浏览量,则标记需要清理缓存
if not is_update_views:
clearcache = True
+ # 处理评论
if isinstance(instance, Comment):
if instance.is_enable:
path = instance.article.get_absolute_url()
@@ -94,21 +110,19 @@ def model_post_save_callback(
if site.find(':') > 0:
site = site[0:site.find(':')]
- expire_view_cache(
- path,
- servername=site,
- serverport=80,
- key_prefix='blogdetail')
+ # 清理文章页、评论、侧边栏等相关缓存
+ expire_view_cache(path, servername=site, serverport=80, key_prefix='blogdetail')
if cache.get('seo_processor'):
cache.delete('seo_processor')
- comment_cache_key = 'article_comments_{id}'.format(
- id=instance.article.id)
+ comment_cache_key = 'article_comments_{id}'.format(id=instance.article.id)
cache.delete(comment_cache_key)
delete_sidebar_cache()
delete_view_cache('article_comments', [str(instance.article.pk)])
+ # 异步发送评论通知邮件
_thread.start_new_thread(send_comment_email, (instance,))
+ # 根据标记清理全局缓存
if clearcache:
cache.clear()
@@ -116,7 +130,8 @@ def model_post_save_callback(
@receiver(user_logged_in)
@receiver(user_logged_out)
def user_auth_callback(sender, request, user, **kwargs):
+ """用户登录/登出后,清理侧边栏缓存"""
if user and user.username:
logger.info(user)
delete_sidebar_cache()
- # cache.clear()
+ # cache.clear() # 注释掉了全局缓存清理,仅清理侧边栏
\ No newline at end of file
diff --git a/djangoblog/elasticsearch_backend.py b/djangoblog/elasticsearch_backend.py
index 4afe498..3b143fc 100644
--- a/djangoblog/elasticsearch_backend.py
+++ b/djangoblog/elasticsearch_backend.py
@@ -1,3 +1,4 @@
+#gq:
from django.utils.encoding import force_str
from elasticsearch_dsl import Q
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
@@ -12,51 +13,57 @@ logger = logging.getLogger(__name__)
class ElasticSearchBackend(BaseSearchBackend):
+ """Elasticsearch搜索后端,实现Haystack接口,处理索引与搜索逻辑"""
def __init__(self, connection_alias, **connection_options):
super(
ElasticSearchBackend,
self).__init__(
connection_alias,
**connection_options)
- self.manager = ArticleDocumentManager()
- self.include_spelling = True
+ self.manager = ArticleDocumentManager() # 文档索引管理工具
+ self.include_spelling = True # 启用拼写建议
def _get_models(self, iterable):
+ """将模型实例/ID转换为Elasticsearch文档对象"""
models = iterable if iterable and iterable[0] else Article.objects.all()
docs = self.manager.convert_to_doc(models)
return docs
def _create(self, models):
+ """创建索引并批量重建文档"""
self.manager.create_index()
docs = self._get_models(models)
self.manager.rebuild(docs)
def _delete(self, models):
+ """删除指定文档"""
for m in models:
m.delete()
return True
def _rebuild(self, models):
+ """增量更新索引文档"""
models = models if models else Article.objects.all()
- docs = self.manager.convert_to_doc(models)
+ docs = self._get_models(models)
self.manager.update_docs(docs)
def update(self, index, iterable, commit=True):
-
+ """更新索引:将模型实例同步到Elasticsearch"""
models = self._get_models(iterable)
self.manager.update_docs(models)
def remove(self, obj_or_string):
+ """从索引中删除单个对象"""
models = self._get_models([obj_or_string])
self._delete(models)
def clear(self, models=None, commit=True):
+ """清空整个索引"""
self.remove(None)
@staticmethod
def get_suggestion(query: str) -> str:
- """获取推荐词, 如果没有找到添加原搜索词"""
-
+ """获取搜索推荐词,无建议则返回原查询词"""
search = ArticleDocument.search() \
.query("match", body=query) \
.suggest('suggest_search', query, term={'field': 'body'}) \
@@ -64,30 +71,31 @@ class ElasticSearchBackend(BaseSearchBackend):
keywords = []
for suggest in search.suggest.suggest_search:
- if suggest["options"]:
- keywords.append(suggest["options"][0]["text"])
- else:
- keywords.append(suggest["text"])
+ # 有建议取第一个,无则用原词
+ keywords.append(suggest["options"][0]["text"] if suggest["options"] else suggest["text"])
return ' '.join(keywords)
@log_query
def search(self, query_string, **kwargs):
+ """核心搜索逻辑:匹配文章标题/正文,过滤已发布文章,支持分页和拼写建议"""
logger.info('search query_string:' + query_string)
- start_offset = kwargs.get('start_offset')
- end_offset = kwargs.get('end_offset')
+ start_offset = kwargs.get('start_offset') # 分页起始位置
+ end_offset = kwargs.get('end_offset') # 分页结束位置
- # 推荐词搜索
+ # 启用推荐词搜索
if getattr(self, "is_suggest", None):
suggestion = self.get_suggestion(query_string)
else:
suggestion = query_string
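+ # Build the query: match body or title; minimum_should_match="70%" is the share of the should clauses that must match, not a text-similarity score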
q = Q('bool',
should=[Q('match', body=suggestion), Q('match', title=suggestion)],
minimum_should_match="70%")
+ # 执行搜索:过滤已发布(status='p')、文章类型(type='a'),不返回原始文档
search = ArticleDocument.search() \
.query('bool', filter=[q]) \
.filter('term', status='p') \
@@ -95,8 +103,9 @@ class ElasticSearchBackend(BaseSearchBackend):
.source(False)[start_offset: end_offset]
results = search.execute()
- hits = results['hits'].total
+ hits = results['hits'].total # 总命中数
raw_results = []
+ # 格式化结果为Haystack兼容的SearchResult对象
for raw_result in results['hits']['hits']:
app_label = 'blog'
model_name = 'Article'
@@ -107,11 +116,12 @@ class ElasticSearchBackend(BaseSearchBackend):
result = result_class(
app_label,
model_name,
- raw_result['_id'],
- raw_result['_score'],
+ raw_result['_id'], # 文档ID
+ raw_result['_score'], # 相关性得分
**additional_fields)
raw_results.append(result)
facets = {}
+ # 若推荐词与原词不同则返回建议
spelling_suggestion = None if query_string == suggestion else suggestion
return {
@@ -123,21 +133,16 @@ class ElasticSearchBackend(BaseSearchBackend):
class ElasticSearchQuery(BaseSearchQuery):
+ """Elasticsearch查询构建类,适配Haystack接口"""
def _convert_datetime(self, date):
+ """将datetime转换为Elasticsearch兼容的字符串格式"""
if hasattr(date, 'hour'):
return force_str(date.strftime('%Y%m%d%H%M%S'))
else:
return force_str(date.strftime('%Y%m%d000000'))
def clean(self, query_fragment):
- """
- Provides a mechanism for sanitizing user input before presenting the
- value to the backend.
-
- Whoosh 1.X differs here in that you can no longer use a backslash
- to escape reserved characters. Instead, the whole word should be
- quoted.
- """
+ """清理查询片段,处理保留字和特殊字符(兼容Whoosh逻辑)"""
words = query_fragment.split()
cleaned_words = []
@@ -155,29 +160,34 @@ class ElasticSearchQuery(BaseSearchQuery):
return ' '.join(cleaned_words)
def build_query_fragment(self, field, filter_type, value):
+ """构建查询片段,直接返回查询字符串"""
return value.query_string
def get_count(self):
+ """获取查询结果总数"""
results = self.get_results()
return len(results) if results else 0
def get_spelling_suggestion(self, preferred_query=None):
+ """返回拼写建议词"""
return self._spelling_suggestion
def build_params(self, spelling_query=None):
+ """构建查询参数,复用父类逻辑"""
kwargs = super(ElasticSearchQuery, self).build_params(spelling_query=spelling_query)
return kwargs
class ElasticSearchModelSearchForm(ModelSearchForm):
-
+ """自定义搜索表单,控制是否启用搜索建议"""
def search(self):
- # 是否建议搜索
+ # 根据请求参数决定是否启用建议(is_suggest != "no"时启用)
self.searchqueryset.query.backend.is_suggest = self.data.get("is_suggest") != "no"
sqs = super().search()
return sqs
class ElasticSearchEngine(BaseEngine):
+ """Elasticsearch引擎入口,指定后端和查询类"""
backend = ElasticSearchBackend
- query = ElasticSearchQuery
+ query = ElasticSearchQuery
\ No newline at end of file
diff --git a/djangoblog/feeds.py b/djangoblog/feeds.py
index 8c4e851..c4d6b48 100644
--- a/djangoblog/feeds.py
+++ b/djangoblog/feeds.py
@@ -1,40 +1,49 @@
+#gq:
from django.contrib.auth import get_user_model
-from django.contrib.syndication.views import Feed
+from django.contrib.syndication.views import Feed # Django内置RSS/Atom订阅生成视图
from django.utils import timezone
-from django.utils.feedgenerator import Rss201rev2Feed
-
-from blog.models import Article
-from djangoblog.utils import CommonMarkdown
+from django.utils.feedgenerator import Rss201rev2Feed # RSS 2.0标准生成器
+from blog.models import Article # 博客文章模型
+from djangoblog.utils import CommonMarkdown # Markdown解析工具,将Markdown转为HTML
class DjangoBlogFeed(Feed):
- feed_type = Rss201rev2Feed
+ """博客RSS订阅Feed类,生成符合RSS 2.0标准的订阅内容"""
+ feed_type = Rss201rev2Feed # 指定Feed类型为RSS 2.0
- description = '大巧无工,重剑无锋.'
- title = "且听风吟 大巧无工,重剑无锋. "
- link = "/feed/"
+ description = '大巧无工,重剑无锋.' # Feed描述
+ title = "且听风吟 大巧无工,重剑无锋. " # Feed标题(订阅列表中显示)
+ link = "/feed/" # Feed的URL地址
def author_name(self):
+ """Feed作者名称:取系统第一个用户的昵称"""
return get_user_model().objects.first().nickname
def author_link(self):
+ """Feed作者链接:取系统第一个用户的个人主页URL"""
return get_user_model().objects.first().get_absolute_url()
def items(self):
+ """Feed订阅的内容列表:最新5篇已发布(status='p')的文章(type='a')"""
return Article.objects.filter(type='a', status='p').order_by('-pub_time')[:5]
def item_title(self, item):
+ """单个订阅项(文章)的标题:使用文章标题"""
return item.title
def item_description(self, item):
+ """单个订阅项的描述:将文章Markdown正文转为HTML"""
return CommonMarkdown.get_markdown(item.body)
def feed_copyright(self):
+ """Feed版权信息:显示当前年份的版权声明"""
now = timezone.now()
return "Copyright© {year} 且听风吟".format(year=now.year)
def item_link(self, item):
+ """单个订阅项的链接:文章详情页URL"""
return item.get_absolute_url()
def item_guid(self, item):
- return
+ """单个订阅项的唯一标识(预留方法,暂未实现)"""
+ return
\ No newline at end of file
diff --git a/djangoblog/logentryadmin.py b/djangoblog/logentryadmin.py
index 2f6a535..3245344 100644
--- a/djangoblog/logentryadmin.py
+++ b/djangoblog/logentryadmin.py
@@ -1,3 +1,4 @@
+#gq:
from django.contrib import admin
from django.contrib.admin.models import DELETION
from django.contrib.contenttypes.models import ContentType
@@ -7,54 +8,32 @@ from django.utils.html import escape
from django.utils.safestring import mark_safe
from django.utils.translation import gettext_lazy as _
-
class LogEntryAdmin(admin.ModelAdmin):
- list_filter = [
- 'content_type'
- ]
-
- search_fields = [
- 'object_repr',
- 'change_message'
- ]
-
- list_display_links = [
- 'action_time',
- 'get_change_message',
- ]
- list_display = [
- 'action_time',
- 'user_link',
- 'content_type',
- 'object_link',
- 'get_change_message',
- ]
+ """Admin操作日志自定义管理类:优化展示与权限控制"""
+ list_filter = ['content_type'] # 按内容类型筛选
+ search_fields = ['object_repr', 'change_message'] # 搜索对象描述、操作信息
+ list_display_links = ['action_time', 'get_change_message'] # 可点击跳转字段
+ list_display = ['action_time', 'user_link', 'content_type', 'object_link', 'get_change_message'] # 列表展示字段
def has_add_permission(self, request):
+ """禁用添加:日志自动生成,不允许手动添加"""
return False
def has_change_permission(self, request, obj=None):
- return (
- request.user.is_superuser or
- request.user.has_perm('admin.change_logentry')
- ) and request.method != 'POST'
+ """仅超级用户/有权限用户可查看,禁止POST修改"""
+ return (request.user.is_superuser or request.user.has_perm('admin.change_logentry')) and request.method != 'POST'
def has_delete_permission(self, request, obj=None):
+ """禁用删除:日志需保留"""
return False
def object_link(self, obj):
+ """Changelist column for the affected object: a link to its admin change page when the entry is not a deletion and the URL can be reversed; otherwise the escaped object_repr."""
object_link = escape(obj.object_repr)
- content_type = obj.content_type
-
- if obj.action_flag != DELETION and content_type is not None:
- # try returning an actual link instead of object repr string
+ if obj.action_flag != DELETION and obj.content_type:
try:
- url = reverse(
- 'admin:{}_{}_change'.format(content_type.app_label,
- content_type.model),
- args=[obj.object_id]
- )
- object_link = '<a href="{}">{}</a>'.format(url, object_link)
+ url = reverse(f'admin:{obj.content_type.app_label}_{obj.content_type.model}_change', args=[obj.object_id])
+ object_link = f'<a href="{url}">{object_link}</a>'  # object_link is already escaped above, so only the anchor markup is added here
except NoReverseMatch:
pass
return mark_safe(object_link)
@@ -63,16 +42,12 @@ class LogEntryAdmin(admin.ModelAdmin):
object_link.short_description = _('object')
def user_link(self, obj):
+ """Changelist column for the acting user: a link to the user's admin change page when the URL can be reversed; otherwise the escaped string form of the user."""
content_type = ContentType.objects.get_for_model(type(obj.user))
user_link = escape(force_str(obj.user))
try:
- # try returning an actual link instead of object repr string
- url = reverse(
- 'admin:{}_{}_change'.format(content_type.app_label,
- content_type.model),
- args=[obj.user.pk]
- )
- user_link = '<a href="{}">{}</a>'.format(url, user_link)
+ url = reverse(f'admin:{content_type.app_label}_{content_type.model}_change', args=[obj.user.pk])
+ user_link = f'<a href="{url}">{user_link}</a>'  # user_link is already escaped above, so only the anchor markup is added here
except NoReverseMatch:
pass
return mark_safe(user_link)
@@ -81,11 +56,11 @@ class LogEntryAdmin(admin.ModelAdmin):
user_link.short_description = _('user')
def get_queryset(self, request):
- queryset = super(LogEntryAdmin, self).get_queryset(request)
- return queryset.prefetch_related('content_type')
+ """预加载content_type,优化查询性能"""
+ return super().get_queryset(request).prefetch_related('content_type')
def get_actions(self, request):
- actions = super(LogEntryAdmin, self).get_actions(request)
- if 'delete_selected' in actions:
- del actions['delete_selected']
- return actions
+ """Return the available actions with the bulk 'delete_selected' action removed."""
+ actions = super().get_actions(request)
+ actions.pop('delete_selected', None)  # the key may already be absent (e.g. filtered out by permissions); pop avoids a KeyError
+ return actions
\ No newline at end of file
diff --git a/djangoblog/plugin_manage/base_plugin.py b/djangoblog/plugin_manage/base_plugin.py
index 2b4be5c..27d3945 100644
--- a/djangoblog/plugin_manage/base_plugin.py
+++ b/djangoblog/plugin_manage/base_plugin.py
@@ -1,18 +1,25 @@
+#gq:
import logging
+# 获取当前模块的日志记录器,方便在插件中输出日志
logger = logging.getLogger(__name__)
class BasePlugin:
- # 插件元数据
+ # 插件元数据,子类必须定义这些属性,否则会在初始化时抛出异常
PLUGIN_NAME = None
PLUGIN_DESCRIPTION = None
PLUGIN_VERSION = None
def __init__(self):
+ # 检查插件元数据是否完整,不完整则抛出 ValueError 异常
if not all([self.PLUGIN_NAME, self.PLUGIN_DESCRIPTION, self.PLUGIN_VERSION]):
raise ValueError("Plugin metadata (PLUGIN_NAME, PLUGIN_DESCRIPTION, PLUGIN_VERSION) must be defined.")
+
+ # 调用插件初始化方法,子类可以在这里完成特定的初始化操作
self.init_plugin()
+
+ # 注册插件钩子,子类可以在这里注册需要监听的钩子
self.register_hooks()
def init_plugin(self):
@@ -20,6 +27,7 @@ class BasePlugin:
插件初始化逻辑
子类可以重写此方法来实现特定的初始化操作
"""
+ # 默认行为:输出插件初始化日志
logger.info(f'{self.PLUGIN_NAME} initialized.')
def register_hooks(self):
@@ -27,6 +35,7 @@ class BasePlugin:
注册插件钩子
子类可以重写此方法来注册特定的钩子
"""
+ # 默认行为:空实现,子类根据需要覆盖
pass
def get_plugin_info(self):
@@ -34,8 +43,9 @@ class BasePlugin:
获取插件信息
:return: 包含插件元数据的字典
"""
+ # 返回插件的基本信息,供外部调用者获取
return {
'name': self.PLUGIN_NAME,
'description': self.PLUGIN_DESCRIPTION,
'version': self.PLUGIN_VERSION
- }
+ }
\ No newline at end of file
diff --git a/djangoblog/plugin_manage/hooks.py b/djangoblog/plugin_manage/hooks.py
index d712540..42c50ee 100644
--- a/djangoblog/plugin_manage/hooks.py
+++ b/djangoblog/plugin_manage/hooks.py
@@ -1,13 +1,18 @@
+#gq:
import logging
+# 获取当前模块的日志记录器,用于输出钩子系统的日志
logger = logging.getLogger(__name__)
+# 私有全局字典,用于存储所有注册的钩子
+# 结构:{ "hook_name": [callback1, callback2, ...] }
_hooks = {}
def register(hook_name: str, callback: callable):
"""
注册一个钩子回调。
+ 如果钩子名不存在,则初始化为空列表,再将回调函数追加进去。
"""
if hook_name not in _hooks:
_hooks[hook_name] = []
@@ -17,8 +22,9 @@ def register(hook_name: str, callback: callable):
def run_action(hook_name: str, *args, **kwargs):
"""
- 执行一个 Action Hook。
- 它会按顺序执行所有注册到该钩子上的回调函数。
+ 执行一个 Action Hook(动作钩子)。
+ 它会按顺序执行所有注册到该钩子上的回调函数,无返回值。
+ 如果某个回调抛出异常,记录错误日志并继续执行下一个回调。
"""
if hook_name in _hooks:
logger.debug(f"Running action hook '{hook_name}'")
@@ -31,8 +37,10 @@ def run_action(hook_name: str, *args, **kwargs):
def apply_filters(hook_name: str, value, *args, **kwargs):
"""
- 执行一个 Filter Hook。
- 它会把 value 依次传递给所有注册的回调函数进行处理。
+ 执行一个 Filter Hook(过滤钩子)。
+ 它会将 value 依次传递给所有注册的回调函数进行处理,每次处理结果作为下次输入。
+ 如果某个回调抛出异常,记录错误日志并跳过该回调。
+ 最后返回经过所有过滤器处理后的值。
"""
if hook_name in _hooks:
logger.debug(f"Applying filter hook '{hook_name}'")
@@ -41,4 +49,4 @@ def apply_filters(hook_name: str, value, *args, **kwargs):
value = callback(value, *args, **kwargs)
except Exception as e:
logger.error(f"Error applying filter hook '{hook_name}' callback '{callback.__name__}': {e}", exc_info=True)
- return value
+ return value
\ No newline at end of file
diff --git a/djangoblog/plugin_manage/loader.py b/djangoblog/plugin_manage/loader.py
index 12e824b..9392c00 100644
--- a/djangoblog/plugin_manage/loader.py
+++ b/djangoblog/plugin_manage/loader.py
@@ -1,19 +1,30 @@
+#gq:
import os
import logging
from django.conf import settings
+# 获取当前模块的日志记录器,用于输出插件加载过程中的日志
logger = logging.getLogger(__name__)
+
def load_plugins():
"""
- Dynamically loads and initializes plugins from the 'plugins' directory.
- This function is intended to be called when the Django app registry is ready.
+ Dynamically import the plugins named in settings.ACTIVE_PLUGINS.
+ Intended to be called once the Django app registry is ready (e.g. from AppConfig.ready()).
+ For each name, the module plugins.{plugin_name}.plugin is imported if its directory and plugin.py exist.
"""
+ # Iterate over the plugin names enabled in settings
for plugin_name in settings.ACTIVE_PLUGINS:
+ # Directory of this plugin under settings.PLUGINS_DIR
plugin_path = os.path.join(settings.PLUGINS_DIR, plugin_name)
+
+ # Only attempt the import when the directory exists and contains plugin.py
if os.path.isdir(plugin_path) and os.path.exists(os.path.join(plugin_path, 'plugin.py')):
try:
+ # Import the plugin module dynamically with __import__
+ # Import path: plugins.{plugin_name}.plugin
__import__(f'plugins.{plugin_name}.plugin')
logger.info(f"Successfully loaded plugin: {plugin_name}")
except ImportError as e:
- logger.error(f"Failed to import plugin: {plugin_name}", exc_info=e)
\ No newline at end of file
+ # On import failure, log an error including the exception traceback
+ logger.error(f"Failed to import plugin: {plugin_name}", exc_info=e)
\ No newline at end of file
diff --git a/djangoblog/sitemap.py b/djangoblog/sitemap.py
index 8b7d446..fee053e 100644
--- a/djangoblog/sitemap.py
+++ b/djangoblog/sitemap.py
@@ -1,59 +1,70 @@
-from django.contrib.sitemaps import Sitemap
-from django.urls import reverse
-
-from blog.models import Article, Category, Tag
+#gq:
+from django.contrib.sitemaps import Sitemap # Django内置站点地图生成类
+from django.urls import reverse # URL反向解析
+from blog.models import Article, Category, Tag # 导入博客相关模型
class StaticViewSitemap(Sitemap):
- priority = 0.5
- changefreq = 'daily'
+ """静态页面站点地图:如首页"""
+ priority = 0.5 # 页面优先级(0.0-1.0)
+ changefreq = 'daily' # 内容更新频率
def items(self):
+ """返回要包含的静态页面名称列表(对应URL名称)"""
return ['blog:index', ]
def location(self, item):
+ """返回每个静态页面的URL"""
return reverse(item)
-
class ArticleSiteMap(Sitemap):
- changefreq = "monthly"
- priority = "0.6"
+ """文章页面站点地图"""
+ changefreq = "monthly" # 文章更新频率
+ priority = "0.6" # 文章页面优先级
def items(self):
+ """返回所有已发布(status='p')的文章"""
return Article.objects.filter(status='p')
def lastmod(self, obj):
+ """返回文章的最后修改时间"""
return obj.last_modify_time
-
class CategorySiteMap(Sitemap):
- changefreq = "Weekly"
- priority = "0.6"
+ """分类页面站点地图"""
+ changefreq = "Weekly" # 分类更新频率
+ priority = "0.6" # 分类页面优先级
def items(self):
+ """返回所有分类"""
return Category.objects.all()
def lastmod(self, obj):
+ """返回分类的最后修改时间"""
return obj.last_modify_time
-
class TagSiteMap(Sitemap):
- changefreq = "Weekly"
- priority = "0.3"
+ """标签页面站点地图"""
+ changefreq = "Weekly" # 标签更新频率
+ priority = "0.3" # 标签页面优先级(低于文章和分类)
def items(self):
+ """返回所有标签"""
return Tag.objects.all()
def lastmod(self, obj):
+ """返回标签的最后修改时间"""
return obj.last_modify_time
-
class UserSiteMap(Sitemap):
- changefreq = "Weekly"
- priority = "0.3"
+ """用户页面站点地图"""
+ changefreq = "Weekly" # 用户信息更新频率
+ priority = "0.3" # 用户页面优先级
def items(self):
+ """返回所有发布过文章的不重复作者列表"""
return list(set(map(lambda x: x.author, Article.objects.all())))
def lastmod(self, obj):
- return obj.date_joined
+ """返回用户的注册时间(作为站点地图的最后更新时间)"""
+ return obj.date_joined
\ No newline at end of file
diff --git a/djangoblog/spider_notify.py b/djangoblog/spider_notify.py
index 7b909e9..0a1f4b1 100644
--- a/djangoblog/spider_notify.py
+++ b/djangoblog/spider_notify.py
@@ -1,21 +1,26 @@
+#gq:
import logging
+import requests # HTTP请求库,用于向搜索引擎提交URL
+from django.conf import settings # 导入Django项目配置
-import requests
-from django.conf import settings
-
-logger = logging.getLogger(__name__)
-
+logger = logging.getLogger(__name__) # 初始化日志对象
class SpiderNotify():
+ """Helpers that submit URLs to a search engine's push endpoint (currently Baidu, via settings.BAIDU_NOTIFY_URL)."""
+
@staticmethod
def baidu_notify(urls):
+ """POST the given URLs to settings.BAIDU_NOTIFY_URL; the response text or any exception is logged, nothing is returned or raised."""
try:
+ # Request body: the URLs joined one per line
data = '\n'.join(urls)
+ # POST to the endpoint configured in settings.BAIDU_NOTIFY_URL
result = requests.post(settings.BAIDU_NOTIFY_URL, data=data)
- logger.info(result.text)
+ logger.info(result.text) # log the endpoint's response body
except Exception as e:
- logger.error(e)
+ logger.error(e) # best effort: log the failure instead of propagating it
@staticmethod
def notify(url):
- SpiderNotify.baidu_notify(url)
+ """Submit one URL (or an iterable of URLs) through baidu_notify."""
+ SpiderNotify.baidu_notify([url] if isinstance(url, str) else url)  # wrap a lone string so join() does not split it into characters
\ No newline at end of file
diff --git a/djangoblog/tests.py b/djangoblog/tests.py
index 01237d9..d36eb51 100644
--- a/djangoblog/tests.py
+++ b/djangoblog/tests.py
@@ -1,15 +1,21 @@
+#gq:
from django.test import TestCase
from djangoblog.utils import *
class DjangoBlogTest(TestCase):
+ """测试 djangoblog.utils 模块的核心工具函数"""
+
def setUp(self):
pass
def test_utils(self):
+ # 测试SHA-256哈希生成
md5 = get_sha256('test')
self.assertIsNotNone(md5)
+
+ # 测试Markdown解析(含标题、代码块、链接)
c = CommonMarkdown.get_markdown('''
# Title1
diff --git a/djangoblog/urls.py b/djangoblog/urls.py
index 4aae58a..7024876 100644
--- a/djangoblog/urls.py
+++ b/djangoblog/urls.py
@@ -1,3 +1,4 @@
+#gq:
"""djangoblog URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
@@ -17,48 +18,59 @@ from django.conf import settings
from django.conf.urls.i18n import i18n_patterns
from django.conf.urls.static import static
from django.contrib.sitemaps.views import sitemap
-from django.urls import path, include
-from django.urls import re_path
+from django.urls import path, include, re_path
from haystack.views import search_view_factory
from blog.views import EsSearchView
from djangoblog.admin_site import admin_site
from djangoblog.elasticsearch_backend import ElasticSearchModelSearchForm
from djangoblog.feeds import DjangoBlogFeed
-from djangoblog.sitemap import ArticleSiteMap, CategorySiteMap, StaticViewSitemap, TagSiteMap, UserSiteMap
+from djangoblog.sitemap import (
+ ArticleSiteMap, CategorySiteMap, StaticViewSitemap, TagSiteMap, UserSiteMap
+)
+# 站点地图配置
sitemaps = {
-
- 'blog': ArticleSiteMap,
- 'Category': CategorySiteMap,
- 'Tag': TagSiteMap,
- 'User': UserSiteMap,
- 'static': StaticViewSitemap
+ 'blog': ArticleSiteMap, # 文章
+ 'Category': CategorySiteMap, # 分类
+ 'Tag': TagSiteMap, # 标签
+ 'User': UserSiteMap, # 用户
+ 'static': StaticViewSitemap # 静态页面
}
-handler404 = 'blog.views.page_not_found_view'
-handler500 = 'blog.views.server_error_view'
-handle403 = 'blog.views.permission_denied_view'
+# Custom error views; Django looks them up in the root URLconf under the names handler404 / handler500 / handler403
+handler404 = 'blog.views.page_not_found_view' # 404
+handler500 = 'blog.views.server_error_view' # 500
+handler403 = 'blog.views.permission_denied_view' # 403
urlpatterns = [
- path('i18n/', include('django.conf.urls.i18n')),
+ path('i18n/', include('django.conf.urls.i18n')), # 国际化
]
+
+# 国际化URL(多语言支持)
urlpatterns += i18n_patterns(
- re_path(r'^admin/', admin_site.urls),
- re_path(r'', include('blog.urls', namespace='blog')),
- re_path(r'mdeditor/', include('mdeditor.urls')),
- re_path(r'', include('comments.urls', namespace='comment')),
- re_path(r'', include('accounts.urls', namespace='account')),
- re_path(r'', include('oauth.urls', namespace='oauth')),
- re_path(r'^sitemap\.xml$', sitemap, {'sitemaps': sitemaps},
- name='django.contrib.sitemaps.views.sitemap'),
- re_path(r'^feed/$', DjangoBlogFeed()),
- re_path(r'^rss/$', DjangoBlogFeed()),
- re_path('^search', search_view_factory(view_class=EsSearchView, form_class=ElasticSearchModelSearchForm),
- name='search'),
- re_path(r'', include('servermanager.urls', namespace='servermanager')),
- re_path(r'', include('owntracks.urls', namespace='owntracks'))
- , prefix_default_language=False) + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)
+ re_path(r'^admin/', admin_site.urls), # admin site
+ re_path(r'', include('blog.urls', namespace='blog')), # blog app
+ re_path(r'mdeditor/', include('mdeditor.urls')), # Markdown editor
+ re_path(r'', include('comments.urls', namespace='comment')), # comments
+ re_path(r'', include('accounts.urls', namespace='account')), # accounts
+ re_path(r'', include('oauth.urls', namespace='oauth')), # OAuth login
+ re_path(r'^sitemap\.xml$', sitemap, {'sitemaps': sitemaps}, name='django.contrib.sitemaps.views.sitemap'), # sitemap; named so reverse('django.contrib.sitemaps.views.sitemap') keeps resolving
+ re_path(r'^feed/$', DjangoBlogFeed()), # RSS feed
+ re_path(r'^rss/$', DjangoBlogFeed()), # RSS feed (second path)
+ # Search view built by search_view_factory
+ re_path('^search', search_view_factory(
+ view_class=EsSearchView,
+ form_class=ElasticSearchModelSearchForm
+ ), name='search'),
+ re_path(r'', include('servermanager.urls', namespace='servermanager')), # servermanager app
+ re_path(r'', include('owntracks.urls', namespace='owntracks')), # owntracks app
+ prefix_default_language=False
+)
+
+# 静态文件URL
+urlpatterns += static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)
+
+# 开发环境媒体文件URL
if settings.DEBUG:
- urlpatterns += static(settings.MEDIA_URL,
- document_root=settings.MEDIA_ROOT)
+ urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
\ No newline at end of file
diff --git a/djangoblog/utils.py b/djangoblog/utils.py
index 57f63dc..af493dd 100644
--- a/djangoblog/utils.py
+++ b/djangoblog/utils.py
@@ -1,13 +1,14 @@
+#gq:
#!/usr/bin/env python
# encoding: utf-8
-
import logging
import os
import random
import string
import uuid
from hashlib import sha256
+from urllib.parse import quote
import bleach
import markdown
@@ -15,52 +16,51 @@ import requests
from django.conf import settings
from django.contrib.sites.models import Site
from django.core.cache import cache
+from django.core.cache.utils import make_template_fragment_key
+from django.http import HttpRequest
from django.templatetags.static import static
+from django.utils.cache import get_cache_key
logger = logging.getLogger(__name__)
def get_max_articleid_commentid():
+ """获取最新文章和评论的ID"""
from blog.models import Article
from comments.models import Comment
return (Article.objects.latest().pk, Comment.objects.latest().pk)
def get_sha256(str):
+ """计算字符串的SHA-256哈希值"""
m = sha256(str.encode('utf-8'))
return m.hexdigest()
def cache_decorator(expiration=3 * 60):
+ """函数缓存装饰器,默认缓存3分钟"""
+
def wrapper(func):
def news(*args, **kwargs):
try:
+ # Ask the first positional argument for a cache key via get_cache_key()
view = args[0]
key = view.get_cache_key()
except:
key = None
if not key:
unique_str = repr((func, args, kwargs))
+ key = get_sha256(unique_str)  # fallback key from func/args/kwargs when no usable key was obtained (exception or falsy result)
- m = sha256(unique_str.encode('utf-8'))
- key = m.hexdigest()
value = cache.get(key)
if value is not None:
- # logger.info('cache_decorator get cache:%s key:%s' % (func.__name__, key))
- if str(value) == '__default_cache_value__':
- return None
- else:
- return value
- else:
- logger.debug(
- 'cache_decorator set cache:%s key:%s' %
- (func.__name__, key))
- value = func(*args, **kwargs)
- if value is None:
- cache.set(key, '__default_cache_value__', expiration)
- else:
- cache.set(key, value, expiration)
- return value
+ # Cache hit: map the stored None-placeholder back to None
+ return None if str(value) == '__default_cache_value__' else value
+ # Cache miss: call the function and store its result (None is stored as the placeholder)
+ logger.debug(f'cache_decorator set cache:{func.__name__} key:{key}')
+ value = func(*args, **kwargs)
+ cache.set(key, value if value is not None else '__default_cache_value__', expiration)
+ return value
return news
@@ -68,165 +68,143 @@ def cache_decorator(expiration=3 * 60):
def expire_view_cache(path, servername, serverport, key_prefix=None):
- '''
- 刷新视图缓存
- :param path:url路径
- :param servername:host
- :param serverport:端口
- :param key_prefix:前缀
- :return:是否成功
- '''
- from django.http import HttpRequest
- from django.utils.cache import get_cache_key
-
+ """刷新指定URL的视图缓存"""
request = HttpRequest()
request.META = {'SERVER_NAME': servername, 'SERVER_PORT': serverport}
request.path = path
key = get_cache_key(request, key_prefix=key_prefix, cache=cache)
if key:
- logger.info('expire_view_cache:get key:{path}'.format(path=path))
- if cache.get(key):
- cache.delete(key)
+ logger.info(f'expire_view_cache:get key:{path}')
+ cache.delete(key)
return True
return False
@cache_decorator()
def get_current_site():
- site = Site.objects.get_current()
- return site
+ """获取当前站点信息(带缓存)"""
+ return Site.objects.get_current()
class CommonMarkdown:
+ """Markdown解析工具类"""
+
@staticmethod
def _convert_markdown(value):
- md = markdown.Markdown(
- extensions=[
- 'extra',
- 'codehilite',
- 'toc',
- 'tables',
- ]
- )
- body = md.convert(value)
- toc = md.toc
- return body, toc
+ """内部方法:执行Markdown转换,返回HTML和目录"""
+ md = markdown.Markdown(extensions=['extra', 'codehilite', 'toc', 'tables'])
+ return md.convert(value), md.toc
@staticmethod
def get_markdown_with_toc(value):
- body, toc = CommonMarkdown._convert_markdown(value)
- return body, toc
+ """转换Markdown为HTML(含目录)"""
+ return CommonMarkdown._convert_markdown(value)
@staticmethod
def get_markdown(value):
- body, toc = CommonMarkdown._convert_markdown(value)
+ """转换Markdown为HTML(不含目录)"""
+ body, _ = CommonMarkdown._convert_markdown(value)
return body
def send_email(emailto, title, content):
+ """发送邮件(通过信号解耦)"""
from djangoblog.blog_signals import send_email_signal
- send_email_signal.send(
- send_email.__class__,
- emailto=emailto,
- title=title,
- content=content)
+ send_email_signal.send(send_email.__class__, emailto=emailto, title=title, content=content)
def generate_code() -> str:
- """生成随机数验证码"""
+ """生成6位随机数字验证码"""
return ''.join(random.sample(string.digits, 6))
def parse_dict_to_url(dict):
- from urllib.parse import quote
- url = '&'.join(['{}={}'.format(quote(k, safe='/'), quote(v, safe='/'))
- for k, v in dict.items()])
- return url
+ """将字典转换为URL查询字符串"""
+ return '&'.join([f'{quote(k, safe="/")}={quote(v, safe="/")}' for k, v in dict.items()])
def get_blog_setting():
+ """获取博客系统设置(带缓存,无数据时初始化)"""
value = cache.get('get_blog_setting')
if value:
return value
- else:
- from blog.models import BlogSettings
- if not BlogSettings.objects.count():
- setting = BlogSettings()
- setting.site_name = 'djangoblog'
- setting.site_description = '基于Django的博客系统'
- setting.site_seo_description = '基于Django的博客系统'
- setting.site_keywords = 'Django,Python'
- setting.article_sub_length = 300
- setting.sidebar_article_count = 10
- setting.sidebar_comment_count = 5
- setting.show_google_adsense = False
- setting.open_site_comment = True
- setting.analytics_code = ''
- setting.beian_code = ''
- setting.show_gongan_code = False
- setting.comment_need_review = False
- setting.save()
- value = BlogSettings.objects.first()
- logger.info('set cache get_blog_setting')
- cache.set('get_blog_setting', value)
- return value
+ from blog.models import BlogSettings
+ if not BlogSettings.objects.count():
+ # 初始化默认设置
+ setting = BlogSettings(
+ site_name='djangoblog',
+ site_description='基于Django的博客系统',
+ site_seo_description='基于Django的博客系统',
+ site_keywords='Django,Python',
+ article_sub_length=300,
+ sidebar_article_count=10,
+ sidebar_comment_count=5,
+ show_google_adsense=False,
+ open_site_comment=True,
+ analytics_code='',
+ beian_code='',
+ show_gongan_code=False,
+ comment_need_review=False
+ )
+ setting.save()
+
+ value = BlogSettings.objects.first()
+ cache.set('get_blog_setting', value)
+ return value
-def save_user_avatar(url):
- '''
- 保存用户头像
- :param url:头像url
- :return: 本地路径
- '''
- logger.info(url)
+def save_user_avatar(url):
+ """下载并保存用户头像到本地,返回静态文件URL"""
try:
basedir = os.path.join(settings.STATICFILES, 'avatar')
rsp = requests.get(url, timeout=2)
if rsp.status_code == 200:
- if not os.path.exists(basedir):
- os.makedirs(basedir)
-
- image_extensions = ['.jpg', '.png', 'jpeg', '.gif']
- isimage = len([i for i in image_extensions if url.endswith(i)]) > 0
- ext = os.path.splitext(url)[1] if isimage else '.jpg'
- save_filename = str(uuid.uuid4().hex) + ext
- logger.info('保存用户头像:' + basedir + save_filename)
+ os.makedirs(basedir, exist_ok=True)
+
+ # 确定文件扩展名
+ ext = os.path.splitext(url)[1] if any(
+ url.endswith(ext) for ext in ['.jpg', '.png', 'jpeg', '.gif']) else '.jpg'
+ save_filename = f'{uuid.uuid4().hex}{ext}'
+
with open(os.path.join(basedir, save_filename), 'wb+') as file:
file.write(rsp.content)
- return static('avatar/' + save_filename)
+ return static(f'avatar/{save_filename}')
except Exception as e:
logger.error(e)
- return static('blog/img/avatar.png')
+ return static('blog/img/avatar.png') # 返回默认头像
def delete_sidebar_cache():
+ """删除侧边栏相关缓存"""
from blog.models import LinkShowType
- keys = ["sidebar" + x for x in LinkShowType.values]
+ keys = [f"sidebar{x}" for x in LinkShowType.values]
for k in keys:
- logger.info('delete sidebar key:' + k)
+ logger.info(f'delete sidebar key:{k}')
cache.delete(k)
def delete_view_cache(prefix, keys):
- from django.core.cache.utils import make_template_fragment_key
+ """删除指定模板片段缓存"""
key = make_template_fragment_key(prefix, keys)
cache.delete(key)
def get_resource_url():
+ """获取静态资源基础URL"""
if settings.STATIC_URL:
return settings.STATIC_URL
- else:
- site = get_current_site()
- return 'http://' + site.domain + '/static/'
+ site = get_current_site()
+ return f'http://{site.domain}/static/'
+# HTML清理配置
ALLOWED_TAGS = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', 'h1',
'h2', 'p']
ALLOWED_ATTRIBUTES = {'a': ['href', 'title'], 'abbr': ['title'], 'acronym': ['title']}
def sanitize_html(html):
- return bleach.clean(html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
+ """清理HTML,只保留允许的标签和属性"""
+ return bleach.clean(html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
\ No newline at end of file
diff --git a/djangoblog/whoosh_cn_backend.py b/djangoblog/whoosh_cn_backend.py
index 04e3f7f..554d3ec 100644
--- a/djangoblog/whoosh_cn_backend.py
+++ b/djangoblog/whoosh_cn_backend.py
@@ -1,6 +1,8 @@
+#gq:
# encoding: utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
+# 未来兼容导入:保证代码在 Python2/3 下行为一致
import json
import os
@@ -22,7 +24,7 @@ from haystack.models import SearchResult
from haystack.utils import get_identifier, get_model_ct
from haystack.utils import log as logging
from haystack.utils.app_loading import haystack_get_model
-from jieba.analyse import ChineseAnalyzer
+from jieba.analyse import ChineseAnalyzer # 中文分词器
from whoosh import index
from whoosh.analysis import StemmingAnalyzer
from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
@@ -47,23 +49,25 @@ if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
# Bubble up the correct error.
+# 用于解析 whoosh 存储的 datetime 字符串的正则
DATETIME_REGEX = re.compile(
    '^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
+
+# 线程本地存储:用于在内存模式时共享 RamStorage
LOCALS = threading.local()
LOCALS.RAM_STORE = None
class WhooshHtmlFormatter(HtmlFormatter):
"""
- This is a HtmlFormatter simpler than the whoosh.HtmlFormatter.
- We use it to have consistent results across backends. Specifically,
- Solr, Xapian and Elasticsearch are using this formatting.
+ 自定义高亮 HTML 输出格式,保持与其他后端(Solr、ES 等)一致。
+    模板:<em>高亮文本</em>
"""
    template = '<%(tag)s>%(t)s</%(tag)s>'
class WhooshSearchBackend(BaseSearchBackend):
- # Word reserved by Whoosh for special use.
+ # Whoosh 保留字,查询时需转义或避免
RESERVED_WORDS = (
'AND',
'NOT',
@@ -71,69 +75,69 @@ class WhooshSearchBackend(BaseSearchBackend):
'TO',
)
- # Characters reserved by Whoosh for special use.
- # The '\\' must come first, so as not to overwrite the other slash
- # replacements.
+ # Whoosh 保留字符,同样需转义
RESERVED_CHARACTERS = (
'\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
'[', ']', '^', '"', '~', '*', '?', ':', '.',
)
def __init__(self, connection_alias, **connection_options):
- super(
- WhooshSearchBackend,
- self).__init__(
- connection_alias,
- **connection_options)
- self.setup_complete = False
- self.use_file_storage = True
- self.post_limit = getattr(
- connection_options,
- 'POST_LIMIT',
- 128 * 1024 * 1024)
- self.path = connection_options.get('PATH')
-
+ """
+ 初始化后端实例
+ :param connection_alias: settings 中 HAYSTACK_CONNECTIONS 的 key
+ :param connection_options: 该连接的配置字典
+ """
+ super(WhooshSearchBackend, self).__init__(connection_alias, **connection_options)
+ self.setup_complete = False # 延迟 setup 标记
+ self.use_file_storage = True # 默认使用文件存储
+ # 提交缓冲区大小,默认 128 MB
+ self.post_limit = getattr(connection_options, 'POST_LIMIT', 128 * 1024 * 1024)
+ self.path = connection_options.get('PATH') # 索引存放路径
+
+ # 如果显式指定 STORAGE != 'file',则使用内存存储
if connection_options.get('STORAGE', 'file') != 'file':
self.use_file_storage = False
+ # 文件存储模式下 PATH 不能为空
if self.use_file_storage and not self.path:
raise ImproperlyConfigured(
- "You must specify a 'PATH' in your settings for connection '%s'." %
- connection_alias)
+ "You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)
self.log = logging.getLogger('haystack')
def setup(self):
"""
- Defers loading until needed.
+ 延迟初始化:真正用到时才创建/打开索引,避免进程启动时即锁定索引。
"""
from haystack import connections
new_index = False
- # Make sure the index is there.
+ # 文件存储:目录不存在则创建
if self.use_file_storage and not os.path.exists(self.path):
os.makedirs(self.path)
new_index = True
+ # 确保目录可写
if self.use_file_storage and not os.access(self.path, os.W_OK):
raise IOError(
- "The path to your Whoosh index '%s' is not writable for the current user/group." %
- self.path)
+ "The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
+ # 根据配置选择存储后端
if self.use_file_storage:
self.storage = FileStorage(self.path)
else:
global LOCALS
-
+ # 内存模式:线程本地共享 RamStorage
if getattr(LOCALS, 'RAM_STORE', None) is None:
LOCALS.RAM_STORE = RamStorage()
-
self.storage = LOCALS.RAM_STORE
+ # 构建 whoosh Schema
self.content_field_name, self.schema = self.build_schema(
connections[self.connection_alias].get_unified_index().all_searchfields())
self.parser = QueryParser(self.content_field_name, schema=self.schema)
+ # 创建或打开索引
if new_index is True:
self.index = self.storage.create_index(self.schema)
else:
@@ -145,18 +149,22 @@ class WhooshSearchBackend(BaseSearchBackend):
self.setup_complete = True
def build_schema(self, fields):
+ """
+ 把 Haystack 的 SearchField 列表转换成 whoosh 的 Schema
+ :param fields: dict {field_name: field_instance}
+ :return: (content_field_name, Schema)
+ """
schema_fields = {
- ID: WHOOSH_ID(stored=True, unique=True),
- DJANGO_CT: WHOOSH_ID(stored=True),
- DJANGO_ID: WHOOSH_ID(stored=True),
+ ID: WHOOSH_ID(stored=True, unique=True), # 主键
+ DJANGO_CT: WHOOSH_ID(stored=True), # 模型类名
+ DJANGO_ID: WHOOSH_ID(stored=True), # 模型 pk
}
- # Grab the number of keys that are hard-coded into Haystack.
- # We'll use this to (possibly) fail slightly more gracefully later.
initial_key_count = len(schema_fields)
content_field_name = ''
for field_name, field_class in fields.items():
if field_class.is_multivalued:
+ # 多值字段:非索引用 IDLIST,索引用 KEYWORD
if field_class.indexed is False:
schema_fields[field_class.index_fieldname] = IDLIST(
stored=True, field_boost=field_class.boost)
@@ -173,26 +181,28 @@ class WhooshSearchBackend(BaseSearchBackend):
schema_fields[field_class.index_fieldname] = NUMERIC(
stored=field_class.stored, numtype=float, field_boost=field_class.boost)
elif field_class.field_type == 'boolean':
- # Field boost isn't supported on BOOLEAN as of 1.8.2.
+ # BOOLEAN 不支持 field_boost
schema_fields[field_class.index_fieldname] = BOOLEAN(
stored=field_class.stored)
elif field_class.field_type == 'ngram':
schema_fields[field_class.index_fieldname] = NGRAM(
minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
elif field_class.field_type == 'edge_ngram':
- schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start',
- stored=field_class.stored,
- field_boost=field_class.boost)
+ schema_fields[field_class.index_fieldname] = NGRAMWORDS(
+ minsize=2, maxsize=15, at='start',
+ stored=field_class.stored, field_boost=field_class.boost)
else:
- # schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
+ # 默认 TEXT,使用 jieba 中文分词
schema_fields[field_class.index_fieldname] = TEXT(
- stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True)
+ stored=True, analyzer=ChineseAnalyzer(),
+ field_boost=field_class.boost, sortable=True)
+
+ # 标记文档主字段(用于高亮/拼写检查)
if field_class.document is True:
content_field_name = field_class.index_fieldname
schema_fields[field_class.index_fieldname].spelling = True
- # Fail more gracefully than relying on the backend to die if no fields
- # are found.
+ # 没有任何业务字段则抛错
if len(schema_fields) <= initial_key_count:
raise SearchBackendError(
"No fields were found in any search_indexes. Please correct this before attempting to search.")
@@ -200,51 +210,50 @@ class WhooshSearchBackend(BaseSearchBackend):
return (content_field_name, Schema(**schema_fields))
def update(self, index, iterable, commit=True):
+ """
+ 批量更新/新增文档
+ :param index: SearchIndex 实例
+ :param iterable: 要索引的模型实例可迭代对象
+ :param commit: 是否立即提交(whoosh 2.5+ 建议写完即 commit)
+ """
if not self.setup_complete:
self.setup()
self.index = self.index.refresh()
- writer = AsyncWriter(self.index)
+ writer = AsyncWriter(self.index) # 异步写,避免长期锁
for obj in iterable:
try:
- doc = index.full_prepare(obj)
+ doc = index.full_prepare(obj) # 提取字段值
except SkipDocument:
self.log.debug(u"Indexing for object `%s` skipped", obj)
- else:
- # Really make sure it's unicode, because Whoosh won't have it any
- # other way.
- for key in doc:
- doc[key] = self._from_python(doc[key])
-
- # Document boosts aren't supported in Whoosh 2.5.0+.
- if 'boost' in doc:
- del doc['boost']
-
- try:
- writer.update_document(**doc)
- except Exception as e:
- if not self.silently_fail:
- raise
-
- # We'll log the object identifier but won't include the actual object
- # to avoid the possibility of that generating encoding errors while
- # processing the log message:
- self.log.error(
- u"%s while preparing object for update" %
- e.__class__.__name__,
- exc_info=True,
- extra={
- "data": {
- "index": index,
- "object": get_identifier(obj)}})
+ continue
+
+ # 全部转 unicode
+ for key in doc:
+ doc[key] = self._from_python(doc[key])
+
+ # whoosh 2.5+ 已移除文档级 boost
+ if 'boost' in doc:
+ del doc['boost']
+
+ try:
+ writer.update_document(**doc) # 存在即更新
+ except Exception as e:
+ if not self.silently_fail:
+ raise
+ self.log.error(
+ u"%s while preparing object for update" % e.__class__.__name__,
+ exc_info=True,
+ extra={"data": {"index": index, "object": get_identifier(obj)}})
if len(iterable) > 0:
- # For now, commit no matter what, as we run into locking issues
- # otherwise.
- writer.commit()
+ writer.commit() # 真正落地
def remove(self, obj_or_string, commit=True):
+ """
+ 根据唯一标识删除单条文档
+ """
if not self.setup_complete:
self.setup()
@@ -253,20 +262,19 @@ class WhooshSearchBackend(BaseSearchBackend):
try:
self.index.delete_by_query(
- q=self.parser.parse(
- u'%s:"%s"' %
- (ID, whoosh_id)))
+ q=self.parser.parse(u'%s:"%s"' % (ID, whoosh_id)))
except Exception as e:
if not self.silently_fail:
raise
-
self.log.error(
"Failed to remove document '%s' from Whoosh: %s",
- whoosh_id,
- e,
- exc_info=True)
+ whoosh_id, e, exc_info=True)
def clear(self, models=None, commit=True):
+ """
+ 清空整个索引或指定模型的索引
+ :param models: None 表示全部;list/tuple 表示指定模型
+ """
if not self.setup_complete:
self.setup()
@@ -277,167 +285,119 @@ class WhooshSearchBackend(BaseSearchBackend):
try:
if models is None:
- self.delete_index()
+ self.delete_index() # 删目录/清内存
else:
models_to_delete = []
-
for model in models:
models_to_delete.append(
- u"%s:%s" %
- (DJANGO_CT, get_model_ct(model)))
-
+ u"%s:%s" % (DJANGO_CT, get_model_ct(model)))
+ # 构造 OR 查询一次性删除
self.index.delete_by_query(
- q=self.parser.parse(
- u" OR ".join(models_to_delete)))
+ q=self.parser.parse(u" OR ".join(models_to_delete)))
except Exception as e:
if not self.silently_fail:
raise
-
if models is not None:
self.log.error(
"Failed to clear Whoosh index of models '%s': %s",
- ','.join(models_to_delete),
- e,
- exc_info=True)
+ ','.join(models_to_delete), e, exc_info=True)
else:
- self.log.error(
- "Failed to clear Whoosh index: %s", e, exc_info=True)
+ self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True)
def delete_index(self):
- # Per the Whoosh mailing list, if wiping out everything from the index,
- # it's much more efficient to simply delete the index files.
+ """
+ 物理删除索引文件/内存,并重新 setup
+ """
if self.use_file_storage and os.path.exists(self.path):
shutil.rmtree(self.path)
elif not self.use_file_storage:
self.storage.clean()
-
- # Recreate everything.
self.setup()
def optimize(self):
+ """
+ 手动合并索引段,提升查询速度(耗时操作)
+ """
if not self.setup_complete:
self.setup()
-
self.index = self.index.refresh()
self.index.optimize()
def calculate_page(self, start_offset=0, end_offset=None):
- # Prevent against Whoosh throwing an error. Requires an end_offset
- # greater than 0.
+ """
+ 把 Django 风格的分页起止偏移换算成 whoosh 的页码+每页条数
+ """
if end_offset is not None and end_offset <= 0:
end_offset = 1
-
- # Determine the page.
page_num = 0
-
if end_offset is None:
end_offset = 1000000
-
if start_offset is None:
start_offset = 0
-
page_length = end_offset - start_offset
-
- if page_length and page_length > 0:
+ if page_length > 0:
page_num = int(start_offset / page_length)
-
- # Increment because Whoosh uses 1-based page numbers.
+ # whoosh 页码从 1 开始
page_num += 1
return page_num, page_length
@log_query
- def search(
- self,
- query_string,
- sort_by=None,
- start_offset=0,
- end_offset=None,
- fields='',
- highlight=False,
- facets=None,
- date_facets=None,
- query_facets=None,
- narrow_queries=None,
- spelling_query=None,
- within=None,
- dwithin=None,
- distance_point=None,
- models=None,
- limit_to_registered_models=None,
- result_class=None,
- **kwargs):
+ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ fields='', highlight=False, facets=None, date_facets=None,
+ query_facets=None, narrow_queries=None, spelling_query=None,
+ within=None, dwithin=None, distance_point=None, models=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ """
+ 核心查询方法,返回 {'results': [...], 'hits': n, ...}
+ 各参数含义见 Haystack 文档,此处不赘述
+ """
if not self.setup_complete:
self.setup()
- # A zero length query should return no results.
+ # 空查询直接返回 0 条
if len(query_string) == 0:
- return {
- 'results': [],
- 'hits': 0,
- }
+ return {'results': [], 'hits': 0}
query_string = force_str(query_string)
- # A one-character query (non-wildcard) gets nabbed by a stopwords
- # filter and should yield zero results.
+ # 单个非通配字符会被 whoosh 当 stopword 过滤掉,直接返回 0 条
if len(query_string) <= 1 and query_string != u'*':
- return {
- 'results': [],
- 'hits': 0,
- }
+ return {'results': [], 'hits': 0}
reverse = False
-
+ # 处理排序方向(whoosh 要求所有字段同向)
if sort_by is not None:
- # Determine if we need to reverse the results and if Whoosh can
- # handle what it's being asked to sort by. Reversing is an
- # all-or-nothing action, unfortunately.
sort_by_list = []
reverse_counter = 0
-
for order_by in sort_by:
if order_by.startswith('-'):
reverse_counter += 1
-
if reverse_counter and reverse_counter != len(sort_by):
raise SearchBackendError("Whoosh requires all order_by fields"
" to use the same sort direction")
-
for order_by in sort_by:
if order_by.startswith('-'):
sort_by_list.append(order_by[1:])
-
if len(sort_by_list) == 1:
reverse = True
else:
sort_by_list.append(order_by)
-
if len(sort_by_list) == 1:
reverse = False
-
sort_by = sort_by_list[0]
+ # whoosh 不支持 faceting,仅警告
if facets is not None:
- warnings.warn(
- "Whoosh does not handle faceting.",
- Warning,
- stacklevel=2)
-
+ warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2)
if date_facets is not None:
- warnings.warn(
- "Whoosh does not handle date faceting.",
- Warning,
- stacklevel=2)
-
+ warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2)
if query_facets is not None:
- warnings.warn(
- "Whoosh does not handle query faceting.",
- Warning,
- stacklevel=2)
+ warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2)
narrowed_results = None
self.index = self.index.refresh()
+ # 模型过滤
if limit_to_registered_models is None:
limit_to_registered_models = getattr(
settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
@@ -445,8 +405,6 @@ class WhooshSearchBackend(BaseSearchBackend):
if models and len(models):
model_choices = sorted(get_model_ct(model) for model in models)
elif limit_to_registered_models:
- # Using narrow queries, limit the results to only models handled
- # with the current routers.
model_choices = self.build_models_list()
else:
model_choices = []
@@ -454,27 +412,18 @@ class WhooshSearchBackend(BaseSearchBackend):
if len(model_choices) > 0:
if narrow_queries is None:
narrow_queries = set()
-
narrow_queries.add(' OR '.join(
['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
narrow_searcher = None
-
if narrow_queries is not None:
- # Potentially expensive? I don't see another way to do it in
- # Whoosh...
+ # 用 searcher 先过滤缩小结果集
narrow_searcher = self.index.searcher()
-
for nq in narrow_queries:
recent_narrowed_results = narrow_searcher.search(
self.parser.parse(force_str(nq)), limit=None)
-
if len(recent_narrowed_results) <= 0:
- return {
- 'results': [],
- 'hits': 0,
- }
-
+ return {'results': [], 'hits': 0}
if narrowed_results:
narrowed_results.filter(recent_narrowed_results)
else:
@@ -485,98 +434,55 @@ class WhooshSearchBackend(BaseSearchBackend):
if self.index.doc_count():
searcher = self.index.searcher()
parsed_query = self.parser.parse(query_string)
+ if parsed_query is None: # 非法/全停词查询
+ return {'results': [], 'hits': 0}
- # In the event of an invalid/stopworded query, recover gracefully.
- if parsed_query is None:
- return {
- 'results': [],
- 'hits': 0,
- }
-
- page_num, page_length = self.calculate_page(
- start_offset, end_offset)
-
- search_kwargs = {
- 'pagelen': page_length,
- 'sortedby': sort_by,
- 'reverse': reverse,
- }
-
- # Handle the case where the results have been narrowed.
+ page_num, page_length = self.calculate_page(start_offset, end_offset)
+ search_kwargs = {'pagelen': page_length,
+ 'sortedby': sort_by, 'reverse': reverse}
if narrowed_results is not None:
search_kwargs['filter'] = narrowed_results
try:
- raw_page = searcher.search_page(
- parsed_query,
- page_num,
- **search_kwargs
- )
+ raw_page = searcher.search_page(parsed_query, page_num, **search_kwargs)
except ValueError:
if not self.silently_fail:
raise
+ return {'results': [], 'hits': 0, 'spelling_suggestion': None}
- return {
- 'results': [],
- 'hits': 0,
- 'spelling_suggestion': None,
- }
-
- # Because as of Whoosh 2.5.1, it will return the wrong page of
- # results if you request something too high. :(
+ # whoosh 2.5.1 在请求页码过大时会返回错误页,需检测
if raw_page.pagenum < page_num:
- return {
- 'results': [],
- 'hits': 0,
- 'spelling_suggestion': None,
- }
+ return {'results': [], 'hits': 0, 'spelling_suggestion': None}
results = self._process_results(
- raw_page,
- highlight=highlight,
- query_string=query_string,
- spelling_query=spelling_query,
- result_class=result_class)
+ raw_page, highlight=highlight, query_string=query_string,
+ spelling_query=spelling_query, result_class=result_class)
searcher.close()
-
if hasattr(narrow_searcher, 'close'):
narrow_searcher.close()
-
return results
else:
+ # 索引为空时返回拼写建议
if self.include_spelling:
if spelling_query:
- spelling_suggestion = self.create_spelling_suggestion(
- spelling_query)
+ spelling_suggestion = self.create_spelling_suggestion(spelling_query)
else:
- spelling_suggestion = self.create_spelling_suggestion(
- query_string)
+ spelling_suggestion = self.create_spelling_suggestion(query_string)
else:
spelling_suggestion = None
+ return {'results': [], 'hits': 0, 'spelling_suggestion': spelling_suggestion}
- return {
- 'results': [],
- 'hits': 0,
- 'spelling_suggestion': spelling_suggestion,
- }
-
- def more_like_this(
- self,
- model_instance,
- additional_query_string=None,
- start_offset=0,
- end_offset=None,
- models=None,
- limit_to_registered_models=None,
- result_class=None,
- **kwargs):
+ def more_like_this(self, model_instance, additional_query_string=None,
+ start_offset=0, end_offset=None, models=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ """
+ 根据给定实例找“相似文档”
+ """
if not self.setup_complete:
self.setup()
- # Deferred models will have a different class ("RealClass_Deferred_fieldname")
- # which won't be in our registry:
+ # 处理延迟加载模型
model_klass = model_instance._meta.concrete_model
-
field_name = self.content_field_name
narrow_queries = set()
narrowed_results = None
@@ -589,8 +495,6 @@ class WhooshSearchBackend(BaseSearchBackend):
if models and len(models):
model_choices = sorted(get_model_ct(model) for model in models)
elif limit_to_registered_models:
- # Using narrow queries, limit the results to only models handled
- # with the current routers.
model_choices = self.build_models_list()
else:
model_choices = []
@@ -598,7 +502,6 @@ class WhooshSearchBackend(BaseSearchBackend):
if len(model_choices) > 0:
if narrow_queries is None:
narrow_queries = set()
-
narrow_queries.add(' OR '.join(
['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
@@ -606,29 +509,19 @@ class WhooshSearchBackend(BaseSearchBackend):
narrow_queries.add(additional_query_string)
narrow_searcher = None
-
if narrow_queries is not None:
- # Potentially expensive? I don't see another way to do it in
- # Whoosh...
narrow_searcher = self.index.searcher()
-
for nq in narrow_queries:
recent_narrowed_results = narrow_searcher.search(
self.parser.parse(force_str(nq)), limit=None)
-
if len(recent_narrowed_results) <= 0:
- return {
- 'results': [],
- 'hits': 0,
- }
-
+ return {'results': [], 'hits': 0}
if narrowed_results:
narrowed_results.filter(recent_narrowed_results)
else:
narrowed_results = recent_narrowed_results
page_num, page_length = self.calculate_page(start_offset, end_offset)
-
self.index = self.index.refresh()
raw_results = EmptyResults()
@@ -637,12 +530,10 @@ class WhooshSearchBackend(BaseSearchBackend):
searcher = self.index.searcher()
parsed_query = self.parser.parse(query)
results = searcher.search(parsed_query)
-
if len(results):
+ # 取第一条结果调 more_like_this
raw_results = results[0].more_like_this(
field_name, top=end_offset)
-
- # Handle the case where the results have been narrowed.
if narrowed_results is not None and hasattr(raw_results, 'filter'):
raw_results.filter(narrowed_results)
@@ -651,43 +542,25 @@ class WhooshSearchBackend(BaseSearchBackend):
except ValueError:
if not self.silently_fail:
raise
+ return {'results': [], 'hits': 0, 'spelling_suggestion': None}
- return {
- 'results': [],
- 'hits': 0,
- 'spelling_suggestion': None,
- }
-
- # Because as of Whoosh 2.5.1, it will return the wrong page of
- # results if you request something too high. :(
if raw_page.pagenum < page_num:
- return {
- 'results': [],
- 'hits': 0,
- 'spelling_suggestion': None,
- }
+ return {'results': [], 'hits': 0, 'spelling_suggestion': None}
results = self._process_results(raw_page, result_class=result_class)
searcher.close()
-
if hasattr(narrow_searcher, 'close'):
narrow_searcher.close()
-
return results
- def _process_results(
- self,
- raw_page,
- highlight=False,
- query_string='',
- spelling_query=None,
- result_class=None):
+ def _process_results(self, raw_page, highlight=False, query_string='',
+ spelling_query=None, result_class=None):
+ """
+ 把 whoosh 的 ResultsPage 转成 haystack SearchResult 列表
+ """
from haystack import connections
results = []
-
- # It's important to grab the hits first before slicing. Otherwise, this
- # can cause pagination failures.
- hits = len(raw_page)
+ hits = len(raw_page) # 必须在切片前取总数
if result_class is None:
result_class = SearchResult
@@ -707,171 +580,129 @@ class WhooshSearchBackend(BaseSearchBackend):
for key, value in raw_result.items():
index = unified_index.get_index(model)
string_key = str(key)
-
- if string_key in index.fields and hasattr(
- index.fields[string_key], 'convert'):
- # Special-cased due to the nature of KEYWORD fields.
+ if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+ # 多值 KEYWORD 字段用逗号拆分
if index.fields[string_key].is_multivalued:
if value is None or len(value) == 0:
additional_fields[string_key] = []
else:
- additional_fields[string_key] = value.split(
- ',')
+ additional_fields[string_key] = value.split(',')
else:
- additional_fields[string_key] = index.fields[string_key].convert(
- value)
+ additional_fields[string_key] = index.fields[string_key].convert(value)
else:
additional_fields[string_key] = self._to_python(value)
- del (additional_fields[DJANGO_CT])
- del (additional_fields[DJANGO_ID])
+ # 删除内部字段
+ del additional_fields[DJANGO_CT]
+ del additional_fields[DJANGO_ID]
+ # 高亮处理
if highlight:
sa = StemmingAnalyzer()
formatter = WhooshHtmlFormatter('em')
terms = [token.text for token in sa(query_string)]
-
whoosh_result = whoosh_highlight(
additional_fields.get(self.content_field_name),
- terms,
- sa,
- ContextFragmenter(),
- formatter
- )
+ terms, sa, ContextFragmenter(), formatter)
additional_fields['highlighted'] = {
self.content_field_name: [whoosh_result],
}
- result = result_class(
- app_label,
- model_name,
- raw_result[DJANGO_ID],
- score,
- **additional_fields)
+ result = result_class(app_label, model_name,
+ raw_result[DJANGO_ID], score,
+ **additional_fields)
results.append(result)
else:
- hits -= 1
+ hits -= 1 # 模型未注册,命中不计入总数
+ # 拼写建议
if self.include_spelling:
if spelling_query:
- spelling_suggestion = self.create_spelling_suggestion(
- spelling_query)
+ spelling_suggestion = self.create_spelling_suggestion(spelling_query)
else:
- spelling_suggestion = self.create_spelling_suggestion(
- query_string)
-
- return {
- 'results': results,
- 'hits': hits,
- 'facets': facets,
- 'spelling_suggestion': spelling_suggestion,
- }
+ spelling_suggestion = self.create_spelling_suggestion(query_string)
+
+ return {'results': results, 'hits': hits,
+ 'facets': facets, 'spelling_suggestion': spelling_suggestion}
def create_spelling_suggestion(self, query_string):
+ """
+ 基于 whoosh corrector 给出拼写纠正建议
+ """
spelling_suggestion = None
reader = self.index.reader()
corrector = reader.corrector(self.content_field_name)
cleaned_query = force_str(query_string)
-
if not query_string:
return spelling_suggestion
-
- # Clean the string.
+ # 去掉保留字/符
for rev_word in self.RESERVED_WORDS:
cleaned_query = cleaned_query.replace(rev_word, '')
-
for rev_char in self.RESERVED_CHARACTERS:
cleaned_query = cleaned_query.replace(rev_char, '')
-
- # Break it down.
+ # 按单词纠正
query_words = cleaned_query.split()
suggested_words = []
-
for word in query_words:
suggestions = corrector.suggest(word, limit=1)
-
if len(suggestions) > 0:
suggested_words.append(suggestions[0])
-
spelling_suggestion = ' '.join(suggested_words)
return spelling_suggestion
def _from_python(self, value):
"""
- Converts Python values to a string for Whoosh.
-
- Code courtesy of pysolr.
+ 把 Python 值转成 whoosh 可索引的 unicode / string
"""
if hasattr(value, 'strftime'):
if not hasattr(value, 'hour'):
value = datetime(value.year, value.month, value.day, 0, 0, 0)
elif isinstance(value, bool):
- if value:
- value = 'true'
- else:
- value = 'false'
+ value = 'true' if value else 'false'
elif isinstance(value, (list, tuple)):
value = u','.join([force_str(v) for v in value])
elif isinstance(value, (six.integer_types, float)):
- # Leave it alone.
- pass
+ pass # 数字保持原样
else:
value = force_str(value)
return value
def _to_python(self, value):
"""
- Converts values from Whoosh to native Python values.
-
- A port of the same method in pysolr, as they deal with data the same way.
+ 把 whoosh 存储的字符串转回 Python 对象
"""
if value == 'true':
return True
elif value == 'false':
return False
-
if value and isinstance(value, six.string_types):
possible_datetime = DATETIME_REGEX.search(value)
-
if possible_datetime:
date_values = possible_datetime.groupdict()
-
for dk, dv in date_values.items():
date_values[dk] = int(dv)
-
- return datetime(
- date_values['year'],
- date_values['month'],
- date_values['day'],
- date_values['hour'],
- date_values['minute'],
- date_values['second'])
-
+ return datetime(date_values['year'], date_values['month'],
+ date_values['day'], date_values['hour'],
+ date_values['minute'], date_values['second'])
try:
- # Attempt to use json to load the values.
converted_value = json.loads(value)
-
- # Try to handle most built-in types.
- if isinstance(
- converted_value,
- (list,
- tuple,
- set,
- dict,
- six.integer_types,
- float,
- complex)):
+ if isinstance(converted_value, (list, tuple, set, dict,
+ six.integer_types, float, complex)):
return converted_value
except BaseException:
- # If it fails (SyntaxError or its ilk) or we don't trust it,
- # continue on.
pass
-
return value
class WhooshSearchQuery(BaseSearchQuery):
+ """
+ 负责把 Haystack 的 filter/exclude 条件转换成 whoosh 查询语法
+ """
+
def _convert_datetime(self, date):
+ """
+ 统一把 datetime 转成 whoosh 需要的 YYYYMMDDHHMMSS 字符串
+ """
if hasattr(date, 'hour'):
return force_str(date.strftime('%Y%m%d%H%M%S'))
else:
@@ -879,63 +710,53 @@ class WhooshSearchQuery(BaseSearchQuery):
def clean(self, query_fragment):
"""
- Provides a mechanism for sanitizing user input before presenting the
- value to the backend.
-
- Whoosh 1.X differs here in that you can no longer use a backslash
- to escape reserved characters. Instead, the whole word should be
- quoted.
+ 转义保留字/符;whoosh 1.x 不再支持反斜杠转义,需用引号包裹
"""
words = query_fragment.split()
cleaned_words = []
-
for word in words:
if word in self.backend.RESERVED_WORDS:
- word = word.replace(word, word.lower())
-
+ word = word.lower() # 保留字小写化
for char in self.backend.RESERVED_CHARACTERS:
if char in word:
word = "'%s'" % word
break
-
cleaned_words.append(word)
-
return ' '.join(cleaned_words)
def build_query_fragment(self, field, filter_type, value):
+ """
+ 把单个 filter 条件转成 whoosh 查询子串
+ 例如:name='exact' -> name:xxx
+ """
from haystack import connections
query_frag = ''
is_datetime = False
if not hasattr(value, 'input_type_name'):
- # Handle when we've got a ``ValuesListQuerySet``...
+ # 处理 ValuesListQuerySet 等
if hasattr(value, 'values_list'):
value = list(value)
-
if hasattr(value, 'strftime'):
is_datetime = True
-
if isinstance(value, six.string_types) and value != ' ':
- # It's not an ``InputType``. Assume ``Clean``.
value = Clean(value)
else:
value = PythonData(value)
- # Prepare the query using the InputType.
prepared_value = value.prepare(self)
if not isinstance(prepared_value, (set, list, tuple)):
- # Then convert whatever we get back to what pysolr wants if needed.
prepared_value = self.backend._from_python(prepared_value)
- # 'content' is a special reserved word, much like 'pk' in
- # Django's ORM layer. It indicates 'no special field'.
+ # 'content' 是保留字段,表示全文检索
if field == 'content':
index_fieldname = ''
else:
index_fieldname = u'%s:' % connections[self._using].get_unified_index(
).get_index_fieldname(field)
+ # 查询模板映射
filter_types = {
'content': '%s',
'contains': '*%s*',
@@ -952,67 +773,47 @@ class WhooshSearchQuery(BaseSearchQuery):
if value.post_process is False:
query_frag = prepared_value
else:
- if filter_type in [
- 'content',
- 'contains',
- 'startswith',
- 'endswith',
- 'fuzzy']:
+ if filter_type in ['content', 'contains', 'startswith',
+ 'endswith', 'fuzzy']:
if value.input_type_name == 'exact':
query_frag = prepared_value
else:
- # Iterate over terms & incorportate the converted form of
- # each into the query.
+ # 分词后每个词都加通配符/模板
terms = []
-
if isinstance(prepared_value, six.string_types):
possible_values = prepared_value.split(' ')
else:
if is_datetime is True:
- prepared_value = self._convert_datetime(
- prepared_value)
-
+ prepared_value = self._convert_datetime(prepared_value)
possible_values = [prepared_value]
-
for possible_value in possible_values:
terms.append(
- filter_types[filter_type] %
- self.backend._from_python(possible_value))
-
+ filter_types[filter_type] % self.backend._from_python(possible_value))
if len(terms) == 1:
query_frag = terms[0]
else:
query_frag = u"(%s)" % " AND ".join(terms)
elif filter_type == 'in':
in_options = []
-
for possible_value in prepared_value:
is_datetime = False
-
if hasattr(possible_value, 'strftime'):
is_datetime = True
-
pv = self.backend._from_python(possible_value)
-
if is_datetime is True:
pv = self._convert_datetime(pv)
-
if isinstance(pv, six.string_types) and not is_datetime:
in_options.append('"%s"' % pv)
else:
in_options.append('%s' % pv)
-
query_frag = "(%s)" % " OR ".join(in_options)
elif filter_type == 'range':
start = self.backend._from_python(prepared_value[0])
end = self.backend._from_python(prepared_value[1])
-
if hasattr(prepared_value[0], 'strftime'):
start = self._convert_datetime(start)
-
if hasattr(prepared_value[1], 'strftime'):
end = self._convert_datetime(end)
-
query_frag = u"[%s to %s]" % (start, end)
elif filter_type == 'exact':
if value.input_type_name == 'exact':
@@ -1023,22 +824,19 @@ class WhooshSearchQuery(BaseSearchQuery):
else:
if is_datetime is True:
prepared_value = self._convert_datetime(prepared_value)
-
query_frag = filter_types[filter_type] % prepared_value
+ # 非 Raw 输入且未带括号,则整体括号包裹
if len(query_frag) and not isinstance(value, Raw):
if not query_frag.startswith('(') and not query_frag.endswith(')'):
query_frag = "(%s)" % query_frag
return u"%s%s" % (index_fieldname, query_frag)
- # if not filter_type in ('in', 'range'):
- # # 'in' is a bit of a special case, as we don't want to
- # # convert a valid list/tuple to string. Defer handling it
- # # until later...
- # value = self.backend._from_python(value)
-
class WhooshEngine(BaseEngine):
+ """
+ 入口 Engine,供 Haystack 加载
+ """
backend = WhooshSearchBackend
- query = WhooshSearchQuery
+ query = WhooshSearchQuery
\ No newline at end of file