diff --git a/djangoblog/admin_site.py b/djangoblog/admin_site.py index f120405..f1d9f9d 100644 --- a/djangoblog/admin_site.py +++ b/djangoblog/admin_site.py @@ -1,13 +1,20 @@ +#gq: +# 从 Django 内置的 admin 模块导入 AdminSite 基类 from django.contrib.admin import AdminSite +# 导入 LogEntry 模型,用于记录管理员操作日志 from django.contrib.admin.models import LogEntry +# 导入 Site 模型及其默认的 Admin 配置 from django.contrib.sites.admin import SiteAdmin from django.contrib.sites.models import Site +# 批量导入各个自定义 App 的 Admin 配置和模型 +# 这种星号(*)导入方式在项目规模较小时很方便,但大型项目中可能影响代码可读性 from accounts.admin import * from blog.admin import * from blog.models import * from comments.admin import * from comments.models import * +# 导入自定义的 LogEntryAdmin,用于自定义操作日志的后台显示 from djangoblog.logentryadmin import LogEntryAdmin from oauth.admin import * from oauth.models import * @@ -18,28 +25,59 @@ from servermanager.models import * class DjangoBlogAdminSite(AdminSite): + """ + 自定义的 Admin 站点类,继承自 Django 的 AdminSite。 + 用于定制 Admin 后台的外观和行为。 + """ + # 定制 Admin 后台顶部的标题 site_header = 'djangoblog administration' + # 定制浏览器标签页上的标题 site_title = 'djangoblog site admin' def __init__(self, name='admin'): + """ + 初始化方法 + :param name: 站点的名称,默认是 'admin',这会影响 URL 反向解析等。 + """ super().__init__(name) def has_permission(self, request): + """ + 重写权限检查方法。 + 这个方法决定了一个请求是否有权限访问 Admin 后台。 + :param request: 当前的 HTTP 请求对象 + :return: Boolean,表示是否允许访问 + """ + # 只有超级用户(superuser)才能访问这个自定义的 Admin 站点 + # 这是一个比默认更严格的权限控制 return request.user.is_superuser # def get_urls(self): + # """ + # (已注释)重写 get_urls 方法来添加自定义的 URL 路由。 + # 这是一个示例,展示了如何在 Admin 后台中加入自己的视图。 + # """ + # # 先获取父类的所有 URL # urls = super().get_urls() # from django.urls import path + # # 导入一个自定义的视图函数,用于刷新缓存 # from blog.views import refresh_memcache # + # # 定义自己的 URL 模式 # my_urls = [ + # # 使用 self.admin_view() 包装自定义视图,以确保它受到 Admin 权限保护 # path('refresh/', self.admin_view(refresh_memcache), name="refresh"), # ] + # # 返回合并后的 URL 列表 # return urls + my_urls - +# 创建一个自定义 Admin 站点的实例 +# 这个实例将被用于注册所有的模型 admin_site = DjangoBlogAdminSite(name='admin') +# --- 开始注册各个 App 的模型到自定义的 admin_site --- + +# 注册 blog App 的模型 admin_site.register(Article, ArticlelAdmin) admin_site.register(Category, CategoryAdmin) admin_site.register(Tag, TagAdmin) @@ -47,18 +85,25 @@ admin_site.register(Links, LinksAdmin) admin_site.register(SideBar, SideBarAdmin) admin_site.register(BlogSettings, BlogSettingsAdmin) +# 注册 servermanager App 的模型 admin_site.register(commands, CommandsAdmin) admin_site.register(EmailSendLog, EmailSendLogAdmin) +# 注册 accounts App 的模型 admin_site.register(BlogUser, BlogUserAdmin) +# 注册 comments App 的模型 admin_site.register(Comment, CommentAdmin) +# 注册 oauth App 的模型 admin_site.register(OAuthUser, OAuthUserAdmin) admin_site.register(OAuthConfig, OAuthConfigAdmin) +# 注册 owntracks App 的模型 admin_site.register(OwnTrackLog, OwnTrackLogsAdmin) +# 注册 Django 内置的 Site 模型 admin_site.register(Site, SiteAdmin) -admin_site.register(LogEntry, LogEntryAdmin) +# 注册 Django 内置的 LogEntry 模型,使用自定义的 LogEntryAdmin 配置 +admin_site.register(LogEntry, LogEntryAdmin) \ No newline at end of file diff --git a/djangoblog/apps.py b/djangoblog/apps.py index d29e318..cb37b47 100644 --- a/djangoblog/apps.py +++ b/djangoblog/apps.py @@ -1,11 +1,14 @@ +#gq: from django.apps import AppConfig class DjangoblogAppConfig(AppConfig): + # 默认主键类型:BigAutoField(自增 BigInteger,避免ID溢出) default_auto_field = 'django.db.models.BigAutoField' + # 应用唯一标识(与settings.py中INSTALLED_APPS配置一致) name = 'djangoblog' def ready(self): - super().ready() - # Import and load plugins here + super().ready() # 执行父类启动逻辑 + # 应用启动时自动加载所有插件 from .plugin_manage.loader import load_plugins - load_plugins() \ No newline at end of file + load_plugins() \ No newline at end of file diff --git a/djangoblog/blog_signals.py b/djangoblog/blog_signals.py index 393f441..712757b 100644 --- a/djangoblog/blog_signals.py +++ b/djangoblog/blog_signals.py @@ -1,3 +1,4 @@ +#gq: import _thread import logging @@ -18,13 +19,15 @@ from oauth.models import OAuthUser logger = logging.getLogger(__name__) +# 自定义信号:OAuth用户登录成功后触发 oauth_user_login_signal = django.dispatch.Signal(['id']) -send_email_signal = django.dispatch.Signal( - ['emailto', 'title', 'content']) +# 自定义信号:需要发送邮件时触发 +send_email_signal = django.dispatch.Signal(['emailto', 'title', 'content']) @receiver(send_email_signal) def send_email_signal_handler(sender, **kwargs): + """发送HTML邮件并记录发送日志""" emailto = kwargs['emailto'] title = kwargs['title'] content = kwargs['content'] @@ -53,9 +56,11 @@ def send_email_signal_handler(sender, **kwargs): @receiver(oauth_user_login_signal) def oauth_user_login_signal_handler(sender, **kwargs): + """OAuth用户登录后,处理头像本地化并清理侧边栏缓存""" id = kwargs['id'] oauthuser = OAuthUser.objects.get(id=id) site = get_current_site().domain + # 如果头像是外部链接,则下载本地化 if oauthuser.picture and not oauthuser.picture.find(site) >= 0: from djangoblog.utils import save_user_avatar oauthuser.picture = save_user_avatar(oauthuser.picture) @@ -73,20 +78,31 @@ def model_post_save_callback( using, update_fields, **kwargs): + """ + 模型保存后触发: + 1. 对有get_full_url方法的模型(如文章),通知搜索引擎。 + 2. 对评论,清理相关缓存并异步发送邮件通知。 + """ clearcache = False + # 忽略Admin日志 if isinstance(instance, LogEntry): return + + # 处理内容模型(如文章) if 'get_full_url' in dir(instance): is_update_views = update_fields == {'views'} + # 非测试环境且非仅更新浏览量,则通知搜索引擎 if not settings.TESTING and not is_update_views: try: notify_url = instance.get_full_url() SpiderNotify.baidu_notify([notify_url]) except Exception as ex: logger.error("notify sipder", ex) + # 非仅更新浏览量,则标记需要清理缓存 if not is_update_views: clearcache = True + # 处理评论 if isinstance(instance, Comment): if instance.is_enable: path = instance.article.get_absolute_url() @@ -94,21 +110,19 @@ def model_post_save_callback( if site.find(':') > 0: site = site[0:site.find(':')] - expire_view_cache( - path, - servername=site, - serverport=80, - key_prefix='blogdetail') + # 清理文章页、评论、侧边栏等相关缓存 + expire_view_cache(path, servername=site, serverport=80, key_prefix='blogdetail') if cache.get('seo_processor'): cache.delete('seo_processor') - comment_cache_key = 'article_comments_{id}'.format( - id=instance.article.id) + comment_cache_key = 'article_comments_{id}'.format(id=instance.article.id) cache.delete(comment_cache_key) delete_sidebar_cache() delete_view_cache('article_comments', [str(instance.article.pk)]) + # 异步发送评论通知邮件 _thread.start_new_thread(send_comment_email, (instance,)) + # 根据标记清理全局缓存 if clearcache: cache.clear() @@ -116,7 +130,8 @@ def model_post_save_callback( @receiver(user_logged_in) @receiver(user_logged_out) def user_auth_callback(sender, request, user, **kwargs): + """用户登录/登出后,清理侧边栏缓存""" if user and user.username: logger.info(user) delete_sidebar_cache() - # cache.clear() + # cache.clear() # 注释掉了全局缓存清理,仅清理侧边栏 \ No newline at end of file diff --git a/djangoblog/elasticsearch_backend.py b/djangoblog/elasticsearch_backend.py index 4afe498..3b143fc 100644 --- a/djangoblog/elasticsearch_backend.py +++ b/djangoblog/elasticsearch_backend.py @@ -1,3 +1,4 @@ +#gq: from django.utils.encoding import force_str from elasticsearch_dsl import Q from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query @@ -12,51 +13,57 @@ logger = logging.getLogger(__name__) class ElasticSearchBackend(BaseSearchBackend): + """Elasticsearch搜索后端,实现Haystack接口,处理索引与搜索逻辑""" def __init__(self, connection_alias, **connection_options): super( ElasticSearchBackend, self).__init__( connection_alias, **connection_options) - self.manager = ArticleDocumentManager() - self.include_spelling = True + self.manager = ArticleDocumentManager() # 文档索引管理工具 + self.include_spelling = True # 启用拼写建议 def _get_models(self, iterable): + """将模型实例/ID转换为Elasticsearch文档对象""" models = iterable if iterable and iterable[0] else Article.objects.all() docs = self.manager.convert_to_doc(models) return docs def _create(self, models): + """创建索引并批量重建文档""" self.manager.create_index() docs = self._get_models(models) self.manager.rebuild(docs) def _delete(self, models): + """删除指定文档""" for m in models: m.delete() return True def _rebuild(self, models): + """增量更新索引文档""" models = models if models else Article.objects.all() - docs = self.manager.convert_to_doc(models) + docs = self._get_models(models) self.manager.update_docs(docs) def update(self, index, iterable, commit=True): - + """更新索引:将模型实例同步到Elasticsearch""" models = self._get_models(iterable) self.manager.update_docs(models) def remove(self, obj_or_string): + """从索引中删除单个对象""" models = self._get_models([obj_or_string]) self._delete(models) def clear(self, models=None, commit=True): + """清空整个索引""" self.remove(None) @staticmethod def get_suggestion(query: str) -> str: - """获取推荐词, 如果没有找到添加原搜索词""" - + """获取搜索推荐词,无建议则返回原查询词""" search = ArticleDocument.search() \ .query("match", body=query) \ .suggest('suggest_search', query, term={'field': 'body'}) \ @@ -64,30 +71,31 @@ class ElasticSearchBackend(BaseSearchBackend): keywords = [] for suggest in search.suggest.suggest_search: - if suggest["options"]: - keywords.append(suggest["options"][0]["text"]) - else: - keywords.append(suggest["text"]) + # 有建议取第一个,无则用原词 + keywords.append(suggest["options"][0]["text"] if suggest["options"] else suggest["text"]) return ' '.join(keywords) @log_query def search(self, query_string, **kwargs): + """核心搜索逻辑:匹配文章标题/正文,过滤已发布文章,支持分页和拼写建议""" logger.info('search query_string:' + query_string) - start_offset = kwargs.get('start_offset') - end_offset = kwargs.get('end_offset') + start_offset = kwargs.get('start_offset') # 分页起始位置 + end_offset = kwargs.get('end_offset') # 分页结束位置 - # 推荐词搜索 + # 启用推荐词搜索 if getattr(self, "is_suggest", None): suggestion = self.get_suggestion(query_string) else: suggestion = query_string + # 构建查询:匹配正文或标题,最低70%匹配度 q = Q('bool', should=[Q('match', body=suggestion), Q('match', title=suggestion)], minimum_should_match="70%") + # 执行搜索:过滤已发布(status='p')、文章类型(type='a'),不返回原始文档 search = ArticleDocument.search() \ .query('bool', filter=[q]) \ .filter('term', status='p') \ @@ -95,8 +103,9 @@ class ElasticSearchBackend(BaseSearchBackend): .source(False)[start_offset: end_offset] results = search.execute() - hits = results['hits'].total + hits = results['hits'].total # 总命中数 raw_results = [] + # 格式化结果为Haystack兼容的SearchResult对象 for raw_result in results['hits']['hits']: app_label = 'blog' model_name = 'Article' @@ -107,11 +116,12 @@ class ElasticSearchBackend(BaseSearchBackend): result = result_class( app_label, model_name, - raw_result['_id'], - raw_result['_score'], + raw_result['_id'], # 文档ID + raw_result['_score'], # 相关性得分 **additional_fields) raw_results.append(result) facets = {} + # 若推荐词与原词不同则返回建议 spelling_suggestion = None if query_string == suggestion else suggestion return { @@ -123,21 +133,16 @@ class ElasticSearchBackend(BaseSearchBackend): class ElasticSearchQuery(BaseSearchQuery): + """Elasticsearch查询构建类,适配Haystack接口""" def _convert_datetime(self, date): + """将datetime转换为Elasticsearch兼容的字符串格式""" if hasattr(date, 'hour'): return force_str(date.strftime('%Y%m%d%H%M%S')) else: return force_str(date.strftime('%Y%m%d000000')) def clean(self, query_fragment): - """ - Provides a mechanism for sanitizing user input before presenting the - value to the backend. - - Whoosh 1.X differs here in that you can no longer use a backslash - to escape reserved characters. Instead, the whole word should be - quoted. - """ + """清理查询片段,处理保留字和特殊字符(兼容Whoosh逻辑)""" words = query_fragment.split() cleaned_words = [] @@ -155,29 +160,34 @@ class ElasticSearchQuery(BaseSearchQuery): return ' '.join(cleaned_words) def build_query_fragment(self, field, filter_type, value): + """构建查询片段,直接返回查询字符串""" return value.query_string def get_count(self): + """获取查询结果总数""" results = self.get_results() return len(results) if results else 0 def get_spelling_suggestion(self, preferred_query=None): + """返回拼写建议词""" return self._spelling_suggestion def build_params(self, spelling_query=None): + """构建查询参数,复用父类逻辑""" kwargs = super(ElasticSearchQuery, self).build_params(spelling_query=spelling_query) return kwargs class ElasticSearchModelSearchForm(ModelSearchForm): - + """自定义搜索表单,控制是否启用搜索建议""" def search(self): - # 是否建议搜索 + # 根据请求参数决定是否启用建议(is_suggest != "no"时启用) self.searchqueryset.query.backend.is_suggest = self.data.get("is_suggest") != "no" sqs = super().search() return sqs class ElasticSearchEngine(BaseEngine): + """Elasticsearch引擎入口,指定后端和查询类""" backend = ElasticSearchBackend - query = ElasticSearchQuery + query = ElasticSearchQuery \ No newline at end of file diff --git a/djangoblog/feeds.py b/djangoblog/feeds.py index 8c4e851..c4d6b48 100644 --- a/djangoblog/feeds.py +++ b/djangoblog/feeds.py @@ -1,40 +1,49 @@ +#gq: from django.contrib.auth import get_user_model -from django.contrib.syndication.views import Feed +from django.contrib.syndication.views import Feed # Django内置RSS/Atom订阅生成视图 from django.utils import timezone -from django.utils.feedgenerator import Rss201rev2Feed - -from blog.models import Article -from djangoblog.utils import CommonMarkdown +from django.utils.feedgenerator import Rss201rev2Feed # RSS 2.0标准生成器 +from blog.models import Article # 博客文章模型 +from djangoblog.utils import CommonMarkdown # Markdown解析工具,将Markdown转为HTML class DjangoBlogFeed(Feed): - feed_type = Rss201rev2Feed + """博客RSS订阅Feed类,生成符合RSS 2.0标准的订阅内容""" + feed_type = Rss201rev2Feed # 指定Feed类型为RSS 2.0 - description = '大巧无工,重剑无锋.' - title = "且听风吟 大巧无工,重剑无锋. " - link = "/feed/" + description = '大巧无工,重剑无锋.' # Feed描述 + title = "且听风吟 大巧无工,重剑无锋. " # Feed标题(订阅列表中显示) + link = "/feed/" # Feed的URL地址 def author_name(self): + """Feed作者名称:取系统第一个用户的昵称""" return get_user_model().objects.first().nickname def author_link(self): + """Feed作者链接:取系统第一个用户的个人主页URL""" return get_user_model().objects.first().get_absolute_url() def items(self): + """Feed订阅的内容列表:最新5篇已发布(status='p')的文章(type='a')""" return Article.objects.filter(type='a', status='p').order_by('-pub_time')[:5] def item_title(self, item): + """单个订阅项(文章)的标题:使用文章标题""" return item.title def item_description(self, item): + """单个订阅项的描述:将文章Markdown正文转为HTML""" return CommonMarkdown.get_markdown(item.body) def feed_copyright(self): + """Feed版权信息:显示当前年份的版权声明""" now = timezone.now() return "Copyright© {year} 且听风吟".format(year=now.year) def item_link(self, item): + """单个订阅项的链接:文章详情页URL""" return item.get_absolute_url() def item_guid(self, item): - return + """单个订阅项的唯一标识(预留方法,暂未实现)""" + return \ No newline at end of file diff --git a/djangoblog/logentryadmin.py b/djangoblog/logentryadmin.py index 2f6a535..3245344 100644 --- a/djangoblog/logentryadmin.py +++ b/djangoblog/logentryadmin.py @@ -1,3 +1,4 @@ +#gq: from django.contrib import admin from django.contrib.admin.models import DELETION from django.contrib.contenttypes.models import ContentType @@ -7,54 +8,32 @@ from django.utils.html import escape from django.utils.safestring import mark_safe from django.utils.translation import gettext_lazy as _ - class LogEntryAdmin(admin.ModelAdmin): - list_filter = [ - 'content_type' - ] - - search_fields = [ - 'object_repr', - 'change_message' - ] - - list_display_links = [ - 'action_time', - 'get_change_message', - ] - list_display = [ - 'action_time', - 'user_link', - 'content_type', - 'object_link', - 'get_change_message', - ] + """Admin操作日志自定义管理类:优化展示与权限控制""" + list_filter = ['content_type'] # 按内容类型筛选 + search_fields = ['object_repr', 'change_message'] # 搜索对象描述、操作信息 + list_display_links = ['action_time', 'get_change_message'] # 可点击跳转字段 + list_display = ['action_time', 'user_link', 'content_type', 'object_link', 'get_change_message'] # 列表展示字段 def has_add_permission(self, request): + """禁用添加:日志自动生成,不允许手动添加""" return False def has_change_permission(self, request, obj=None): - return ( - request.user.is_superuser or - request.user.has_perm('admin.change_logentry') - ) and request.method != 'POST' + """仅超级用户/有权限用户可查看,禁止POST修改""" + return (request.user.is_superuser or request.user.has_perm('admin.change_logentry')) and request.method != 'POST' def has_delete_permission(self, request, obj=None): + """禁用删除:日志需保留""" return False def object_link(self, obj): + """操作对象字段:非删除操作显示Admin编辑链接""" object_link = escape(obj.object_repr) - content_type = obj.content_type - - if obj.action_flag != DELETION and content_type is not None: - # try returning an actual link instead of object repr string + if obj.action_flag != DELETION and obj.content_type: try: - url = reverse( - 'admin:{}_{}_change'.format(content_type.app_label, - content_type.model), - args=[obj.object_id] - ) - object_link = '{}'.format(url, object_link) + url = reverse(f'admin:{obj.content_type.app_label}_{obj.content_type.model}_change', args=[obj.object_id]) + object_link = f'{object_link}' except NoReverseMatch: pass return mark_safe(object_link) @@ -63,16 +42,12 @@ class LogEntryAdmin(admin.ModelAdmin): object_link.short_description = _('object') def user_link(self, obj): + """操作用户字段:显示用户Admin编辑链接""" content_type = ContentType.objects.get_for_model(type(obj.user)) user_link = escape(force_str(obj.user)) try: - # try returning an actual link instead of object repr string - url = reverse( - 'admin:{}_{}_change'.format(content_type.app_label, - content_type.model), - args=[obj.user.pk] - ) - user_link = '{}'.format(url, user_link) + url = reverse(f'admin:{content_type.app_label}_{content_type.model}_change', args=[obj.user.pk]) + user_link = f'{user_link}' except NoReverseMatch: pass return mark_safe(user_link) @@ -81,11 +56,11 @@ class LogEntryAdmin(admin.ModelAdmin): user_link.short_description = _('user') def get_queryset(self, request): - queryset = super(LogEntryAdmin, self).get_queryset(request) - return queryset.prefetch_related('content_type') + """预加载content_type,优化查询性能""" + return super().get_queryset(request).prefetch_related('content_type') def get_actions(self, request): - actions = super(LogEntryAdmin, self).get_actions(request) - if 'delete_selected' in actions: - del actions['delete_selected'] - return actions + """移除批量删除操作""" + actions = super().get_actions(request) + del actions['delete_selected'] + return actions \ No newline at end of file diff --git a/djangoblog/plugin_manage/base_plugin.py b/djangoblog/plugin_manage/base_plugin.py index 2b4be5c..27d3945 100644 --- a/djangoblog/plugin_manage/base_plugin.py +++ b/djangoblog/plugin_manage/base_plugin.py @@ -1,18 +1,25 @@ +#gq: import logging +# 获取当前模块的日志记录器,方便在插件中输出日志 logger = logging.getLogger(__name__) class BasePlugin: - # 插件元数据 + # 插件元数据,子类必须定义这些属性,否则会在初始化时抛出异常 PLUGIN_NAME = None PLUGIN_DESCRIPTION = None PLUGIN_VERSION = None def __init__(self): + # 检查插件元数据是否完整,不完整则抛出 ValueError 异常 if not all([self.PLUGIN_NAME, self.PLUGIN_DESCRIPTION, self.PLUGIN_VERSION]): raise ValueError("Plugin metadata (PLUGIN_NAME, PLUGIN_DESCRIPTION, PLUGIN_VERSION) must be defined.") + + # 调用插件初始化方法,子类可以在这里完成特定的初始化操作 self.init_plugin() + + # 注册插件钩子,子类可以在这里注册需要监听的钩子 self.register_hooks() def init_plugin(self): @@ -20,6 +27,7 @@ class BasePlugin: 插件初始化逻辑 子类可以重写此方法来实现特定的初始化操作 """ + # 默认行为:输出插件初始化日志 logger.info(f'{self.PLUGIN_NAME} initialized.') def register_hooks(self): @@ -27,6 +35,7 @@ class BasePlugin: 注册插件钩子 子类可以重写此方法来注册特定的钩子 """ + # 默认行为:空实现,子类根据需要覆盖 pass def get_plugin_info(self): @@ -34,8 +43,9 @@ class BasePlugin: 获取插件信息 :return: 包含插件元数据的字典 """ + # 返回插件的基本信息,供外部调用者获取 return { 'name': self.PLUGIN_NAME, 'description': self.PLUGIN_DESCRIPTION, 'version': self.PLUGIN_VERSION - } + } \ No newline at end of file diff --git a/djangoblog/plugin_manage/hooks.py b/djangoblog/plugin_manage/hooks.py index d712540..42c50ee 100644 --- a/djangoblog/plugin_manage/hooks.py +++ b/djangoblog/plugin_manage/hooks.py @@ -1,13 +1,18 @@ +#gq: import logging +# 获取当前模块的日志记录器,用于输出钩子系统的日志 logger = logging.getLogger(__name__) +# 私有全局字典,用于存储所有注册的钩子 +# 结构:{ "hook_name": [callback1, callback2, ...] } _hooks = {} def register(hook_name: str, callback: callable): """ 注册一个钩子回调。 + 如果钩子名不存在,则初始化为空列表,再将回调函数追加进去。 """ if hook_name not in _hooks: _hooks[hook_name] = [] @@ -17,8 +22,9 @@ def register(hook_name: str, callback: callable): def run_action(hook_name: str, *args, **kwargs): """ - 执行一个 Action Hook。 - 它会按顺序执行所有注册到该钩子上的回调函数。 + 执行一个 Action Hook(动作钩子)。 + 它会按顺序执行所有注册到该钩子上的回调函数,无返回值。 + 如果某个回调抛出异常,记录错误日志并继续执行下一个回调。 """ if hook_name in _hooks: logger.debug(f"Running action hook '{hook_name}'") @@ -31,8 +37,10 @@ def run_action(hook_name: str, *args, **kwargs): def apply_filters(hook_name: str, value, *args, **kwargs): """ - 执行一个 Filter Hook。 - 它会把 value 依次传递给所有注册的回调函数进行处理。 + 执行一个 Filter Hook(过滤钩子)。 + 它会将 value 依次传递给所有注册的回调函数进行处理,每次处理结果作为下次输入。 + 如果某个回调抛出异常,记录错误日志并跳过该回调。 + 最后返回经过所有过滤器处理后的值。 """ if hook_name in _hooks: logger.debug(f"Applying filter hook '{hook_name}'") @@ -41,4 +49,4 @@ def apply_filters(hook_name: str, value, *args, **kwargs): value = callback(value, *args, **kwargs) except Exception as e: logger.error(f"Error applying filter hook '{hook_name}' callback '{callback.__name__}': {e}", exc_info=True) - return value + return value \ No newline at end of file diff --git a/djangoblog/plugin_manage/loader.py b/djangoblog/plugin_manage/loader.py index 12e824b..9392c00 100644 --- a/djangoblog/plugin_manage/loader.py +++ b/djangoblog/plugin_manage/loader.py @@ -1,19 +1,30 @@ +#gq: import os import logging from django.conf import settings +# 获取当前模块的日志记录器,用于输出插件加载过程中的日志 logger = logging.getLogger(__name__) + def load_plugins(): """ - Dynamically loads and initializes plugins from the 'plugins' directory. - This function is intended to be called when the Django app registry is ready. + 动态加载并初始化 plugins 目录下的插件。 + 应在 Django App Registry 完全就绪后调用(如 AppConfig.ready() 中)。 + 遍历 settings.ACTIVE_PLUGINS 列表,依次导入每个插件的 plugin.py 模块。 """ + # 遍历配置中启用的插件列表 for plugin_name in settings.ACTIVE_PLUGINS: + # 拼接插件目录绝对路径 plugin_path = os.path.join(settings.PLUGINS_DIR, plugin_name) + + # 仅当目录存在且目录下包含 plugin.py 文件时才尝试导入 if os.path.isdir(plugin_path) and os.path.exists(os.path.join(plugin_path, 'plugin.py')): try: + # 使用 __import__ 动态导入插件模块 + # 导入路径示例:plugins..plugin __import__(f'plugins.{plugin_name}.plugin') logger.info(f"Successfully loaded plugin: {plugin_name}") except ImportError as e: - logger.error(f"Failed to import plugin: {plugin_name}", exc_info=e) \ No newline at end of file + # 导入失败时记录错误日志,包含异常堆栈 + logger.error(f"Failed to import plugin: {plugin_name}", exc_info=e) \ No newline at end of file diff --git a/djangoblog/sitemap.py b/djangoblog/sitemap.py index 8b7d446..fee053e 100644 --- a/djangoblog/sitemap.py +++ b/djangoblog/sitemap.py @@ -1,59 +1,70 @@ -from django.contrib.sitemaps import Sitemap -from django.urls import reverse - -from blog.models import Article, Category, Tag +#gq: +from django.contrib.sitemaps import Sitemap # Django内置站点地图生成类 +from django.urls import reverse # URL反向解析 +from blog.models import Article, Category, Tag # 导入博客相关模型 class StaticViewSitemap(Sitemap): - priority = 0.5 - changefreq = 'daily' + """静态页面站点地图:如首页""" + priority = 0.5 # 页面优先级(0.0-1.0) + changefreq = 'daily' # 内容更新频率 def items(self): + """返回要包含的静态页面名称列表(对应URL名称)""" return ['blog:index', ] def location(self, item): + """返回每个静态页面的URL""" return reverse(item) - class ArticleSiteMap(Sitemap): - changefreq = "monthly" - priority = "0.6" + """文章页面站点地图""" + changefreq = "monthly" # 文章更新频率 + priority = "0.6" # 文章页面优先级 def items(self): + """返回所有已发布(status='p')的文章""" return Article.objects.filter(status='p') def lastmod(self, obj): + """返回文章的最后修改时间""" return obj.last_modify_time - class CategorySiteMap(Sitemap): - changefreq = "Weekly" - priority = "0.6" + """分类页面站点地图""" + changefreq = "Weekly" # 分类更新频率 + priority = "0.6" # 分类页面优先级 def items(self): + """返回所有分类""" return Category.objects.all() def lastmod(self, obj): + """返回分类的最后修改时间""" return obj.last_modify_time - class TagSiteMap(Sitemap): - changefreq = "Weekly" - priority = "0.3" + """标签页面站点地图""" + changefreq = "Weekly" # 标签更新频率 + priority = "0.3" # 标签页面优先级(低于文章和分类) def items(self): + """返回所有标签""" return Tag.objects.all() def lastmod(self, obj): + """返回标签的最后修改时间""" return obj.last_modify_time - class UserSiteMap(Sitemap): - changefreq = "Weekly" - priority = "0.3" + """用户页面站点地图""" + changefreq = "Weekly" # 用户信息更新频率 + priority = "0.3" # 用户页面优先级 def items(self): + """返回所有发布过文章的不重复作者列表""" return list(set(map(lambda x: x.author, Article.objects.all()))) def lastmod(self, obj): - return obj.date_joined + """返回用户的注册时间(作为站点地图的最后更新时间)""" + return obj.date_joined \ No newline at end of file diff --git a/djangoblog/spider_notify.py b/djangoblog/spider_notify.py index 7b909e9..0a1f4b1 100644 --- a/djangoblog/spider_notify.py +++ b/djangoblog/spider_notify.py @@ -1,21 +1,26 @@ +#gq: import logging +import requests # HTTP请求库,用于向搜索引擎提交URL +from django.conf import settings # 导入Django项目配置 -import requests -from django.conf import settings - -logger = logging.getLogger(__name__) - +logger = logging.getLogger(__name__) # 初始化日志对象 class SpiderNotify(): + """搜索引擎爬虫通知类:向百度等搜索引擎提交新URL,加速收录""" + @staticmethod def baidu_notify(urls): + """向百度搜索引擎提交URL列表,请求收录""" try: + # 格式化URL数据:每行一个URL(百度要求的提交格式) data = '\n'.join(urls) + # 发送POST请求到百度收录接口(配置在settings.BAIDU_NOTIFY_URL) result = requests.post(settings.BAIDU_NOTIFY_URL, data=data) - logger.info(result.text) + logger.info(result.text) # 记录提交结果 except Exception as e: - logger.error(e) + logger.error(e) # 记录提交失败异常 @staticmethod def notify(url): - SpiderNotify.baidu_notify(url) + """通用通知方法:调用百度收录提交(可扩展支持其他搜索引擎)""" + SpiderNotify.baidu_notify(url) \ No newline at end of file diff --git a/djangoblog/tests.py b/djangoblog/tests.py index 01237d9..d36eb51 100644 --- a/djangoblog/tests.py +++ b/djangoblog/tests.py @@ -1,15 +1,21 @@ +#gq: from django.test import TestCase from djangoblog.utils import * class DjangoBlogTest(TestCase): + """测试 djangoblog.utils 模块的核心工具函数""" + def setUp(self): pass def test_utils(self): + # 测试SHA-256哈希生成 md5 = get_sha256('test') self.assertIsNotNone(md5) + + # 测试Markdown解析(含标题、代码块、链接) c = CommonMarkdown.get_markdown(''' # Title1 diff --git a/djangoblog/urls.py b/djangoblog/urls.py index 4aae58a..7024876 100644 --- a/djangoblog/urls.py +++ b/djangoblog/urls.py @@ -1,3 +1,4 @@ +#gq: """djangoblog URL Configuration The `urlpatterns` list routes URLs to views. For more information please see: @@ -17,48 +18,59 @@ from django.conf import settings from django.conf.urls.i18n import i18n_patterns from django.conf.urls.static import static from django.contrib.sitemaps.views import sitemap -from django.urls import path, include -from django.urls import re_path +from django.urls import path, include, re_path from haystack.views import search_view_factory from blog.views import EsSearchView from djangoblog.admin_site import admin_site from djangoblog.elasticsearch_backend import ElasticSearchModelSearchForm from djangoblog.feeds import DjangoBlogFeed -from djangoblog.sitemap import ArticleSiteMap, CategorySiteMap, StaticViewSitemap, TagSiteMap, UserSiteMap +from djangoblog.sitemap import ( + ArticleSiteMap, CategorySiteMap, StaticViewSitemap, TagSiteMap, UserSiteMap +) +# 站点地图配置 sitemaps = { - - 'blog': ArticleSiteMap, - 'Category': CategorySiteMap, - 'Tag': TagSiteMap, - 'User': UserSiteMap, - 'static': StaticViewSitemap + 'blog': ArticleSiteMap, # 文章 + 'Category': CategorySiteMap, # 分类 + 'Tag': TagSiteMap, # 标签 + 'User': UserSiteMap, # 用户 + 'static': StaticViewSitemap # 静态页面 } -handler404 = 'blog.views.page_not_found_view' -handler500 = 'blog.views.server_error_view' -handle403 = 'blog.views.permission_denied_view' +# 自定义错误页面 +handler404 = 'blog.views.page_not_found_view' # 404 +handler500 = 'blog.views.server_error_view' # 500 +handle403 = 'blog.views.permission_denied_view'# 403 urlpatterns = [ - path('i18n/', include('django.conf.urls.i18n')), + path('i18n/', include('django.conf.urls.i18n')), # 国际化 ] + +# 国际化URL(多语言支持) urlpatterns += i18n_patterns( - re_path(r'^admin/', admin_site.urls), - re_path(r'', include('blog.urls', namespace='blog')), - re_path(r'mdeditor/', include('mdeditor.urls')), - re_path(r'', include('comments.urls', namespace='comment')), - re_path(r'', include('accounts.urls', namespace='account')), - re_path(r'', include('oauth.urls', namespace='oauth')), - re_path(r'^sitemap\.xml$', sitemap, {'sitemaps': sitemaps}, - name='django.contrib.sitemaps.views.sitemap'), - re_path(r'^feed/$', DjangoBlogFeed()), - re_path(r'^rss/$', DjangoBlogFeed()), - re_path('^search', search_view_factory(view_class=EsSearchView, form_class=ElasticSearchModelSearchForm), - name='search'), - re_path(r'', include('servermanager.urls', namespace='servermanager')), - re_path(r'', include('owntracks.urls', namespace='owntracks')) - , prefix_default_language=False) + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) + re_path(r'^admin/', admin_site.urls), # 后台管理 + re_path(r'', include('blog.urls', namespace='blog')), # 博客主体 + re_path(r'mdeditor/', include('mdeditor.urls')), # Markdown编辑器 + re_path(r'', include('comments.urls', namespace='comment')), # 评论 + re_path(r'', include('accounts.urls', namespace='account')), # 账户 + re_path(r'', include('oauth.urls', namespace='oauth')), # 第三方登录 + re_path(r'^sitemap\.xml$', sitemap, {'sitemaps': sitemaps}), # 站点地图 + re_path(r'^feed/$', DjangoBlogFeed()), # RSS订阅 + re_path(r'^rss/$', DjangoBlogFeed()), # RSS订阅(备用) + # Elasticsearch搜索 + re_path('^search', search_view_factory( + view_class=EsSearchView, + form_class=ElasticSearchModelSearchForm + ), name='search'), + re_path(r'', include('servermanager.urls', namespace='servermanager')), # 服务器管理 + re_path(r'', include('owntracks.urls', namespace='owntracks')), # 位置跟踪 + prefix_default_language=False +) + +# 静态文件URL +urlpatterns += static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) + +# 开发环境媒体文件URL if settings.DEBUG: - urlpatterns += static(settings.MEDIA_URL, - document_root=settings.MEDIA_ROOT) + urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) \ No newline at end of file diff --git a/djangoblog/utils.py b/djangoblog/utils.py index 57f63dc..af493dd 100644 --- a/djangoblog/utils.py +++ b/djangoblog/utils.py @@ -1,13 +1,14 @@ +#gq: #!/usr/bin/env python # encoding: utf-8 - import logging import os import random import string import uuid from hashlib import sha256 +from urllib.parse import quote import bleach import markdown @@ -15,52 +16,51 @@ import requests from django.conf import settings from django.contrib.sites.models import Site from django.core.cache import cache +from django.core.cache.utils import make_template_fragment_key +from django.http import HttpRequest from django.templatetags.static import static +from django.utils.cache import get_cache_key logger = logging.getLogger(__name__) def get_max_articleid_commentid(): + """获取最新文章和评论的ID""" from blog.models import Article from comments.models import Comment return (Article.objects.latest().pk, Comment.objects.latest().pk) def get_sha256(str): + """计算字符串的SHA-256哈希值""" m = sha256(str.encode('utf-8')) return m.hexdigest() def cache_decorator(expiration=3 * 60): + """函数缓存装饰器,默认缓存3分钟""" + def wrapper(func): def news(*args, **kwargs): try: + # 尝试从请求对象获取缓存键 view = args[0] key = view.get_cache_key() except: - key = None - if not key: + # 否则根据函数和参数生成唯一键 unique_str = repr((func, args, kwargs)) + key = get_sha256(unique_str) - m = sha256(unique_str.encode('utf-8')) - key = m.hexdigest() value = cache.get(key) if value is not None: - # logger.info('cache_decorator get cache:%s key:%s' % (func.__name__, key)) - if str(value) == '__default_cache_value__': - return None - else: - return value - else: - logger.debug( - 'cache_decorator set cache:%s key:%s' % - (func.__name__, key)) - value = func(*args, **kwargs) - if value is None: - cache.set(key, '__default_cache_value__', expiration) - else: - cache.set(key, value, expiration) - return value + # 返回缓存值,处理空值标记 + return None if str(value) == '__default_cache_value__' else value + + # 缓存未命中,执行函数并缓存结果 + logger.debug(f'cache_decorator set cache:{func.__name__} key:{key}') + value = func(*args, **kwargs) + cache.set(key, value if value is not None else '__default_cache_value__', expiration) + return value return news @@ -68,165 +68,143 @@ def cache_decorator(expiration=3 * 60): def expire_view_cache(path, servername, serverport, key_prefix=None): - ''' - 刷新视图缓存 - :param path:url路径 - :param servername:host - :param serverport:端口 - :param key_prefix:前缀 - :return:是否成功 - ''' - from django.http import HttpRequest - from django.utils.cache import get_cache_key - + """刷新指定URL的视图缓存""" request = HttpRequest() request.META = {'SERVER_NAME': servername, 'SERVER_PORT': serverport} request.path = path key = get_cache_key(request, key_prefix=key_prefix, cache=cache) if key: - logger.info('expire_view_cache:get key:{path}'.format(path=path)) - if cache.get(key): - cache.delete(key) + logger.info(f'expire_view_cache:get key:{path}') + cache.delete(key) return True return False @cache_decorator() def get_current_site(): - site = Site.objects.get_current() - return site + """获取当前站点信息(带缓存)""" + return Site.objects.get_current() class CommonMarkdown: + """Markdown解析工具类""" + @staticmethod def _convert_markdown(value): - md = markdown.Markdown( - extensions=[ - 'extra', - 'codehilite', - 'toc', - 'tables', - ] - ) - body = md.convert(value) - toc = md.toc - return body, toc + """内部方法:执行Markdown转换,返回HTML和目录""" + md = markdown.Markdown(extensions=['extra', 'codehilite', 'toc', 'tables']) + return md.convert(value), md.toc @staticmethod def get_markdown_with_toc(value): - body, toc = CommonMarkdown._convert_markdown(value) - return body, toc + """转换Markdown为HTML(含目录)""" + return CommonMarkdown._convert_markdown(value) @staticmethod def get_markdown(value): - body, toc = CommonMarkdown._convert_markdown(value) + """转换Markdown为HTML(不含目录)""" + body, _ = CommonMarkdown._convert_markdown(value) return body def send_email(emailto, title, content): + """发送邮件(通过信号解耦)""" from djangoblog.blog_signals import send_email_signal - send_email_signal.send( - send_email.__class__, - emailto=emailto, - title=title, - content=content) + send_email_signal.send(send_email.__class__, emailto=emailto, title=title, content=content) def generate_code() -> str: - """生成随机数验证码""" + """生成6位随机数字验证码""" return ''.join(random.sample(string.digits, 6)) def parse_dict_to_url(dict): - from urllib.parse import quote - url = '&'.join(['{}={}'.format(quote(k, safe='/'), quote(v, safe='/')) - for k, v in dict.items()]) - return url + """将字典转换为URL查询字符串""" + return '&'.join([f'{quote(k, safe="/")}={quote(v, safe="/")}' for k, v in dict.items()]) def get_blog_setting(): + """获取博客系统设置(带缓存,无数据时初始化)""" value = cache.get('get_blog_setting') if value: return value - else: - from blog.models import BlogSettings - if not BlogSettings.objects.count(): - setting = BlogSettings() - setting.site_name = 'djangoblog' - setting.site_description = '基于Django的博客系统' - setting.site_seo_description = '基于Django的博客系统' - setting.site_keywords = 'Django,Python' - setting.article_sub_length = 300 - setting.sidebar_article_count = 10 - setting.sidebar_comment_count = 5 - setting.show_google_adsense = False - setting.open_site_comment = True - setting.analytics_code = '' - setting.beian_code = '' - setting.show_gongan_code = False - setting.comment_need_review = False - setting.save() - value = BlogSettings.objects.first() - logger.info('set cache get_blog_setting') - cache.set('get_blog_setting', value) - return value + from blog.models import BlogSettings + if not BlogSettings.objects.count(): + # 初始化默认设置 + setting = BlogSettings( + site_name='djangoblog', + site_description='基于Django的博客系统', + site_seo_description='基于Django的博客系统', + site_keywords='Django,Python', + article_sub_length=300, + sidebar_article_count=10, + sidebar_comment_count=5, + show_google_adsense=False, + open_site_comment=True, + analytics_code='', + beian_code='', + show_gongan_code=False, + comment_need_review=False + ) + setting.save() + + value = BlogSettings.objects.first() + cache.set('get_blog_setting', value) + return value -def save_user_avatar(url): - ''' - 保存用户头像 - :param url:头像url - :return: 本地路径 - ''' - logger.info(url) +def save_user_avatar(url): + """下载并保存用户头像到本地,返回静态文件URL""" try: basedir = os.path.join(settings.STATICFILES, 'avatar') rsp = requests.get(url, timeout=2) if rsp.status_code == 200: - if not os.path.exists(basedir): - os.makedirs(basedir) - - image_extensions = ['.jpg', '.png', 'jpeg', '.gif'] - isimage = len([i for i in image_extensions if url.endswith(i)]) > 0 - ext = os.path.splitext(url)[1] if isimage else '.jpg' - save_filename = str(uuid.uuid4().hex) + ext - logger.info('保存用户头像:' + basedir + save_filename) + os.makedirs(basedir, exist_ok=True) + + # 确定文件扩展名 + ext = os.path.splitext(url)[1] if any( + url.endswith(ext) for ext in ['.jpg', '.png', 'jpeg', '.gif']) else '.jpg' + save_filename = f'{uuid.uuid4().hex}{ext}' + with open(os.path.join(basedir, save_filename), 'wb+') as file: file.write(rsp.content) - return static('avatar/' + save_filename) + return static(f'avatar/{save_filename}') except Exception as e: logger.error(e) - return static('blog/img/avatar.png') + return static('blog/img/avatar.png') # 返回默认头像 def delete_sidebar_cache(): + """删除侧边栏相关缓存""" from blog.models import LinkShowType - keys = ["sidebar" + x for x in LinkShowType.values] + keys = [f"sidebar{x}" for x in LinkShowType.values] for k in keys: - logger.info('delete sidebar key:' + k) + logger.info(f'delete sidebar key:{k}') cache.delete(k) def delete_view_cache(prefix, keys): - from django.core.cache.utils import make_template_fragment_key + """删除指定模板片段缓存""" key = make_template_fragment_key(prefix, keys) cache.delete(key) def get_resource_url(): + """获取静态资源基础URL""" if settings.STATIC_URL: return settings.STATIC_URL - else: - site = get_current_site() - return 'http://' + site.domain + '/static/' + site = get_current_site() + return f'http://{site.domain}/static/' +# HTML清理配置 ALLOWED_TAGS = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2', 'p'] ALLOWED_ATTRIBUTES = {'a': ['href', 'title'], 'abbr': ['title'], 'acronym': ['title']} def sanitize_html(html): - return bleach.clean(html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES) + """清理HTML,只保留允许的标签和属性""" + return bleach.clean(html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES) \ No newline at end of file diff --git a/djangoblog/whoosh_cn_backend.py b/djangoblog/whoosh_cn_backend.py index 04e3f7f..554d3ec 100644 --- a/djangoblog/whoosh_cn_backend.py +++ b/djangoblog/whoosh_cn_backend.py @@ -1,6 +1,8 @@ +#gq: # encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals +# 未来兼容导入:保证代码在 Python2/3 下行为一致 import json import os @@ -22,7 +24,7 @@ from haystack.models import SearchResult from haystack.utils import get_identifier, get_model_ct from haystack.utils import log as logging from haystack.utils.app_loading import haystack_get_model -from jieba.analyse import ChineseAnalyzer +from jieba.analyse import ChineseAnalyzer # 中文分词器 from whoosh import index from whoosh.analysis import StemmingAnalyzer from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT @@ -47,23 +49,25 @@ if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0): # Bubble up the correct error. +# 用于解析 whoosh 存储的 datetime 字符串的正则 DATETIME_REGEX = re.compile( '^(?P\d{4})-(?P\d{2})-(?P\d{2})T(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d{3,6}Z?)?$') + +# 线程本地存储:用于在内存模式时共享 RamStorage LOCALS = threading.local() LOCALS.RAM_STORE = None class WhooshHtmlFormatter(HtmlFormatter): """ - This is a HtmlFormatter simpler than the whoosh.HtmlFormatter. - We use it to have consistent results across backends. Specifically, - Solr, Xapian and Elasticsearch are using this formatting. + 自定义高亮 HTML 输出格式,保持与其他后端(Solr、ES 等)一致。 + 模板:高亮文本 """ template = '<%(tag)s>%(t)s' class WhooshSearchBackend(BaseSearchBackend): - # Word reserved by Whoosh for special use. + # Whoosh 保留字,查询时需转义或避免 RESERVED_WORDS = ( 'AND', 'NOT', @@ -71,69 +75,69 @@ class WhooshSearchBackend(BaseSearchBackend): 'TO', ) - # Characters reserved by Whoosh for special use. - # The '\\' must come first, so as not to overwrite the other slash - # replacements. + # Whoosh 保留字符,同样需转义 RESERVED_CHARACTERS = ( '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '.', ) def __init__(self, connection_alias, **connection_options): - super( - WhooshSearchBackend, - self).__init__( - connection_alias, - **connection_options) - self.setup_complete = False - self.use_file_storage = True - self.post_limit = getattr( - connection_options, - 'POST_LIMIT', - 128 * 1024 * 1024) - self.path = connection_options.get('PATH') - + """ + 初始化后端实例 + :param connection_alias: settings 中 HAYSTACK_CONNECTIONS 的 key + :param connection_options: 该连接的配置字典 + """ + super(WhooshSearchBackend, self).__init__(connection_alias, **connection_options) + self.setup_complete = False # 延迟 setup 标记 + self.use_file_storage = True # 默认使用文件存储 + # 提交缓冲区大小,默认 128 MB + self.post_limit = getattr(connection_options, 'POST_LIMIT', 128 * 1024 * 1024) + self.path = connection_options.get('PATH') # 索引存放路径 + + # 如果显式指定 STORAGE != 'file',则使用内存存储 if connection_options.get('STORAGE', 'file') != 'file': self.use_file_storage = False + # 文件存储模式下 PATH 不能为空 if self.use_file_storage and not self.path: raise ImproperlyConfigured( - "You must specify a 'PATH' in your settings for connection '%s'." % - connection_alias) + "You must specify a 'PATH' in your settings for connection '%s'." % connection_alias) self.log = logging.getLogger('haystack') def setup(self): """ - Defers loading until needed. + 延迟初始化:真正用到时才创建/打开索引,避免进程启动时即锁定索引。 """ from haystack import connections new_index = False - # Make sure the index is there. + # 文件存储:目录不存在则创建 if self.use_file_storage and not os.path.exists(self.path): os.makedirs(self.path) new_index = True + # 确保目录可写 if self.use_file_storage and not os.access(self.path, os.W_OK): raise IOError( - "The path to your Whoosh index '%s' is not writable for the current user/group." % - self.path) + "The path to your Whoosh index '%s' is not writable for the current user/group." % self.path) + # 根据配置选择存储后端 if self.use_file_storage: self.storage = FileStorage(self.path) else: global LOCALS - + # 内存模式:线程本地共享 RamStorage if getattr(LOCALS, 'RAM_STORE', None) is None: LOCALS.RAM_STORE = RamStorage() - self.storage = LOCALS.RAM_STORE + # 构建 whoosh Schema self.content_field_name, self.schema = self.build_schema( connections[self.connection_alias].get_unified_index().all_searchfields()) self.parser = QueryParser(self.content_field_name, schema=self.schema) + # 创建或打开索引 if new_index is True: self.index = self.storage.create_index(self.schema) else: @@ -145,18 +149,22 @@ class WhooshSearchBackend(BaseSearchBackend): self.setup_complete = True def build_schema(self, fields): + """ + 把 Haystack 的 SearchField 列表转换成 whoosh 的 Schema + :param fields: dict {field_name: field_instance} + :return: (content_field_name, Schema) + """ schema_fields = { - ID: WHOOSH_ID(stored=True, unique=True), - DJANGO_CT: WHOOSH_ID(stored=True), - DJANGO_ID: WHOOSH_ID(stored=True), + ID: WHOOSH_ID(stored=True, unique=True), # 主键 + DJANGO_CT: WHOOSH_ID(stored=True), # 模型类名 + DJANGO_ID: WHOOSH_ID(stored=True), # 模型 pk } - # Grab the number of keys that are hard-coded into Haystack. - # We'll use this to (possibly) fail slightly more gracefully later. initial_key_count = len(schema_fields) content_field_name = '' for field_name, field_class in fields.items(): if field_class.is_multivalued: + # 多值字段:非索引用 IDLIST,索引用 KEYWORD if field_class.indexed is False: schema_fields[field_class.index_fieldname] = IDLIST( stored=True, field_boost=field_class.boost) @@ -173,26 +181,28 @@ class WhooshSearchBackend(BaseSearchBackend): schema_fields[field_class.index_fieldname] = NUMERIC( stored=field_class.stored, numtype=float, field_boost=field_class.boost) elif field_class.field_type == 'boolean': - # Field boost isn't supported on BOOLEAN as of 1.8.2. + # BOOLEAN 不支持 field_boost schema_fields[field_class.index_fieldname] = BOOLEAN( stored=field_class.stored) elif field_class.field_type == 'ngram': schema_fields[field_class.index_fieldname] = NGRAM( minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost) elif field_class.field_type == 'edge_ngram': - schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', - stored=field_class.stored, - field_boost=field_class.boost) + schema_fields[field_class.index_fieldname] = NGRAMWORDS( + minsize=2, maxsize=15, at='start', + stored=field_class.stored, field_boost=field_class.boost) else: - # schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True) + # 默认 TEXT,使用 jieba 中文分词 schema_fields[field_class.index_fieldname] = TEXT( - stored=True, analyzer=ChineseAnalyzer(), field_boost=field_class.boost, sortable=True) + stored=True, analyzer=ChineseAnalyzer(), + field_boost=field_class.boost, sortable=True) + + # 标记文档主字段(用于高亮/拼写检查) if field_class.document is True: content_field_name = field_class.index_fieldname schema_fields[field_class.index_fieldname].spelling = True - # Fail more gracefully than relying on the backend to die if no fields - # are found. + # 没有任何业务字段则抛错 if len(schema_fields) <= initial_key_count: raise SearchBackendError( "No fields were found in any search_indexes. Please correct this before attempting to search.") @@ -200,51 +210,50 @@ class WhooshSearchBackend(BaseSearchBackend): return (content_field_name, Schema(**schema_fields)) def update(self, index, iterable, commit=True): + """ + 批量更新/新增文档 + :param index: SearchIndex 实例 + :param iterable: 要索引的模型实例可迭代对象 + :param commit: 是否立即提交(whoosh 2.5+ 建议写完即 commit) + """ if not self.setup_complete: self.setup() self.index = self.index.refresh() - writer = AsyncWriter(self.index) + writer = AsyncWriter(self.index) # 异步写,避免长期锁 for obj in iterable: try: - doc = index.full_prepare(obj) + doc = index.full_prepare(obj) # 提取字段值 except SkipDocument: self.log.debug(u"Indexing for object `%s` skipped", obj) - else: - # Really make sure it's unicode, because Whoosh won't have it any - # other way. - for key in doc: - doc[key] = self._from_python(doc[key]) - - # Document boosts aren't supported in Whoosh 2.5.0+. - if 'boost' in doc: - del doc['boost'] - - try: - writer.update_document(**doc) - except Exception as e: - if not self.silently_fail: - raise - - # We'll log the object identifier but won't include the actual object - # to avoid the possibility of that generating encoding errors while - # processing the log message: - self.log.error( - u"%s while preparing object for update" % - e.__class__.__name__, - exc_info=True, - extra={ - "data": { - "index": index, - "object": get_identifier(obj)}}) + continue + + # 全部转 unicode + for key in doc: + doc[key] = self._from_python(doc[key]) + + # whoosh 2.5+ 已移除文档级 boost + if 'boost' in doc: + del doc['boost'] + + try: + writer.update_document(**doc) # 存在即更新 + except Exception as e: + if not self.silently_fail: + raise + self.log.error( + u"%s while preparing object for update" % e.__class__.__name__, + exc_info=True, + extra={"data": {"index": index, "object": get_identifier(obj)}}) if len(iterable) > 0: - # For now, commit no matter what, as we run into locking issues - # otherwise. - writer.commit() + writer.commit() # 真正落地 def remove(self, obj_or_string, commit=True): + """ + 根据唯一标识删除单条文档 + """ if not self.setup_complete: self.setup() @@ -253,20 +262,19 @@ class WhooshSearchBackend(BaseSearchBackend): try: self.index.delete_by_query( - q=self.parser.parse( - u'%s:"%s"' % - (ID, whoosh_id))) + q=self.parser.parse(u'%s:"%s"' % (ID, whoosh_id))) except Exception as e: if not self.silently_fail: raise - self.log.error( "Failed to remove document '%s' from Whoosh: %s", - whoosh_id, - e, - exc_info=True) + whoosh_id, e, exc_info=True) def clear(self, models=None, commit=True): + """ + 清空整个索引或指定模型的索引 + :param models: None 表示全部;list/tuple 表示指定模型 + """ if not self.setup_complete: self.setup() @@ -277,167 +285,119 @@ class WhooshSearchBackend(BaseSearchBackend): try: if models is None: - self.delete_index() + self.delete_index() # 删目录/清内存 else: models_to_delete = [] - for model in models: models_to_delete.append( - u"%s:%s" % - (DJANGO_CT, get_model_ct(model))) - + u"%s:%s" % (DJANGO_CT, get_model_ct(model))) + # 构造 OR 查询一次性删除 self.index.delete_by_query( - q=self.parser.parse( - u" OR ".join(models_to_delete))) + q=self.parser.parse(u" OR ".join(models_to_delete))) except Exception as e: if not self.silently_fail: raise - if models is not None: self.log.error( "Failed to clear Whoosh index of models '%s': %s", - ','.join(models_to_delete), - e, - exc_info=True) + ','.join(models_to_delete), e, exc_info=True) else: - self.log.error( - "Failed to clear Whoosh index: %s", e, exc_info=True) + self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True) def delete_index(self): - # Per the Whoosh mailing list, if wiping out everything from the index, - # it's much more efficient to simply delete the index files. + """ + 物理删除索引文件/内存,并重新 setup + """ if self.use_file_storage and os.path.exists(self.path): shutil.rmtree(self.path) elif not self.use_file_storage: self.storage.clean() - - # Recreate everything. self.setup() def optimize(self): + """ + 手动合并索引段,提升查询速度(耗时操作) + """ if not self.setup_complete: self.setup() - self.index = self.index.refresh() self.index.optimize() def calculate_page(self, start_offset=0, end_offset=None): - # Prevent against Whoosh throwing an error. Requires an end_offset - # greater than 0. + """ + 把 Django 风格的分页起止偏移换算成 whoosh 的页码+每页条数 + """ if end_offset is not None and end_offset <= 0: end_offset = 1 - - # Determine the page. page_num = 0 - if end_offset is None: end_offset = 1000000 - if start_offset is None: start_offset = 0 - page_length = end_offset - start_offset - - if page_length and page_length > 0: + if page_length > 0: page_num = int(start_offset / page_length) - - # Increment because Whoosh uses 1-based page numbers. + # whoosh 页码从 1 开始 page_num += 1 return page_num, page_length @log_query - def search( - self, - query_string, - sort_by=None, - start_offset=0, - end_offset=None, - fields='', - highlight=False, - facets=None, - date_facets=None, - query_facets=None, - narrow_queries=None, - spelling_query=None, - within=None, - dwithin=None, - distance_point=None, - models=None, - limit_to_registered_models=None, - result_class=None, - **kwargs): + def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, + fields='', highlight=False, facets=None, date_facets=None, + query_facets=None, narrow_queries=None, spelling_query=None, + within=None, dwithin=None, distance_point=None, models=None, + limit_to_registered_models=None, result_class=None, **kwargs): + """ + 核心查询方法,返回 {'results': [...], 'hits': n, ...} + 各参数含义见 Haystack 文档,此处不赘述 + """ if not self.setup_complete: self.setup() - # A zero length query should return no results. + # 空查询直接返回 0 条 if len(query_string) == 0: - return { - 'results': [], - 'hits': 0, - } + return {'results': [], 'hits': 0} query_string = force_str(query_string) - # A one-character query (non-wildcard) gets nabbed by a stopwords - # filter and should yield zero results. + # 单个非通配字符会被 whoosh 当 stopword 过滤掉,直接返回 0 条 if len(query_string) <= 1 and query_string != u'*': - return { - 'results': [], - 'hits': 0, - } + return {'results': [], 'hits': 0} reverse = False - + # 处理排序方向(whoosh 要求所有字段同向) if sort_by is not None: - # Determine if we need to reverse the results and if Whoosh can - # handle what it's being asked to sort by. Reversing is an - # all-or-nothing action, unfortunately. sort_by_list = [] reverse_counter = 0 - for order_by in sort_by: if order_by.startswith('-'): reverse_counter += 1 - if reverse_counter and reverse_counter != len(sort_by): raise SearchBackendError("Whoosh requires all order_by fields" " to use the same sort direction") - for order_by in sort_by: if order_by.startswith('-'): sort_by_list.append(order_by[1:]) - if len(sort_by_list) == 1: reverse = True else: sort_by_list.append(order_by) - if len(sort_by_list) == 1: reverse = False - sort_by = sort_by_list[0] + # whoosh 不支持 faceting,仅警告 if facets is not None: - warnings.warn( - "Whoosh does not handle faceting.", - Warning, - stacklevel=2) - + warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2) if date_facets is not None: - warnings.warn( - "Whoosh does not handle date faceting.", - Warning, - stacklevel=2) - + warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2) if query_facets is not None: - warnings.warn( - "Whoosh does not handle query faceting.", - Warning, - stacklevel=2) + warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2) narrowed_results = None self.index = self.index.refresh() + # 模型过滤 if limit_to_registered_models is None: limit_to_registered_models = getattr( settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) @@ -445,8 +405,6 @@ class WhooshSearchBackend(BaseSearchBackend): if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: - # Using narrow queries, limit the results to only models handled - # with the current routers. model_choices = self.build_models_list() else: model_choices = [] @@ -454,27 +412,18 @@ class WhooshSearchBackend(BaseSearchBackend): if len(model_choices) > 0: if narrow_queries is None: narrow_queries = set() - narrow_queries.add(' OR '.join( ['%s:%s' % (DJANGO_CT, rm) for rm in model_choices])) narrow_searcher = None - if narrow_queries is not None: - # Potentially expensive? I don't see another way to do it in - # Whoosh... + # 用 searcher 先过滤缩小结果集 narrow_searcher = self.index.searcher() - for nq in narrow_queries: recent_narrowed_results = narrow_searcher.search( self.parser.parse(force_str(nq)), limit=None) - if len(recent_narrowed_results) <= 0: - return { - 'results': [], - 'hits': 0, - } - + return {'results': [], 'hits': 0} if narrowed_results: narrowed_results.filter(recent_narrowed_results) else: @@ -485,98 +434,55 @@ class WhooshSearchBackend(BaseSearchBackend): if self.index.doc_count(): searcher = self.index.searcher() parsed_query = self.parser.parse(query_string) + if parsed_query is None: # 非法/全停词查询 + return {'results': [], 'hits': 0} - # In the event of an invalid/stopworded query, recover gracefully. - if parsed_query is None: - return { - 'results': [], - 'hits': 0, - } - - page_num, page_length = self.calculate_page( - start_offset, end_offset) - - search_kwargs = { - 'pagelen': page_length, - 'sortedby': sort_by, - 'reverse': reverse, - } - - # Handle the case where the results have been narrowed. + page_num, page_length = self.calculate_page(start_offset, end_offset) + search_kwargs = {'pagelen': page_length, + 'sortedby': sort_by, 'reverse': reverse} if narrowed_results is not None: search_kwargs['filter'] = narrowed_results try: - raw_page = searcher.search_page( - parsed_query, - page_num, - **search_kwargs - ) + raw_page = searcher.search_page(parsed_query, page_num, **search_kwargs) except ValueError: if not self.silently_fail: raise + return {'results': [], 'hits': 0, 'spelling_suggestion': None} - return { - 'results': [], - 'hits': 0, - 'spelling_suggestion': None, - } - - # Because as of Whoosh 2.5.1, it will return the wrong page of - # results if you request something too high. :( + # whoosh 2.5.1 在请求页码过大时会返回错误页,需检测 if raw_page.pagenum < page_num: - return { - 'results': [], - 'hits': 0, - 'spelling_suggestion': None, - } + return {'results': [], 'hits': 0, 'spelling_suggestion': None} results = self._process_results( - raw_page, - highlight=highlight, - query_string=query_string, - spelling_query=spelling_query, - result_class=result_class) + raw_page, highlight=highlight, query_string=query_string, + spelling_query=spelling_query, result_class=result_class) searcher.close() - if hasattr(narrow_searcher, 'close'): narrow_searcher.close() - return results else: + # 索引为空时返回拼写建议 if self.include_spelling: if spelling_query: - spelling_suggestion = self.create_spelling_suggestion( - spelling_query) + spelling_suggestion = self.create_spelling_suggestion(spelling_query) else: - spelling_suggestion = self.create_spelling_suggestion( - query_string) + spelling_suggestion = self.create_spelling_suggestion(query_string) else: spelling_suggestion = None + return {'results': [], 'hits': 0, 'spelling_suggestion': spelling_suggestion} - return { - 'results': [], - 'hits': 0, - 'spelling_suggestion': spelling_suggestion, - } - - def more_like_this( - self, - model_instance, - additional_query_string=None, - start_offset=0, - end_offset=None, - models=None, - limit_to_registered_models=None, - result_class=None, - **kwargs): + def more_like_this(self, model_instance, additional_query_string=None, + start_offset=0, end_offset=None, models=None, + limit_to_registered_models=None, result_class=None, **kwargs): + """ + 根据给定实例找“相似文档” + """ if not self.setup_complete: self.setup() - # Deferred models will have a different class ("RealClass_Deferred_fieldname") - # which won't be in our registry: + # 处理延迟加载模型 model_klass = model_instance._meta.concrete_model - field_name = self.content_field_name narrow_queries = set() narrowed_results = None @@ -589,8 +495,6 @@ class WhooshSearchBackend(BaseSearchBackend): if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: - # Using narrow queries, limit the results to only models handled - # with the current routers. model_choices = self.build_models_list() else: model_choices = [] @@ -598,7 +502,6 @@ class WhooshSearchBackend(BaseSearchBackend): if len(model_choices) > 0: if narrow_queries is None: narrow_queries = set() - narrow_queries.add(' OR '.join( ['%s:%s' % (DJANGO_CT, rm) for rm in model_choices])) @@ -606,29 +509,19 @@ class WhooshSearchBackend(BaseSearchBackend): narrow_queries.add(additional_query_string) narrow_searcher = None - if narrow_queries is not None: - # Potentially expensive? I don't see another way to do it in - # Whoosh... narrow_searcher = self.index.searcher() - for nq in narrow_queries: recent_narrowed_results = narrow_searcher.search( self.parser.parse(force_str(nq)), limit=None) - if len(recent_narrowed_results) <= 0: - return { - 'results': [], - 'hits': 0, - } - + return {'results': [], 'hits': 0} if narrowed_results: narrowed_results.filter(recent_narrowed_results) else: narrowed_results = recent_narrowed_results page_num, page_length = self.calculate_page(start_offset, end_offset) - self.index = self.index.refresh() raw_results = EmptyResults() @@ -637,12 +530,10 @@ class WhooshSearchBackend(BaseSearchBackend): searcher = self.index.searcher() parsed_query = self.parser.parse(query) results = searcher.search(parsed_query) - if len(results): + # 取第一条结果调 more_like_this raw_results = results[0].more_like_this( field_name, top=end_offset) - - # Handle the case where the results have been narrowed. if narrowed_results is not None and hasattr(raw_results, 'filter'): raw_results.filter(narrowed_results) @@ -651,43 +542,25 @@ class WhooshSearchBackend(BaseSearchBackend): except ValueError: if not self.silently_fail: raise + return {'results': [], 'hits': 0, 'spelling_suggestion': None} - return { - 'results': [], - 'hits': 0, - 'spelling_suggestion': None, - } - - # Because as of Whoosh 2.5.1, it will return the wrong page of - # results if you request something too high. :( if raw_page.pagenum < page_num: - return { - 'results': [], - 'hits': 0, - 'spelling_suggestion': None, - } + return {'results': [], 'hits': 0, 'spelling_suggestion': None} results = self._process_results(raw_page, result_class=result_class) searcher.close() - if hasattr(narrow_searcher, 'close'): narrow_searcher.close() - return results - def _process_results( - self, - raw_page, - highlight=False, - query_string='', - spelling_query=None, - result_class=None): + def _process_results(self, raw_page, highlight=False, query_string='', + spelling_query=None, result_class=None): + """ + 把 whoosh 的 ResultsPage 转成 haystack SearchResult 列表 + """ from haystack import connections results = [] - - # It's important to grab the hits first before slicing. Otherwise, this - # can cause pagination failures. - hits = len(raw_page) + hits = len(raw_page) # 必须在切片前取总数 if result_class is None: result_class = SearchResult @@ -707,171 +580,129 @@ class WhooshSearchBackend(BaseSearchBackend): for key, value in raw_result.items(): index = unified_index.get_index(model) string_key = str(key) - - if string_key in index.fields and hasattr( - index.fields[string_key], 'convert'): - # Special-cased due to the nature of KEYWORD fields. + if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): + # 多值 KEYWORD 字段用逗号拆分 if index.fields[string_key].is_multivalued: if value is None or len(value) == 0: additional_fields[string_key] = [] else: - additional_fields[string_key] = value.split( - ',') + additional_fields[string_key] = value.split(',') else: - additional_fields[string_key] = index.fields[string_key].convert( - value) + additional_fields[string_key] = index.fields[string_key].convert(value) else: additional_fields[string_key] = self._to_python(value) - del (additional_fields[DJANGO_CT]) - del (additional_fields[DJANGO_ID]) + # 删除内部字段 + del additional_fields[DJANGO_CT] + del additional_fields[DJANGO_ID] + # 高亮处理 if highlight: sa = StemmingAnalyzer() formatter = WhooshHtmlFormatter('em') terms = [token.text for token in sa(query_string)] - whoosh_result = whoosh_highlight( additional_fields.get(self.content_field_name), - terms, - sa, - ContextFragmenter(), - formatter - ) + terms, sa, ContextFragmenter(), formatter) additional_fields['highlighted'] = { self.content_field_name: [whoosh_result], } - result = result_class( - app_label, - model_name, - raw_result[DJANGO_ID], - score, - **additional_fields) + result = result_class(app_label, model_name, + raw_result[DJANGO_ID], score, + **additional_fields) results.append(result) else: - hits -= 1 + hits -= 1 # 模型未注册,命中不计入总数 + # 拼写建议 if self.include_spelling: if spelling_query: - spelling_suggestion = self.create_spelling_suggestion( - spelling_query) + spelling_suggestion = self.create_spelling_suggestion(spelling_query) else: - spelling_suggestion = self.create_spelling_suggestion( - query_string) - - return { - 'results': results, - 'hits': hits, - 'facets': facets, - 'spelling_suggestion': spelling_suggestion, - } + spelling_suggestion = self.create_spelling_suggestion(query_string) + + return {'results': results, 'hits': hits, + 'facets': facets, 'spelling_suggestion': spelling_suggestion} def create_spelling_suggestion(self, query_string): + """ + 基于 whoosh corrector 给出拼写纠正建议 + """ spelling_suggestion = None reader = self.index.reader() corrector = reader.corrector(self.content_field_name) cleaned_query = force_str(query_string) - if not query_string: return spelling_suggestion - - # Clean the string. + # 去掉保留字/符 for rev_word in self.RESERVED_WORDS: cleaned_query = cleaned_query.replace(rev_word, '') - for rev_char in self.RESERVED_CHARACTERS: cleaned_query = cleaned_query.replace(rev_char, '') - - # Break it down. + # 按单词纠正 query_words = cleaned_query.split() suggested_words = [] - for word in query_words: suggestions = corrector.suggest(word, limit=1) - if len(suggestions) > 0: suggested_words.append(suggestions[0]) - spelling_suggestion = ' '.join(suggested_words) return spelling_suggestion def _from_python(self, value): """ - Converts Python values to a string for Whoosh. - - Code courtesy of pysolr. + 把 Python 值转成 whoosh 可索引的 unicode / string """ if hasattr(value, 'strftime'): if not hasattr(value, 'hour'): value = datetime(value.year, value.month, value.day, 0, 0, 0) elif isinstance(value, bool): - if value: - value = 'true' - else: - value = 'false' + value = 'true' if value else 'false' elif isinstance(value, (list, tuple)): value = u','.join([force_str(v) for v in value]) elif isinstance(value, (six.integer_types, float)): - # Leave it alone. - pass + pass # 数字保持原样 else: value = force_str(value) return value def _to_python(self, value): """ - Converts values from Whoosh to native Python values. - - A port of the same method in pysolr, as they deal with data the same way. + 把 whoosh 存储的字符串转回 Python 对象 """ if value == 'true': return True elif value == 'false': return False - if value and isinstance(value, six.string_types): possible_datetime = DATETIME_REGEX.search(value) - if possible_datetime: date_values = possible_datetime.groupdict() - for dk, dv in date_values.items(): date_values[dk] = int(dv) - - return datetime( - date_values['year'], - date_values['month'], - date_values['day'], - date_values['hour'], - date_values['minute'], - date_values['second']) - + return datetime(date_values['year'], date_values['month'], + date_values['day'], date_values['hour'], + date_values['minute'], date_values['second']) try: - # Attempt to use json to load the values. converted_value = json.loads(value) - - # Try to handle most built-in types. - if isinstance( - converted_value, - (list, - tuple, - set, - dict, - six.integer_types, - float, - complex)): + if isinstance(converted_value, (list, tuple, set, dict, + six.integer_types, float, complex)): return converted_value except BaseException: - # If it fails (SyntaxError or its ilk) or we don't trust it, - # continue on. pass - return value class WhooshSearchQuery(BaseSearchQuery): + """ + 负责把 Haystack 的 filter/exclude 条件转换成 whoosh 查询语法 + """ + def _convert_datetime(self, date): + """ + 统一把 datetime 转成 whoosh 需要的 YYYYMMDDHHMMSS 字符串 + """ if hasattr(date, 'hour'): return force_str(date.strftime('%Y%m%d%H%M%S')) else: @@ -879,63 +710,53 @@ class WhooshSearchQuery(BaseSearchQuery): def clean(self, query_fragment): """ - Provides a mechanism for sanitizing user input before presenting the - value to the backend. - - Whoosh 1.X differs here in that you can no longer use a backslash - to escape reserved characters. Instead, the whole word should be - quoted. + 转义保留字/符;whoosh 1.x 不再支持反斜杠转义,需用引号包裹 """ words = query_fragment.split() cleaned_words = [] - for word in words: if word in self.backend.RESERVED_WORDS: - word = word.replace(word, word.lower()) - + word = word.lower() # 保留字小写化 for char in self.backend.RESERVED_CHARACTERS: if char in word: word = "'%s'" % word break - cleaned_words.append(word) - return ' '.join(cleaned_words) def build_query_fragment(self, field, filter_type, value): + """ + 把单个 filter 条件转成 whoosh 查询子串 + 例如:name='exact' -> name:xxx + """ from haystack import connections query_frag = '' is_datetime = False if not hasattr(value, 'input_type_name'): - # Handle when we've got a ``ValuesListQuerySet``... + # 处理 ValuesListQuerySet 等 if hasattr(value, 'values_list'): value = list(value) - if hasattr(value, 'strftime'): is_datetime = True - if isinstance(value, six.string_types) and value != ' ': - # It's not an ``InputType``. Assume ``Clean``. value = Clean(value) else: value = PythonData(value) - # Prepare the query using the InputType. prepared_value = value.prepare(self) if not isinstance(prepared_value, (set, list, tuple)): - # Then convert whatever we get back to what pysolr wants if needed. prepared_value = self.backend._from_python(prepared_value) - # 'content' is a special reserved word, much like 'pk' in - # Django's ORM layer. It indicates 'no special field'. + # 'content' 是保留字段,表示全文检索 if field == 'content': index_fieldname = '' else: index_fieldname = u'%s:' % connections[self._using].get_unified_index( ).get_index_fieldname(field) + # 查询模板映射 filter_types = { 'content': '%s', 'contains': '*%s*', @@ -952,67 +773,47 @@ class WhooshSearchQuery(BaseSearchQuery): if value.post_process is False: query_frag = prepared_value else: - if filter_type in [ - 'content', - 'contains', - 'startswith', - 'endswith', - 'fuzzy']: + if filter_type in ['content', 'contains', 'startswith', + 'endswith', 'fuzzy']: if value.input_type_name == 'exact': query_frag = prepared_value else: - # Iterate over terms & incorportate the converted form of - # each into the query. + # 分词后每个词都加通配符/模板 terms = [] - if isinstance(prepared_value, six.string_types): possible_values = prepared_value.split(' ') else: if is_datetime is True: - prepared_value = self._convert_datetime( - prepared_value) - + prepared_value = self._convert_datetime(prepared_value) possible_values = [prepared_value] - for possible_value in possible_values: terms.append( - filter_types[filter_type] % - self.backend._from_python(possible_value)) - + filter_types[filter_type] % self.backend._from_python(possible_value)) if len(terms) == 1: query_frag = terms[0] else: query_frag = u"(%s)" % " AND ".join(terms) elif filter_type == 'in': in_options = [] - for possible_value in prepared_value: is_datetime = False - if hasattr(possible_value, 'strftime'): is_datetime = True - pv = self.backend._from_python(possible_value) - if is_datetime is True: pv = self._convert_datetime(pv) - if isinstance(pv, six.string_types) and not is_datetime: in_options.append('"%s"' % pv) else: in_options.append('%s' % pv) - query_frag = "(%s)" % " OR ".join(in_options) elif filter_type == 'range': start = self.backend._from_python(prepared_value[0]) end = self.backend._from_python(prepared_value[1]) - if hasattr(prepared_value[0], 'strftime'): start = self._convert_datetime(start) - if hasattr(prepared_value[1], 'strftime'): end = self._convert_datetime(end) - query_frag = u"[%s to %s]" % (start, end) elif filter_type == 'exact': if value.input_type_name == 'exact': @@ -1023,22 +824,19 @@ class WhooshSearchQuery(BaseSearchQuery): else: if is_datetime is True: prepared_value = self._convert_datetime(prepared_value) - query_frag = filter_types[filter_type] % prepared_value + # 非 Raw 输入且未带括号,则整体括号包裹 if len(query_frag) and not isinstance(value, Raw): if not query_frag.startswith('(') and not query_frag.endswith(')'): query_frag = "(%s)" % query_frag return u"%s%s" % (index_fieldname, query_frag) - # if not filter_type in ('in', 'range'): - # # 'in' is a bit of a special case, as we don't want to - # # convert a valid list/tuple to string. Defer handling it - # # until later... - # value = self.backend._from_python(value) - class WhooshEngine(BaseEngine): + """ + 入口 Engine,供 Haystack 加载 + """ backend = WhooshSearchBackend - query = WhooshSearchQuery + query = WhooshSearchQuery \ No newline at end of file