From 63b5aa9412cfdc7a354d3f909b8d089b11d76ce5 Mon Sep 17 00:00:00 2001
From: pr4ku3p2c <925473174@qq.com>
Date: Mon, 20 Jan 2025 18:08:05 +0800
Subject: [PATCH] ADD file via upload

---
 search_engine.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 search_engine.h

diff --git a/search_engine.h b/search_engine.h
new file mode 100644
index 0000000..83fc8aa
--- /dev/null
+++ b/search_engine.h
@@ -0,0 +1,64 @@
+#pragma once
+#include  // NOTE(review): header name missing on this and the next four #include lines (presumably lost in extraction) - restore before applying
+#include
+#include
+#include
+#include
+
+struct SearchResult {  // Record with four public fields; presumably one instance per search hit - confirm against search()
+    std::string path;
+    std::string snippet;
+    std::vector positions;  // NOTE(review): std::vector element type is missing here - restore the template argument
+    double score;  // No default member initializer: indeterminate after `SearchResult r;`
+};
+
+class SearchEngine {  // Declarations only; every member function is defined outside this header
+public:
+    SearchEngine(const std::string& db_path);  // NOTE(review): not marked explicit, so a std::string converts implicitly to SearchEngine
+    ~SearchEngine();  // User-declared destructor, defined elsewhere
+
+    // Add a document to the index
+    void addDocument(const std::string& path, const std::string& content);
+
+    // Search documents (limit defaults to 10; NOTE(review): std::vector element type is missing in the return type)
+    std::vector search(const std::string& query, size_t limit = 10);
+
+    // Commit changes
+    void commit();
+
+    enum class SortField {  // Type of SearchOptions::sort_field
+        RELEVANCE,
+        PATH,
+        SIZE,
+        MODIFIED_TIME
+    };
+
+    enum class SortOrder {  // Type of SearchOptions::sort_order
+        ASC,
+        DESC
+    };
+
+    struct SearchOptions {  // Parameter bundle for the second search() overload; every field has a default
+        size_t limit = 10;
+        size_t offset = 0;
+        SortField sort_field = SortField::RELEVANCE;
+        SortOrder sort_order = SortOrder::DESC;
+        std::string file_type;  // Empty by default; presumably empty means "no file-type filter" - confirm in the implementation
+    };
+
+    std::vector search(  // Overload taking SearchOptions instead of a bare limit; NOTE(review): std::vector element type is missing
+        const std::string& query,
+        const SearchOptions& options
+    );
+
+private:
+    std::unique_ptr db_;  // NOTE(review): std::unique_ptr pointee type is missing on db_, term_gen_ and jieba_ - restore the template arguments
+    std::unique_ptr term_gen_;
+    std::unique_ptr jieba_;
+
+    // Word segmentation (tokenization)
+    std::string tokenize(const std::string& text);
+
+    // Get the highlighted snippet
+    std::string getSnippet(const std::string& content, const std::string& query);
+};
\ No newline at end of file