You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 lines
1.5 KiB

#pragma once
#include <xapian.h>
#include <cppjieba/Jieba.hpp>
#include <string>
#include <vector>
#include <memory>
/// One hit returned by SearchEngine::search().
///
/// Plain aggregate: every field has a well-defined value after default
/// construction, so `SearchResult r;` is safe to read.
struct SearchResult {
    /// Path identifying the matched document
    /// (presumably the `path` passed to addDocument — confirm in the .cpp).
    std::string path;
    /// Text excerpt for display
    /// (presumably produced by getSnippet — confirm in the .cpp).
    std::string snippet;
    /// Positions associated with the match
    /// (NOTE(review): unit — byte vs. character offset — is not visible here).
    std::vector<size_t> positions;
    /// Ranking score. Initialized to 0.0 so a default-constructed result
    /// never carries an indeterminate value.
    double score = 0.0;
};
/// Full-text search facade declared on top of a Xapian writable database and
/// a cppjieba tokenizer (see the private members below).
///
/// The class owns its collaborators through std::unique_ptr, so it cannot be
/// copied; because a destructor is user-declared, no implicit move operations
/// are generated either. An instance is therefore neither copyable nor
/// movable.
class SearchEngine {
public:
    /// Field by which search results can be ordered.
    /// NOTE(review): how SIZE and MODIFIED_TIME are obtained is not visible in
    /// this header — confirm against the implementation.
    enum class SortField {
        RELEVANCE,
        PATH,
        SIZE,
        MODIFIED_TIME
    };

    /// Direction of the ordering selected by SortField.
    enum class SortOrder {
        ASC,
        DESC
    };

    /// Options accepted by the extended search() overload.
    struct SearchOptions {
        size_t limit = 10;   ///< presumably the maximum number of results — confirm
        size_t offset = 0;   ///< presumably the number of leading results to skip — confirm
        SortField sort_field = SortField::RELEVANCE;
        SortOrder sort_order = SortOrder::DESC;
        std::string file_type;  ///< presumably a file-type filter; empty by default — confirm
    };

    /// Constructs an engine for the database identified by `db_path`.
    /// `explicit` prevents a std::string from being silently converted into a
    /// SearchEngine (e.g. when passed to a function expecting an engine).
    explicit SearchEngine(const std::string& db_path);
    ~SearchEngine();

    // Copying is already impossible because of the unique_ptr members; the
    // deleted declarations state that intent and give clearer diagnostics.
    SearchEngine(const SearchEngine&) = delete;
    SearchEngine& operator=(const SearchEngine&) = delete;

    /// Adds a document to the index.
    /// @param path    identifier of the document.
    /// @param content text of the document.
    void addDocument(const std::string& path, const std::string& content);

    /// Searches the indexed documents.
    /// @param query search query text.
    /// @param limit defaults to 10 (presumably the maximum number of results — confirm).
    /// @return the matching results.
    std::vector<SearchResult> search(const std::string& query, size_t limit = 10);

    /// Commits pending changes.
    void commit();

    /// Searches the indexed documents using the given options.
    /// @param query   search query text.
    /// @param options paging, ordering and file-type settings.
    /// @return the matching results.
    std::vector<SearchResult> search(
        const std::string& query,
        const SearchOptions& options
    );

private:
    // Declaration order is kept as-is: it fixes construction/destruction order.
    std::unique_ptr<Xapian::WritableDatabase> db_;
    std::unique_ptr<Xapian::TermGenerator> term_gen_;
    std::unique_ptr<cppjieba::Jieba> jieba_;

    /// Word segmentation (tokenization) of `text`.
    std::string tokenize(const std::string& text);

    /// Builds the highlighted snippet of `content` for `query`.
    std::string getSnippet(const std::string& content, const std::string& query);
};