You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
1.5 KiB
64 lines
1.5 KiB
#pragma once
|
|
#include <xapian.h>
|
|
#include <cppjieba/Jieba.hpp>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <memory>
|
|
|
|
/// A single hit produced by SearchEngine::search().
///
/// Plain aggregate: every member has a well-defined default state, so a
/// default-constructed SearchResult can be read without touching
/// indeterminate values.
struct SearchResult {
    /// Identifier of the matched document. Presumably the `path` that was
    /// passed to SearchEngine::addDocument() -- confirm against the
    /// implementation.
    std::string path;

    /// Text excerpt associated with the hit. Presumably produced by
    /// SearchEngine::getSnippet() -- confirm against the implementation.
    std::string snippet;

    /// Match positions for the hit. NOTE(review): the unit (byte offset,
    /// character offset or term index) and what the positions are relative to
    /// are not visible from this header -- verify before relying on them.
    std::vector<size_t> positions;

    /// Ranking score of the hit. Zero-initialised so that a result that was
    /// never scored does not carry an indeterminate value; the scale and
    /// direction of the score are defined by the implementation.
    double score = 0.0;
};
|
|
|
|
class SearchEngine {
|
|
public:
|
|
SearchEngine(const std::string& db_path);
|
|
~SearchEngine();
|
|
|
|
// 添加文档到索引
|
|
void addDocument(const std::string& path, const std::string& content);
|
|
|
|
// 搜索文档
|
|
std::vector<SearchResult> search(const std::string& query, size_t limit = 10);
|
|
|
|
// 提交更改
|
|
void commit();
|
|
|
|
enum class SortField {
|
|
RELEVANCE,
|
|
PATH,
|
|
SIZE,
|
|
MODIFIED_TIME
|
|
};
|
|
|
|
enum class SortOrder {
|
|
ASC,
|
|
DESC
|
|
};
|
|
|
|
struct SearchOptions {
|
|
size_t limit = 10;
|
|
size_t offset = 0;
|
|
SortField sort_field = SortField::RELEVANCE;
|
|
SortOrder sort_order = SortOrder::DESC;
|
|
std::string file_type;
|
|
};
|
|
|
|
std::vector<SearchResult> search(
|
|
const std::string& query,
|
|
const SearchOptions& options
|
|
);
|
|
|
|
private:
|
|
std::unique_ptr<Xapian::WritableDatabase> db_;
|
|
std::unique_ptr<Xapian::TermGenerator> term_gen_;
|
|
std::unique_ptr<cppjieba::Jieba> jieba_;
|
|
|
|
// 分词处理
|
|
std::string tokenize(const std::string& text);
|
|
|
|
// 获取高亮片段
|
|
std::string getSnippet(const std::string& content, const std::string& query);
|
|
};
|