ADD file via upload

main
pr4ku3p2c 7 months ago
parent 006471b2e2
commit d8e31033fc

@ -0,0 +1,88 @@
use anyhow::Result;
use jieba_rs::Jieba;
use std::path::Path;
use tantivy::{
collector::TopDocs,
doc,
query::QueryParser,
schema::{Schema, STORED, TEXT},
Index, IndexWriter,
};
/// Full-text search engine that pairs a tantivy index with a jieba word segmenter.
pub struct SearchEngine {
// Handle to the tantivy index; used to obtain the schema and to create readers.
index: Index,
// Single index writer, created in `new` with a 50 MB buffer.
writer: IndexWriter,
// Jieba segmenter applied to document content before it is indexed.
jieba: Jieba,
}
impl SearchEngine {
    /// Opens the search index stored in `index_path`, creating it when none exists yet.
    ///
    /// The directory itself is created if it is missing. An index that already lives in
    /// the directory is reused, provided its schema equals the one built here.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory cannot be created, if an existing index has a
    /// different schema, if the index cannot be created, or if the index writer cannot
    /// be acquired.
    pub fn new(index_path: &Path) -> Result<Self> {
        let mut schema_builder = Schema::builder();
        // Index layout. Only `path`, `file_type`, `modified` and `size` are stored;
        // `content` is indexed for searching but not stored.
        schema_builder.add_text_field("path", TEXT | STORED);
        schema_builder.add_text_field("content", TEXT);
        schema_builder.add_text_field("file_type", TEXT | STORED);
        schema_builder.add_date_field("modified", STORED);
        schema_builder.add_u64_field("size", STORED);
        let schema = schema_builder.build();

        std::fs::create_dir_all(index_path)?;

        // `create_in_dir` refuses to touch a directory that already holds an index, so
        // try to reopen first; otherwise the engine could never be restarted on its own
        // data.
        let index = match Index::open_in_dir(index_path) {
            Ok(existing) => {
                if existing.schema() != schema {
                    return Err(std::io::Error::new(
                        std::io::ErrorKind::InvalidData,
                        "existing index schema does not match the search engine schema",
                    )
                    .into());
                }
                existing
            }
            // Nothing could be opened: create a fresh index. If the directory holds an
            // unreadable index, `create_in_dir` reports the failure.
            Err(_) => Index::create_in_dir(index_path, schema)?,
        };
        let writer = index.writer(50_000_000)?; // 50 MB indexing buffer

        Ok(Self {
            index,
            writer,
            jieba: Jieba::new(),
        })
    }

    /// Indexes one document and commits it so that it is visible to later searches.
    ///
    /// `content` is segmented with jieba (search mode) and the segments are joined with
    /// spaces before being written to the `content` field. Only the `path` and `content`
    /// fields are populated.
    ///
    /// The commit happens on every call. That keeps `search` results up to date, at the
    /// cost of one index commit per document; the call blocks while it runs.
    ///
    /// # Errors
    ///
    /// Returns an error if the document cannot be added or the commit fails.
    pub async fn add_document(&mut self, path: &Path, content: &str) -> Result<()> {
        let schema = self.index.schema();
        let path_field = schema
            .get_field("path")
            .expect("schema built in `new` defines `path`");
        let content_field = schema
            .get_field("content")
            .expect("schema built in `new` defines `content`");

        // Segment the content with jieba; `true` enables HMM-based detection of words
        // that are missing from the dictionary.
        let segmented = self.jieba.cut_for_search(content, true).join(" ");

        self.writer.add_document(doc!(
            path_field => path.to_string_lossy().into_owned(),
            content_field => segmented
        ))?;
        // Without a commit the document never becomes visible to any reader.
        self.writer.commit()?;
        Ok(())
    }

    /// Runs `query` against the `content` field and returns at most `limit` hits.
    ///
    /// The query string is handed to tantivy's `QueryParser`, so its query syntax
    /// (including boolean operators) applies. A `limit` of 0 yields an empty result.
    ///
    /// NOTE(review): unlike indexed content, the query string is not segmented with
    /// jieba before parsing; confirm that unsegmented Chinese queries match as intended.
    ///
    /// # Errors
    ///
    /// Returns an error if a reader cannot be created, the query cannot be parsed, the
    /// search fails, or a matching document has no stored text `path`.
    pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
        // `TopDocs::with_limit` panics on a limit of 0, and nothing was requested anyway.
        if limit == 0 {
            return Ok(Vec::new());
        }

        let schema = self.index.schema();
        let content_field = schema
            .get_field("content")
            .expect("schema built in `new` defines `content`");
        // Resolved once here instead of once per hit.
        let path_field = schema
            .get_field("path")
            .expect("schema built in `new` defines `path`");

        let reader = self.index.reader()?;
        let searcher = reader.searcher();

        let query_parser = QueryParser::for_index(&self.index, vec![content_field]);
        // Parse the query (supports boolean queries).
        let query = query_parser.parse_query(query)?;
        let top_docs = searcher.search(&query, &TopDocs::with_limit(limit))?;

        let mut results = Vec::with_capacity(top_docs.len());
        for (_score, doc_address) in top_docs {
            let stored = searcher.doc(doc_address)?;
            let path = stored
                .get_first(path_field)
                .and_then(|value| value.text())
                .ok_or_else(|| {
                    std::io::Error::new(
                        std::io::ErrorKind::InvalidData,
                        "indexed document has no stored text `path` field",
                    )
                })?;
            results.push(SearchResult {
                path: path.to_string(),
                snippet: String::new(), // TODO: implement snippet extraction
                positions: Vec::new(),  // TODO: implement match positions
            });
        }
        Ok(results)
    }
}
/// A single search hit, serializable with serde.
#[derive(Debug, serde::Serialize)]
pub struct SearchResult {
// Value of the matching document's stored `path` field.
pub path: String,
// Text snippet for the hit; `SearchEngine::search` currently leaves it empty.
pub snippet: String,
// Match positions for the hit; `SearchEngine::search` currently leaves it empty.
pub positions: Vec<usize>,
}
Loading…
Cancel
Save