use anyhow::Result; use jieba_rs::Jieba; use std::path::Path; use tantivy::{ collector::TopDocs, doc, query::QueryParser, schema::{Schema, STORED, TEXT}, Index, IndexWriter, }; pub struct SearchEngine { index: Index, writer: IndexWriter, jieba: Jieba, } impl SearchEngine { pub fn new(index_path: &Path) -> Result { let mut schema_builder = Schema::builder(); // 定义索引结构 schema_builder.add_text_field("path", TEXT | STORED); schema_builder.add_text_field("content", TEXT); schema_builder.add_text_field("file_type", TEXT | STORED); schema_builder.add_date_field("modified", STORED); schema_builder.add_u64_field("size", STORED); let schema = schema_builder.build(); let index = Index::create_in_dir(index_path, schema)?; let writer = index.writer(50_000_000)?; // 50MB buffer Ok(Self { index, writer, jieba: Jieba::new(), }) } pub async fn add_document(&mut self, path: &Path, content: &str) -> Result<()> { let schema = self.index.schema(); let path_field = schema.get_field("path").unwrap(); let content_field = schema.get_field("content").unwrap(); // 使用结巴分词处理内容 let tokens = self.jieba.cut_for_search(content); self.writer.add_document(doc!( path_field => path.to_string_lossy().to_string(), content_field => tokens.join(" ") ))?; Ok(()) } pub async fn search(&self, query: &str, limit: usize) -> Result> { let reader = self.index.reader()?; let searcher = reader.searcher(); let schema = self.index.schema(); let content_field = schema.get_field("content").unwrap(); let query_parser = QueryParser::for_index(&self.index, vec![content_field]); // 处理布尔查询 let query = query_parser.parse_query(query)?; let top_docs = searcher.search(&query, &TopDocs::with_limit(limit))?; let mut results = Vec::new(); for (_score, doc_address) in top_docs { let doc = searcher.doc(doc_address)?; results.push(SearchResult { path: doc.get_first(schema.get_field("path").unwrap()).unwrap().text().unwrap().to_string(), snippet: String::new(), // TODO: 实现片段提取 positions: vec![], // TODO: 实现位置信息 }); } Ok(results) } } #[derive(Debug, serde::Serialize)] pub struct SearchResult { pub path: String, pub snippet: String, pub positions: Vec, }