parent
006471b2e2
commit
d8e31033fc
@ -0,0 +1,88 @@
|
|||||||
|
use anyhow::Result;
|
||||||
|
use jieba_rs::Jieba;
|
||||||
|
use std::path::Path;
|
||||||
|
use tantivy::{
|
||||||
|
collector::TopDocs,
|
||||||
|
doc,
|
||||||
|
query::QueryParser,
|
||||||
|
schema::{Schema, STORED, TEXT},
|
||||||
|
Index, IndexWriter,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct SearchEngine {
|
||||||
|
index: Index,
|
||||||
|
writer: IndexWriter,
|
||||||
|
jieba: Jieba,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearchEngine {
|
||||||
|
pub fn new(index_path: &Path) -> Result<Self> {
|
||||||
|
let mut schema_builder = Schema::builder();
|
||||||
|
|
||||||
|
// 定义索引结构
|
||||||
|
schema_builder.add_text_field("path", TEXT | STORED);
|
||||||
|
schema_builder.add_text_field("content", TEXT);
|
||||||
|
schema_builder.add_text_field("file_type", TEXT | STORED);
|
||||||
|
schema_builder.add_date_field("modified", STORED);
|
||||||
|
schema_builder.add_u64_field("size", STORED);
|
||||||
|
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
let index = Index::create_in_dir(index_path, schema)?;
|
||||||
|
let writer = index.writer(50_000_000)?; // 50MB buffer
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
index,
|
||||||
|
writer,
|
||||||
|
jieba: Jieba::new(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn add_document(&mut self, path: &Path, content: &str) -> Result<()> {
|
||||||
|
let schema = self.index.schema();
|
||||||
|
let path_field = schema.get_field("path").unwrap();
|
||||||
|
let content_field = schema.get_field("content").unwrap();
|
||||||
|
|
||||||
|
// 使用结巴分词处理内容
|
||||||
|
let tokens = self.jieba.cut_for_search(content);
|
||||||
|
|
||||||
|
self.writer.add_document(doc!(
|
||||||
|
path_field => path.to_string_lossy().to_string(),
|
||||||
|
content_field => tokens.join(" ")
|
||||||
|
))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn search(&self, query: &str, limit: usize) -> Result<Vec<SearchResult>> {
|
||||||
|
let reader = self.index.reader()?;
|
||||||
|
let searcher = reader.searcher();
|
||||||
|
|
||||||
|
let schema = self.index.schema();
|
||||||
|
let content_field = schema.get_field("content").unwrap();
|
||||||
|
|
||||||
|
let query_parser = QueryParser::for_index(&self.index, vec![content_field]);
|
||||||
|
|
||||||
|
// 处理布尔查询
|
||||||
|
let query = query_parser.parse_query(query)?;
|
||||||
|
let top_docs = searcher.search(&query, &TopDocs::with_limit(limit))?;
|
||||||
|
|
||||||
|
let mut results = Vec::new();
|
||||||
|
for (_score, doc_address) in top_docs {
|
||||||
|
let doc = searcher.doc(doc_address)?;
|
||||||
|
results.push(SearchResult {
|
||||||
|
path: doc.get_first(schema.get_field("path").unwrap()).unwrap().text().unwrap().to_string(),
|
||||||
|
snippet: String::new(), // TODO: 实现片段提取
|
||||||
|
positions: vec![], // TODO: 实现位置信息
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(results)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single hit returned by `SearchEngine::search`.
#[derive(Debug, serde::Serialize)]
pub struct SearchResult {
    // Stored path of the matching document, as indexed.
    pub path: String,
    // Excerpt around the match — currently always empty (extraction is a
    // TODO in `search`).
    pub snippet: String,
    // Match positions — currently always empty (also a TODO in `search`).
    pub positions: Vec<usize>,
}
|
Loading…
Reference in new issue