// git-test/src/main/java/net/micode/notes/search/SearchIndexer.java

/*
 * Copyright (c) 2024, The MiCode Open Source Community (www.micode.net)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.micode.notes.search;

import android.content.ContentResolver;
import android.content.Context;
import android.database.Cursor;
import android.text.TextUtils;
import android.util.Log;

import net.micode.notes.data.Notes;
import net.micode.notes.data.Notes.DataColumns;
import net.micode.notes.data.Notes.NoteColumns;
import net.micode.notes.data.Notes.TextNote;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;

/**
 * 搜索索引器类，负责创建和管理搜索索引，提高搜索效率
 */
public class SearchIndexer {
    private static final String TAG = "SearchIndexer";
    private Context mContext;
    private Map<String, Set<Long>> mWordToNoteIdsMap; // 单词到便签ID的映射
    private Map<Long, NoteIndexData> mNoteIndexDataMap; // 便签ID到索引数据的映射
    private ReadWriteLock mLock; // 读写锁，确保线程安全

    /**
     * 便签索引数据类，存储便签的索引信息
     */
    private static class NoteIndexData {
        String title;
        String content;
        String snippet;
        long createdDate;
        long modifiedDate;

        NoteIndexData(String title, String content, String snippet, long createdDate, long modifiedDate) {
            this.title = title;
            this.content = content;
            this.snippet = snippet;
            this.createdDate = createdDate;
            this.modifiedDate = modifiedDate;
        }
    }

    /**
     * 构造函数，初始化搜索索引器
     * @param context 上下文对象
     */
    public SearchIndexer(Context context) {
        mContext = context;
        mWordToNoteIdsMap = new HashMap<>();
        mNoteIndexDataMap = new HashMap<>();
        mLock = new ReentrantReadWriteLock();

        // 初始化时构建索引
        updateIndex();
    }

    /**
     * 更新搜索索引
     */
    public void updateIndex() {
        mLock.writeLock().lock();
        try {
            // 清空现有索引
            mWordToNoteIdsMap.clear();
            mNoteIndexDataMap.clear();

            // 从数据库中获取所有便签数据
            List<NoteIndexData> noteDataList = getAllNoteData();

            // 为每个便签构建索引
            for (long noteId : mNoteIndexDataMap.keySet()) {
                NoteIndexData noteData = mNoteIndexDataMap.get(noteId);
                if (noteData != null) {
                    // 提取关键词并添加到索引中
                    Set<String> keywords = extractKeywords(noteData.title, noteData.content, noteData.snippet);
                    for (String keyword : keywords) {
                        Set<Long> noteIds = mWordToNoteIdsMap.getOrDefault(keyword, new HashSet<>());
                        noteIds.add(noteId);
                        mWordToNoteIdsMap.put(keyword, noteIds);
                    }
                }
            }

            Log.d(TAG, "Search index updated. Total words: " + mWordToNoteIdsMap.size() + ", Total notes: " + mNoteIndexDataMap.size());
        } finally {
            mLock.writeLock().unlock();
        }
    }

    /**
     * 执行搜索
     * @param keyword 搜索关键词
     * @return 搜索结果列表
     */
    public List<SearchResult> search(String keyword) {
        mLock.readLock().lock();
        try {
            List<SearchResult> results = new ArrayList<>();

            if (TextUtils.isEmpty(keyword)) {
                return results;
            }

            keyword = keyword.toLowerCase();
            Set<Long> matchingNoteIds = new HashSet<>();

            // 查找包含关键词的所有便签ID
            for (String indexedWord : mWordToNoteIdsMap.keySet()) {
                if (indexedWord.contains(keyword)) {
                    matchingNoteIds.addAll(mWordToNoteIdsMap.get(indexedWord));
                }
            }

            // 为每个匹配的便签创建搜索结果
            for (long noteId : matchingNoteIds) {
                NoteIndexData noteData = mNoteIndexDataMap.get(noteId);
                if (noteData != null) {
                    SearchResult result = new SearchResult();
                    result.setNoteId(noteId);
                    result.setTitle(noteData.title);
                    result.setContent(noteData.content);
                    result.setSnippet(noteData.snippet);
                    result.setCreatedDate(noteData.createdDate);
                    result.setModifiedDate(noteData.modifiedDate);

                    // 计算相关度得分
                    result.calculateRelevanceScore(keyword);

                    results.add(result);
                }
            }

            return results;
        } finally {
            mLock.readLock().unlock();
        }
    }

    /**
     * 获取搜索建议
     * @param keyword 搜索关键词前缀
     * @param maxResults 最大建议数
     * @return 搜索建议列表
     */
    public List<String> getSuggestions(String keyword, int maxResults) {
        mLock.readLock().lock();
        try {
            List<String> suggestions = new ArrayList<>();

            if (TextUtils.isEmpty(keyword)) {
                return suggestions;
            }

            keyword = keyword.toLowerCase();
            Set<String> uniqueSuggestions = new HashSet<>();

            // 从索引中查找匹配的关键词
            for (String indexedWord : mWordToNoteIdsMap.keySet()) {
                if (indexedWord.startsWith(keyword)) {
                    uniqueSuggestions.add(indexedWord);
                    if (uniqueSuggestions.size() >= maxResults) {
                        break;
                    }
                }
            }

            suggestions.addAll(uniqueSuggestions);
            return suggestions;
        } finally {
            mLock.readLock().unlock();
        }
    }

    /**
     * 从数据库中获取所有便签数据
     * @return 便签数据列表
     */
    private List<NoteIndexData> getAllNoteData() {
        List<NoteIndexData> noteDataList = new ArrayList<>();
        ContentResolver resolver = mContext.getContentResolver();

        // 查询所有普通便签
        String[] projection = new String[] {
                NoteColumns.ID,
                NoteColumns.TITLE,
                NoteColumns.SNIPPET,
                NoteColumns.CREATED_DATE,
                NoteColumns.MODIFIED_DATE,
                DataColumns.CONTENT
        };

        String selection = NoteColumns.TYPE + " = ?";
        String[] selectionArgs = new String[] { String.valueOf(Notes.TYPE_NOTE) };

        Cursor cursor = resolver.query(
                Notes.CONTENT_NOTE_URI,
                projection,
                selection,
                selectionArgs,
                null
        );

        if (cursor != null) {
            try {
                while (cursor.moveToNext()) {
                    long noteId = cursor.getLong(cursor.getColumnIndexOrThrow(NoteColumns.ID));
                    String title = cursor.getString(cursor.getColumnIndexOrThrow(NoteColumns.TITLE));
                    String snippet = cursor.getString(cursor.getColumnIndexOrThrow(NoteColumns.SNIPPET));
                    String content = cursor.getString(cursor.getColumnIndexOrThrow(DataColumns.CONTENT));
                    long createdDate = cursor.getLong(cursor.getColumnIndexOrThrow(NoteColumns.CREATED_DATE));
                    long modifiedDate = cursor.getLong(cursor.getColumnIndexOrThrow(NoteColumns.MODIFIED_DATE));

                    NoteIndexData noteData = new NoteIndexData(title, content, snippet, createdDate, modifiedDate);
                    noteDataList.add(noteData);
                    mNoteIndexDataMap.put(noteId, noteData);
                }
            } catch (Exception e) {
                Log.e(TAG, "Error reading notes data: " + e.getMessage());
            } finally {
                cursor.close();
            }
        }

        return noteDataList;
    }

    /**
     * 提取文本中的关键词
     * @param texts 要提取关键词的文本数组
     * @return 提取的关键词集合
     */
    private Set<String> extractKeywords(String... texts) {
        Set<String> keywords = new HashSet<>();

        for (String text : texts) {
            if (!TextUtils.isEmpty(text)) {
                // 将文本转换为小写并去除标点符号
                String processedText = text.toLowerCase().replaceAll("[^a-z0-9\\u4e00-\\u9fa5]", " ");

                // 分割成单词
                String[] words = processedText.split("\\s+");

                for (String word : words) {
                    if (word.length() > 0) {
                        keywords.add(word);
                    }
                }
            }
        }

        return keywords;
    }
}