You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
git-test/src/main/java/net/micode/notes/search/SearchIndexer.java

279 lines
9.5 KiB

/*
* Copyright (c) 2024, The MiCode Open Source Community (www.micode.net)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.micode.notes.search;
import android.content.ContentResolver;
import android.content.Context;
import android.database.Cursor;
import android.text.TextUtils;
import android.util.Log;
import net.micode.notes.data.Notes;
import net.micode.notes.data.Notes.DataColumns;
import net.micode.notes.data.Notes.NoteColumns;
import net.micode.notes.data.Notes.TextNote;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
/**
* 搜索索引器类,负责创建和管理搜索索引,提高搜索效率
*/
public class SearchIndexer {
private static final String TAG = "SearchIndexer";
private Context mContext;
private Map<String, Set<Long>> mWordToNoteIdsMap; // 单词到便签ID的映射
private Map<Long, NoteIndexData> mNoteIndexDataMap; // 便签ID到索引数据的映射
private ReadWriteLock mLock; // 读写锁,确保线程安全
/**
* 便签索引数据类,存储便签的索引信息
*/
private static class NoteIndexData {
String title;
String content;
String snippet;
long createdDate;
long modifiedDate;
NoteIndexData(String title, String content, String snippet, long createdDate, long modifiedDate) {
this.title = title;
this.content = content;
this.snippet = snippet;
this.createdDate = createdDate;
this.modifiedDate = modifiedDate;
}
}
/**
* 构造函数,初始化搜索索引器
* @param context 上下文对象
*/
public SearchIndexer(Context context) {
mContext = context;
mWordToNoteIdsMap = new HashMap<>();
mNoteIndexDataMap = new HashMap<>();
mLock = new ReentrantReadWriteLock();
// 初始化时构建索引
updateIndex();
}
/**
* 更新搜索索引
*/
public void updateIndex() {
mLock.writeLock().lock();
try {
// 清空现有索引
mWordToNoteIdsMap.clear();
mNoteIndexDataMap.clear();
// 从数据库中获取所有便签数据
List<NoteIndexData> noteDataList = getAllNoteData();
// 为每个便签构建索引
for (long noteId : mNoteIndexDataMap.keySet()) {
NoteIndexData noteData = mNoteIndexDataMap.get(noteId);
if (noteData != null) {
// 提取关键词并添加到索引中
Set<String> keywords = extractKeywords(noteData.title, noteData.content, noteData.snippet);
for (String keyword : keywords) {
Set<Long> noteIds = mWordToNoteIdsMap.getOrDefault(keyword, new HashSet<>());
noteIds.add(noteId);
mWordToNoteIdsMap.put(keyword, noteIds);
}
}
}
Log.d(TAG, "Search index updated. Total words: " + mWordToNoteIdsMap.size() + ", Total notes: " + mNoteIndexDataMap.size());
} finally {
mLock.writeLock().unlock();
}
}
/**
* 执行搜索
* @param keyword 搜索关键词
* @return 搜索结果列表
*/
public List<SearchResult> search(String keyword) {
mLock.readLock().lock();
try {
List<SearchResult> results = new ArrayList<>();
if (TextUtils.isEmpty(keyword)) {
return results;
}
keyword = keyword.toLowerCase();
Set<Long> matchingNoteIds = new HashSet<>();
// 查找包含关键词的所有便签ID
for (String indexedWord : mWordToNoteIdsMap.keySet()) {
if (indexedWord.contains(keyword)) {
matchingNoteIds.addAll(mWordToNoteIdsMap.get(indexedWord));
}
}
// 为每个匹配的便签创建搜索结果
for (long noteId : matchingNoteIds) {
NoteIndexData noteData = mNoteIndexDataMap.get(noteId);
if (noteData != null) {
SearchResult result = new SearchResult();
result.setNoteId(noteId);
result.setTitle(noteData.title);
result.setContent(noteData.content);
result.setSnippet(noteData.snippet);
result.setCreatedDate(noteData.createdDate);
result.setModifiedDate(noteData.modifiedDate);
// 计算相关度得分
result.calculateRelevanceScore(keyword);
results.add(result);
}
}
return results;
} finally {
mLock.readLock().unlock();
}
}
/**
* 获取搜索建议
* @param keyword 搜索关键词前缀
* @param maxResults 最大建议数
* @return 搜索建议列表
*/
public List<String> getSuggestions(String keyword, int maxResults) {
mLock.readLock().lock();
try {
List<String> suggestions = new ArrayList<>();
if (TextUtils.isEmpty(keyword)) {
return suggestions;
}
keyword = keyword.toLowerCase();
Set<String> uniqueSuggestions = new HashSet<>();
// 从索引中查找匹配的关键词
for (String indexedWord : mWordToNoteIdsMap.keySet()) {
if (indexedWord.startsWith(keyword)) {
uniqueSuggestions.add(indexedWord);
if (uniqueSuggestions.size() >= maxResults) {
break;
}
}
}
suggestions.addAll(uniqueSuggestions);
return suggestions;
} finally {
mLock.readLock().unlock();
}
}
/**
* 从数据库中获取所有便签数据
* @return 便签数据列表
*/
private List<NoteIndexData> getAllNoteData() {
List<NoteIndexData> noteDataList = new ArrayList<>();
ContentResolver resolver = mContext.getContentResolver();
// 查询所有普通便签
String[] projection = new String[] {
NoteColumns.ID,
NoteColumns.TITLE,
NoteColumns.SNIPPET,
NoteColumns.CREATED_DATE,
NoteColumns.MODIFIED_DATE,
DataColumns.CONTENT
};
String selection = NoteColumns.TYPE + " = ?";
String[] selectionArgs = new String[] { String.valueOf(Notes.TYPE_NOTE) };
Cursor cursor = resolver.query(
Notes.CONTENT_NOTE_URI,
projection,
selection,
selectionArgs,
null
);
if (cursor != null) {
try {
while (cursor.moveToNext()) {
long noteId = cursor.getLong(cursor.getColumnIndexOrThrow(NoteColumns.ID));
String title = cursor.getString(cursor.getColumnIndexOrThrow(NoteColumns.TITLE));
String snippet = cursor.getString(cursor.getColumnIndexOrThrow(NoteColumns.SNIPPET));
String content = cursor.getString(cursor.getColumnIndexOrThrow(DataColumns.CONTENT));
long createdDate = cursor.getLong(cursor.getColumnIndexOrThrow(NoteColumns.CREATED_DATE));
long modifiedDate = cursor.getLong(cursor.getColumnIndexOrThrow(NoteColumns.MODIFIED_DATE));
NoteIndexData noteData = new NoteIndexData(title, content, snippet, createdDate, modifiedDate);
noteDataList.add(noteData);
mNoteIndexDataMap.put(noteId, noteData);
}
} catch (Exception e) {
Log.e(TAG, "Error reading notes data: " + e.getMessage());
} finally {
cursor.close();
}
}
return noteDataList;
}
/**
* 提取文本中的关键词
* @param texts 要提取关键词的文本数组
* @return 提取的关键词集合
*/
private Set<String> extractKeywords(String... texts) {
Set<String> keywords = new HashSet<>();
for (String text : texts) {
if (!TextUtils.isEmpty(text)) {
// 将文本转换为小写并去除标点符号
String processedText = text.toLowerCase().replaceAll("[^a-z0-9\\u4e00-\\u9fa5]", " ");
// 分割成单词
String[] words = processedText.split("\\s+");
for (String word : words) {
if (word.length() > 0) {
keywords.add(word);
}
}
}
}
return keywords;
}
}