From a410043fd4d681b52d7beb7b0e88a7c2b5964278 Mon Sep 17 00:00:00 2001 From: guigang xie Date: Thu, 21 Sep 2023 15:51:10 +0800 Subject: [PATCH] make improvements of the resource hash algorithm --- src/LINQ/RQL/RQL.vbproj | 1 + src/LINQ/RQL/Resource.vb | 57 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/LINQ/RQL/RQL.vbproj b/src/LINQ/RQL/RQL.vbproj index 59331d6..8baa142 100644 --- a/src/LINQ/RQL/RQL.vbproj +++ b/src/LINQ/RQL/RQL.vbproj @@ -4,6 +4,7 @@ RQL net6.0-windows;net6.0;net48 AnyCPU;x64 + Debug;Release;Rsharp_app_release;mzkit_win32;gcmodeller_desktop;Docker True True True diff --git a/src/LINQ/RQL/Resource.vb b/src/LINQ/RQL/Resource.vb index 93b3c14..ea6e6f9 100644 --- a/src/LINQ/RQL/Resource.vb +++ b/src/LINQ/RQL/Resource.vb @@ -1,8 +1,11 @@ Imports System.IO +Imports System.Runtime.CompilerServices Imports System.Text Imports Microsoft.VisualBasic.ComponentModel.Ranges Imports Microsoft.VisualBasic.Data.GraphTheory +Imports Microsoft.VisualBasic.Data.Repository Imports Microsoft.VisualBasic.DataStorage.HDSPack.FileSystem +Imports Microsoft.VisualBasic.SecurityString ''' ''' Resource query language @@ -22,25 +25,60 @@ Public Class Resource : Implements IDisposable index = parser.Read End Sub + Public Function Add(key As String, str As String) Return Add(key, Encoding.UTF8.GetBytes(str)) End Function + Public Function ReadString(map As String) As String + Return Encoding.UTF8.GetString(ReadBuffer(map)) + End Function + + Public Function ReadBuffer(map As String) As Byte() Dim path As String = URL(map) Dim file As Stream = buf.OpenFile(path, FileMode.Open, FileAccess.Read) Dim bytes As Byte() = New Byte(file.Length - 1) {} Call file.Read(bytes, Scan0, bytes.Length) - Return Encoding.UTF8.GetString(bytes) + Return bytes End Function + Private Shared Function URL(map As String) As String Return $"/pool/{map.Substring(4, 2)}/{map.Substring(16, 6)}/{map}" End Function + Public Shared Function 
GetHashKey(data As Byte()) As String + Static md5 As New Md5HashProvider + + ' all null/empty data points to the ZERO location + If data.IsNullOrEmpty Then + Return New String("0"c, 32) + End If + + ' combine two hash algorithms to avoid hash collisions + Dim key1 As String = md5.GetMd5Hash(data.ToArray) + Dim firstByte = data(0).ToString + Dim lastByte = data(data.Length - 1).ToString + Dim middleByte = data((data.Length - 1) / 2).ToString + Dim fnv = FNV1a.GetHashCode({firstByte, lastByte, middleByte}).ToHexString + Dim hashcode As String = md5.GetMd5Hash(key1 & fnv) + + Return hashcode + End Function + + ''' + ''' + ''' + ''' The query key, could be any text + ''' the data to store in the database, associated with the + ''' given query text (key); the unique reference key of + ''' this resource data is generated via a specific hash algorithm based on + ''' this data payload. + ''' Public Function Add(key As String, data As Byte()) As Boolean Dim tokens As String() = Strings.LCase(key).Split - Dim map As String = key.MD5 + Dim map As String = GetHashKey(data) Dim path As String = URL(map) For Each si As String In tokens @@ -63,6 +101,21 @@ Public Class Resource : Implements IDisposable Return True End Function + ''' + ''' Query for matching resources + ''' + ''' any query term + ''' + ''' A collection of query result keys with score values: + ''' the numeric tag of each item is the query match + ''' score, and the key string value can be used to read + ''' resource data via the + ''' function. + ''' + ''' + ''' the query results have already been re-ordered + ''' by match score, descending + ''' Public Function [Get](query As String) As IEnumerable(Of NumericTagged(Of String)) Dim tokens As String() = Strings.LCase(query).Split Dim maps As New Dictionary(Of String, Double)