Imports System.ComponentModel
Imports GridDynamics_plugins
Imports Microsoft.VisualBasic.CommandLine
Imports Microsoft.VisualBasic.CommandLine.Reflection
Imports Microsoft.VisualBasic.ComponentModel.DataStructures
Imports Microsoft.VisualBasic.Linq
Imports Microsoft.VisualBasic.MachineLearning.Darwinism.GAF
Imports Microsoft.VisualBasic.MachineLearning.Darwinism.GAF.Helper
Imports Microsoft.VisualBasic.MachineLearning.Darwinism.GAF.ReplacementStrategy
Imports Microsoft.VisualBasic.MachineLearning.Darwinism.NonlinearGridTopology
Imports Microsoft.VisualBasic.MachineLearning.StoreProcedure
Imports Microsoft.VisualBasic.Math.LinearAlgebra
Module Program
Public Function Main() As Integer
Return GetType(Program).RunCLI(App.CommandLine)
End Function
'''
''' 使用这个工具几乎可以训练任意规模大小的网格模型, 但是速度会比较慢, 因为这个程序使用硬盘文件来充当内存缓存
'''
'''
'''
[/validateSet /model /truncate /rate /popsize /out ]")>
Public Function RunGrid(args As CommandLine) As Integer
Dim in$ = args <= "/trainingSet"
Dim validates$ = args <= "/validate"
Dim model$ = args <= "/model"
Dim truncate As Double = args("/truncate") Or 1000.0
Dim rate# = args("/rate") Or 0.5
Dim popSize = args("/popsize") Or 50
Dim out$ = args("/out") Or $"{[in].TrimSuffix}.minError_DeepGrid.Xml"
Dim seed As GridMatrix = Nothing
Dim cacheZip = out.ParentPath & $"/.{out.BaseName}/"
Dim diskCaches As LoopArray(Of String) = 5.Sequence _
.Select(Function(i) $"{cacheZip}/cache_{(i + 666).ToHexString}.zip") _
.ToArray
If Not in$.FileExists Then
Call "No input file was found!".PrintException
Else
seed = If(model.FileExists, model.LoadXml(Of GridMatrix), Nothing)
If Not seed Is Nothing Then
Call $"Load trained model from {model}".__INFO_ECHO
End If
End If
Dim trainingSet = in$.LoadXml(Of DataSet)
Dim validateSet = args("/validateSet").LoadXml(Of DataSet)(throwEx:=False)
Dim factorNames = trainingSet.NormalizeMatrix.names
Call $"Mutation rate = {rate}".__DEBUG_ECHO
Call $"Population size = {popSize}".__DEBUG_ECHO
Dim cor As Vector = trainingSet.DataSamples.AsEnumerable.Correlation
Call "Create a base chromosome".__DEBUG_ECHO
Dim chromesome As GridSystem
If seed Is Nothing Then
chromesome = Loader.EmptyGridSystem(trainingSet.width, cor).TryCast(Of GridSystem)
Else
chromesome = seed.CreateSystem
End If
' 在种群范围内不进行并行计算
' 只对蛋白genome内部进行并行化计算
Dim zip As New PopulationZip(diskCaches.Next, rate, truncate)
Dim population As Population(Of Genome) = New Genome(chromesome, rate, truncate).InitialPopulation(New Population(Of Genome)(zip, False) With {.capacitySize = popSize})
Call "Initialize environment".__DEBUG_ECHO
Dim fitness As Fitness(Of Genome) = New Environment(Of GridSystem, Genome)(trainingSet, FitnessMethods.LabelGroupAverage, validateSet)
Call "Create algorithm engine".__DEBUG_ECHO
Dim ga As New GeneticAlgorithm(Of Genome)(
population:=population,
fitnessFunc:=fitness,
replacementStrategy:=Strategies.Naive,
createPopulation:=Function() New PopulationZip(diskCaches.Next, rate, truncate)
)
Call "Load driver".__DEBUG_ECHO
Dim takeBestSnapshot = Sub(best As Genome, error#)
Call best.chromosome _
.CreateSnapshot(
dist:=trainingSet.NormalizeMatrix,
names:=factorNames,
[error]:=[error]
) _
.GetXml _
.SaveTo(OutFile.TrimSuffix & $"_localOptimal/{[error]}.Xml")
End Sub
Dim engine As New EnvironmentDriver(Of Genome)(ga, takeBestSnapshot) With {
.Iterations = 1000000,
.Threshold = 0.005
}
Call engine.AttachReporter(Sub(i, e, g)
Call EnvironmentDriver(Of Genome).CreateReport(i, e, g).ToString.__DEBUG_ECHO
Call g.Best.chromosome _
.CreateSnapshot(trainingSet.NormalizeMatrix, factorNames, e) _
.GetXml _
.SaveTo(OutFile)
End Sub)
Call "Run GA!".__DEBUG_ECHO
Call engine.Train()
Return 0
End Function
End Module