Imports System.Text
Imports Path = System.String
'''
''' A FASTA file that contains multiple sequence data.
''' (一个包含有多条序列数据的FASTA文件)
'''
'''
Public Class FASTA2 : Inherits File
Implements LINQ.Framework.ILINQCollection
Implements System.IDisposable
Implements Generic.IEnumerable(Of FASTA)
Dim FASTAList As List(Of FASTA) = New List(Of FASTA)
Dim _SourceFile As String
Const Scan0 = 0
'''
''' 本FASTA数据文件对象的文件位置
'''
'''
'''
'''
Public ReadOnly Property SourceFile As String
Get
Return _SourceFile
End Get
End Property
Public Sub Add(Seq As FASTA)
Call FASTAList.Add(Seq)
End Sub
Public Function AddRange(FASTACollection As Generic.IEnumerable(Of FASTA)) As Long
Call FASTAList.AddRange(FASTACollection)
Return FASTAList.LongCount
End Function
'''
''' Get a new fasta2 object which is been clear the duplicated records in the collection.
''' (获取去除集合中的重复的记录新列表,原有列表中数据未被改变)
'''
'''
Public Function Distinct() As FASTA2
Dim List = (From fsa In FASTAList Select fsa Order By fsa.Title Ascending).ToList
For i As Integer = 1 To List.Count - 1
If String.Equals(List(i).Title, List(i - 1).Title) Then
Call List.RemoveAt(i)
End If
If i = List.Count Then
Exit For
End If
Next
Return List
End Function
Public Shared Function Read(File As Path) As FASTA2
Dim FASTA As New FASTA2 With {._SourceFile = File}, DtFile As File = File
Dim sBuilder As List(Of String) = New List(Of String)
For Each Line As String In DtFile.Data
If String.IsNullOrEmpty(Line) Then
Continue For
ElseIf Line.Chars(Scan0) = ">"c Then 'New FASTA Object
FASTA.FASTAList.Add(Global.TestLINQEntity.FASTA.Parse(sBuilder))
sBuilder.Clear()
End If
sBuilder.Add(Line)
Next
Call FASTA.FASTAList.RemoveAt(Scan0)
Call FASTA.FASTAList.Add(Global.TestLINQEntity.FASTA.Parse(sBuilder))
Dim OrderQuery = From e As FASTA In FASTA.FASTAList Select e Order By e.ToString Ascending '
FASTA.FASTAList = OrderQuery.ToList
Return FASTA
End Function
Public Sub Split(SaveDir As Path)
Call FileIO.FileSystem.CreateDirectory(SaveDir)
Dim Index As Integer
For Each FASTA In FASTAList
Index += 1
FASTA.Save(String.Format("{0}/{1}.fasta", SaveDir, Index))
Next
End Sub
'''
'''
'''
''' A key string that to search in this fasta file.
''' For text compaired method that not case sensitive, otherwise in the method od binary than case sensitive.
'''
'''
Public Function Query2(KeyWord As String, Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA2
Dim LQuery As Generic.IEnumerable(Of FASTA) = From FASTA In FASTAList.AsParallel Where Find(FASTA.Attributes, KeyWord, CaseSensitive)
Select FASTA '
Return New FASTA2 With {.FASTAList = LQuery.ToList}
End Function
'''
'''
'''
''' A key string that to search in this fasta file.
''' For text compaired method that not case sensitive, otherwise in the method od binary than case sensitive.
'''
'''
Public Function Query(KeyWord As String, Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA
Dim LQuery As Generic.IEnumerable(Of FASTA) = From FASTA In FASTAList.AsParallel Where Find(FASTA.Attributes, KeyWord, CaseSensitive)
Select FASTA '
LQuery = LQuery.ToArray
If LQuery.Count = 0 Then
Return Nothing
Else
Return LQuery.First
End If
End Function
Public Function Query(Keyword As String, Index As Integer, Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA()
Dim List = (From fsa In FASTAList Where fsa.Attributes.Count - 1 >= Index Select fsa).ToArray
Dim LQuery = From fsa In List Where InStr(fsa.Attributes(Index), Keyword, CaseSensitive) > 0 Select fsa '
Return LQuery.ToArray
End Function
Private Shared Function Find(AttributeList As String(), KeyWord As String, CaseSensitive As CompareMethod) As Boolean
For i As Integer = 0 To AttributeList.Length - 1
If InStr(AttributeList(i), KeyWord, CaseSensitive) Then
Return True
End If
Next
Return False
End Function
Public Function Take(KeyWordList As List(Of String), Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA2
Dim FASTA2List As New List(Of FASTA)
For Each KeyWord As String In KeyWordList
Dim LQuery = From FASTA In FASTAList Where InStr(FASTA.Data.First, KeyWord, CaseSensitive) Select FASTA '
FASTA2List.AddRange(LQuery.ToArray)
Next
Return New FASTA2 With {.FASTAList = FASTA2List}
End Function
Public Overrides Sub Save(File As Path)
Dim sBuilder As StringBuilder = New StringBuilder(10 * 1024)
For Each FASTA In FASTAList
sBuilder.AppendLine(FASTA.Generate)
Next
Call FileIO.FileSystem.WriteAllText(File, sBuilder.ToString, append:=False)
End Sub
'''
'''
'''
'''
''' The target FASTA file that to append this FASTA sequences.(将要拓展这些FASTA序列的目标FASTA文件)
'''
'''
Public Sub AppendToFile(File As Path)
Dim sBuilder As StringBuilder = New StringBuilder(10 * 1024)
For Each FASTA In FASTAList
sBuilder.AppendLine(FASTA.Generate)
Next
Call FileIO.FileSystem.WriteAllText(File, sBuilder.ToString, append:=True)
End Sub
Public Overrides Function ToString() As String
Return String.Format("{0}; [{1} records]", _SourceFile, Count)
End Function
Public Shared Shadows Widening Operator CType(Collection As FASTA()) As FASTA2
Return New FASTA2 With {.FASTAList = Collection.ToList}
End Operator
Public Shared Shadows Widening Operator CType(Collection As List(Of FASTA)) As FASTA2
Return New FASTA2 With {.FASTAList = Collection}
End Operator
Public Shared Shadows Widening Operator CType(fsa As FASTA) As FASTA2
Return New FASTA2 With {.FASTAList = New List(Of FASTA) From {fsa}}
End Operator
Public Shadows Iterator Function GetEnumerator() As IEnumerator(Of FASTA) Implements IEnumerable(Of FASTA).GetEnumerator
For i As Integer = 0 To FASTAList.Count - 1
Yield FASTAList(i)
Next
End Function
Public Shadows Iterator Function GetEnumerator1() As IEnumerator Implements IEnumerable.GetEnumerator
Yield GetEnumerator()
End Function
Public Overrides Function GetCollection(FilePath As String) As Object()
Dim File = FASTA2.Read(FilePath)
Return File.FASTAList.ToArray
End Function
Public Overrides Function GetEntityType() As Type
Return GetType(FASTA)
End Function
End Class
'''
''' The FASTA format file of a bimolecular sequence.(Notice that this file is
''' only contains on sequence.)
''' FASTA格式的生物分子序列文件。(但是请注意:文件中只包含一条序列的情况)
'''
'''
Public Class FASTA : Inherits File
'''
''' The attribute header of this FASTA file.
''' (这个FASTA文件的属性头)
'''
'''
Public Attributes As String()
'''
''' The sequence data that contains in this FASTA file.
''' (包含在这个FASTA文件之中的序列数据)
'''
'''
Public Sequence As String
'''
''' 返回FASTA对象的标题
'''
'''
'''
Public Overrides Function ToString() As String
Dim sBuilder As StringBuilder = New StringBuilder("> ", 1024)
For Each attr As String In Attributes
sBuilder.Append(attr & "|")
Next
sBuilder.Remove(sBuilder.Length - 1, 1)
Return sBuilder.ToString
End Function
Public ReadOnly Property Title As String
Get
Return Me.ToString
End Get
End Property
Public Shared Shadows Widening Operator CType(Path As String) As FASTA
Return Load(File:=Path)
End Operator
Public Shared Function Load(File As String) As FASTA
Dim DataFile As File = File
Dim FASTA As FASTA = New FASTA
Call DataFile.CopyTo(FASTA)
FASTA.Attributes = DataFile.Data.First.Split("|")
FASTA.Sequence = Contact(DataFile.Data.Skip(1)) 'Linux mono does not support attribute!
Return FASTA
End Function
Public Shared Function Parse(FASTAStream As Generic.IEnumerable(Of String)) As FASTA
If FASTAStream Is Nothing OrElse FASTAStream.Count = 0 Then Return Nothing
Dim DataFile As File = New File With {.Data = FASTAStream.ToArray}
Dim FASTA As FASTA = New FASTA
Call DataFile.CopyTo(FASTA)
FASTA.Attributes = DataFile.Data.First.Replace(">", "").Split("|")
FASTA.Sequence = Contact(DataFile.Data.Skip(1)) 'Linux mono does not support attribute!
Return FASTA
End Function
'''
''' Generate a FASTA file string.
''' (将这个FASTA对象转换为文件格式以方便进行存储)
'''
'''
'''
Public Function Generate() As String
Dim sBuilder As StringBuilder = New StringBuilder(">", 10 * 1024)
For Each Attribute In Attributes
sBuilder.AppendFormat("{0}|", Attribute)
Next
sBuilder.Remove(sBuilder.Length - 1, 1)
sBuilder.AppendLine()
For i As Integer = 1 To Len(Sequence) Step 60
Dim Segment As String = Mid(Sequence, i, 60)
sBuilder.AppendLine(Segment)
Next
Return sBuilder.ToString
End Function
Public Overrides Function Equals(obj As Object) As Boolean
If TypeOf obj Is FASTA Then
Dim [Object] = DirectCast(obj, FASTA)
Return String.Equals([Object].Title, Me, Title) AndAlso String.Equals([Object].Sequence, Me.Sequence)
Else
Return False
End If
End Function
Public Overrides Sub Save(Path As String)
Call FileIO.FileSystem.WriteAllText(Path, Me.Generate, append:=False)
End Sub
'''
''' Enumerate all of the amino acid.
''' (字符串常量枚举所有的氨基酸分子)
'''
'''
Const AAALL As String = "BDEFHIJKLMNOPQRSVWXYZ"
'''
''' (判断这条序列是否为蛋白质序列)
'''
'''
'''
Public Function IsProtein() As Boolean
Dim Query = From c As Char In Sequence.ToUpper Where InStr(AAALL, c) Select 1 '
Try
Return Query.First > 0
Catch ex As Exception
Return False
End Try
End Function
Public Function Reverse() As FASTA
Dim Attributes As List(Of String) = Me.Attributes.ToList
Dim FASTA As FASTA = New FASTA
Call Attributes.Add("Reversed_sequence")
FASTA.Attributes = Attributes.ToArray
FASTA.Sequence = Sequence.Reverse.ToArray
Return FASTA
End Function
Public Shared Shadows Narrowing Operator CType(e As FASTA) As String
Return e.Generate
End Operator
End Class