You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

360 lines
12 KiB

Imports System.Text
Imports Path = System.String
''' <summary>
''' A FASTA file that contains multiple sequence data.
''' (一个包含有多条序列数据的FASTA文件)
''' </summary>
''' <remarks></remarks>
<LINQ.Framework.Reflection.LINQEntity("fasta")>
Public Class FASTA2 : Inherits File
Implements LINQ.Framework.ILINQCollection
Implements System.IDisposable
Implements Generic.IEnumerable(Of FASTA)
Dim FASTAList As List(Of FASTA) = New List(Of FASTA)
Dim _SourceFile As String
Const Scan0 = 0
''' <summary>
''' 本FASTA数据文件对象的文件位置
''' </summary>
''' <value></value>
''' <returns></returns>
''' <remarks></remarks>
Public ReadOnly Property SourceFile As String
Get
Return _SourceFile
End Get
End Property
Public Sub Add(Seq As FASTA)
Call FASTAList.Add(Seq)
End Sub
Public Function AddRange(FASTACollection As Generic.IEnumerable(Of FASTA)) As Long
Call FASTAList.AddRange(FASTACollection)
Return FASTAList.LongCount
End Function
''' <summary>
''' Get a new fasta2 object which is been clear the duplicated records in the collection.
''' (获取去除集合中的重复的记录新列表,原有列表中数据未被改变)
''' </summary>
''' <remarks></remarks>
Public Function Distinct() As FASTA2
Dim List = (From fsa In FASTAList Select fsa Order By fsa.Title Ascending).ToList
For i As Integer = 1 To List.Count - 1
If String.Equals(List(i).Title, List(i - 1).Title) Then
Call List.RemoveAt(i)
End If
If i = List.Count Then
Exit For
End If
Next
Return List
End Function
Public Shared Function Read(File As Path) As FASTA2
Dim FASTA As New FASTA2 With {._SourceFile = File}, DtFile As File = File
Dim sBuilder As List(Of String) = New List(Of String)
For Each Line As String In DtFile.Data
If String.IsNullOrEmpty(Line) Then
Continue For
ElseIf Line.Chars(Scan0) = ">"c Then 'New FASTA Object
FASTA.FASTAList.Add(Global.TestLINQEntity.FASTA.Parse(sBuilder))
sBuilder.Clear()
End If
sBuilder.Add(Line)
Next
Call FASTA.FASTAList.RemoveAt(Scan0)
Call FASTA.FASTAList.Add(Global.TestLINQEntity.FASTA.Parse(sBuilder))
Dim OrderQuery = From e As FASTA In FASTA.FASTAList Select e Order By e.ToString Ascending '
FASTA.FASTAList = OrderQuery.ToList
Return FASTA
End Function
Public Sub Split(SaveDir As Path)
Call FileIO.FileSystem.CreateDirectory(SaveDir)
Dim Index As Integer
For Each FASTA In FASTAList
Index += 1
FASTA.Save(String.Format("{0}/{1}.fasta", SaveDir, Index))
Next
End Sub
''' <summary>
'''
''' </summary>
''' <param name="KeyWord">A key string that to search in this fasta file.</param>
''' <param name="CaseSensitive">For text compaired method that not case sensitive, otherwise in the method od binary than case sensitive.</param>
''' <returns></returns>
''' <remarks></remarks>
Public Function Query2(KeyWord As String, Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA2
Dim LQuery As Generic.IEnumerable(Of FASTA) = From FASTA In FASTAList.AsParallel Where Find(FASTA.Attributes, KeyWord, CaseSensitive)
Select FASTA '
Return New FASTA2 With {.FASTAList = LQuery.ToList}
End Function
''' <summary>
'''
''' </summary>
''' <param name="KeyWord">A key string that to search in this fasta file.</param>
''' <param name="CaseSensitive">For text compaired method that not case sensitive, otherwise in the method od binary than case sensitive.</param>
''' <returns></returns>
''' <remarks></remarks>
Public Function Query(KeyWord As String, Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA
Dim LQuery As Generic.IEnumerable(Of FASTA) = From FASTA In FASTAList.AsParallel Where Find(FASTA.Attributes, KeyWord, CaseSensitive)
Select FASTA '
LQuery = LQuery.ToArray
If LQuery.Count = 0 Then
Return Nothing
Else
Return LQuery.First
End If
End Function
Public Function Query(Keyword As String, Index As Integer, Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA()
Dim List = (From fsa In FASTAList Where fsa.Attributes.Count - 1 >= Index Select fsa).ToArray
Dim LQuery = From fsa In List Where InStr(fsa.Attributes(Index), Keyword, CaseSensitive) > 0 Select fsa '
Return LQuery.ToArray
End Function
Private Shared Function Find(AttributeList As String(), KeyWord As String, CaseSensitive As CompareMethod) As Boolean
For i As Integer = 0 To AttributeList.Length - 1
If InStr(AttributeList(i), KeyWord, CaseSensitive) Then
Return True
End If
Next
Return False
End Function
Public Function Take(KeyWordList As List(Of String), Optional CaseSensitive As CompareMethod = CompareMethod.Text) As FASTA2
Dim FASTA2List As New List(Of FASTA)
For Each KeyWord As String In KeyWordList
Dim LQuery = From FASTA In FASTAList Where InStr(FASTA.Data.First, KeyWord, CaseSensitive) Select FASTA '
FASTA2List.AddRange(LQuery.ToArray)
Next
Return New FASTA2 With {.FASTAList = FASTA2List}
End Function
Public Overrides Sub Save(File As Path)
Dim sBuilder As StringBuilder = New StringBuilder(10 * 1024)
For Each FASTA In FASTAList
sBuilder.AppendLine(FASTA.Generate)
Next
Call FileIO.FileSystem.WriteAllText(File, sBuilder.ToString, append:=False)
End Sub
''' <summary>
'''
''' </summary>
''' <param name="File">
''' The target FASTA file that to append this FASTA sequences.(将要拓展这些FASTA序列的目标FASTA文件)
''' </param>
''' <remarks></remarks>
Public Sub AppendToFile(File As Path)
Dim sBuilder As StringBuilder = New StringBuilder(10 * 1024)
For Each FASTA In FASTAList
sBuilder.AppendLine(FASTA.Generate)
Next
Call FileIO.FileSystem.WriteAllText(File, sBuilder.ToString, append:=True)
End Sub
Public Overrides Function ToString() As String
Return String.Format("{0}; [{1} records]", _SourceFile, Count)
End Function
Public Shared Shadows Widening Operator CType(Collection As FASTA()) As FASTA2
Return New FASTA2 With {.FASTAList = Collection.ToList}
End Operator
Public Shared Shadows Widening Operator CType(Collection As List(Of FASTA)) As FASTA2
Return New FASTA2 With {.FASTAList = Collection}
End Operator
Public Shared Shadows Widening Operator CType(fsa As FASTA) As FASTA2
Return New FASTA2 With {.FASTAList = New List(Of FASTA) From {fsa}}
End Operator
Public Shadows Iterator Function GetEnumerator() As IEnumerator(Of FASTA) Implements IEnumerable(Of FASTA).GetEnumerator
For i As Integer = 0 To FASTAList.Count - 1
Yield FASTAList(i)
Next
End Function
Public Shadows Iterator Function GetEnumerator1() As IEnumerator Implements IEnumerable.GetEnumerator
Yield GetEnumerator()
End Function
Public Overrides Function GetCollection(FilePath As String) As Object()
Dim File = FASTA2.Read(FilePath)
Return File.FASTAList.ToArray
End Function
Public Overrides Function GetEntityType() As Type
Return GetType(FASTA)
End Function
End Class
''' <summary>
''' The FASTA format file of a bimolecular sequence.(Notice that this file is
''' only contains on sequence.)
''' FASTA格式的生物分子序列文件。(但是请注意:文件中只包含一条序列的情况)
''' </summary>
''' <remarks></remarks>
Public Class FASTA : Inherits File
''' <summary>
''' The attribute header of this FASTA file.
''' (这个FASTA文件的属性头)
''' </summary>
''' <remarks></remarks>
Public Attributes As String()
''' <summary>
''' The sequence data that contains in this FASTA file.
''' (包含在这个FASTA文件之中的序列数据)
''' </summary>
''' <remarks></remarks>
Public Sequence As String
''' <summary>
''' 返回FASTA对象的标题
''' </summary>
''' <returns></returns>
''' <remarks></remarks>
Public Overrides Function ToString() As String
Dim sBuilder As StringBuilder = New StringBuilder("> ", 1024)
For Each attr As String In Attributes
sBuilder.Append(attr & "|")
Next
sBuilder.Remove(sBuilder.Length - 1, 1)
Return sBuilder.ToString
End Function
Public ReadOnly Property Title As String
Get
Return Me.ToString
End Get
End Property
Public Shared Shadows Widening Operator CType(Path As String) As FASTA
Return Load(File:=Path)
End Operator
Public Shared Function Load(File As String) As FASTA
Dim DataFile As File = File
Dim FASTA As FASTA = New FASTA
Call DataFile.CopyTo(FASTA)
FASTA.Attributes = DataFile.Data.First.Split("|")
FASTA.Sequence = Contact(DataFile.Data.Skip(1)) 'Linux mono does not support <Extension> attribute!
Return FASTA
End Function
Public Shared Function Parse(FASTAStream As Generic.IEnumerable(Of String)) As FASTA
If FASTAStream Is Nothing OrElse FASTAStream.Count = 0 Then Return Nothing
Dim DataFile As File = New File With {.Data = FASTAStream.ToArray}
Dim FASTA As FASTA = New FASTA
Call DataFile.CopyTo(FASTA)
FASTA.Attributes = DataFile.Data.First.Replace(">", "").Split("|")
FASTA.Sequence = Contact(DataFile.Data.Skip(1)) 'Linux mono does not support <Extension> attribute!
Return FASTA
End Function
''' <summary>
''' Generate a FASTA file string.
''' (将这个FASTA对象转换为文件格式以方便进行存储)
''' </summary>
''' <returns></returns>
''' <remarks></remarks>
Public Function Generate() As String
Dim sBuilder As StringBuilder = New StringBuilder(">", 10 * 1024)
For Each Attribute In Attributes
sBuilder.AppendFormat("{0}|", Attribute)
Next
sBuilder.Remove(sBuilder.Length - 1, 1)
sBuilder.AppendLine()
For i As Integer = 1 To Len(Sequence) Step 60
Dim Segment As String = Mid(Sequence, i, 60)
sBuilder.AppendLine(Segment)
Next
Return sBuilder.ToString
End Function
Public Overrides Function Equals(obj As Object) As Boolean
If TypeOf obj Is FASTA Then
Dim [Object] = DirectCast(obj, FASTA)
Return String.Equals([Object].Title, Me, Title) AndAlso String.Equals([Object].Sequence, Me.Sequence)
Else
Return False
End If
End Function
Public Overrides Sub Save(Path As String)
Call FileIO.FileSystem.WriteAllText(Path, Me.Generate, append:=False)
End Sub
''' <summary>
''' Enumerate all of the amino acid.
''' (字符串常量枚举所有的氨基酸分子)
''' </summary>
''' <remarks></remarks>
Const AAALL As String = "BDEFHIJKLMNOPQRSVWXYZ"
''' <summary>
''' (判断这条序列是否为蛋白质序列)
''' </summary>
''' <returns></returns>
''' <remarks></remarks>
Public Function IsProtein() As Boolean
Dim Query = From c As Char In Sequence.ToUpper Where InStr(AAALL, c) Select 1 '
Try
Return Query.First > 0
Catch ex As Exception
Return False
End Try
End Function
Public Function Reverse() As FASTA
Dim Attributes As List(Of String) = Me.Attributes.ToList
Dim FASTA As FASTA = New FASTA
Call Attributes.Add("Reversed_sequence")
FASTA.Attributes = Attributes.ToArray
FASTA.Sequence = Sequence.Reverse.ToArray
Return FASTA
End Function
Public Shared Shadows Narrowing Operator CType(e As FASTA) As String
Return e.Generate
End Operator
End Class