Imports System.CodeDom
Imports System.Text
Namespace Parser
''' <summary>
''' Divides the string into tokens.
''' </summary>
Public Class Tokenizer
''' <summary>Enumerator over the characters of the string being tokenized.</summary>
Private _En As CharEnumerator
''' <summary>Set to True once the enumerator has moved past the last character.</summary>
Private _IsInvalid As Boolean = False
''' <summary>The token most recently produced; starts out as Token.NullToken.</summary>
Private _PrevToken As Token = Token.NullToken
''' <summary>
''' A tokenizer is always constructed on a single string. Create one tokenizer per string.
''' </summary>
''' <param name="s">String to tokenize.</param>
''' <exception cref="ArgumentNullException"><paramref name="s"/> is Nothing.</exception>
Public Sub New(s As String)
    ' Fail with a descriptive exception instead of a bare NullReferenceException.
    If s Is Nothing Then
        Throw New ArgumentNullException("s")
    End If
    _En = s.GetEnumerator()
    ' Position on the first character; an empty string marks the tokenizer invalid right away.
    MoveNext()
End Sub
''' <summary>
''' Advances to the next character. When no characters remain, the tokenizer is
''' flagged as invalid.
''' </summary>
Private Sub MoveNext()
    If _En.MoveNext() Then
        Return
    End If
    ' Ran off the end of the string.
    _IsInvalid = True
End Sub
''' <summary>
''' Allows access to the token most recently parsed.
''' </summary>
''' <value>
''' The token stored by the last call to GetNextToken; Token.NullToken before the first call.
''' </value>
Public ReadOnly Property Current() As Token
Get
Return _PrevToken
End Get
End Property
''' <summary>
''' Indicates that there are no more characters in the string and tokenizer is finished.
''' </summary>
''' <value>True once the character enumerator has been advanced past the last character.</value>
Public ReadOnly Property IsInvalid() As Boolean
Get
Return _IsInvalid
End Get
End Property
''' <summary>
''' Is the current character an ASCII letter (A-Z, a-z) or an underscore?
''' </summary>
''' <value>False when the input is exhausted.</value>
Public ReadOnly Property IsChar() As Boolean
    Get
        If _IsInvalid Then
            Return False
        End If
        Select Case _En.Current
            Case "A"c To "Z"c, "a"c To "z"c, "_"c
                Return True
            Case Else
                Return False
        End Select
    End Get
End Property
''' <summary>
''' Is the current character a dot (".")?
''' </summary>
''' <value>False when the input is exhausted.</value>
Public ReadOnly Property IsDot() As Boolean
    Get
        ' AndAlso short-circuits, so the enumerator is never read once exhausted.
        Return (Not _IsInvalid) AndAlso _En.Current = "."c
    End Get
End Property
''' <summary>
''' Is the current character a comma?
''' </summary>
''' <value>False when the input is exhausted.</value>
Public ReadOnly Property IsComma() As Boolean
    Get
        ' AndAlso short-circuits, so the enumerator is never read once exhausted.
        Return (Not _IsInvalid) AndAlso _En.Current = ","c
    End Get
End Property
''' <summary>
''' Is the current character an ASCII digit (0-9)?
''' </summary>
''' <value>False when the input is exhausted.</value>
Public ReadOnly Property IsNumber() As Boolean
    Get
        If _IsInvalid Then
            Return False
        End If
        Select Case _En.Current
            Case "0"c To "9"c
                Return True
            Case Else
                Return False
        End Select
    End Get
End Property
''' <summary>
''' Is the current character a space or a tab?
''' </summary>
''' <value>False when the input is exhausted.</value>
Public ReadOnly Property IsSpace() As Boolean
    Get
        If _IsInvalid Then
            Return False
        End If
        Select Case _En.Current
            Case " "c, ControlChars.Tab
                Return True
            Case Else
                Return False
        End Select
    End Get
End Property
''' <summary>
''' Is the current character one that starts an operator token? Parentheses, square
''' brackets and the double quote are included alongside the arithmetic, comparison
''' and logical symbols.
''' </summary>
''' <value>False when the input is exhausted.</value>
Public ReadOnly Property IsOperator() As Boolean
    Get
        ' Every character that can start an operator token; the doubled quote at the
        ' end of the literal is a single double-quote character.
        Const OperatorChars As String = "><=-+!/%*&|()[]"""
        If _IsInvalid Then
            Return False
        End If
        ' String.IndexOf(Char) is an ordinal search.
        Return OperatorChars.IndexOf(_En.Current) >= 0
    End Get
End Property
''' <summary>
''' Gets the next token in the string. Reads as many characters as necessary to retrieve
''' that token.
''' </summary>
''' <returns>
''' The next token, or Token.NullToken when the input is exhausted or the current
''' character is not recognized (the unrecognized character is consumed).
''' </returns>
''' <remarks>
''' Leading spaces and tabs are skipped iteratively, so arbitrarily long whitespace runs
''' cannot exhaust the call stack. The returned token is also stored for the Current property.
''' </remarks>
Public Function GetNextToken() As Token
    If _IsInvalid Then
        Return Token.NullToken
    End If

    ' Eat leading whitespace. IsSpace reports False once the input is exhausted,
    ' so this loop always terminates.
    While IsSpace
        MoveNext()
    End While

    Dim result As Token
    If _IsInvalid Then
        ' Only trailing whitespace was left in the input.
        result = Token.NullToken
    ElseIf IsChar Then
        result = GetString()
    ElseIf IsNumber Then
        result = GetNumber()
    ElseIf IsOperator Then
        result = GetOperator()
    ElseIf IsComma Then
        result = New Token(",", TokenType.Comma, TokenPriority.None)
        MoveNext()
    ElseIf IsDot Then
        result = New Token(".", TokenType.Dot, TokenPriority.None)
        MoveNext()
    Else
        ' Unrecognized character: skip it and report a null token.
        result = Token.NullToken
        MoveNext()
    End If

    _PrevToken = result
    Return result
End Function
''' <summary>
''' Reads either the body of a quoted string literal (when the previous token is the
''' opening quote) or a bare word made of letters, underscores and digits.
''' </summary>
''' <returns>
''' A Primitive token holding the string for quoted input, a Primitive Boolean token
''' for the bare words "true" and "false", and an Identifier token for any other bare word.
''' </returns>
''' <remarks>
''' Inside a quoted string a backslash and the character after it are both copied
''' verbatim, so an escaped double quote does not end the literal. A literal that is
''' never closed runs to the end of the input. Quoted text is always returned as a
''' string, even when it spells "true" or "false".
''' </remarks>
Private Function GetString() As Token
    Dim quoted As Boolean = (_PrevToken.Type = TokenType.Quote)
    Dim sb As New StringBuilder()

    If quoted Then
        ' Every character, including the first, goes through the same quote/escape
        ' handling. The loop is skipped entirely when the input ended right after the
        ' opening quote, which yields an empty string instead of reading past the end.
        While Not _IsInvalid
            Dim ch As Char = _En.Current
            If ch = """"c Then
                ' Closing quote: consume it and stop.
                MoveNext()
                Exit While
            End If
            sb.Append(ch)
            If ch = "\"c Then
                ' Keep the backslash and whatever character follows it.
                MoveNext()
                If _IsInvalid Then
                    Exit While
                End If
                sb.Append(_En.Current)
            End If
            MoveNext()
        End While
        Return New Token(sb.ToString(), TokenType.Primitive, TokenPriority.None)
    End If

    ' Bare word: stop at, without consuming, the first character that is not a
    ' letter, underscore or digit.
    While IsChar OrElse IsNumber
        sb.Append(_En.Current)
        MoveNext()
    End While

    Dim word As String = sb.ToString()
    ' "false" or "true" is a primitive expression.
    If word = "false" OrElse word = "true" Then
        Return New Token([Boolean].Parse(word), TokenType.Primitive, TokenPriority.None)
    End If
    ' The default is that the word indicates an identifier.
    Return New Token(word, TokenType.Identifier, TokenPriority.None)
End Function
''' <summary>
''' A token that starts with a number can be an integer, a long, or a double.
''' </summary>
''' <returns>A Primitive token holding an Int32, Int64 or Double value.</returns>
''' <remarks>
''' An integer is the default for numbers. Numbers can also be followed by a
''' l, L, d, or D character to indicate a long or a double value respectively.
''' Any numbers containing a dot (".") are considered doubles.
''' Parsing uses the invariant culture, so "." is always the decimal separator
''' regardless of the regional settings of the current thread.
''' </remarks>
''' <exception cref="ArgumentException">
''' The literal contains a letter other than a trailing D/L suffix, has characters
''' directly after the suffix, or contains a dot after it was already marked as a double.
''' </exception>
Private Function GetNumber() As Token
    Dim sb As New StringBuilder()
    sb.Append(_En.Current)
    Dim isDouble As Boolean = False
    Dim isLong As Boolean = False
    Dim cont As Boolean = True

    While cont
        MoveNext()
        If _IsInvalid Then
            Exit While
        End If

        Dim ch As Char = _En.Current
        If IsNumber Then
            sb.Append(ch)
        ElseIf IsChar Then
            Select Case ch
                Case "D"c, "d"c
                    isDouble = True
                Case "L"c, "l"c
                    isLong = True
                Case Else
                    sb.Append(ch)
                    Throw New ArgumentException("Invalid number: " & sb.ToString())
            End Select
            ' A type suffix must be the last character of the literal. The suffix
            ' itself is consumed but not added to the text that gets parsed.
            MoveNext()
            If IsChar OrElse IsNumber Then
                sb.Append(_En.Current)
                Throw New ArgumentException("Invalid number: " & sb.ToString())
            End If
            cont = False
        ElseIf IsDot Then
            sb.Append(ch)
            If isDouble Then
                ' The number has already been marked as a double, which means it
                ' already contains a dot.
                Throw New ArgumentException("Invalid number: " & sb.ToString())
            End If
            isDouble = True
        Else
            Exit While
        End If
    End While

    Dim literal As String = sb.ToString()
    ' Culture-sensitive parsing would read "1.5" as 15 (or fail) in cultures whose
    ' decimal separator is not ".", so always parse with the invariant culture.
    Dim invariant As System.Globalization.CultureInfo = System.Globalization.CultureInfo.InvariantCulture
    If isLong Then
        Return New Token(Int64.Parse(literal, invariant), TokenType.Primitive, TokenPriority.None)
    End If
    If isDouble Then
        Return New Token([Double].Parse(literal, invariant), TokenType.Primitive, TokenPriority.None)
    End If
    Return New Token(Int32.Parse(literal, invariant), TokenType.Primitive, TokenPriority.None)
End Function
''' <summary>
''' Some operators take more than one character. Also, the tokenizer is able to
''' categorize the token's priority based on what kind of operator it is.
''' </summary>
''' <returns>
''' An Operator token with its priority, a parenthesis or bracket token, or, when the
''' current character is a double quote, the string token read by GetString.
''' Token.NullToken is returned if the current character is not a known operator.
''' </returns>
''' <remarks>
''' Look-ahead for the second character of a two-character operator is guarded against
''' the end of the input, so an expression that ends in an operator character is
''' tokenized instead of raising InvalidOperationException from the enumerator.
''' </remarks>
Private Function GetOperator() As Token
    Dim op As New String(_En.Current, 1)
    Select Case _En.Current
        Case "<"c, "="c, ">"c
            MoveNext()
            ' Optional trailing "=" forms the two-character comparison operators.
            If TryConsume("="c) Then
                op &= "="
            End If
            Return New Token(op, TokenType.[Operator], TokenPriority.Equality)
        Case "-"c
            MoveNext()
            ' A minus that follows a value is a subtraction; otherwise it is a negation.
            ' NOTE(review): a minus after a closing bracket is classified as unary here;
            ' confirm against the parser whether that is intended.
            If _PrevToken.Type = TokenType.Primitive OrElse _PrevToken.Type = TokenType.Identifier OrElse _PrevToken.Type = TokenType.CloseParens Then
                Return New Token(op, TokenType.[Operator], TokenPriority.PlusMinus)
            End If
            Return New Token(op, TokenType.[Operator], TokenPriority.UnaryMinus)
        Case "+"c
            MoveNext()
            Return New Token(op, TokenType.[Operator], TokenPriority.PlusMinus)
        Case "!"c
            MoveNext()
            If TryConsume("="c) Then
                op &= "="
                Return New Token(op, TokenType.[Operator], TokenPriority.Equality)
            End If
            Return New Token(op, TokenType.[Operator], TokenPriority.[Not])
        Case "*"c, "/"c
            MoveNext()
            Return New Token(op, TokenType.[Operator], TokenPriority.MulDiv)
        Case "%"c
            MoveNext()
            Return New Token(op, TokenType.[Operator], TokenPriority.[Mod])
        Case "|"c
            MoveNext()
            If TryConsume("|"c) Then
                op &= "|"
            End If
            Return New Token(op, TokenType.[Operator], TokenPriority.[Or])
        Case "&"c
            MoveNext()
            If TryConsume("&"c) Then
                op &= "&"
            End If
            Return New Token(op, TokenType.[Operator], TokenPriority.[And])
        Case "("c
            MoveNext()
            Return New Token(op, TokenType.OpenParens, TokenPriority.None)
        Case ")"c
            MoveNext()
            Return New Token(op, TokenType.CloseParens, TokenPriority.None)
        Case "["c
            MoveNext()
            Return New Token(op, TokenType.OpenBracket, TokenPriority.None)
        Case "]"c
            MoveNext()
            Return New Token(op, TokenType.CloseBracket, TokenPriority.None)
        Case """"c
            ' When we detect a quote, we can just ignore it since the user doesn't
            ' really need to know about it. The quote token is only recorded so that
            ' GetString knows it is reading a quoted literal.
            MoveNext()
            _PrevToken = New Token(op, TokenType.Quote, TokenPriority.None)
            If _IsInvalid Then
                ' The input ended right after the opening quote: treat it as an empty
                ' string, the same lenient handling an unterminated literal gets.
                Return New Token(String.Empty, TokenType.Primitive, TokenPriority.None)
            End If
            Return GetString()
    End Select
    Return Token.NullToken
End Function

''' <summary>
''' Consumes the current character when it equals <paramref name="expected"/>.
''' </summary>
''' <param name="expected">Character to look for at the current position.</param>
''' <returns>
''' True if the character matched and was consumed; False if it did not match or the
''' input is exhausted.
''' </returns>
Private Function TryConsume(expected As Char) As Boolean
    ' Checking _IsInvalid first avoids reading the enumerator past the end of the string.
    If _IsInvalid OrElse _En.Current <> expected Then
        Return False
    End If
    MoveNext()
    Return True
End Function
End Class
End Namespace