You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

245 lines
5.6 KiB

(*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*)
{
(** Standard ocamllex helper, meant to be called each time a line terminator
    has been consumed: it advances the current position of [lexbuf] to the next
    line. The line number is incremented and the beginning-of-line offset is
    set to the current character offset; the other position fields are kept. *)
let incr_linenum lexbuf =
  let open Lexing in
  let ({pos_lnum= line; pos_cnum= offset; _} as current) = lexbuf.lex_curr_p in
  lexbuf.lex_curr_p <- {current with pos_lnum= line + 1; pos_bol= offset}
open JSourceParser
}
(* Named regular expressions shared by the rules below. *)

(* Horizontal blanks only; line terminators are handled by [eol] so that the
   line counter can be updated. *)
let whitespace = [' ' '\t']
let eol = whitespace*("\r")?"\n" (* end of line *)
(* A double-slash comment runs up to, but does not include, the newline. *)
let eol_comment = "//" [^'\n']*
let id = ['a'-'z' 'A'-'Z' '_' '$'] ['a'-'z' 'A'-'Z' '0'-'9' '_' '$']*
(* Character literal: the escaped single quote, the double quote, or any
   non-empty run of characters other than a single quote between two quotes. *)
let char = "'\\''" | "'\"'" | "'" [ ^'\'' ]+ "'"
(* Modifiers carry no token of their own: the scanner drops them. The reserved
   word native is included so that native method declarations are scanned like
   any other body-less method. *)
let modifier = "public"|"protected"|"private"|"abstract"|"static"|
               "final"|"strictfp"|"transient"|"volatile"|"native"
let numeric_type = "byte"|"short"|"int"|"long"|"char"|"float"|"double"
let primitive_type = "boolean"|numeric_type
let assignment_operator = "*="|"/="|"%="|"+="|"-="|"<<="|">>="|">>>="|"&="|"^="|"|="
let binop = "||"|"&&"|"&"|"^"|"=="|"!="|"<="|">="|"<<"|">>"|">>>"|"+"|"-"|"*"|"/"|"%"

(* Integer literals. This is deliberately loose: any mix of hexadecimal digits
   and underscores is accepted after the first digit or prefix. *)
let binary_numeral_prefix = "0" ("b"|"B")
let hex_numeral_prefix = "0" ("x"|"X")
let numeral_prefix = ['0'-'9'] | binary_numeral_prefix | hex_numeral_prefix
let numeral_digit = ['0'-'9' 'a'-'f' 'A'-'F' '_']
let integer_literal = numeral_prefix numeral_digit* ['l' 'L']?

(* Floating-point literals. *)
let digits = ['0'-'9']+
let float_type_suffix = ['f' 'F' 'd' 'D']
let exponent_part = ['e' 'E'] ['-' '+']? digits
(* The binary exponent is mandatory in a hexadecimal floating-point literal. *)
let binary_exponent = ['p' 'P'] ['-' '+']? digits
let decimal_floating_point_literal =
    (digits "." digits? exponent_part? float_type_suffix?)
  | ("." digits exponent_part? float_type_suffix?)
  | (digits exponent_part float_type_suffix?)
  | (digits exponent_part? float_type_suffix)
(* Hexadecimal significand with an optional point, then the binary exponent.
   This used to stop right after the p, and the decimal definition was a copy
   of it, so hexadecimal floats were never recognised as a single literal. *)
let hexadecimal_floating_point_literal =
  hex_numeral_prefix
  (numeral_digit+ "."? | numeral_digit* "." numeral_digit+)
  binary_exponent float_type_suffix?
let floating_point_literal =
  decimal_floating_point_literal | hexadecimal_floating_point_literal
(* We follow an abstraction of the official grammar described here:
   https://docs.oracle.com/javase/specs/jls/se14/html/jls-19.html

   [class_scan] is the main entry point: each call returns the next token
   found in [lexbuf] (the token constructors are presumably the ones declared
   by JSourceParser, which the header opens). Blanks, comments, modifiers,
   annotations and any character that no other case recognises are skipped by
   recursing, so they never reach the parser. Line terminators additionally
   update the line counter through [incr_linenum].

   ocamllex takes the longest match and, between matches of equal length, the
   case listed first. The keyword cases must therefore stay above the [id]
   case, and the floating-point case above the integer case. *)
rule class_scan = parse
  (* Layout and comments *)
  | whitespace+
    { class_scan lexbuf }
  | eol_comment
    { class_scan lexbuf }
  | "/*"
    { skip_comments (class_scan) lexbuf }
  | eol
    { incr_linenum lexbuf;
      class_scan lexbuf }
  (* Keywords *)
  | "package"
    { PACKAGE }
  | "import"
    { IMPORT }
  | "class"
    { CLASS }
  | "instanceof"
    { INSTANCEOF }
  | "interface"
    { INTERFACE }
  | "void"
    { VOID }
  | "throws"
    { THROWS }
  | "enum"
    { ENUM }
  (* Modifiers are dropped rather than turned into tokens. *)
  | modifier
    { class_scan lexbuf }
  | primitive_type
    { PRIMTYPE }
  | "<"
    { LANGLE }
  | ">"
    { RANGLE }
  | "new"
    { NEW }
  | "var"
    { VAR }
  | "extends"
    { EXTENDS }
  | "super"
    { SUPER }
  | "implements"
    { IMPLEMENTS }
  | "assert"
    { ASSERT }
  | "do"
    { DO }
  | "while"
    { WHILE }
  | "if"
    { IF }
  | "else"
    { ELSE }
  | "try"
    { TRY }
  | "catch"
    { CATCH }
  | "finally"
    { FINALLY }
  | "for"
    { FOR }
  | "break"
    { BREAK }
  | "continue"
    { CONTINUE }
  | "return"
    { RETURN }
  | "throw"
    { THROW }
  | "synchronized"
    { SYNCHRONIZED }
  | "yield"
    { YIELD }
  | "null"
    { NULL }
  | "true"
    { TRUE }
  | "false"
    { FALSE }
  (* Literals and identifiers *)
  | (floating_point_literal as f)
    { FLOATINGPOINT f }
  | (integer_literal as i)
    { INTEGER i }
  | (id as name)
    { IDENT name }
  (* A backslash escapes the character that follows it, so an escaped double
     quote no longer terminates the literal. [s] is the raw text between the
     quotes, escape sequences included.
     NOTE(review): line terminators inside the matched text are not counted. *)
  | "\"" (([^ '\"' '\\'] | '\\' _)* as s) "\""
    { STRING s }
  | (char as s)
    { CHAR s }
  (* Punctuation and operators *)
  | ";"
    { SEMICOLON }
  | ":"
    { COLON }
  | "."
    { DOT }
  | "{"
    { LBRACKET }
  | "}"
    { RBRACKET }
  | "["
    { LSBRACKET }
  | "]"
    { RSBRACKET }
  | "("
    { LPAREN }
  | ")"
    { RPAREN }
  | ","
    { COMMA }
  | "?"
    { QMARK }
  | ("++"|"--")
    { INCR_DECR }
  | "|"
    { PIPE }
  | "="
    { EQ }
  | "!"
    { BANG }
  | "~"
    { TILDE }
  | "..."
    { THREEDOTS }
  | assignment_operator
    { ASSIGNOP }
  | binop
    { BINOP }
  (* Annotation with arguments: the opening parenthesis is already consumed,
     hence the initial depth of 1; skip up to its matching closing one. *)
  | "@" whitespace* id ("." id)* "("
    { skip_well_parenthesized_parentheses 1
        (class_scan) lexbuf }
  (* Annotation without arguments *)
  | "@" whitespace* id ("." id)*
    { class_scan lexbuf }
  (* Anything else is silently ignored. *)
  | _
    { class_scan lexbuf }
  | eof
    { EOF }
(* we skip type annotation arguments (...) because they may contain brackets.

   [width] is the number of parentheses currently open and [action] is the
   rule to resume with once the parenthesis that brought [width] to 1 is
   closed. Comments, string literals and character literals are stepped over
   as a whole so that a parenthesis inside them does not affect the count.
   Reaching the end of input before the parentheses are balanced hands control
   to [action], which can then deal with the end of input itself; previously
   no case matched there and the generated lexer raised a Failure. *)
and skip_well_parenthesized_parentheses width action = parse
  | eol
    { incr_linenum lexbuf;
      skip_well_parenthesized_parentheses width action lexbuf }
  | "("
    { skip_well_parenthesized_parentheses (width+1) action lexbuf }
  | ")"
    { if width<=1 then action lexbuf
      else skip_well_parenthesized_parentheses (width-1) action lexbuf }
  | eol_comment
    { skip_well_parenthesized_parentheses width action lexbuf }
  | "/*"
    { skip_comments
        (skip_well_parenthesized_parentheses width action) lexbuf }
  | "\""
    { skip_string (skip_well_parenthesized_parentheses width action) lexbuf }
  | char
    { skip_well_parenthesized_parentheses width action lexbuf }
  | _
    { skip_well_parenthesized_parentheses width action lexbuf }
  | eof
    { action lexbuf }
(* [skip_comments action] discards the body of a block comment whose opening
   delimiter has already been consumed, keeping the line counter up to date,
   and resumes lexing with [action] right after the closing delimiter. An
   unterminated comment also resumes with [action], so that the end of input
   is reported by the calling rule; previously no case matched there and the
   generated lexer raised a Failure. *)
and skip_comments action = parse
  | "*/"
    { action lexbuf }
  | eol
    { incr_linenum lexbuf;
      skip_comments action lexbuf }
  | _
    { skip_comments action lexbuf }
  | eof
    { action lexbuf }
(* [skip_string action] discards the rest of a string literal whose opening
   double quote has already been consumed and resumes lexing with [action]
   after the closing one. An escaped backslash and an escaped double quote are
   consumed as pairs so that they cannot be mistaken for the end of the
   literal. Line terminators met on the way are counted, and an unterminated
   string resumes with [action] at the end of input; previously no case
   matched there and the generated lexer raised a Failure. *)
and skip_string action = parse
  | "\\\\"
    { skip_string action lexbuf }
  | "\\\""
    { skip_string action lexbuf }
  | "\""
    { action lexbuf }
  | eol
    { incr_linenum lexbuf;
      skip_string action lexbuf }
  | _
    { skip_string action lexbuf }
  | eof
    { action lexbuf }
{
}