You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
329 lines
9.2 KiB
329 lines
9.2 KiB
7 months ago
|
"""Handwritten parser of dependency specifiers.
|
||
|
|
||
|
The docstring for each _parse_* function contains EBNF-inspired grammar representing
|
||
|
the implementation.
|
||
|
"""
|
||
|
|
||
|
import ast
|
||
|
from typing import Any, List, NamedTuple, Optional, Tuple, Union
|
||
|
|
||
|
from ._tokenizer import DEFAULT_RULES, Tokenizer
|
||
|
|
||
|
|
||
|
class Node:
    """Base class for a parsed marker element that wraps a piece of text.

    Subclasses must override ``serialize``; the base implementation raises
    ``NotImplementedError``.
    """

    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        # The string form is exactly the wrapped text.
        return self.value

    def __repr__(self) -> str:
        # Uses the runtime class name so subclasses are reported correctly.
        class_name = self.__class__.__name__
        return f"<{class_name}('{self!s}')>"

    def serialize(self) -> str:
        raise NotImplementedError
|
||
|
|
||
|
|
||
|
class Variable(Node):
    """Node whose serialized form is its text, unquoted."""

    def serialize(self) -> str:
        return f"{self!s}"
|
||
|
|
||
|
|
||
|
class Value(Node):
    """Node whose serialized form is its text wrapped in double quotes."""

    def serialize(self) -> str:
        text = str(self)
        return f'"{text}"'
|
||
|
|
||
|
|
||
|
class Op(Node):
    """Node for a marker operator; serialized as its text, unquoted."""

    def serialize(self) -> str:
        return f"{self!s}"
|
||
|
|
||
|
|
||
|
# One side of a marker comparison: a Variable or a Value.
MarkerVar = Union[Variable, Value]
# A single comparison as a (left operand, operator, right operand) triple.
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# The intended (recursive) definitions are kept here for reference:
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definitions
# (https://github.com/python/mypy/issues/731), so the aliases below fall
# back to ``Any``.
MarkerAtom = Any
MarkerList = List[Any]
|
||
|
|
||
|
|
||
|
class ParsedRequirement(NamedTuple):
    """Components of a dependency specifier, as returned by ``parse_requirement``."""

    # Text of the leading IDENTIFIER token (the package name).
    name: str
    # Text of the URL token following "@"; "" when the specifier has no URL.
    url: str
    # Extra names from the bracketed list; [] when there is no bracketed list.
    extras: List[str]
    # Concatenated version specifier text; "" when none was parsed.
    specifier: str
    # Parsed marker expression following ";", or None when there is no marker.
    marker: Optional[MarkerList]
|
||
|
|
||
|
|
||
|
# --------------------------------------------------------------------------------------
|
||
|
# Recursive descent parser for dependency specifier
|
||
|
# --------------------------------------------------------------------------------------
|
||
|
def parse_requirement(source: str) -> ParsedRequirement:
    """Parse the dependency specifier string *source*.

    Builds a ``Tokenizer`` over *source* with ``DEFAULT_RULES`` and hands it
    to the recursive descent parser.
    """
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_requirement(tokenizer)
|
||
|
|
||
|
|
||
|
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details

    Parses a complete dependency specifier and requires END afterwards.
    """
    tokenizer.consume("WS")

    # The specifier must open with the package name.
    name = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    ).text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)

    # Nothing may follow the requirement details.
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(
        name=name,
        url=url,
        extras=extras,
        specifier=specifier,
        marker=marker,
    )
|
||
|
|
||
|
|
||
|
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?

    Returns a ``(url, specifier, marker)`` tuple. ``url`` is "" in the
    specifier form, ``specifier`` is "" in the URL form, and ``marker`` is
    None when the input ends before a marker.
    """
    if not tokenizer.check("AT"):
        # Version specifier form (the specifier itself may be empty).
        span_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return ("", specifier, None)

        if specifier:
            after = "version specifier"
        else:
            after = "name and no valid version specifier"
        marker = _parse_requirement_marker(
            tokenizer, span_start=span_start, after=after
        )
        return ("", specifier, marker)

    # Direct reference form: "@" followed by a URL.
    tokenizer.read()
    tokenizer.consume("WS")

    span_start = tokenizer.position
    url = tokenizer.expect("URL", expected="URL after @").text
    if tokenizer.check("END", peek=True):
        return (url, "", None)

    # Anything after the URL must be separated from it by whitespace.
    tokenizer.expect("WS", expected="whitespace after URL")

    # The input might end after whitespace.
    if tokenizer.check("END", peek=True):
        return (url, "", None)

    marker = _parse_requirement_marker(
        tokenizer, span_start=span_start, after="URL and whitespace"
    )
    return (url, "", marker)
|
||
|
|
||
|
|
||
|
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?

    ``span_start`` and ``after`` are only used to build the syntax error
    reported when no semicolon is found.
    """
    has_semicolon = tokenizer.check("SEMICOLON")
    if not has_semicolon:
        message = f"Expected end or semicolon (after {after})"
        tokenizer.raise_syntax_error(message, span_start=span_start)
    tokenizer.read()

    parsed_marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return parsed_marker
|
||
|
|
||
|
|
||
|
def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?

    Returns an empty list when the next token is not LEFT_BRACKET.
    """
    opens_bracket = tokenizer.check("LEFT_BRACKET", peek=True)
    if not opens_bracket:
        return []

    brackets = tokenizer.enclosing_tokens("LEFT_BRACKET", "RIGHT_BRACKET")
    with brackets:
        tokenizer.consume("WS")
        names = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return names
|
||
|
|
||
|
|
||
|
def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*

    Returns the extra names in order; an empty list when the next token is
    not an IDENTIFIER.
    """
    if not tokenizer.check("IDENTIFIER"):
        return []

    names: List[str] = [tokenizer.read().text]

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            # Two names in a row without a separating comma.
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            return names

        tokenizer.read()
        tokenizer.consume("WS")

        next_name = tokenizer.expect(
            "IDENTIFIER", expected="extra name after comma"
        ).text
        names.append(next_name)
|
||
|
|
||
|
|
||
|
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?

    Returns the text produced by ``_parse_version_many``.
    """
    parentheses = tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS")
    with parentheses:
        tokenizer.consume("WS")
        versions = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return versions
|
||
|
|
||
|
|
||
|
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?

    Returns the SPECIFIER and COMMA token texts concatenated in order, with
    the whitespace between them dropped; "" when no SPECIFIER is present.
    """
    pieces: List[str] = []
    while tokenizer.check("SPECIFIER"):
        pieces.append(tokenizer.read().text)
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        # Keep the comma text so the clauses stay separated in the result.
        pieces.append(tokenizer.read().text)
        tokenizer.consume("WS")

    return "".join(pieces)
|
||
|
|
||
|
|
||
|
# --------------------------------------------------------------------------------------
|
||
|
# Recursive descent parser for marker expression
|
||
|
# --------------------------------------------------------------------------------------
|
||
|
def parse_marker(source: str) -> MarkerList:
    """Parse the marker expression string *source*.

    Builds a ``Tokenizer`` over *source* with ``DEFAULT_RULES`` and hands it
    to the recursive descent marker parser.
    """
    tokenizer = Tokenizer(source, rules=DEFAULT_RULES)
    return _parse_marker(tokenizer)
|
||
|
|
||
|
|
||
|
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*

    Returns a flat list alternating parsed atoms with the text of the BOOLOP
    tokens between them; a lone atom yields a one-element list.
    """
    parts: MarkerList = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        boolop = tokenizer.read()
        operand = _parse_marker_atom(tokenizer)
        parts += [boolop.text, operand]
    return parts
|
||
|
|
||
|
|
||
|
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?

    A parenthesized group yields the nested list from ``_parse_marker``;
    otherwise the result is a single marker item tuple.
    """
    tokenizer.consume("WS")

    atom: MarkerAtom
    if not tokenizer.check("LEFT_PARENTHESIS", peek=True):
        atom = _parse_marker_item(tokenizer)
    else:
        group = tokenizer.enclosing_tokens("LEFT_PARENTHESIS", "RIGHT_PARENTHESIS")
        with group:
            tokenizer.consume("WS")
            atom = _parse_marker(tokenizer)
            tokenizer.consume("WS")

    tokenizer.consume("WS")
    return atom
|
||
|
|
||
|
|
||
|
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?

    Returns the comparison as a ``(left, operator, right)`` tuple.
    """
    tokenizer.consume("WS")
    left = _parse_marker_var(tokenizer)

    tokenizer.consume("WS")
    operator = _parse_marker_op(tokenizer)

    tokenizer.consume("WS")
    right = _parse_marker_var(tokenizer)

    tokenizer.consume("WS")
    return (left, operator, right)
|
||
|
|
||
|
|
||
|
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING

    A VARIABLE token becomes a ``Variable`` (dots in its text are replaced by
    underscores first); a QUOTED_STRING token becomes a ``Value``. Anything
    else is reported as a syntax error.
    """
    if tokenizer.check("VARIABLE"):
        variable_name = tokenizer.read().text.replace(".", "_")
        return process_env_var(variable_name)

    if tokenizer.check("QUOTED_STRING"):
        quoted_text = tokenizer.read().text
        return process_python_str(quoted_text)

    tokenizer.raise_syntax_error(
        message="Expected a marker variable or quoted string"
    )
|
||
|
|
||
|
|
||
|
def process_env_var(env_var: str) -> Variable:
    """Wrap a marker variable name in a ``Variable``.

    ``python_implementation`` is treated as an alias and mapped to
    ``platform_python_implementation``; every other name is kept as given.
    """
    if env_var in ("platform_python_implementation", "python_implementation"):
        return Variable("platform_python_implementation")
    return Variable(env_var)
|
||
|
|
||
|
|
||
|
def process_python_str(python_str: str) -> Value:
    """Evaluate a quoted Python literal and wrap its string form in a ``Value``.

    The text is evaluated with ``ast.literal_eval`` and the result is
    converted with ``str`` before wrapping.
    """
    return Value(str(ast.literal_eval(python_str)))
|
||
|
|
||
|
|
||
|
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP

    Returns ``Op("in")``, ``Op("not in")`` or an ``Op`` holding the OP
    token's text. Anything else is reported as a syntax error.
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")

    if tokenizer.check("NOT"):
        tokenizer.read()
        # "not" is only valid as part of "not in", separated by whitespace.
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")

    if tokenizer.check("OP"):
        return Op(tokenizer.read().text)

    return tokenizer.raise_syntax_error(
        "Expected marker operator, one of "
        "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
    )
|