I can define and eval BNF definitions
This commit is contained in:
+149
-332
@@ -1,5 +1,5 @@
|
||||
#####################################################################################################
|
||||
# This part of code is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
||||
# This implementation of the parser is heavily inspired by the Arpeggio project (https://github.com/textX/Arpeggio)
|
||||
# I don't directly use the project, but it helped me figure out
|
||||
# what to do.
|
||||
# Dejanović I., Milosavljević G., Vaderna R.:
|
||||
@@ -10,7 +10,6 @@ from dataclasses import field, dataclass
|
||||
from collections import defaultdict
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from core.sheerka import ExecutionContext
|
||||
from core.tokenizer import TokenKind, Tokenizer, Token
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
import core.utils
|
||||
@@ -40,6 +39,18 @@ def flatten(iterable):
|
||||
class LexerNode(Node):
|
||||
start: int
|
||||
end: int
|
||||
tokens: list = None
|
||||
source: str = None
|
||||
|
||||
def __post_init__(self):
|
||||
if self.source is None:
|
||||
self.source = BaseParser.get_text_from_tokens(self.tokens)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, LexerNode):
|
||||
return False
|
||||
|
||||
return self.start == other.start and self.end == other.end
|
||||
|
||||
|
||||
class ConceptNode(LexerNode):
|
||||
@@ -48,17 +59,24 @@ class ConceptNode(LexerNode):
|
||||
It represents a recognized concept
|
||||
"""
|
||||
|
||||
def __init__(self, concept, start, end, tokens=None, source=None, children=None):
|
||||
super().__init__(start, end)
|
||||
def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
|
||||
super().__init__(start, end, tokens, source)
|
||||
self.concept = concept
|
||||
self.tokens = tokens
|
||||
self.source = source
|
||||
self.children = children
|
||||
self.underlying = underlying
|
||||
|
||||
if self.source is None:
|
||||
self.source = BaseParser.get_text_from_tokens(self.tokens)
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, tuple):
|
||||
if len(other) == 2:
|
||||
return self.concept == other[0] and self.source == other[1]
|
||||
else:
|
||||
return self.concept == other[0] and \
|
||||
self.start == other[1] and \
|
||||
self.end == other[2] and \
|
||||
self.source == other[3]
|
||||
|
||||
if not super().__eq__(other):
|
||||
return False
|
||||
|
||||
@@ -66,10 +84,14 @@ class ConceptNode(LexerNode):
|
||||
return False
|
||||
|
||||
return self.concept == other.concept and \
|
||||
self.source == other.source
|
||||
self.source == other.source and \
|
||||
self.underlying == other.underlying
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.concept, self.start, self.end, self.source))
|
||||
return hash((self.concept, self.start, self.end, self.source, self.underlying))
|
||||
|
||||
def __repr__(self):
|
||||
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
class NonTerminalNode(LexerNode):
|
||||
@@ -77,8 +99,8 @@ class NonTerminalNode(LexerNode):
|
||||
Returned by the ConceptLexerParser
|
||||
"""
|
||||
|
||||
def __init__(self, parsing_expression, start, end, children=None):
|
||||
super().__init__(start, end)
|
||||
def __init__(self, parsing_expression, start, end, tokens, children=None):
|
||||
super().__init__(start, end, tokens)
|
||||
self.parsing_expression = parsing_expression
|
||||
self.children = children
|
||||
|
||||
@@ -90,6 +112,21 @@ class NonTerminalNode(LexerNode):
|
||||
sub_names = ""
|
||||
return name + sub_names
|
||||
|
||||
def __eq__(self, other):
|
||||
if not super().__eq__(other):
|
||||
return False
|
||||
|
||||
if not isinstance(other, NonTerminalNode):
|
||||
return False
|
||||
|
||||
return self.parsing_expression == other.parsing_expression and \
|
||||
self.start == other.start and \
|
||||
self.end == other.end and \
|
||||
self.children == other.children
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.parsing_expression, self.start, self.end, self.children))
|
||||
|
||||
|
||||
class TerminalNode(LexerNode):
|
||||
"""
|
||||
@@ -97,7 +134,7 @@ class TerminalNode(LexerNode):
|
||||
"""
|
||||
|
||||
def __init__(self, parsing_expression, start, end, value):
|
||||
super().__init__(start, end)
|
||||
super().__init__(start, end, source=value)
|
||||
self.parsing_expression = parsing_expression
|
||||
self.value = value
|
||||
|
||||
@@ -105,23 +142,27 @@ class TerminalNode(LexerNode):
|
||||
name = self.parsing_expression.rule_name or ""
|
||||
return name + f"'{self.value}'"
|
||||
|
||||
def __eq__(self, other):
|
||||
if not super().__eq__(other):
|
||||
return False
|
||||
|
||||
if not isinstance(other, TerminalNode):
|
||||
return False
|
||||
|
||||
return self.parsing_expression == other.parsing_expression and \
|
||||
self.start == other.start and \
|
||||
self.end == other.end and \
|
||||
self.value == other.value
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.parsing_expression, self.start, self.end, self.value))
|
||||
|
||||
|
||||
@dataclass()
|
||||
class GrammarErrorNode(ErrorNode):
|
||||
message: str
|
||||
|
||||
|
||||
@dataclass()
|
||||
class UnexpectedTokenErrorNode(ErrorNode):
|
||||
message: str
|
||||
expected_tokens: list
|
||||
|
||||
|
||||
@dataclass()
|
||||
class UnexpectedEndOfFileError(ErrorNode):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass()
|
||||
class UnknownConceptNode(ErrorNode):
|
||||
concept_key: str
|
||||
@@ -175,7 +216,7 @@ class Sequence(ParsingExpression):
|
||||
children.append(node)
|
||||
end_pos = node.end
|
||||
|
||||
return NonTerminalNode(self, init_pos, end_pos, children)
|
||||
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or [])
|
||||
|
||||
def __repr__(self):
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
@@ -194,7 +235,7 @@ class OrderedChoice(ParsingExpression):
|
||||
for e in self.nodes:
|
||||
node = e.parse(parser)
|
||||
if node:
|
||||
return NonTerminalNode(self, init_pos, node.end, [node])
|
||||
return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
|
||||
|
||||
parser.seek(init_pos) # backtrack
|
||||
|
||||
@@ -214,13 +255,18 @@ class Optional(ParsingExpression):
|
||||
|
||||
def _parse(self, parser):
|
||||
init_pos = parser.pos
|
||||
selected_node = NonTerminalNode(self, parser.pos, -1, [])
|
||||
selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
|
||||
|
||||
for e in self.nodes:
|
||||
node = e.parse(parser)
|
||||
if node:
|
||||
if node.end > selected_node.end:
|
||||
selected_node = node
|
||||
selected_node = NonTerminalNode(
|
||||
self,
|
||||
node.start,
|
||||
node.end,
|
||||
parser.tokens[node.start: node.end + 1],
|
||||
[node])
|
||||
|
||||
parser.seek(init_pos) # backtrack
|
||||
|
||||
@@ -327,12 +373,12 @@ class ConceptMatch(Match):
|
||||
When the grammar is created, it is replaced by the actual concept
|
||||
"""
|
||||
|
||||
def __init__(self, concept_name):
|
||||
super(Match, self).__init__()
|
||||
self.concept_name = concept_name
|
||||
def __init__(self, concept, rule_name=""):
|
||||
super(Match, self).__init__(rule_name=rule_name)
|
||||
self.concept = concept
|
||||
|
||||
def __repr__(self):
|
||||
return f"{self.concept_name}"
|
||||
return f"{self.concept}"
|
||||
|
||||
def __eq__(self, other):
|
||||
if not super().__eq__(other):
|
||||
@@ -341,32 +387,37 @@ class ConceptMatch(Match):
|
||||
if not isinstance(other, ConceptMatch):
|
||||
return False
|
||||
|
||||
return self.concept_name == other.concept_name
|
||||
|
||||
|
||||
class CrossRef:
|
||||
"""
|
||||
During the creation of the model,
|
||||
creates a reference to a concept, as it may not be resolved yet
|
||||
"""
|
||||
|
||||
def __init__(self, concept):
|
||||
self.concept = concept
|
||||
|
||||
def __repr__(self):
|
||||
return f"ref({self.concept.key})"
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, CrossRef):
|
||||
return False
|
||||
if isinstance(self.concept, Concept):
|
||||
return self.concept.name == other.concept.name
|
||||
|
||||
return self.concept == other.concept
|
||||
|
||||
def _parse(self, parser):
|
||||
to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
|
||||
if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
return None
|
||||
|
||||
if to_match not in parser.concepts_grammars:
|
||||
return None
|
||||
|
||||
self.concept = to_match # Memoize
|
||||
node = parser.concepts_grammars[to_match].parse(parser)
|
||||
if node is None:
|
||||
return None
|
||||
|
||||
return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||
|
||||
|
||||
class ConceptLexerParser(BaseParser):
|
||||
def __init__(self, concepts_dict=None):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__("ConceptLexer")
|
||||
self.concepts_dict = concepts_dict or {} # dict of concept, grammar
|
||||
if 'grammars' in kwargs:
|
||||
self.concepts_grammars = kwargs.get("grammars")
|
||||
elif 'sheerka' in kwargs:
|
||||
self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
|
||||
else:
|
||||
self.concepts_grammars = {}
|
||||
|
||||
self.ignore_case = True
|
||||
|
||||
self.token = None
|
||||
@@ -430,24 +481,23 @@ class ConceptLexerParser(BaseParser):
|
||||
self.pos -= 1
|
||||
self.token = self.tokens[self.pos]
|
||||
|
||||
def initialize(self, context, grammars):
|
||||
def initialize(self, context, concepts_definitions):
|
||||
"""
|
||||
Adds a bunch of concepts, and how they can be recognized
|
||||
:param context: execution context
|
||||
:param grammars: dictionary of concept, concept_definition
|
||||
:param concepts_definitions: dictionary of concept, concept_definition
|
||||
:return:
|
||||
"""
|
||||
|
||||
self.context = context
|
||||
self.sheerka = context.sheerka
|
||||
nodes_to_resolve = []
|
||||
concepts_to_resolve = set()
|
||||
|
||||
# ## Gets the grammars
|
||||
for concept, concept_def in grammars.items():
|
||||
for concept, concept_def in concepts_definitions.items():
|
||||
concept.init_key() # make sure that the key is initialized
|
||||
grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve)
|
||||
self.concepts_dict[concept] = grammar
|
||||
grammar = self.get_model(concept_def, concepts_to_resolve)
|
||||
self.concepts_grammars[concept] = grammar
|
||||
|
||||
if self.has_error:
|
||||
return self.sheerka.ret(self.name, False, self.error_sink)
|
||||
@@ -456,73 +506,68 @@ class ConceptLexerParser(BaseParser):
|
||||
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
|
||||
for concept in concepts_to_remove:
|
||||
concepts_to_resolve.remove(concept)
|
||||
del self.concepts_dict[concept]
|
||||
|
||||
# ## Resolves cross references and remove grammar with unresolved references
|
||||
self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)
|
||||
del self.concepts_grammars[concept]
|
||||
|
||||
if self.has_error:
|
||||
return self.sheerka.ret(self.name, False, self.error_sink)
|
||||
else:
|
||||
return self.sheerka.ret(self.name, True, self.concepts_dict)
|
||||
return self.sheerka.ret(self.name, True, self.concepts_grammars)
|
||||
|
||||
def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve):
|
||||
def get_concept(concept_name):
|
||||
if concept_name in self.context.concepts_cache:
|
||||
return self.context.concepts_cache[concept_name]
|
||||
return self.sheerka.get(concept_name)
|
||||
def get_concept(self, concept_name):
|
||||
if concept_name in self.context.concepts_cache:
|
||||
return self.context.concepts_cache[concept_name]
|
||||
return self.sheerka.get(concept_name)
|
||||
|
||||
def get_model(self, concept_def, concepts_to_resolve):
|
||||
|
||||
# TODO
|
||||
# inner_get_model must not modify the initial ParsingExpression
|
||||
# A copy must be created
|
||||
def inner_get_model(expression):
|
||||
if isinstance(expression, Concept):
|
||||
ret = CrossRef(expression)
|
||||
concepts_to_resolve.add(concept)
|
||||
nodes_to_resolve.append(ret)
|
||||
ret = ConceptMatch(expression, rule_name=expression.name)
|
||||
concepts_to_resolve.add(expression)
|
||||
elif isinstance(expression, ConceptMatch):
|
||||
if expression.rule_name is None or expression.rule_name == "":
|
||||
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
|
||||
else expression.concept
|
||||
concepts_to_resolve.add(expression.concept)
|
||||
ret = expression
|
||||
elif isinstance(expression, str):
|
||||
ret = StrMatch(expression, ignore_case=self.ignore_case)
|
||||
elif isinstance(expression, StrMatch):
|
||||
ret = expression
|
||||
if ret.ignore_case is None:
|
||||
ret.ignore_case = self.ignore_case
|
||||
elif isinstance(expression, ConceptMatch):
|
||||
to_match = get_concept(expression.concept_name)
|
||||
if hasattr(to_match, "__iter__"):
|
||||
ret = self.add_error(TooManyConceptNode(expression.concept_name), False)
|
||||
elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
ret = self.add_error(UnknownConceptNode(expression.concept_name), False)
|
||||
else:
|
||||
ret = CrossRef(to_match)
|
||||
concepts_to_resolve.add(concept)
|
||||
nodes_to_resolve.append(ret)
|
||||
elif isinstance(expression, Sequence) or \
|
||||
isinstance(expression, OrderedChoice) or \
|
||||
isinstance(expression, Optional):
|
||||
ret = expression
|
||||
ret.nodes.extend([inner_get_model(e) for e in ret.elements])
|
||||
if any((isinstance(x, CrossRef) for x in ret.nodes)):
|
||||
concepts_to_resolve.add(concept)
|
||||
nodes_to_resolve.append(ret)
|
||||
else:
|
||||
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
||||
return ret
|
||||
|
||||
model = inner_get_model(concept_def)
|
||||
if isinstance(model, CrossRef):
|
||||
concepts_to_resolve.add(concept)
|
||||
|
||||
model.rule_name = concept.key
|
||||
return model
|
||||
|
||||
def detect_infinite_recursion(self, concepts_to_resolve):
|
||||
|
||||
# infinite recursion matcher
|
||||
def _is_infinite_recursion(ref_concept, node):
|
||||
if isinstance(node, CrossRef):
|
||||
if isinstance(node, ConceptMatch):
|
||||
if node.concept == ref_concept:
|
||||
return True
|
||||
return _is_infinite_recursion(ref_concept, self.concepts_dict[node.concept])
|
||||
|
||||
if isinstance(node.concept, str):
|
||||
to_match = self.get_concept(node.concept)
|
||||
if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
return False
|
||||
else:
|
||||
to_match = node.concept
|
||||
|
||||
return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
|
||||
|
||||
if isinstance(node, OrderedChoice):
|
||||
return _is_infinite_recursion(ref_concept, node.nodes[0])
|
||||
@@ -537,32 +582,16 @@ class ConceptLexerParser(BaseParser):
|
||||
|
||||
removed_concepts = []
|
||||
for e in concepts_to_resolve:
|
||||
to_resolve = self.concepts_dict[e]
|
||||
if isinstance(e, str):
|
||||
e = self.get_concept(e)
|
||||
if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
continue
|
||||
|
||||
to_resolve = self.concepts_grammars[e]
|
||||
if _is_infinite_recursion(e, to_resolve):
|
||||
removed_concepts.append(e)
|
||||
return removed_concepts
|
||||
|
||||
# Cross-ref resolving
|
||||
def resolve_cross_references(self, concepts_to_resolve, nodes_to_resolve):
|
||||
|
||||
repeat = True
|
||||
while repeat:
|
||||
repeat = False
|
||||
for e in concepts_to_resolve:
|
||||
to_resolve = self.concepts_dict[e]
|
||||
if isinstance(to_resolve, CrossRef):
|
||||
repeat = True
|
||||
self.concepts_dict[e] = self.concepts_dict[to_resolve.concept]
|
||||
|
||||
for e in nodes_to_resolve:
|
||||
if not isinstance(e, ParsingExpression):
|
||||
continue # cases when a concept directly references another concept
|
||||
|
||||
for i, node in enumerate(e.nodes):
|
||||
if isinstance(node, CrossRef):
|
||||
if node.concept in self.concepts_dict:
|
||||
e.nodes[i] = self.concepts_dict[node.concept]
|
||||
|
||||
def parse(self, context, text):
|
||||
if text == "":
|
||||
return context.sheerka.ret(
|
||||
@@ -591,13 +620,17 @@ class ConceptLexerParser(BaseParser):
|
||||
while True:
|
||||
init_pos = self.pos
|
||||
res = []
|
||||
for concept, grammar in self.concepts_dict.items():
|
||||
for concept, grammar in self.concepts_grammars.items():
|
||||
self.seek(init_pos)
|
||||
node = grammar.parse(self)
|
||||
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
|
||||
if node is not None:
|
||||
concept_node = ConceptNode(concept, node.start, node.end, self.tokens[node.start: node.end + 1])
|
||||
if hasattr(node, "children"):
|
||||
concept_node.children = node.children
|
||||
concept_node = ConceptNode(
|
||||
concept,
|
||||
node.start,
|
||||
node.end,
|
||||
self.tokens[node.start: node.end + 1],
|
||||
None,
|
||||
node)
|
||||
res.append(concept_node)
|
||||
|
||||
if len(res) == 0: # not recognized
|
||||
@@ -606,9 +639,7 @@ class ConceptLexerParser(BaseParser):
|
||||
self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized))
|
||||
break
|
||||
|
||||
res = self.get_bests(res) # only keep the concept that eat the more tokens
|
||||
for r in res:
|
||||
r.children = flatten(r.children)
|
||||
res = self.get_bests(res) # only keep the concepts that eat the more tokens
|
||||
concepts_found = core.utils.product(concepts_found, res)
|
||||
|
||||
# loop
|
||||
@@ -659,220 +690,6 @@ class ConceptLexerParser(BaseParser):
|
||||
return by_end_pos[max(by_end_pos)]
|
||||
|
||||
|
||||
class RegexParser:
|
||||
"""
|
||||
Parser used to transform a literal into a ParsingExpression
|
||||
example :
|
||||
a | b, c -> Sequence(OrderedChoice(a, b) ,c)
|
||||
|
||||
'|' (pipe) is used for OrderedChoice
|
||||
',' (comma) is used for Sequence
|
||||
'?' (question mark) is used for Optional
|
||||
'*' (star) is used for ZeroOrMore
|
||||
'+' (plus) is used for OneOrMore
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.has_error = False
|
||||
self.error_sink = []
|
||||
self.name = BaseParser.PREFIX + "RegexParser"
|
||||
|
||||
self.lexer_iter = None
|
||||
self._current = None
|
||||
self.after_current = None
|
||||
self.nb_open_par = 0
|
||||
self.context = None
|
||||
self.source = ""
|
||||
self.sheerka = None
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, RegexParser):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def reset_parser(self, context, text):
|
||||
self.context = context
|
||||
self.sheerka = context.sheerka
|
||||
|
||||
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
|
||||
self._current = None
|
||||
self.after_current = None
|
||||
self.nb_open_par = 0
|
||||
|
||||
self.next_token()
|
||||
self.eat_white_space()
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
self.has_error = True
|
||||
self.error_sink.append(error)
|
||||
if next_token:
|
||||
self.next_token()
|
||||
return error
|
||||
|
||||
def get_token(self) -> Token:
|
||||
return self._current
|
||||
|
||||
def next_token(self, skip_whitespace=False):
|
||||
if self._current and self._current.type == TokenKind.EOF:
|
||||
return
|
||||
|
||||
try:
|
||||
self._current = self.after_current or next(self.lexer_iter)
|
||||
self.source += str(self._current.value)
|
||||
self.after_current = None
|
||||
|
||||
if skip_whitespace:
|
||||
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
|
||||
self._current = next(self.lexer_iter)
|
||||
self.source += str(self._current.value)
|
||||
except StopIteration:
|
||||
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
|
||||
|
||||
def next_after(self):
|
||||
if self.after_current is not None:
|
||||
return self.after_current
|
||||
|
||||
try:
|
||||
self.after_current = next(self.lexer_iter)
|
||||
# self.source += str(self.after_current.value)
|
||||
return self.after_current
|
||||
except StopIteration:
|
||||
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
|
||||
return self.after_current
|
||||
|
||||
def eat_white_space(self):
|
||||
if self.after_current is not None:
|
||||
self._current = self.after_current
|
||||
self.source += str(self._current.value)
|
||||
self.after_current = None
|
||||
|
||||
try:
|
||||
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
|
||||
self._current = next(self.lexer_iter)
|
||||
self.source += str(self._current.value)
|
||||
except StopIteration:
|
||||
self._current = None
|
||||
|
||||
def maybe_sequence(self, first, second):
|
||||
token = self.get_token()
|
||||
return token.type == second or token.type == first and self.next_after().type == second
|
||||
|
||||
def parse(self, context: ExecutionContext, text):
|
||||
self.reset_parser(context, text)
|
||||
tree = self.parse_choice()
|
||||
|
||||
ret = self.sheerka.ret(
|
||||
self.name,
|
||||
not self.has_error,
|
||||
self.sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=self.source,
|
||||
body=self.error_sink if self.has_error else tree,
|
||||
try_parsed=tree))
|
||||
|
||||
return ret
|
||||
|
||||
def parse_choice(self):
|
||||
sequence = self.parse_sequence()
|
||||
|
||||
self.eat_white_space()
|
||||
token = self.get_token()
|
||||
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
|
||||
return sequence
|
||||
|
||||
elements = [sequence]
|
||||
while True:
|
||||
# maybe eat the vertical bar
|
||||
self.eat_white_space()
|
||||
token = self.get_token()
|
||||
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
|
||||
break
|
||||
self.next_token(skip_whitespace=True)
|
||||
|
||||
sequence = self.parse_sequence()
|
||||
elements.append(sequence)
|
||||
|
||||
return OrderedChoice(*elements)
|
||||
|
||||
def parse_sequence(self):
|
||||
expr_and_modifier = self.parse_expression_and_modifier()
|
||||
token = self.get_token()
|
||||
if token is None or token.type == TokenKind.EOF or \
|
||||
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
|
||||
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
|
||||
return expr_and_modifier
|
||||
|
||||
elements = [expr_and_modifier]
|
||||
while True:
|
||||
# maybe eat the comma
|
||||
token = self.get_token()
|
||||
if token is None or token.type == TokenKind.EOF or \
|
||||
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
|
||||
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
|
||||
break
|
||||
self.eat_white_space()
|
||||
|
||||
sequence = self.parse_expression_and_modifier()
|
||||
elements.append(sequence)
|
||||
|
||||
return Sequence(*elements)
|
||||
|
||||
def parse_expression_and_modifier(self):
|
||||
expression = self.parse_expression()
|
||||
|
||||
token = self.get_token()
|
||||
|
||||
if token.type == TokenKind.QMARK:
|
||||
self.next_token()
|
||||
return Optional(expression)
|
||||
|
||||
if token.type == TokenKind.STAR:
|
||||
self.next_token()
|
||||
return ZeroOrMore(expression)
|
||||
|
||||
if token.type == TokenKind.PLUS:
|
||||
self.next_token()
|
||||
return OneOrMore(expression)
|
||||
|
||||
return expression
|
||||
|
||||
def parse_expression(self):
|
||||
token = self.get_token()
|
||||
if token.type == TokenKind.EOF:
|
||||
self.add_error(UnexpectedEndOfFileError(), False)
|
||||
if token.type == TokenKind.LPAR:
|
||||
self.nb_open_par += 1
|
||||
self.next_token()
|
||||
expression = self.parse_choice()
|
||||
token = self.get_token()
|
||||
if token.type == TokenKind.RPAR:
|
||||
self.nb_open_par -= 1
|
||||
self.next_token()
|
||||
return expression
|
||||
else:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
|
||||
return expression
|
||||
|
||||
if token.type == TokenKind.IDENTIFIER:
|
||||
self.next_token()
|
||||
return ConceptMatch(token.value)
|
||||
# concept = self.sheerka.get(str(token.value))
|
||||
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
# self.add_error(CannotResolveConceptNode(str(token.value)))
|
||||
# self.next_token()
|
||||
# return None
|
||||
# else:
|
||||
# self.next_token()
|
||||
# return concept
|
||||
|
||||
ret = StrMatch(core.utils.strip_quotes(token.value))
|
||||
self.next_token()
|
||||
return ret
|
||||
|
||||
|
||||
class ParsingExpressionVisitor:
|
||||
"""
|
||||
visit ParsingExpression
|
||||
|
||||
Reference in New Issue
Block a user