Added ZeroOrMore and OneOrMore to BNF. BNF expressions can now be captured

This commit is contained in:
2019-12-18 12:01:51 +01:00
parent 88cd3162be
commit 8dbe2e1b20
9 changed files with 425 additions and 91 deletions
+41 -12
View File
@@ -115,7 +115,11 @@ class BnfParser:
def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text)
tree = self.parse_choice()
tree = self.parser_outer_rule_name()
token = self.get_token()
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
ret = self.sheerka.ret(
self.name,
@@ -129,12 +133,15 @@ class BnfParser:
return ret
def parser_outer_rule_name(self):
    """Parse the outermost choice expression, then any trailing rule-name capture."""
    outer_parse = self.parse_choice
    return self.parser_rule_name(outer_parse)
def parse_choice(self):
sequence = self.parse_sequence()
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
if token is None or token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
@@ -142,7 +149,7 @@ class BnfParser:
# maybe eat the vertical bar
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
if token is None or token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
@@ -152,30 +159,33 @@ class BnfParser:
return OrderedChoice(*elements)
def parse_sequence(self):
expr_and_modifier = self.parse_expression_and_modifier()
expr_and_modifier = self.parse_modifier()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
if token is None or \
token.type == TokenKind.EOF or \
token.type == TokenKind.EQUALS or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
# maybe eat the comma
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
if token is None or \
token.type == TokenKind.EOF or \
token.type == TokenKind.EQUALS or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_white_space()
sequence = self.parse_expression_and_modifier()
sequence = self.parse_modifier()
elements.append(sequence)
return Sequence(*elements)
def parse_expression_and_modifier(self):
expression = self.parse_expression()
def parse_modifier(self):
expression = self.parser_inner_rule_name()
token = self.get_token()
@@ -193,6 +203,9 @@ class BnfParser:
return expression
def parser_inner_rule_name(self):
    """Parse a single inner expression, then any trailing rule-name capture."""
    inner_parse = self.parse_expression
    return self.parser_rule_name(inner_parse)
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
@@ -207,7 +220,7 @@ class BnfParser:
self.next_token()
return expression
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
return expression
if token.type == TokenKind.IDENTIFIER:
@@ -224,4 +237,20 @@ class BnfParser:
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
return ret
def parser_rule_name(self, next_to_parse):
    """Run *next_to_parse*, then capture an optional ``= <identifier>`` suffix.

    When the parsed expression is followed by an equals sign and an
    identifier, the identifier is stored on the expression as
    ``rule_name``. Reports an error node when ``=`` is not followed by
    an identifier; otherwise returns the (possibly annotated) expression.
    """
    expr = next_to_parse()
    tok = self.get_token()
    # No '=' after the expression: nothing to capture, return as-is.
    if tok is None or tok.type != TokenKind.EQUALS:
        return expr
    self.next_token()  # eat equals
    tok = self.get_token()
    if tok is not None and tok.type == TokenKind.IDENTIFIER:
        expr.rule_name = tok.value
        self.next_token()
        return expr
    return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{tok}'", [TokenKind.IDENTIFIER]))
+93 -12
View File
@@ -212,11 +212,11 @@ class Sequence(ParsingExpression):
if node is None:
return None
else:
if node.end != -1: # because Optional returns -1 when no match
if node.end != -1: # because returns -1 when no match
children.append(node)
end_pos = node.end
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or [])
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -284,31 +284,101 @@ class Optional(ParsingExpression):
return f"({to_str})?"
class ZeroOrMore(ParsingExpression):
class Repetition(ParsingExpression):
    """
    Base class for all repetition-like parser expressions (?,*,+).

    Args:
        sep: Optional separator parsing expression that must match
            between consecutive repetitions (e.g. comma-separated
            lists). ``None`` means no separator is expected.
    """

    def __init__(self, *elements, **kwargs):
        # Zero-argument super() — standard Python 3 form (file already
        # uses f-strings, so Python 3 is a given).
        super().__init__(*elements, **kwargs)
        # Subclasses consult self.sep in their _parse loops.
        self.sep = kwargs.get('sep', None)
class ZeroOrMore(Repetition):
    """
    ZeroOrMore will try to match parser expression specified zero or more
    times. It will never fail.
    """

    def _parse(self, parser):
        """Greedily match the wrapped expression zero or more times.

        Returns:
            NonTerminalNode spanning every match; when nothing matched,
            a node with ``end == -1`` (the "no match" marker) so callers
            can tell the repetition consumed no input.
        """
        init_pos = parser.pos
        end_pos = -1
        children = []
        while True:
            current_pos = parser.pos
            # A separator is only expected between matches, never before
            # the first one.
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break
            # Try to match the repeated expression itself.
            node = self.nodes[0].parse(parser)
            if node is None:
                # Rewind past the separator too, if one was consumed.
                parser.seek(current_pos)
                break
            if node.end != -1:  # because returns -1 when no match
                children.append(node)
                end_pos = node.end
            elif parser.pos == current_pos:
                # Inner expression reported "no match" without consuming
                # any input; continuing would loop forever.
                break
        if len(children) == 0:
            # Zero matches is still a success: signal it with end == -1.
            return NonTerminalNode(self, init_pos, -1, [], [])
        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})*"
class OneOrMore(ParsingExpression):
class OneOrMore(Repetition):
    """
    OneOrMore will try to match parser expression specified one or more times.
    """

    def _parse(self, parser):
        """Greedily match the wrapped expression one or more times.

        Returns:
            NonTerminalNode spanning every match, or None when not even
            a single match succeeded (a parse failure for this
            expression).
        """
        init_pos = parser.pos
        end_pos = -1
        children = []
        while True:
            current_pos = parser.pos
            # A separator is only expected between matches, never before
            # the first one.
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break
            # Try to match the repeated expression itself.
            node = self.nodes[0].parse(parser)
            if node is None:
                # Rewind past the separator too, if one was consumed.
                parser.seek(current_pos)
                break
            if node.end != -1:  # because returns -1 when no match
                children.append(node)
                end_pos = node.end
            elif parser.pos == current_pos:
                # Inner expression reported "no match" without consuming
                # any input; continuing would loop forever.
                break
        if len(children) == 0:  # if nothing is found, it's an error
            return None
        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})+"
class UnorderedGroup(ParsingExpression):
class UnorderedGroup(Repetition):
"""
Will try to match all of the parsing expression in any order.
"""
@@ -316,6 +386,10 @@ class UnorderedGroup(ParsingExpression):
def _parse(self, parser):
raise NotImplementedError()
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return f"({to_str})#"
class Match(ParsingExpression):
"""
@@ -541,11 +615,18 @@ class ConceptLexerParser(BaseParser):
ret.ignore_case = self.ignore_case
elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \
isinstance(expression, ZeroOrMore) or \
isinstance(expression, OneOrMore) or \
isinstance(expression, Optional):
ret = expression
ret.nodes.extend([inner_get_model(e) for e in ret.elements])
else:
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
# Translate separator expression.
if isinstance(expression, Repetition) and expression.sep:
expression.sep = inner_get_model(expression.sep)
return ret
model = inner_get_model(concept_def)
@@ -623,7 +704,7 @@ class ConceptLexerParser(BaseParser):
for concept, grammar in self.concepts_grammars.items():
self.seek(init_pos)
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
if node is not None:
if node is not None and node.end != -1:
concept_node = ConceptNode(
concept,
node.start,