Added ZeroOrMore and OneOrMore to BNF. BNF expressions can now be captured.
This commit is contained in:
@@ -212,11 +212,11 @@ class Sequence(ParsingExpression):
|
||||
if node is None:
|
||||
return None
|
||||
else:
|
||||
if node.end != -1: # because Optional returns -1 when no match
|
||||
if node.end != -1: # because Optional and ZeroOrMore return -1 when no match
|
||||
children.append(node)
|
||||
end_pos = node.end
|
||||
|
||||
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or [])
|
||||
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
||||
|
||||
def __repr__(self):
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
@@ -284,31 +284,101 @@ class Optional(ParsingExpression):
|
||||
return f"({to_str})?"
|
||||
|
||||
|
||||
class ZeroOrMore(ParsingExpression):
|
||||
class Repetition(ParsingExpression):
|
||||
"""
|
||||
Base class for all repetition-like parser expressions (?,*,+)
|
||||
Args:
|
||||
eolterm(bool): Flag that indicates that end of line should
|
||||
terminate repetition match.
|
||||
"""
|
||||
|
||||
def __init__(self, *elements, **kwargs):
|
||||
super(Repetition, self).__init__(*elements, **kwargs)
|
||||
self.sep = kwargs.get('sep', None)
|
||||
|
||||
|
||||
class ZeroOrMore(Repetition):
|
||||
"""
|
||||
ZeroOrMore will try to match parser expression specified zero or more
|
||||
times. It will never fail.
|
||||
"""
|
||||
|
||||
def _parse(self, parser):
|
||||
raise NotImplementedError()
|
||||
init_pos = parser.pos
|
||||
end_pos = -1
|
||||
children = []
|
||||
|
||||
# Uncomment when _parse is implemented
|
||||
# def __repr__(self):
|
||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
||||
# return f"({to_str})*"
|
||||
while True:
|
||||
current_pos = parser.pos
|
||||
|
||||
# maybe eat the separator if needed
|
||||
if self.sep and children:
|
||||
sep_result = self.sep.parse(parser)
|
||||
if sep_result is None:
|
||||
parser.seek(current_pos)
|
||||
break
|
||||
|
||||
# eat the ZeroOrMore
|
||||
node = self.nodes[0].parse(parser)
|
||||
if node is None:
|
||||
parser.seek(current_pos)
|
||||
break
|
||||
else:
|
||||
if node.end != -1: # because the inner expression may return -1 when no match
|
||||
children.append(node)
|
||||
end_pos = node.end
|
||||
|
||||
if len(children) == 0:
|
||||
return NonTerminalNode(self, init_pos, -1, [], [])
|
||||
|
||||
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
||||
|
||||
def __repr__(self):
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
return f"({to_str})*"
|
||||
|
||||
|
||||
class OneOrMore(ParsingExpression):
|
||||
class OneOrMore(Repetition):
|
||||
"""
|
||||
OneOrMore will try to match parser expression specified one or more times.
|
||||
"""
|
||||
|
||||
def _parse(self, parser):
|
||||
raise NotImplementedError()
|
||||
init_pos = parser.pos
|
||||
end_pos = -1
|
||||
children = []
|
||||
|
||||
while True:
|
||||
current_pos = parser.pos
|
||||
|
||||
# maybe eat the separator if needed
|
||||
if self.sep and children:
|
||||
sep_result = self.sep.parse(parser)
|
||||
if sep_result is None:
|
||||
parser.seek(current_pos)
|
||||
break
|
||||
|
||||
# eat the repeated element
|
||||
node = self.nodes[0].parse(parser)
|
||||
if node is None:
|
||||
parser.seek(current_pos)
|
||||
break
|
||||
else:
|
||||
if node.end != -1: # because the inner expression may return -1 when no match
|
||||
children.append(node)
|
||||
end_pos = node.end
|
||||
|
||||
if len(children) == 0: # if nothing is found, it's an error
|
||||
return None
|
||||
|
||||
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
|
||||
|
||||
def __repr__(self):
|
||||
to_str = ", ".join(repr(n) for n in self.elements)
|
||||
return f"({to_str})+"
|
||||
|
||||
|
||||
class UnorderedGroup(ParsingExpression):
|
||||
class UnorderedGroup(Repetition):
|
||||
"""
|
||||
Will try to match all of the parsing expression in any order.
|
||||
"""
|
||||
@@ -316,6 +386,10 @@ class UnorderedGroup(ParsingExpression):
|
||||
def _parse(self, parser):
|
||||
raise NotImplementedError()
|
||||
|
||||
# def __repr__(self):
|
||||
# to_str = ", ".join(repr(n) for n in self.elements)
|
||||
# return f"({to_str})#"
|
||||
|
||||
|
||||
class Match(ParsingExpression):
|
||||
"""
|
||||
@@ -541,11 +615,18 @@ class ConceptLexerParser(BaseParser):
|
||||
ret.ignore_case = self.ignore_case
|
||||
elif isinstance(expression, Sequence) or \
|
||||
isinstance(expression, OrderedChoice) or \
|
||||
isinstance(expression, ZeroOrMore) or \
|
||||
isinstance(expression, OneOrMore) or \
|
||||
isinstance(expression, Optional):
|
||||
ret = expression
|
||||
ret.nodes.extend([inner_get_model(e) for e in ret.elements])
|
||||
else:
|
||||
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
||||
|
||||
# Translate separator expression.
|
||||
if isinstance(expression, Repetition) and expression.sep:
|
||||
expression.sep = inner_get_model(expression.sep)
|
||||
|
||||
return ret
|
||||
|
||||
model = inner_get_model(concept_def)
|
||||
@@ -623,7 +704,7 @@ class ConceptLexerParser(BaseParser):
|
||||
for concept, grammar in self.concepts_grammars.items():
|
||||
self.seek(init_pos)
|
||||
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
|
||||
if node is not None:
|
||||
if node is not None and node.end != -1:
|
||||
concept_node = ConceptNode(
|
||||
concept,
|
||||
node.start,
|
||||
|
||||
Reference in New Issue
Block a user