Added ZeroOrMore and OneOrMore to BNF. BNF expressions can now be captured under a rule name (`expression=name`)

This commit is contained in:
2019-12-18 12:01:51 +01:00
parent 88cd3162be
commit 8dbe2e1b20
9 changed files with 425 additions and 91 deletions
+4 -3
View File
@@ -254,13 +254,14 @@ class Sheerka(Concept):
# group the evaluators by priority and sort them # group the evaluators by priority and sort them
# The first one to be applied will be the one with the highest priority # The first one to be applied will be the one with the highest priority
grouped_evaluators = {} grouped_evaluators = {}
for item in [e() for e in self.evaluators]: all_evaluators = [e() for e in self.evaluators]
grouped_evaluators.setdefault(item.priority, []).append(item) for evaluator in [e for e in all_evaluators if e.enabled]:
grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True) sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)
# process # process
while True: while True:
simple_digest = return_values[:] # set(id(r) for r in return_values) simple_digest = return_values[:]
for priority in sorted_priorities: for priority in sorted_priorities:
# log.debug("Processing priority " + str(priority)) # log.debug("Processing priority " + str(priority))
+2 -1
View File
@@ -5,9 +5,10 @@ class BaseEvaluator:
PREFIX = "Evaluators:" PREFIX = "Evaluators:"
def __init__(self, name, priority: int, enabled=True):
    """
    Store the attributes shared by every evaluator.

    :param name: short evaluator name; it is stored prefixed with ``PREFIX``
    :param priority: value stored as ``self.priority``
    :param enabled: value stored as ``self.enabled`` (defaults to True)
    """
    self.enabled = enabled
    self.priority = priority
    self.name = self.PREFIX + name
class OneReturnValueEvaluator(BaseEvaluator): class OneReturnValueEvaluator(BaseEvaluator):
+13 -5
View File
@@ -56,7 +56,7 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator):
raise NotImplementedError("Not yet") raise NotImplementedError("Not yet")
def update_concept(self, sheerka, concept, underlying): def update_concept(self, sheerka, concept, underlying, init_empty_body=True):
""" """
Updates the property of the concept Updates the property of the concept
""" """
@@ -69,14 +69,22 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator):
if prop_name not in c.props or c.props[prop_name].value is None: if prop_name not in c.props or c.props[prop_name].value is None:
c.set_prop(prop_name, value) c.set_prop(prop_name, value)
else: else:
new_value = [c.props[prop_name].value, value] previous_value = c.props[prop_name].value
c.set_prop(prop_name, new_value) if isinstance(previous_value, list):
previous_value.append(value)
else:
new_value = [previous_value, value]
c.set_prop(prop_name, new_value)
parsing_expression = underlying.parsing_expression parsing_expression = underlying.parsing_expression
if parsing_expression.rule_name: if parsing_expression.rule_name:
_add_prop(concept, parsing_expression.rule_name, underlying.source) _add_prop(concept, parsing_expression.rule_name, underlying.source)
# the update of the body must come BEFORE the recursion
if init_empty_body and concept.body is None:
concept.metadata.body = underlying.source
if isinstance(underlying, NonTerminalNode): if isinstance(underlying, NonTerminalNode):
for child in underlying.children: for child in underlying.children:
if isinstance(child.parsing_expression, ConceptMatch): if isinstance(child.parsing_expression, ConceptMatch):
@@ -85,8 +93,8 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator):
if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT): if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT):
continue continue
else: else:
self.update_concept(sheerka, new_concept, child.children[0]) self.update_concept(sheerka, new_concept, child.children[0], init_empty_body)
else: else:
self.update_concept(sheerka, concept, child) self.update_concept(sheerka, concept, child, init_empty_body)
return concept return concept
+40 -11
View File
@@ -115,7 +115,11 @@ class BnfParser:
def parse(self, context: ExecutionContext, text): def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text) self.reset_parser(context, text)
tree = self.parse_choice() tree = self.parser_outer_rule_name()
token = self.get_token()
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
ret = self.sheerka.ret( ret = self.sheerka.ret(
self.name, self.name,
@@ -129,12 +133,15 @@ class BnfParser:
return ret return ret
def parser_outer_rule_name(self):
    """
    Parse a choice expression, then the ``=name`` that may follow it.
    """
    parse_outer = self.parse_choice
    return self.parser_rule_name(parse_outer)
def parse_choice(self): def parse_choice(self):
sequence = self.parse_sequence() sequence = self.parse_sequence()
self.eat_white_space() self.eat_white_space()
token = self.get_token() token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: if token is None or token.type != TokenKind.VBAR:
return sequence return sequence
elements = [sequence] elements = [sequence]
@@ -142,7 +149,7 @@ class BnfParser:
# maybe eat the vertical bar # maybe eat the vertical bar
self.eat_white_space() self.eat_white_space()
token = self.get_token() token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: if token is None or token.type != TokenKind.VBAR:
break break
self.next_token(skip_whitespace=True) self.next_token(skip_whitespace=True)
@@ -152,30 +159,33 @@ class BnfParser:
return OrderedChoice(*elements) return OrderedChoice(*elements)
def parse_sequence(self):
    """
    Parse one or more expressions (with their modifiers) that follow each other.

    :return: the single parsed expression when nothing follows it,
             otherwise a ``Sequence`` of every parsed element, in order
    """

    def sequence_is_over():
        # The sequence stops at the end of the input, on '=', before a '|'
        # and, inside parentheses, before the closing ')'.
        token = self.get_token()
        return token is None or \
            token.type == TokenKind.EOF or \
            token.type == TokenKind.EQUALS or \
            self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
            self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR)

    first = self.parse_modifier()
    if sequence_is_over():
        return first

    elements = [first]
    while not sequence_is_over():
        self.eat_white_space()
        elements.append(self.parse_modifier())

    return Sequence(*elements)
def parse_expression_and_modifier(self): def parse_modifier(self):
expression = self.parse_expression() expression = self.parser_inner_rule_name()
token = self.get_token() token = self.get_token()
@@ -193,6 +203,9 @@ class BnfParser:
return expression return expression
def parser_inner_rule_name(self):
    """
    Parse a single expression, then the ``=name`` that may follow it.
    """
    parse_inner = self.parse_expression
    return self.parser_rule_name(parse_inner)
def parse_expression(self): def parse_expression(self):
token = self.get_token() token = self.get_token()
if token.type == TokenKind.EOF: if token.type == TokenKind.EOF:
@@ -207,7 +220,7 @@ class BnfParser:
self.next_token() self.next_token()
return expression return expression
else: else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR])) self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
return expression return expression
if token.type == TokenKind.IDENTIFIER: if token.type == TokenKind.IDENTIFIER:
@@ -225,3 +238,19 @@ class BnfParser:
ret = StrMatch(core.utils.strip_quotes(token.value)) ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token() self.next_token()
return ret return ret
def parser_rule_name(self, next_to_parse):
    """
    Parse an expression and attach the rule name that may follow it.

    ``next_to_parse`` is called first. If the current token is then ``=``,
    the token after it must be an identifier; its value becomes the
    ``rule_name`` of the parsed expression.

    :param next_to_parse: zero-argument callable returning the parsed expression
    :return: the parsed expression, or the result of ``add_error`` when
             ``=`` is not followed by an identifier
    """
    expression = next_to_parse()

    maybe_equals = self.get_token()
    if maybe_equals is None or maybe_equals.type != TokenKind.EQUALS:
        # no '=name' suffix: the expression stays unnamed
        return expression

    self.next_token()  # consume '='
    name_token = self.get_token()
    if name_token is None or name_token.type != TokenKind.IDENTIFIER:
        error = UnexpectedTokenErrorNode(f"Unexpected token '{name_token}'", [TokenKind.IDENTIFIER])
        return self.add_error(error)

    expression.rule_name = name_token.value
    self.next_token()  # consume the identifier
    return expression
+93 -12
View File
@@ -212,11 +212,11 @@ class Sequence(ParsingExpression):
if node is None: if node is None:
return None return None
else: else:
if node.end != -1: # because Optional returns -1 when no match if node.end != -1: # because returns -1 when no match
children.append(node) children.append(node)
end_pos = node.end end_pos = node.end
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or []) return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
def __repr__(self): def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements) to_str = ", ".join(repr(n) for n in self.elements)
@@ -284,31 +284,101 @@ class Optional(ParsingExpression):
return f"({to_str})?" return f"({to_str})?"
class Repetition(ParsingExpression):
    """
    Base class for the repetition-like parser expressions
    (ZeroOrMore, OneOrMore, UnorderedGroup).

    Args:
        sep: Optional separator between two consecutive repetitions.
            Read from the ``sep`` keyword argument; None (the default)
            means that no separator is expected.
    """

    def __init__(self, *elements, **kwargs):
        super(Repetition, self).__init__(*elements, **kwargs)
        # Only ``sep`` is consumed here; every keyword argument is still
        # forwarded to the base constructor above.
        self.sep = kwargs.get('sep')
class ZeroOrMore(Repetition):
    """
    ZeroOrMore will try to match parser expression specified zero or more
    times. It will never fail.
    """

    def _parse(self, parser):
        """
        Greedily match the inner expression, with ``sep`` between repetitions.

        :param parser: object exposing ``pos``, ``seek`` and ``tokens``
        :return: a NonTerminalNode; when nothing matched its end is -1 and
                 it carries no token and no child
        """
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # a separator is only expected between two repetitions
            if self.sep and children and self.sep.parse(parser) is None:
                parser.seek(current_pos)
                break

            node = self.nodes[0].parse(parser)
            # None is a failed match and end == -1 is an empty match (what
            # this class itself returns when nothing matched). Neither makes
            # progress, so stop here: looping again on an empty match would
            # never terminate.
            if node is None or node.end == -1:
                parser.seek(current_pos)
                break

            children.append(node)
            end_pos = node.end

        if not children:
            return NonTerminalNode(self, init_pos, -1, [], [])

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})*"
class OneOrMore(Repetition):
    """
    OneOrMore will try to match parser expression specified one or more times.
    """

    def _parse(self, parser):
        """
        Greedily match the inner expression, with ``sep`` between repetitions.

        :param parser: object exposing ``pos``, ``seek`` and ``tokens``
        :return: a NonTerminalNode spanning every repetition, or None when
                 the inner expression did not match at least once
        """
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # a separator is only expected between two repetitions
            if self.sep and children and self.sep.parse(parser) is None:
                parser.seek(current_pos)
                break

            # eat one more repetition of the inner expression
            node = self.nodes[0].parse(parser)
            # None is a failed match and end == -1 is an empty match. Neither
            # makes progress, so stop here: looping again on an empty match
            # would never terminate.
            if node is None or node.end == -1:
                parser.seek(current_pos)
                break

            children.append(node)
            end_pos = node.end

        if not children:  # if nothing is found, it's an error
            return None

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})+"
class UnorderedGroup(ParsingExpression): class UnorderedGroup(Repetition):
""" """
Will try to match all of the parsing expression in any order. Will try to match all of the parsing expression in any order.
""" """
@@ -316,6 +386,10 @@ class UnorderedGroup(ParsingExpression):
def _parse(self, parser): def _parse(self, parser):
raise NotImplementedError() raise NotImplementedError()
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return f"({to_str})#"
class Match(ParsingExpression): class Match(ParsingExpression):
""" """
@@ -541,11 +615,18 @@ class ConceptLexerParser(BaseParser):
ret.ignore_case = self.ignore_case ret.ignore_case = self.ignore_case
elif isinstance(expression, Sequence) or \ elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \ isinstance(expression, OrderedChoice) or \
isinstance(expression, ZeroOrMore) or \
isinstance(expression, OneOrMore) or \
isinstance(expression, Optional): isinstance(expression, Optional):
ret = expression ret = expression
ret.nodes.extend([inner_get_model(e) for e in ret.elements]) ret.nodes.extend([inner_get_model(e) for e in ret.elements])
else: else:
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
# Translate separator expression.
if isinstance(expression, Repetition) and expression.sep:
expression.sep = inner_get_model(expression.sep)
return ret return ret
model = inner_get_model(concept_def) model = inner_get_model(concept_def)
@@ -623,7 +704,7 @@ class ConceptLexerParser(BaseParser):
for concept, grammar in self.concepts_grammars.items(): for concept, grammar in self.concepts_grammars.items():
self.seek(init_pos) self.seek(init_pos)
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
if node is not None: if node is not None and node.end != -1:
concept_node = ConceptNode( concept_node = ConceptNode(
concept, concept,
node.start, node.start,
+26 -47
View File
@@ -26,6 +26,7 @@ def get_context():
("'foo'+", OneOrMore(StrMatch("foo"))), ("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))), ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice( ("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")), Sequence(StrMatch("1"), StrMatch("2")),
@@ -39,6 +40,29 @@ def get_context():
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))), ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))), ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")), ("(1 )", StrMatch("1")),
("foo", ConceptMatch("foo")),
("foo*", ZeroOrMore(ConceptMatch("foo"))),
("foo 'and' bar+", Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))),
("foo | bar?", OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
]) ])
def test_i_can_parse_regex(expression, expected): def test_i_can_parse_regex(expression, expected):
parser = BnfParser() parser = BnfParser()
@@ -53,7 +77,8 @@ def test_i_can_parse_regex(expression, expected):
@pytest.mark.parametrize("expression, error", [ @pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()), ("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()), ("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])), ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.RPAR])),
("1=", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.IDENTIFIER])),
]) ])
def test_i_can_detect_errors(expression, error): def test_i_can_detect_errors(expression, error):
parser = BnfParser() parser = BnfParser()
@@ -64,52 +89,6 @@ def test_i_can_detect_errors(expression, error):
assert ret_value[0] == error assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
expression = "foo"
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ConceptMatch("foo")
assert res.value.source == expression
def test_i_can_parse_cross_ref_with_modifier():
expression = "foo*"
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
assert res.value.source == expression
def test_i_can_parse_sequence_with_cross_ref():
expression = "foo 'and' bar+"
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_parse_choice_with_cross_ref():
foo = Concept("foo")
bar = Concept("bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
expression = "foo | bar?"
parser = BnfParser()
res = parser.parse(context, Tokenizer(expression))
assert res.status
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(): def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo") foo = Concept(name="foo")
bar = Concept(name="bar") bar = Concept(name="bar")
+193 -1
View File
@@ -3,7 +3,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore
class ConceptVisitor(ParsingExpressionVisitor): class ConceptVisitor(ParsingExpressionVisitor):
@@ -352,6 +352,22 @@ def test_i_can_parse_sequence_with_optional_in_between():
assert res.value.body == [(foo, 0, 2, "one three")] assert res.value.body == [(foo, 0, 2, "one three")]
def test_i_cannot_parse_wrong_input_with_optional():
    """An input the Optional grammar does not match is reported as unknown."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)
    outcome = lexer_parser.parse(ctx, "two")

    assert not outcome.status
    assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
    assert outcome.value.try_parsed == []

    unknown = outcome.value.body[0]
    assert ctx.sheerka.isinstance(unknown, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert unknown.body == "two"
def test_i_can_use_reference(): def test_i_can_use_reference():
# when there are multiple matches for the same input # when there are multiple matches for the same input
# Do I need to create a choice concept ? # Do I need to create a choice concept ?
@@ -469,6 +485,172 @@ def test_i_can_detect_duplicates_when_reference():
assert res[1].value.body == [(foo, 0, 0, "twenty")] assert res[1].value.body == [(foo, 0, 0, "twenty")]
def test_i_can_parse_zero_or_more():
    """ZeroOrMore collects every repetition of its inner expression."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)
    outcome = lexer_parser.parse(ctx, "one one")

    assert outcome.status

    repetitions = [u("one", 0, 0), u("one", 2, 2)]
    expected = ConceptNode(foo, 0, 2, source="one one", underlying=u(grammar[foo], 0, 2, repetitions))
    assert outcome.value.value == [expected]
def test_i_can_parse_sequence_and_zero_or_more():
    """A ZeroOrMore at the start of a Sequence may match several times or not at all."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: Sequence(ZeroOrMore("one"), "two")}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)

    # the repetition matches twice before "two"
    outcome = lexer_parser.parse(ctx, "one one two")
    assert outcome.status
    repeated = u(ZeroOrMore("one"), 0, 2, [u("one", 0, 0), u("one", 2, 2)])
    underlying = u(grammar[foo], 0, 4, [repeated, u("two", 4, 4)])
    assert outcome.value.value == [ConceptNode(foo, 0, 4, source="one one two", underlying=underlying)]

    # the repetition matches nothing: only "two" remains in the tree
    outcome = lexer_parser.parse(ctx, "two")
    assert outcome.status
    underlying = u(grammar[foo], 0, 0, [u("two", 0, 0)])
    assert outcome.value.value == [ConceptNode(foo, 0, 0, source="two", underlying=underlying)]
def test_i_cannot_parse_zero_and_more_when_wrong_entry():
    """Tokens that ZeroOrMore cannot consume make the whole parsing fail."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)

    # "one" is recognized, "two" is left over
    outcome = lexer_parser.parse(ctx, "one two")
    assert not outcome.status
    assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
    recognized = ConceptNode(foo, 0, 0, source="one", underlying=u(ZeroOrMore("one"), 0, 0, [u("one", 0, 0)]))
    assert outcome.value.try_parsed == [recognized]
    leftover = outcome.value.body[0]
    assert ctx.sheerka.isinstance(leftover, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert leftover.body == "two"

    # nothing is recognized at all
    outcome = lexer_parser.parse(ctx, "two")
    assert not outcome.status
    assert ctx.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
    assert outcome.value.try_parsed == []
    leftover = outcome.value.body[0]
    assert ctx.sheerka.isinstance(leftover, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert leftover.body == "two"
def test_i_can_parse_zero_and_more_with_separator():
    """The separator of a ZeroOrMore is consumed but is not kept as a child."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one", sep=",")}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)
    text = "one, one , one"
    outcome = lexer_parser.parse(ctx, text)

    assert outcome.status

    repetitions = [u("one", 0, 0), u("one", 3, 3), u("one", 7, 7)]
    expected = ConceptNode(foo, 0, 7, source=text, underlying=u(grammar[foo], 0, 7, repetitions))
    assert outcome.value.value == [expected]
def test_that_zero_and_more_is_greedy():
    """The expected result is the single ZeroOrMore concept spanning the whole input."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: ZeroOrMore("one"), bar: "one"}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)
    outcome = lexer_parser.parse(ctx, "one one one")

    assert outcome.status
    whole_input = (foo, 0, 4, "one one one")
    assert outcome.value.value == [whole_input]
def test_i_can_parse_one_and_more():
    """OneOrMore collects every repetition of its inner expression."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one")}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)
    outcome = lexer_parser.parse(ctx, "one one")

    assert outcome.status

    repetitions = [u("one", 0, 0), u("one", 2, 2)]
    expected = ConceptNode(foo, 0, 2, source="one one", underlying=u(grammar[foo], 0, 2, repetitions))
    assert outcome.value.value == [expected]
def test_i_can_parse_sequence_and_one_or_more():
    """A OneOrMore at the start of a Sequence must match at least once."""
    context = get_context()
    foo = Concept(name="foo")
    concepts = {foo: Sequence(OneOrMore("one"), "two")}

    parser = ConceptLexerParser()
    parser.initialize(context, concepts)

    res = parser.parse(context, "one one two")
    assert res.status
    # the expected repetition node is described with OneOrMore, the
    # expression that is actually part of the grammar under test
    assert res.value.value == [ConceptNode(foo, 0, 4, source="one one two",
                                           underlying=u(concepts[foo], 0, 4, [
                                               u(OneOrMore("one"), 0, 2, [
                                                   u("one", 0, 0),
                                                   u("one", 2, 2)]),
                                               u("two", 4, 4)]))]

    # without any "one" the sequence cannot match
    res = parser.parse(context, "two")
    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
    assert res.value.try_parsed == []
    assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body[0].body == "two"
def test_i_can_parse_one_and_more_with_separator():
    """The separator of a OneOrMore is consumed but is not kept as a child."""
    ctx = get_context()
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one", sep=",")}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)
    text = "one, one , one"
    outcome = lexer_parser.parse(ctx, text)

    assert outcome.status

    repetitions = [u("one", 0, 0), u("one", 3, 3), u("one", 7, 7)]
    expected = ConceptNode(foo, 0, 7, source=text, underlying=u(grammar[foo], 0, 7, repetitions))
    assert outcome.value.value == [expected]
def test_that_one_and_more_is_greedy():
    """The expected result is the single OneOrMore concept spanning the whole input."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: OneOrMore("one"), bar: "one"}

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(ctx, grammar)
    outcome = lexer_parser.parse(ctx, "one one one")

    assert outcome.status
    whole_input = (foo, 0, 4, "one one one")
    assert outcome.value.value == [whole_input]
def test_i_can_detect_infinite_recursion(): def test_i_can_detect_infinite_recursion():
foo = Concept(name="foo") foo = Concept(name="foo")
bar = Concept(name="bar") bar = Concept(name="bar")
@@ -552,6 +734,16 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
pass pass
def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more():
    # TODO infinite recursion with ZeroOrMore
    pass
def test_i_can_detect_indirect_infinite_recursion_with_one_and_more():
    # TODO infinite recursion with OneOrMore
    pass
def test_i_can_visit_parsing_expression(): def test_i_can_visit_parsing_expression():
mult = Concept(name="mult") mult = Concept(name="mult")
add = Concept(name="add") add = Concept(name="add")
+52 -9
View File
@@ -5,7 +5,7 @@ from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \ from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \
StrMatch, Optional, OrderedChoice StrMatch, Optional, OrderedChoice, ZeroOrMore
def get_context(): def get_context():
@@ -61,7 +61,7 @@ def test_concept_is_returned_when_list_of_one_concept_node():
assert result.who == evaluator.name assert result.who == evaluator.name
assert result.status assert result.status
assert result.value == node.concept assert result.value == Concept("foo", body="foo").init_key()
assert result.parents == [ret_val] assert result.parents == [ret_val]
@@ -119,6 +119,22 @@ def test_concept_property_is_correctly_updated_for_optional():
assert updated.props["o"].value == "two" assert updated.props["o"].value == "two"
def test_concept_property_is_correctly_updated_for_zero_or_more():
    """A named ZeroOrMore stores the whole matched text in its property."""
    ctx = get_context()
    foo = Concept("foo")
    grammar = {foo: ZeroOrMore("one", rule_name="variable")}
    node = get_concept_node(ctx, grammar, "one one one")

    evaluator = ConceptNodeEvaluator()
    updated = evaluator.update_concept(ctx.sheerka, ctx.sheerka.new(node.concept.key), node.underlying)

    assert "variable" in updated.props
    assert updated.props["variable"].value == "one one one"
def test_concept_property_is_correctly_updated_when_list_of_properties(): def test_concept_property_is_correctly_updated_when_list_of_properties():
context = get_context() context = get_context()
@@ -141,9 +157,10 @@ def test_concept_property_is_correctly_updated_when_another_concept():
foo = Concept("foo") foo = Concept("foo")
bar = Concept("bar") bar = Concept("bar")
context.sheerka.add_in_cache(foo) context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
grammar = { grammar = {
foo: Sequence("one", "two", rule_name="variable"), foo: Sequence("one", "two", rule_name="var"),
bar: Sequence(foo, "three", rule_name="variable")} bar: Sequence(foo, "three", rule_name="var")}
concept_node = get_concept_node(context, grammar, "one two three") concept_node = get_concept_node(context, grammar, "one two three")
updated = ConceptNodeEvaluator().update_concept( updated = ConceptNodeEvaluator().update_concept(
@@ -151,11 +168,11 @@ def test_concept_property_is_correctly_updated_when_another_concept():
context.sheerka.new(concept_node.concept.key), context.sheerka.new(concept_node.concept.key),
concept_node.underlying) concept_node.underlying)
assert updated.props["variable"].value == "one two three" assert updated.props["var"].value == "one two three"
assert updated.props["foo"].value == Concept("foo").set_prop("variable", "one two").init_key() assert updated.props["foo"].value == Concept("foo", body="one two").set_prop("var", "one two").init_key()
def test_concept_property_is_correctly_updated_when_concept_recursion(): def test_concept_property_is_correctly_updated_when_concept_recursion_using_optional():
context = get_context() context = get_context()
number = Concept("number") number = Concept("number")
@@ -173,6 +190,32 @@ def test_concept_property_is_correctly_updated_when_concept_recursion():
context.sheerka.new(concept_node.concept.key), context.sheerka.new(concept_node.concept.key),
concept_node.underlying) concept_node.underlying)
assert updated.props["number"].value == Concept("number").init_key() assert updated.props["number"].value == Concept("number", body="one").init_key()
assert updated.props["op"].value == "plus" assert updated.props["op"].value == "plus"
assert updated.props["add"].value == Concept("add").set_prop("number", Concept("number").init_key()).init_key() expected_add = Concept("add", body="two").set_prop("number", Concept("number", body="two").init_key()).init_key()
assert updated.props["add"].value == expected_add
def test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more():
    """Properties matched several times through a ZeroOrMore are gathered in lists."""
    ctx = get_context()
    number = Concept("number")
    add = Concept("add")
    ctx.sheerka.add_in_cache(number)
    ctx.sheerka.add_in_cache(add)

    operator = OrderedChoice("plus", "minus", rule_name="op")
    grammar = {
        number: OrderedChoice("one", "two", 'three'),
        add: Sequence(number, ZeroOrMore(Sequence(operator, number))),
    }
    node = get_concept_node(ctx, grammar, "one plus two minus three")

    evaluator = ConceptNodeEvaluator()
    updated = evaluator.update_concept(
        ctx.sheerka,
        ctx.sheerka.new(node.concept.key),
        node.underlying,
        init_empty_body=True)

    expected_numbers = [Concept("number", body=word).init_key() for word in ("one", "two", "three")]
    assert updated.props["number"].value == expected_numbers
    assert updated.props["op"].value == ["plus", "minus"]
+1 -1
View File
@@ -626,7 +626,7 @@ def test_i_can_eval_bnf_definitions_with_variables():
return_value = res[0].value return_value = res[0].value
assert sheerka.isinstance(return_value, concept_b) assert sheerka.isinstance(return_value, concept_b)
assert return_value.props["a"] == Property("a", concept_a) assert return_value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one"))
def test_i_can_eval_bnf_definitions_from_separate_instances(): def test_i_can_eval_bnf_definitions_from_separate_instances():