From 8dbe2e1b20312f91170ce25c7efb8cb402de69fa Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Wed, 18 Dec 2019 12:01:51 +0100 Subject: [PATCH] Added ZeroOrMore and OneOrMore to BNF. BNF expressions can now be captured --- core/sheerka.py | 7 +- evaluators/BaseEvaluator.py | 3 +- evaluators/ConceptNodeEvaluator.py | 18 ++- parsers/BnfParser.py | 53 ++++++-- parsers/ConceptLexerParser.py | 105 ++++++++++++++-- tests/test_BnfParser.py | 73 ++++------- tests/test_ConceptLexerParser.py | 194 ++++++++++++++++++++++++++++- tests/test_ConceptNodeEvaluator.py | 61 +++++++-- tests/test_sheerka.py | 2 +- 9 files changed, 425 insertions(+), 91 deletions(-) diff --git a/core/sheerka.py b/core/sheerka.py index b8a0b6f..7764136 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -254,13 +254,14 @@ class Sheerka(Concept): # group the evaluators by priority and sort them # The first one to be applied will be the one with the highest priority grouped_evaluators = {} - for item in [e() for e in self.evaluators]: - grouped_evaluators.setdefault(item.priority, []).append(item) + all_evaluators = [e() for e in self.evaluators] + for evaluator in [e for e in all_evaluators if e.enabled]: + grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator) sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True) # process while True: - simple_digest = return_values[:] # set(id(r) for r in return_values) + simple_digest = return_values[:] for priority in sorted_priorities: # log.debug("Processing priority " + str(priority)) diff --git a/evaluators/BaseEvaluator.py b/evaluators/BaseEvaluator.py index ae40ce4..6928452 100644 --- a/evaluators/BaseEvaluator.py +++ b/evaluators/BaseEvaluator.py @@ -5,9 +5,10 @@ class BaseEvaluator: PREFIX = "Evaluators:" - def __init__(self, name, priority: int): + def __init__(self, name, priority: int, enabled=True): self.name = self.PREFIX + name self.priority = priority + self.enabled = enabled class
OneReturnValueEvaluator(BaseEvaluator): diff --git a/evaluators/ConceptNodeEvaluator.py b/evaluators/ConceptNodeEvaluator.py index 5d3dec0..aa6d57a 100644 --- a/evaluators/ConceptNodeEvaluator.py +++ b/evaluators/ConceptNodeEvaluator.py @@ -56,7 +56,7 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator): raise NotImplementedError("Not yet") - def update_concept(self, sheerka, concept, underlying): + def update_concept(self, sheerka, concept, underlying, init_empty_body=True): """ Updates the property of the concept """ @@ -69,14 +69,22 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator): if prop_name not in c.props or c.props[prop_name].value is None: c.set_prop(prop_name, value) else: - new_value = [c.props[prop_name].value, value] - c.set_prop(prop_name, new_value) + previous_value = c.props[prop_name].value + if isinstance(previous_value, list): + previous_value.append(value) + else: + new_value = [previous_value, value] + c.set_prop(prop_name, new_value) parsing_expression = underlying.parsing_expression if parsing_expression.rule_name: _add_prop(concept, parsing_expression.rule_name, underlying.source) + # the update of the body must come BEFORE the recursion + if init_empty_body and concept.body is None: + concept.metadata.body = underlying.source + if isinstance(underlying, NonTerminalNode): for child in underlying.children: if isinstance(child.parsing_expression, ConceptMatch): @@ -85,8 +93,8 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator): if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT): continue else: - self.update_concept(sheerka, new_concept, child.children[0]) + self.update_concept(sheerka, new_concept, child.children[0], init_empty_body) else: - self.update_concept(sheerka, concept, child) + self.update_concept(sheerka, concept, child, init_empty_body) return concept diff --git a/parsers/BnfParser.py b/parsers/BnfParser.py index 85d0142..1636949 100644 --- a/parsers/BnfParser.py +++ b/parsers/BnfParser.py @@ -115,7 
+115,11 @@ class BnfParser: def parse(self, context: ExecutionContext, text): self.reset_parser(context, text) - tree = self.parse_choice() + tree = self.parser_outer_rule_name() + + token = self.get_token() + if token and token.type != TokenKind.EOF: + self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [])) ret = self.sheerka.ret( self.name, @@ -129,12 +133,15 @@ class BnfParser: return ret + def parser_outer_rule_name(self): + return self.parser_rule_name(self.parse_choice) + def parse_choice(self): sequence = self.parse_sequence() self.eat_white_space() token = self.get_token() - if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: + if token is None or token.type != TokenKind.VBAR: return sequence elements = [sequence] @@ -142,7 +149,7 @@ class BnfParser: # maybe eat the vertical bar self.eat_white_space() token = self.get_token() - if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: + if token is None or token.type != TokenKind.VBAR: break self.next_token(skip_whitespace=True) @@ -152,30 +159,33 @@ class BnfParser: return OrderedChoice(*elements) def parse_sequence(self): - expr_and_modifier = self.parse_expression_and_modifier() + expr_and_modifier = self.parse_modifier() token = self.get_token() - if token is None or token.type == TokenKind.EOF or \ + if token is None or \ + token.type == TokenKind.EOF or \ + token.type == TokenKind.EQUALS or \ self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR): return expr_and_modifier elements = [expr_and_modifier] while True: - # maybe eat the comma token = self.get_token() - if token is None or token.type == TokenKind.EOF or \ + if token is None or \ + token.type == TokenKind.EOF or \ + token.type == TokenKind.EQUALS or \ self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, 
TokenKind.RPAR): break self.eat_white_space() - sequence = self.parse_expression_and_modifier() + sequence = self.parse_modifier() elements.append(sequence) return Sequence(*elements) - def parse_expression_and_modifier(self): - expression = self.parse_expression() + def parse_modifier(self): + expression = self.parser_inner_rule_name() token = self.get_token() @@ -193,6 +203,9 @@ class BnfParser: return expression + def parser_inner_rule_name(self): + return self.parser_rule_name(self.parse_expression) + def parse_expression(self): token = self.get_token() if token.type == TokenKind.EOF: @@ -207,7 +220,7 @@ class BnfParser: self.next_token() return expression else: - self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR])) + self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR])) return expression if token.type == TokenKind.IDENTIFIER: @@ -224,4 +237,20 @@ class BnfParser: ret = StrMatch(core.utils.strip_quotes(token.value)) self.next_token() - return ret \ No newline at end of file + return ret + + def parser_rule_name(self, next_to_parse): + expression = next_to_parse() + token = self.get_token() + if token is None or token.type != TokenKind.EQUALS: + return expression + + self.next_token() # eat equals + token = self.get_token() + + if token is None or token.type != TokenKind.IDENTIFIER: + return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.IDENTIFIER])) + + expression.rule_name = token.value + self.next_token() + return expression diff --git a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py index f4984d2..f90d390 100644 --- a/parsers/ConceptLexerParser.py +++ b/parsers/ConceptLexerParser.py @@ -212,11 +212,11 @@ class Sequence(ParsingExpression): if node is None: return None else: - if node.end != -1: # because Optional returns -1 when no match + if node.end != -1: # because Optional and ZeroOrMore return -1 when no match children.append(node) end_pos = node.end
- return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or []) + return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -284,31 +284,101 @@ class Optional(ParsingExpression): return f"({to_str})?" -class ZeroOrMore(ParsingExpression): +class Repetition(ParsingExpression): + """ + Base class for all repetition-like parser expressions (?,*,+) + Args: + sep: Optional separator expression that must be matched + between two consecutive repetitions. + """ + + def __init__(self, *elements, **kwargs): + super(Repetition, self).__init__(*elements, **kwargs) + self.sep = kwargs.get('sep', None) + + +class ZeroOrMore(Repetition): """ ZeroOrMore will try to match parser expression specified zero or more times. It will never fail. """ def _parse(self, parser): - raise NotImplementedError() + init_pos = parser.pos + end_pos = -1 + children = [] - # Uncomment when _parse is implemented - # def __repr__(self): - # to_str = ", ".join(repr(n) for n in self.elements) - # return f"({to_str})*" + while True: + current_pos = parser.pos + + # maybe eat the separator if needed + if self.sep and children: + sep_result = self.sep.parse(parser) + if sep_result is None: + parser.seek(current_pos) + break + + # eat the ZeroOrMore + node = self.nodes[0].parse(parser) + if node is None: + parser.seek(current_pos) + break + else: + if node.end != -1: # because Optional and ZeroOrMore return -1 when no match + children.append(node) + end_pos = node.end + + if len(children) == 0: + return NonTerminalNode(self, init_pos, -1, [], []) + + return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) + + def __repr__(self): + to_str = ", ".join(repr(n) for n in self.elements) + return f"({to_str})*" -class OneOrMore(ParsingExpression): +class OneOrMore(Repetition): """ OneOrMore will try to match parser expression specified one or more times.
""" def _parse(self, parser): - raise NotImplementedError() + init_pos = parser.pos + end_pos = -1 + children = [] + + while True: + current_pos = parser.pos + + # maybe eat the separator if needed + if self.sep and children: + sep_result = self.sep.parse(parser) + if sep_result is None: + parser.seek(current_pos) + break + + # eat the OneOrMore + node = self.nodes[0].parse(parser) + if node is None: + parser.seek(current_pos) + break + else: + if node.end != -1: # because Optional and ZeroOrMore return -1 when no match + children.append(node) + end_pos = node.end + + if len(children) == 0: # if nothing is found, it's an error + return None + + return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) + + def __repr__(self): + to_str = ", ".join(repr(n) for n in self.elements) + return f"({to_str})+" -class UnorderedGroup(ParsingExpression): +class UnorderedGroup(Repetition): """ Will try to match all of the parsing expression in any order. """ @@ -316,6 +386,10 @@ class UnorderedGroup(ParsingExpression): def _parse(self, parser): raise NotImplementedError() + # def __repr__(self): + # to_str = ", ".join(repr(n) for n in self.elements) + # return f"({to_str})#" + class Match(ParsingExpression): """ @@ -541,11 +615,18 @@ class ConceptLexerParser(BaseParser): ret.ignore_case = self.ignore_case elif isinstance(expression, Sequence) or \ isinstance(expression, OrderedChoice) or \ + isinstance(expression, ZeroOrMore) or \ + isinstance(expression, OneOrMore) or \ isinstance(expression, Optional): ret = expression ret.nodes.extend([inner_get_model(e) for e in ret.elements]) else: ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) + + # Translate separator expression.
+ if isinstance(expression, Repetition) and expression.sep: + expression.sep = inner_get_model(expression.sep) + return ret model = inner_get_model(concept_def) @@ -623,7 +704,7 @@ class ConceptLexerParser(BaseParser): for concept, grammar in self.concepts_grammars.items(): self.seek(init_pos) node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode - if node is not None: + if node is not None and node.end != -1: concept_node = ConceptNode( concept, node.start, diff --git a/tests/test_BnfParser.py b/tests/test_BnfParser.py index b2ce301..a544d84 100644 --- a/tests/test_BnfParser.py +++ b/tests/test_BnfParser.py @@ -26,6 +26,7 @@ def get_context(): ("'foo'+", OneOrMore(StrMatch("foo"))), ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), + ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))), ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))), ("1 2 | 3 4+", OrderedChoice( Sequence(StrMatch("1"), StrMatch("2")), @@ -39,6 +40,29 @@ def get_context(): ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))), ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))), ("(1 )", StrMatch("1")), + ("foo", ConceptMatch("foo")), + ("foo*", ZeroOrMore(ConceptMatch("foo"))), + ("foo 'and' bar+", Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))), + ("foo | bar?", OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))), + ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))), + ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))), + ("'str'=var", StrMatch("str", rule_name="var")), + ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")), + ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")), + ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")), 
+ ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")), + ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")), + ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")), + ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))), + ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))), + ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))), + ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))), + ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))), + ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))), + ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")), + ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")), + ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")), + ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))), ]) def test_i_can_parse_regex(expression, expected): parser = BnfParser() @@ -53,7 +77,8 @@ def test_i_can_parse_regex(expression, expected): @pytest.mark.parametrize("expression, error", [ ("1 ", UnexpectedEndOfFileError()), ("1|", UnexpectedEndOfFileError()), - ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])), + ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token()'", [TokenKind.RPAR])), + ("1=", UnexpectedTokenErrorNode("Unexpected token 'Token()'", [TokenKind.IDENTIFIER])), ]) def test_i_can_detect_errors(expression, error): parser = BnfParser() @@ -64,52 +89,6 @@ def test_i_can_detect_errors(expression, error): assert ret_value[0] == error -def test_i_can_parse_regex_with_reference(): - expression = "foo" - parser = BnfParser() - res = parser.parse(get_context(), Tokenizer(expression)) - - assert res.status - assert res.value.value == ConceptMatch("foo") - assert res.value.source == expression - - -def test_i_can_parse_cross_ref_with_modifier(): - expression = "foo*" - parser = BnfParser() - res = 
parser.parse(get_context(), Tokenizer(expression)) - - assert res.status - assert res.value.value == ZeroOrMore(ConceptMatch("foo")) - assert res.value.source == expression - - -def test_i_can_parse_sequence_with_cross_ref(): - expression = "foo 'and' bar+" - parser = BnfParser() - res = parser.parse(get_context(), Tokenizer(expression)) - - assert res.status - assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar"))) - assert res.value.source == expression - - -def test_i_can_parse_choice_with_cross_ref(): - foo = Concept("foo") - bar = Concept("bar") - context = get_context() - context.sheerka.add_in_cache(foo) - context.sheerka.add_in_cache(bar) - - expression = "foo | bar?" - parser = BnfParser() - res = parser.parse(context, Tokenizer(expression)) - - assert res.status - assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar"))) - assert res.value.source == expression - - def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(): foo = Concept(name="foo") bar = Concept(name="bar") diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py index 72b398c..c032a08 100644 --- a/tests/test_ConceptLexerParser.py +++ b/tests/test_ConceptLexerParser.py @@ -3,7 +3,7 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ - ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch + ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore class ConceptVisitor(ParsingExpressionVisitor): @@ -352,6 +352,22 @@ def test_i_can_parse_sequence_with_optional_in_between(): assert res.value.body == [(foo, 0, 2, "one three")] +def test_i_cannot_parse_wrong_input_with_optional(): + context = get_context() + foo 
= Concept(name="foo") + + concepts = {foo: Optional("one")} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "two") + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.try_parsed == [] + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "two" + + def test_i_can_use_reference(): # when there are multiple matches for the same input # Do I need to create a choice concept ? @@ -469,6 +485,172 @@ def test_i_can_detect_duplicates_when_reference(): assert res[1].value.body == [(foo, 0, 0, "twenty")] +def test_i_can_parse_zero_or_more(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: ZeroOrMore("one")} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one one") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 2, source="one one", + underlying=u(concepts[foo], 0, 2, [ + u("one", 0, 0), + u("one", 2, 2)]))] + + +def test_i_can_parse_sequence_and_zero_or_more(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Sequence(ZeroOrMore("one"), "two")} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one one two") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 4, source="one one two", + underlying=u(concepts[foo], 0, 4, [ + u(ZeroOrMore("one"), 0, 2, [ + u("one", 0, 0), + u("one", 2, 2)]), + u("two", 4, 4)]))] + + res = parser.parse(context, "two") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 0, source="two", + underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))] + + +def test_i_cannot_parse_zero_and_more_when_wrong_entry(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: ZeroOrMore("one")} + parser = ConceptLexerParser() + 
parser.initialize(context, concepts) + + res = parser.parse(context, "one two") + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.try_parsed == [ + ConceptNode(foo, 0, 0, source="one", underlying=u(ZeroOrMore("one"), 0, 0, [u("one", 0, 0)]))] + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "two" + + res = parser.parse(context, "two") + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.try_parsed == [] + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "two" + + +def test_i_can_parse_zero_and_more_with_separator(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: ZeroOrMore("one", sep=",")} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one, one , one") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 7, source="one, one , one", + underlying=u(concepts[foo], 0, 7, [ + u("one", 0, 0), + u("one", 3, 3), + u("one", 7, 7)]))] + + +def test_that_zero_and_more_is_greedy(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = {foo: ZeroOrMore("one"), bar: "one"} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one one one") + assert res.status + assert res.value.value == [(foo, 0, 4, "one one one")] + + +def test_i_can_parse_one_and_more(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: OneOrMore("one")} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one one") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 2, source="one one", + underlying=u(concepts[foo], 0, 2, [ + u("one", 0, 0), + 
u("one", 2, 2)]))] + + +def test_i_can_parse_sequence_and_one_or_more(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: Sequence(OneOrMore("one"), "two")} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one one two") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 4, source="one one two", + underlying=u(concepts[foo], 0, 4, [ + u(ZeroOrMore("one"), 0, 2, [ + u("one", 0, 0), + u("one", 2, 2)]), + u("two", 4, 4)]))] + + res = parser.parse(context, "two") + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) + assert res.value.try_parsed == [] + assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body[0].body == "two" + + +def test_i_can_parse_one_and_more_with_separator(): + context = get_context() + foo = Concept(name="foo") + + concepts = {foo: OneOrMore("one", sep=",")} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one, one , one") + assert res.status + assert res.value.value == [ConceptNode(foo, 0, 7, source="one, one , one", + underlying=u(concepts[foo], 0, 7, [ + u("one", 0, 0), + u("one", 3, 3), + u("one", 7, 7)]))] + + +def test_that_one_and_more_is_greedy(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = {foo: OneOrMore("one"), bar: "one"} + parser = ConceptLexerParser() + parser.initialize(context, concepts) + + res = parser.parse(context, "one one one") + assert res.status + assert res.value.value == [(foo, 0, 4, "one one one")] + + def test_i_can_detect_infinite_recursion(): foo = Concept(name="foo") bar = Concept(name="bar") @@ -552,6 +734,16 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional(): pass +def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more(): + # TODO infinite recursion with optional + pass + + +def 
test_i_can_detect_indirect_infinite_recursion_with_one_and_more(): + # TODO infinite recursion with optional + pass + + def test_i_can_visit_parsing_expression(): mult = Concept(name="mult") add = Concept(name="add") diff --git a/tests/test_ConceptNodeEvaluator.py b/tests/test_ConceptNodeEvaluator.py index 368c5ad..a51bff6 100644 --- a/tests/test_ConceptNodeEvaluator.py +++ b/tests/test_ConceptNodeEvaluator.py @@ -5,7 +5,7 @@ from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \ - StrMatch, Optional, OrderedChoice + StrMatch, Optional, OrderedChoice, ZeroOrMore def get_context(): @@ -61,7 +61,7 @@ def test_concept_is_returned_when_list_of_one_concept_node(): assert result.who == evaluator.name assert result.status - assert result.value == node.concept + assert result.value == Concept("foo", body="foo").init_key() assert result.parents == [ret_val] @@ -119,6 +119,22 @@ def test_concept_property_is_correctly_updated_for_optional(): assert updated.props["o"].value == "two" +def test_concept_property_is_correctly_updated_for_zero_or_more(): + context = get_context() + + foo = Concept("foo") + grammar = {foo: ZeroOrMore("one", rule_name="variable")} + concept_node = get_concept_node(context, grammar, "one one one") + + updated = ConceptNodeEvaluator().update_concept( + context.sheerka, + context.sheerka.new(concept_node.concept.key), + concept_node.underlying) + + assert "variable" in updated.props + assert updated.props["variable"].value == "one one one" + + def test_concept_property_is_correctly_updated_when_list_of_properties(): context = get_context() @@ -141,9 +157,10 @@ def test_concept_property_is_correctly_updated_when_another_concept(): foo = Concept("foo") bar = Concept("bar") context.sheerka.add_in_cache(foo) + context.sheerka.add_in_cache(bar) grammar 
= { - foo: Sequence("one", "two", rule_name="variable"), - bar: Sequence(foo, "three", rule_name="variable")} + foo: Sequence("one", "two", rule_name="var"), + bar: Sequence(foo, "three", rule_name="var")} concept_node = get_concept_node(context, grammar, "one two three") updated = ConceptNodeEvaluator().update_concept( @@ -151,11 +168,11 @@ def test_concept_property_is_correctly_updated_when_another_concept(): context.sheerka.new(concept_node.concept.key), concept_node.underlying) - assert updated.props["variable"].value == "one two three" - assert updated.props["foo"].value == Concept("foo").set_prop("variable", "one two").init_key() + assert updated.props["var"].value == "one two three" + assert updated.props["foo"].value == Concept("foo", body="one two").set_prop("var", "one two").init_key() -def test_concept_property_is_correctly_updated_when_concept_recursion(): +def test_concept_property_is_correctly_updated_when_concept_recursion_using_optional(): context = get_context() number = Concept("number") @@ -173,6 +190,32 @@ def test_concept_property_is_correctly_updated_when_concept_recursion(): context.sheerka.new(concept_node.concept.key), concept_node.underlying) - assert updated.props["number"].value == Concept("number").init_key() + assert updated.props["number"].value == Concept("number", body="one").init_key() assert updated.props["op"].value == "plus" - assert updated.props["add"].value == Concept("add").set_prop("number", Concept("number").init_key()).init_key() + expected_add = Concept("add", body="two").set_prop("number", Concept("number", body="two").init_key()).init_key() + assert updated.props["add"].value == expected_add + + +def test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more(): + context = get_context() + + number = Concept("number") + add = Concept("add") + context.sheerka.add_in_cache(number) + context.sheerka.add_in_cache(add) + grammar = { + number: OrderedChoice("one", "two", 'three'), + add: 
Sequence(number, ZeroOrMore(Sequence(OrderedChoice("plus", "minus", rule_name="op"), number))) + } + concept_node = get_concept_node(context, grammar, "one plus two minus three") + + updated = ConceptNodeEvaluator().update_concept( + context.sheerka, + context.sheerka.new(concept_node.concept.key), + concept_node.underlying, + init_empty_body=True) + + assert updated.props["number"].value == [Concept("number", body="one").init_key(), + Concept("number", body="two").init_key(), + Concept("number", body="three").init_key()] + assert updated.props["op"].value == ["plus", "minus"] diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index d5a913a..a41e48a 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -626,7 +626,7 @@ def test_i_can_eval_bnf_definitions_with_variables(): return_value = res[0].value assert sheerka.isinstance(return_value, concept_b) - assert return_value.props["a"] == Property("a", concept_a) + assert return_value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one")) def test_i_can_eval_bnf_definitions_from_separate_instances():