From 3789ef25d10fa3d66ffc3b5a68e2331c926fd916 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Wed, 15 Jan 2020 19:44:32 +0100 Subject: [PATCH] Renamed ConceptMatch into ConceptExpression and added unit tests --- core/sheerka.py | 3 +- docs/blog.rst | 2 +- evaluators/AddConceptEvaluator.py | 2 +- parsers/BnfParser.py | 4 +- parsers/ConceptLexerParser.py | 128 +++++++++++++++--------------- parsers/PythonParser.py | 4 +- tests/test_AddConceptEvaluator.py | 4 +- tests/test_BnfParser.py | 14 ++-- tests/test_ConceptLexerParser.py | 4 +- tests/test_DefaultParser.py | 4 +- tests/test_sheerka.py | 22 ++++- tests/test_sheerka_non_reg.py | 6 +- 12 files changed, 109 insertions(+), 88 deletions(-) diff --git a/core/sheerka.py b/core/sheerka.py index 055a7ba..7595566 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -640,7 +640,8 @@ class Sheerka(Concept): sub_context.log_new(logger) # when it's a concept, evaluate it - if isinstance(to_resolve, Concept): + if isinstance(to_resolve, Concept) and \ + not context.sheerka.isinstance(to_resolve, BuiltinConcepts.RETURN_VALUE): evaluated = self.evaluate_concept(sub_context, to_resolve) sub_context.add_values(return_values=evaluated) if evaluated.key == to_resolve.key: diff --git a/docs/blog.rst b/docs/blog.rst index d13a47b..7745237 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -682,7 +682,7 @@ How does it works ? As explained in the code, my implementation is highly inspired by Arpegio project. To define your grammar, you use **ParsingExpressions**. There are several types -* some use to recognize tokens StrMatch, ConceptMatch +* some use to recognize tokens StrMatch, ConceptExpression * other use to tell how to recognize Sequence, OrderedChoice, Optional, OneOrMore, ZeroOrMore... Some example : diff --git a/evaluators/AddConceptEvaluator.py b/evaluators/AddConceptEvaluator.py index aecd966..0caeaae 100644 --- a/evaluators/AddConceptEvaluator.py +++ b/evaluators/AddConceptEvaluator.py @@ -19,7 +19,7 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor): def __init__(self): self.names = set() - def visit_ConceptMatch(self, node): + def visit_ConceptExpression(self, node): if node.rule_name: self.names.add(node.rule_name) elif isinstance(node.concept, Concept): diff --git a/parsers/BnfParser.py b/parsers/BnfParser.py index c9aaed1..29ba730 100644 --- a/parsers/BnfParser.py +++ b/parsers/BnfParser.py @@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts from core.sheerka import ExecutionContext from core.tokenizer import Tokenizer, Token, TokenKind, LexerError from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode -from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptMatch, StrMatch +from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, StrMatch @dataclass() @@ -231,7 +231,7 @@ class BnfParser(BaseParser): if token.type == TokenKind.IDENTIFIER: self.next_token() - return ConceptMatch(token.value) + return ConceptExpression(token.value) # concept = self.sheerka.get(str(token.value)) # if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): # self.add_error(CannotResolveConceptNode(str(token.value))) diff --git a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py index 5dc5f6a..629dab7 100644 --- a/parsers/ConceptLexerParser.py +++ b/parsers/ConceptLexerParser.py @@ -244,6 +244,64 @@ class ParsingExpression: return self._parse(parser) +class ConceptExpression(ParsingExpression): + """ + Will match a concept + It used only for rule definition + + When the grammar is created, it is replaced by the actual concept + """ + + def __init__(self, concept, rule_name=""): + super().__init__(rule_name=rule_name) + self.concept = concept + + def __repr__(self): + return f"{self.concept}" + + def __eq__(self, other): + if not super().__eq__(other): + return False + + if not isinstance(other, ConceptExpression): + return False + + if isinstance(self.concept, Concept): + return self.concept.name == other.concept.name + + return self.concept == other.concept + + @staticmethod + def get_parsing_expression_from_name(name): + tokens = Tokenizer(name) + nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] + if len(nodes) == 1: + return nodes[0] + else: + sequence = Sequence(nodes) + sequence.nodes = nodes + return sequence + + def _parse(self, parser): + to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept + if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): + return None + + self.concept = to_match # Memoize + + if to_match not in parser.concepts_grammars: + # Try to match the concept using its name + expr = self.get_parsing_expression_from_name(to_match.name) + node = expr.parse(parser) + else: + node = parser.concepts_grammars[to_match].parse(parser) + + if node is None: + return None + + return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) + + class Sequence(ParsingExpression): """ Will match sequence of parser expressions in exact order they are defined. @@ -486,64 +544,6 @@ class StrMatch(Match): return None -class ConceptMatch(Match): - """ - Will match a concept - It used only for rule definition - - When the grammar is created, it is replaced by the actual concept - """ - - def __init__(self, concept, rule_name=""): - super(Match, self).__init__(rule_name=rule_name) - self.concept = concept - - def __repr__(self): - return f"{self.concept}" - - def __eq__(self, other): - if not super().__eq__(other): - return False - - if not isinstance(other, ConceptMatch): - return False - - if isinstance(self.concept, Concept): - return self.concept.name == other.concept.name - - return self.concept == other.concept - - @staticmethod - def get_parsing_expression_from_name(name): - tokens = Tokenizer(name) - nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] - if len(nodes) == 1: - return nodes[0] - else: - sequence = Sequence(nodes) - sequence.nodes = nodes - return sequence - - def _parse(self, parser): - to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept - if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): - return None - - self.concept = to_match # Memoize - - if to_match not in parser.concepts_grammars: - # Try to match the concept using its name - expr = self.get_parsing_expression_from_name(to_match.name) - node = expr.parse(parser) - else: - node = parser.concepts_grammars[to_match].parse(parser) - - if node is None: - return None - - return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) - - class ConceptLexerParser(BaseParser): def __init__(self, **kwargs): super().__init__("ConceptLexer", 50) @@ -667,9 +667,9 @@ class ConceptLexerParser(BaseParser): # A copy must be created def inner_get_model(expression): if isinstance(expression, Concept): - ret = ConceptMatch(expression, rule_name=expression.name) + ret = ConceptExpression(expression, rule_name=expression.name) concepts_to_resolve.add(expression) - elif isinstance(expression, ConceptMatch): + elif isinstance(expression, ConceptExpression): if expression.rule_name is None or expression.rule_name == "": expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \ else expression.concept @@ -705,7 +705,7 @@ class ConceptLexerParser(BaseParser): # infinite recursion matcher def _is_infinite_recursion(ref_concept, node): - if isinstance(node, ConceptMatch): + if isinstance(node, ConceptExpression): if node.concept == ref_concept: return True @@ -856,7 +856,7 @@ class ConceptLexerParser(BaseParser): Goes in recursion if the property is a concept """ - # this cache is to make sure that we return the same concept for the same ConceptMatch + # this cache is to make sure that we return the same concept for the same ConceptExpression _underlying_value_cache = {} def _add_prop(_concept, prop_name, value): @@ -877,7 +877,7 @@ class ConceptLexerParser(BaseParser): _concept.cached_asts[prop_name] = new_value def _look_for_concept_match(_underlying): - if isinstance(_underlying.parsing_expression, ConceptMatch): + if isinstance(_underlying.parsing_expression, ConceptExpression): return _underlying if not isinstance(_underlying, NonTerminalNode): @@ -957,7 +957,7 @@ class ParsingExpressionVisitor: for node in parsing_expression.elements: if isinstance(node, Concept): - self.visit(ConceptMatch(node.key or node.name)) + self.visit(ConceptExpression(node.key or node.name)) elif isinstance(node, str): self.visit(StrMatch(node)) else: diff --git a/parsers/PythonParser.py b/parsers/PythonParser.py index 6788660..e667f55 100644 --- a/parsers/PythonParser.py +++ b/parsers/PythonParser.py @@ -21,9 +21,9 @@ class PythonErrorNode(ErrorNode): class PythonNode(Node): - def __init__(self, source, ast_, concepts=None): + def __init__(self, source, ast_=None, concepts=None): self.source = source - self.ast_ = ast_ + self.ast_ = ast_ if ast_ else ast.parse(source, mode="eval") if source else None self.concepts = concepts or {} # when concepts are recognized in the expression # def __repr__(self): diff --git a/tests/test_AddConceptEvaluator.py b/tests/test_AddConceptEvaluator.py index 8a6b26b..4298a7d 100644 --- a/tests/test_AddConceptEvaluator.py +++ b/tests/test_AddConceptEvaluator.py @@ -8,7 +8,7 @@ from core.sheerka import Sheerka, ExecutionContext from core.tokenizer import Tokenizer from evaluators.AddConceptEvaluator import AddConceptEvaluator from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptMatch +from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression from parsers.BnfParser import BnfParser from parsers.DefaultParser import DefConceptNode, NameNode from parsers.PythonParser import PythonNode, PythonParser @@ -184,7 +184,7 @@ def test_i_can_get_props_from_another_concept(): def test_i_can_get_props_from_definition(): - parsing_expression = Sequence(ConceptMatch('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptMatch("add")))) + parsing_expression = Sequence(ConceptExpression('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptExpression("add")))) ret_val = get_concept_definition("mult (('+'|'-') add)?", parsing_expression) assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["add", "mult"] diff --git a/tests/test_BnfParser.py b/tests/test_BnfParser.py index eeb1538..2e6cae3 100644 --- a/tests/test_BnfParser.py +++ b/tests/test_BnfParser.py @@ -6,7 +6,7 @@ from core.tokenizer import Tokenizer, TokenKind, LexerError from parsers.BaseParser import UnexpectedTokenErrorNode from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ - ConceptLexerParser, ConceptNode, ConceptMatch, cnode + ConceptLexerParser, ConceptNode, ConceptExpression, cnode from sdp.sheerkaDataProvider import Event @@ -41,12 +41,12 @@ def get_context(): ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))), ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))), ("(1 )", StrMatch("1")), - ("foo", ConceptMatch("foo")), - ("foo*", ZeroOrMore(ConceptMatch("foo"))), - ("foo 'and' bar+", Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))), - ("foo | bar?", OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))), - ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))), - ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), ConceptMatch("var"))), + ("foo", ConceptExpression("foo")), + ("foo*", ZeroOrMore(ConceptExpression("foo"))), + ("foo 'and' bar+", Sequence(ConceptExpression("foo"), StrMatch("and"), OneOrMore(ConceptExpression("bar")))), + ("foo | bar?", OrderedChoice(ConceptExpression("foo"), Optional(ConceptExpression("bar")))), + ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), ConceptExpression("var"))), + ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), ConceptExpression("var"))), ("'str'=var", StrMatch("str", rule_name="var")), ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")), ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")), diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py index eae1ab0..d211c05 100644 --- a/tests/test_ConceptLexerParser.py +++ b/tests/test_ConceptLexerParser.py @@ -5,7 +5,7 @@ from core.concept import Concept, ConceptParts, DoNotResolve from core.sheerka import Sheerka, ExecutionContext from core.tokenizer import Tokenizer, TokenKind, Token from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ - ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \ + ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptExpression, ZeroOrMore, OneOrMore, \ UnrecognizedTokensNode, cnode, short_cnode from sdp.sheerkaDataProvider import Event @@ -14,7 +14,7 @@ class ConceptVisitor(ParsingExpressionVisitor): def __init__(self): self.concepts = set() - def visit_ConceptMatch(self, node): + def visit_ConceptExpression(self, node): self.concepts.add(node.concept) diff --git a/tests/test_DefaultParser.py b/tests/test_DefaultParser.py index 1fefe92..97e3bdb 100644 --- a/tests/test_DefaultParser.py +++ b/tests/test_DefaultParser.py @@ -3,7 +3,7 @@ import ast from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.sheerka import Sheerka, ExecutionContext -from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch +from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptExpression from parsers.PythonParser import PythonParser, PythonNode from core.tokenizer import Keywords, Tokenizer, LexerError from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode @@ -259,7 +259,7 @@ def test_i_can_parse_def_concept_from_regex(): parser = DefaultParser() res = parser.parse(get_context(), text) node = res.value.value - definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string")) + definition = OrderedChoice(ConceptExpression("a_concept"), StrMatch("a_string")) parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition) expected = get_def_concept(name="name", body="__definition[0]", definition=parser_result) diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index c8444d8..dd35ed7 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -3,9 +3,11 @@ import os from os import path import shutil -from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, UserInputConcept, ConceptAlreadyInSet +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, UserInputConcept, ConceptAlreadyInSet, \ + ParserResultConcept from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property, ConceptParts, DoNotResolve from core.sheerka import Sheerka, ExecutionContext +from parsers.PythonParser import PythonNode from sdp.sheerkaDataProvider import SheerkaDataProvider, Event tests_root = path.abspath("../build/tests") @@ -709,6 +711,24 @@ def test_i_can_evaluate_when_property_asts_is_a_list(): assert props[1] == "1" +def test_i_can_evaluate_when_compiled_is_set_up_with_return_value(): + sheerka = get_sheerka() + + python_node = PythonNode("1 +1 ") + parser_result = ParserResultConcept(parser="who", value=python_node) + + concept = Concept("to_eval").set_prop("prop") + concept.cached_asts["prop"] = [ReturnValueConcept("who", True, parser_result)] + evaluated = sheerka.evaluate_concept(get_context(sheerka), concept) + assert evaluated.get_prop("prop") == 2 + + # also works when only one return value + concept = Concept("to_eval").set_prop("prop") + concept.cached_asts["prop"] = ReturnValueConcept("who", True, parser_result) + evaluated = sheerka.evaluate_concept(get_context(sheerka), concept) + assert evaluated.get_prop("prop") == 2 + + def test_i_can_reference_sheerka(): sheerka = get_sheerka() diff --git a/tests/test_sheerka_non_reg.py b/tests/test_sheerka_non_reg.py index 92815d8..b4d6a5e 100644 --- a/tests/test_sheerka_non_reg.py +++ b/tests/test_sheerka_non_reg.py @@ -8,7 +8,7 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property from core.sheerka import Sheerka, ExecutionContext from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator -from parsers.ConceptLexerParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptMatch +from parsers.ConceptLexerParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression from sdp.sheerkaDataProvider import SheerkaDataProvider, Event tests_root = path.abspath("../build/tests") @@ -330,8 +330,8 @@ def test_i_can_create_concept_with_bnf_definition(): saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY) expected_bnf = Sequence( - ConceptMatch("a", rule_name="a"), - Optional(Sequence(StrMatch("plus"), ConceptMatch("plus", rule_name="plus")))) + ConceptExpression("a", rule_name="a"), + Optional(Sequence(StrMatch("plus"), ConceptExpression("plus", rule_name="plus")))) assert saved_definitions[saved_concept] == expected_bnf new_concept = res[0].value.body