From 40416ac33769403d1c0cd1f7954c8fcb3ccd4201 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sat, 11 Jan 2020 08:03:35 +0100 Subject: [PATCH] Enhanced complex concepts handling --- core/concept.py | 11 +- core/sheerka.py | 31 +- docs/blog.rst | 44 ++ evaluators/AddConceptEvaluator.py | 2 +- evaluators/ConceptComposerEvaluator.py | 218 +++---- evaluators/ConceptNodeEvaluator.py | 124 +--- evaluators/PythonEvaluator.py | 86 ++- parsers/ConceptLexerParser.py | 116 +++- parsers/DefaultParser.py | 3 +- parsers/ExactConceptParser.py | 3 +- parsers/MultipleConceptsParser.py | 96 +++ parsers/PythonParser.py | 12 +- parsers/PythonWithConceptsParser.py | 116 ++++ tests/test_AddConceptEvaluator.py | 48 +- tests/test_BnfParser.py | 6 +- tests/test_ConceptComposerEvaluator.py | 256 ++++---- tests/test_ConceptLexerParser.py | 846 ++++++++++++++----------- tests/test_ConceptNodeEvaluator.py | 172 +---- tests/test_MultipleConceptsParser.py | 160 +++++ tests/test_PythonEvaluator.py | 23 + tests/test_PythonParser.py | 2 +- tests/test_PythonWithConceptsParser.py | 142 +++++ tests/test_sheerka.py | 49 +- tests/test_sheerka_non_reg.py | 42 ++ 24 files changed, 1647 insertions(+), 961 deletions(-) create mode 100644 parsers/MultipleConceptsParser.py create mode 100644 parsers/PythonWithConceptsParser.py create mode 100644 tests/test_MultipleConceptsParser.py create mode 100644 tests/test_PythonWithConceptsParser.py diff --git a/core/concept.py b/core/concept.py index 30a9a12..f5efd39 100644 --- a/core/concept.py +++ b/core/concept.py @@ -129,6 +129,15 @@ class Concept: def __hash__(self): return hash(self.metadata.name) + def __getattr__(self, item): + # I have this complicated implementation because of the usage of Pickle + + if 'props' in vars(self) and item in self.props: + return self.props[item].value + + name = self.name if 'metadata' in vars(self) else 'Concept' + raise AttributeError(f"'{name}' concept has no attribute '{item}'") + @property def name(self): return 
self.metadata.name @@ -166,7 +175,7 @@ class Concept: if token.type == TokenKind.WHITESPACE: continue if not first: - key += " " + key += " " # spaces are normalized if variables is not None and token.value in variables: key += VARIABLE_PREFIX + str(variables.index(token.value)) else: diff --git a/core/sheerka.py b/core/sheerka.py index 9d2df56..538ba70 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -735,7 +735,12 @@ class Sheerka(Concept): :param kwargs: :return: """ - template = self.get(concept_key) + if isinstance(concept_key, tuple): + concept_key, concept_id = concept_key[0], concept_key[1] + else: + concept_id = None + + template = self.get(concept_key, concept_id) # manage concept not found if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \ @@ -747,7 +752,7 @@ class Sheerka(Concept): # if template is a list, it means that there a multiple concepts under the same key concepts = [self.new_from_template(t, concept_key, **kwargs) for t in template] - return self.new(BuiltinConcepts.ENUMERATION, body=concepts) + return concepts def new_from_template(self, template, key, **kwargs): # manage singleton @@ -759,15 +764,15 @@ class Sheerka(Concept): concept.update_from(template) # update the properties - for key, v in kwargs.items(): - if key in concept.props: - concept.set_prop(key, v) - elif key in PROPERTIES_FOR_NEW: - setattr(concept.metadata, key, v) - elif hasattr(concept, key): - setattr(concept, key, v) + for k, v in kwargs.items(): + if k in concept.props: + concept.set_prop(k, v) + elif k in PROPERTIES_FOR_NEW: + setattr(concept.metadata, k, v) + elif hasattr(concept, k): + setattr(concept, k, v) else: - return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=key, concept=concept) + return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=k, concept=concept) # TODO : add the concept to the list of known concepts (self.instances) return concept @@ -830,6 +835,12 @@ class Sheerka(Concept): return obj + def is_unknown(self, obj): + if not 
isinstance(obj, Concept): + return True + + return obj.key == BuiltinConcepts.UNKNOWN_CONCEPT + def isinstance(self, a, b): """ return true if the concept a is an instance of the concept b diff --git a/docs/blog.rst b/docs/blog.rst index dfa4d65..20f24f2 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -891,3 +891,47 @@ So, * If, for a given priority there is a match, the parser with a lower priority won't be executed * A parser has access to the output of the parsers of higher priorities (which were executed before it) +2020-01-11 +********** + +Status +"""""" + +Last status was back in October. At that time I could + +:: + + def concept hello name as "hello" + name + 1 + 1 + sheerka.test() + +1. I can evaluate concepts + +:: + + def concept hello a where a + hello kodjo + +2. I have worked on the BNF definition of the concept + +:: + + def concept twenties from bnf 'twenty' (one | two | three)=unit as 20 + unit + twenty one + eval twenty one + +3. I can mix complex concepts (concepts with more than one word) and Python + +:: + + twenty one + twenty two + twenty one + one does not work :-( + + +4. I have a basic implementation for logging, with control of the verbosity + +5. The result of a user input evaluation is now persisted, along with the event +that was used for it. 
+ + + diff --git a/evaluators/AddConceptEvaluator.py b/evaluators/AddConceptEvaluator.py index 0ca203f..aecd966 100644 --- a/evaluators/AddConceptEvaluator.py +++ b/evaluators/AddConceptEvaluator.py @@ -82,7 +82,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator): # finish initialisation concept.init_key(def_concept_node.name.tokens) - concept.add_codes(def_concept_node.get_asts()) + #concept.add_codes(def_concept_node.get_asts()) if not isinstance(def_concept_node.definition, NotInitializedNode) and \ sheerka.is_success(def_concept_node.definition): concept.bnf = def_concept_node.definition.value.value diff --git a/evaluators/ConceptComposerEvaluator.py b/evaluators/ConceptComposerEvaluator.py index bfd55de..bc6e37b 100644 --- a/evaluators/ConceptComposerEvaluator.py +++ b/evaluators/ConceptComposerEvaluator.py @@ -1,109 +1,109 @@ -from core.builtin_concepts import BuiltinConcepts, ParserResultConcept -from core.concept import Concept -from core.tokenizer import TokenKind -from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator -from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, ConceptLexerParser -import core.utils - - -class ConceptComposerEvaluator(AllReturnValuesEvaluator): - """ - Try to reassemble parts of concepts from different evaluators - """ - - NAME = "ConceptComposer" - - def __init__(self): - super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 40) - - def matches(self, context, return_values): - concept_lexer_parser_name = ConceptLexerParser().name - - for return_value in return_values: - if return_value.who.startswith(BaseParser.PREFIX) and return_value.status: - return False - - if return_value.who.startswith(BaseEvaluator.PREFIX): - return False - - if return_value.who != concept_lexer_parser_name: - continue - - if not isinstance(return_value.value, ParserResultConcept): - return False - - if not ( - isinstance(return_value.value.value, ConceptNode) 
or - isinstance(return_value.value.value, UnrecognizedTokensNode) or - ( - hasattr(return_value.value.value, "__iter__") and - len(return_value.value.value) > 0 and - ( - isinstance(return_value.value.value[0], ConceptNode) or - isinstance(return_value.value.value[0], UnrecognizedTokensNode) - ))): - return False - - self.eaten = return_value - return True - - return False - - def eval(self, context, return_value): - sheerka = context.sheerka - nodes = self.eaten.value.value - temp_res = [] - has_error = False - concepts_only = True - - for node in nodes: - if isinstance(node, UnrecognizedTokensNode): - tokens = core.utils.strip_tokens(node.tokens, True) - for token in tokens: - if token.type == TokenKind.IDENTIFIER: - concept = context.new_concept(token.value) - if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): - has_error = True - else: - with context.push(self.name, desc=f"Evaluating '{concept}'") as sub_context: - sub_context.log_new(self.verbose_log) - concept = sheerka.evaluate_concept(sub_context, concept, self.verbose_log) - sub_context.add_values(return_values=concept) - temp_res.append(concept) - - else: - temp_res.append(core.utils.strip_quotes(token.value)) - concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE - else: - with context.push(self.name, desc=f"Evaluating '{node.concept}'") as sub_context: - sub_context.log_new(self.verbose_log) - concept = sheerka.evaluate_concept(sub_context, node.concept, self.verbose_log) - sub_context.add_values(return_values=concept) - temp_res.append(concept) - - if has_error: - return sheerka.ret( - self.name, - False, - temp_res, - parents=[self.eaten]) - - if concepts_only: - res = [] - for r in temp_res: - if isinstance(r, Concept): - res.append(r) - else: - res = "" - for r in temp_res: - if isinstance(r, Concept): - res += sheerka.value(r) - else: - res += r - - return sheerka.ret( - self.name, - True, - res, - parents=[self.eaten]) +# from core.builtin_concepts 
import BuiltinConcepts, ParserResultConcept +# from core.concept import Concept +# from core.tokenizer import TokenKind +# from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator +# from parsers.BaseParser import BaseParser +# from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, ConceptLexerParser +# import core.utils +# +# +# class ConceptComposerEvaluator(AllReturnValuesEvaluator): +# """ +# Try to reassemble parts of concepts from different evaluators +# """ +# +# NAME = "ConceptComposer" +# +# def __init__(self): +# super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 40) +# +# def matches(self, context, return_values): +# concept_lexer_parser_name = ConceptLexerParser().name +# +# for return_value in return_values: +# if return_value.who.startswith(BaseParser.PREFIX) and return_value.status: +# return False +# +# if return_value.who.startswith(BaseEvaluator.PREFIX): +# return False +# +# if return_value.who != concept_lexer_parser_name: +# continue +# +# if not isinstance(return_value.value, ParserResultConcept): +# return False +# +# if not ( +# isinstance(return_value.value.value, ConceptNode) or +# isinstance(return_value.value.value, UnrecognizedTokensNode) or +# ( +# hasattr(return_value.value.value, "__iter__") and +# len(return_value.value.value) > 0 and +# ( +# isinstance(return_value.value.value[0], ConceptNode) or +# isinstance(return_value.value.value[0], UnrecognizedTokensNode) +# ))): +# return False +# +# self.eaten = return_value +# return True +# +# return False +# +# def eval(self, context, return_value): +# sheerka = context.sheerka +# nodes = self.eaten.value.value +# temp_res = [] +# has_error = False +# concepts_only = True +# +# for node in nodes: +# if isinstance(node, UnrecognizedTokensNode): +# tokens = core.utils.strip_tokens(node.tokens, True) +# for token in tokens: +# if token.type == TokenKind.IDENTIFIER: +# concept = context.new_concept(token.value) +# if 
sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): +# has_error = True +# else: +# with context.push(self.name, desc=f"Evaluating '{concept}'") as sub_context: +# sub_context.log_new(self.verbose_log) +# concept = sheerka.evaluate_concept(sub_context, concept, self.verbose_log) +# sub_context.add_values(return_values=concept) +# temp_res.append(concept) +# +# else: +# temp_res.append(core.utils.strip_quotes(token.value)) +# concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE +# else: +# with context.push(self.name, desc=f"Evaluating '{node.concept}'") as sub_context: +# sub_context.log_new(self.verbose_log) +# concept = sheerka.evaluate_concept(sub_context, node.concept, self.verbose_log) +# sub_context.add_values(return_values=concept) +# temp_res.append(concept) +# +# if has_error: +# return sheerka.ret( +# self.name, +# False, +# temp_res, +# parents=[self.eaten]) +# +# if concepts_only: +# res = [] +# for r in temp_res: +# if isinstance(r, Concept): +# res.append(r) +# else: +# res = "" +# for r in temp_res: +# if isinstance(r, Concept): +# res += sheerka.value(r) +# else: +# res += r +# +# return sheerka.ret( +# self.name, +# True, +# res, +# parents=[self.eaten]) diff --git a/evaluators/ConceptNodeEvaluator.py b/evaluators/ConceptNodeEvaluator.py index 4ef034a..8d39128 100644 --- a/evaluators/ConceptNodeEvaluator.py +++ b/evaluators/ConceptNodeEvaluator.py @@ -1,7 +1,6 @@ from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from evaluators.BaseEvaluator import OneReturnValueEvaluator -import core.utils -from parsers.ConceptLexerParser import ConceptNode, NonTerminalNode, ConceptMatch, UnrecognizedTokensNode, TerminalNode +from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode class ConceptNodeEvaluator(OneReturnValueEvaluator): @@ -12,7 +11,7 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator): NAME = "ConceptNode" def __init__(self): - super().__init__(self.NAME, 
[BuiltinConcepts.EVALUATION], 60) # more than the ConceptNodeEvaluator + super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60) def matches(self, context, return_value): if not return_value.status: @@ -50,9 +49,7 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator): for node in nodes: if isinstance(node, ConceptNode): source += node.source if source == "" else (" " + node.source) - concept = sheerka.new(node.concept.key) - concept = self.finalize_concept(sheerka, concept, node.underlying) - concepts.append(concept) + concepts.append(node.concept) else: error_found = True @@ -69,118 +66,3 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator): parents=[return_value]) return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME), parents=[return_value]) - - def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True): - """ - Updates the properties of the concept - Goes in recursion if the property is a concept - """ - - def _add_prop(c, prop_name, value): - """ - Adds a new entry, - makes a list if the property already exists - """ - - if prop_name not in c.props or c.props[prop_name].value is None: - # new entry - c.set_prop(prop_name, value) - else: - # make a list if there was a value - previous_value = c.props[prop_name].value - if isinstance(previous_value, list): - previous_value.append(value) - else: - new_value = [previous_value, value] - c.set_prop(prop_name, new_value) - - parsing_expression = underlying.parsing_expression - - if parsing_expression.rule_name: - _add_prop(concept, parsing_expression.rule_name, self.get_underlying_as_string(underlying)) - - # the update of the body must come BEFORE the recursion - # otherwise it will be updated by a children and it won't be possible to modify the value - if init_empty_body and concept.body is None: - concept.metadata.body = self.get_underlying_as_string(underlying) # self.escape_if_needed(underlying.source) - - if isinstance(underlying, NonTerminalNode): - for child 
in underlying.children: - if isinstance(child.parsing_expression, ConceptMatch): - new_concept = sheerka.new(child.parsing_expression.concept.key) - _add_prop(concept, child.parsing_expression.rule_name, new_concept) - if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT): - continue - else: - self.finalize_concept(sheerka, new_concept, child.children[0], init_empty_body) - else: - self.finalize_concept(sheerka, concept, child, init_empty_body) - - return concept - - @staticmethod - def escape_if_needed(value): - if not isinstance(value, str): - return value - - return "'" + core.utils.escape_char(value, "'") + "'" - - def get_underlying_as_string(self, underlying): - """ - Return the sequence of the recognized character - When a concept is recognized, return the string version of the concept eg c:concept name: - :param underlying: - :return: - """ - - # Example - # grammar = { - # foo: Sequence("one", "two", rule_name="var"), - # bar: Sequence(foo, "three", rule_name="var")} - # - # we want bar.body and bar.prop["var"] - # to be "foo 'three'" (no quotes surrounding foo, as it is a concept, not a string) - - if isinstance(underlying, TerminalNode): - return self.escape_if_needed(underlying.source) - - res = "" - first = True - in_quote = "" - for node in underlying.children: - if isinstance(node.parsing_expression, ConceptMatch): - if in_quote != "": - res += in_quote + "'" - if not first: - res += " " - res += node.parsing_expression.concept.key - in_quote = "" - else: - if in_quote == "": - in_quote = ("'" if first else " '") + core.utils.escape_char(node.source, "'") - else: - in_quote += ("" if first else " ") + core.utils.escape_char(node.source, "'") - - first = False - - if in_quote: - res += in_quote + "'" - return res - -# - - - E X P L A N A T I O N S - - - -# why do we need to update the body ? 
-# cf test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more() -# def concept number from bnf one | two | three -# def concept add from bnf number plus number -# -# the expression 'one plus two plus three' will match concept add -# add.props["number"] is a list of concepts 'number' -# But which one is 'one', which one is 'two' which one is 'three' ? -# -# That's the reason why we update the body -# add.props["number"] is a list of concepts 'number' but they won't have the same body -# -# !!! C A U T I O N !!! -# In the current implementation, the body is the sequence of char found -# If a concept is recognized, we don't put this information in the body -# Use get_body_as_string() instead of escape_if_needed() if we need this information diff --git a/evaluators/PythonEvaluator.py b/evaluators/PythonEvaluator.py index 877a930..1be83fe 100644 --- a/evaluators/PythonEvaluator.py +++ b/evaluators/PythonEvaluator.py @@ -40,7 +40,7 @@ class PythonEvaluator(OneReturnValueEvaluator): not_for_me = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=node) return sheerka.ret(self.name, False, not_for_me, parents=[return_value]) - my_locals = self.get_locals(context, node.ast_) + my_locals = self.get_locals(context, node) context.log(self.verbose_log, f"locals={my_locals}", self.name) if isinstance(node.ast_, ast.Expression): @@ -58,7 +58,7 @@ class PythonEvaluator(OneReturnValueEvaluator): error = sheerka.new(BuiltinConcepts.ERROR, body=error) return sheerka.ret(self.name, False, error, parents=[return_value]) - def get_locals(self, context, ast_): + def get_locals(self, context, node): my_locals = {"sheerka": context.sheerka} if context.obj: context.log(self.verbose_log, @@ -70,30 +70,32 @@ class PythonEvaluator(OneReturnValueEvaluator): else: my_locals[prop_name] = context.sheerka.value(prop_value.value) - node_concept = core.ast.nodes.python_to_concept(ast_) + node_concept = core.ast.nodes.python_to_concept(node.ast_) 
unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka) unreferenced_names_visitor.visit(node_concept) for name in unreferenced_names_visitor.names: context.log(self.verbose_log, f"Resolving '{name}'.", self.name) - return_concept = False - if name.startswith("__C__") and name.endswith("__C__"): - name_resolved = name[5:-5] - return_concept = True + if name in node.concepts: + context.log(self.verbose_log, f"Using value from node.", self.name) + concept = node.concepts[name] + return_concept = False + else: - name_resolved = name + concept_key, concept_id, return_concept = self.resolve_name(context, name) - if name_resolved in my_locals: - context.log(self.verbose_log, f"Using value from property.", self.name) - continue + if concept_key in my_locals: + context.log(self.verbose_log, f"Using value from property.", self.name) + continue - concept = context.sheerka.new(name_resolved) - if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): - context.log(self.verbose_log, f"'{name_resolved}' is not a concept. Skipping.", self.name) - continue + context.log(self.verbose_log, f"Instantiating new concept.", self.name) + concept = context.sheerka.new((concept_key, concept_id)) + if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): + context.log(self.verbose_log, f"'{concept_key}' is not a concept. Skipping.", self.name) + continue - context.log(self.verbose_log, f"'{name_resolved}' is a concept. 
Evaluating.", self.name) + context.log(self.verbose_log, f"Evaluating '{concept}'", self.name) with context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept) as sub_context: sub_context.log_new(self.verbose_log) evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log) @@ -109,6 +111,58 @@ class PythonEvaluator(OneReturnValueEvaluator): return my_locals + def resolve_name(self, context, to_resolve): + """ + Try to match + __C__concept_key__C__ + or + __C__concept_key__concept_id__C__ + + :param context: + :param to_resolve: + :return: + """ + if not to_resolve.startswith("__C__"): + return to_resolve, None, False + + context.log(self.verbose_log, f"Resolving name '{to_resolve}'.", self.name) + + if len(to_resolve) >= 18 and to_resolve[:18] == "__C__USE_CONCEPT__": + use_concept = True + index = 18 + else: + use_concept = False + index = 5 + + try: + next_index = to_resolve.index("__", index) + if next_index == index: + context.log(self.verbose_log, f"Error: no key between '__'.", self.name) + return None + concept_key = to_resolve[index: next_index] + except ValueError: + context.log(self.verbose_log, f"Error: Missing trailing '__'.", self.name) + return None + + if next_index == len(to_resolve) - 5: + context.log(self.verbose_log, f"Recognized concept '{concept_key}'", self.name) + return concept_key, None, use_concept + + index = next_index + 2 + try: + next_index = to_resolve.index("__", index) + if next_index == index: + context.log(self.verbose_log, f"Error: no id between '__'.", self.name) + return None + + concept_id = to_resolve[index: next_index] + except ValueError: + context.log(self.verbose_log, f"Recognized concept '{concept_key}'.", self.name) + return concept_key, None, use_concept + + context.log(self.verbose_log, f"Recognized concept '{concept_key}' (id='{concept_id}').", self.name) + return concept_key, concept_id, use_concept + @staticmethod def expr_to_expression(expr): expr.lineno = 0 diff --git 
a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py index aad6938..9e030ba 100644 --- a/parsers/ConceptLexerParser.py +++ b/parsers/ConceptLexerParser.py @@ -34,10 +34,10 @@ def flatten(iterable): @dataclass() class LexerNode(Node): - start: int - end: int - tokens: list = None - source: str = None + start: int # starting index in the tokens list + end: int # ending index in the tokens list + tokens: list = None # tokens + source: str = None # string representation of what was parsed def __post_init__(self): if self.source is None: @@ -64,7 +64,15 @@ class UnrecognizedTokensNode(LexerNode): def fix_source(self): self.source = BaseParser.get_text_from_tokens(self.tokens) + def not_whitespace(self): + return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)) + def __eq__(self, other): + if isinstance(other, tuple): + if len(other) != 3: + return False + return self.start == other[0] and self.end == other[1] and self.source == other[2] + if not isinstance(other, UnrecognizedTokensNode): return False @@ -93,9 +101,9 @@ class ConceptNode(LexerNode): def __eq__(self, other): if isinstance(other, tuple): if len(other) == 2: - return self.concept == other[0] and self.source == other[1] + return self.concept.key == other[0] and self.source == other[1] else: - return self.concept == other[0] and \ + return self.concept.key == other[0] and \ self.start == other[1] and \ self.end == other[2] and \ self.source == other[3] @@ -567,7 +575,7 @@ class ConceptLexerParser(BaseParser): self.token = None self.pos = -1 - self.next_token() + self.next_token(False) return True def get_token(self) -> Token: @@ -762,8 +770,9 @@ class ConceptLexerParser(BaseParser): self.seek(init_pos) node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode if node is not None and node.end != -1: + updated_concept = self.finalize_concept(context.sheerka, concept, node) concept_node = ConceptNode( - concept, + updated_concept, 
node.start, node.end, self.tokens[node.start: node.end + 1], @@ -777,27 +786,30 @@ class ConceptLexerParser(BaseParser): unrecognized_tokens.add_token(self.get_token(), init_pos) else: unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()]) - concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) - has_unrecognized = True if not self.next_token(False): break else: # some concepts are recognized - if unrecognized_tokens: + if unrecognized_tokens and unrecognized_tokens.not_whitespace(): unrecognized_tokens.fix_source() - unrecognized_tokens = None + concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) + has_unrecognized = True + unrecognized_tokens = None + res = self.get_bests(res) # only keep the concepts that eat the more tokens concepts_found = core.utils.product(concepts_found, res) # loop self.seek(res[0].end) - if not self.next_token(): + if not self.next_token(False): break # Fix the source for unrecognized tokens - if unrecognized_tokens: + if unrecognized_tokens and unrecognized_tokens.not_whitespace(): unrecognized_tokens.fix_source() + concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) + has_unrecognized = True # else # returns as many ReturnValue than choices found @@ -821,6 +833,82 @@ class ConceptLexerParser(BaseParser): self.log_multiple_results(context, text, ret) return ret + def finalize_concept(self, sheerka, template, underlying, init_empty_body=True): + """ + Updates the properties of the concept + Goes in recursion if the property is a concept + """ + + # this cache is to make sure that we return the same concept for the same ConceptMatch + _underlying_value_cache = {} + + def _add_prop(_concept, prop_name, value): + """ + Adds a new entry, + makes a list if the property already exists + """ + if prop_name not in _concept.props or _concept.props[prop_name].value is None: + # new entry + _concept.set_prop(prop_name, value) + else: + # make a list if 
there was a value + previous_value = _concept.props[prop_name].value + if isinstance(previous_value, list): + previous_value.append(value) + else: + new_value = [previous_value, value] + _concept.set_prop(prop_name, new_value) + + def _look_for_concept_match(_underlying): + if isinstance(_underlying.parsing_expression, ConceptMatch): + return _underlying + + if not isinstance(_underlying, NonTerminalNode): + return None + + if len(_underlying.children) != 1: + return None + + return _look_for_concept_match(_underlying.children[0]) + + def _get_underlying_value(_underlying): + concept_match_node = _look_for_concept_match(_underlying) + if concept_match_node: + if id(concept_match_node) in _underlying_value_cache: + result = _underlying_value_cache[id(concept_match_node)] + else: + ref_tpl = concept_match_node.parsing_expression.concept + result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body) + _underlying_value_cache[id(concept_match_node)] = result + else: + result = _underlying.source + + return result + + def _process_rule_name(_concept, _underlying): + if _underlying.parsing_expression.rule_name: + value = _get_underlying_value(_underlying) + _add_prop(_concept, _underlying.parsing_expression.rule_name, value) + + if isinstance(_underlying, NonTerminalNode): + for child in _underlying.children: + _process_rule_name(_concept, child) + + key = (template.key, template.id) if template.id else template.key + concept = sheerka.new(key) + if init_empty_body and concept.body is None: + value = _get_underlying_value(underlying) + concept.metadata.body = value + concept.metadata.is_evaluated = True + if underlying.parsing_expression.rule_name: + _add_prop(concept, underlying.parsing_expression.rule_name, value) + + if isinstance(underlying, NonTerminalNode): + for node in underlying.children: + _process_rule_name(concept, node) + + return concept + @staticmethod def get_bests(results): """ diff --git a/parsers/DefaultParser.py 
class MultipleConceptsParser(BaseParser):
    """
    Parser that takes the result of ConceptLexerParser and tries to resolve
    the unrecognized tokens, token by token.

    It is a success only when the resulting list contains ConceptNode exclusively.
    """

    def __init__(self, **kwargs):
        # kwargs is accepted for signature compatibility but is not used here
        BaseParser.__init__(self, "MultipleConcepts", 45)

    @staticmethod
    def _flush_unrecognized(unrecognized_tokens, source, nodes_found):
        """
        Close a pending run of unrecognized tokens.

        The text of the run is always appended to the source; the node itself is
        only added to every candidate list when it is not made of whitespace.

        :param unrecognized_tokens: pending UnrecognizedTokensNode, or None
        :param source: source text rebuilt so far
        :param nodes_found: list of candidate node lists built so far
        :return: the updated (source, nodes_found) pair
        """
        if not unrecognized_tokens:
            return source, nodes_found

        unrecognized_tokens.fix_source()
        source += unrecognized_tokens.source
        if unrecognized_tokens.not_whitespace():
            nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
        return source, nodes_found

    def parse(self, context, text):
        """
        Try to turn the unrecognized tokens left by ConceptLexerParser into concepts.

        :param context: execution context (gives access to sheerka and new_concept)
        :param text: PARSER_RESULT concept produced by the ConceptLexerParser
        :return: None when the input is not a ConceptLexerParser result,
                 a single return value when there is only one candidate list of nodes,
                 otherwise the list of return values (one per candidate)
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        if not text.parser == concept_lexer_parser:
            return None

        whitespace_kinds = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
        nodes_found = [[]]
        source = ""
        concepts_only = True

        for node in text.value:
            if not isinstance(node, UnrecognizedTokensNode):
                # already resolved by the previous parser: keep it as is
                nodes_found = core.utils.product(nodes_found, [node])
                source += node.source
                continue

            unrecognized_tokens = None
            for i, token in enumerate(node.tokens):
                index = node.start + i

                if token.type == TokenKind.IDENTIFIER:
                    # it may be a concept
                    concept = context.new_concept(token.value)
                    several = hasattr(concept, "__iter__")
                    if several or not sheerka.is_unknown(concept):
                        # finish processing the pending unrecognized tokens
                        source, nodes_found = self._flush_unrecognized(
                            unrecognized_tokens, source, nodes_found)
                        unrecognized_tokens = None

                        source += token.value
                        # when the lookup yields several concepts, every one of them
                        # opens its own branch in the candidate lists
                        concepts = concept if several else [concept]
                        concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
                        nodes_found = core.utils.product(nodes_found, concepts_nodes)
                        continue

                # The token stays unrecognized. Whatever its kind, anything that is
                # not whitespace means the result is no longer made of concepts only.
                concepts_only = concepts_only and token.type in whitespace_kinds

                if unrecognized_tokens:
                    unrecognized_tokens.add_token(token, index)
                else:
                    unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])

            source, nodes_found = self._flush_unrecognized(unrecognized_tokens, source, nodes_found)

        ret = [
            sheerka.ret(
                self.name,
                concepts_only,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=choice,
                    try_parsed=None))
            for choice in nodes_found
        ]

        if len(ret) == 1:
            self.log_result(context, source, ret[0])
            return ret[0]

        self.log_multiple_results(context, source, ret)
        return ret
def concept_identifier(self, concept):
    """
    Return the placeholder identifier standing for a concept.

    The identifier is "__C__" + (key or name) [+ "__" + id] + "__C__", unless
    an identifier was already recorded for this very object in self.identifiers.

    :param concept: concept to name (its key, name and id attributes are read)
    :return: the identifier, as a string
    """
    # self.identifiers is set to None in __init__, so the membership test must
    # be guarded: `x in None` raises TypeError.
    known = self.identifiers
    if known and id(concept) in known:
        return known[id(concept)]

    # NOTE(review): unlike the helper used by parse(), the key/name is not
    # passed through sanitize() here - confirm whether that is intended.
    identifier = "__C__" + (concept.key or concept.name)
    if concept.id:
        identifier += "__" + concept.id
    identifier += "__C__"

    return identifier
+# So I had to comment concept.add_codes(def_concept_node.get_asts()) around line 85 +# So this test is now irrelevant +# :return: +# """ +# context = get_context() +# def_concept_return_value = get_concept( +# name="hello a", +# definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))), +# where="isinstance(a, str )", +# pre="a is not None", +# body="print('hello' + a)") +# +# evaluated = AddConceptEvaluator().eval(context, def_concept_return_value) +# +# assert evaluated.status +# assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT) +# +# created_concept = evaluated.body.body +# +# assert ConceptParts.WHERE in created_concept.cached_asts +# assert ConceptParts.PRE in created_concept.cached_asts +# assert ConceptParts.BODY in created_concept.cached_asts +# assert ConceptParts.POST not in created_concept.cached_asts def test_that_the_new_concept_is_correctly_saved(): diff --git a/tests/test_BnfParser.py b/tests/test_BnfParser.py index cb0cc4f..003d145 100644 --- a/tests/test_BnfParser.py +++ b/tests/test_BnfParser.py @@ -108,12 +108,12 @@ def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(): res = concept_parser.parse(context, "twenty two") assert res.status - assert res.value.body == [(bar, 0, 2, "twenty two")] + assert res.value.body == [("bar", 0, 2, "twenty two")] res = concept_parser.parse(context, "thirty one") assert res.status - assert res.value.body == [(bar, 0, 2, "thirty one")] + assert res.value.body == [("bar", 0, 2, "thirty one")] res = concept_parser.parse(context, "twenty") assert res.status - assert res.value.body == [(foo, 0, 0, "twenty")] + assert res.value.body == [("foo", 0, 0, "twenty")] diff --git a/tests/test_ConceptComposerEvaluator.py b/tests/test_ConceptComposerEvaluator.py index 0dc825f..fa2794c 100644 --- a/tests/test_ConceptComposerEvaluator.py +++ b/tests/test_ConceptComposerEvaluator.py @@ -1,128 +1,128 @@ -import pytest - -from core.builtin_concepts import 
ReturnValueConcept, ParserResultConcept -from core.concept import Concept -from core.sheerka import Sheerka, ExecutionContext -from evaluators.BaseEvaluator import BaseEvaluator -from evaluators.ConceptComposerEvaluator import ConceptComposerEvaluator -from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence -from sdp.sheerkaDataProvider import Event - -concept_lexer_name = ConceptLexerParser().name - - -def get_context(): - sheerka = Sheerka(skip_builtins_in_db=True) - sheerka.initialize("mem://") - return ExecutionContext("test", Event(), sheerka) - - -def get_return_values(context, grammar, expression): - parser = ConceptLexerParser() - parser.initialize(context, grammar) - - ret_val = parser.parse(context, expression) - assert not ret_val.status - return [ret_val] - - -def init(concepts, grammar, expression): - context = get_context() - for c in concepts: - context.sheerka.add_in_cache(c) - return_values = get_return_values(context, grammar, expression) - - return context, return_values - - -@pytest.mark.parametrize("return_values, expected", [ - ([ - ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), - ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), - ReturnValueConcept("not a parser", True, "some value"), - ], True), - ([ - ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), - ], True), - ([ - ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not in error"), - ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), - ], False), - ([ - ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), - ReturnValueConcept(concept_lexer_name, True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), - ], False), - ([ - ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), - 
ReturnValueConcept(concept_lexer_name, False, "some value"), - ], False), - ([ - ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), - ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=["not a concept"])), - ], False), - ([ - ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", False, "evaluator in error"), - ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), - ReturnValueConcept("not a parser", True, "some value"), - ], False), - ([ - ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", True, "evaluator"), - ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), - ReturnValueConcept("not a parser", True, "some value"), - ], False), -]) -def test_i_can_match(return_values, expected): - context = get_context() - assert ConceptComposerEvaluator().matches(context, return_values) == expected - - -def test_i_can_eval_simple_concepts(): - foo = Concept("foo", body="'foo'") - bar = Concept("bar", body="'bar'") - grammar = {} - context, return_values = init([foo, bar], grammar, "bar foo") - - composer = ConceptComposerEvaluator() - assert composer.matches(context, return_values) - - ret_val = composer.eval(context, return_values) - assert ret_val.status - assert ret_val.who == composer.name - assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()] - assert ret_val.value[0].metadata.is_evaluated - assert ret_val.value[1].metadata.is_evaluated - assert ret_val.parents == [return_values[0]] - - -def test_i_can_eval_simple_concepts_when_some_are_bnf(): - foo = Concept("foo", body="'foo'") - bar = Concept("bar", body="'bar'") - grammar = {foo: "foo"} - context, return_values = init([foo, bar], grammar, "bar foo") - - composer = ConceptComposerEvaluator() - assert composer.matches(context, return_values) - - ret_val = composer.eval(context, return_values) - assert ret_val.status - 
assert ret_val.who == composer.name - assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()] - assert ret_val.value[0].metadata.is_evaluated - assert ret_val.value[1].metadata.is_evaluated - assert ret_val.parents == [return_values[0]] - - -def test_i_can_eval_simple_concept_and_text(): - foo = Concept("foo", body="'foo'") - grammar = {} - context, return_values = init([foo], grammar, "'bar' foo") - - composer = ConceptComposerEvaluator() - assert composer.matches(context, return_values) - - ret_val = composer.eval(context, return_values) - assert ret_val.status - assert ret_val.who == composer.name - assert ret_val.value == "bar foo" - assert ret_val.parents == [return_values[0]] +# import pytest +# +# from core.builtin_concepts import ReturnValueConcept, ParserResultConcept +# from core.concept import Concept +# from core.sheerka import Sheerka, ExecutionContext +# from evaluators.BaseEvaluator import BaseEvaluator +# from evaluators.ConceptComposerEvaluator import ConceptComposerEvaluator +# from parsers.BaseParser import BaseParser +# from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence +# from sdp.sheerkaDataProvider import Event +# +# concept_lexer_name = ConceptLexerParser().name +# +# +# def get_context(): +# sheerka = Sheerka(skip_builtins_in_db=True) +# sheerka.initialize("mem://") +# return ExecutionContext("test", Event(), sheerka) +# +# +# def get_return_values(context, grammar, expression): +# parser = ConceptLexerParser() +# parser.initialize(context, grammar) +# +# ret_val = parser.parse(context, expression) +# assert not ret_val.status +# return [ret_val] +# +# +# def init(concepts, grammar, expression): +# context = get_context() +# for c in concepts: +# context.sheerka.add_in_cache(c) +# return_values = get_return_values(context, grammar, expression) +# +# return context, return_values +# +# +# @pytest.mark.parametrize("return_values, expected", [ +# ([ +# 
ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), +# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), +# ReturnValueConcept("not a parser", True, "some value"), +# ], True), +# ([ +# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), +# ], True), +# ([ +# ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not in error"), +# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), +# ], False), +# ([ +# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), +# ReturnValueConcept(concept_lexer_name, True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), +# ], False), +# ([ +# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), +# ReturnValueConcept(concept_lexer_name, False, "some value"), +# ], False), +# ([ +# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), +# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=["not a concept"])), +# ], False), +# ([ +# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", False, "evaluator in error"), +# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), +# ReturnValueConcept("not a parser", True, "some value"), +# ], False), +# ([ +# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", True, "evaluator"), +# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), +# ReturnValueConcept("not a parser", True, "some value"), +# ], False), +# ]) +# def test_i_can_match(return_values, expected): +# context = get_context() +# assert ConceptComposerEvaluator().matches(context, return_values) == expected +# +# +# def test_i_can_eval_simple_concepts(): +# foo = Concept("foo", body="'foo'") +# bar = Concept("bar", body="'bar'") +# grammar = {} +# context, 
return_values = init([foo, bar], grammar, "bar foo") +# +# composer = ConceptComposerEvaluator() +# assert composer.matches(context, return_values) +# +# ret_val = composer.eval(context, return_values) +# assert ret_val.status +# assert ret_val.who == composer.name +# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()] +# assert ret_val.value[0].metadata.is_evaluated +# assert ret_val.value[1].metadata.is_evaluated +# assert ret_val.parents == [return_values[0]] +# +# +# def test_i_can_eval_simple_concepts_when_some_are_bnf(): +# foo = Concept("foo", body="'foo'") +# bar = Concept("bar", body="'bar'") +# grammar = {foo: "foo"} +# context, return_values = init([foo, bar], grammar, "bar foo") +# +# composer = ConceptComposerEvaluator() +# assert composer.matches(context, return_values) +# +# ret_val = composer.eval(context, return_values) +# assert ret_val.status +# assert ret_val.who == composer.name +# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()] +# assert ret_val.value[0].metadata.is_evaluated +# assert ret_val.value[1].metadata.is_evaluated +# assert ret_val.parents == [return_values[0]] +# +# +# def test_i_can_eval_simple_concept_and_text(): +# foo = Concept("foo", body="'foo'") +# grammar = {} +# context, return_values = init([foo], grammar, "'bar' foo") +# +# composer = ConceptComposerEvaluator() +# assert composer.matches(context, return_values) +# +# ret_val = composer.eval(context, return_values) +# assert ret_val.status +# assert ret_val.who == composer.name +# assert ret_val.value == "bar foo" +# assert ret_val.parents == [return_values[0]] diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py index 4fc4f51..2c4aed5 100644 --- a/tests/test_ConceptLexerParser.py +++ b/tests/test_ConceptLexerParser.py @@ -1,4 +1,5 @@ import pytest +import core.utils from core.builtin_concepts import BuiltinConcepts from core.concept import 
def evaluated(concept):
    """
    Build a fresh concept whose name and body are both the name of `concept`.

    :param concept: concept to take the name from
    :return: the new Concept
    """
    # the concept was previously built and then dropped, so None was returned
    return Concept(name=concept.name, body=concept.name)


def get_context():
    """
    Create an ExecutionContext backed by an in-memory Sheerka ("mem://").

    :return: the execution context
    """
    sheerka = Sheerka(skip_builtins_in_db=True)
    sheerka.initialize("mem://")

    return ExecutionContext("sheerka", Event(), sheerka)


def get_expected(concept, text=None):
    """
    Build the concept expected in a parsing result.

    :param concept: concept to take the name from
    :param text: expected body; defaults to the concept name when falsy
    :return: a new Concept with its key initialized
    """
    return Concept(name=concept.name, body=text or concept.name).init_key()


def init(concepts, grammar):
    """
    Register the concepts in the sheerka cache and initialize a parser with the grammar.

    :param concepts: concepts to add in cache
    :param grammar: mapping given to ConceptLexerParser.initialize
    :return: (context, parser)
    """
    context = get_context()
    for c in concepts:
        context.sheerka.add_in_cache(c)

    parser = ConceptLexerParser()
    parser.initialize(context, grammar)

    return context, parser


def execute(concepts, grammar, text):
    """
    Initialize a parser (see init) and parse `text` with it.

    :param concepts: concepts to add in cache
    :param grammar: mapping given to ConceptLexerParser.initialize
    :param text: text to parse
    :return: (context, res, res.value, res.value.value)
    """
    context, parser = init(concepts, grammar)

    res = parser.parse(context, text)
    wrapper = res.value
    return_value = res.value.value

    return context, res, wrapper, return_value
res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ConceptNode(foo, 0, 0, source=text, underlying=u(match, 0, 0))] + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))] def test_i_can_match_multiple_concepts_in_one_input(): - context = get_context() one = Concept(name="one") two = Concept(name="two") - concepts = {one: "one", two: "two"} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {one: "one", two: "two"} - res = parser.parse(context, "one two one") + context, res, wrapper, return_value = execute([one, two], grammar, "one two one") assert res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ - ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)), - ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)), - ConceptNode(one, 4, 4, source="one", underlying=u("one", 4, 4)), + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ + ConceptNode(get_expected(one), 0, 0, source="one", underlying=u("one", 0, 0)), + ConceptNode(get_expected(two), 2, 2, source="two", underlying=u("two", 2, 2)), + ConceptNode(get_expected(one), 4, 4, source="one", underlying=u("one", 4, 4)), ] def test_i_can_match_sequence(): - context = get_context() foo = Concept(name="foo") - concepts = {foo: Sequence("one", "two", "three")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence("one", "two", "three")} - res = parser.parse(context, "one two three") + context, res, wrapper, return_value = execute([foo], grammar, "one two three") assert res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ - ConceptNode(foo, 0, 4, 
source="one two three", underlying=u(concepts[foo], 0, 4, [ - u("one", 0, 0), - u("two", 2, 2), - u("three", 4, 4)]))] + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ + ConceptNode( + get_expected(foo, "one two three"), + 0, + 4, + source="one two three", + underlying=u(grammar[foo], 0, 4, [ + u("one", 0, 0), + u("two", 2, 2), + u("three", 4, 4)]))] def test_i_always_choose_the_longest_match(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} + grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) - - res = parser.parse(context, "one two three") + context, res, wrapper, return_value = execute([foo, bar], grammar, "one two three") assert res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [(foo, "one two three")] + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [("foo", 0, 4, "one two three")] def test_i_can_match_several_sequences(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} + grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) - - res = parser.parse(context, "one two three one two") + context, res, wrapper, return_value = execute([foo, bar], grammar, "one two three one two") assert res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ - (foo, 0, 4, "one two three"), - (bar, 6, 8, "one two"), + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ + ("foo", 0, 4, "one two 
three"), + ("bar", 6, 8, "one two"), ] def test_i_can_match_ordered_choice(): - context = get_context() foo = Concept(name="foo") - concepts = {foo: OrderedChoice("one", "two")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: OrderedChoice("one", "two")} + context, parser = init([foo], grammar) res1 = parser.parse(context, "one") assert res1.status assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) - assert res1.value.body == [ - ConceptNode(foo, 0, 0, source="one", underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))] + assert res1.value.body == [("foo", 0, 0, "one")] + assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)]) res2 = parser.parse(context, "two") assert res2.status assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res2.value.body == [ - ConceptNode(foo, 0, 0, source="two", underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))] + assert res2.value.body == [("foo", 0, 0, "two")] + assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)]) res3 = parser.parse(context, "three") assert not res3.status - assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) + assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT) assert res3.value.value == [ UnrecognizedTokensNode(0, 0, [t("three")]) ] def test_i_cannot_match_ordered_choice_with_empty_alternative(): - context = get_context() foo = Concept(name="foo") - concepts = {foo: Sequence(OrderedChoice("one", ""), "two")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence(OrderedChoice("one", ""), "two")} + + context, res, wrapper, return_value = execute([foo], grammar, "ok") - res = parser.parse(context, "ok") # because token[0] is not "one" and not "" (it is 'two') assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert 
res.value.value == [ + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ UnrecognizedTokensNode(0, 0, [t("ok")]) ] def test_i_can_mix_sequences_and_ordered_choices(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} - concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo], grammar) res1 = parser.parse(context, "twenty one ok") assert res1.status assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) - assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok", - underlying=u(concepts[foo], 0, 4, [ + assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4, source="twenty one ok", + underlying=u(grammar[foo], 0, 4, [ u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]), u("one", 2, 2), u("ok", 4, 4)]))] @@ -197,8 +224,8 @@ def test_i_can_mix_sequences_and_ordered_choices(): res2 = parser.parse(context, "thirty one ok") assert res2.status assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok", - underlying=u(concepts[foo], 0, 4, [ + assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4, source="thirty one ok", + underlying=u(grammar[foo], 0, 4, [ u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]), u("one", 2, 2), u("ok", 4, 4)]))] @@ -212,61 +239,54 @@ def test_i_can_mix_sequences_and_ordered_choices(): def test_i_can_mix_ordered_choices_and_sequences(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} - concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + 
context, parser = init([foo], grammar) res = parser.parse(context, "twenty thirty") assert res.status + assert res.value.value == [("foo", 0, 2, "twenty thirty")] res = parser.parse(context, "one") assert res.status + assert res.value.value == [("foo", 0, 0, "one")] def test_i_cannot_parse_empty_optional(): - context = get_context() foo = Concept(name="foo") - - concepts = {foo: Optional("one")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Optional("one")} + context, parser = init([foo], grammar) res = parser.parse(context, "") + return_value = res.value + assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.IS_EMPTY) + assert context.sheerka.isinstance(return_value, BuiltinConcepts.IS_EMPTY) def test_i_can_parse_optional(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: Optional("one")} - concepts = {foo: Optional("one")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "one") - res = parser.parse(context, "one") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 0, source="one", - underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))] + assert return_value == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", + underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] def test_i_can_parse_sequence_starting_with_optional(): - context = get_context() foo = Concept(name="foo") - - concepts = {foo: Sequence(Optional("twenty"), "one")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence(Optional("twenty"), "one")} + context, parser = init([foo], grammar) res = parser.parse(context, "twenty one") assert res.status assert res.value.body == [ConceptNode( - foo, 0, 2, + get_expected(foo, "twenty one"), 0, 2, source="twenty one", - underlying=u(concepts[foo], 0, 2, + underlying=u(grammar[foo], 0, 2, [ u(Optional("twenty"), 0, 0, 
[u("twenty", 0, 0)]), u("one", 2, 2)] @@ -274,56 +294,50 @@ def test_i_can_parse_sequence_starting_with_optional(): res = parser.parse(context, "one") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 0, source="one", - underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))] + assert res.value.body == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", + underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] def test_i_can_parse_sequence_ending_with_optional(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: Sequence("one", "two", Optional("three"))} - concepts = {foo: Sequence("one", "two", Optional("three"))} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo], grammar) res = parser.parse(context, "one two three") assert res.status - assert res.value.body == [(foo, 0, 4, "one two three")] + assert res.value.body == [("foo", 0, 4, "one two three")] res = parser.parse(context, "one two") assert res.status - assert res.value.body == [(foo, 0, 2, "one two")] + assert res.value.body == [("foo", 0, 2, "one two")] def test_i_can_parse_sequence_with_optional_in_between(): - context = get_context() foo = Concept(name="foo") - concepts = {foo: Sequence("one", Optional("two"), "three")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence("one", Optional("two"), "three")} + + context, parser = init([foo], grammar) res = parser.parse(context, "one two three") assert res.status - assert res.value.body == [(foo, 0, 4, "one two three")] + assert res.value.body == [("foo", 0, 4, "one two three")] res = parser.parse(context, "one three") assert res.status - assert res.value.body == [(foo, 0, 2, "one three")] + assert res.value.body == [("foo", 0, 2, "one three")] def test_i_cannot_parse_wrong_input_with_optional(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: Optional("one")} - concepts = {foo: Optional("one")} - parser = 
ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "two") - res = parser.parse(context, "two") assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ UnrecognizedTokensNode(0, 0, [t("two")]) ] @@ -332,27 +346,68 @@ def test_i_can_use_reference(): # when there are multiple matches for the same input # Do I need to create a choice concept ? # No, create a return value for every possible graph - context = get_context() + foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = {foo: Sequence("one", "two"), bar: foo} - parser = ConceptLexerParser() - parser.initialize(context, concepts) - + grammar = {foo: Sequence("one", "two"), bar: foo} + context, parser = init([foo, bar], grammar) res = parser.parse(context, "one two") + assert len(res) == 2 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two", - underlying=u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)]))] + assert res[0].value.body == [("foo", 0, 2, "one two")] + concept_found_1 = res[0].value.body[0].concept + assert concept_found_1.metadata.is_evaluated + assert concept_found_1.body == "one two" assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two", - underlying=u(ConceptMatch(foo, rule_name="foo"), 0, 2, - [u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)])]))] + assert res[1].value.body == [("bar", 0, 2, "one two")] + concept_found_2 = res[1].value.body[0].concept + assert concept_found_2.metadata.is_evaluated + # the body and the prop['foo'] are the same concept 'foo' + assert 
isinstance(concept_found_2.body, Concept) + assert concept_found_2.body.key == "foo" + assert concept_found_2.body.metadata.is_evaluated + assert concept_found_2.body.body == "one two" + assert id(concept_found_2.props["foo"].value) == id(concept_found_2.body) + + +def test_i_can_use_a_reference_with_a_body(): + """ + Same test than before (test_i_can_use_reference()) + but this time, the concept 'foo' already has a body. + :return: + """ + + foo = Concept(name="foo", body="'foo'") + bar = Concept(name="bar") + grammar = {foo: Sequence("one", "two"), bar: foo} + context, parser = init([foo, bar], grammar) + res = parser.parse(context, "one two") + + assert len(res) == 2 + + assert res[0].status + assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) + assert res[0].value.body == [("foo", 0, 2, "one two")] + concept_found_1 = res[0].value.body[0].concept + assert not concept_found_1.metadata.is_evaluated + assert concept_found_1.body == "'foo'" + + assert res[1].status + assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) + assert res[1].value.body == [("bar", 0, 2, "one two")] + concept_found_2 = res[1].value.body[0].concept + assert concept_found_2.metadata.is_evaluated + # the body and the prop['foo'] are the same concept 'foo' + assert isinstance(concept_found_2.body, Concept) + assert concept_found_2.body.key == "foo" + assert not concept_found_2.body.metadata.is_evaluated + assert concept_found_2.body.body == "'foo'" + assert id(concept_found_2.props["foo"].value) == id(concept_found_2.body) def test_i_can_use_context_reference_with_multiple_levels(): @@ -360,343 +415,354 @@ def test_i_can_use_context_reference_with_multiple_levels(): Same than previous one, but with reference of reference :return: """ - context = get_context() + foo = Concept(name="foo") bar = Concept(name="bar") baz = Concept(name="baz") - - concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar} - parser = ConceptLexerParser() - 
parser.initialize(context, concepts) + grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar} + context, parser = init([foo, bar, baz], grammar) res = parser.parse(context, "one two") assert len(res) == 3 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [(foo, 0, 2, "one two")] + assert res[0].value.body == [("foo", 0, 2, "one two")] + concept_found_1 = res[0].value.body[0].concept + assert concept_found_1.body == "one two" + assert concept_found_1.metadata.is_evaluated assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [(bar, 0, 2, "one two")] + assert res[1].value.body == [("bar", 0, 2, "one two")] + concept_found_2 = res[1].value.body[0].concept + assert concept_found_2.body == get_expected(foo, "one two") + assert id(concept_found_2.props["foo"].value) == id(concept_found_2.body) assert res[2].status assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) - assert res[2].value.body == [(baz, 0, 2, "one two")] + assert res[2].value.body == [("baz", 0, 2, "one two")] + concept_found_3 = res[2].value.body[0].concept + expected_foo = get_expected(foo, "one two") + assert concept_found_3.body == get_expected(bar, expected_foo).set_prop("foo", expected_foo) + assert id(concept_found_3.props["bar"].value) == id(concept_found_3.body) def test_order_is_not_important_when_using_references(): - context = get_context() + """ + Same test than test_i_can_use_reference(), + but this time, 'bar' is declared before 'foo' + So the order of the result is different + :return: + """ foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = {bar: foo, foo: Sequence("one", "two")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {bar: foo, foo: Sequence("one", "two")} + context, parser = init([foo, bar], grammar) res = parser.parse(context, "one two") 
assert len(res) == 2 - assert res[0].value.body == [(bar, 0, 2, "one two")] - assert res[1].value.body == [(foo, 0, 2, "one two")] + assert res[0].value.body == [("bar", 0, 2, "one two")] + assert res[1].value.body == [("foo", 0, 2, "one two")] def test_i_can_parse_when_reference(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} + context, parser = init([foo, bar], grammar) res = parser.parse(context, "twenty two") assert res.status - assert res.value.body == [(bar, 0, 2, "twenty two")] + assert res.value.body == [("bar", 0, 2, "twenty two")] + concept_found = res.value.body[0].concept + assert concept_found.body == "twenty two" + assert concept_found.metadata.is_evaluated + assert concept_found.get_prop("foo") == get_expected(foo, "twenty") + assert concept_found.get_prop("foo").metadata.is_evaluated res = parser.parse(context, "thirty one") assert res.status - assert res.value.body == [(bar, 0, 2, "thirty one")] + assert res.value.body == [("bar", 0, 2, "thirty one")] + concept_found = res.value.body[0].concept + assert concept_found.body == "thirty one" + assert concept_found.metadata.is_evaluated + assert concept_found.get_prop("foo") == get_expected(foo, "thirty") + assert concept_found.get_prop("foo").metadata.is_evaluated res = parser.parse(context, "twenty") assert res.status - assert res.value.body == [(foo, 0, 0, "twenty")] + assert res.value.body == [("foo", 0, 0, "twenty")] + concept_found = res.value.body[0].concept + assert concept_found.body == "twenty" + assert concept_found.metadata.is_evaluated + + +def test_i_can_parse_when_reference_has_a_body(): + foo = Concept(name="foo", body="'one'") + bar = Concept(name="bar") + grammar = {bar: 
Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} + context, parser = init([foo, bar], grammar) + + res = parser.parse(context, "twenty two") + assert res.status + assert res.value.body == [("bar", 0, 2, "twenty two")] + concept_found = res.value.body[0].concept + assert concept_found.body == "twenty two" + assert concept_found.metadata.is_evaluated + assert concept_found.get_prop("foo") == get_expected(foo, "'one'") + assert not concept_found.get_prop("foo").metadata.is_evaluated + + res = parser.parse(context, "twenty") + assert res.status + assert res.value.body == [("foo", 0, 0, "twenty")] + concept_found = res.value.body[0].concept + assert concept_found.body == "'one'" + assert not concept_found.metadata.is_evaluated def test_i_can_parse_multiple_results(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = { + grammar = { bar: Sequence("one", "two"), foo: Sequence("one", OrderedChoice("two", "three")) } - - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar], grammar) res = parser.parse(context, "one two") assert len(res) == 2 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [(bar, 0, 2, "one two")] + assert res[0].value.body == [("bar", 0, 2, "one two")] + concept_found_0 = res[0].value.body[0].concept + assert concept_found_0.body == "one two" + assert concept_found_0.metadata.is_evaluated + assert len(concept_found_0.props) == 0 assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [(foo, 0, 2, "one two")] + assert res[1].value.body == [("foo", 0, 2, "one two")] + concept_found_1 = res[1].value.body[0].concept + assert concept_found_1.body == "one two" + assert concept_found_1.metadata.is_evaluated + assert len(concept_found_1.props) == 0 def 
test_i_can_parse_multiple_results_times_two(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = { + grammar = { bar: Sequence("one", "two"), foo: Sequence("one", OrderedChoice("two", "three")) } - - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar], grammar) res = parser.parse(context, "one two one two") assert len(res) == 4 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [(bar, "one two"), (bar, "one two")] + assert res[0].value.body == [("bar", "one two"), ("bar", "one two")] assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [(foo, "one two"), (bar, "one two")] + assert res[1].value.body == [("foo", "one two"), ("bar", "one two")] assert res[2].status assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) - assert res[2].value.body == [(bar, "one two"), (foo, "one two")] + assert res[2].value.body == [("bar", "one two"), ("foo", "one two")] assert res[3].status assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT) - assert res[3].value.body == [(foo, "one two"), (foo, "one two")] + assert res[3].value.body == [("foo", "one two"), ("foo", "one two")] def test_i_can_parse_multiple_results_when_reference(): - context = get_context() + """ + TODO : There should no be two answer, has the one with bar is totally useless + Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match + + :return: + """ foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = { + grammar = { bar: Sequence(foo, Optional(OrderedChoice("one", "two"))), foo: OrderedChoice("twenty", "thirty") } - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar], grammar) res = parser.parse(context, "twenty") assert len(res) == 2 
assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [(bar, 0, 0, "twenty")] + assert res[0].value.body == [("bar", 0, 0, "twenty")] assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [(foo, 0, 0, "twenty")] + assert res[1].value.body == [("foo", 0, 0, "twenty")] def test_i_can_parse_concept_reference_that_is_not_in_grammar(): - context = get_context() one = Concept(name="one") two = Concept(name="two") foo = Concept(name="foo") - context.sheerka.add_in_cache(one) - context.sheerka.add_in_cache(two) + grammar = {foo: Sequence("twenty", OrderedChoice(one, two))} + context, parser = init([one, two, foo], grammar) - concepts = {foo: Sequence("twenty", OrderedChoice(one, two))} parser = ConceptLexerParser() - parser.initialize(context, concepts) + parser.initialize(context, grammar) res = parser.parse(context, "twenty two") assert res.status - assert res.value.body == [(foo, 0, 2, "twenty two")] + assert res.value.body == [("foo", 0, 2, "twenty two")] + concept_found = res.value.body[0].concept + assert concept_found.body == "twenty two" + assert concept_found.metadata.is_evaluated + assert concept_found.get_prop("two") == get_expected(two, "two") + assert concept_found.get_prop("two").metadata.is_evaluated res = parser.parse(context, "twenty one") assert res.status - assert res.value.body == [(foo, 0, 2, "twenty one")] + assert res.value.body == [("foo", 0, 2, "twenty one")] def test_i_can_parse_zero_or_more(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: ZeroOrMore("one")} - concepts = {foo: ZeroOrMore("one")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "one one") - res = parser.parse(context, "one one") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 2, source="one one", - 
underlying=u(concepts[foo], 0, 2, [ - u("one", 0, 0), - u("one", 2, 2)]))] + assert return_value == [("foo", 0, 2, "one one")] + assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]) + + concept_found = return_value[0].concept + assert concept_found.body == "one one" + assert concept_found.metadata.is_evaluated def test_i_can_parse_sequence_and_zero_or_more(): - context = get_context() foo = Concept(name="foo") - - concepts = {foo: Sequence(ZeroOrMore("one"), "two")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence(ZeroOrMore("one"), "two")} + context, parser = init([foo], grammar) res = parser.parse(context, "one one two") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 4, source="one one two", - underlying=u(concepts[foo], 0, 4, [ - u(ZeroOrMore("one"), 0, 2, [ - u("one", 0, 0), - u("one", 2, 2)]), - u("two", 4, 4)]))] + assert res.value.value == [("foo", 0, 4, "one one two")] res = parser.parse(context, "two") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 0, source="two", - underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))] + assert res.value.value == [("foo", 0, 0, "two")] def test_i_cannot_parse_zero_and_more_when_wrong_entry(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: ZeroOrMore("one")} + context, parser = init([foo], grammar) - concepts = {foo: ZeroOrMore("one")} parser = ConceptLexerParser() - parser.initialize(context, concepts) + parser.initialize(context, grammar) res = parser.parse(context, "one two") assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert res.value.value == [ - ConceptNode(foo, 0, 0, source="one", underlying=u(ZeroOrMore("one"), 0, 0, [u("one", 0, 0)])), - UnrecognizedTokensNode(2, 2, [t("two")]) + ("foo", 0, 0, "one"), + UnrecognizedTokensNode(1, 2, [t(" "), t("two")]) ] res = parser.parse(context, "two") assert not res.status 
- assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert res.value.value == [ UnrecognizedTokensNode(0, 0, [t("two")]) ] def test_i_can_parse_zero_and_more_with_separator(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: ZeroOrMore("one", sep=",")} - concepts = {foo: ZeroOrMore("one", sep=",")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "one, one , one") - res = parser.parse(context, "one, one , one") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 7, source="one, one , one", - underlying=u(concepts[foo], 0, 7, [ - u("one", 0, 0), - u("one", 3, 3), - u("one", 7, 7)]))] + assert return_value == [("foo", 0, 7, "one, one , one")] def test_that_zero_and_more_is_greedy(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") + grammar = {foo: ZeroOrMore("one"), bar: "one"} - concepts = {foo: ZeroOrMore("one"), bar: "one"} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "one one one") - res = parser.parse(context, "one one one") assert res.status - assert res.value.value == [(foo, 0, 4, "one one one")] + assert return_value == [("foo", 0, 4, "one one one")] def test_i_can_parse_one_and_more(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: OneOrMore("one")} - concepts = {foo: OneOrMore("one")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "one one") - res = parser.parse(context, "one one") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 2, source="one one", - underlying=u(concepts[foo], 0, 2, [ - u("one", 0, 0), - u("one", 2, 2)]))] + assert return_value == [("foo", 0, 2, "one one")] + assert return_value[0].underlying == u(grammar[foo], 0, 2, [ + u("one", 0, 0), + u("one", 
2, 2)]) def test_i_can_parse_sequence_and_one_or_more(): - context = get_context() foo = Concept(name="foo") - - concepts = {foo: Sequence(OneOrMore("one"), "two")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence(OneOrMore("one"), "two")} + context, parser = init([foo], grammar) res = parser.parse(context, "one one two") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 4, source="one one two", - underlying=u(concepts[foo], 0, 4, [ - u(ZeroOrMore("one"), 0, 2, [ - u("one", 0, 0), - u("one", 2, 2)]), - u("two", 4, 4)]))] + assert res.value.value == [("foo", 0, 4, "one one two")] res = parser.parse(context, "two") assert not res.status - assert res.value.body == [ + assert res.value.value == [ UnrecognizedTokensNode(0, 0, [t("two")]) ] def test_i_can_parse_one_and_more_with_separator(): - context = get_context() foo = Concept(name="foo") + grammar = {foo: OneOrMore("one", sep=",")} - concepts = {foo: OneOrMore("one", sep=",")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "one, one , one") - res = parser.parse(context, "one, one , one") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 7, source="one, one , one", - underlying=u(concepts[foo], 0, 7, [ - u("one", 0, 0), - u("one", 3, 3), - u("one", 7, 7)]))] + assert return_value == [("foo", 0, 7, "one, one , one")] + assert return_value[0].underlying == u(grammar[foo], 0, 7, [ + u("one", 0, 0), + u("one", 3, 3), + u("one", 7, 7)]) def test_that_one_and_more_is_greedy(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") + grammar = {foo: OneOrMore("one"), bar: "one"} - concepts = {foo: OneOrMore("one"), bar: "one"} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, res, wrapper, return_value = execute([foo], grammar, "one one one") - res = parser.parse(context, "one one one") assert 
res.status - assert res.value.value == [(foo, 0, 4, "one one one")] + assert return_value == [("foo", 0, 4, "one one one")] def test_i_can_detect_infinite_recursion(): foo = Concept(name="foo") bar = Concept(name="bar") - concepts = { + grammar = { bar: foo, foo: bar } parser = ConceptLexerParser() - parser.initialize(get_context(), concepts) + parser.initialize(get_context(), grammar) assert bar not in parser.concepts_grammars assert foo not in parser.concepts_grammars @@ -705,46 +771,45 @@ def test_i_can_detect_infinite_recursion(): def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = { + grammar = { bar: foo, foo: OrderedChoice(bar, "foo") } + parser = ConceptLexerParser() - parser.initialize(get_context(), concepts) + parser.initialize(get_context(), grammar) assert foo not in parser.concepts_grammars # removed because of the infinite recursion assert bar not in parser.concepts_grammars # removed because of the infinite recursion # the other way around is possible - context = get_context() - concepts = { + grammar = { bar: foo, foo: OrderedChoice("foo", bar) } - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar], grammar) + assert foo in parser.concepts_grammars assert bar in parser.concepts_grammars res = parser.parse(context, "foo") assert len(res) == 2 assert res[0].status - assert res[0].value.body == [(bar, 0, 0, "foo")] + assert res[0].value.body == [("bar", 0, 0, "foo")] assert res[1].status - assert res[1].value.body == [(foo, 0, 0, "foo")] + assert res[1].value.body == [("foo", 0, 0, "foo")] def test_i_can_detect_indirect_infinite_recursion_with_sequence(): foo = Concept(name="foo") bar = Concept(name="bar") - concepts = { + grammar = { bar: foo, foo: Sequence("one", bar, "two") } parser = ConceptLexerParser() - parser.initialize(get_context(), concepts) + parser.initialize(get_context(), grammar) assert foo not 
in parser.concepts_grammars # removed because of the infinite recursion assert bar not in parser.concepts_grammars # removed because of the infinite recursion @@ -754,12 +819,12 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic foo = Concept(name="foo") bar = Concept(name="bar") - concepts = { + grammar = { bar: foo, foo: Sequence("one", OrderedChoice(bar, "other"), "two") } parser = ConceptLexerParser() - parser.initialize(get_context(), concepts) + parser.initialize(get_context(), grammar) assert foo not in parser.concepts_grammars # removed because of the infinite recursion assert bar not in parser.concepts_grammars # removed because of the infinite recursion @@ -769,11 +834,11 @@ def test_infinite_recursion_does_not_fail_if_a_concept_is_missing(): foo = Concept(name="foo") bar = Concept(name="bar") - concepts = { + grammar = { foo: bar } parser = ConceptLexerParser() - parser.initialize(get_context(), concepts) + parser.initialize(get_context(), grammar) assert foo in parser.concepts_grammars @@ -808,156 +873,135 @@ def test_i_can_initialize_rule_names(): foo = Concept(name="foo") bar = Concept(name="bar") - concepts = {foo: Sequence("one", "two"), bar: foo} + grammar = {foo: Sequence("one", "two"), bar: foo} parser = ConceptLexerParser() - ret = parser.initialize(context, concepts) + ret = parser.initialize(context, grammar) return_value = ret.body assert return_value[foo].rule_name == "" assert return_value[bar].rule_name == "foo" -@pytest.mark.parametrize("text, end_position", [ +@pytest.mark.parametrize("text, end_position", [ ("foo", 0), - ("foo bar", 2) + ("foo bar", 2), + ("foo bar ", 3), + (" foo bar ", 4) ]) def test_cannot_parser_unknown_concepts(text, end_position): - context = get_context() - - parser = ConceptLexerParser() - parser.initialize(context, {}) - - res = parser.parse(context, text) + context, res, wrapper, return_value = execute([], {}, text) tokens = list(Tokenizer(text))[:-1] assert not res.status - 
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [UnrecognizedTokensNode(0, end_position, tokens)] + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [UnrecognizedTokensNode(0, end_position, tokens)] def test_i_cannot_parse_when_part_of_the_input_is_unrecognized(): - context = get_context() one = Concept(name="one") two = Concept(name="two") - concepts = {one: "one", two: "two"} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {one: "one", two: "two"} + + context, res, wrapper, return_value = execute([one, two], grammar, "one two three") - res = parser.parse(context, "one two three") assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ - ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)), - ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)), - UnrecognizedTokensNode(4, 4, [t("three")]) + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ + ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)), + ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)), + UnrecognizedTokensNode(3, 4, [t(" "), t("three")]) ] def test_i_cannot_parse_when_wrong_sequence(): - context = get_context() foo = Concept(name="foo") - concepts = {foo: Sequence("one", "two", "three")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence("one", "two", "three")} - res = parser.parse(context, "one two three one") + context, res, wrapper, return_value = execute([foo], grammar, "one two three one") assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ - (foo, "one two three"), - UnrecognizedTokensNode(6, 6, [t("one")]) + assert 
context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ + ("foo", "one two three"), + UnrecognizedTokensNode(5, 6, [t(" "), t("one")]) ] def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file(): - context = get_context() foo = Concept(name="foo") - concepts = {foo: Sequence("one", "two", "three")} - parser = ConceptLexerParser() - parser.initialize(context, concepts) + grammar = {foo: Sequence("one", "two", "three")} - res = parser.parse(context, "one two") + context, res, wrapper, return_value = execute([foo], grammar, "one two") assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [ UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")]) ] def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = { + grammar = { bar: Sequence("one", "two"), foo: Sequence("one", OrderedChoice("two", "three")) } - - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar], grammar) res = parser.parse(context, "one two four five") + assert len(res) == 2 assert not res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ - (bar, 0, 2, "one two"), - UnrecognizedTokensNode(4, 6, [t("four"), t(" "), t("five")]) + ("bar", 0, 2, "one two"), + UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) ] assert not res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ - (foo, 0, 2, "one two"), - UnrecognizedTokensNode(4, 6, [t("four"), t(" "), t("five")]) + ("foo", 0, 2, "one two"), + UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) ] def 
test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = { + grammar = { bar: Sequence("one", "two"), foo: Sequence("one", OrderedChoice("two", "three")) } - - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar], grammar) res = parser.parse(context, "four five one two") + assert len(res) == 2 assert not res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - (bar, 4, 6, "one two"), + ("bar", 4, 6, "one two"), ] assert not res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - (foo, 4, 6, "one two"), + ("foo", 4, 6, "one two"), ] def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(): - context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") - - concepts = { + grammar = { bar: Sequence("one", "two"), foo: Sequence("one", OrderedChoice("two", "three")) } - - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar], grammar) res = parser.parse(context, "four five one two six seven") assert len(res) == 2 @@ -965,16 +1009,16 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(): assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - (bar, 4, 6, "one two"), - UnrecognizedTokensNode(8, 10, [t("six"), t(" "), t("seven")]), + ("bar", 4, 6, "one two"), + UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), ] assert not res[1].status assert context.sheerka.isinstance(res[1].value, 
BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - (foo, 4, 6, "one two"), - UnrecognizedTokensNode(8, 10, [t("six"), t(" "), t("seven")]), + ("foo", 4, 6, "one two"), + UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), ] @@ -983,35 +1027,107 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(): foo = Concept(name="foo") bar = Concept(name="bar") baz = Concept(name="baz") - - concepts = { + grammar = { bar: Sequence("one", "two"), foo: Sequence("one", OrderedChoice("two", "three")), baz: StrMatch("six"), } - - parser = ConceptLexerParser() - parser.initialize(context, concepts) + context, parser = init([foo, bar, baz], grammar) res = parser.parse(context, "one two four five six") assert len(res) == 2 assert not res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ - (bar, 0, 2, "one two"), - UnrecognizedTokensNode(4, 7, [t("four"), t(" "), t("five"), t(" ")]), - (baz, 8, 8, "six"), + ("bar", 0, 2, "one two"), + UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), + ("baz", 8, 8, "six"), ] assert not res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ - (foo, 0, 2, "one two"), - UnrecognizedTokensNode(4, 7, [t("four"), t(" "), t("five"), t(" ")]), - (baz, 8, 8, "six"), + ("foo", 0, 2, "one two"), + UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), + ("baz", 8, 8, "six"), ] +def test_i_can_get_the_inner_concept_when_possible(): + foo = Concept(name="foo") + one = Concept(name="one") + grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))} + + context, res, wrapper, return_value = execute([foo, one], grammar, "one") + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value 
== [("foo", 0, 0, "one")] + concept_found = return_value[0].concept + assert concept_found.body == get_expected(one, "one") + assert concept_found.get_prop("one") == concept_found.body + + +def test_i_can_get_the_inner_concept_when_possible_with_rule_name(): + foo = Concept(name="foo") + one = Concept(name="one") + grammar = {foo: Sequence( + Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"), + ZeroOrMore("one"), rule_name="seq")} + + context, res, wrapper, return_value = execute([foo, one], grammar, "one") + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [("foo", 0, 0, "one")] + concept_found = return_value[0].concept + assert concept_found.body == get_expected(one, "one") + assert id(concept_found.get_prop("one")) == id(concept_found.body) + assert id(concept_found.get_prop("zero")) == id(concept_found.body) + assert id(concept_found.get_prop("opt")) == id(concept_found.body) + assert id(concept_found.get_prop("seq")) == id(concept_found.body) + + +def test_i_get_multiple_props_when_zero_or_more(): + foo = Concept(name="foo") + one = Concept(name="one") + grammar = {foo: ZeroOrMore(one)} + + context, res, wrapper, return_value = execute([foo, one], grammar, "one one one") + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [("foo", 0, 4, "one one one")] + concept_found = return_value[0].concept + assert concept_found.body == "one one one" + assert len(concept_found.props) == 1 + assert len(concept_found.get_prop("one")) == 3 + assert concept_found.get_prop("one")[0] == get_expected(one) + assert concept_found.get_prop("one")[1] == get_expected(one) + assert concept_found.get_prop("one")[2] == get_expected(one) + assert id(concept_found.get_prop("one")[0]) != id(concept_found.get_prop("one")[1]) + assert id(concept_found.get_prop("one")[1]) != id(concept_found.get_prop("one")[2]) + assert 
id(concept_found.get_prop("one")[2]) != id(concept_found.get_prop("one")[0]) + + +def test_i_get_multiple_props_when_zero_or_more_and_different_values(): + foo = Concept(name="foo") + one = Concept(name="one") + grammar = {foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")), one: OrderedChoice("one", "un", "uno")} + + context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok") + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert return_value == [("foo", "one ok un ok uno ok")] + concept_found = return_value[0].concept + assert concept_found.get_prop("one")[0] == get_expected(one, "one") + assert concept_found.get_prop("one")[1] == get_expected(one, "un") + assert concept_found.get_prop("one")[2] == get_expected(one, "uno") + assert concept_found.get_prop("seq")[0] == "one ok" + assert concept_found.get_prop("seq")[1] == "un ok" + assert concept_found.get_prop("seq")[2] == "uno ok" + + + # # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(): # context = get_context() @@ -1019,14 +1135,14 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(): # mult = Concept(name="mult") # atom = Concept(name="atom") # -# concepts = { +# grammar = { # add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))), # mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))), # atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')), # } # # parser = ConceptLexerParser() -# parser.register(concepts) +# parser.register(grammar) # # # res = parser.parse(context, "1") # # assert len(res) == 3 # add, mult, atom @@ -1036,12 +1152,11 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(): # # # # res = parser.parse(context, "1 + 2") # # assert res.status -# # assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2")] +# # assert return_value == [ConceptNode(add, 0, 4, 
source="1 + 2")] # # res = parser.parse(context, "1 * 2 + 3") # assert res.status -# assert res.value.value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")] - +# assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")] def test_i_can_register_concepts_with_the_same_name(): # TODO : concepts are registered by name, @@ -1053,10 +1168,3 @@ def test_i_can_parse_very_very_long_input(): # TODO: In the current implementation, all the tokens are loaded in memory # It's clearly not the good approach pass - - -def get_context(): - sheerka = Sheerka(skip_builtins_in_db=True) - sheerka.initialize("mem://") - - return ExecutionContext("sheerka", Event(), sheerka) diff --git a/tests/test_ConceptNodeEvaluator.py b/tests/test_ConceptNodeEvaluator.py index 0244654..89f81b9 100644 --- a/tests/test_ConceptNodeEvaluator.py +++ b/tests/test_ConceptNodeEvaluator.py @@ -1,6 +1,6 @@ import pytest -from core.builtin_concepts import ReturnValueConcept, ParserResultConcept +from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator @@ -15,7 +15,7 @@ def get_context(): return ExecutionContext("test", Event(), sheerka) -def get_return_value(context, grammar, expression): +def from_parsing(context, grammar, expression): parser = ConceptLexerParser() parser.initialize(context, grammar) @@ -31,7 +31,7 @@ def init(concept, grammar, text): context.sheerka.add_in_cache(c) else: context.sheerka.add_in_cache(concept) - ret_val = get_return_value(context, grammar, text) + ret_val = from_parsing(context, grammar, text) node = ret_val.value.value[0] return context, node @@ -56,172 +56,24 @@ def test_i_can_match(ret_val, expected): assert ConceptNodeEvaluator().matches(context, ret_val) == expected -def test_parser_result_of_concept_is_returned_when_list_of_one_concept_node(): +def 
test_concept_is_returned_when_only_one_in_the_list(): foo = Concept("foo") context = get_context() context.sheerka.add_in_cache(foo) evaluator = ConceptNodeEvaluator() - ret_val = get_return_value(context, {foo: StrMatch("foo")}, "foo") + ret_val = from_parsing(context, {foo: StrMatch("foo")}, "foo") result = evaluator.eval(context, ret_val) + wrapper = result.body + return_value = result.body.body assert result.who == evaluator.name assert result.status - assert result.value == ParserResultConcept( - evaluator, - "foo", - Concept("foo", body="'foo'").init_key(), - None) + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert wrapper.parser == evaluator + assert wrapper.source == "foo" + assert return_value == Concept("foo", body="foo").init_key() + assert return_value.metadata.is_evaluated assert result.parents == [ret_val] - -def test_concept_property_is_correctly_updated_for_str_match(): - foo = Concept("foo") - grammar = {foo: StrMatch("foo", rule_name="variable")} - context, node = init(foo, grammar, "foo") - - updated = ConceptNodeEvaluator().finalize_concept(context.sheerka, node.concept, node.underlying) - - assert "variable" in updated.props - assert updated.props["variable"].value == "'foo'" - assert updated.body == "'foo'" - - -def test_concept_property_is_correctly_updated_for_sequence(): - foo = Concept("foo") - grammar = {foo: Sequence("one", "two", rule_name="variable")} - context, node = init(foo, grammar, "one two") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying) - - assert "variable" in updated.props - assert updated.props["variable"].value == "'one two'" - assert updated.body == "'one two'" - - -def test_concept_property_is_updated_for_str_in_sequence(): - foo = Concept("foo") - grammar = {foo: Sequence(StrMatch("one", rule_name="s1"), StrMatch("two", rule_name="s2"), rule_name="variable")} - context, node = init(foo, grammar, "one 
two") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying) - - assert updated.props["variable"].value == "'one two'" - assert updated.props["s1"].value == "'one'" - assert updated.props["s2"].value == "'two'" - assert updated.body == "'one two'" - - -def test_concept_property_is_correctly_updated_for_optional(): - foo = Concept("foo") - grammar = {foo: Sequence("one", Optional("two", rule_name="o"), rule_name="variable")} - context, node = init(foo, grammar, "one two") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying) - - assert "variable" in updated.props - assert updated.props["variable"].value == "'one two'" - assert updated.props["o"].value == "'two'" - assert updated.body == "'one two'" - - -def test_concept_property_is_correctly_updated_for_zero_or_more(): - foo = Concept("foo") - grammar = {foo: ZeroOrMore("one", rule_name="variable")} - context, node = init(foo, grammar, "one one one") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying) - - assert "variable" in updated.props - assert updated.props["variable"].value == "'one one one'" - assert updated.body == "'one one one'" - - -def test_concept_property_is_correctly_updated_when_list_of_properties(): - foo = Concept("foo") - grammar = {foo: Sequence(StrMatch("one", rule_name="s"), StrMatch("two", rule_name="s"), rule_name="variable")} - context, node = init(foo, grammar, "one two") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying) - - assert updated.props["variable"].value == "'one two'" - assert updated.props["s"].value == ["'one'", "'two'"] - assert updated.body == "'one two'" - - -def test_concept_property_is_correctly_updated_when_another_concept(): - foo = Concept("foo") - bar 
= Concept("bar") - grammar = { - foo: Sequence("one", "two", rule_name="var"), - bar: Sequence(foo, "three", "four", rule_name="var")} - context, node = init([foo, bar], grammar, "one two three four") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying) - - assert updated.body == "foo 'three four'" - assert updated.props["var"].value == "foo 'three four'" - assert updated.props["foo"].value == Concept("foo", body="'one two'").set_prop("var", "'one two'").init_key() - - -def test_concept_property_is_correctly_updated_when_concept_recursion_using_optional(): - number = Concept("number") - add = Concept("add") - grammar = { - number: OrderedChoice("one", "two"), - add: Sequence(number, Optional(Sequence(OrderedChoice("plus", "minus", rule_name="op"), add))) - } - context, node = init([number, add], grammar, "one plus two") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying) - - assert updated.props["number"].value == Concept("number", body="'one'").init_key() - assert updated.props["op"].value == "'plus'" - expected_add = Concept("add", body="number"). \ - set_prop("number", Concept("number", body="'two'").init_key()). 
\ - init_key() - assert updated.props["add"].value == expected_add - - -def test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more(): - number = Concept("number") - add = Concept("add") - grammar = { - number: OrderedChoice("one", "two", 'three'), - add: Sequence(number, ZeroOrMore(Sequence(OrderedChoice("plus", "minus", rule_name="op"), number))) - } - context, node = init([number, add], grammar, "one plus two minus three") - - updated = ConceptNodeEvaluator().finalize_concept( - context.sheerka, - context.sheerka.new(node.concept.key), - node.underlying, - init_empty_body=True) - - assert updated.props["number"].value == [Concept("number", body="'one'").init_key(), - Concept("number", body="'two'").init_key(), - Concept("number", body="'three'").init_key()] - assert updated.props["op"].value == ["'plus'", "'minus'"] - diff --git a/tests/test_MultipleConceptsParser.py b/tests/test_MultipleConceptsParser.py new file mode 100644 index 0000000..16d215f --- /dev/null +++ b/tests/test_MultipleConceptsParser.py @@ -0,0 +1,160 @@ +from core.builtin_concepts import ParserResultConcept, BuiltinConcepts +from core.concept import Concept +from core.sheerka import Sheerka, ExecutionContext +from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, UnrecognizedTokensNode +from parsers.MultipleConceptsParser import MultipleConceptsParser +from sdp.sheerkaDataProvider import Event + + +def get_context(): + sheerka = Sheerka(skip_builtins_in_db=True) + sheerka.initialize("mem://") + return ExecutionContext("test", Event(), sheerka) + + +def get_return_value(context, grammar, expression): + parser = ConceptLexerParser() + parser.initialize(context, grammar) + + ret_val = parser.parse(context, expression) + assert not ret_val.status + return ret_val + + +def init(concepts, grammar, expression): + context = get_context() + for c in concepts: + context.sheerka.create_new_concept(context, c) + return_value = 
get_return_value(context, grammar, expression) + + return context, return_value + + +def test_not_interested_if_not_parser_result(): + context = get_context() + text = "not parser result" + + res = MultipleConceptsParser().parse(context, text) + assert res is None + + +def test_not_interested_if_not_from_concept_lexer_parser(): + context = get_context() + text = ParserResultConcept(parser="not concept lexer", value="some value") + + res = MultipleConceptsParser().parse(context, text) + assert res is None + + +def test_i_can_parse_exact_concepts(): + foo = Concept("foo", body="'foo'") + bar = Concept("bar", body="'bar'") + baz = Concept("baz", body="'baz'") + grammar = {} + context, return_value = init([foo, bar, baz], grammar, "bar foo baz") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + + assert ret_val.status + assert ret_val.who == parser.name + assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) + assert ret_val.value.value == [ + ConceptNode(bar, 0, 0, source="bar"), + ConceptNode(foo, 2, 2, source="foo"), + ConceptNode(baz, 4, 4, source="baz")] + assert ret_val.value.source == "bar foo baz" + + +def test_i_can_parse_when_ending_with_bnf(): + foo = Concept("foo", body="'foo'") + bar = Concept("bar", body="'bar'") + grammar = {foo: Sequence("foo1", "foo2", "foo3")} + context, return_value = init([foo, bar], grammar, "bar foo1 foo2 foo3") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + + assert ret_val.status + assert ret_val.who == parser.name + assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) + assert ret_val.value.value == [("bar", 0, 0, "bar"), ("foo", 2, 6, "foo1 foo2 foo3")] + assert ret_val.value.source == "bar foo1 foo2 foo3" + + +def test_i_can_parse_when_starting_with_bnf(): + foo = Concept("foo", body="'foo'") + bar = Concept("bar", body="'bar'") + grammar = {foo: Sequence("foo1", "foo2", "foo3")} + 
context, return_value = init([foo, bar], grammar, "foo1 foo2 foo3 bar") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + + assert ret_val.status + assert ret_val.who == parser.name + assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) + assert ret_val.value.value == [("foo", 0, 4, "foo1 foo2 foo3"), ("bar", 6, 6, "bar")] + assert ret_val.value.source == "foo1 foo2 foo3 bar" + + +def test_i_can_parse_when_concept_are_mixed(): + foo = Concept("foo") + bar = Concept("bar") + baz = Concept("baz") + grammar = {foo: Sequence("foo1", "foo2", "foo3")} + context, return_value = init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + + assert ret_val.status + assert ret_val.who == parser.name + assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) + assert ret_val.value.value == [ + ("baz", 0, 0, "baz"), + ("foo", 2, 6, "foo1 foo2 foo3"), + ("bar", 8, 8, "bar")] + assert ret_val.value.source == "baz foo1 foo2 foo3 bar" + + +def test_i_can_parse_when_multiple_concept_are_matching(): + foo = Concept("foo") + bar = Concept("bar", body="bar1") + baz = Concept("bar", body="bar2") + grammar = {foo: "foo"} + context, return_value = init([foo, bar, baz], grammar, "foo bar") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + + assert len(ret_val) == 2 + assert ret_val[0].status + assert ret_val[0].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")] + assert ret_val[0].value.source == "foo bar" + assert ret_val[0].value.value[1].concept.body == "bar1" + + assert ret_val[1].status + assert ret_val[1].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")] + assert ret_val[1].value.source == "foo bar" + assert ret_val[1].value.value[1].concept.body == "bar2" + + +def test_i_cannot_parse_when_unrecognized_token(): + twenty_two = Concept("twenty 
two") + one = Concept("one") + grammar = {twenty_two: Sequence("twenty", "two")} + context, return_value = init([twenty_two, one], grammar, "twenty two + one") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + + assert not ret_val.status + assert ret_val.who == parser.name + assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) + assert ret_val.value.value == [ + ("twenty two", 0, 2, "twenty two"), + (3, 5, " + "), + ("one", 6, 6, "one") + ] + assert ret_val.value.source == "twenty two + one" diff --git a/tests/test_PythonEvaluator.py b/tests/test_PythonEvaluator.py index fe10982..0662611 100644 --- a/tests/test_PythonEvaluator.py +++ b/tests/test_PythonEvaluator.py @@ -126,3 +126,26 @@ def test_i_can_eval_concept_token(): assert not evaluated.status assert evaluated.body.body.args[0] == "'int' object has no attribute 'name'" + + +@pytest.mark.parametrize("text, concept_key, concept_id, use_concept", [ + ("__C__key__C__", "key", None, False), + ("__C__key__id__C__", "key", "id", False), + ("__C__USE_CONCEPT__key__id__C__", "key", "id", True), + ("__C__USE_CONCEPT__key__id__C__", "key", "id", True), +]) +def test_i_can_resolve_name(text, concept_key, concept_id, use_concept): + context = get_context() + assert PythonEvaluator().resolve_name(context, text) == (concept_key, concept_id, use_concept) + + +@pytest.mark.parametrize("text", [ + "__C__", + "__C__key", + "__C__key____", + "__C____", + "__C__USE_CONCEPT__", +]) +def test_i_cannot_resolve_name(text): + context = get_context() + assert PythonEvaluator().resolve_name(context, text) is None diff --git a/tests/test_PythonParser.py b/tests/test_PythonParser.py index 7df3668..efdb6b5 100644 --- a/tests/test_PythonParser.py +++ b/tests/test_PythonParser.py @@ -86,4 +86,4 @@ def test_i_can_parse_a_concept(): assert res assert res.value.value == PythonNode( "c:concept_name: + 1", - ast.parse("__C__concept_name__C__+1", mode="eval")) + 
ast.parse("__C__USE_CONCEPT__concept_name__C__+1", mode="eval")) diff --git a/tests/test_PythonWithConceptsParser.py b/tests/test_PythonWithConceptsParser.py new file mode 100644 index 0000000..8ebe1a0 --- /dev/null +++ b/tests/test_PythonWithConceptsParser.py @@ -0,0 +1,142 @@ +import ast + +import pytest + +from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept +from core.concept import Concept +from core.sheerka import Sheerka, ExecutionContext +from core.tokenizer import Token, TokenKind, Tokenizer +from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode +from parsers.PythonParser import PythonNode, PythonErrorNode +from parsers.PythonWithConceptsParser import PythonWithConceptsParser +from sdp.sheerkaDataProvider import Event + + +def get_context(): + sheerka = Sheerka(skip_builtins_in_db=True) + sheerka.initialize("mem://") + return ExecutionContext("test", Event(), sheerka) + + +def get_ret_from(*args): + result = [] + index = 0 + for item in args: + if isinstance(item, Concept): + tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)] + result.append(ConceptNode(item, index, index, tokens, item.name)) + index += 1 + else: + tokens = list(Tokenizer(item)) + result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens)) + index += len(tokens) + + return ReturnValueConcept("who", False, ParserResultConcept(parser="name", value=result)) + + +def to_str_ast(expression): + return PythonNode.get_dump(ast.parse(expression, mode="eval")) + + +@pytest.mark.parametrize("text", [ + "not parser result", + ParserResultConcept(value="not a list"), + ParserResultConcept(value=[]), + ParserResultConcept(value=["not a Node"]), +]) +def test_not_interested(text): + context = get_context() + + res = PythonWithConceptsParser().parse(context, text) + assert res is None + + +def test_i_can_parse_concepts_and_python(): + context = get_context() + foo = Concept("foo") + input_return_value = get_ret_from(foo, 
" + 1") + + parser = PythonWithConceptsParser() + result = parser.parse(context, input_return_value.body) + wrapper = result.value + return_value = result.value.value + + assert result.status + assert result.who == parser.name + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert wrapper.source == "foo + 1" + assert isinstance(return_value, PythonNode) + assert return_value.source == "foo + 1" + assert return_value.get_dump(return_value.ast_) == to_str_ast("__C__foo__C__ + 1") + assert return_value.concepts["__C__foo__C__"] == foo + + +def test_i_can_parse_concepts_and_python_when_concept_is_known(): + context = get_context() + foo = Concept("foo") + foo = context.sheerka.create_new_concept(context, foo).body.body + input_return_value = get_ret_from(foo, " + 1") + + parser = PythonWithConceptsParser() + result = parser.parse(context, input_return_value.body) + wrapper = result.value + return_value = result.value.value + + assert result.status + assert result.who == parser.name + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert wrapper.source == "foo + 1" + assert isinstance(return_value, PythonNode) + assert return_value.source == "foo + 1" + assert return_value.get_dump(return_value.ast_) == to_str_ast("__C__foo__1001__C__ + 1") + assert return_value.concepts["__C__foo__1001__C__"] == foo + + +def test_i_can_parse_when_concept_name_has_invalid_characters(): + context = get_context() + foo = Concept("foo et > (,") + foo = context.sheerka.create_new_concept(context, foo).body.body + input_return_value = get_ret_from(foo, " + 1") + + parser = PythonWithConceptsParser() + result = parser.parse(context, input_return_value.body) + return_value = result.value.value + + assert result.status + assert return_value.concepts["__C__foo0et000000__1001__C__"] == foo + + +def test_python_ids_mappings_are_correct_when_concepts_with_the_same_name(): + context = get_context() + foo1 = Concept("foo") + foo2 = 
Concept("foo") + foo3 = context.sheerka.create_new_concept(context, Concept("foo", body="foo3")).body.body + foo4 = context.sheerka.create_new_concept(context, Concept("foo", body="foo4")).body.body + + input_return_value = get_ret_from(foo1, "+", foo2, "+", foo3, "+", foo4) + + parser = PythonWithConceptsParser() + result = parser.parse(context, input_return_value.body) + return_value = result.value.value + + assert result.status + assert return_value.concepts["__C__foo__C__"] == foo1 + assert return_value.concepts["__C__foo_1__C__"] == foo2 + assert return_value.concepts["__C__foo__1001__C__"] == foo3 + assert return_value.concepts["__C__foo__1002__C__"] == foo4 + + +def test_i_cannot_parse_if_syntax_error(): + context = get_context() + foo = Concept("foo") + foo = context.sheerka.create_new_concept(context, foo).body.body + input_return_value = get_ret_from(foo, " + ") + + parser = PythonWithConceptsParser() + result = parser.parse(context, input_return_value.body) + wrapper = result.value + return_value = result.value.value + + assert not result.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert isinstance(return_value[0], PythonErrorNode) diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index d71349d..b6d57d4 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -144,7 +144,7 @@ def test_i_can_get_a_builtin_concept_by_their_enum_or_the_string(): assert sheerka.get(str(key)) is not None -def test_i_can_get_new_concept(): +def test_i_can_get_a_newly_created_concept(): sheerka = get_sheerka() concept = get_default_concept() @@ -324,6 +324,21 @@ def test_i_can_instantiate_a_concept(): assert new.props["b"].value == "value" +def test_i_can_instantiate_with_the_name_and_the_id(): + sheerka = get_sheerka() + sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1")) + sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo2")) + + concepts = sheerka.new("foo") + assert 
len(concepts) == 2 + + foo1 = sheerka.new(("foo", "1001")) + assert foo1.body == "foo1" + + foo2 = sheerka.new(("foo", "1002")) + assert foo2.body == "foo2" + + def test_instances_are_different_when_asking_for_new(): sheerka = get_sheerka() concept = get_default_concept() @@ -357,6 +372,38 @@ def test_i_cannot_instantiate_an_unknown_concept(): assert new.body == "fake_concept" +def test_i_cannot_instantiate_with_invalid_id(): + sheerka = get_sheerka() + sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1")) + sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo2")) + + new = sheerka.new(("foo", "invalid_id")) + + assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT) + assert new.body == "foo" + + +def test_i_cannot_instantiate_with_invalid_key(): + sheerka = get_sheerka() + sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1")) + sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo2")) + + new = sheerka.new(("invalid_key", "1001")) + + assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT) + assert new.body == "invalid_key" + + +def test_concept_id_is_irrelevant_when_only_one_concept(): + sheerka = get_sheerka() + sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1")) + + new = sheerka.new(("foo", "invalid_id")) + + assert sheerka.isinstance(new, "foo") + assert new.body == "foo1" + + def test_i_cannot_instantiate_when_properties_are_not_recognized(): sheerka = get_sheerka() concept = get_default_concept() diff --git a/tests/test_sheerka_non_reg.py b/tests/test_sheerka_non_reg.py index 5c1430e..3557dd2 100644 --- a/tests/test_sheerka_non_reg.py +++ b/tests/test_sheerka_non_reg.py @@ -401,6 +401,48 @@ def test_i_can_eval_bnf_definitions_from_separate_instances(): assert res[0].value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one two").init_key()) +def test_i_can_eval_a_mix_with_bnf_and_python(): + sheerka = get_sheerka() 
+ + sheerka.evaluate_user_input("def concept one as 1") + sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' one as 20 + one") + + res = sheerka.evaluate_user_input("twenty one + 1") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 22 + + +def test_i_can_eval_a_mix_with_bnf_and_python_when_rule_name(): + sheerka = get_sheerka() + + sheerka.evaluate_user_input("def concept one as 1") + sheerka.evaluate_user_input("def concept two as 2") + sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit") + + assert sheerka.evaluate_user_input("eval twenty one")[0].body == 21 + + res = sheerka.evaluate_user_input("twenty one + 1") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 22 + + +def test_i_can_eval_a_more_complicated_mix_with_bnf_and_python(): + sheerka = get_sheerka() + + sheerka.evaluate_user_input("def concept one as 1") + sheerka.evaluate_user_input("def concept two as 2") + sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit") + + assert sheerka.evaluate_user_input("eval twenty one")[0].body == 21 + + res = sheerka.evaluate_user_input("twenty one + twenty two") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 43 + + def test_i_can_say_that_a_concept_isa_another_concept(): sheerka = get_sheerka() sheerka.evaluate_user_input("def concept foo")