From bef5f3208c3817cf2ed8b395b2959734a8f1f37b Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Tue, 13 Apr 2021 15:15:17 +0200 Subject: [PATCH] Fixed #55 : DefConceptParser: failed to recognize concept Fixed #62 : DefConceptParser: parsing error Fixed #64 : DefConceptParser: Failed to parse when too many concept keyword Fixed #65 : DefConceptParser : Add auto_eval keyword Fixed #66 : DefConceptParser : Add def_var keyword Fixed #67 : Add get_errors() --- LICENCE | 77 ++++++++ sheerka_backup/adjectives.sb | 11 +- sheerka_backup/admin.sb | 69 +++---- sheerka_backup/default.sb | 27 +-- src/core/sheerka/Sheerka.py | 100 ++++++++-- .../sheerka/services/SheerkaDebugManager.py | 1 + src/core/tokenizer.py | 2 + src/core/utils.py | 5 +- src/evaluators/DefConceptEvaluator.py | 160 +++++++++++----- src/parsers/BaseCustomGrammarParser.py | 124 +++++++++---- src/parsers/DefConceptParser.py | 66 ++++++- tests/BaseTest.py | 4 +- tests/core/test_sheerka.py | 138 +++++++++++++- tests/evaluators/test_DefConceptEvaluator.py | 173 ++++++++++++------ tests/non_reg/test_sheerka_non_reg.py | 4 +- tests/parsers/test_BaseCustomGrammarParser.py | 36 +++- tests/parsers/test_DefConceptParser.py | 76 +++++++- 17 files changed, 838 insertions(+), 235 deletions(-) create mode 100644 LICENCE diff --git a/LICENCE b/LICENCE new file mode 100644 index 0000000..5f696bc --- /dev/null +++ b/LICENCE @@ -0,0 +1,77 @@ + +Arpeggio is released under the terms of the MIT License +------------------------------------------------------- + +Copyright (c) 2009-2019 Igor R. 
Dejanović + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +PyFlwor is licensed under a BSD style license +------------------------------------------------------- + +Copyright (c) 2010, 2012 Tim Henderson +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the PyFlwor nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + +OrderedSet is licensed under an MIT style license +------------------------------------------------------- + +Copyright (c) 2009 Raymond Hettinger + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/sheerka_backup/adjectives.sb b/sheerka_backup/adjectives.sb index 1d392f6..8ab6513 100644 --- a/sheerka_backup/adjectives.sb +++ b/sheerka_backup/adjectives.sb @@ -2,9 +2,9 @@ push_ontology("english") def concept adjective + def concept color set_isa(color, adjective) - def concept red def concept blue def concept orange @@ -31,5 +31,14 @@ set_isa(black, color) set_isa(white, color) set_isa(grey, color) +def concept size +size is an adjective + +def concept tall +tall is a size +def concept short +short is a size + + def concept qualify x from bnf adjective x as set_attr(x, c:adjective:, adjective) ret x def concept qualify x from bnf x 'is' adjective as set_attr(x, c:adjective:, adjective) ret x \ No newline at end of file diff --git a/sheerka_backup/admin.sb b/sheerka_backup/admin.sb index 9fb8b85..88039bb 100644 --- a/sheerka_backup/admin.sb +++ b/sheerka_backup/admin.sb @@ -1,57 +1,32 @@ # admin helpers push_ontology("admin") -def concept explain as get_results(id=0, depth=2) -set_isa(c:explain:, __AUTO_EVAL) +def concept explain as get_results(id=0, depth=2) auto_eval True +def concept explain last as get_last_results(id=0, depth=2) auto_eval True +def concept explain x as get_results(id=x, depth=3) auto_eval True -def concept explain last as get_last_results(id=0, depth=2) -set_isa(c:explain last:, __AUTO_EVAL) +def concept precedence a > precedence b as set_is_greater_than(__PRECEDENCE, a, b, 'Sya') auto_eval True -def concept explain x as get_results(id=x, depth=3) -set_isa(c:explain x:, __AUTO_EVAL) +def concept x is a command as set_auto_eval(x, __AUTO_EVAL) auto_eval True -def concept precedence a > precedence b as set_is_greater_than(__PRECEDENCE, a, b, 'Sya') -set_isa(c:precedence a > precedence b:, __AUTO_EVAL) +def concept activate debug as set_debug(True) auto_eval True +def concept deactivate debug as set_debug(False) auto_eval True +def concept debug on as set_debug(True) auto_eval True +def concept debug off as set_debug(False) 
auto_eval True -def concept x is a command as set_auto_eval(x, __AUTO_EVAL) -set_auto_eval(c:x is a command:) +def concept activate debug on x as debug_var(x) auto_eval True +def concept debug x as debug_var(x) auto_eval True -def concept activate debug as set_debug(True) -set_auto_eval(c:activate debug:) -def concept deactivate debug as set_debug(False) -set_auto_eval(c:deactivate debug:) -def concept debug on as set_debug(True) -set_auto_eval(c:debug on:) -def concept debug off as set_debug(False) -set_auto_eval(c:debug off:) +def concept debug var x as debug_var(variable=x) auto_eval True +def concept debug variable x as debug_var(variable=x) auto_eval True +def concept debug method x as debug_var(method=x) auto_eval True -def concept activate debug on x as debug_var(x) -set_auto_eval(c:activate debug on x:) -def concept debug x as debug_var(x) -set_auto_eval(c:debug x:) +def concept deactivate debug on x as debug_var(x, enabled=False) where x auto_eval True -def concept debug var x as debug_var(variable=x) -set_auto_eval(c:debug var x:) -def concept debug variable x as debug_var(variable=x) -set_auto_eval(c:debug variable x:) -def concept debug method x as debug_var(method=x) -set_auto_eval(c:debug method x:) - -def concept deactivate debug on x as debug_var(x, enabled=False) where x -set_auto_eval(c:deactivate debug on x:) - -def concept activate return values processing as set_var("sheerka.enable_process_return_values", True) -def concept deactivate return values processing as set_var("sheerka.enable_process_return_values", False) -set_auto_eval(c:activate return values processing:) -set_auto_eval(c:deactivate return values processing:) - -def concept rule x where isinstance(x, int) as r:|x: -set_auto_eval(c:rule x:) -def concept rule x > rule y where isinstance(x, int) and isinstance(y, int) as set_is_greater_than(__PRECEDENCE, r:|x:, r:|y:, 'Rule') -set_auto_eval(c:rule x > rule y:) -def concept rule x is greatest where isinstance(x, int) as 
set_is_greatest(__PRECEDENCE, r:|x:, 'Rule') -set_auto_eval(c:rule x is greatest:) -def concept rule x < rule y where isinstance(x, int) and isinstance(y, int) as set_is_less_than(__PRECEDENCE, r:|x:, r:|y:, 'Rule') -set_auto_eval(c:rule x < rule y:) -def concept rule x is lesser where isinstance(x, int) as set_is_lesser(__PRECEDENCE, r:|x:, 'Rule') -set_auto_eval(c:rule x is lesser:) +def concept activate return values processing as set_var("sheerka.enable_process_return_values", True) auto_eval True +def concept deactivate return values processing as set_var("sheerka.enable_process_return_values", False) auto_eval True +def concept rule x where isinstance(x, int) as r:|x: auto_eval True +def concept rule x > rule y where isinstance(x, int) and isinstance(y, int) as set_is_greater_than(__PRECEDENCE, r:|x:, r:|y:, 'Rule') auto_eval True +def concept rule x is greatest where isinstance(x, int) as set_is_greatest(__PRECEDENCE, r:|x:, 'Rule') auto_eval True +def concept rule x < rule y where isinstance(x, int) and isinstance(y, int) as set_is_less_than(__PRECEDENCE, r:|x:, r:|y:, 'Rule') auto_eval True +def concept rule x is lesser where isinstance(x, int) as set_is_lesser(__PRECEDENCE, r:|x:, 'Rule') auto_eval True diff --git a/sheerka_backup/default.sb b/sheerka_backup/default.sb index 4699f02..7adebc5 100644 --- a/sheerka_backup/default.sb +++ b/sheerka_backup/default.sb @@ -1,8 +1,7 @@ # question push_ontology("english") -def concept q from q ? as question(q) pre is_question() +def concept q from q ? 
as question(q) pre is_question() auto_eval True set_is_lesser(__PRECEDENCE, q, 'Sya') -set_auto_eval(c:q:) def concept the x ret memory(x) def concept a x where 'x is a concept' ret x @@ -14,25 +13,17 @@ set_is_greatest(__PRECEDENCE, c:an x:, 'Sya') def concept "x is a concept" as isinstance(x, Concept) pre is_question() # is a -def concept x is a y as set_isa(x, y) ret x -set_auto_eval(c:x is a y:) -def concept x is an y as set_isa(x, y) ret x -set_auto_eval(c:x is an y:) +def concept x is a y as set_isa(x, y) ret x auto_eval True +def concept x is an y as set_isa(x, y) ret x auto_eval True def concept x is a y as isa(x,y) pre is_question() -# no need to auto eval as it's a question def concept x is an y as isa(x,y) pre is_question() -# no need to auto eval as it's a question # has a -def concept x has a y as set_hasa(x, y) ret x -set_auto_eval(c:x has a y:) -def concept x has an y as set_hasa(x, y) ret x -set_auto_eval(c:x has an y:) +def concept x has a y as set_hasa(x, y) ret x auto_eval True +def concept x has an y as set_hasa(x, y) ret x auto_eval True def concept x has a y as hasa(x,y) pre is_question() -# no need to auto eval as it's a question def concept x has an y as hasa(x,y) pre is_question() -# no need to auto eval as it's a question # AND def concept x and y as x and y pre is_question() @@ -45,9 +36,7 @@ set_is_lesser(__PRECEDENCE, c:x or y:, 'Sya') set_is_greater_than(__PRECEDENCE, c:x and y:, c:x or y:, 'Sya') set_is_less_than(__PRECEDENCE, c:q:, c:x or y:, 'Sya') - - -# default +# some words def concept male def concept female def concept man @@ -62,6 +51,7 @@ def concept boys def concept girl def concept girls def concept shirt +def concept table # days of the week def concept monday @@ -73,5 +63,6 @@ def concept saturday def concept sunday # questions -def concept what x is y pre is_question() where isa(x, adjective) as smart_get_attr(y, x) def concept how is x pre is_question() as smart_get_attr(x, adjective) +def concept what x is y pre 
is_question() where x is an adjective as smart_get_attr(y, x) +def concept what is the x of y pre is_question() where x is an adjective as smart_get_attr(y, x) diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index b6abb77..33c8f0e 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -673,23 +673,95 @@ class Sheerka(Concept): return (self.objvalue(obj) for obj in objs.body) - def get_error(self, obj): - if isinstance(obj, Concept) and obj._metadata.is_builtin and obj.key in BuiltinErrors: - return obj + def get_errors(self, obj, **kwargs): + """ + Browse obj, looking for error + :param obj: + :param kwargs: if defined, specialize the error + :return: + """ - if isinstance(obj, (list, set, tuple)): - return [self.get_error(o) for o in obj] - - if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE): - if obj.status: - return None - - if self.isinstance(obj.body, BuiltinConcepts.PARSER_RESULT): - return self.get_error(obj.body.body) + def filter_by_type(x, name): + if isinstance(x, Concept): + return x.name == name else: - return obj.body + return type(x).__name__ == name - raise NotImplementedError() + def filter_by_attribute(x, attr_name, attr_value): + if hasattr(x, "as_bag"): + try: + return x.as_bag()[attr_name] == attr_value + except KeyError: + return False + else: + try: + return getattr(x, attr_name) == attr_value + except AttributeError: + return False + + def and_filter(x, cond): + for c in cond: + if not c(x): + return False + + return True + + def is_error(_obj): + if isinstance(_obj, ErrorObj): + return True + + if isinstance(_obj, Concept) and _obj.get_metadata().is_builtin and _obj.key in BuiltinErrors: + return True + + return False + + def filter_objects(_objects): + if kwargs: + cond = [] + for k, v in kwargs.items(): + if k == "__type": + expected_type = v + cond.append(lambda x: filter_by_type(x, expected_type)) + else: + attr_name = k + expect_value = v + cond.append(lambda x: filter_by_attribute(x, 
attr_name, expect_value)) + + if len(cond) > 1: + copy_of_conditions = cond.copy() + full_cond = lambda x: and_filter(x, copy_of_conditions) + + else: + full_cond = cond[0] + + return [o for o in _objects if full_cond(o)] + + return _objects + + def inner_get_errors(_obj): + if self.isinstance(_obj, BuiltinConcepts.RETURN_VALUE) and _obj.status: + return [] + + if isinstance(_obj, (list, set, tuple)): + return core.utils.flatten([inner_get_errors(o) for o in _obj]) + + if is_error(_obj): + if isinstance(_obj, Concept) and _obj.body not in (NotInit, None): + return [_obj] + inner_get_errors(_obj.body) + else: + return [_obj] + + if isinstance(_obj, Concept) and _obj.body != NotInit: + return inner_get_errors(_obj.body) + + return [] + + errors = inner_get_errors(obj) + return filter_objects([e for e in errors]) + + def has_error(self, obj, **kwargs): + errors = self.get_errors(obj, **kwargs) + return len(errors) > 0 def get_evaluator_name(self, name): if self.evaluators_prefix is None: diff --git a/src/core/sheerka/services/SheerkaDebugManager.py b/src/core/sheerka/services/SheerkaDebugManager.py index a6bef45..91e99a5 100644 --- a/src/core/sheerka/services/SheerkaDebugManager.py +++ b/src/core/sheerka/services/SheerkaDebugManager.py @@ -346,6 +346,7 @@ class SheerkaDebugManager(BaseService): self.register_debug_vars(DefConceptEvaluator.NAME, "matches", "*") self.register_debug_vars(DefConceptEvaluator.NAME, "eval", "*") self.register_debug_vars(DefConceptEvaluator.NAME, "get_variables", "names") + self.register_debug_vars(DefConceptEvaluator.NAME, "get_variables", "possible_vars") self.register_debug_vars(PythonEvaluator.NAME, "eval", "globals") self.register_debug_vars(PythonEvaluator.NAME, "eval", "ret") self.register_debug_vars("Exceptions", PythonEvaluator.NAME+"-eval", "exception") diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index 51834bf..3c69e06 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -156,6 +156,8 @@ class 
Keywords(Enum): WHEN = "when" PRINT = "print" THEN = "then" + AUTO_EVAL = "auto_eval" + DEF_VAR = "def_var" class Tokenizer: diff --git a/src/core/utils.py b/src/core/utils.py index 90e439b..03a615b 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -645,8 +645,11 @@ def flatten_all_children(item, get_children): def flatten(list_of_lists): """ Flatten an list containing other lists + https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-a-list-of-lists?page=1&tab=votes#tab-top """ - return [item for sublist in list_of_lists for item in sublist] + import functools + import operator + return functools.reduce(operator.iconcat, list_of_lists, []) def get_text_from_tokens(tokens, custom_switcher=None, tracker=None): diff --git a/src/evaluators/DefConceptEvaluator.py b/src/evaluators/DefConceptEvaluator.py index f9a15b2..bafdd57 100644 --- a/src/evaluators/DefConceptEvaluator.py +++ b/src/evaluators/DefConceptEvaluator.py @@ -14,7 +14,12 @@ from parsers.PythonParser import get_python_node @dataclass(eq=True, frozen=True) -class MandatoryVariable: +class ConceptEvaluatorVariable: + name: str + + +@dataclass(eq=True, frozen=True) +class MandatoryVariable(ConceptEvaluatorVariable): """ When we are searching for variables, we are searching for potential variable So if the variable found has no match in the concept definition, it's not a problem @@ -24,19 +29,51 @@ class MandatoryVariable: But there are cases where the variable found must exist, otherwise, it's an error example: - def concept foo from bnf xxx - 'xxx' is detected as a variable (assuming that there is no concept named 'xxx' and a match must be - found in the the name of the variable + def concept foo from bnf unknown_concept + 'unknown_concept' will be detected and considered as a variable . But it is not, as it's not + declared in the name of the concept. 
- To distinguish between mandatory and not mandatory variable, we use MandatoryVariable + We return MandatoryVariable (instead of a variable name) to let the evaluator know that if the variable is not + declared in the name of the concept, it's an error """ - name: str def __hash__(self): return hash(("MandatoryVariable", self.name)) -class ConceptOrRuleNameVisitor(ParsingExpressionVisitor): +@dataclass(eq=True, frozen=True) +class PossibleVariable(ConceptEvaluatorVariable): + """ + When a name/identifier is found in a concept part (pre, post, where, body...) + It is considered as a possible variable. It will only be added as a variable if the exact name / identifier + is found in the name of the concept + + example: + def concept a plus b as a + b + 'a' and 'b' are found in the body and they also exist in the name of the concept + -> They will be added as variables + """ + + def __hash__(self): + return hash(("PossibleVariable", self.name)) + + +@dataclass(eq=True, frozen=True) +class CertainVariable(ConceptEvaluatorVariable): + """ + A certain variable will be added as a variable regardless of a possible match in the name + + example: + def concept number + def concept plus from bnf number=n1 plus number=n2 + 'n1' and 'n2' do not appear in the name of the concept, but they are variables + """ + + def __hash__(self): + return hash(("PossibleVariable", self.name)) + + +class ConceptOrRuleVariableVisitor(ParsingExpressionVisitor): """ Gets the concepts referenced by BNF If a rule_name is given, it will also be considered as a potential property @@ -44,22 +81,22 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor): def __init__(self): super().__init__() - self.names = set() + self.variables = [] def visit_ConceptExpression(self, node): if node.rule_name: - self.names.add(node.rule_name) + self.variables.append(CertainVariable(node.rule_name)) elif isinstance(node.concept, Concept): - self.names.add(node.concept.name) + 
self.variables.append(CertainVariable(node.concept.name)) else: - self.names.add(node.concept) + self.variables.append(CertainVariable(node.concept)) def visit_VariableExpression(self, node): - self.names.add(MandatoryVariable(node.rule_name)) + self.variables.append(MandatoryVariable(node.rule_name)) def visit_all(self, node): if node.rule_name: - self.names.add(node.rule_name) + self.variables.append(CertainVariable(node.rule_name)) class DefConceptEvaluator(OneReturnValueEvaluator): @@ -89,12 +126,18 @@ class DefConceptEvaluator(OneReturnValueEvaluator): # validate the node variables_found = set() mandatory_variables = set() # these variable MUST have a match in the name (if the name is not None) + certain_variables = [] + skip_variables_resolution = False concept = Concept(str(def_concept_node.name)) concept.get_metadata().definition_type = def_concept_node.definition_type name_to_use = self.get_name_to_use(def_concept_node) - # get variables + if def_concept_node.variables != NotInit: + certain_variables = def_concept_node.variables.copy() + skip_variables_resolution = True + + # get variables and set the sources for prop in ("definition", "where", "pre", "post", "body", "ret"): part_ret_val = getattr(def_concept_node, prop) @@ -111,26 +154,29 @@ class DefConceptEvaluator(OneReturnValueEvaluator): raise Exception("Unexpected") setattr(concept.get_metadata(), prop, source) + if skip_variables_resolution: + continue + # Do not try to resolve variables from itself if prop == "definition" and concept.get_metadata().definition_type == DEFINITION_TYPE_DEF: continue # try to find what can be a property for p in self.get_variables(context, part_ret_val, name_to_use): + variables_found.add(p.name) if isinstance(p, MandatoryVariable): - variables_found.add(p.name) mandatory_variables.add(p.name) - else: - variables_found.add(p) + elif isinstance(p, CertainVariable): + certain_variables.append(p.name) - # add variables by order of appearance when possible - for name_part 
in name_to_use: + # add variables by order of appearance + for name_part in [name_part for name_part in name_to_use if str(name_part).isalnum()]: if name_part in variables_found: concept.def_var(name_part, None) # check that all mandatory variables are defined in the name # KSI: 2021-02-17 - # The mandatory variables come for bnf definition where it was not possible to resolve to a concept + # The mandatory variables come from bnf definition where it was not possible to resolve to a concept # So rather that issuing a 'UnresolvedVariableError' I prefer UNKNOWN_CONCEPT if (diff := mandatory_variables.difference(set(name_to_use))) != set(): unknown_concepts = [sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": c}) for c in sorted(diff)] @@ -139,7 +185,7 @@ class DefConceptEvaluator(OneReturnValueEvaluator): # add the remaining properties # They mainly come from BNF definition - for p in variables_found: + for p in certain_variables: if p not in concept.values(): concept.def_var(p, None) @@ -154,6 +200,10 @@ class DefConceptEvaluator(OneReturnValueEvaluator): def_concept_node.definition_type == DEFINITION_TYPE_BNF: concept.set_bnf(def_concept_node.definition.value.value) + # manage auto eval + if def_concept_node.auto_eval: + concept.add_prop(BuiltinConcepts.ISA, sheerka.new(BuiltinConcepts.AUTO_EVAL)) + ret = sheerka.create_new_concept(context, concept) if not ret.status: error_cause = sheerka.objvalue(ret.body) @@ -172,24 +222,38 @@ class DefConceptEvaluator(OneReturnValueEvaluator): This function can only be a draft, as there may be tons of different situations I guess that it can only be complete when will we have access to Sheerka memory """ + + def get_inner_concept(parsing_result): + if not isinstance(parsing_result, ParserResultConcept): + return None + + if isinstance(parsing_result.body, Concept): + return parsing_result.body + + # manage other cases (conceptNode) later + return None + debugger = context.get_debugger(DefConceptEvaluator.NAME, 
"get_variables") # # Case of NameNode # if isinstance(ret_value, NameNode): - names = [str(t.value) for t in ret_value.tokens if t.type in ( - TokenKind.IDENTIFIER, TokenKind.STRING, TokenKind.KEYWORD)] + names = [str(t.value) for t in ret_value.tokens if t.type in (TokenKind.IDENTIFIER, + TokenKind.STRING, + TokenKind.KEYWORD)] + possible_vars = filter(lambda x: x in concept_name and context.sheerka.is_not_a_variable(x), names) debugger.debug_var("names", names, hint="from NameNode") - return set(filter(lambda x: x in concept_name and context.sheerka.is_not_a_variable(x), names)) + debugger.debug_var("possible_vars", possible_vars, hint="from NameNode") + return [PossibleVariable(v) for v in possible_vars] # # case of BNF # if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression): - visitor = ConceptOrRuleNameVisitor() + visitor = ConceptOrRuleVariableVisitor() visitor.visit(ret_value.value.value) - debugger.debug_var("names", visitor.names, hint="from BNF") - return set(visitor.names) + debugger.debug_var("names", visitor.variables, hint="from BNF") + return visitor.variables # # Case of python code @@ -198,31 +262,43 @@ class DefConceptEvaluator(OneReturnValueEvaluator): if len(concept_name) > 1: visitor = UnreferencedVariablesVisitor(context) names = visitor.get_names(python_node.ast_) + possible_vars = filter(lambda x: x in concept_name and context.sheerka.is_not_a_variable(x), names) debugger.debug_var("names", names, hint="from python node") - return set(filter(lambda x: x in concept_name and context.sheerka.is_not_a_variable(x), names)) + debugger.debug_var("possible_vars", possible_vars, hint="from python node") + return [PossibleVariable(v) for v in possible_vars] else: - return set() + return [] # - # Concept + # Case of Concept + # + if (concept := get_inner_concept(ret_value.value)) is not None and len(concept_name) > 1: + # use the variables of the concept is any + names = [var_value or var_name for 
var_name, var_value in concept.get_metadata().variables] + possible_vars = filter(lambda x: context.sheerka.is_not_a_variable(x), names) + debugger.debug_var("names", names, hint="from concept") + debugger.debug_var("possible_vars", possible_vars, hint="from concept") + return [PossibleVariable(v) for v in possible_vars] + + # + # Other cases # if isinstance(ret_value.value, ParserResultConcept) and len(concept_name) > 1: - variables = set() - source = ret_value.value.source.as_text() if isinstance(ret_value.value.source, - ParserInput) else ret_value.value.source + + source = ret_value.value.source.as_text() if isinstance(ret_value.value.source, ParserInput) else \ + ret_value.value.source tokens = ret_value.value.tokens or list(Tokenizer(source, yield_eof=False)) - possible_vars = set() + names = [] for t in tokens: if t.type == TokenKind.RULE: for v in [v for v in t.value if v is not None]: - possible_vars.add(v) + names.append(v) else: - possible_vars.add(t.str_value) + names.append(t.str_value) - for identifier in [i for i in concept_name if str(i).isalnum()]: - if identifier in possible_vars: - variables.add(identifier) - debugger.debug_var("names", variables, hint="from concept") - return variables + possible_vars = filter(lambda x: context.sheerka.is_not_a_variable(x), names) + debugger.debug_var("names", names, hint="from source") + debugger.debug_var("possible_vars", possible_vars, hint="from source") + return [PossibleVariable(v) for v in possible_vars] - return set() + return [] diff --git a/src/parsers/BaseCustomGrammarParser.py b/src/parsers/BaseCustomGrammarParser.py index 558295c..086e3af 100644 --- a/src/parsers/BaseCustomGrammarParser.py +++ b/src/parsers/BaseCustomGrammarParser.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field import core.utils -from core.tokenizer import Keywords, TokenKind, Tokenizer +from core.tokenizer import Keywords, TokenKind, Tokenizer, Token from parsers.BaseParser import Node, ParsingError, 
UnexpectedEofParsingError, UnexpectedTokenParsingError, \ BaseParserInputParser @@ -161,16 +161,16 @@ class BaseCustomGrammarParser(BaseParserInputParser): return tokens[pos:] - def get_parts(self, keywords, expected_first_token=None, strip_tokens=False): + def get_parts(self, keywords, expected_first_token=None, strip_tokens=False, allow_multiple=None): """ Reads Parser Input and groups the tokens by keywords ex: tokens = Tokenizer("as a b c pre u v w where x y z") keywords = ["as", "pre", "where"] assert get_parts(keywords) == { - Keyword("as"): [Token("a"), Token(), Token("b"), Token(), Token("c"), Token()], - Keyword("pre"): [Token("u"), Token(), Token("v"), Token(), Token("w"), Token()], - Keyword("where"): [Token("x"), Token(), Token("y"), Token(), Token("z"), Token()]} + Keyword("as"): [Token("as"), Token("a"), Token(), Token("b"), Token(), Token("c"), Token()], + Keyword("pre"): [Token("pre"), Token("u"), Token(), Token("v"), Token(), Token("w"), Token()], + Keyword("where"): [Token("where"), Token("x"), Token(), Token("y"), Token(), Token("z"), Token()]} * The order of appearance of the keywords is not important "as w pre y where z" and "where z pre y as w" will produce the same dictionary @@ -179,27 +179,86 @@ class BaseCustomGrammarParser(BaseParserInputParser): where "x y" will produce the entry Keyword("where"): [Token("x"), Token(), Token("y"), Token()] where 'x y' will produce the entry Keyword("where"): [Token("'x y'")] + * If a keyword does not appears in allow_multiple, it will recognized only once + tokens = Tokenizer("def concept x is a concept") + keywords = ["concept"], allow_multiple={} + assert get_parts(keywords) == { + Keyword("concept"): [Token("x"), Token(), Token("is"), Token(), Token("concept")]} + + * If the token appears in allow_multiple, it can be parse several time + in this case, in result, one token will represent one occurrence of the keyword (whitespaces are discarded) + tokens = Tokenizer("def_var var1 def_var var2") + 
keywords = ["def_var"], allow_multiple={"def_var"} + assert get_parts(keywords) == { + Keyword("def_var"): [Token("def_var"), Token("var1"), Token("var2")]} + + Long declarations are transformed into a string token + tokens = Tokenizer("def_var a very long declaration") + keywords = ["def_var"], allow_multiple={"def_var"} + assert get_parts(keywords) == { + Keyword("def_var"): [Token("def_var"), Token("'a very long declaration'")]} + :param keywords: - :param expected_first_token: it must be a KeyW + :param expected_first_token: it must be a Keyword :param strip_tokens: if True, the returned tokens will be trimmed + :param allow_multiple: set indicating the keywords that may appear several times :return: dictionary """ + if allow_multiple is None: + allow_multiple = set() - def new_part(t, cma, p): + def new_part(_token, _colon_mode_activated, _previous, _already_found): """ - :param t: token - :param cma: colon_mode_activated - :param p: previous token + :param _token: current token + :param _colon_mode_activated: colon_mode_activated + :param _previous: previous token + :param _already_found: keywords that are already found :return: """ - if t.value not in keywords: + if _token.value not in keywords: + # not even a keyword! 
return False - if not cma or not p: + if _token.value in _already_found and _token.value not in allow_multiple: + # keywords are recognized only once + return False + + if not _colon_mode_activated or not _previous: return True - return p.line != t.line + return _previous.line != _token.line + + def manage_buffer(_res, _keyword, _buffer): + stripped = core.utils.strip_tokens(_buffer) + + # manage colon first, to sure that what is protected by the quotes remains protected + if len(stripped) > 0 and stripped[0].type == TokenKind.COLON: + body = self.get_body(stripped[1:]) + if body: + _res[_keyword].extend(body) + + # only add one token when allow multiple is True + elif _keyword.value in allow_multiple: + if len(stripped) > 1: + buffer_as_str_token = Token(TokenKind.STRING, + "'" + core.utils.get_text_from_tokens(stripped) + "'", + stripped[0].index, + stripped[0].line, + stripped[0].column) + _res[_keyword].append(buffer_as_str_token) + else: + _res[_keyword].append(stripped[0]) + + # replace double quoted strings by their content + elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"': + _res[_keyword].extend(list(Tokenizer(stripped[0].strip_quote, yield_eof=False))) + + elif strip_tokens: + _res[_keyword].extend(stripped) + + else: + _res[_keyword].extend(_buffer) if self.parser_input.token is None: self.add_error(KeywordNotFound([], keywords)) @@ -223,6 +282,9 @@ class BaseCustomGrammarParser(BaseParserInputParser): previous_token = None res = {} + keywords_found = set() + keyword = None + buffer = [] # More explanations on colon_mode_activated # You can use the pattern @@ -248,39 +310,31 @@ class BaseCustomGrammarParser(BaseParserInputParser): # loop thru the tokens, and put them in the correct tokens_found_by_parts entry while True: - if new_part(token, colon_mode_activated, previous_token): - keyword = Keywords(token.value) - if keyword in res: - # a part is defined more than once - 
self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations.")) - break + if new_part(token, colon_mode_activated, previous_token, keywords_found): + # manage the previous part + if buffer: + manage_buffer(res, keyword, buffer) + buffer.clear() - res[keyword] = [token] # to keep track of when it starts + keyword = Keywords(token.value) + if keyword not in res: + res[keyword] = [token] # to keep track of when it starts + + keywords_found.add(token.value) colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON if not self.parser_input.next_token(): self.add_error(UnexpectedEofParsingError(f"While parsing keyword '{keyword.value}'.")) break else: - res[keyword].append(token) + buffer.append(token) if not self.parser_input.next_token(skip_whitespace=False): break previous_token = token token = self.parser_input.token - # Post process the result if needed - for k, v in res.items(): - stripped = core.utils.strip_tokens(v[1:]) - - # manage colon first, to sure that what is protected by the quotes remains protected - if len(stripped) > 0 and stripped[0].type == TokenKind.COLON: - body = self.get_body(stripped[1:]) - if body: - res[k] = v[0:1] + body - # replace double quoted strings by their content - elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"': - res[k] = v[0:1] + list(Tokenizer(stripped[0].strip_quote, yield_eof=False)) - elif strip_tokens: - res[k] = core.utils.strip_tokens(v) + # do not forget to flush the buffer + if buffer: + manage_buffer(res, keyword, buffer) return res diff --git a/src/parsers/DefConceptParser.py b/src/parsers/DefConceptParser.py index 66f1189..aa2c35b 100644 --- a/src/parsers/DefConceptParser.py +++ b/src/parsers/DefConceptParser.py @@ -40,6 +40,8 @@ class DefConceptNode(CustomGrammarParserNode): ret: ReturnValueConcept = NotInit definition: ReturnValueConcept = NotInit definition_type: str = None + auto_eval: bool = False + variables: list = 
NotInit @dataclass() @@ -53,7 +55,15 @@ class DefConceptParser(BaseCustomGrammarParser): Parse sheerka specific grammar (like def concept) """ - KEYWORDS = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST, Keywords.RET] + KEYWORDS = [Keywords.CONCEPT, + Keywords.FROM, + Keywords.AS, + Keywords.WHERE, + Keywords.PRE, + Keywords.POST, + Keywords.RET, + Keywords.AUTO_EVAL, + Keywords.DEF_VAR] KEYWORDS_VALUES = [k.value for k in KEYWORDS] def __init__(self, **kwargs): @@ -112,7 +122,9 @@ class DefConceptParser(BaseCustomGrammarParser): # Keywords.PRE to know if the conditions to evaluate the concept # Keywords.POST to apply or verify once the concept is executed # Keywords.RET to transform the concept into another concept - parts = self.get_parts(self.KEYWORDS_VALUES, expected_first_token=Keywords.CONCEPT) + parts = self.get_parts(self.KEYWORDS_VALUES, + expected_first_token=Keywords.CONCEPT, + allow_multiple={Keywords.DEF_VAR.value}) if parts is None: return None @@ -134,8 +146,58 @@ class DefConceptParser(BaseCustomGrammarParser): node.post = self.get_ast(Keywords.POST, parts) node.ret = self.get_ast(Keywords.RET, parts) + # other information + node.auto_eval = self.get_concept_auto_eval(parts) + node.variables = self.get_concept_variables(parts) + return node + def get_concept_auto_eval(self, parts): + """ + Recognize the part corresponding to auto_eval True | False + :param parts: + :return: + """ + try: + tokens = parts[Keywords.AUTO_EVAL] + + if len(tokens) == 1: + self.add_error(SyntaxErrorNode(tokens, f"Empty 'auto_eval' declaration.")) + return None + + if len(tokens) > 2 or tokens[1].type != TokenKind.IDENTIFIER: + self.add_error(CannotHandleParsingError(tokens, f"Invalid 'auto_eval' declaration")) + return None + + auto_eval_value = tokens[1].value.lower() + if auto_eval_value == "true": + return True + elif auto_eval_value == "false": + return False + + self.add_error(CannotHandleParsingError(tokens, f"Invalid 
'auto_eval' declaration")) + return None + + except KeyError: + return False + + def get_concept_variables(self, parts): + """ + Recognize the part corresponding to def_var var_name + :param parts: + :return: + """ + try: + tokens = parts[Keywords.DEF_VAR] + + if len(tokens) == 1: + self.add_error(SyntaxErrorNode(tokens, f"Empty 'def_var' declaration.")) + return None + + return [t.strip_quote for t in tokens[1:]] + except KeyError: + return NotInit + def get_concept_name(self, tokens): name_tokens = core.utils.strip_tokens(tokens[1:]) if len(name_tokens) == 0: diff --git a/tests/BaseTest.py b/tests/BaseTest.py index 0a663cb..bd0aab6 100644 --- a/tests/BaseTest.py +++ b/tests/BaseTest.py @@ -45,7 +45,7 @@ class InitTestHelper: c.get_metadata().definition_type = DEFINITION_TYPE_BNF else: raise Exception(f"Error in bnf definition '{c.get_metadata().definition}'", - self.sheerka.get_error(res)) + self.sheerka.get_errors(res)) if create_new: self.sheerka.create_new_concept(self.context, c) @@ -89,7 +89,7 @@ class InitTestHelper: res = self.sheerka.create_new_rule(self.context, rule) if not res.status: raise Exception(f"Error in rule definition '{res.body}'", - self.sheerka.get_error(res)) + self.sheerka.get_errors(res)) self.items.append(res.body.body) else: self.items.append(rule) diff --git a/tests/core/test_sheerka.py b/tests/core/test_sheerka.py index e58d779..10d8033 100644 --- a/tests/core/test_sheerka.py +++ b/tests/core/test_sheerka.py @@ -3,13 +3,14 @@ import os import pytest from conftest import SHEERKA_TEST_FOLDER -from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, UserInputConcept +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, UserInputConcept, UnknownConcept from core.builtin_concepts_ids import AllBuiltinConcepts from core.concept import Concept, PROPERTIES_TO_SERIALIZE, ConceptParts, get_concept_attrs from core.global_symbols import NotInit from core.sheerka.Sheerka import Sheerka, BASE_NODE_PARSER_CLASS -from 
core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager +from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager, ValueNotFound from core.tokenizer import Token, TokenKind +from parsers.PythonParser import PythonErrorNode from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -425,6 +426,139 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka): assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.ONTOLOGY_ALREADY_DEFINED) + @pytest.mark.parametrize("obj, expected", [ + ("a string", []), + (True, []), + (False, []), + (Concept("foo"), []), + (Concept("foo", body=False).auto_init(), []), + (UnknownConcept(), [UnknownConcept()]), + (Concept("foo", body=UnknownConcept()).auto_init(), [UnknownConcept()]), + (PythonErrorNode("msg", None), [PythonErrorNode("msg", None)]) + ]) + def test_i_can_get_error_for_simple_objects(self, obj, expected): + sheerka, context = self.init_test().unpack() + + assert sheerka.get_errors(obj) == expected + + def test_i_can_get_error_when_builtin_concept_in_error(self): + sheerka, context = self.init_test().unpack() + + obj = sheerka.new(BuiltinConcepts.ONTOLOGY_ALREADY_DEFINED) + assert sheerka.get_errors(obj) == [obj] + + def test_i_can_get_error_when_return_value(self): + sheerka, context = self.init_test().unpack() + + error = sheerka.err("an error") + ret_val = ReturnValueConcept("Test", False, sheerka.err("an error")) + assert sheerka.get_errors(ret_val) == [error] + + def test_i_can_get_inner_error(self): + sheerka, context = self.init_test().unpack() + + error = sheerka.err("an error") + ret_val = ReturnValueConcept("Test", False, sheerka.err("an error")) + assert sheerka.get_errors(ret_val) == [error] + + def test_i_can_get_error_when_embedded_errors(self): + sheerka, context = self.init_test().unpack() + + concept_eval_error = 
sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR) + unknown_concept = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT) + not_an_error = sheerka.new(BuiltinConcepts.AUTO_EVAL) + error = sheerka.err([concept_eval_error, unknown_concept, not_an_error]) + + ret_val = ReturnValueConcept("Test", False, error) + errors_found = sheerka.get_errors(ret_val) + + assert errors_found == [error, concept_eval_error, unknown_concept] + + def test_i_can_get_error_from_list(self): + sheerka, context = self.init_test().unpack() + + concept_eval_error = sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR) + unknown_concept = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT) + not_an_error = sheerka.new(BuiltinConcepts.AUTO_EVAL) + error = sheerka.err([concept_eval_error, unknown_concept, not_an_error]) + ret_val_1 = ReturnValueConcept("Test", False, error) + + python_error = PythonErrorNode("msg", Exception()) + value_not_found = ValueNotFound("item", "value") + multiple_error = sheerka.new(BuiltinConcepts.MULTIPLE_ERRORS, body=[python_error, value_not_found]) + ret_val_2 = ReturnValueConcept("Test", False, multiple_error) + + errors_found = sheerka.get_errors([ret_val_1, ret_val_2]) + + assert errors_found == [error, concept_eval_error, unknown_concept, + multiple_error, python_error, value_not_found] + + def test_i_can_filter_error_by_concept_key(self): + sheerka, context = self.init_test().unpack() + + concept_eval_error = sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR) + unknown_concept = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT) + python_error = PythonErrorNode("msg", Exception()) + error = sheerka.err([concept_eval_error, unknown_concept, python_error]) + ret_val = ReturnValueConcept("Test", False, error) + + errors_found = sheerka.get_errors(ret_val, __type=BuiltinConcepts.CONCEPT_EVAL_ERROR) + assert errors_found == [concept_eval_error] + + def test_i_can_filter_error_by_class_name(self): + sheerka, context = self.init_test().unpack() + + concept_eval_error = 
sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR) + unknown_concept = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT) + python_error = PythonErrorNode("msg", Exception()) + error = sheerka.err([concept_eval_error, unknown_concept, python_error]) + ret_val = ReturnValueConcept("Test", False, error) + + errors_found = sheerka.get_errors(ret_val, __type="PythonErrorNode") + assert errors_found == [python_error] + + def test_i_can_filter_error_by_concept_attribute(self): + sheerka, context = self.init_test().unpack() + + concept_eval_error = sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR) + unknown_concept = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, concept_ref="a_concept_ref") + python_error = PythonErrorNode("msg", Exception()) + error = sheerka.err([concept_eval_error, unknown_concept, python_error]) + ret_val = ReturnValueConcept("Test", False, error) + + errors_found = sheerka.get_errors(ret_val, concept_ref="a_concept_ref") + assert errors_found == [unknown_concept] + + def test_i_can_filter_error_by_class_attribute(self): + sheerka, context = self.init_test().unpack() + + concept_eval_error = sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR) + unknown_concept = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, concept_ref="a_concept_ref") + python_error = PythonErrorNode("error source", Exception()) + error = sheerka.err([concept_eval_error, unknown_concept, python_error]) + ret_val = ReturnValueConcept("Test", False, error) + + errors_found = sheerka.get_errors(ret_val, source="error source") + assert errors_found == [python_error] + + def test_i_can_filter_error_on_multiple_criteria(self): + sheerka, context = self.init_test().unpack() + + concept_eval_error = sheerka.new(BuiltinConcepts.CONCEPT_EVAL_ERROR) + unknown_concept = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, concept_ref="a_concept_ref") + value_not_found = ValueNotFound("an_item", "a value") + error = sheerka.err([concept_eval_error, unknown_concept, value_not_found]) + ret_val = ReturnValueConcept("Test", 
False, error) + + errors_found = sheerka.get_errors(ret_val, __type="ValueNotFound", item="an_item", value="a value") + assert errors_found == [value_not_found] + + def test_i_cannot_get_error_when_return_value_s_status_is_true(self): + sheerka, context = self.init_test().unpack() + + ret_val = ReturnValueConcept("Test", True, sheerka.err("an error")) + assert sheerka.get_errors(ret_val) == [] + class TestSheerkaUsingFileBasedSheerka(TestUsingFileBasedSheerka): diff --git a/tests/evaluators/test_DefConceptEvaluator.py b/tests/evaluators/test_DefConceptEvaluator.py index 1df9768..b879bd2 100644 --- a/tests/evaluators/test_DefConceptEvaluator.py +++ b/tests/evaluators/test_DefConceptEvaluator.py @@ -7,7 +7,7 @@ from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, DEFINITI from core.sheerka.services.SheerkaConceptManager import NoFirstTokenError from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer -from evaluators.DefConceptEvaluator import DefConceptEvaluator +from evaluators.DefConceptEvaluator import DefConceptEvaluator, PossibleVariable, CertainVariable from parsers.BaseParser import BaseParser from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression, VariableExpression @@ -144,20 +144,112 @@ class TestDefConceptEvaluator(TestUsingMemoryBasedSheerka): assert created_concept.get_metadata().definition == "hello a" assert created_concept.get_metadata().definition_type == "def" - def test_i_can_add_concept_with_the_correct_variables_when_referencing_other_concepts(self): + @pytest.mark.parametrize("expression, name, expected", [ + ("isinstance(a, str)", "a b", {"a"}), + ("a.location = b", "a is in b", {"a", "b"}), + ("a.location = b", "'a' is in b", {"b"}), + ("date.today()", "what is the date", set()), + ("a.location", "where is a", {"a"}) + ]) + def test_i_can_get_variables_from_python_node_when_long_name(self, 
expression, name, expected): + ret_val = self.get_concept_part(expression) context = self.get_context() - def_concept_return_value = self.get_def_concept( - name="x plus y", - where=self.pretval(Concept("u is a v").def_var("u").def_var("v"), source="x is a number"), - body=self.pretval(Concept("add a b").def_var("a").def_var("b"), source="add x y"), ) - evaluated = DefConceptEvaluator().eval(context, def_concept_return_value) + resolved_expected = [PossibleVariable(e) for e in expected] + assert DefConceptEvaluator.get_variables(context, ret_val, name.split()) == resolved_expected + + def test_i_can_get_variables_when_keywords(self): + sheerka, context = self.init_concepts() + + def_concept = self.get_def_concept("condition pre").value.value + name_to_use = DefConceptEvaluator.get_name_to_use(def_concept) + concept_part = self.get_concept_part("pre") + + assert DefConceptEvaluator.get_variables(context, concept_part, name_to_use) == [PossibleVariable("pre")] + + def test_i_can_get_variable_when_rule_is_defined(self): + sheerka, context = self.init_test().unpack() + def_concept_parser = DefConceptParser() + + parsed_ret_val = def_concept_parser.parse(context, ParserInput("def concept rule x as r:|x:")) + assert parsed_ret_val.status # sanity check + + evaluated = DefConceptEvaluator().eval(context, parsed_ret_val) + assert evaluated.status + assert evaluated.body.body.key == "rule __var__0" + assert evaluated.body.body.get_metadata().variables == [("x", None)] + + def test_i_cannot_get_variables_from_python_node_when_name_has_only_one_token(self): + ret_val = self.get_concept_part("isinstance(a, str)") + context = self.get_context() + + assert DefConceptEvaluator.get_variables(context, ret_val, ["a"]) == [] + + def test_i_can_get_variables_from_definition(self): + parsing_expression = Sequence(ConceptExpression('mult'), + ZeroOrMore(Sequence(StrMatch("+"), ConceptExpression("add")))) + ret_val = self.get_return_value("mult (('+'|'-') add)?", parsing_expression) + + 
expected = [CertainVariable("mult"), CertainVariable("add")] + assert DefConceptEvaluator.get_variables(self.get_context(), ret_val, []) == expected + + def test_i_can_get_variable_from_bnf_definition_2(self): + sheerka, context, one, two = self.init_concepts("one", "two") + + text = "def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit" + def_ret_val = DefConceptParser().parse(context, ParserInput(text)) + concept_definition = def_ret_val.value.body.definition + + expected = [CertainVariable("unit"), CertainVariable("one"), CertainVariable("two")] + assert DefConceptEvaluator.get_variables(context, concept_definition, []) == expected + + def test_i_can_recognize_variables_when_referencing_other_concepts(self): + sheerka, context, isa_concept = self.init_concepts(Concept("x is an y").def_var("x").def_var("y")) + + text = "def concept what x is y pre is_question() where x is an adjective as get_attr(x, y)" + def_ret_val = DefConceptParser().parse(context, ParserInput(text)) + evaluated = DefConceptEvaluator().eval(context, def_ret_val) + new_concept = evaluated.body.body assert evaluated.status - assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT) + assert new_concept.get_metadata().variables == [('x', None), ('y', None)] - created_concept = evaluated.body.body - assert created_concept.get_metadata().variables == [("x", None), ("y", None)] + def test_i_can_recognize_variables_when_given_in_concept_definition(self): + sheerka, context = self.init_test().unpack() + + text = "def concept a plus b as a + b def_var plus" + def_ret_val = DefConceptParser().parse(context, ParserInput(text)) + evaluated = DefConceptEvaluator().eval(context, def_ret_val) + new_concept = evaluated.body.body + + assert evaluated.status + assert new_concept.get_metadata().variables == [('plus', None)] + + def test_i_can_recognize_variables_when_referencing_other_concepts_with_variable_mapping(self): + sheerka, context, number, isa, add = 
self.init_concepts( + "number", + Concept("u is a v").def_var("u").def_var("v"), + Concept("add a b").def_var("a").def_var("b") + ) + + text = "def concept x plus y where x is a number as add x y" + def_ret_val = DefConceptParser().parse(context, ParserInput(text)) + evaluated = DefConceptEvaluator().eval(context, def_ret_val) + new_concept = evaluated.body.body + + assert evaluated.status + assert new_concept.get_metadata().variables == [('x', None), ('y', None)] + + def test_i_do_no_mixed_up_concept_and_variable_name(self): + sheerka, context, activate_debug = self.init_concepts(Concept("activate debug")) + + text = "def concept debug on as activate debug" + def_ret_val = DefConceptParser().parse(context, ParserInput(text)) + evaluated = DefConceptEvaluator().eval(context, def_ret_val) + new_concept = evaluated.body.body + + assert evaluated.status + assert new_concept.get_metadata().variables == [] def test_other_concepts_are_not_variables(self): sheerka, context, *concepts = self.init_test().with_concepts("little", "size", create_new=True).unpack() @@ -166,7 +258,7 @@ class TestDefConceptEvaluator(TestUsingMemoryBasedSheerka): name_to_use = DefConceptEvaluator.get_name_to_use(def_concept_node) concept_part = self.get_concept_part("set_attr(x, size, little)") - assert DefConceptEvaluator.get_variables(context, concept_part, name_to_use) == {"x"} + assert DefConceptEvaluator.get_variables(context, concept_part, name_to_use) == [PossibleVariable("x")] def test_that_the_new_concept_is_correctly_saved_in_db(self): context = self.get_context() @@ -199,41 +291,6 @@ class TestDefConceptEvaluator(TestUsingMemoryBasedSheerka): assert from_db.get_compiled() == {} # ast is not saved in db - @pytest.mark.parametrize("expression, name, expected", [ - ("isinstance(a, str)", "a b", {"a"}), - ("a.location = b", "a is in b", {"a", "b"}), - ("a.location = b", "'a' is in b", {"b"}), - ("date.today()", "what is the date", set()), - ("a.location", "where is a", {"a"}) - ]) - def 
test_i_can_get_variables_from_python_node_when_long_name(self, expression, name, expected): - ret_val = self.get_concept_part(expression) - context = self.get_context() - - assert DefConceptEvaluator.get_variables(context, ret_val, name.split()) == expected - - def test_i_can_get_variables_when_keywords(self): - sheerka, context = self.init_concepts() - - def_concept = self.get_def_concept("condition pre").value.value - name_to_use = DefConceptEvaluator.get_name_to_use(def_concept) - concept_part = self.get_concept_part("pre") - - assert DefConceptEvaluator.get_variables(context, concept_part, name_to_use) == {"pre"} - - def test_i_cannot_get_variables_from_python_node_when_name_has_only_one_token(self): - ret_val = self.get_concept_part("isinstance(a, str)") - context = self.get_context() - - assert DefConceptEvaluator.get_variables(context, ret_val, ["a"]) == set() - - def test_i_can_get_variables_from_definition(self): - parsing_expression = Sequence(ConceptExpression('mult'), - ZeroOrMore(Sequence(StrMatch("+"), ConceptExpression("add")))) - ret_val = self.get_return_value("mult (('+'|'-') add)?", parsing_expression) - - assert DefConceptEvaluator.get_variables(self.get_context(), ret_val, []) == {"add", "mult"} - def test_concept_that_references_itself_is_correctly_created(self): context = self.get_context() def_concept_as_return_value = self.get_def_concept("foo", body="foo") @@ -247,18 +304,6 @@ class TestDefConceptEvaluator(TestUsingMemoryBasedSheerka): assert new_concept.values() == {} assert new_concept.get_metadata().variables == [] - def test_i_can_get_variable_when_rule_is_defined(self): - sheerka, context = self.init_test().unpack() - def_concept_parser = DefConceptParser() - - parsed_ret_val = def_concept_parser.parse(context, ParserInput("def concept rule x as r:|x:")) - assert parsed_ret_val.status # sanity check - - evaluated = DefConceptEvaluator().eval(context, parsed_ret_val) - assert evaluated.status - assert evaluated.body.body.key == "rule 
__var__0" - assert evaluated.body.body.get_metadata().variables == [("x", None)] - def test_i_can_recognize_variable_when_keyword_argument(self): sheerka, context = self.init_test().unpack() def_concept_parser = DefConceptParser() @@ -309,7 +354,19 @@ class TestDefConceptEvaluator(TestUsingMemoryBasedSheerka): assert created_concept.get_metadata().variables == [("x", None)] assert created_concept._bnf == Sequence(StrMatch("hello"), VariableExpression("x")) + def test_i_can_eval_when_auto_eval_is_true(self): + sheerka, context = self.init_test().unpack() + def_ret_val = DefConceptParser().parse(context, ParserInput("def concept foo auto_eval True")) + evaluated = DefConceptEvaluator().eval(context, def_ret_val) + + assert evaluated.status + assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT) + created_concept = evaluated.body.body + + assert sheerka.get_property(created_concept, BuiltinConcepts.ISA) == {sheerka.new(BuiltinConcepts.AUTO_EVAL)} + def test_i_cannot_eval_bnf_concept_with_unknown_variable(self): + # testing MandatoryVariable context = self.get_context() def_ret_val = DefConceptParser().parse(context, ParserInput("def concept name from bnf unknown foo")) evaluated = DefConceptEvaluator().eval(context, def_ret_val) diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index 5e53290..95cf658 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -683,12 +683,12 @@ as: assert len(res) == 1 assert not res[0].status assert sheerka.isinstance(res[0].body, BuiltinConcepts.MULTIPLE_ERRORS) - assert str(BuiltinConcepts.CONDITION_FAILED) in [error.key for error in sheerka.get_error(res[0].body.body)] + assert str(BuiltinConcepts.CONDITION_FAILED) in [error.key for error in sheerka.get_errors(res[0].body.body)] res = sheerka.evaluate_user_input("eval twenty three") assert len(res) == 1 assert not res[0].status - assert str(BuiltinConcepts.CONDITION_FAILED) in 
[error.key for error in sheerka.get_error(res[0].body.body)] + assert str(BuiltinConcepts.CONDITION_FAILED) in [error.key for error in sheerka.get_errors(res[0].body.body)] def test_i_can_manage_some_type_of_infinite_recursion(self): sheerka = self.get_sheerka() diff --git a/tests/parsers/test_BaseCustomGrammarParser.py b/tests/parsers/test_BaseCustomGrammarParser.py index e6022cd..46bd042 100644 --- a/tests/parsers/test_BaseCustomGrammarParser.py +++ b/tests/parsers/test_BaseCustomGrammarParser.py @@ -7,14 +7,23 @@ from parsers.BaseParser import UnexpectedEofParsingError, UnexpectedTokenParsing from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +def get_tokens(items): + return [list(Tokenizer(item, yield_eof=False))[0] for item in items] + + class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka): @staticmethod def compare_results(actual, expected, compare_str=False): resolved_expected = {} for k, v in expected.items(): - tokens = list(Tokenizer(v, yield_eof=False)) - resolved_expected[k] = [tokens[0]] + tokens[2:] + if isinstance(v, str): + # case like {Keywords.DEF_VAR: "def_var var1 def_var var2"} + tokens = list(Tokenizer(v, yield_eof=False)) + resolved_expected[k] = [tokens[0]] + tokens[2:] + else: + # case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])} + resolved_expected[k] = v def get_better_representation(value): better_repr = {} @@ -23,6 +32,7 @@ class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka): better_repr[k] = [tokens[0].repr_value, value] return better_repr + # it's easier to compare two list of string actual_to_compare = get_better_representation(actual) expected_to_compare = get_better_representation(resolved_expected) @@ -67,6 +77,20 @@ func(a) res = parser.get_parts(["when"]) self.compare_results(res, expected) + @pytest.mark.parametrize("text, allow_multiple, expected", [ + ("def_var var1 def_var var2", {}, {Keywords.DEF_VAR: "def_var var1 def_var var2"}), + ("def_var var1 def_var 
var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}), + ("def_var x y z def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}), + ("def_var 'x y z' def_var var2", {"def_var"}, {Keywords.DEF_VAR: get_tokens(["def_var", "'x y z'", "var2"])}), + ("def_var var1 def_var x y z def_var var2", {"def_var"}, + {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "'x y z'", "var2"])}), + ]) + def test_i_can_get_parts_when_allow_multiple_is_set(self, text, allow_multiple, expected): + sheerka, context, parser = self.init_parser(text) + + res = parser.get_parts(["def_var"], allow_multiple=allow_multiple) + self.compare_results(res, expected) + @pytest.mark.parametrize("text", [ "", "no keyword", @@ -88,14 +112,6 @@ func(a) "when", [Keywords.PRINT])] - def test_i_can_detect_when_a_keyword_appears_several_times(self): - sheerka, context, parser = self.init_parser("print hello when True print True") - - parser.get_parts(["print"]) - assert len(parser.error_sink) == 1 - assert isinstance(parser.error_sink[0], SyntaxErrorNode) - assert parser.error_sink[0].message == "Too many 'print' declarations." 
- @pytest.mark.parametrize("text", [ "print", "print ", diff --git a/tests/parsers/test_DefConceptParser.py b/tests/parsers/test_DefConceptParser.py index a094894..b1990ab 100644 --- a/tests/parsers/test_DefConceptParser.py +++ b/tests/parsers/test_DefConceptParser.py @@ -12,7 +12,7 @@ from parsers.BaseParser import UnexpectedEofParsingError from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence, RegExMatch, OneOrMore, \ VariableExpression -from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode +from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode, CannotHandleParsingError from parsers.DefConceptParser import UnexpectedTokenParsingError, DefConceptNode from parsers.FunctionParser import FunctionParser from parsers.PythonParser import PythonParser, PythonNode @@ -636,3 +636,77 @@ from give me the date ! assert res.value.source == text assert isinstance(res.value, ParserResultConcept) assert node == expected + + def test_i_can_parse_when_multiple_keyword_and_no_ambiguity(self): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput("def concept x is a concept")) + + assert res.status + + @pytest.mark.parametrize("text, expected", [ + ("def concept foo auto_eval True", True), + ("def concept foo auto_eval true", True), + ("def concept foo auto_eval False", False), + ("def concept foo auto_eval false", False), + ]) + def test_i_can_parse_auto_eval(self, text, expected): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + def_concept_node = res.value.value + + assert res.status + assert res.who == parser.name + assert res.value.source == text + assert isinstance(res.value, ParserResultConcept) + + assert def_concept_node.auto_eval == expected + + @pytest.mark.parametrize("text", [ + "def concept foo auto_eval", + "def 
concept foo auto_eval as 1" + ]) + def test_i_cannot_parse_when_missing_auto_eval_value(self, text): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR) + assert sheerka.has_error(res, __type="SyntaxErrorNode", message="Empty 'auto_eval' declaration.") + + def test_i_cannot_parse_when_wrong_auto_eval_value(self): + sheerka, context, parser, *concepts = self.init_parser() + text = "def concept foo auto_eval wrong_value" + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR) + assert isinstance(res.value.body[0], CannotHandleParsingError) + + @pytest.mark.parametrize("text, expected", [ + ("def concept foo def_var var", ["var"]), + ("def concept foo def_var var1 def_var var2", ["var1", "var2"]), + ("def concept foo def_var var1 def_var long var name def_var var2", ["var1", "long var name", "var2"]), + ]) + def test_i_can_parse_variable_definition(self, text, expected): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + def_concept_node = res.value.value + + assert res.status + assert res.who == parser.name + assert res.value.source == text + assert isinstance(res.value, ParserResultConcept) + + assert def_concept_node.variables == expected + + @pytest.mark.parametrize("text", [ + "def concept foo def_var", + "def concept foo def_var as 1" + ]) + def test_i_cannot_parse_variable_definition_when_missing_value(self, text): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR) + assert sheerka.has_error(res, __type="SyntaxErrorNode", message="Empty 'def_var' declaration.")