From 54e5681c5ac546115d6c319219acc181c651811e Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Mon, 6 Sep 2021 11:51:50 +0200 Subject: [PATCH] Fixed #109 : Mix python and concept. List comprehension Fixed #110 : SheerkaDebugManager: add list_debug_settings Fixed #111 : SheerkaDebugManager: Implement ListDebugLogger Fixed #112 : SyaNodeParser: rewrite this parser Fixed #113 : Sheerka.: Add enable_parser_caching to disable parsers caching Fixed #114 : SyaNodeParser : Implement fast cache to resolve unrecognized tokens requests Fixed #115 : BnfNodeParser : Implement fast cache to resolve unrecognized tokens requests Fixed #116 : SequenceNodeParser : Implement fast cache to resolve unrecognized tokens requests Fixed #117 : ResolveMultiplePluralAmbiguityEvaluator: Resolve Multiple plural ambiguity --- sheerka_backup/admin.sb | 12 +- src/cache/FastCache.py | 6 +- src/core/builtin_concepts.py | 6 +- src/core/builtin_helpers.py | 80 +- src/core/concept.py | 2 +- src/core/sheerka/ExecutionContext.py | 35 +- src/core/sheerka/Sheerka.py | 29 +- src/core/sheerka/services/SheerkaAdmin.py | 13 +- .../sheerka/services/SheerkaDebugManager.py | 364 ++- .../services/SheerkaEvaluateConcept.py | 96 +- src/core/sheerka/services/SheerkaExecute.py | 32 +- .../sheerka/services/SheerkaIsAManager.py | 17 +- .../sheerka/services/SheerkaRuleManager.py | 19 +- .../services/SheerkaVariableManager.py | 13 +- src/core/simple_debug.py | 9 + src/core/tokenizer.py | 30 +- src/core/utils.py | 40 +- src/evaluators/BaseEvaluator.py | 10 +- src/evaluators/DefConceptEvaluator.py | 1 + src/evaluators/ResolveAmbiguityEvaluator.py | 11 +- ...ResolveMultiplePluralAmbiguityEvaluator.py | 49 + src/evaluators/ValidateConceptEvaluator.py | 4 +- src/parsers/BaseCustomGrammarParser.py | 2 +- src/parsers/BaseExpressionParser.py | 160 +- src/parsers/BaseNodeParser.py | 57 +- src/parsers/BaseParser.py | 3 + src/parsers/BnfDefinitionParser.py | 16 +- src/parsers/BnfNodeParser.py | 36 +- src/parsers/DefRuleParser.py 
| 2 +- src/parsers/FunctionParser.py | 8 +- src/parsers/ListComprehensionParser.py | 219 ++ src/parsers/ListParser.py | 77 + src/parsers/LogicalOperatorParser.py | 84 +- src/parsers/RelationalOperatorParser.py | 4 +- src/parsers/SequenceNodeParser.py | 56 +- src/parsers/SyaNodeParser.py | 2335 +++++++--------- src/parsers/UnrecognizedNodeParser.py | 6 +- src/sheerkapython/ExprToPython.py | 389 +++ tests/BaseTest.py | 12 +- tests/core/test_SheerkaDebugManager.py | 28 +- tests/core/test_SheerkaEvaluateConcept.py | 52 +- tests/evaluators/EvaluatorTestsUtils.py | 56 +- tests/evaluators/test_DefConceptEvaluator.py | 12 + ...ResolveMultiplePluralAmbiguityEvaluator.py | 60 + tests/non_reg/test_sheerka_non_reg2.py | 40 +- tests/non_reg/test_sheerka_non_reg_out.py | 21 + tests/parsers/parsers_utils.py | 464 +-- tests/parsers/test_BaseCustomGrammarParser.py | 2 +- tests/parsers/test_BnfNodeParser.py | 22 +- tests/parsers/test_DefConceptParser.py | 4 +- tests/parsers/test_DefRuleParser.py | 4 +- tests/parsers/test_ListComprehensionParser.py | 180 ++ tests/parsers/test_ListParser.py | 69 + tests/parsers/test_LogicalOperatorParser.py | 12 +- tests/parsers/test_SequenceNodeParser.py | 24 +- tests/parsers/test_SyaNodeParser.py | 2488 +++++++++-------- tests/sheerkapython/test_ExprToPython.py | 422 +++ 57 files changed, 5179 insertions(+), 3125 deletions(-) create mode 100644 src/evaluators/ResolveMultiplePluralAmbiguityEvaluator.py create mode 100644 src/parsers/ListComprehensionParser.py create mode 100644 src/parsers/ListParser.py create mode 100644 src/sheerkapython/ExprToPython.py create mode 100644 tests/evaluators/test_ResolveMultiplePluralAmbiguityEvaluator.py create mode 100644 tests/parsers/test_ListComprehensionParser.py create mode 100644 tests/parsers/test_ListParser.py create mode 100644 tests/sheerkapython/test_ExprToPython.py diff --git a/sheerka_backup/admin.sb b/sheerka_backup/admin.sb index 88039bb..60ae33a 100644 --- a/sheerka_backup/admin.sb +++ 
b/sheerka_backup/admin.sb @@ -13,12 +13,14 @@ def concept deactivate debug as set_debug(False) auto_eval True def concept debug on as set_debug(True) auto_eval True def concept debug off as set_debug(False) auto_eval True -def concept activate debug on x as debug_var(x) auto_eval True -def concept debug x as debug_var(x) auto_eval True +def concept activate debug on x as set_debug_var(x) auto_eval True +def concept activate debug on x id=y as set_debug_var(x, y) auto_eval True +def concept debug x as set_debug_var(x) auto_eval True -def concept debug var x as debug_var(variable=x) auto_eval True -def concept debug variable x as debug_var(variable=x) auto_eval True -def concept debug method x as debug_var(method=x) auto_eval True +def concept debug var x as set_debug_var(x) auto_eval True +def concept debug variable x as set_debug_var(variable=x) auto_eval True +def concept debug method x as set_debug_var(method=x) auto_eval True +def concept debug service x as set_debug_var(service=x) auto_eval True def concept deactivate debug on x as debug_var(x, enabled=False) where x auto_eval True diff --git a/src/cache/FastCache.py b/src/cache/FastCache.py index a0098ed..0d853af 100644 --- a/src/cache/FastCache.py +++ b/src/cache/FastCache.py @@ -11,6 +11,7 @@ class FastCache: self.cache = {} self.lru = [] self.default = default + self.calls = {} def __contains__(self, item): return self.has(item) @@ -24,13 +25,16 @@ class FastCache: self.cache[key] = value self.lru.append(key) + self.calls[key] = 0 def has(self, key): return key in self.cache def get(self, key): try: - return self.cache[key] + res = self.cache[key] + self.calls[key] += 1 + return res except KeyError: if self.default: value = self.default(key) diff --git a/src/core/builtin_concepts.py b/src/core/builtin_concepts.py index fdfc1e4..352517c 100644 --- a/src/core/builtin_concepts.py +++ b/src/core/builtin_concepts.py @@ -140,7 +140,10 @@ class ParserResultConcept(Concept): def __repr__(self): text = 
f"ParserResult(parser={self.parser}" - text += f", source='{self.source}')" if self.source else f", body='{self.value}')" + # text += f", source='{self.source}')" if self.source else f", body='{self.value}')" + from core.builtin_helpers import debug_nodes + value = debug_nodes(self.value) if isinstance(self.value, list) else self.value + text += f", source='{self.source}', '{value=}')" return text def __eq__(self, other): @@ -252,6 +255,7 @@ class ListConcept(Concept): class FilteredConcept(Concept): ALL_ATTRIBUTES = ["filtered", "iterable", "predicate", "reason"] + # To explain the reason why it's filtered, you can either # provide the original list (iterable) and the predicate # provide the reason (It may be a CONDITION_FAILED concept) diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index 980b1c5..b8e7c7f 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -5,15 +5,11 @@ from cache.Cache import Cache from core.ast_helpers import ast_to_props from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts, DEFINITION_TYPE_BNF, concept_part_value -from core.global_symbols import NotInit, NotFound, INIT_AST_PARSERS, DEFAULT_EVALUATORS +from core.global_symbols import DEFAULT_EVALUATORS, INIT_AST_PARSERS, NotFound, NotInit from core.rule import Rule from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer, TokenKind +from core.tokenizer import TokenKind, Tokenizer from core.utils import as_bag -from parsers.BaseExpressionParser import compile_disjunctions, AndNode -from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode, SourceCodeWithConceptNode, \ - RuleNode, LexerNode -from parsers.BaseParser import ParsingError PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING] EVAL_ONLY_STEPS = [BuiltinConcepts.BEFORE_EVALUATION, BuiltinConcepts.EVALUATION, 
BuiltinConcepts.AFTER_EVALUATION] @@ -226,9 +222,9 @@ def resolve_ambiguity(context, concepts): remaining_concepts.extend(by_complexity[complexity]) else: for c in by_complexity[complexity]: + from core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints evaluated = context.sheerka.evaluate_concept(context, c, - eval_body=False, - validation_only=True, + hints=EvaluationHints(eval_body=False, expression_only=True), metadata=[ConceptParts.PRE, ConceptParts.WHERE]) if context.sheerka.is_success(evaluated) or evaluated.key == c.key: remaining_concepts.append(c) @@ -255,6 +251,9 @@ def get_condition_complexity(context, condition): # # count the number of conjunctions from parsers.LogicalOperatorParser import LogicalOperatorParser + from parsers.BaseExpressionParser import compile_disjunctions + from parsers.BaseExpressionParser import AndNode + parser = LogicalOperatorParser() res = parser.parse(context, ParserInput(condition)) if not res.status: @@ -314,6 +313,9 @@ def only_parsers_results(context, return_values): :return: """ + from parsers.BaseNodeParser import UnrecognizedTokensNode + from parsers.BaseParser import ParsingError + if not isinstance(return_values, list): return return_values @@ -335,8 +337,8 @@ def only_parsers_results(context, return_values): if isinstance(ret_val.body.body, ParsingError): continue if isinstance(ret_val.body.body, list) and \ - len(ret_val.body.body) == 1 and \ - isinstance(ret_val.body.body[0], UnrecognizedTokensNode): + len(ret_val.body.body) == 1 and \ + isinstance(ret_val.body.body[0], UnrecognizedTokensNode): continue temp_ret_val.append(ret_val) return_values_ok = temp_ret_val @@ -479,6 +481,7 @@ def get_lexer_nodes(return_values, start, tokens): :return: list of list (list of concept node sequence) """ from evaluators.BaseEvaluator import BaseEvaluator + from parsers.BaseNodeParser import ConceptNode, LexerNode, RuleNode, SourceCodeNode lexer_nodes = [] for ret_val in return_values: @@ -546,6 +549,7 @@ def 
get_lexer_nodes_using_positions(return_values, positions): """ from evaluators.BaseEvaluator import BaseEvaluator + from parsers.BaseNodeParser import ConceptNode, LexerNode, RuleNode, SourceCodeNode lexer_nodes = [] for ret_val, position in zip(return_values, positions): @@ -615,8 +619,8 @@ def ensure_evaluated(context, concept, eval_body=True, metadata=None): :param metadata: :return: """ + from core.sheerka.services.SheerkaEvaluateConcept import SheerkaEvaluateConcept, EvaluationHints if concept.get_hints().is_evaluated: - from core.sheerka.services.SheerkaEvaluateConcept import SheerkaEvaluateConcept return SheerkaEvaluateConcept.apply_ret(concept, eval_body or context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED)) @@ -624,11 +628,14 @@ def ensure_evaluated(context, concept, eval_body=True, metadata=None): if concept.get_metadata().definition_type != DEFINITION_TYPE_BNF: for var_name, var_default_value in concept.get_metadata().variables: if var_default_value is None and \ - var_name not in concept.get_compiled() and \ - (var_name not in concept.values() or concept.get_value(var_name) == NotInit): + var_name not in concept.get_compiled() and \ + (var_name not in concept.values() or concept.get_value(var_name) == NotInit): return concept - evaluated = context.sheerka.evaluate_concept(context, concept, eval_body=eval_body, metadata=metadata) + evaluated = context.sheerka.evaluate_concept(context, + concept, + hints=EvaluationHints(eval_body=eval_body), + metadata=metadata) return evaluated @@ -663,6 +670,9 @@ def update_compiled(context, concept, errors, parsers=None): :param parsers: to customize the parsers to use :return: """ + + from parsers.BaseNodeParser import ConceptNode, SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode + sheerka = context.sheerka parsers = parsers or PARSERS @@ -676,6 +686,15 @@ def update_compiled(context, concept, errors, parsers=None): if isinstance(v, Concept): _validate_concept(v) + elif isinstance(v, 
ConceptNode): + _validate_concept(v.concept) + c.get_compiled()[k] = v.concept + + elif isinstance(v, SourceCodeNode): + if not v.return_value: + raise NotImplementedError("SourceCodeNode") + c.get_compiled()[k] = [v.return_value] + elif isinstance(v, SourceCodeWithConceptNode): if v.return_value: res = v.return_value @@ -939,7 +958,8 @@ def get_possible_variables_from_concept(context, concept): return set() concept_name = [t.str_value for t in Tokenizer(concept.name, yield_eof=False)] - names = [v_value or v_name for v_name, v_value in concept.get_metadata().variables if v_name in concept_name] + names = [v_value.strip() or v_name for v_name, v_value in concept.get_metadata().variables if + v_name in concept_name] possible_vars = filter(lambda x: context.sheerka.is_not_a_concept_name(x), names) to_keep = set() for var in possible_vars: @@ -961,6 +981,36 @@ def is_only_successful(sheerka, return_value): sheerka.isinstance(return_value.body, BuiltinConcepts.ONLY_SUCCESSFUL) +def debug_nodes(nodes): + from parsers.BaseNodeParser import UnrecognizedTokensNode + + res = [] + for node in nodes: + if isinstance(node, UnrecognizedTokensNode): + res.append(node.source) + elif hasattr(node, "get_concept"): + concept = node.get_concept() + res.append(concept) + else: + res.append(node) + + return res + + +def get_new_variables_definitions(concept): + """ + Return a new set of variable definition, where the default value are initialized with what was compiled + """ + new_variables = [] + for var_name, var_default_value in concept.get_metadata().variables: + if var_name in concept.get_metadata().parameters and hasattr(concept.get_compiled()[var_name], "source"): + new_variables.append((var_name, concept.get_compiled()[var_name].source)) + else: + new_variables.append((var_name, var_default_value)) + + return new_variables + + class CreateObjectIdentifiers: """ Class that creates unique identifiers for Concept or Rule objects diff --git a/src/core/concept.py 
b/src/core/concept.py index 0337cda..a35d9bb 100644 --- a/src/core/concept.py +++ b/src/core/concept.py @@ -44,7 +44,7 @@ def concept_part_value(c): class ConceptHints: is_evaluated: bool = False # True is the concept is evaluated by sheerka.eval_concept() need_validation: bool = False # True if the properties of the concept need to be validated - recognized_by: str = None # recognized by its name, by its id or its key + recognized_by: str = None # RECOGNIZED_BY_ID, RECOGNIZED_BY_NAME, RECOGNIZED_BY_KEY (from Sheerka.py) use_copy: bool = False # Do not modify, make a copy first is_instance: bool = True # False if we think we recognize the definition of a concept, not its usage diff --git a/src/core/sheerka/ExecutionContext.py b/src/core/sheerka/ExecutionContext.py index eaf0700..9d26436 100644 --- a/src/core/sheerka/ExecutionContext.py +++ b/src/core/sheerka/ExecutionContext.py @@ -6,7 +6,7 @@ from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.concept import Concept, get_concept_attrs from core.global_symbols import EVENT_CONTEXT_DISPOSED, NO_MATCH from core.sheerka.services.SheerkaMemory import SheerkaMemory -from core.utils import CONSOLE_COLORS_MAP as CCM, CONSOLE_COLUMNS +from core.utils import CONSOLE_COLUMNS from sdp.sheerkaDataProvider import Event pp = pprint.PrettyPrinter(indent=2, width=CONSOLE_COLUMNS) @@ -334,37 +334,8 @@ class ExecutionContext: to_str = self.return_value_to_str(r) self._logger.debug(f"[{self._id:2}]" + self._tab + "-> " + to_str) - def get_debugger(self, who, method_name, new_debug_id=True): - return self.sheerka.get_debugger(self, who, method_name, new_debug_id) - - # TODO: TO REMOVE - def debug(self, who, method_name, variable_name, text, is_error=False): - activated = self.sheerka.debug_activated_for(who) - if activated: - str_text = pp.pformat(text) - color = 'red' if is_error else 'green' - if "\n" not in str(str_text): - self.sheerka.debug( - f"[{self._id:3}] 
{CCM[color]}{who}.{method_name}.{variable_name}: {CCM['reset']}{str_text}") - else: - self.sheerka.debug(f"[{self._id:3}] {CCM[color]}{who}.{method_name}.{variable_name}: {CCM['reset']}") - self.sheerka.debug(str_text) - - # TODO: TO REMOVE - def debug_entering(self, who, method_name, **kwargs): - if self.sheerka.debug_activated_for(who): - str_text = pp.pformat(kwargs) - if "\n" not in str(str_text): - self.sheerka.debug( - f"[{self._id:3}] {CCM['blue']}Entering {who}.{method_name} with {CCM['reset']}{str_text}") - else: - self.sheerka.debug(f"[{self._id:3}] {CCM['blue']}Entering {who}.{method_name}:{CCM['reset']}") - self.sheerka.debug(f"[{self._id:3}] {str_text}") - - # TODO: TO REMOVE - def debug_log(self, who, text): - if self.sheerka.debug_activated_for(who): - self.sheerka.debug(f"[{self._id:3}] {CCM['blue']}{text}{CCM['reset']}") + def get_debugger(self, who, method_name, new_debug_id=True, forced_debug_id=None): + return self.sheerka.get_debugger(self, who, method_name, new_debug_id, forced_debug_id) def get_parent(self): return self._parent diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index ea466a3..c626466 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -8,12 +8,12 @@ import core.utils from cache.Cache import Cache from cache.IncCache import IncCache from core.builtin_concepts import ErrorConcept, ReturnValueConcept, UnknownConcept -from core.builtin_concepts_ids import BuiltinErrors, BuiltinConcepts +from core.builtin_concepts_ids import BuiltinConcepts, BuiltinErrors from core.concept import Concept, ConceptParts, get_concept_attrs -from core.global_symbols import EVENT_USER_INPUT_EVALUATED, NotInit, NotFound, ErrorObj, EVENT_ONTOLOGY_CREATED +from core.global_symbols import EVENT_ONTOLOGY_CREATED, EVENT_USER_INPUT_EVALUATED, ErrorObj, NotFound, NotInit from core.profiling import profile from core.sheerka.ExecutionContext import ExecutionContext -from core.sheerka.SheerkaOntologyManager import 
SheerkaOntologyManager, OntologyAlreadyExists +from core.sheerka.SheerkaOntologyManager import OntologyAlreadyExists, SheerkaOntologyManager from core.sheerka_logger import console_handler from core.tokenizer import Token, TokenKind from printer.SheerkaPrinter import SheerkaPrinter @@ -119,6 +119,7 @@ class Sheerka(Concept): self.enable_process_return_values = True self.enable_process_rules = True self.enable_commands_backup = True + self.enable_parser_caching = True self.methods_with_context = {"test_using_context"} # only the names, the method is defined in sheerka_methods self.pipe_functions = set() @@ -193,6 +194,7 @@ class Sheerka(Concept): self.enable_process_return_values) self.enable_process_rules = kwargs.get("enable_process_rules", self.enable_process_rules) self.enable_commands_backup = kwargs.get("enable_commands_backup", self.enable_commands_backup) + self.enable_parser_caching = kwargs.get("enable_parser_caching", self.enable_parser_caching) try: self.during_initialisation = True @@ -449,8 +451,8 @@ class Sheerka(Concept): concept = concept.value # concept is now a tuple if isinstance(concept, str) and \ - concept.startswith("c:") and \ - (tmp := core.utils.unstr_concept(concept)) != (None, None): + concept.startswith("c:") and \ + (tmp := core.utils.unstr_concept(concept)) != (None, None): concept = tmp # ############## @@ -567,7 +569,7 @@ class Sheerka(Concept): # manage concept not found if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \ - concept_key != BuiltinConcepts.UNKNOWN_CONCEPT: + concept_key != BuiltinConcepts.UNKNOWN_CONCEPT: return template if isinstance(template, list): @@ -791,7 +793,7 @@ class Sheerka(Concept): Browse obj, looking for error :param context: :param obj: - :param kwargs: if defined, specialize the error + :param kwargs: if defined, specialize the error (example __type="PythonEvalError") :return: """ @@ -834,6 +836,19 @@ class Sheerka(Concept): errors = self.get_errors(context, obj, **kwargs) return 
len(errors) > 0 + def get_error_cause(self, obj): + if self.isinstance(obj, BuiltinConcepts.NOT_FOR_ME): + res = obj.reason + elif self.isinstance(obj, BuiltinConcepts.ERROR): + res = obj.body + else: + res = None + + if isinstance(res, list) and len(res) == 1: + return res[0] + else: + return res + def get_evaluator_name(self, name): if self.evaluators_prefix is None: base_evaluator_class = core.utils.get_class("evaluators.BaseEvaluator.BaseEvaluator") diff --git a/src/core/sheerka/services/SheerkaAdmin.py b/src/core/sheerka/services/SheerkaAdmin.py index 36631b2..7fa55c0 100644 --- a/src/core/sheerka/services/SheerkaAdmin.py +++ b/src/core/sheerka/services/SheerkaAdmin.py @@ -3,7 +3,7 @@ import time from os import path from core.builtin_concepts_ids import BuiltinConcepts, BuiltinContainers -from core.builtin_helpers import ensure_concept_or_rule, ensure_concept +from core.builtin_helpers import ensure_concept_or_rule from core.concept import Concept from core.global_symbols import SHEERKA_BACKUP_FOLDER from core.sheerka.services.SheerkaExecute import SheerkaExecute @@ -40,6 +40,7 @@ class SheerkaAdmin(BaseService): self.sheerka.bind_service_method(self.NAME, self.in_memory, False) self.sheerka.bind_service_method(self.NAME, self.admin_history, False, as_name="history") self.sheerka.bind_service_method(self.NAME, self.sdp, False) + self.sheerka.bind_service_method(self.NAME, self.set_sheerka, True) def caches_names(self): """ @@ -284,3 +285,13 @@ class SheerkaAdmin(BaseService): def admin_history(self, depth=10, start=0): history = self.sheerka.services[SheerkaHistoryManager.NAME].history(depth, start) return self.sheerka.new(BuiltinConcepts.TO_LIST, body=history) + + def set_sheerka(self, context, key, value, service=None): + """ + @param context: + @param key: + @param value: + @param service: + @return: + """ + return self.sheerka.record_var(context, service or self.sheerka.name, key, value) diff --git a/src/core/sheerka/services/SheerkaDebugManager.py 
b/src/core/sheerka/services/SheerkaDebugManager.py index 146eff2..d19d747 100644 --- a/src/core/sheerka/services/SheerkaDebugManager.py +++ b/src/core/sheerka/services/SheerkaDebugManager.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from core.builtin_concepts import BuiltinConcepts from core.builtin_helpers import evaluate_expression from core.concept import Concept -from core.global_symbols import NotInit, NotFound +from core.global_symbols import NotFound, NotInit from core.sheerka.ExecutionContext import ExecutionContext from core.sheerka.services.sheerka_service import BaseService from core.utils import CONSOLE_COLORS_MAP as CCM, CONSOLE_COLUMNS, PRIMITIVES_TYPES @@ -100,13 +100,22 @@ class BaseDebugLogger: BaseDebugLogger.ids[hint] = 0 return 0 - def __init__(self, debug_manager, context, who, method_name, debug_id): - pass + def __init__(self, keep_track, debug_manager, context, service_name, method_name, debug_id): + self.debug_manager = debug_manager + self.service_name = service_name + self.method_name = method_name + self.context = context + self.debug_id = debug_id + + self.keep_track = keep_track # Does debug_manager need to keep track of this logger ? 
def debug_entering(self, **kwargs): pass - def debug_log(self, text, is_error=False): + def debug_leaving(self, **kwargs): + pass + + def debug_log(self, text, is_error=False, args=None): pass def debug_var(self, name, value, is_error=False, hint=None): @@ -122,7 +131,36 @@ class BaseDebugLogger: pass def get_enabled_vars(self): - pass + """ + Returns the list of all enabled variables for this console debugger + :return: + """ + return self.debug_manager.get_enabled_items("vars", + self.context, + self.service_name, + self.method_name, + self.debug_id) + + def should_i_log_var(self, name, is_error=False): + return is_error or self.debug_manager.compute_debug_var(self.context, + self.service_name, + self.method_name, + name, + self.debug_id) + + def should_i_log_rule(self, rule_id, is_error=False): + return is_error or self.debug_manager.compute_debug_rule(self.context, + self.service_name, + self.method_name, + rule_id, + self.debug_id) + + def should_i_log_concept(self, concept_id, is_error=False): + return is_error or self.debug_manager.compute_debug_concept(self.context, + self.service_name, + self.method_name, + concept_id, + self.debug_id) class NullDebugLogger(BaseDebugLogger): @@ -139,12 +177,7 @@ class NullDebugLogger(BaseDebugLogger): class ConsoleDebugLogger(BaseDebugLogger): def __init__(self, debug_manager, context, service_name, method_name, debug_id): - BaseDebugLogger.__init__(self, debug_manager, context, service_name, method_name, debug_id) - self.debug_manager = debug_manager - self.service_name = service_name - self.method_name = method_name - self.context = context - self.debug_id = debug_id + BaseDebugLogger.__init__(self, False, debug_manager, context, service_name, method_name, debug_id) self.is_highlighted = "" def is_enabled(self): @@ -154,17 +187,6 @@ class ConsoleDebugLogger(BaseDebugLogger): """ return True - def get_enabled_vars(self): - """ - Returns the list of all enabled variables for this console debugger - :return: - """ - return 
self.debug_manager.get_enabled_items("vars", - self.context, - self.service_name, - self.method_name, - self.debug_id) - def debug_entering(self, **kwargs): """ Log that we start debugging a method (for a specified service and context) @@ -181,11 +203,31 @@ class ConsoleDebugLogger(BaseDebugLogger): self.debug_manager.debug(self.prefix() + str_text) self.debug_manager.debug(self.prefix() + str_vars) - def debug_log(self, text, is_error=False): + def debug_leaving(self, **kwargs): + """ + Log that we start debugging a method (for a specified service and context) + :param kwargs: + :return: + """ + super().debug_leaving(**kwargs) + + if not kwargs: + return + + str_text = f"{CCM['blue']}Leaving {self.service_name}.{self.method_name} with {CCM['reset']}" + str_vars = pp.pformat(kwargs) + if "\n" not in str(str_vars): + self.debug_manager.debug(self.prefix() + str_text + str_vars) + else: + self.debug_manager.debug(self.prefix() + str_text) + self.debug_manager.debug(self.prefix() + str_vars) + + def debug_log(self, text, is_error=False, args=None): """ Prints a debug information (not related to a specific variable, concept or rule) :param text: :param is_error: + :param args: :return: """ color = 'red' if is_error else 'blue' @@ -268,8 +310,153 @@ class ConsoleDebugLogger(BaseDebugLogger): return f"[{self.context.id:2}][{self.debug_id:2}] {self.is_highlighted}" +class ListDebugLogger(BaseDebugLogger): + ITEM_TYPE_ENTERING = "entering" + ITEM_TYPE_LEAVING = "leaving" + ITEM_TYPE_LOG = "log" + ITEM_TYPE_VAR = "var" + ITEM_TYPE_RULE = "rule" + ITEM_TYPE_CONCEPT = "concept" + + class DebugItem: + def __init__(self, item_type, text, is_error=False, args=None): + self.type = item_type + self.text = text + self.is_error = is_error + self.args = args + + def __repr__(self): + return self.text + + def __init__(self, debug_manager, context, service_name, method_name, debug_id): + BaseDebugLogger.__init__(self, True, debug_manager, context, service_name, method_name, debug_id) + 
self.items = [] + + def is_enabled(self): + """ + True if the debug is activated for the current service, method and context + :return: + """ + return True + + def debug_entering(self, **kwargs): + """ + Log that we start debugging a method (for a specified service and context) + :param kwargs: + :return: + """ + text = f"Entering {self.service_name}.{self.method_name}" + self.items.append(ListDebugLogger.DebugItem(ListDebugLogger.ITEM_TYPE_ENTERING, text, False, kwargs)) + + def debug_leaving(self, **kwargs): + """ + Log that we start debugging a method (for a specified service and context) + :param kwargs: + :return: + """ + super().debug_leaving(**kwargs) + + if not kwargs: + return + + text = f"Leaving {self.service_name}.{self.method_name}" + self.items.append(ListDebugLogger.DebugItem(ListDebugLogger.ITEM_TYPE_LEAVING, text, False, kwargs)) + + def debug_log(self, text, is_error=False, args=None): + """ + Prints a debug information (not related to a specific variable, concept or rule) + :param text: + :param is_error: + :param args: + :return: + """ + self.items.append(ListDebugLogger.DebugItem(ListDebugLogger.ITEM_TYPE_LOG, text, is_error, args)) + + def debug_var(self, name, value, is_error=False, hint=None): + """ + Prints the value of a variable + :param name: + :param value: + :param is_error: + :param hint: + :return: + """ + if not self.should_i_log_var(name, is_error): + return + + text = name + hint if hint else name + self.items.append(ListDebugLogger.DebugItem(ListDebugLogger.ITEM_TYPE_VAR, text, is_error, value)) + + def debug_rule(self, rule, results): + """ + Prints debug information related to a specific rule id + :param rule: + :param results: + :return: + """ + if not self.should_i_log_rule(rule.id): + return + + self.items.append(ListDebugLogger.DebugItem(ListDebugLogger.ITEM_TYPE_RULE, rule.id, False, results)) + + def debug_concept(self, concept, text=None, **kwargs): + """ + Prints debug information related to a specific concept + :param 
concept: + :param text: + :param kwargs: + :return: + """ + if not self.debug_manager.compute_debug_concept(self.context, + self.service_name, + self.method_name, + concept.id, + self.debug_id): + return + + if not self.should_i_log_concept(concept.id): + return + + concept_id = f"{concept.id}{text}" if text else f"{concept.id}" + self.items.append(ListDebugLogger.DebugItem(ListDebugLogger.ITEM_TYPE_CONCEPT, concept_id, False, kwargs)) + + +class TeeDebugLogger(BaseDebugLogger): + def __init__(self, debug_manager, context, service_name, method_name, debug_id, loggers): + BaseDebugLogger.__init__(self, False, debug_manager, context, service_name, method_name, debug_id) + self.loggers = loggers + + def is_enabled(self): + """ + True if the debug is activated for the current service, method and context + :return: + """ + return True + + def debug_entering(self, **kwargs): + for logger in self.loggers: + logger.debug_entering(kwargs) + + def debug_log(self, text, is_error=False, args=None): + for logger in self.loggers: + logger.debug_log(text, is_error, args=None) + + def debug_var(self, name, value, is_error=False, hint=None): + for logger in self.loggers: + logger.debug_var(name, value, is_error, hint) + + def debug_rule(self, rule, results): + for logger in self.loggers: + logger.debug_rule(rule, results) + + def debug_concept(self, concept, text=None, **kwargs): + for logger in self.loggers: + logger.debug_concept(concept, text, **kwargs) + + @dataclass class DebugItem: + debug_type: str item: str service_name: str method_name: str @@ -280,6 +467,16 @@ class DebugItem: enabled: bool + def __repr__(self): + text = f"type={self.debug_type}" + text += f", setting={self.service_name or '*'}.{self.method_name or '*'}.{self.item or '*'}" + text += f", context_id={self.context_id}" + text += f", debug_id={self.debug_id}" + text += f", context_children={self.context_children}" + text += f", debug_children={self.debug_children}" + text += f" (enabled={self.enabled})" + 
return f"DebugItem({text})" + class SheerkaDebugManager(BaseService): NAME = "Debug" @@ -302,7 +499,10 @@ class SheerkaDebugManager(BaseService): self.registered_vars = [] # list of all variables that can be debugged self.registered_rules = [] # list of all rules that can be debugged self.registered_concepts = [] # list of all concept that can be debugged + self.debug_logger_definition = ConsoleDebugLogger # logger to use + self.instantiated_loggers = {} + # variable that needs to be reset on restart self.state_vars = [ "activated", "explicit", # to remove ? @@ -323,25 +523,33 @@ class SheerkaDebugManager(BaseService): self.sheerka.bind_service_method(self.NAME, self.set_debug, True) self.sheerka.bind_service_method(self.NAME, self.inspect, False) self.sheerka.bind_service_method(self.NAME, self.get_value, False) - self.sheerka.bind_service_method(self.NAME, self.get_debugger, False) self.sheerka.bind_service_method(self.NAME, self.reset_debug, False) self.sheerka.bind_service_method(self.NAME, self.set_debug_var, True) self.sheerka.bind_service_method(self.NAME, self.set_debug_rule, True) self.sheerka.bind_service_method(self.NAME, self.set_debug_concept, True) - self.sheerka.bind_service_method(self.NAME, self.list_debug_vars, True) - self.sheerka.bind_service_method(self.NAME, self.list_debug_rules, True) - self.sheerka.bind_service_method(self.NAME, self.list_debug_concepts, True) + self.sheerka.bind_service_method(self.NAME, self.list_debug_vars, False) + self.sheerka.bind_service_method(self.NAME, self.list_debug_rules, False) + self.sheerka.bind_service_method(self.NAME, self.list_debug_concepts, False) + self.sheerka.bind_service_method(self.NAME, self.list_debug_settings, False) + self.sheerka.bind_service_method(self.NAME, self.register_debug_vars, True, visible=False) self.sheerka.bind_service_method(self.NAME, self.register_debug_rules, True, visible=False) self.sheerka.bind_service_method(self.NAME, self.register_debug_concepts, True, visible=False) + 
self.sheerka.bind_service_method(self.NAME, self.get_debugger, False, visible=False) + self.sheerka.bind_service_method(self.NAME, self.get_debugger_logs, False, visible=False) + self.sheerka.bind_service_method(self.NAME, self.set_debug_logger_definition, True, visible=False) + # self.sheerka.bind_service_method(self.NAME,self.get_debug_settings, False, as_name="debug_settings") # register what can be registered from parsers.BnfNodeParser import BnfNodeParser from evaluators.DefConceptEvaluator import DefConceptEvaluator from evaluators.PythonEvaluator import PythonEvaluator from parsers.SyaNodeParser import SyaNodeParser + from parsers.SequenceNodeParser import SequenceNodeParser self.register_debug_vars(BnfNodeParser.NAME, "parse", "result") + self.register_debug_vars(BnfNodeParser.NAME, "parse", "stats") + self.register_debug_vars(SequenceNodeParser.NAME, "parse", "stats") self.register_debug_concepts(BnfNodeParser.NAME, "parse", "*") self.register_debug_vars(DefConceptEvaluator.NAME, "matches", "*") self.register_debug_vars(DefConceptEvaluator.NAME, "eval", "*") @@ -351,7 +559,8 @@ class SheerkaDebugManager(BaseService): self.register_debug_vars(PythonEvaluator.NAME, "eval", "ret") self.register_debug_vars("Exceptions", PythonEvaluator.NAME + "-eval", "exception") self.register_debug_vars("Exceptions", PythonEvaluator.NAME + "-eval", "trace") - self.register_debug_vars(SyaNodeParser.NAME, "parse", "*") + self.register_debug_vars(SyaNodeParser.NAME, "parse", "#[number]") + self.register_debug_vars(SyaNodeParser.NAME, "parse", "stats") self.register_debug_vars(MultipleSuccessEvaluator.NAME, "matches", "return_values") def initialize_deferred(self, context, is_first_time): @@ -498,14 +707,30 @@ class SheerkaDebugManager(BaseService): self.sheerka.record_var(context, self.NAME, "activated", self.activated) return self.sheerka.ret(SheerkaDebugManager.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS)) + def set_debug_logger_definition(self, logger_definition): + 
""" + Logger definition to use. By default it's the ConsoleDebugLogger + logger_definition can be a list of debug loggers + :param logger_definition: + :return: + """ + self.debug_logger_definition = logger_definition + def debug(self, *args, **kwargs): print(*args, **kwargs) - def get_debugger(self, context, who, method_name, new_debug_id=True): + def get_debugger(self, context, who, method_name, new_debug_id=True, forced_debug_id=None): if self.compute_debug(context, who, method_name): - debug_id = ConsoleDebugLogger.next_id(context.event.get_digest() + str(context.id)) if new_debug_id \ - else ConsoleDebugLogger.current_id(context.event.get_digest() + str(context.id)) - return ConsoleDebugLogger(self, context, who, method_name, debug_id) + hint = context.event.get_digest() + str(context.id) + if forced_debug_id is not None: + debug_id = forced_debug_id + BaseDebugLogger.ids[hint] = debug_id + elif new_debug_id: + debug_id = BaseDebugLogger.next_id(hint) + else: + debug_id = BaseDebugLogger.current_id(hint) + + return self.get_new_debug_logger_instance(context, who, method_name, debug_id) return NullDebugLogger() @@ -525,16 +750,17 @@ class SheerkaDebugManager(BaseService): items_container = getattr(self, item_type_full_name) for setting in items_container: if setting.item == item and \ - setting.service_name == service and \ - setting.method_name == method and \ - setting.context_id == context_id and \ - setting.context_children == context_children and \ - setting.debug_id == debug_id and \ - setting.debug_children == debug_children: + setting.service_name == service and \ + setting.method_name == method and \ + setting.context_id == context_id and \ + setting.context_children == context_children and \ + setting.debug_id == debug_id and \ + setting.debug_children == debug_children: setting.enabled = enabled break else: - items_container.append(DebugItem(item, + items_container.append(DebugItem(item_type, + item, service, method, context_id, @@ -564,9 +790,9 @@ 
class SheerkaDebugManager(BaseService): continue if (setting.service_name is None or setting.service_name == service_name) and \ - (setting.method_name is None or setting.method_name == method_name) and \ - (setting.context_id is None or setting.context_id == context.id or ( - setting.context_children and context.has_parent(setting.context_id))): + (setting.method_name is None or setting.method_name == method_name) and \ + (setting.context_id is None or setting.context_id == context.id or ( + setting.context_children and context.has_parent(setting.context_id))): selected.append(setting.enabled) if len(selected) == 0: @@ -599,13 +825,13 @@ class SheerkaDebugManager(BaseService): continue if (setting.service_name is None or setting.service_name == service_name) and \ - (setting.method_name is None or setting.method_name == method_name) and \ - (setting.context_id is None or setting.context_id == context.id or ( - setting.context_children and context.has_parent(setting.context_id))) and \ - (setting.item is None or - setting.item == "*" or - setting.item == item) and \ - (setting.debug_id is None or setting.debug_id == debug_id): + (setting.method_name is None or setting.method_name == method_name) and \ + (setting.context_id is None or setting.context_id == context.id or ( + setting.context_children and context.has_parent(setting.context_id))) and \ + (setting.item is None or + setting.item == "*" or + setting.item == item) and \ + (setting.debug_id is None or setting.debug_id == debug_id): selected.append(setting.enabled) if len(selected) == 0: @@ -633,10 +859,10 @@ class SheerkaDebugManager(BaseService): continue if (setting.service_name is None or setting.service_name == service_name) and \ - (setting.method_name is None or setting.method_name == method_name) and \ - (setting.context_id is None or setting.context_id == context.id or ( - setting.context_children and context.has_parent(setting.context_id))) and \ - (setting.debug_id is None or setting.debug_id == 
debug_id): + (setting.method_name is None or setting.method_name == method_name) and \ + (setting.context_id is None or setting.context_id == context.id or ( + setting.context_children and context.has_parent(setting.context_id))) and \ + (setting.debug_id is None or setting.debug_id == debug_id): selected.add(setting.item) return selected @@ -919,3 +1145,35 @@ class SheerkaDebugManager(BaseService): del res["self"] return res + + def list_debug_settings(self): + settings = self.debug_vars_settings + self.debug_concepts_settings + self.debug_rules_settings + return self.sheerka.new(BuiltinConcepts.TO_LIST, body=settings) + + def get_new_debug_logger_instance(self, context, who, method_name, debug_id): + if hasattr(self.debug_logger_definition, "__iter__"): + loggers = [] + for logger_type in self.debug_logger_definition: + logger = logger_type(self, context, who, method_name, debug_id) + if logger.keep_track: + key = (who, method_name, context.id, debug_id) + self.instantiated_loggers[key] = logger + return TeeDebugLogger(self, context, who, method_name, debug_id, loggers) + + logger = self.debug_logger_definition(self, context, who, method_name, debug_id) + if logger.keep_track: + key = (who, method_name, context.id, debug_id) + self.instantiated_loggers[key] = logger + return logger + + def get_debugger_logs(self): + res = {} + for k, v in [(k, v) for k, v in self.instantiated_loggers.items() if isinstance(v, ListDebugLogger)]: + key = list(k) + if v.items and v.items[0].type == ListDebugLogger.ITEM_TYPE_ENTERING: + if "source" in v.items[0].args: + key.append(v.items[0].args["source"]) + + res[tuple(key)] = v.items + + return res diff --git a/src/core/sheerka/services/SheerkaEvaluateConcept.py b/src/core/sheerka/services/SheerkaEvaluateConcept.py index 3ecaaa0..564bcc1 100644 --- a/src/core/sheerka/services/SheerkaEvaluateConcept.py +++ b/src/core/sheerka/services/SheerkaEvaluateConcept.py @@ -1,15 +1,15 @@ from dataclasses import dataclass from 
core.builtin_concepts import BuiltinConcepts -from core.builtin_helpers import expect_one, only_successful, ensure_concept, is_only_successful, ensure_bnf -from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \ +from core.builtin_helpers import ensure_bnf, ensure_concept, expect_one, is_only_successful, only_successful +from core.concept import AllConceptParts, Concept, ConceptParts, DoNotResolve, InfiniteRecursionResolved, \ concept_part_value -from core.global_symbols import NotInit, CURRENT_OBJ, INIT_AST_PARSERS, INIT_AST_QUESTION_PARSERS +from core.global_symbols import CURRENT_OBJ, INIT_AST_PARSERS, INIT_AST_QUESTION_PARSERS, NotInit from core.rule import Rule from core.sheerka.services.SheerkaEvaluateRules import SheerkaEvaluateRules from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute from core.sheerka.services.SheerkaRuleManager import PythonConditionExprVisitor -from core.sheerka.services.sheerka_service import BaseService, FailedToCompileError, ChickenAndEggException +from core.sheerka.services.sheerka_service import BaseService, ChickenAndEggException, FailedToCompileError from core.tokenizer import Tokenizer from core.utils import unstr_concept from parsers.BaseExpressionParser import TrueifyVisitor @@ -38,6 +38,13 @@ class WhereClauseDef: conditions: list # compiled trueified +@dataclass +class EvaluationHints: + eval_body: bool = None # true if the body must be evaluated + eval_question: bool = None # true if the eval_question must be set + expression_only: bool = None # True if function/methods to forbid functions with side effect + + class SheerkaEvaluateConcept(BaseService): NAME = "EvaluateConcept" @@ -48,7 +55,7 @@ class SheerkaEvaluateConcept(BaseService): def initialize(self): self.sheerka.bind_service_method(self.NAME, self.evaluate_concept, True) self.sheerka.bind_service_method(self.NAME, self.call_concept, True) - self.sheerka.bind_service_method(self.NAME, 
self.call_concept, False, as_name="evaluate_question") + self.sheerka.bind_service_method(self.NAME, self.evaluate_question, False) self.sheerka.bind_service_method(self.NAME, self.set_auto_eval, True) def initialize_deferred(self, context, is_first_time): @@ -68,9 +75,9 @@ class SheerkaEvaluateConcept(BaseService): parent = context.get_parent() while parent is not None: if (parent.who == context.who and - parent.action == BuiltinConcepts.EVALUATING_CONCEPT and - parent.obj == concept and - parent.obj.get_compiled() == concept.get_compiled()): + parent.action == BuiltinConcepts.EVALUATING_CONCEPT and + parent.obj == concept and + parent.obj.get_compiled() == concept.get_compiled()): return True parent = parent.get_parent() @@ -97,8 +104,8 @@ class SheerkaEvaluateConcept(BaseService): :return: """ if (eval_body and - ConceptParts.RET in concept.values() and - (ret_value := concept.get_value(ConceptParts.RET)) != NotInit): + ConceptParts.RET in concept.values() and + (ret_value := concept.get_value(ConceptParts.RET)) != NotInit): return ret_value else: return concept @@ -512,7 +519,7 @@ class SheerkaEvaluateConcept(BaseService): # when it's a concept, evaluate it if isinstance(to_resolve, Concept) and \ - not context.sheerka.isinstance(to_resolve, BuiltinConcepts.RETURN_VALUE): + not context.sheerka.isinstance(to_resolve, BuiltinConcepts.RETURN_VALUE): evaluated = self.evaluate_concept(sub_context, to_resolve) sub_context.add_values(return_values=evaluated) @@ -532,6 +539,8 @@ class SheerkaEvaluateConcept(BaseService): value = current_concept.get_value(var[0]) if value != NotInit: sub_context.add_to_short_term_memory(var[0], current_concept.get_value(var[0])) + + # KSI 2021-08-10 It seems that a copy is not needed here, as it's the first thing done ine execute() use_copy = to_resolve.copy() if isinstance(to_resolve, list) else to_resolve r = self.sheerka.execute(sub_context, use_copy, CONCEPT_EVALUATION_STEPS) @@ -601,22 +610,30 @@ class 
SheerkaEvaluateConcept(BaseService): return res - def evaluate_concept(self, context, concept: Concept, eval_body=False, validation_only=False, metadata=None): + def evaluate_concept(self, context, concept: Concept, hints: EvaluationHints = None, metadata=None): """ Evaluation a concept ie : resolve its body :param context: :param concept: - :param eval_body: - :param validation_only: When set, the body is never evaluated, whatever eval_body or EVAL_BODY_REQUESTED + :param hints: :param metadata: list of metadata to evaluate ('pre', 'post'...) :return: value of the evaluation or error """ failed_to_evaluate_body = False + hints = hints or EvaluationHints() if concept.get_hints().is_evaluated: - return self.apply_ret(concept, eval_body or context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED)) + return self.apply_ret(concept, hints.eval_body or context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED)) + + to_reset = set() + if isinstance(hints.eval_body, bool) and not hints.eval_body: + to_reset.add(BuiltinConcepts.EVAL_BODY_REQUESTED) + if isinstance(hints.eval_question, bool) and not hints.eval_question: + to_reset.add(BuiltinConcepts.EVAL_QUESTION_REQUESTED) + if isinstance(hints.expression_only, bool) and not hints.expression_only: + to_reset.add(BuiltinConcepts.EXPRESSION_ONLY_REQUESTED) # if concept.get_hints().use_copy: # raise Exception("Use copy") @@ -635,17 +652,21 @@ class SheerkaEvaluateConcept(BaseService): # return concept desc = f"Evaluating concept {concept}" - with context.push(BuiltinConcepts.EVALUATING_CONCEPT, concept, desc=desc) as sub_context: - sub_context.add_inputs(eval_body=eval_body) - if eval_body: - # ask for body evaluation + with context.push(BuiltinConcepts.EVALUATING_CONCEPT, concept, desc=desc, reset_hints=to_reset) as sub_context: + sub_context.add_inputs(hints=hints) + + # update context with evaluate_concept parameters + if hints.eval_body: sub_context.protected_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) - if validation_only: 
+ if hints.eval_question: + sub_context.protected_hints.add(BuiltinConcepts.EVAL_QUESTION_REQUESTED) + + if hints.expression_only: # Never call methods with side effect in this concept or sub concepts sub_context.protected_hints.add(BuiltinConcepts.EXPRESSION_ONLY_REQUESTED) - # auto evaluate commands + # update context with evaluate_concept parameters if context.sheerka.isa(concept, context.sheerka.new(BuiltinConcepts.AUTO_EVAL)): sub_context.protected_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) @@ -659,10 +680,10 @@ class SheerkaEvaluateConcept(BaseService): except ChickenAndEggException as ex: return ex.error - # to make sure of the order, it don't use ConceptParts.get_parts() + # to make sure of the order, it does not use ConceptParts.get_parts() # variables must be evaluated first, body must be evaluated before where all_metadata_to_eval = metadata or self.compute_metadata_to_eval(sub_context, concept) - if validation_only and ConceptParts.BODY in all_metadata_to_eval: + if hints.expression_only and ConceptParts.BODY in all_metadata_to_eval: all_metadata_to_eval.remove(ConceptParts.BODY) for metadata_to_eval in all_metadata_to_eval: @@ -681,7 +702,7 @@ class SheerkaEvaluateConcept(BaseService): var_name, concept, True, - not sub_context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED), + sub_context.in_context(BuiltinConcepts.EXPRESSION_ONLY_REQUESTED), w_clause) else: # Do not send the current concept for the properties @@ -690,7 +711,7 @@ class SheerkaEvaluateConcept(BaseService): var_name, concept, True, - not sub_context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED), + sub_context.in_context(BuiltinConcepts.EXPRESSION_ONLY_REQUESTED), w_clause) if isinstance(resolved, Concept) and not sub_context.sheerka.is_success(resolved): @@ -722,7 +743,7 @@ class SheerkaEvaluateConcept(BaseService): part_key, concept, force_concept_eval, - not sub_context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED), + 
sub_context.in_context(BuiltinConcepts.EXPRESSION_ONLY_REQUESTED), None) # 'FATAL' error is detected, let's stop @@ -766,10 +787,25 @@ class SheerkaEvaluateConcept(BaseService): return self.apply_ret(concept, sub_context.in_context(BuiltinConcepts.EVAL_BODY_REQUESTED)) def call_concept(self, context, concept, *args, **kwargs): + return self.call_concept_with_args(context, + concept, + hints=EvaluationHints(eval_body=True, eval_question=False), + *args, + **kwargs) + + def evaluate_question(self, context, concept, *args, **kwargs): + return self.call_concept_with_args(context, + concept, + hints=EvaluationHints(eval_body=True, eval_question=True), + *args, + **kwargs) + + def call_concept_with_args(self, context, concept, hints, *args, **kwargs): """ call the concept using either args or kwargs (not both) :param context: :param concept: + :param hints: :param args: :param kwargs: :return: @@ -780,9 +816,13 @@ class SheerkaEvaluateConcept(BaseService): concept.get_hints().is_instance = True concept.get_hints().is_evaluated = False # force evaluation - # TODO : update the variables before calling the concept + for param_name, arg in zip(concept.get_metadata().parameters, args): + concept.get_compiled()[param_name] = DoNotResolve(arg) - evaluated = self.evaluate_concept(context, concept) + for param_name, param_value in kwargs.items(): + concept.get_compiled()[param_name] = DoNotResolve(param_value) + + evaluated = self.evaluate_concept(context, concept, hints=hints) if self.sheerka.has_error(context, evaluated): raise ConceptEvalException(evaluated) diff --git a/src/core/sheerka/services/SheerkaExecute.py b/src/core/sheerka/services/SheerkaExecute.py index c0357e7..1bb8386 100644 --- a/src/core/sheerka/services/SheerkaExecute.py +++ b/src/core/sheerka/services/SheerkaExecute.py @@ -2,9 +2,9 @@ import core.utils from cache.FastCache import FastCache from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.concept import ConceptParts -from 
core.global_symbols import NotFound, NO_MATCH, EVENT_CONCEPT_CREATED, EVENT_CONCEPT_MODIFIED, EVENT_CONCEPT_DELETED +from core.global_symbols import EVENT_CONCEPT_CREATED, EVENT_CONCEPT_DELETED, EVENT_CONCEPT_MODIFIED, NO_MATCH, NotFound from core.sheerka.services.sheerka_service import BaseService -from core.tokenizer import Tokenizer, TokenKind, Token, Keywords +from core.tokenizer import Keywords, Token, TokenKind, Tokenizer PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING] PARSE_AND_EVAL_STEPS = PARSE_STEPS + [BuiltinConcepts.BEFORE_EVALUATION, @@ -179,6 +179,24 @@ class ParserInput: return True return False + def clone(self): + clone = ParserInput(self.text) + clone.tokens = self.tokens + clone.length = self.length + clone.yield_oef = self.yield_oef + clone.start = self.start + clone.end = self.end + clone.sub_text = self.sub_text + clone.sub_tokens = self.sub_tokens + clone.from_tokens = self.from_tokens + clone.pos = self.pos + clone.token = self.token + + return clone + + def sub_part(self, start, end, yield_oef=None): + return ParserInput(self.text, self.tokens, self.length, start, end, yield_oef) + class SheerkaExecute(BaseService): """ @@ -227,6 +245,7 @@ class SheerkaExecute(BaseService): self.sheerka.bind_service_method(self.NAME, self.parse_function, False, visible=False) self.sheerka.bind_service_method(self.NAME, self.parse_python, False, visible=False) self.sheerka.bind_service_method(self.NAME, self.parse_expression, False, visible=False) + self.sheerka.bind_service_method(self.NAME, self.clear_parser_cache, True) self.reset_registered_evaluators() self.reset_registered_parsers() @@ -274,6 +293,9 @@ class SheerkaExecute(BaseService): use_classes=True) self.question_parsers = [p.name for p in question_parsers] + def clear_parser_cache(self): + self.parsers_cache.clear() + @staticmethod def get_grouped(evaluators, use_classes=False): """ @@ -442,7 +464,7 @@ class SheerkaExecute(BaseService): 
return None def add_to_parser_cache(self, parsers_key, text, return_value): - if parsers_key is None: + if not self.sheerka.enable_parser_caching or parsers_key is None: return key = (parsers_key, text) @@ -517,7 +539,7 @@ class SheerkaExecute(BaseService): pass # 3. Try the cache - if to_process and parsers_key: + if self.sheerka.enable_parser_caching and to_process and parsers_key: processed = [] for return_value in to_process: to_parse_as_str = self.get_input_as_text(return_value.body.body) \ @@ -825,7 +847,7 @@ class SheerkaExecute(BaseService): with context.push(BuiltinConcepts.PARSING, action_context, who=who, desc=desc) as sub_context: if (prop in (Keywords.WHERE, Keywords.PRE, ConceptParts.WHERE, ConceptParts.PRE, Keywords.WHEN) or - is_question): + is_question): sub_context.protected_hints.add(BuiltinConcepts.EVAL_QUESTION_REQUESTED) # disable all parsers but the requested ones diff --git a/src/core/sheerka/services/SheerkaIsAManager.py b/src/core/sheerka/services/SheerkaIsAManager.py index 3966830..bf2ba12 100644 --- a/src/core/sheerka/services/SheerkaIsAManager.py +++ b/src/core/sheerka/services/SheerkaIsAManager.py @@ -6,7 +6,7 @@ from core.concept import Concept, DEFINITION_TYPE_BNF from core.global_symbols import NotFound from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager from core.sheerka.services.sheerka_service import BaseService -from core.tokenizer import Tokenizer, TokenKind +from core.tokenizer import TokenKind, Tokenizer from core.utils import merge_sets @@ -49,7 +49,7 @@ class SheerkaIsAManager(BaseService): concept_to_use = concept if BuiltinConcepts.ISA in concept_to_use.get_metadata().props and \ - concept_set in concept_to_use.get_metadata().props[BuiltinConcepts.ISA]: + concept_set in concept_to_use.get_metadata().props[BuiltinConcepts.ISA]: return self.sheerka.ret( self.NAME, False, @@ -66,7 +66,8 @@ class SheerkaIsAManager(BaseService): return res concept.set_prop(BuiltinConcepts.ISA, new_concept_set) - res = 
self.add_concept_to_set(context, concept, concept_set) + # KSI 2021-08-12. Make sure to use the newly created concept to put in cache + res = self.add_concept_to_set(context, res.body.body, concept_set) return res else: concept.set_prop(BuiltinConcepts.ISA, new_concept_set) @@ -144,15 +145,16 @@ class SheerkaIsAManager(BaseService): if not self.isaset(context, sub_concept): return self.sheerka.new(BuiltinConcepts.NOT_A_SET, body=concept) + # is it a valid concept ? + sub_concept = core.builtin_helpers.ensure_evaluated(context, sub_concept) + if not self.sheerka.is_success(sub_concept): + return sub_concept + # first, try to see if sub_concept has it's own group entry ids = self.sheerka.om.get(self.CONCEPTS_GROUPS_ENTRY, sub_concept.id) concepts = self._get_concepts(context, ids, True) # aggregate with en entries from its body - sub_concept = core.builtin_helpers.ensure_evaluated(context, sub_concept) - if not self.sheerka.is_success(sub_concept): - return sub_concept - if self.isaset(context, sub_concept.body): other_concepts = _get_set_elements(sub_concept.body) if not self.sheerka.is_success(other_concepts): @@ -169,6 +171,7 @@ class SheerkaIsAManager(BaseService): if (res := self.sheerka.om.get(self.CONCEPTS_IN_GROUPS_ENTRY, concept.id)) is not NotFound: return res + # get the elements that are in the set res = _get_set_elements(concept) # put in cache diff --git a/src/core/sheerka/services/SheerkaRuleManager.py b/src/core/sheerka/services/SheerkaRuleManager.py index 17391d6..9630158 100644 --- a/src/core/sheerka/services/SheerkaRuleManager.py +++ b/src/core/sheerka/services/SheerkaRuleManager.py @@ -15,7 +15,7 @@ from core.rule import Rule, ACTION_TYPE_PRINT from core.sheerka.Sheerka import RECOGNIZED_BY_NAME, RECOGNIZED_BY_ID from core.sheerka.services.sheerka_service import BaseService, FailedToCompileError, UnknownVariableError from core.tokenizer import Keywords, TokenKind, Token -from core.utils import merge_dictionaries, merge_sets, get_safe_str_value 
+from core.utils import merge_dicts, merge_sets, get_safe_str_value from evaluators.PythonEvaluator import PythonEvaluator from parsers.BaseExpressionParser import AndNode, ExpressionVisitor, VariableNode, ComparisonNode, FunctionNode, \ ComparisonType, NotNode, NameExprNode @@ -503,8 +503,11 @@ class GetConditionExprVisitor(ExpressionVisitor): def get_object_name(self, obj, objects=None): """ object found during the parsing are not serialized - They are kept in a dictionary and this function returns a new name for every new object - :return: + They are kept in a dictionary. + This function returns a new name for every new object + :param obj: object for which a name is to be created + :param objects: already created names (it's a dictionary) + :return: tuple(name created, dictionary of already created names) """ if objects is None: objects = {} @@ -526,9 +529,9 @@ class GetConditionExprVisitor(ExpressionVisitor): def add_variable(self, target): """ - Create a new variable + Create a new variable name of the object 'target' :param target: - :return: + :return: generated name """ var_name = f"__x_{self.var_counter:02}__" self.var_counter += 1 @@ -881,7 +884,7 @@ class PythonConditionExprVisitorObj: return PythonConditionExprVisitorObj(get_source(left.text, right.text), get_source(left.source, right.source), - merge_dictionaries(left.objects, right.objects), + merge_dicts(left.objects, right.objects), merge_sets(left.variables, right.variables), merge_sets(left.not_variables, right.not_variables)) @@ -903,7 +906,7 @@ class PythonConditionExprVisitorObj: return PythonConditionExprVisitorObj(get_source(left.text, right.text), get_source(left.source, right.source), - merge_dictionaries(left.objects, right.objects), + merge_dicts(left.objects, right.objects), merge_sets(left.variables, right.variables), merge_sets(left.not_variables, right.not_variables)) @@ -929,7 +932,7 @@ class PythonConditionExprVisitorObj: return PythonConditionExprVisitorObj(text, 
get_source(left.source, right.source), - merge_dictionaries(left.objects, right.objects), + merge_dicts(left.objects, right.objects), merge_sets(left.variables, right.variables), merge_sets(left.not_variables, right.not_variables)) diff --git a/src/core/sheerka/services/SheerkaVariableManager.py b/src/core/sheerka/services/SheerkaVariableManager.py index 17a3893..27f12f6 100644 --- a/src/core/sheerka/services/SheerkaVariableManager.py +++ b/src/core/sheerka/services/SheerkaVariableManager.py @@ -20,9 +20,12 @@ class Variable(ServiceObj): def get_key(self): return f"{self.who}|{self.key}" - def __str__(self): + def __repr__(self): return f"({self.who}){self.key}={self.value}" + def __str__(self): + return f"{self.who}.{self.key}={self.value}" + @dataclass class InternalObj: @@ -49,7 +52,8 @@ class SheerkaVariableManager(BaseService): self.sheerka.name: {"enable_process_return_values", "save_execution_context", "enable_process_rules", - "enable_commands_backup"} + "enable_commands_backup", + "enable_parser_caching"} } def initialize(self): @@ -79,7 +83,7 @@ class SheerkaVariableManager(BaseService): def record_var(self, context, who, key, value): """ - + Internal set :param context: :param who: entity that owns the key (acts as a namespace) :param key: @@ -96,6 +100,9 @@ class SheerkaVariableManager(BaseService): setattr(service, key, value) def load_var(self, who, key): + """ + Internal get + """ variable = self.sheerka.om.get(self.VARIABLES_ENTRY, who + "|" + key) if variable is NotFound: return NotFound diff --git a/src/core/simple_debug.py b/src/core/simple_debug.py index e2254ed..cc1b421 100644 --- a/src/core/simple_debug.py +++ b/src/core/simple_debug.py @@ -1,5 +1,12 @@ +import os + default_debug_name = "*default*" debug_activated = set() +append_file = False +items = [] + +if not append_file and os.path.exists("debug.txt"): + os.remove("debug.txt") def my_debug(*args, check_started=None): @@ -28,8 +35,10 @@ def my_debug(*args, check_started=None): if 
isinstance(arg, list): for item in arg: f.write(f"{item}\n") + items.append(item) else: f.write(f"{arg}\n") + items.append(args) def start_debug(debug_name=default_debug_name, msg=None): diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index 3c69e06..6094405 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager from dataclasses import dataclass, field from enum import Enum @@ -25,10 +26,10 @@ class TokenKind(Enum): STAR = "star" SLASH = "slash" PERCENT = "percent" - COMMA = "comma" - SEMICOLON = "semicolon" - COLON = "colon" - DOT = "dot" + COMMA = "comma" # , + SEMICOLON = "semicolon" # ; + COLON = "colon" # : + DOT = "dot" # . QMARK = "qmark" VBAR = "vbar" AMPER = "amper" @@ -95,7 +96,7 @@ class Token: if self.type == TokenKind.EOF: self._repr_value = "" elif self.type == TokenKind.WHITESPACE: - self._repr_value = "" if self.value == "" else "" if self.value[0] == "\t" else "" + self._repr_value = "" if self.value == "" else "" if self.value[0] == "\t" else "" elif self.type == TokenKind.NEWLINE: self._repr_value = "" elif self.type == TokenKind.CONCEPT: @@ -201,8 +202,8 @@ class Tokenizer: elif c == "_": from core.concept import VARIABLE_PREFIX if self.i + 7 < self.text_len and \ - self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \ - self.text[self.i + 7].isdigit(): + self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \ + self.text[self.i + 7].isdigit(): number = self.eat_number(self.i + 7) yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column) self.i += 7 + len(number) @@ -566,3 +567,18 @@ class IterParser: return token_after except StopIteration: return Token(TokenKind.EOF, -1, -1, -1, -1) + + +def simple_token_compare(a, b): + return a.type == b.type and a.value == b.value + + +@contextmanager +def comparable_tokens(): + eq = Token.__eq__ + ne = Token.__ne__ + setattr(Token, "__eq__", simple_token_compare) + setattr(Token, "__ne__", lambda a, b: 
not simple_token_compare(a, b)) + yield + setattr(Token, "__eq__", eq) + setattr(Token, "__ne__", ne) diff --git a/src/core/utils.py b/src/core/utils.py index c045820..38d5f11 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -8,10 +8,10 @@ import warnings from copy import deepcopy # from pyparsing import * -from pyparsing import Literal, Word, nums, Combine, Optional, delimitedList, oneOf, alphas, Suppress +from pyparsing import Combine, Literal, Optional, Suppress, Word, alphas, delimitedList, nums, oneOf from core.global_symbols import CustomType -from core.tokenizer import TokenKind, Tokenizer, Token +from core.tokenizer import Token, TokenKind, Tokenizer COLORS = { "black", @@ -306,42 +306,34 @@ def dict_product(a, b): return res -def merge_dictionaries(a, b): +def merge_dicts(*items): """ - Returns a new dictionary which is the merge - :param a: - :param b: - :return: + Returns a new dictionary which is the merge of all others + :type items: object + :return: """ - if a is None and b is None: + if items is None: return None res = {} - if a: - res.update(a) - - if b: - res.update(b) + for item in [item for item in items if item]: + res.update(item) return res -def merge_sets(a, b): +def merge_sets(*items): """ - Merge that can handle None - :param a: - :param b: + Returns a new dictionary which is the merge of all others + :type items: object :return: """ - if a is None and b is None: + if items is None: return None res = set() - if a: - res.update(a) - - if b: - res.update(b) + for item in [item for item in items if item]: + res.update(item) return res @@ -612,7 +604,7 @@ def tokens_index(tokens, sub_tokens, skip=0, start_from_end=False): else: skip -= 1 - raise ValueError(f"sub tokens '{sub_tokens}' not found") + raise ValueError(f"sub tokens '{get_text_from_tokens(sub_tokens)}' from {sub_tokens} not found") def as_bag(obj, forced_properties=None): diff --git a/src/evaluators/BaseEvaluator.py b/src/evaluators/BaseEvaluator.py index 94e7dbb..6eecb7a 
100644 --- a/src/evaluators/BaseEvaluator.py +++ b/src/evaluators/BaseEvaluator.py @@ -1,3 +1,4 @@ +from core.builtin_concepts_ids import BuiltinConcepts from core.sheerka.Sheerka import ExecutionContext @@ -9,10 +10,6 @@ class BaseEvaluator: PREFIX = "evaluators." def __init__(self, name, steps, priority: int, enabled=True): - # self.log = get_logger(self.PREFIX + self.__class__.__name__) - # self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__) - # self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__) - self.name = BaseEvaluator.get_name(name) self.short_name = name self.steps = steps @@ -71,3 +68,8 @@ class AllReturnValuesEvaluator(BaseEvaluator): def reset(self): self.eaten.clear() + + @staticmethod + def valid_parser_results(context, return_values): + return [ret for ret in return_values if + ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.PARSER_RESULT)] diff --git a/src/evaluators/DefConceptEvaluator.py b/src/evaluators/DefConceptEvaluator.py index f7d97c5..6131a53 100644 --- a/src/evaluators/DefConceptEvaluator.py +++ b/src/evaluators/DefConceptEvaluator.py @@ -173,6 +173,7 @@ class DefConceptEvaluator(OneReturnValueEvaluator): if def_concept_node.variables != NotInit: certain_variables = def_concept_node.variables.copy() + variables_found.update(certain_variables) skip_variables_resolution = True # get variables and set the sources diff --git a/src/evaluators/ResolveAmbiguityEvaluator.py b/src/evaluators/ResolveAmbiguityEvaluator.py index e6e3f12..7c23a88 100644 --- a/src/evaluators/ResolveAmbiguityEvaluator.py +++ b/src/evaluators/ResolveAmbiguityEvaluator.py @@ -25,8 +25,7 @@ class ResolveAmbiguityEvaluator(AllReturnValuesEvaluator): # If they share the same source, that means that there are multiple results for one ParserInput self.sources = {} success = False - for ret in [ret for ret in return_values if - ret.status and context.sheerka.isinstance(ret.body, 
BuiltinConcepts.PARSER_RESULT)]: + for ret in self.valid_parser_results(context, return_values): source = self.get_source(context, ret) concept_is_instance = self.get_has_concept_instance(ret) @@ -72,10 +71,10 @@ class ResolveAmbiguityEvaluator(AllReturnValuesEvaluator): :return: """ if context.sheerka.isinstance(return_value.body, BuiltinConcepts.PARSER_RESULT) and \ - (isinstance(return_value.body.body, Concept) or - (isinstance(return_value.body.body, list) and - len(return_value.body.body) == 1) and - isinstance(return_value.body.body[0], ConceptNode)): + (isinstance(return_value.body.body, Concept) or + (isinstance(return_value.body.body, list) and + len(return_value.body.body) == 1) and + isinstance(return_value.body.body[0], ConceptNode)): return return_value.body.source return None diff --git a/src/evaluators/ResolveMultiplePluralAmbiguityEvaluator.py b/src/evaluators/ResolveMultiplePluralAmbiguityEvaluator.py new file mode 100644 index 0000000..d4a69ae --- /dev/null +++ b/src/evaluators/ResolveMultiplePluralAmbiguityEvaluator.py @@ -0,0 +1,49 @@ +from core.builtin_concepts_ids import BuiltinConcepts +from core.sheerka.ExecutionContext import ExecutionContext +from evaluators.BaseEvaluator import AllReturnValuesEvaluator +from parsers.BaseNodeParser import ConceptNode +from parsers.BaseParser import BaseParser +from parsers.PythonParser import PythonParser +from parsers.SequenceNodeParser import SequenceNodeParser + + +class ResolveMultiplePluralAmbiguityEvaluator(AllReturnValuesEvaluator): + NAME = "ResolveMultiplePluralAmbiguity" + + def __init__(self): + super().__init__(self.NAME, [BuiltinConcepts.AFTER_PARSING], 55) + self.python = None + self.sequence = None + + def reset(self): + super().reset() + self.python = None + self.sequence = None + + def matches(self, context: ExecutionContext, return_values): + for ret in self.valid_parser_results(context, return_values): + if ret.body.parser.short_name == PythonParser.NAME: + self.python = ret + elif 
ret.body.parser.short_name == SequenceNodeParser.NAME: + self.sequence = ret + + if self.python and self.sequence: + if (len(self.sequence.body.body) == 1 and + isinstance(self.sequence.body.body[0], ConceptNode) and + context.sheerka.is_plural(self.sequence.body.body[0].concept)): + return True + + return False + + def eval(self, context: ExecutionContext, return_values): + symbol = self.sequence.body.body[0].concept.name + if context.sheerka.isinstance(context.sheerka.memory(context, symbol), BuiltinConcepts.NOT_FOUND): + return [self.copy(context, self.sequence)] + else: + return [self.copy(context, self.python)] + + def copy(self, context, to_keep): + return context.sheerka.ret(self.name, + to_keep.status, + to_keep.value, + parents=[self.python, self.sequence]) diff --git a/src/evaluators/ValidateConceptEvaluator.py b/src/evaluators/ValidateConceptEvaluator.py index f69d3fa..03e900e 100644 --- a/src/evaluators/ValidateConceptEvaluator.py +++ b/src/evaluators/ValidateConceptEvaluator.py @@ -1,5 +1,6 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts +from core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.BaseNodeParser import ConceptNode from parsers.BaseParser import BaseParser @@ -73,8 +74,7 @@ class ValidateConceptEvaluator(OneReturnValueEvaluator): res = sheerka.evaluate_concept(context, concept, - eval_body=False, - validation_only=True, + hints=EvaluationHints(eval_body=True, expression_only=True), metadata=["variables", ConceptParts.PRE, ConceptParts.WHERE]) # either the 'pre' or the 'where' condition is not fulfilled diff --git a/src/parsers/BaseCustomGrammarParser.py b/src/parsers/BaseCustomGrammarParser.py index 086e3af..0e4c10c 100644 --- a/src/parsers/BaseCustomGrammarParser.py +++ b/src/parsers/BaseCustomGrammarParser.py @@ -323,7 +323,7 @@ class BaseCustomGrammarParser(BaseParserInputParser): 
keywords_found.add(token.value) colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON if not self.parser_input.next_token(): - self.add_error(UnexpectedEofParsingError(f"While parsing keyword '{keyword.value}'.")) + self.add_error(UnexpectedEofParsingError(f"while parsing keyword '{keyword.value}'")) break else: buffer.append(token) diff --git a/src/parsers/BaseExpressionParser.py b/src/parsers/BaseExpressionParser.py index d5daf90..7a1dbeb 100644 --- a/src/parsers/BaseExpressionParser.py +++ b/src/parsers/BaseExpressionParser.py @@ -4,11 +4,26 @@ from typing import List, Union from core.builtin_concepts_ids import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Token, TokenKind, Tokenizer, LexerError -from core.utils import tokens_are_matching, get_text_from_tokens +from core.tokenizer import LexerError, Token, TokenKind, Tokenizer +from core.utils import get_text_from_tokens, tokens_are_matching from parsers.BaseNodeParser import UnrecognizedTokensNode -from parsers.BaseParser import Node, ParsingError, BaseParser, ErrorSink, UnexpectedTokenParsingError +from parsers.BaseParser import BaseParser, ErrorSink, Node, ParsingError, UnexpectedTokenParsingError +open_parenthesis_mapping = { + TokenKind.LBRACKET: Token(TokenKind.LBRACKET, "[", -1, -1, -1), + TokenKind.LPAR: Token(TokenKind.LPAR, "(", -1, -1, -1), + TokenKind.LBRACE: Token(TokenKind.LBRACE, "{", -1, -1, -1), +} + +end_parenthesis_mapping = { + TokenKind.LBRACKET: Token(TokenKind.RBRACKET, "]", -1, -1, -1), + TokenKind.LPAR: Token(TokenKind.RPAR, ")", -1, -1, -1), + TokenKind.LBRACE: Token(TokenKind.RBRACE, "}", -1, -1, -1), +} + +end_parenthesis_types = {TokenKind.RBRACKET, TokenKind.RPAR, TokenKind.RBRACE} + +comma = Token(TokenKind.COMMA, ",", -1, -1, -1) class ComparisonType: EQUALS = "EQ" @@ -26,7 +41,14 @@ class LeftPartNotFoundError(ParsingError): """ When the expression starts with 'or' or 'and' """ - pass + 
keyword: str + pos: int + + def __repr__(self): + return f"LeftPartNotFoundError(keyword={self.keyword}, pos={self.pos})" + + def __str__(self): + return f"There is not left part to '{self.keyword}' at position {self.pos}" @dataclass() @@ -390,6 +412,67 @@ class FunctionNode(ExprNode): return f"{self.first} {self.parameters} {self.last}" +@dataclass() +class Comprehension: + target: ExprNode + iterable: ExprNode + if_expr: ExprNode + + +class ListComprehensionNode(ExprNode): + def __init__(self, start, end, tokens, element: ExprNode, generators: List[Comprehension]): + super().__init__(start, end, tokens) + self.element = element + self.generators = generators + + def __eq__(self, other): + if id(self) == id(other): + return True + + if not isinstance(other, ListComprehensionNode): + return False + + if not super().__eq__(other): + return False + + return self.element == other.element and self.generators == other.generators + + def __repr__(self): + msg = f"ListComprehensionNode(start={self.start}, end={self.end}, element='{self.element}', generators=" + for comp in self.generators: + msg += f"['{comp.target}', {comp.iterable}, '{comp.if_expr}'], " + return msg + ")" + + +class ListNode(ExprNode): + def __init__(self, start, end, tokens, first, last, items: List[ExprNode], sep=None): + super().__init__(start, end, tokens) + self.first = first + self.last = last + self.items = items + self.sep = sep or comma + + def __eq__(self, other): + if id(self) == id(other): + return True + + if not isinstance(other, ListNode): + return False + + if not super().__eq__(other): + return False + + return (self.first == other.first and + self.last == other.last and + self.items == other.items and + self.sep == other.sep) + + def __repr__(self): + msg = f"ListNode(start={self.start}, end={self.end}, sep={self.sep}" + msg += f"first='{self.first}', last='{self.last}', items={self.items})" + return msg + + class BaseExpressionParser(BaseParser): def reset_parser_input(self, 
parser_input: ParserInput, error_sink): @@ -432,12 +515,15 @@ class BaseExpressionParser(BaseParser): # The node is compiled in ExpressionParser.parse() or FunctionParser.parse(), depending of the requirement node = self.parse_input(context, parser_input, error_sink) - token = parser_input.token - if token and token.type != TokenKind.EOF: - if token.type == TokenKind.RPAR: - error_sink.add_error(ParenthesisMismatchError(token)) - else: - error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.EOF])) + if not error_sink.has_error: + token = parser_input.token + if token and token.type != TokenKind.EOF: + if token.type == TokenKind.RPAR: + error_sink.add_error(ParenthesisMismatchError(token)) + else: + error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", + token, + [TokenKind.EOF])) if isinstance(node, ParenthesisNode): node = node.node @@ -454,17 +540,16 @@ class BaseExpressionParser(BaseParser): return ret - def parse_input(self, context, parser_input: ParserInput, error_sink: list): + def parse_input(self, context, parser_input: ParserInput, error_sink: ErrorSink): raise NotImplementedError def parse_tokens_stop_condition(self, token, parser_input): raise NotImplementedError - def parse_tokens(self, context, parser_input, error_sink): + def parse_tokens(self, context, parser_input, error_sink, stop_condition, expr_parser): def stop(): return token.type == TokenKind.EOF or \ - paren_count == 0 and (token.type == TokenKind.RPAR or - self.parse_tokens_stop_condition(token, parser_input)) + paren_count == 0 and (token.type == TokenKind.RPAR or stop_condition(token, parser_input)) token = parser_input.token if token.type == TokenKind.EOF: @@ -474,6 +559,10 @@ class BaseExpressionParser(BaseParser): last_paren = token start = parser_input.pos parser_input.next_token() + # KSI 2021-09-01. 
I am quite sure I need a specific parser to parse inside the parenthesis + # if so, add a inside_parenthesis_parser to the list of this function parameter + # parser_to_use = inside_parenthesis_parser or self + # expr = parser_to_use.parse_input(context, parser_input, error_sink) expr = self.parse_input(context, parser_input, error_sink) token = parser_input.token if token.type != TokenKind.RPAR: @@ -491,10 +580,10 @@ class BaseExpressionParser(BaseParser): while not stop(): last_is_whitespace = token.type == TokenKind.WHITESPACE end += 1 - if token.type == TokenKind.LPAR: + if token.type in (TokenKind.LPAR, TokenKind.LBRACKET, TokenKind.LBRACE): last_paren = token paren_count += 1 - if token.type == TokenKind.RPAR: + if token.type in (TokenKind.RPAR, TokenKind.RBRACKET, TokenKind.RBRACE): paren_count -= 1 parser_input.next_token(False) token = parser_input.token @@ -503,15 +592,15 @@ class BaseExpressionParser(BaseParser): end -= 1 if start == end: - if token.type != TokenKind.RPAR: - error_sink.add_error(LeftPartNotFoundError()) return None - if paren_count != 0: + if paren_count > 0: + # Only if the count is > 0 as the left parenthesis may have occurred before the parse_tokens() error_sink.add_error(ParenthesisMismatchError(last_paren)) return None - if self.expr_parser: + if expr_parser: + # to sub parse (parse with more fineness) new_parsing_input = ParserInput( None, tokens=parser_input.tokens, @@ -520,14 +609,14 @@ class BaseExpressionParser(BaseParser): end=end - 1, yield_oef=False).reset() new_parsing_input.next_token() - return self.expr_parser.parse_input(context, new_parsing_input, error_sink) + return expr_parser.parse_input(context, new_parsing_input, error_sink) else: return NameExprNode(start, end - 1, parser_input.tokens[start:end]) class ExpressionVisitor: """ - Pyhtonic implementation of visitors for ExprNode + Pythonic implementation of visitors for ExprNode """ def visit(self, expr_node): @@ -548,6 +637,29 @@ class ExpressionVisitor: 
self.visit(value) +class ExpressionVisitorWithHint: + """ + Pythonic implementation of visitors for ExprNode + """ + + def visit(self, expr_node, hint): + name = expr_node.__class__.__name__ + + method = 'visit_' + name + visitor = getattr(self, method, self.generic_visit) + return visitor(expr_node, hint) + + def generic_visit(self, expr_node, hint): + """Called if no explicit visitor function exists for a node.""" + for field, value in expr_node.__dict__.items(): + if isinstance(value, (list, tuple)): + for item in value: + if isinstance(item, ExprNode): + self.visit(item, hint) + elif isinstance(value, ExprNode): + self.visit(value, hint) + + class TrueifyVisitor(ExpressionVisitor): """ Visit an ExprNode @@ -599,13 +711,13 @@ class IsAQuestionVisitor(ExpressionVisitor): def visit_NameExprNode(self, expr_node): if tokens_are_matching(expr_node.tokens, is_question_tokens) or \ - tokens_are_matching(expr_node.tokens, eval_question_requested_in_context_tokens): + tokens_are_matching(expr_node.tokens, eval_question_requested_in_context_tokens): return True return None def visit_FunctionNode(self, expr_node: FunctionNode): if tokens_are_matching(expr_node.tokens, is_question_tokens) or \ - tokens_are_matching(expr_node.tokens, in_context_tokens): + tokens_are_matching(expr_node.tokens, in_context_tokens): return True return None diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index 05990fc..2a3830f 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -1,9 +1,12 @@ from dataclasses import dataclass import core.utils -from core.tokenizer import TokenKind, Token +from cache.FastCache import FastCache +from core import builtin_helpers +from core.sheerka.ExecutionContext import ExecutionContext +from core.tokenizer import Token, TokenKind from core.var_ref import VariableRef -from parsers.BaseParser import Node, ParsingError, BaseParserInputParser +from parsers.BaseParser import BaseParserInputParser, Node, 
ParsingError DEBUG_COMPILED = True @@ -411,6 +414,13 @@ class SourceCodeWithConceptNode(LexerNode): def get_source_to_parse(self): return self.python_node.source + def get_errors(self): + errors = [] + for n in self.nodes: + if hasattr(n, "error") and n.error: + errors.append(n.error) + return errors + class VariableNode(LexerNode): """ @@ -460,13 +470,52 @@ class NoMatchingTokenError(ParsingError): pos: int +class UnrecognizedTokensCache: + def __init__(self, parsers): + self.parsers = parsers + self.cache = FastCache() + self.hits = 0 + self.calls = 0 + + def get_lexer_nodes_from_unrecognized(self, + context: ExecutionContext, + unrecognized_tokens: UnrecognizedTokensNode): + to_request = unrecognized_tokens.source + self.calls += 1 + if to_request in self.cache: + self.hits += 1 + return self.cache.get(to_request) + + res = builtin_helpers.get_lexer_nodes_from_unrecognized(context, + unrecognized_tokens, + self.parsers) + self.cache.put(to_request, res) + return res + + @property + def ratio(self): + return self.hits / self.calls if self.calls else 0 + + @property + def calls_details(self): + return self.cache.calls + + def to_dict(self): + return { + "calls": self.calls, + "hits": self.hits, + "ratio": self.ratio, + "calls_details": self.calls_details, + } + + class BaseNodeParser(BaseParserInputParser): """ Parser that return LexerNode """ - def __init__(self, name, priority, **kwargs): - super().__init__(name, priority, yield_eof=True) + def __init__(self, name, priority, enabled=True, **kwargs): + super().__init__(name, priority, yield_eof=True, enabled=enabled) def init_from_concepts(self, context, concepts, **kwargs): """ diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index 92cab2d..0831d57 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -67,6 +67,9 @@ class UnexpectedTokenParsingError(ParsingError): class UnexpectedEofParsingError(ParsingError): message: str = None + def __repr__(self): + return 
f"UnexpectedEofParsingError({self.message})" + class BaseParser: PREFIX = "parsers." diff --git a/src/parsers/BnfDefinitionParser.py b/src/parsers/BnfDefinitionParser.py index ef71a5e..6530e05 100644 --- a/src/parsers/BnfDefinitionParser.py +++ b/src/parsers/BnfDefinitionParser.py @@ -240,31 +240,31 @@ class BnfDefinitionParser(BaseParser): self.add_error(concept) return None - expr = ConceptExpression(concept, rule_name=concept.name) + expr = ConceptExpression(concept, rule_name=concept.key) return self.eat_rule_name_if_needed(expr) if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD): self.next_token() - concept_name = token.str_value + concept_key = token.str_value # we are trying to match against a concept which is still under construction ! # (for example of recursive bnf definition) if self.context.obj and hasattr(self.context.obj, "name"): - if concept_name == str(self.context.obj.name): - return self.eat_rule_name_if_needed(ConceptExpression(concept_name)) # 2021-02-17 no rule name ? + if concept_key == str(self.context.obj.name): + return self.eat_rule_name_if_needed(ConceptExpression(concept_key)) # 2021-02-17 no rule name ? 
- concept = self.context.get_concept(concept_name) + concept = self.context.get_concept(concept_key) if not self.sheerka.is_known(concept): - expr = VariableExpression(concept_name) + expr = VariableExpression(concept_key) return self.eat_rule_name_if_needed(expr) elif hasattr(concept, "__iter__"): self.add_error( self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT, - body=("key", concept_name))) + body=("key", concept_key))) return None else: - expr = ConceptExpression(concept, rule_name=concept.name) + expr = ConceptExpression(concept, rule_name=concept.key) return self.eat_rule_name_if_needed(expr) if token.type == TokenKind.STRING: diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index a39879b..e497a6a 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -16,13 +16,13 @@ import core.builtin_helpers import core.utils from cache.Cache import Cache from core.builtin_concepts import BuiltinConcepts -from core.concept import DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts, Concept +from core.concept import Concept, ConceptParts, DEFINITION_TYPE_BNF, DoNotResolve from core.global_symbols import NotFound from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer, TokenKind, Token +from core.tokenizer import Token, TokenKind, Tokenizer from core.utils import CONSOLE_COLORS_MAP as CCM -from parsers.BaseNodeParser import BaseNodeParser, GrammarErrorNode, UnrecognizedTokensNode, ConceptNode, \ - NoMatchingTokenError, RuleNode, SourceCodeNode, SourceCodeWithConceptNode +from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, GrammarErrorNode, NoMatchingTokenError, RuleNode, \ + SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensCache, UnrecognizedTokensNode PARSERS = ["Sequence", "Sya", "Python"] VARIABLE_EXPR_PARSER = ["Sequence", "Sya", "Python", "Bnf"] @@ -618,9 +618,8 @@ class VariableExpression(ParsingExpression): return None utn = 
UnrecognizedTokensNode(start, end, tokens) - nodes_sequences = core.builtin_helpers.get_lexer_nodes_from_unrecognized(parser_helper.parser.context, - utn, - VARIABLE_EXPR_PARSER) + nodes_sequences = parser_helper.parser.cache2.get_lexer_nodes_from_unrecognized(parser_helper.parser.context, + utn) return nodes_sequences @staticmethod @@ -1264,8 +1263,8 @@ class BnfConceptParserHelper: self.pos = -1 def __repr__(self): - concepts = [item.concept if isinstance(item, ConceptNode) else "***" for item in self.sequence] - return f"BnfConceptParserHelper({concepts})" + nodes = core.builtin_helpers.debug_nodes(self.sequence) + return f"BnfConceptParserHelper({nodes})" def __eq__(self, other): if id(self) == id(other): @@ -1443,9 +1442,8 @@ class BnfConceptParserHelper: self.unrecognized_tokens.fix_source() # try to recognize concepts - nodes_sequences = core.builtin_helpers.get_lexer_nodes_from_unrecognized(self.parser.context, - self.unrecognized_tokens, - PARSERS) + nodes_sequences = self.parser.cache.get_lexer_nodes_from_unrecognized(self.parser.context, + self.unrecognized_tokens) if nodes_sequences: instances = [self] @@ -1458,7 +1456,7 @@ class BnfConceptParserHelper: for node in node_sequence: instance.sequence.append(node) if isinstance(node, UnrecognizedTokensNode) or \ - hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens: + hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens: instance.has_unrecognized = True instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) @@ -1568,7 +1566,7 @@ class BnfConceptParserHelper: if _underlying.parsing_expression.rule_name: # make sure VariableExpression are only added once if (not isinstance(_underlying.parsing_expression, VariableExpression) or - _underlying.parsing_expression.rule_name not in _concept.get_compiled()): + _underlying.parsing_expression.rule_name not in _concept.get_compiled()): var_value = _get_underlying_value(_underlying) _add_compiled(_concept, 
_underlying.parsing_expression.rule_name, var_value) _concept.get_hints().need_validation = True @@ -1638,6 +1636,8 @@ class BnfNodeParser(BaseNodeParser): else: self.concepts_grammars = Cache() + self.cache = UnrecognizedTokensCache(PARSERS) + self.cache2 = UnrecognizedTokensCache(VARIABLE_EXPR_PARSER) self.ignore_case = True @staticmethod @@ -1963,7 +1963,7 @@ class BnfNodeParser(BaseNodeParser): nodes = [] for c in valid_concepts: - nodes.append(ConceptExpression(c, rule_name=c.name)) + nodes.append(ConceptExpression(c, rule_name=c.key)) resolved = self.resolve_parsing_expression(ssc, UnOrderedChoice(*nodes), @@ -2116,6 +2116,12 @@ class BnfNodeParser(BaseNodeParser): sequences = self.get_concepts_sequences(context) valid_parser_helpers = self.get_valid(sequences) + + debugger = context.get_debugger(self.NAME, "parse") + if debugger.is_enabled: + debugger.debug_var("stats", self.cache.to_dict()) + #debugger.debug_var("stats", self.cache2.to_dict()) + if valid_parser_helpers is None: return self.sheerka.ret( self.name, diff --git a/src/parsers/DefRuleParser.py b/src/parsers/DefRuleParser.py index 74c3aac..a0fff94 100644 --- a/src/parsers/DefRuleParser.py +++ b/src/parsers/DefRuleParser.py @@ -110,7 +110,7 @@ class DefRuleParser(BaseCustomGrammarParser): return None if not self.parser_input.next_token(): # eat as - self.add_error(UnexpectedEofParsingError("While parsing 'when'.")) + self.add_error(UnexpectedEofParsingError("while parsing 'when'")) return None rule = self.parse_rule() diff --git a/src/parsers/FunctionParser.py b/src/parsers/FunctionParser.py index cd7e1cb..9276347 100644 --- a/src/parsers/FunctionParser.py +++ b/src/parsers/FunctionParser.py @@ -98,7 +98,7 @@ class FunctionParser(BaseExpressionParser): return None if not parser_input.next_token(): - error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis")) + error_sink.add_error(UnexpectedEofParsingError(f"while parsing left parenthesis")) return None token = 
parser_input.token @@ -110,7 +110,7 @@ class FunctionParser(BaseExpressionParser): start_node = NameExprNode(start, start + 1, parser_input.tokens[start:start + 2]) if not parser_input.next_token(): - error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis")) + error_sink.add_error(UnexpectedEofParsingError(f"after left parenthesis")) return FunctionNode(start, start + 1, [], start_node, None, None) params = self.parse_parameters(context, parser_input, error_sink) @@ -146,7 +146,7 @@ class FunctionParser(BaseExpressionParser): token = parser_input.token if token.type == TokenKind.EOF: - error_sink.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters")) + error_sink.add_error(UnexpectedEofParsingError(f"while parsing parameters")) return None if token.type == TokenKind.RPAR: @@ -173,7 +173,7 @@ class FunctionParser(BaseExpressionParser): # otherwise, eat until LPAR or separator parser_input.seek(start_pos) - return self.parse_tokens(context, parser_input, error_sink) + return self.parse_tokens(context, parser_input, error_sink, self.parse_tokens_stop_condition, self.expr_parser) def parse_tokens_stop_condition(self, token, parser_input): return token.value == self.sep diff --git a/src/parsers/ListComprehensionParser.py b/src/parsers/ListComprehensionParser.py new file mode 100644 index 0000000..7a007ff --- /dev/null +++ b/src/parsers/ListComprehensionParser.py @@ -0,0 +1,219 @@ +from dataclasses import dataclass + +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import TokenKind +from parsers.BaseExpressionParser import BaseExpressionParser, Comprehension, ListComprehensionNode, \ + ParenthesisMismatchError, end_parenthesis_mapping, end_parenthesis_types +from parsers.BaseParser import ErrorSink, ParsingError, UnexpectedEofParsingError +from parsers.ListParser import ListParser +from sheerkapython.ExprToPython import PythonExprVisitor + + +class 
LeadingParenthesisNotFound(ParsingError): + pass + + +@dataclass +class FailedToParse(ParsingError): + part: str # part that fails ('element', 'target', 'generator' or 'if') + pos: int # position after which the element was not found + + def __repr__(self): + return f"Failed to find <{self.part}> after position {self.pos}" + + +class ForNotFound(ParsingError): + pass + + +class ElementNotFound(ParsingError): + pass + + +class ListComprehensionParser(BaseExpressionParser): + NAME = "ListComprehension" + + def __init__(self, **kwargs): + super().__init__(self.NAME, 55, yield_eof=True) + self.expr_parser = kwargs.get("expr_parser", None) + self.auto_compile = kwargs.get("auto_compile", True) + self.element_parser = ListParser() + + @staticmethod + def stop_condition(keywords, end_parenthesis=None): + if end_parenthesis is None: + return lambda t, pi: t.type == TokenKind.IDENTIFIER and t.value in keywords + else: + return lambda t, pi: t.type == TokenKind.IDENTIFIER and t.value in keywords or t.type in end_parenthesis + + def parse_input(self, context, parser_input, error_sink): + return self.parse_list_comprehension(context, parser_input, error_sink) + + def parse_list_comprehension(self, context, parser_input, error_sink): + start_pos = parser_input.pos + + # first define the leading parenthesis / bracket / brace + start_parenthesis = parser_input.token + if start_parenthesis.type not in end_parenthesis_mapping: + error_sink.add_error(LeadingParenthesisNotFound()) + return None + + end_parenthesis = end_parenthesis_mapping[start_parenthesis.type] + + if not parser_input.next_token(): + error_sink.add_error(UnexpectedEofParsingError("when start parsing")) + return None + + element_start_pos = parser_input.pos + + # search the 'for' tokens. 
They will be the pivot for our parsing + for_tokens_positions = self.get_for_positions(parser_input) + if not for_tokens_positions: + error_sink.add_error(ForNotFound()) + return None + + comprehensions = [] + sub_error_sink = ErrorSink() + for start, end in reversed(for_tokens_positions): + sub_input = parser_input.sub_part(start, end, yield_oef=True) + sub_input.reset() + sub_input.next_token() + comprehension = self.parse_comprehension(context, sub_input, sub_error_sink, end_parenthesis.type) + + if not comprehension: + element_end_pos = end + break + else: + comprehensions.insert(0, comprehension) + else: + element_end_pos = start - 1 + + if not comprehensions: + error_sink.sink.extend(sub_error_sink.sink) + return None + + if element_end_pos < element_start_pos: + error_sink.add_error(ElementNotFound()) + return None + + sub_input = parser_input.sub_part(element_start_pos, element_end_pos, yield_oef=True) + sub_input.reset() + sub_input.next_token() + element_expr = self.element_parser.parse_input(context, sub_input, error_sink) + + if not element_expr: + return None + + if not parser_input.token.type == end_parenthesis.type: + error_sink.add_error(ParenthesisMismatchError(end_parenthesis)) + return None + + end_pos = parser_input.pos + parser_input.next_token() + + return ListComprehensionNode(start_pos, + end_pos, + parser_input.tokens[start_pos: end_pos + 1], + element_expr, + comprehensions) + + def parse_comprehension(self, context, parser_input, error_sink, end_parenthesis_type): + parser_input.next_token() # eat the leading 'for' + + pos = parser_input.pos + target_expr = self.parse_tokens(context, + parser_input, + error_sink, + self.stop_condition(["in"], [end_parenthesis_type]), + None) + if not target_expr: + error_sink.add_error(FailedToParse('target', pos)) + return None + + if not parser_input.next_token(): + error_sink.add_error(UnexpectedEofParsingError("while parsing comprehension")) + return None + + pos = parser_input.pos + generator_expr = 
self.parse_tokens(context, + parser_input, + error_sink, + self.stop_condition(["for", "if"], [end_parenthesis_type]), + None) + if not generator_expr: + error_sink.add_error(FailedToParse('generator', pos)) + return None + + token = parser_input.token + if token.value == "if": + if not parser_input.next_token(): + error_sink.add_error(UnexpectedEofParsingError("while parsing comprehension")) + return None + + pos = parser_input.pos + if_expr = self.parse_tokens(context, + parser_input, + error_sink, + self.stop_condition(["for"], [end_parenthesis_type]), + None) + + if not if_expr: + error_sink.add_error(FailedToParse('if', pos)) + return None + else: + if_expr = None + + return Comprehension(target_expr, generator_expr, if_expr) + + def parse_tokens_stop_condition(self, token, parser_input): + raise NotImplementedError() + + def parse(self, context, parser_input: ParserInput): + ret = super().parse(context, parser_input) + + if not self.auto_compile: + return ret + + if ret is None: + return None + + if not ret.status: + return ret + + node = ret.body.body + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + return ret[0] if len(ret) == 1 else ret + + @staticmethod + def get_for_positions(parser_input): + """ + browse the tokens to get the starting and ending position of 'for' tokens + ex: + [ x, t for x in z if t for a in b ] + ^ ^ + will return + :param parser_input: + :return: list of tuple representing where the 'for' token starts and where it ends + """ + res = [] + current_pos = None + nb_parenthesis = 1 + while True: + if parser_input.token.value == "for": + if current_pos: + res.append((current_pos, parser_input.pos - 1)) + current_pos = parser_input.pos + elif parser_input.token.type in end_parenthesis_mapping: + nb_parenthesis += 1 + elif parser_input.token.type in end_parenthesis_types: + nb_parenthesis -= 1 + + if nb_parenthesis == 0 or not parser_input.next_token(): + break + + if current_pos: + res.append((current_pos, 
class ListParser(BaseExpressionParser):
    """
    Parse a sequence of items separated by `sep`, optionally enclosed between an opening token
    known to `end_parenthesis_mapping` and its matching closing token.
    """
    NAME = "List"

    def __init__(self, sep=None, **kwargs):
        """
        :param sep: token that separates the items; `comma` is used when not given
        :param kwargs: `expr_parser`, when given, is handed to `parse_tokens` for every item
        """
        super().__init__(self.NAME, 50, False, yield_eof=True)
        # KSI 2021-09-02 : The priority (50) is not set
        self.sep = sep or comma
        self.end_tokens = [self.sep]
        self.expr_parser = kwargs.get("expr_parser", None)

    def parse_input(self, context, parser_input: ParserInput, error_sink: ErrorSink):
        """
        Parse a list starting at the current position of `parser_input`.

        :param context: execution context, forwarded to `parse_tokens`
        :param parser_input: input to read; it is advanced while parsing
        :param error_sink: receives a `ParenthesisMismatchError` when an opening token was found
                           but the matching closing token is missing
        :return: None when the sink holds an error once the items are read, or when nothing at
                 all was parsed; a `ListNode` otherwise
        """
        start = parser_input.pos
        token = parser_input.token
        first = None
        trailing_token = None

        # The closing token must stop the items of THIS call only. The stop tokens are swapped
        # for an extended copy and restored on exit, so that closing tokens do not pile up in
        # `self.end_tokens` (and do not leak into later parsings) when the parser is reused.
        saved_end_tokens = self.end_tokens
        try:
            # define the opening parenthesis and set the expected closing parenthesis
            if token.type in end_parenthesis_mapping:
                trailing_token = end_parenthesis_mapping[token.type]
                self.end_tokens = saved_end_tokens + [trailing_token]
                first = NameExprNode(start, start, parser_input.tokens[start:start + 1])

                parser_input.next_token()

            token = parser_input.token
            items = []
            while token.type != TokenKind.EOF:
                parsed = self.parse_tokens(context,
                                           parser_input,
                                           error_sink,
                                           self.parse_tokens_stop_condition,
                                           self.expr_parser)

                if parsed:
                    items.append(parsed)

                token = parser_input.token
                is_separator = token.type == self.sep.type and token.value == self.sep.value
                if not parsed or error_sink.has_error or not is_separator:
                    break

                parser_input.next_token()  # skip the separator

            if error_sink.has_error:
                return None

            # make sure the trailing parenthesis is present if required
            last = None
            if trailing_token:
                if token.type == trailing_token.type:
                    pos = parser_input.pos
                    last = NameExprNode(pos, pos, parser_input.tokens[pos:pos + 1])
                else:
                    error_sink.add_error(ParenthesisMismatchError(trailing_token))

            end = parser_input.pos if token.type != TokenKind.EOF else parser_input.pos - 1
            if self.parse_tokens_stop_condition(token, None):
                parser_input.next_token()  # consume the separator / closing token we stopped on

            if first is None and last is None and not items:
                return None

            return ListNode(start, end, parser_input.tokens[start:end + 1], first, last, items, self.sep)
        finally:
            self.end_tokens = saved_end_tokens

    def parse_tokens_stop_condition(self, token, parser_input):
        """
        Tell whether `token` ends the current item.

        :param token: token to test
        :param parser_input: not used
        :return: True when `token` has the type and the value of one of `self.end_tokens`
        """
        return any(t.type == token.type and t.value == token.value for t in self.end_tokens)
"__name__", node.name)) - - return V(self.variables[node.name]) - - def get_conditions(self, expr_nodes): - conditions = [] - for expr_node in expr_nodes: - error_sink = ErrorSink() - parser_input = ParserInput(None, tokens=expr_node.tokens).reset() - parser_input.next_token() - - parsed = self.comparison_parser.parse_input(self.context, parser_input, error_sink) - if error_sink.has_error: - raise FailedToCompileError(error_sink.sink) - - if isinstance(parsed, VariableNode): - var_name = self.init_variable_if_needed(parsed, conditions) - if parsed.attributes_str is not None: - conditions.append(Condition(var_name, parsed.attributes_str, True)) - - elif isinstance(parsed, ComparisonNode): - if isinstance(parsed.left, VariableNode): - left = self.init_variable_if_needed(parsed.left, conditions) - attr = parsed.left.attributes_str or "__self__" - right = eval(get_text_from_tokens(parsed.right.tokens)) - conditions.append(Condition(left, attr, right)) - - return [AndConditions(conditions)] +from core.tokenizer import TokenKind +from parsers.BaseExpressionParser import AndNode, BaseExpressionParser, LeftPartNotFoundError, NotNode, \ + OrNode, ParenthesisNode +from parsers.BaseParser import UnexpectedEofParsingError class LogicalOperatorParser(BaseExpressionParser): @@ -70,9 +17,6 @@ class LogicalOperatorParser(BaseExpressionParser): def __init__(self, **kwargs): super().__init__(self.NAME, 50, False, yield_eof=True) - self.and_tokens = list(Tokenizer(" and ", yield_eof=False)) - self.and_not_tokens = list(Tokenizer(" and not ", yield_eof=False)) - self.not_tokens = list(Tokenizer("not ", yield_eof=False)) self.expr_parser = kwargs.get("expr_parser", None) @staticmethod @@ -87,16 +31,20 @@ class LogicalOperatorParser(BaseExpressionParser): def parse_or(self, context, parser_input, error_sink): start = parser_input.pos expr = self.parse_and(context, parser_input, error_sink) + token = parser_input.token if token.type != TokenKind.IDENTIFIER or token.value != "or": return 
expr + if expr is None: + error_sink.add_error(LeftPartNotFoundError("or", start)) + parts = [expr] while token.type == TokenKind.IDENTIFIER and token.value == "or": parser_input.next_token() expr = self.parse_and(context, parser_input, error_sink) if expr is None: - error_sink.add_error(UnexpectedEofParsingError("When parsing 'or'")) + error_sink.add_error(UnexpectedEofParsingError("while parsing 'or'")) end = parser_input.pos self.clean_parenthesis_nodes(parts) return OrNode(start, end, parser_input.tokens[start: end + 1], *parts) @@ -110,16 +58,20 @@ class LogicalOperatorParser(BaseExpressionParser): def parse_and(self, context, parser_input, error_sink): start = parser_input.pos expr = self.parse_not(context, parser_input, error_sink) + token = parser_input.token if token.type != TokenKind.IDENTIFIER or token.value != "and": return expr + if expr is None: + error_sink.add_error(LeftPartNotFoundError("and", start)) + parts = [expr] while token.type == TokenKind.IDENTIFIER and token.value == "and": parser_input.next_token() expr = self.parse_not(context, parser_input, error_sink) if expr is None: - error_sink.add_error(UnexpectedEofParsingError("When parsing 'and'")) + error_sink.add_error(UnexpectedEofParsingError("while parsing 'and'")) end = parser_input.pos self.clean_parenthesis_nodes(parts) return AndNode(start, end, parser_input.tokens[start: end + 1], *parts) @@ -134,7 +86,7 @@ class LogicalOperatorParser(BaseExpressionParser): token = parser_input.token start = parser_input.pos if (token.type == TokenKind.IDENTIFIER and token.value == "not" and - parser_input.the_token_after(True).value != "in"): + parser_input.the_token_after(True).value != "in"): parser_input.next_token() parsed = self.parse_not(context, parser_input, error_sink) node = parsed.node if isinstance(parsed, ParenthesisNode) else parsed @@ -143,7 +95,11 @@ class LogicalOperatorParser(BaseExpressionParser): parser_input.tokens[start: parsed.end + 1], node) else: - return 
self.parse_tokens(context, parser_input, error_sink) + return self.parse_tokens(context, + parser_input, + error_sink, + self.parse_tokens_stop_condition, + self.expr_parser) def parse_tokens_stop_condition(self, token, parser_input): return token.type == TokenKind.IDENTIFIER and token.value in ("and", "or") or \ diff --git a/src/parsers/RelationalOperatorParser.py b/src/parsers/RelationalOperatorParser.py index b804eec..44d459e 100644 --- a/src/parsers/RelationalOperatorParser.py +++ b/src/parsers/RelationalOperatorParser.py @@ -21,14 +21,14 @@ class RelationalOperatorParser(BaseExpressionParser): def parse_compare(self, context, parser_input, error_sink): start = parser_input.pos - left = self.parse_tokens(context, parser_input, error_sink) + left = self.parse_tokens(context, parser_input, error_sink, self.parse_tokens_stop_condition, self.expr_parser) if left is None: return None if (comp := self.eat_comparison(parser_input)) is None: return left - right = self.parse_tokens(context, parser_input, error_sink) + right = self.parse_tokens(context, parser_input, error_sink, self.parse_tokens_stop_condition, self.expr_parser) if comp == ComparisonType.IN and not isinstance(right, ParenthesisNode): t = right.tokens[0] diff --git a/src/parsers/SequenceNodeParser.py b/src/parsers/SequenceNodeParser.py index 8c488c5..031d698 100644 --- a/src/parsers/SequenceNodeParser.py +++ b/src/parsers/SequenceNodeParser.py @@ -1,15 +1,15 @@ from dataclasses import dataclass -from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts -from core.builtin_helpers import update_concepts_hints -from core.concept import DEFINITION_TYPE_BNF, Concept +from core.builtin_helpers import debug_nodes, update_concepts_hints +from core.concept import Concept, DEFINITION_TYPE_BNF from core.global_symbols import NotFound from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer, TokenKind -from core.utils import strip_tokens, make_unique 
-from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode -from parsers.BaseParser import UnexpectedTokenParsingError, ParsingError +from core.tokenizer import TokenKind, Tokenizer +from core.utils import make_unique, strip_tokens +from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, SourceCodeNode, UnrecognizedTokensCache, \ + UnrecognizedTokensNode +from parsers.BaseParser import ParsingError, UnexpectedTokenParsingError from parsers.BnfNodeParser import BnfNodeParser from parsers.SyaNodeParser import SyaNodeParser @@ -47,9 +47,10 @@ class TokensNodeFoundError(ParsingError): class AtomConceptParserHelper: - def __init__(self, context): + def __init__(self, parser): - self.context = context + self.parser = parser + self.context = parser.context self.debug = [] self.sequence = [] # sequence of concepts already found found self.current_concept: ConceptNode = None # concept being parsed @@ -80,7 +81,7 @@ class AtomConceptParserHelper: return hash(len(self.sequence)) def __repr__(self): - return f"{self.sequence}" + return f"{debug_nodes(self.sequence)}" def lock(self): self.is_locked = True @@ -146,10 +147,7 @@ class AtomConceptParserHelper: self.unrecognized_tokens.fix_source() # try to recognize concepts - nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( - self.context, - self.unrecognized_tokens, - PARSERS) + nodes_sequences = self.parser.cache.get_lexer_nodes_from_unrecognized(self.context, self.unrecognized_tokens) if nodes_sequences: instances = [self] @@ -162,7 +160,7 @@ class AtomConceptParserHelper: for node in node_sequence: instance.sequence.append(node) if isinstance(node, (UnrecognizedTokensNode, SourceCodeNode)) or \ - hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens: + hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens: instance.has_unrecognized = True instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) @@ -191,7 +189,7 @@ class 
AtomConceptParserHelper: self.errors.append(TokensNodeFoundError(self.expected_tokens)) def clone(self): - clone = AtomConceptParserHelper(self.context) + clone = AtomConceptParserHelper(self.parser) clone.debug = self.debug[:] clone.sequence = self.sequence[:] clone.current_concept = self.current_concept.clone() if self.current_concept else None @@ -224,6 +222,7 @@ class SequenceNodeParser(BaseNodeParser): def __init__(self, **kwargs): super().__init__(SequenceNodeParser.NAME, 50, **kwargs) + self.cache = UnrecognizedTokensCache(PARSERS) @staticmethod def _is_eligible(concept): @@ -278,7 +277,7 @@ class SequenceNodeParser(BaseNodeParser): concept_parser_helpers.extend(forked) forked.clear() - concept_parser_helpers = [AtomConceptParserHelper(self.context)] + concept_parser_helpers = [AtomConceptParserHelper(self)] while self.parser_input.next_token(False): for concept_parser in concept_parser_helpers: @@ -355,7 +354,7 @@ class SequenceNodeParser(BaseNodeParser): res = [] start, end = self.get_tokens_boundaries(self.parser_input.as_tokens()) for concept in concepts: - parser_helper = AtomConceptParserHelper(None) + parser_helper = AtomConceptParserHelper(self) parser_helper.sequence.append(ConceptNode(concept, start, end, @@ -419,6 +418,9 @@ class SequenceNodeParser(BaseNodeParser): False, context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + debugger = context.get_debugger(self.NAME, "parse") + debugger.debug_entering(source=self.parser_input.as_text()) + sequences = self.get_concepts_sequences() if by_name := self.get_by_name(): # note that concepts by names must be appended, not prepended @@ -427,6 +429,10 @@ class SequenceNodeParser(BaseNodeParser): parser_helpers = self.get_valid(sequences) + if debugger.is_enabled(): + debugger.debug_var("stats", self.cache.to_dict()) + debugger.debug_leaving(result=parser_helpers) + if len(parser_helpers): ret = [] for parser_helper in parser_helpers: @@ -471,8 +477,18 @@ class 
SequenceNodeParser(BaseNodeParser): if not eligible: return None - return [self.sheerka.new_dynamic(c, BuiltinConcepts.PLURAL, name=token.value, props={BuiltinConcepts.PLURAL: c}) - for c in concepts] + plural_concepts = [self.sheerka.new_dynamic(c, + BuiltinConcepts.PLURAL, + name=token.value, + props={BuiltinConcepts.PLURAL: c}) + for c in concepts] + + for concept in plural_concepts: + underlying_concept = concept.get_prop(BuiltinConcepts.PLURAL) + if self.sheerka.isaset(self.context, underlying_concept): + concept.get_metadata().body = f"get_set_elements(c:|{underlying_concept.id}:)" + + return plural_concepts @staticmethod def as_list(obj): diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index fbb39d7..135f83c 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -1,113 +1,428 @@ -from collections import namedtuple -from dataclasses import dataclass, field -from operator import attrgetter -from typing import List +from dataclasses import dataclass -from core import builtin_helpers -from core.builtin_concepts import BuiltinConcepts -from core.builtin_helpers import update_compiled +from core.builtin_concepts_ids import BuiltinConcepts +from core.builtin_helpers import debug_nodes, get_new_variables_definitions, update_compiled from core.concept import Concept, DEFINITION_TYPE_BNF from core.global_symbols import CONCEPT_COMPARISON_CONTEXT, SyaAssociativity +from core.sheerka.Sheerka import RECOGNIZED_BY_KEY from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Token, TokenKind, Tokenizer -from core.utils import get_n_clones, get_text_from_tokens, NextIdManager, replace_after -from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, \ - SourceCodeWithConceptNode, BaseNodeParser, VariableNode +from core.tokenizer import TokenKind, Tokenizer +from core.utils import 
@dataclass
class NotEnoughParameters(ParsingError, SyaNodeException):
    """
    Error describing a concept for which not enough parameters were found.

    Two errors are equal when concept, tokens, pos and nb_expected match; `parameters` is only
    compared when the right-hand operand defines it (None on that side matches anything).
    """
    concept: Concept  # concept that was being parsed
    tokens: str  # token before which the parameters are expected
    pos: int  # position of the token
    nb_expected: int  # presumably the number of parameters that were expected - TODO confirm
    parameters: list = None  # optional; ignored by __eq__ when None on the compared object

    def __repr__(self):
        return f"not enough parameters found when parsing {self.concept}."

    def __str__(self):
        # same text as the repr
        return repr(self)

    def __eq__(self, other):
        if not isinstance(other, NotEnoughParameters):
            return False

        # mandatory attributes, compared in declaration order
        for attribute in ("concept", "tokens", "pos", "nb_expected"):
            if not getattr(self, attribute) == getattr(other, attribute):
                return False

        # parameters are a wildcard when the other side does not define them
        return other.parameters is None or self.parameters == other.parameters
class SyaState:
    """
    Base class of the states used by the sya parsers.

    A state knows its name, the parser that owns it and, optionally, the state that follows it.
    Subclasses override `run` (what the state does) and `next` (which state comes after).
    """

    def __init__(self, name, owner, next_state=None):
        """
        :param name: name of the state
        :param owner: parser the state belongs to (must expose `parser_input` for the repr)
        :param next_state: state to go to afterwards, when it is known upfront
        """
        self.name, self.owner, self.next_state = name, owner, next_state

    def __repr__(self):
        parser_input = self.owner.parser_input
        return f"({self.name}, token={parser_input.token}, pos={parser_input.pos})"

    def next(self):
        """Return the following state; the base implementation has none."""
        return None

    def run(self):
        """Execute the state; the base implementation does nothing."""
        return None
class ParseConceptState(SyaState):
    """
    State that runs the concept parser held in `owner.state_context`, then either keeps what
    it produced or rolls the owner back to where the parsing started.
    """

    def run(self):
        """
        Run the concept parser and update the owner according to the outcome.

        On error, the token found at the starting position is pushed to the owner's unrecognized
        tokens, the parser input is moved back to that position and the prefix parameters of the
        concept parser are restored from their snapshot.
        On success, the resulting concept node is appended to the owner's stack.

        `next_state` is then taken from the concept parser's `after_parsing_hint` when there is
        one, otherwise it is the owner's `read_next_token` state.
        """
        start_pos = self.owner.parser_input.pos  # remembered for a possible rollback / fork
        concept_parser = self.owner.state_context
        concept_parser.parse()

        if concept_parser.has_error():
            # we did not parse the concept we thought we parsed. Let's rollback
            token = self.owner.parser_input.tokens[start_pos]

            if self.owner.debugger.is_enabled():
                msg = f"Rollbacking {concept_parser.concept}. "
                msg += f"Token '{token.str_value}' is now unrecognized. pos={start_pos}."
                self.owner.debug(msg, is_error=True)

            # the token that started the concept becomes an unrecognized token
            self.owner.unrecognized_tokens.add_token(token, start_pos)
            self.owner.parser_input.seek(start_pos)
            # put the prefix parameters back as they were in the snapshot
            concept_parser.prefix_parameters.clear()
            concept_parser.prefix_parameters.extend(concept_parser.prefix_parameters_snapshot)

        else:
            self.owner.stack.append(concept_parser.concept_node)
            if isinstance(self.owner, SyaConceptParser) and self.owner.expected and self.owner.expected[0][0]:
                # the parent (owner) is a concept parser that still needs some tokens
                # What was recognized by the current concept_parser may be irrelevant
                self.owner.parser_input.seek(concept_parser.concept_node.end)
            else:
                self.owner.stack.extend(concept_parser.parameters)  # append the parameters parsed but not used

        if concept_parser.after_parsing_hint:
            # the sub parser asked for a specific follow-up: map its state to the owner's own
            # state with the same name and take over its state context
            self.next_state = self.owner.all_states.get_state(concept_parser.after_parsing_hint.next_state)
            self.owner.state_context = concept_parser.state_context
        else:
            self.next_state = self.owner.all_states.read_next_token

        # manage when there are remaining parameters. It means that we may not have parsed the correct concept
        # Concept("one x").def_var("x"),
        # Concept("a plus b").def_var("a").def_var("b"),
        # and parsing one plus two
        if (not concept_parser.has_error() and
                concept_parser.parameters and
                concept_parser.sub_concept_detected):  # not the longest match and possible concept misread
            assert isinstance(self.owner, SyaTokensParser)

            # explore the alternative in a fork: same input, but the starting token is treated
            # as unrecognized instead of being the beginning of the concept
            fork = self.owner.sya_node_parser.fork_tokens_parser(self.owner)
            fork.stack.clear()
            token = self.owner.parser_input.tokens[start_pos]
            fork.unrecognized_tokens.add_token(token, start_pos)
            fork.parser_input.seek(start_pos)
            fork.set_state(fork.all_states.read_next_token)

    def next(self):
        """Return the state selected by the last call to `run`."""
        return self.next_state
Will parse {concept}.", concepts=concepts) + instance.state_context = concept + if instance != self.owner: + instance.set_state(self.owner.all_states.on_new_concept) + self.next_state = self.owner.all_states.on_new_concept + return + + self.next_state = self.owner.all_states.eat_unrecognized_token + + def next(self): + return self.next_state + + +class TokensParserFinalizeState(SyaState): + def run(self): + self.owner.finalize() + + def next(self): + return None + + +class ConceptParserStartState(SyaState): + def run(self): + self.owner.initialize_expected_parameters() + + def next(self): + return self.owner.all_states.read_next_token + + +class ConceptParserReadNextTokenState(SyaState): + def run(self): + + if not self.owner.expected: + self.next_state = self.owner.all_states.finalize_end + self.owner.checkpoint = self.owner.parser_input.pos + if self.owner.debugger.is_enabled(): + self.owner.debug("The last part is found. Let's finalize.") + return + + if not self.owner.parser_input.next_token(False): + self.next_state = self.owner.all_states.finalize_start + self.owner.checkpoint = self.owner.parser_input.pos + return + + token = self.owner.parser_input.token + self.owner.checkpoint = self.owner.parser_input.pos + + # Are we parsing the current concept ? + if self.owner.is_part_of_concept_definition(): + if self.owner.debugger.is_enabled(): + msg = f"'{get_text_from_tokens(self.owner.expected[0][0])}'" + msg += f" (from expected {self.owner.nb_expected_popped}) successfully recognized." + self.owner.debug(msg, expected=self.owner.expected[0][0]) + self.next_state = self.owner.all_states.concept_tokens_start + return + else: + if self.owner.expected[0][0] and len(self.owner.stack) >= self.owner.expected[0][1]: + # Improvement: the number of parameter is already found, so we must look for concept parts + raise TokensNotFound(self.owner.concept, get_text_from_tokens(self.owner.expected[0][0])) + + # is it a new concept ? 
class ConceptParserOnNewConceptState(SyaState):
    """
    State entered by a concept parser when a new concept (held in `owner.state_context`) is met.
    """

    def run(self):
        """
        Choose what to do with the new concept.

        When `owner.must_pop(concept)` is falsy, the next state is `new_concept_start`.
        Otherwise an `AfterRunHint` (state `on_new_concept`, context = the concept) is stored in
        `owner.after_parsing_hint` and the next state is `finalize_start`.
        """
        owner = self.owner
        concept = owner.state_context

        if not owner.must_pop(concept):
            self.next_state = owner.all_states.new_concept_start
            return

        owner.after_parsing_hint = AfterRunHint(owner.all_states.on_new_concept, concept)
        self.next_state = owner.all_states.finalize_start

    def next(self):
        """Return the state selected by the last call to `run`."""
        return self.next_state
class ConceptParserStates:
    """
    All the states of a concept parser, one instance of each per owner.

    States that need to know their successor receive it at construction time, hence the
    successor is always created before the state that points to it.
    """

    def __init__(self, owner):
        """
        :param owner: parser the states are created for
        """
        self.start = ConceptParserStartState("start", owner)
        self.read_next_token = ConceptParserReadNextTokenState("read_next_token", owner)

        # a part of the concept definition is recognized:
        # manage the unrecognized tokens, then the parameters, then read the next token
        self.concept_tokens_end = ConceptParserManageParametersState("manage_parameters",
                                                                     owner,
                                                                     self.read_next_token)
        self.concept_tokens_start = ManageUnrecognizedState("manage_concept_tokens",
                                                            owner,
                                                            self.concept_tokens_end)

        # another concept is met: manage the unrecognized tokens, create its parser, run it
        self.on_new_concept = ConceptParserOnNewConceptState("on_new_concept", owner)
        self.create_concept_parser = CreateNewConceptParserState("create_concept_parser", owner)
        self.new_concept_start = ManageUnrecognizedState("new_concept_start",
                                                         owner,
                                                         self.create_concept_parser)
        self.parse_concept = ParseConceptState("parse_concept", owner)

        self.eat_unrecognized_token = EatUnrecognizedTokenState("eat_unrecognized_token", owner)

        # finalization: start -> manage unrecognized -> manage parameters -> end
        self.finalize_start = ConceptParserStartFinalizeState("finalize_start", owner)
        self.finalize_end = ConceptParserEndFinalizeState("finalize_end", owner)
        self.finalize_manage_parameters = ConceptParserManageParametersState("finalize_manage_parameters",
                                                                             owner,
                                                                             self.finalize_end)
        self.finalize_manage_unrecognized = ManageUnrecognizedState("finalize_manage_unrecognized",
                                                                    owner,
                                                                    self.finalize_manage_parameters)

        # index of the states by state name (built before `_all` itself becomes an attribute)
        by_name = {}
        for known_state in vars(self).values():
            by_name[known_state.name] = known_state
        self._all = by_name

    def get_state(self, state):
        """
        Return the state of this set that has the same name as `state`.

        :param state: any state, possibly owned by another parser; may be None
        :return: None when `state` is falsy, else the local state registered under `state.name`
        """
        if not state:
            return None
        return self._all[state.name]
- if sheerka: - concept_weight = parser.sheerka.get_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT) - if concept.str_id in concept_weight: - sya_concept_def.precedence = concept_weight[concept.str_id] - - # in the case of Sheerka, the associativity is managed by the concept itself - # There is no conflict with the settings of the unit test, as I don't use the props in the unit tests if associativity := concept.get_prop(BuiltinConcepts.ASSOCIATIVITY): sya_concept_def.associativity = SyaAssociativity(associativity) @@ -150,1076 +451,478 @@ class SyaConceptDef: def short_repr(self): return f"({self.concept}, prio={self.precedence}, assoc={self.associativity})" - -@dataclass() -class SyaConceptParserHelper: - """ - Use because the is not enough information to create the final ConceptNode - """ - concept: Concept - start: int # position of the token in the tokenizer (Caution, it is not token.index) - end: int = field(default=-1, repr=False, compare=False, hash=None) - expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) - expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None) - last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None) - potential_pos: int = field(default=-1, repr=False, compare=False, hash=None) - parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None) - tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten - remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None) - error: str = None - - def __post_init__(self): - concept = self.concept.concept if isinstance(self.concept, SyaConceptDef) else self.concept - if self.end == -1: - self.end = self.start - - first_keyword_found = None - for token in Tokenizer(concept.key, yield_eof=False): - if not first_keyword_found and token.type != 
TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF: - first_keyword_found = token - - if first_keyword_found: - self.expected.append(token) - else: - self.last_token_before_first_token = token - if token.type != TokenKind.WHITESPACE: - self.expected_parameters_before_first_token += 1 - - # remove useless whitespaces (spaces that are between VAR_DEF) - if len(self.expected) > 2: - temp = [self.expected[0]] - for i in range(1, len(self.expected) - 1): - token = self.expected[i] - if (token.type == TokenKind.WHITESPACE and - self.expected[i - 1].type == TokenKind.VAR_DEF and - self.expected[i + 1].type == TokenKind.VAR_DEF): - continue # skip it - temp.append(token) - temp.append(self.expected[-1]) - self.expected = temp - - self.eat_token(first_keyword_found) # remove the first token - self.tokens.append(first_keyword_found) # and add it to the list of tokens eaten - - def is_matched(self): - return len(self.expected) == 0 - - def is_atom(self): - return len(self.concept.concept.get_metadata().variables) == 0 and len(self.expected) == 0 - - def is_next(self, token): - """ - To match long named concepts - :param token: - :return: - """ - if self.is_matched() or len(self.expected) == 0: - return False - - # True if the next token is the one that is expected - # Or if the next token is a whitespace and the expected one is the one after - # (whitespace are sometimes not mandatory) - return token.strip_quote == self.expected[0].strip_quote or \ - self.expected[0].type == TokenKind.WHITESPACE and token.strip_quote == self.expected[1].strip_quote - - def is_expected(self, token): - if self.is_matched() or token.type == TokenKind.WHITESPACE: - return False - - for expected in self.expected: - if expected.type != TokenKind.VAR_DEF and expected.strip_quote == token.strip_quote: - return True - - return False - - def expected_parameters(self): - return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected)) - - def eat_token(self, until_token): - """ - eat until 
token 'until' - :param until_token: - :return: - """ - # No check, as it is used only after is_expected() or is_next() - while self.expected[0].strip_quote != until_token.strip_quote: - del self.expected[0] - del self.expected[0] - - # return True is a whole sequence of keyword is eaten - # example - # Concept("foo a bar baz qux b").def_var("a").def_var("b") - # 'bar' is just eaten. We will return False because 'baz' and 'qux' are still waiting - if len(self.expected) == 0: - return True - - # also return True at the end of a name sequence - # ... bar baz qux - # return True after 'qux', to indicate all the parameters from must be processed - return self.expected[0].type == TokenKind.VAR_DEF - - def eat_parameter(self, parameter): - if self.is_matched() and parameter == self: - return # not a error - - if self.is_matched(): - self.error = "No more parameter expected" - return - - if self.expected[0].type != TokenKind.VAR_DEF: - self.error = "Parameter was not expected" - return - - del self.expected[0] - - def fix_concept(self): - """ - When the SYA is done, we only need the concept, not the sya concept - :return: - """ - if isinstance(self.concept, SyaConceptDef): - self.concept = self.concept.concept - return self - - def clone(self): - clone = SyaConceptParserHelper(self.concept, self.start, self.end) - clone.expected = self.expected[:] - clone.expected_parameters_before_first_token = self.expected_parameters_before_first_token - clone.potential_pos = self.potential_pos - clone.parameters_list_at_init = self.parameters_list_at_init - clone.error = self.error - - return clone + def get_concept(self): + return self.concept -class InFixToPostFix: - def __init__(self, context, next_id_manager, debugger=None): +class BaseSyaParser: + def __init__(self, + context, + parser_input, + all_states, + start_pos=None, + end_pos=None, + stack=None, + unrecognized_tokens=None, + state=None, + state_context=None, + debug_items=None): self.context = context + self.parser_input = 
parser_input - self.next_id_manager = next_id_manager - self.id = self.next_id_manager.get_next_id() + self.start_pos = start_pos if start_pos is not None else parser_input.pos + self.end_pos = end_pos if end_pos is not None else self.start_pos - self.debugger = debugger - if debugger: - self.debug_enabled = debugger.is_enabled() - self.enabled_debug_levels = debugger.get_enabled_vars() - else: - self.debug_enabled = False - self.enabled_debug_levels = None + self.stack = stack if stack is not None else [] + self.unrecognized_tokens = unrecognized_tokens or UnrecognizedTokensNode(-1, -1, []) - self.is_locked = False # when locked, cannot process input + self.errors = [] - self.out = [] # shunting yard algo out - self.stack = [] # shunting yard algo stack - self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # buffer that keeps tracks of tokens positions + self.all_states = all_states - self.parameters_list = [] # list of the parameters that need to be associated to a concept - self.errors = [] # Not quite sure that I can handle more than one error + self._state = self.all_states.get_state(state) if state else self.all_states.start + self.state_context = state_context + self.debug_items = debug_items if debug_items is not None else [] - self.debug = [] - self.false_positives = [] # concepts that looks like known one, but not (for debug purpose) - self.forked = [] # use to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens + def has_error(self): + return len(self.errors) > 0 - self.parsing_function = False # indicate that we are currently parsing a function + def add_to_unrecognized(self, token, pos): + self.unrecognized_tokens.add_token(token, pos) + self.end_pos = pos + + @property + def state(self): + return self._state + + def set_state(self, state): + self._state = self.all_states.get_state(state) + + def get_unrecognized_tokens_requests_cache(self): + raise NotImplementedError() + + def get_tokens_parser(self): + raise 
NotImplementedError() + + def add_debug(self, text, is_error=False, **kwargs): + args = {"token": self.parser_input.token, + "pos": self.parser_input.pos, + "stack": self.stack.copy()} + + args.update(kwargs) + self.debug_items.append(DebugItem(text, is_error, args)) + return args + + +class SyaTokensParser(BaseSyaParser): + def __init__(self, + context, + sya_node_parser, + parser_input: ParserInput, + start_pos=None, + stack=None, + unrecognized_tokens=None, + state=None, + state_context=None, + debug_items=None): + super().__init__(context, + parser_input, + TokensParserStates(self), + start_pos, + start_pos, + stack, + unrecognized_tokens, + state, + state_context, + debug_items) + self.id = sya_node_parser.get_next_tokens_parser_id() + self.sya_node_parser = sya_node_parser + self.has_sya_concept = False + self.has_unrecognized = False + self.debugger = context.get_debugger(sya_node_parser.NAME, "parse", forced_debug_id=self.id) def __repr__(self): - return f"InFixToPostFix({self.debug})" + return f"SyaTokensParser({debug_nodes(self.stack)})" - def __eq__(self, other): - if id(self) == id(other): - return True + def parse(self): + try: + while self.state: + self.state.run() + self._state = self.state.next() - if not isinstance(other, InFixToPostFix): - return False + except SyaNodeException as err: + self.debug(err, is_error=True, error=err) + self.errors.append(err) - return self.out == other.out and self.errors == other.errors - - def __hash__(self): - return len(self.sequence) + len(self.errors) - - def _add_error(self, error): - if self.debug_enabled: - self._add_debug(DebugInfo(action=f"=> ERROR {error}")) - self.errors.append(error) - - def _add_debug(self, debug_info: DebugInfo): - if debug_info.level is None or (self.enabled_debug_levels and - (f"#{self.id}.{debug_info.level}" in self.enabled_debug_levels or - "*" in self.enabled_debug_levels)): - self.debug.append(debug_info) - - def _is_lpar(self, token): + def finalize(self): """ - True if the token 
is a left parenthesis '(' - Note that when we are parsing non recognized tokens, - we consider that the parenthesis are part of the non recognized - :param token: - :return: + Merge UnrecognizedTokensNodes when there are following each other """ - # return isinstance(token, Token) and token.type == TokenKind.LPAR - if isinstance(token, Token) and token.type == TokenKind.LPAR: - return True - if isinstance(token, tuple) and token[0].type == TokenKind.LPAR: - return True - if isinstance(token, UnrecognizedTokensNode) and token.parenthesis_count > 0: - return True - return False - def _is_rpar(self, token): - """ - True if the token is a right parenthesis ')' - Note that when we are parsing non recognized tokens, - we consider that the parenthesis are part of the non recognized - :param token: - :return: - """ - return isinstance(token, Token) and token.type == TokenKind.RPAR + # flush the unrecognized + if not self.unrecognized_tokens.is_empty(): + self.unrecognized_tokens.fix_source() + self.stack.append(self.unrecognized_tokens) + self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) - def _concepts(self): - """ - Return the concept currently being parsed - :return: - """ + # merge sequential unrecognized res = [] - for item in self.stack: - if isinstance(item, SyaConceptParserHelper): - res.append(item) - return res - - def _put_to_out(self, item): - """ - Helper function that Put an item in the out - :param item: - :return: - """ - if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error: - if item.expected[0].type == TokenKind.VAR_DEF: - item.error = "Not enough suffix parameters" - else: - item.error = f"token '{item.expected[0].strip_quote}' not found" - if self.debug_enabled: - self._add_debug(DebugInfo(action=f"ERROR {item.error}")) - - if self.debug_enabled: - self._add_debug(DebugInfo(action=f"{DEBUG_POP} {item}")) - if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1: - 
self.out.insert(item.potential_pos, item) - else: - self.out.append(item) - - # put the item to the list of awaiting parameters only if it's not the end of function marker - if item != ")": - self.parameters_list.append(item) - - if len(self._concepts()) > 0: - # try to predict the final position of the current concept - # This position can be altered by concept associativity and precedence - # So it's only a prediction - current = self._concepts()[-1] - if current.expected_parameters() == len(self.parameters_list) - len(current.parameters_list_at_init): - self._concepts()[-1].potential_pos = len(self.out) - - def _stack_isinstance(self, type): - """ - Check the type of the top item in the stack - :param type: - :return: - """ - return len(self.stack) > 0 and isinstance(self.stack[-1], type) - - def _make_source_code_with_concept(self, start, rpar_token, end): - """ - - :param start: - :param rpar_token: - :param end: - :return: - """ - source_code = SourceCodeWithConceptNode( - self.stack.pop(), - UnrecognizedTokensNode(end, end, [rpar_token]), - self.out[start + 1:] - ).pseudo_fix_source() - return source_code - - def _transform_to_unrecognized(self, parser_helper): - # an Unrecognized when sent to out too prematurely - if len(self.out) > 0 and isinstance(self.out[-1], UnrecognizedTokensNode): - self.unrecognized_tokens = self.out.pop() - - if parser_helper.remember_whitespace: - self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1) - for i, token in enumerate(parser_helper.tokens): - self.unrecognized_tokens.add_token(token, parser_helper.start + i) - - def _remove_debug_info_if_needed(self): - """ - Before trying to manage the unrecognized, a line is added to explain the token which has triggered - the recognition try - This line is useless if self.unrecognized_tokens was irrelevant - :return: - """ - if len(self.debug) > 0 and self.debug[-1].action == "??": - self.debug.pop() - - def _debug_nodes(self, nodes_sequences): 
- """ - Returns a debug representation of a sequence of LexerNodes - :param nodes_sequences: - :return: - """ - res = "[" - first = True - for sequence in nodes_sequences: - if not first: - res += ", " - res += "[" + ", ".join([n.to_short_str() for n in sequence]) + "]" - first = False - return res + "]" - - def get_errors(self): - def has_error(item): - if isinstance(item, SyaConceptParserHelper) and item.error: - return True - if isinstance(item, SourceCodeWithConceptNode): - for n in item.nodes: - if hasattr(n, "error") and n.error: - return True - return False - - res = [] - res.extend(self.errors) - res.extend([item for item in self.out if has_error(item)]) - return res - - def lock(self): - self.is_locked = True - - def reset(self): - if len(self.errors) > 0: - return - - self.is_locked = False - - def manage_parameters_when_new_concept(self, parser_helper): - """ - When a new concept is create, we need to check what to do with the parameters - that were queued - :param parser_helper: new concept - :return: - """ - if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token: - # There is not enough parameters to fill the new concept - # Try to develop the UnrecognizedTokesNode, to see if it can match - developed_param_list = self.develop_parameter_list(self.parameters_list) - if len(developed_param_list) < parser_helper.expected_parameters_before_first_token: - # The new concept expect some prefix parameters, but there's not enough - parser_helper.error = "Not enough prefix parameters" - return - - # the developed_param_list does the job. 
Let's replace the previous values - pivot = self.parameters_list[0] - replace_after(self.parameters_list, pivot, developed_param_list) - replace_after(self.out, pivot, developed_param_list) - - if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token: - # There are more parameters than needed by the new concept - # These others parameters are either - # - parameters for the previous suffixed concept (if any) - # - concepts on their own - # - syntax error - # In all the cases, the only thing that matter is to pop what is expected by the new concept - for i in range(parser_helper.expected_parameters_before_first_token): - self.parameters_list.pop() - parser_helper.parameters_list_at_init.extend(self.parameters_list) - return - - # len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token - # => We consider that the parameter are part of the new concept - - if len(self._concepts()) > 1: - # The new concept is a parameter of the previous one. - # So reset the potential_pos of the previous concept - self._concepts()[-2].potential_pos = -1 - - # eat them all - self.parameters_list.clear() - - def manage_parameters(self): - """ - Some new parameters were added to the list. - What to do with them ? 
- :return: - """ - - def nb_expected_parameters(expected): - """ - Count the number of successive variables that are still expected - :param expected: - :return: - """ - i = 0 - for token in expected: - if token.type == TokenKind.VAR_DEF: - i += 1 + unrecognized_tokens_node = None + for node in self.stack: + if isinstance(node, UnrecognizedTokensNode): + if unrecognized_tokens_node: + for i, token in enumerate(node.tokens): + unrecognized_tokens_node.add_token(token, node.start + i) else: - break - return i - - # manage parenthesis that didn't find any match - if self._is_lpar(self.stack[-1]): - self._add_error(ParenthesisMismatchError(self.stack[-1])) - - # The parameter must be part the current concept being parsed - assert len(self._concepts()) != 0 # sanity check - - current_concept = self._concepts()[-1] - - if (nb_expected := nb_expected_parameters(current_concept.expected)) > len(self.parameters_list): - # There is not enough parameters in the list to fill the concept - # Try to develop the UnrecognizedTokensNode to see if it can match - developed_param_list = self.develop_parameter_list(self.parameters_list) - if nb_expected == len(developed_param_list): - pivot = self.parameters_list[0] - replace_after(self.parameters_list, pivot, developed_param_list) - replace_after(self.out, pivot, developed_param_list) - - while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF: - # eat everything that was expected - if len(self.parameters_list) == 0: - current_concept.error = f"Failed to match parameter '{current_concept.expected[0].strip_quote}'" - return - del self.parameters_list[0] - del current_concept.expected[0] - - def manage_unrecognized(self): - - if self.unrecognized_tokens.is_empty(): - return - - # do not put empty UnrecognizedToken in out - if self.unrecognized_tokens.is_whitespace(): - self.unrecognized_tokens.reset() - return - - self.unrecognized_tokens.fix_source() - - if 
self.unrecognized_tokens.parenthesis_count > 0: - # parenthesis mismatch detected, do not try to resolve the unrecognized - self._add_error(ParenthesisMismatchError(self.unrecognized_tokens)) - self._put_to_out(self.unrecognized_tokens) - else: - # try to recognize concepts - nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( - self.context, - self.unrecognized_tokens, - PARSERS) - - if nodes_sequences: - # There are more than one solution found - # In the case, we create a new InfixToPostfix for each new possibility - if self.debug_enabled: - self._add_debug(DebugInfo(action=f"{DEBUG_RECOG} {self._debug_nodes(nodes_sequences)}")) - if len(nodes_sequences) > 1: - for node_sequence in nodes_sequences[1:]: - clone = self.clone() - for node in node_sequence: - clone._put_to_out(node) - clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) - self.forked.append(clone) - - # Do not forget the first result that will go with the current InfixToPostfix - for node in nodes_sequences[0]: - self._put_to_out(node) + unrecognized_tokens_node = node + res.append(unrecognized_tokens_node) else: - self._put_to_out(self.unrecognized_tokens) + res.append(node) + if unrecognized_tokens_node: + unrecognized_tokens_node.fix_source() + unrecognized_tokens_node = None - # create another instance - self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + if unrecognized_tokens_node: + unrecognized_tokens_node.fix_source() - def get_functions_names_from_unrecognized(self, token, pos): - """ - The unrecognized ends with an lpar '(' - It means that its a function like foo(something) - The problem is that we need to know if there are other concepts before the function - ex : suffix one function(x) - suffix and one are not / may not be part of the name of the function + if len(res) != len(self.stack) and self.debugger.is_enabled(): + self.debug(f"Transformed stack from {debug_nodes(self.stack)} into {debug_nodes(res)}") - We need to call the function to 
recognize the parts and act accordingly - :return: list of function_parser_res - """ - self.unrecognized_tokens.fix_source() - nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( - self.context, - self.unrecognized_tokens, - PARSERS) - - if not nodes_sequences: - nodes_sequences = [[self.unrecognized_tokens.clone()]] - - res = [] - for sequence in nodes_sequences: - last_node = sequence[-1] - - if len(last_node.tokens) > 1: - if isinstance(last_node, UnrecognizedTokensNode): - to_out = [UnrecognizedTokensNode(last_node.start, pos - 2, last_node.tokens[:-1]).fix_source()] - function_name = UnrecognizedTokensNode(pos - 1, pos - 1, [last_node.tokens[-1]]) - function_name.add_token(token, pos) - else: - to_out = [last_node.fix_source()] - function_name = None - - else: # len(last_node.tokens) == 1 - if not isinstance(last_node, UnrecognizedTokensNode): - function_name = UnrecognizedTokensNode(last_node.start, last_node.end, last_node.tokens) - else: - function_name = last_node - function_name.add_token(token, pos) - to_out = [] - - res.append(function_parser_res(sequence[:-1] + to_out, function_name)) - return res - - def pop_stack_to_out(self): - """ - Helper function that pops the stack and put the item to the output, if needed - :return: - """ - - item = self.stack[-1] - - # fix the concept is needed - if isinstance(item, SyaConceptParserHelper): - if len(item.expected) > 0: - # make sure the expected parameters of this item are eaten - if len(item.expected) <= len(self.parameters_list): - self.manage_parameters() - else: - # second chance to match the parameter list when it contains unrecognized token - developed_param_list = self.develop_parameter_list(self.parameters_list) - if len(item.expected) <= len(developed_param_list): - pivot = self.parameters_list[0] - replace_after(self.parameters_list, pivot, developed_param_list) - replace_after(self.out, pivot, developed_param_list) - self.manage_parameters() - - item.fix_concept() - - 
self.stack.pop() - self._put_to_out(item) - - def i_can_pop(self, sya_parser_helper): - """ - Validate the Shunting Yard Algorithm conditions to pop out from the stack - Note that it's a custom implementation as I need to manage UnrecognizedTokensNode - :param sya_parser_helper: - :return: - """ - if len(self.stack) == 0: - if self.debug_enabled: - self._add_debug(DebugInfo(action=f"No stack. {DEBUG_CAN_POP} false.", level="can_pop")) - return False - - stack_head = self.stack[-1] - - if not isinstance(stack_head, SyaConceptParserHelper): # mostly left parenthesis - if self.debug_enabled: - self._add_debug(DebugInfo(action=f"No concept. {DEBUG_CAN_POP} false.", level="can_pop")) - return False - - current = sya_parser_helper.concept - stack = stack_head.concept - - if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No: - self._add_error( - NoneAssociativeSequenceError(current.concept, stack_head.start, sya_parser_helper.start)) - - if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence: - if self.debug_enabled: - current_debug = f"{current.concept.id}({current.precedence})" - stack_debug = f"{stack.concept.id}({stack.precedence})" - self._add_debug( - DebugInfo(action=f"assoc=Left and {current_debug} <= {stack_debug}. {DEBUG_CAN_POP} True.", - level="can_pop")) - return True - - if current.associativity == SyaAssociativity.Right and current.precedence < stack.precedence: - if self.debug_enabled: - current_debug = f"{current.concept.id}({current.precedence})" - stack_debug = f"{stack.concept.id}({stack.precedence})" - self._add_debug( - DebugInfo(action=f"assoc=Right and {current_debug} < {stack_debug}. {DEBUG_CAN_POP} True.", - level="can_pop")) - return True - - if self.debug_enabled: - self._add_debug(DebugInfo(action=f"No rule. 
{DEBUG_CAN_POP} False.", level="can_pop")) - return False - - def handle_expected_token(self, token, pos): - """ - True if the token is part of the concept being parsed and the last token in a sequence is eaten - Example : Concept("foo a bar b").def_var("a").def_var("b") - The expected tokens are 'foo' and 'bar' (as a and b are parameters) - - Example: Concept("foo a bar baz b").def_var("a").def_var("b") - If the token is 'bar', it will be eaten but handle_expected_token() will return False - as we still expect 'baz' - :param token: - :param pos: - :return: - """ - - def _pop_stack(c): - while self.stack[-1] != c and not self._is_lpar(c): - self.pop_stack_to_out() - - if self._is_lpar(self.stack[-1]): - self._add_error(ParenthesisMismatchError(self.stack[-1])) - return False - - # Manage concepts ending with long names - if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched(): - self.pop_stack_to_out() - - for current_concept in reversed(self._concepts()): - # As I may loose memory again ;-) - # it's a reversed loop to manage cases like - # if a plus b then ... - # The current concept is 'plus', but the token is 'then' - # It's means that I have finished to parse the 'plus' and started the second part of the 'if' - - if current_concept.is_next(token): - current_concept.end = pos - current_concept.tokens.append(token) - if current_concept.eat_token(token): - _pop_stack(current_concept) - return True - - if len(current_concept.expected) > 0 and current_concept.expected[0].type != TokenKind.VAR_DEF: - if current_concept.expected[0].type == TokenKind.WHITESPACE: - # drop it. It's the case where an optional whitespace is missing - del (current_concept.expected[0]) - else: - # error - # We are not parsing the concept we thought we were parsing. - # Transform the eaten tokens into unrecognized - # and discard the current SyaConceptParserHelper - # TODO: manage the pending LPAR, RPAR ? 
- self._transform_to_unrecognized(current_concept) - self.false_positives.append(current_concept) - self.stack.pop() - return False - - if current_concept.is_expected(token): - - # Fix the whitespace between var and expected if needed - # current_concept.expected[0] is '' - # current_concept.expected[1] is what separate var from expected (normally a whitespace) - if current_concept.expected[1].type == TokenKind.WHITESPACE: - self.unrecognized_tokens.pop(TokenKind.WHITESPACE) - - current_concept.end = pos - if self.debug_enabled: - self._add_debug(DebugInfo(pos, token, None, "??")) - self.manage_unrecognized() - # manage that some clones may have been forked - for forked in self.forked: - forked.handle_expected_token(token, pos) - - # manage concepts found between tokens (of ternary for example) - self.manage_parameters() - - if len(self.parameters_list) > len(current_concept.parameters_list_at_init): - # we have eaten the parameters expected between two expected tokens - # But there are some remaining parameters - self._add_error(TooManyParametersFoundError( - current_concept.concept.concept, - current_concept.start, - token, - self.parameters_list[:])) - return True # no need to continue - - while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1] != current_concept: - current = self.stack[-1] - if current.error: - self._transform_to_unrecognized(current) - self.false_positives.append(current) - self.stack.pop() - - if current_concept.expected[1].type == TokenKind.WHITESPACE: - self.unrecognized_tokens.pop(TokenKind.WHITESPACE) - - self.manage_unrecognized() - # manage that some clones may have been forked - for forked in self.forked: - forked.handle_expected_token(token, pos) - else: - self.pop_stack_to_out() - self.manage_parameters() - - # maybe eat whitespace that was between and expected token - if current_concept.expected[0].type == TokenKind.WHITESPACE: - del current_concept.expected[0] - - if current_concept.eat_token(token): - 
_pop_stack(current_concept) - - return True - - return False - - def eat_token(self, token, pos): - """ - Receive at token. - It will be processed if it's expected by a concept or if it's a parenthesis - :param token: - :param pos: - :return: - """ - - if self.is_locked: - return - - if self.parsing_function: - if self.debug_enabled: - self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC)) - - self.unrecognized_tokens.add_token(token, pos) - - if self.unrecognized_tokens.parenthesis_count == 0: - self.unrecognized_tokens.fix_source() - res = self.context.sheerka.parse_function(self.context, - self.unrecognized_tokens.source, - self.unrecognized_tokens.tokens[:], - self.unrecognized_tokens.start) - - instances = get_n_clones(self, len(res)) - self.forked.extend(instances[1:]) - for instance, res_i in zip(instances, res): - - if res_i.status or instance.context.sheerka.isinstance(res_i.body, BuiltinConcepts.PARSER_RESULT): - # 1. we manage to recognize a function - # 2. we almost manage, ex func(one two). 
It's not a function but almost - instance._put_to_out(res_i.body.body) - instance.unrecognized_tokens.reset() - else: - # it is not a function, try to recognized the token - # This situation is unlikely to occur - instance.manage_unrecognized() - - instance.parsing_function = False - - return True - - if self.handle_expected_token(token, pos): - # a token is found, let's check if it's part of a concepts being parsed - # example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b") - # if the token 'bar' is found, it has to be considered as part of the concept foo - if self.debug_enabled: - self._remove_debug_info_if_needed() - self._add_debug(DebugInfo(pos, token, None, DEBUG_EAT)) - return True - - elif self._is_lpar(token): - - if self.debug_enabled: - self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC)) - - if self.unrecognized_tokens.is_empty() or self.unrecognized_tokens.is_whitespace(): - - # first, remove what was in the buffer - self.manage_unrecognized() - for forked in self.forked: - # manage that some clones may have been forked - forked.eat_token(token, pos) - - self.stack.append((token, pos)) - - else: - # the parenthesis is part of the unrecognized - # So it's maybe a function call - - list_of_results = self.get_functions_names_from_unrecognized(token, pos) - instances = [self] - for i in range(len(list_of_results) - 1): - clone = self.clone() - self.forked.append(clone) - instances.append(clone) - - # Manage the result for self and its clones - for instance, parsing_res in zip(instances, list_of_results): - - for to_out in parsing_res.to_out: - instance._put_to_out(to_out) - - if parsing_res.function: - instance.unrecognized_tokens = parsing_res.function - instance.parsing_function = True - else: - # special case of "twenty two(". 
It's not considered as a function - # The manage_unrecognized() what somewhat done by get_functions_names_from_unrecognized() - # So we just put the unrecognized to out - - instance.unrecognized_tokens.reset() - - # make sure to pop the current concept - if self._stack_isinstance(SyaConceptParserHelper): - self.pop_stack_to_out() - - instance.stack.append((token, pos)) - - # # instance._put_to_out(")") # mark where the function should end - # # instance.stack.append(parsing_res.function) - # # instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized - # else: - # # handle when there are multiple pending tokens - # if len(self.unrecognized_tokens.tokens) > 1: - # unrecognized = UnrecognizedTokensNode(self.unrecognized_tokens.start, - # pos - 2, - # self.unrecognized_tokens.tokens[:-1]) - # unrecognized.fix_source() - # self._put_to_out(unrecognized) - # last_token = self.unrecognized_tokens.tokens[-1] - # self.unrecognized_tokens.reset() - # self.unrecognized_tokens.add_token(last_token, pos - 1) - # - # self.eat_unrecognized(token, pos) # add the '(' to the rest of the unknown - # self.parsing_function = True - # # self.stack.append(self.unrecognized_tokens.fix_source()) - # # self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) - - return True - - elif self._is_rpar(token): - if self.debug_enabled: - self._add_debug(DebugInfo(pos, token, None, DEBUG_EAT)) - - # first, remove what was in the buffer - self.manage_unrecognized() - for forked in self.forked: - # manage that some clones may have been forked - forked.eat_token(token, pos) - - # pop everything but the lpar from stack to 'out' - while len(self.stack) > 0 and not self._is_lpar(self.stack[-1]): - self.pop_stack_to_out() - - # checks consistency if an lpar is found - if len(self.stack) == 0: - self._add_error(ParenthesisMismatchError((token, pos))) - return None - - if self._stack_isinstance(UnrecognizedTokensNode): - # the parenthesis was a function - # we need to 
return a SourceCodeWithConceptNode - for i in range(len(self.out) - 1, -1, -1): - if self.out[i] == ')': - start = i - break - else: - self._add_error(ParenthesisMismatchError((token, pos))) - return None - - source_code = self._make_source_code_with_concept(start, token, pos) - - for item in self.out[start:]: - # update the parameter list - try: - self.parameters_list.remove(item) - except ValueError: - pass - del self.out[start:] - self._put_to_out(source_code) - - # self.pop_stack_to_out() - # # Replace the ')' marker by its real position - # for i in range(len(self.out) - 1, -1, -1): - # if self.out[i] == ')': - # self.out[i] = UnrecognizedTokensNode(pos, pos, [token]) - - else: - self.stack.pop() # discard the lpar - return True - - return False - - def eat_concept(self, sya_concept_def, token, pos, first_pass=True): - """ - a concept is found - :param sya_concept_def: - :param token: - :param pos: - :param first_pass: When not called from a fork after manage_unrecognized() - :return: - """ - - if self.is_locked: - return - - parser_helper = SyaConceptParserHelper(sya_concept_def, pos) - - if first_pass: - if self.debug_enabled: - self._add_debug(DebugInfo(pos, token, sya_concept_def, "??")) - - if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE: - parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1] - - if Token.is_whitespace(parser_helper.last_token_before_first_token): - self.unrecognized_tokens.pop(TokenKind.WHITESPACE) - - # First, try to recognize the tokens that are waiting - self.manage_unrecognized() - for forked in self.forked: - # manage the fact that some clone may have been forked - forked.eat_concept(sya_concept_def, token, pos, first_pass=False) - - # then, check if this new concept is linked to the previous ones - # ie, is the previous concept fully matched ? 
- if parser_helper.expected_parameters_before_first_token == 0: - # => does not expect pending parameter (it's suffixed concept) - while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1: - # => previous seems to have everything it needs in the parameter list - self.pop_stack_to_out() - - if parser_helper.is_atom(): - self._put_to_out(parser_helper.fix_concept()) - else: - # call shunting yard algorithm - while self.i_can_pop(parser_helper): - self.pop_stack_to_out() - - if parser_helper.is_matched(): - # case of a prefix concept which has found happiness with self.parameters_list - # directly put it in out - self.manage_parameters_when_new_concept(parser_helper) - self._put_to_out(parser_helper.fix_concept()) - else: - if self.debug_enabled: - self._remove_debug_info_if_needed() - self._add_debug(DebugInfo(pos, token, sya_concept_def, DEBUG_PUSH)) - self.stack.append(parser_helper) - self.manage_parameters_when_new_concept(parser_helper) - - def eat_unrecognized(self, token, pos): - """ - The token was not recognized, add to the current UnrecognizedTokensNode - :param token: - :param pos: - :return: - """ - if self.is_locked: - return - - if self.debug_enabled: - self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC)) - - self.unrecognized_tokens.add_token(token, pos) - - def finalize(self, pos): - """ - Put the remaining items from the stack to out - :return: - """ - - if self.is_locked: - return - - if len(self.stack) == 0 and len(self.out) == 0: - # check for parenthesis mismatch - if self.unrecognized_tokens.parenthesis_count > 0: - self._add_error(ParenthesisMismatchError(self.unrecognized_tokens)) - return # no need to pop the buffer, as no concept is found - - if self.debug_enabled: - self._add_debug(DebugInfo(pos, "", None, "??")) - - while len(self.stack) > 0: - parser_helper = self.stack[-1] - - # validate parenthesis - if self._is_lpar(parser_helper) or self._is_rpar(parser_helper): - 
self._add_error(ParenthesisMismatchError(parser_helper)) - return None - - self.manage_unrecognized() - for forked in self.forked: - # manage that some clones may have been forked - forked.finalize(pos) - - failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected)) - if failed_to_match > 0: - # didn't manage to read all tokens. - # Transform them into unrecognized - self._transform_to_unrecognized(parser_helper) - self.false_positives.append(parser_helper) - self.stack.pop() # discard the parser helper - else: - self.pop_stack_to_out() # process it - - self.manage_unrecognized() - for forked in self.forked: - # manage that some clones may have been forked - forked.finalize(pos) + self.stack = res def clone(self): - clone = InFixToPostFix(self.context, self.next_id_manager, self.debugger) - clone.is_locked = self.is_locked - clone.out = self.out[:] - clone.stack = [i.clone() if hasattr(i, "clone") else i for i in self.stack] - clone.unrecognized_tokens = self.unrecognized_tokens.clone() - clone.parameters_list = self.parameters_list[:] - clone.errors = self.errors[:] - clone.debug = self.debug[:] - # clone.forked = self.forked + clone = SyaTokensParser(self.context, + self.sya_node_parser, + self.parser_input.clone(), + self.start_pos, + self.stack.copy(), + self.unrecognized_tokens.clone(), + self.state, + self.state_context, + self.debug_items) + return clone + def n_clones(self, nb_clones): + return self.sya_node_parser.fork(self, nb_clones) + + def get_unrecognized_tokens_requests_cache(self): + return self.sya_node_parser.cache + + def get_tokens_parser(self): + return self + + def debug(self, text, is_error=False, **kwargs): + args = self.add_debug(text, is_error, **kwargs) + if self.debugger.is_enabled(): + debug_prefix = f"pos={self.parser_input.pos}, token='{self.parser_input.token.repr_value}'" + self.debugger.debug_log(f"{debug_prefix}: {text}", is_error, args) + + +class SyaConceptParser(BaseSyaParser): + def __init__(self, 
+ tokens_parser: SyaTokensParser, + concept: Concept, + prefix_parameters, + stack=None, + unrecognized_tokens=None, + start_pos=None, + end_pos=None, + sub_concepts_start_positions=None, + sub_concept_detected=False, + debug_items=None): + super().__init__(tokens_parser.context, + tokens_parser.parser_input, + ConceptParserStates(self), + start_pos, + end_pos, + stack, + unrecognized_tokens, + debug_items=debug_items) + + self.parent_tokens_parser = tokens_parser + self.concept = concept + self.prefix_parameters = prefix_parameters + self.prefix_parameters_snapshot = prefix_parameters.copy() + self.checkpoint = self.start_pos + + self.expected = [] + self.nb_expected_popped = 0 + self.parameters = [] # parameters found during parsing + self.concept_node = None # concept_node returned + self.sub_concepts_start_positions = sub_concepts_start_positions or [] # sya concepts found during parsing + self.sub_concept_detected = sub_concept_detected + + self.after_parsing_hint = None + self.debugger = self.parent_tokens_parser.debugger + + def __repr__(self): + if self.concept_node: + return f"SyaConceptParser(concept_node={self.concept_node})" + + text = f"SyaConceptParser(concept={self.concept}, " + if self.has_error(): + text += f"errors={self.errors}" + else: + res = [] + for tokens, nb_vars in self.expected: + debug_tokens = "".join([t.repr_value for t in tokens]) + res.append((debug_tokens, nb_vars)) + text += f"expected={res}" + text += f", stack={debug_nodes(self.stack)}" + return text + ")" + @staticmethod - def develop_parameter_list(parameter_list): + def compute_expected_parameters(concept_key): """ - given a list of parameter (solely from self.parameter_list) - develop UnrecognizedTokensNode parameter that contains whitespaces - :param parameter_list: - :return: + Return of list of pairs + expected token and number of expected variable before this token + ex: 'if x y then z end' => ('if', 0), ('then', 2), ('end', 1) """ - temp = [] - for parameter in 
parameter_list: - if isinstance(parameter, UnrecognizedTokensNode): - for i, token in [(i, t) for i, t in enumerate(parameter.tokens) if t.type != TokenKind.WHITESPACE]: - temp.append(UnrecognizedTokensNode(parameter.start + i, - parameter.start + i, - [token])) + + def custom_strip_tokens(_tokens): + res = [] + buffer = None + for t in _tokens: + if t.type == TokenKind.WHITESPACE: + buffer = t + else: + if buffer: + res.append(buffer) + buffer = None + res.append(t) + + if res and buffer: # add the buffer only is the result is not empty + res.append(buffer) + + return res + + expected = [] # tuple of expected token and number of expected variables before this token + tokens = [] + nb_variables = 0 + under_tokens = None + for token in Tokenizer(concept_key, yield_eof=False): + if token.type == TokenKind.WHITESPACE: + tokens.append(token) + elif token.type == TokenKind.VAR_DEF: + if under_tokens is not None and under_tokens: + expected.append((custom_strip_tokens(tokens), nb_variables)) + nb_variables = 1 + tokens = [] + under_tokens = False + else: + nb_variables += 1 else: - temp.append(parameter) - return temp + tokens.append(token) + under_tokens = True + if tokens or nb_variables: + expected.append((custom_strip_tokens(tokens), nb_variables)) -@dataclass() -class PostFixToItem: - concept: Concept - start: int - end: int - has_unrecognized: bool - source: str + return expected + + def initialize_expected_parameters(self): + self.expected = self.compute_expected_parameters(self.concept.key) + + expected_parameters_before_first_token = self.expected[0][1] + + # remove the trailing whitespace before counting the parameters + if len(self.prefix_parameters) > 0 and self.prefix_parameters[-1].source.isspace(): + self.prefix_parameters.pop() + + # Check the number of prefixed parameters + if len(self.prefix_parameters) < expected_parameters_before_first_token: + raise NotEnoughParameters(self.concept, + get_text_from_tokens(self.expected[0][0]), + self.start_pos, + 
expected_parameters_before_first_token, + self.prefix_parameters.copy()) + + # add the previous parameters to the list of available parameters + for i in range(expected_parameters_before_first_token): + self.stack.insert(0, self.prefix_parameters.pop()) + + # prepare the tokens to recognize + if self.expected[0][0][0].type == TokenKind.WHITESPACE: + # remove white space before the first token if any + self.expected[0][0].pop(0) + self.expected[0][0].pop(0) # pop the first token + + def parse(self): + try: + while self.state: + self.state.run() + self._state = self.state.next() + + except SyaNodeException as err: + self.debug(err, is_error=True, error=err) + self.errors.append(err) + + def recognize(self, start_pos, tokens): + for i in range(len(tokens)): + pi_input_token = self.parser_input.tokens[i + start_pos] + token = tokens[i] + if pi_input_token.type != token.type or pi_input_token.value != token.value: + return False + + return True + + def is_part_of_concept_definition(self): + to_be_recognized = self.expected[0][0] + if not to_be_recognized: + return False + + if self.parser_input.pos == self.parser_input.length - len(to_be_recognized): + return False + + if self.recognize(self.parser_input.pos, to_be_recognized): + self.parser_input.seek(self.parser_input.pos + len(to_be_recognized) - 1) + self.end_pos = self.parser_input.pos + return True + + return False + + def must_pop(self, other_concept: Concept): + """ + Compare the priority of the 'other' concept against self.concept + """ + current = SyaConceptDef.get_sya_concept_def(self.context, self.concept) + other = SyaConceptDef.get_sya_concept_def(self.context, other_concept) + + if current.associativity == SyaAssociativity.No and other.associativity == SyaAssociativity.No: + raise NoneAssociativeConceptsError(self.concept, other_concept) + + if other.associativity == SyaAssociativity.Left and other.precedence <= current.precedence: + return True + + if other.associativity == SyaAssociativity.Right and 
other.precedence < current.precedence: + return True + + return False + + def manage_parameters(self): + # check the number of expected parameters + if not self.expected: + return + + nb_expected_parameters = self.expected[0][1] + if len(self.stack) < nb_expected_parameters: + raise NotEnoughParameters(self.concept, + get_text_from_tokens(self.expected[0][0]), + self.checkpoint, + nb_expected_parameters, + self.stack.copy()) + + if len(self.stack) > nb_expected_parameters and len(self.expected[0][0]) > 0: + # Only raise an exception when there are too many parameters for ternary like concepts + raise TooManyParameters(self.concept, + get_text_from_tokens(self.expected[0][0]), + self.checkpoint, + nb_expected_parameters, + self.stack.copy()) + + self.parameters.extend(self.stack) + self.stack.clear() + self.expected.pop(0) + self.nb_expected_popped += 1 + + def create_concept_node(self): + new_concept = self.context.sheerka.new_from_template(self.concept, self.concept.key) + new_concept.get_hints().use_copy = True + new_concept.get_hints().need_validation = True + new_concept.get_hints().recognized_by = RECOGNIZED_BY_KEY + start_pos = self.start_pos + end_pos = self.end_pos + + assert len(new_concept.get_metadata().parameters) <= len(self.parameters) + + # update the parameters + for param_name, param_value in zip(new_concept.get_metadata().parameters, self.parameters): + new_concept.get_compiled()[param_name] = param_value + + # set sub_concept_detected to True if we have eaten an sya concept as an unrecognized token + # It may help if we fail to validate the current concept + # See test_i_always_look_for_the_longest_match() for more explanations + if param_value.start in self.sub_concepts_start_positions: + self.sub_concept_detected = True + + if param_value.start < start_pos: + start_pos = param_value.start + if param_value.end > end_pos: + end_pos = param_value.end + + # remove the parameters that are already used + for i in 
range(len(new_concept.get_metadata().parameters)): + self.parameters.pop(0) + + # update variable metadata + new_concept.get_metadata().variables = get_new_variables_definitions(new_concept) + + tokens = self.parser_input.tokens[start_pos:end_pos + 1] + concept_node = ConceptNode(new_concept, + start_pos, + end_pos, + tokens, + get_text_from_tokens(tokens)) + + if self.debugger.is_enabled(): + msg = f"concept node {new_concept}" + for param_name, param_value in new_concept.get_compiled().items(): + msg += f", {param_name}='{param_value.source}'" + + msg += " successfully created." + self.debug(msg, concept_node=concept_node) + + return concept_node + + def clone(self): + fork = self.parent_tokens_parser.sya_node_parser.fork_tokens_parser(self.parent_tokens_parser) + clone = SyaConceptParser(fork, + self.concept, + self.prefix_parameters.copy(), + self.stack.copy(), + self.unrecognized_tokens.clone(), + self.start_pos, + self.end_pos, + self.sub_concepts_start_positions.copy(), + self.sub_concept_detected, + self.debug_items) + + clone.expected = self.expected.copy() + clone.nb_expected_popped = self.nb_expected_popped + clone.parameters = self.parameters.copy() + clone.state_context = self.state_context + + fork.state_context = clone + return clone + + def n_clones(self, nb_clones): + if nb_clones == 1: + return [self] + + return [self] + [self.clone() for _ in range(nb_clones - 1)] + + def get_unrecognized_tokens_requests_cache(self): + return self.parent_tokens_parser.sya_node_parser.cache + + def get_tokens_parser(self): + return self.parent_tokens_parser + + def debug(self, text, is_error=False, **kwargs): + args = self.add_debug(text, is_error, **kwargs) + if self.debugger.is_enabled(): + debug_prefix = f"pos={self.parser_input.pos}, token='{self.parser_input.token.repr_value}'" + self.debugger.debug_log(f"{debug_prefix}: {text}", is_error=is_error, args=args) class SyaNodeParser(BaseNodeParser): NAME = "Sya" def __init__(self, **kwargs): - 
super().__init__(SyaNodeParser.NAME, 50, **kwargs) - self.test_only_sya_definitions = {} - - def init_from_concepts(self, context, concepts, **kwargs): - super().init_from_concepts(context, concepts) - - sya_definitions = kwargs.get("sya", None) - if sya_definitions: - self.test_only_sya_definitions = sya_definitions - - return self + super().__init__(SyaNodeParser.NAME, 50, enabled=True, **kwargs) + self.cache = UnrecognizedTokensCache(PARSERS) + self.forks = [] + self.tokens_parser_next_id = 0 @staticmethod def _is_eligible(concept): @@ -1233,187 +936,90 @@ class SyaNodeParser(BaseNodeParser): return (concept.get_metadata().definition_type != DEFINITION_TYPE_BNF and len(concept.get_metadata().parameters) > 0) - def infix_to_postfix(self, context, parser_input: ParserInput): + @staticmethod + def _function_detected(tokens): """ - Implementing Shunting Yard Algorithm - :param context: - :param parser_input: - :return: + Returns True if we thing that the result of the tokens parser is a function """ + tokens = strip_tokens(tokens, True) + if (len(tokens) >= 3 and + tokens[0].type == TokenKind.IDENTIFIER and + tokens[1].value == "(" and + tokens[-1].value == ")"): + return True - if not self.reset_parser(context, parser_input): - return None + if (len(tokens) >= 4 and + tokens[0].type == TokenKind.IDENTIFIER and + tokens[1].type == TokenKind.WHITESPACE and + tokens[2].value == "(" and + tokens[-1].value == ")"): + return True - debugger = context.get_debugger(self.NAME, "parse") - debugger.debug_entering(source=self.parser_input.as_text()) + return False - forked = [] + @staticmethod + def _merge_errors(sheerka, errors): + res = flatten(errors) + if len(res) == 1: + return res[0] + else: + return sheerka.err([e.body for e in res]) - def _add_forked_to_res(): - # check that if some new InfixToPostfix are created - for in_to_post in res: - if len(in_to_post.forked) > 0: - forked.extend(in_to_post.forked) - in_to_post.forked.clear() - if len(forked) > 0: - 
res.extend(forked) - forked.clear() + def get_next_tokens_parser_id(self): + self.tokens_parser_next_id += 1 + return self.tokens_parser_next_id - res = [InFixToPostFix(context, NextIdManager(), debugger)] - while self.parser_input.next_token(False): - for infix_to_postfix in res: - infix_to_postfix.reset() + def get_concepts(self, context, token): + return context.sheerka.get_concepts_by_first_token(token, self._is_eligible) - token = self.parser_input.token - if debugger.is_enabled(): - debug_prefix = f"pos={self.parser_input.pos}, {token=}, {len(res)} parser(s)" + def fork(self, tokens_parser, number_of_forks): + if number_of_forks == 1: + return [tokens_parser] - try: - if token.type in (TokenKind.LPAR, TokenKind.RPAR): - # little optim, no need to lock, unlock or get the concept when parenthesis - if debugger.is_enabled(): - debugger.debug_log(debug_prefix + ", eat token.") + forks = [tokens_parser.clone() for _ in range(number_of_forks - 1)] + self.forks.extend(forks) - for infix_to_postfix in res: - infix_to_postfix.eat_token(token, self.parser_input.pos) - continue + return [tokens_parser] + forks - for infix_to_postfix in res: - if infix_to_postfix.eat_token(token, self.parser_input.pos): - infix_to_postfix.lock() + def fork_tokens_parser(self, tokens_parser): + clone = tokens_parser.clone() + self.forks.append(clone) + return clone - nb_locked = len([itp for itp in res if itp.is_locked]) - if nb_locked == len(res): - if debugger.is_enabled(): - debugger.debug_log(debug_prefix + f", all parsers are locked") - continue + def parse_token_parser(self, context, tokens_parser: SyaTokensParser, result, in_error, not_for_me): + tokens_parser.parse() - concepts_def = context.sheerka.get_concepts_by_first_token(token, - self._is_eligible, - to_map=SyaConceptDef.get_sya_concept_def, - parser=self) - if not concepts_def: - if debugger.is_enabled(): - debugger.debug_log(debug_prefix + f", no concept found") + # validate the nodes + for node in tokens_parser.stack: + 
if isinstance(node, ConceptNode): + errors = [] + update_compiled(context, node.concept, errors) + tokens_parser.errors.extend(errors) - for infix_to_postfix in res: - infix_to_postfix.eat_unrecognized(token, self.parser_input.pos) - continue + if self._is_eligible(node.concept) and node.concept.get_compiled(): + tokens_parser.has_sya_concept = True - if debugger.is_enabled(): - found = [cd.short_repr() for cd in concepts_def] - debugger.debug_log(debug_prefix + f", concept(s) found={found}") + elif isinstance(node, UnrecognizedTokensNode): + tokens_parser.has_unrecognized = True - if len(concepts_def) == 1: - for infix_to_postfix in res: - infix_to_postfix.eat_concept(concepts_def[0], token, self.parser_input.pos) - continue + # put the tokens_parser in the correct bag + if tokens_parser.has_sya_concept and not tokens_parser.has_error(): + result.append(tokens_parser) + elif tokens_parser.has_sya_concept and tokens_parser.has_error(): + in_error.append(tokens_parser) + else: + tokens_parser.errors.append(NoSyaConceptFound()) + not_for_me.append(tokens_parser) - # make the cartesian product - temp_res = [] - for infix_to_postfix in res: - for concept in concepts_def: - clone = infix_to_postfix.clone() - temp_res.append(clone) - clone.eat_concept(concept, token, self.parser_input.pos) - res = temp_res - - finally: - _add_forked_to_res() - - # make sure that remaining items in stack are moved to out - for infix_to_postfix in res: - infix_to_postfix.reset() - infix_to_postfix.finalize(self.parser_input.pos) - _add_forked_to_res() - - if debugger.is_enabled(): - for r in res: - for line in r.debug: - if line.level: - debugger.debug_var(f"#{r.id}.{line.level}", line) - else: - debugger.debug_var(f"#{r.id}", line) - - return res - - def postfix_to_item(self, sheerka, postfixed): - item = postfixed.pop() - if isinstance(item, (UnrecognizedTokensNode, SourceCodeNode, ConceptNode, VariableNode)): - return item - - if isinstance(item, SourceCodeWithConceptNode): - items = [] 
- while len(item.nodes) > 0: - res = self.postfix_to_item(sheerka, item.nodes) - if isinstance(res, PostFixToItem): - items.append(ConceptNode(res.concept, - res.start, - res.end, - self.parser_input.tokens[res.start: res.end + 1])) - else: - items.append(res) - item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \ - isinstance(res, UnrecognizedTokensNode) - item.nodes = items - item.fix_all_pos() - item.tokens = self.parser_input.tokens[item.start:item.end + 1] - item.fix_source(True) - return item - - # ParserHelper - start = item.start - end = item.end - has_unrecognized = False - concept = sheerka.new_from_template(item.concept, item.concept.key) - concept_metadata = [] - for param_index in reversed(range(len(concept.get_metadata().variables))): - param_name = concept.get_metadata().variables[param_index][0] - - if param_name not in concept.get_metadata().parameters: - # This is not a real concept parameter, but a concept variable - # just copy its default value - concept_metadata.append(concept.get_metadata().variables[param_index]) - continue - - inner_item = self.postfix_to_item(sheerka, postfixed) - if inner_item.start < start: - start = inner_item.start - if inner_item.end > end: - end = inner_item.end - has_unrecognized |= isinstance(inner_item, (UnrecognizedTokensNode, SourceCodeWithConceptNode)) or \ - hasattr(inner_item, "has_unrecognized") and inner_item.has_unrecognized - - param_value = inner_item.concept if hasattr(inner_item, "concept") else \ - [inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \ - inner_item - - concept.get_compiled()[param_name] = param_value - concept_metadata.append((param_name, inner_item.source)) - - # update the metadata - concept_metadata.reverse() - - # ---- Sanity check. 
To remove at some point - assert len(concept_metadata) == len(concept.get_metadata().variables) - for meta_orig, meta_new in zip(concept.get_metadata().variables, concept_metadata): - assert meta_orig[0] == meta_new[0] - # ---- Sanity check. To remove at some point - concept.get_metadata().variables = concept_metadata - concept.get_hints().use_copy = True - concept.get_hints().need_validation = True - - source = get_text_from_tokens(self.parser_input.tokens[start:end + 1]) - return PostFixToItem(concept, start, end, has_unrecognized, source) + # recurse on the forks + if self.forks: + forks = self.forks.copy() + self.forks.clear() + for fork in forks: + self.parse_token_parser(context, fork, result, in_error, not_for_me) + # @profile(filename="sya_node_parser_parse") def parse(self, context, parser_input: ParserInput): - """ - - :param context: - :param parser_input: - :return: - """ - if not isinstance(parser_input, ParserInput): return None @@ -1424,74 +1030,54 @@ class SyaNodeParser(BaseNodeParser): context.sheerka.new(BuiltinConcepts.IS_EMPTY) ) - ret = [] - valid_infix_to_postfixs = self.get_valid(self.infix_to_postfix(context, parser_input)) - if valid_infix_to_postfixs is None: - # token error + if not self.reset_parser(context, parser_input): return self.sheerka.ret( self.name, False, context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) - if len(valid_infix_to_postfixs) == 0: - return self.sheerka.ret( - self.name, - False, - context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text())) + debugger = context.get_debugger(self.NAME, "parse") + debugger.debug_entering(source=self.parser_input.as_text()) - for infix_to_postfix in valid_infix_to_postfixs: - sequence = [] - has_unrecognized = False + if self._function_detected(parser_input.tokens): + return self.sheerka.ret(self.name, + False, + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, + body=parser_input.as_text(), + reason=[FunctionDetected()])) + + tokens_parser = 
SyaTokensParser(context, self, parser_input) + valid_tokens_parser, in_errors, not_for_me = [], [], [] + self.parse_token_parser(context, tokens_parser, valid_tokens_parser, in_errors, not_for_me) + + if debugger.is_enabled(): + debugger.debug_var("stats", self.cache.to_dict()) + debugger.debug_leaving(valid=valid_tokens_parser, error=in_errors, not_for_me=not_for_me) + + ret = [] + if valid_tokens_parser: + for tokens_parser in valid_tokens_parser: + ret.append( + self.sheerka.ret(self.name, + not tokens_parser.has_unrecognized, + self.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=parser_input.as_text(), + body=tokens_parser.stack, + try_parsed=tokens_parser.stack))) + + elif in_errors: + errors = self._merge_errors(self.sheerka, [tp.errors for tp in in_errors]) + ret.append(self.sheerka.ret(self.name, False, errors)) + + else: errors = [] - while len(infix_to_postfix.out) > 0: - item = self.postfix_to_item(context.sheerka, infix_to_postfix.out) - has_unrecognized |= hasattr(item, "has_unrecognized") and item.has_unrecognized or \ - isinstance(item, UnrecognizedTokensNode) - if isinstance(item, PostFixToItem): - to_insert = ConceptNode(item.concept, - item.start, - item.end, - self.parser_input.tokens[item.start: item.end + 1]) - - # validate the concept - update_compiled(context, item.concept, errors) - if errors: - break - else: - to_insert = item - sequence.insert(0, to_insert) - - if errors: - if len(errors) == 1: - ret.append( - self.sheerka.ret( - self.name, - False, - errors[0])) - else: - ret.append( - self.sheerka.ret( - self.name, - False, - self.sheerka.err([e.body for e in errors]))) - continue - - if has_unrecognized: - # Manage some sick cases where missing parenthesis mess the order or the sequence - # example "foo bar(one plus two" - # too lazy to fix the why... 
- sequence.sort(key=attrgetter("start")) - - ret.append( - self.sheerka.ret( - self.name, - not has_unrecognized, - self.sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=parser_input.as_text(), - body=sequence, - try_parsed=sequence))) + for tp in not_for_me: + errors.extend(tp.errors) + ret.append(self.sheerka.ret(self.name, False, context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, + body=parser_input.as_text(), + reason=errors))) if len(ret) == 1: self.log_result(context, parser_input, ret[0]) @@ -1499,42 +1085,3 @@ class SyaNodeParser(BaseNodeParser): else: self.log_multiple_results(context, parser_input, ret) return ret - - @staticmethod - def get_valid(infix_to_postfixs): - """ - Gets the valid infixToPostfix - :param infix_to_postfixs: - :return: - """ - - def _has_sya(items): - for item in items: - if isinstance(item, SourceCodeWithConceptNode): - return _has_sya(item.nodes) - - if isinstance(item, SyaConceptParserHelper): - return True - return False - - if infix_to_postfixs is None: - return None - - result = [] - for infix_to_postfix in infix_to_postfixs: - if len(infix_to_postfix.get_errors()) > 0: - continue - - if len(infix_to_postfix.out) == 0: - continue - - if infix_to_postfix in result: - continue - - if not _has_sya(infix_to_postfix.out): - # refuses the result if it does not involve SYA - continue - - result.append(infix_to_postfix) - - return result diff --git a/src/parsers/UnrecognizedNodeParser.py b/src/parsers/UnrecognizedNodeParser.py index 321d8b9..e77cb8b 100644 --- a/src/parsers/UnrecognizedNodeParser.py +++ b/src/parsers/UnrecognizedNodeParser.py @@ -2,9 +2,9 @@ from dataclasses import dataclass import core.utils from core.builtin_concepts import BuiltinConcepts -from core.builtin_helpers import only_successful, get_lexer_nodes, update_compiled -from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode -from parsers.BaseParser import BaseParser, ParsingError, 
BaseParserInputParser +from core.builtin_helpers import get_lexer_nodes, only_successful, update_compiled +from parsers.BaseNodeParser import ConceptNode, SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode +from parsers.BaseParser import BaseParserInputParser, ParsingError from parsers.BnfNodeParser import BnfNodeParser from parsers.PythonParser import PythonParser from parsers.SequenceNodeParser import SequenceNodeParser diff --git a/src/sheerkapython/ExprToPython.py b/src/sheerkapython/ExprToPython.py new file mode 100644 index 0000000..06c761a --- /dev/null +++ b/src/sheerkapython/ExprToPython.py @@ -0,0 +1,389 @@ +from dataclasses import dataclass +from itertools import product +from typing import Union + +from core.builtin_helpers import is_only_successful, only_successful +from core.global_symbols import INIT_AST_PARSERS, NotInit +from core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints +from core.tokenizer import TokenKind +from core.utils import merge_dicts, merge_sets +from parsers.BaseExpressionParser import AndNode, ComparisonNode, ComparisonType, ExprNode, ExpressionVisitorWithHint, \ + FunctionNode, ListComprehensionNode, \ + ListNode, NameExprNode, NotNode, VariableNode, end_parenthesis_mapping, open_parenthesis_mapping +from parsers.PythonParser import PythonNode +from sheerkapython.python_wrapper import sheerka_globals + + +@dataclass() +class PythonExprVisitorObj: + text: Union[str, None] # human readable + source: Union[str, None] # python expression to compile + objects: dict # dictionaries of object created during the visit + variables: set # I intended to detect unbound symbols, but it's actually not used + + +class PythonExprVisitor(ExpressionVisitorWithHint): + def __init__(self, context, obj_counter=0): + self.context = context + self.obj_counter = obj_counter + self.objects_by_id = {} + self.objects_by_name = {} + self.errors = {} + self.results = [] + + def compile(self, expr_node, hint=None): + hint = hint 
or EvaluationHints(eval_body=True) + visitor_objects = self.visit(expr_node, hint) + + for obj in visitor_objects: + + ret = self.context.sheerka.parse_python(self.context, obj.source) + if ret.status: + ret.body.body.original_source = obj.text + ret.body.body.objects = obj.objects + self.results.append(ret) + else: + self.errors[obj.text] = self.context.sheerka.get_error_cause(ret.body) + + return self.results + + def visit_ListComprehensionNode(self, expr_node: ListComprehensionNode, hint: EvaluationHints): + """ + + :param expr_node: + :param hint: + :return: + """ + visitor_objects = [] + source = expr_node.get_source() + + not_a_question_hint = EvaluationHints(eval_body=True, eval_question=False) + is_a_question_hint = EvaluationHints(eval_body=True, eval_question=True) + product_inputs = [] + + # add parenthesis around the element if needed + # test case test_ExprToPython.test_i_can_compile_when_element_is_missing_its_parenthesis() + if expr_node.element.first is None and len(expr_node.element.items) > 1: + expr_node.element.first = NameExprNode(-1, -1, [open_parenthesis_mapping[TokenKind.LPAR]]) + expr_node.element.last = NameExprNode(-1, -1, [end_parenthesis_mapping[TokenKind.LPAR]]) + + element_objs = self.visit(expr_node.element, not_a_question_hint) + product_inputs.append(element_objs) + for comp in expr_node.generators: + target_objs = self.visit(comp.target, not_a_question_hint) + iter_objs = self.visit(comp.iterable, not_a_question_hint) + if comp.if_expr: + # parse it using PythonConditionExprVisitor + res = self.context.sheerka.parse_expression(self.context, comp.if_expr.get_source()) + if not res.status: + self.errors[comp.if_expr.get_source()] = res.body + return None + if_expr_objs = self.visit(res.body.body, is_a_question_hint) + else: + if_expr_objs = [None] + + product_inputs.extend([target_objs, iter_objs, if_expr_objs]) + + for items in product(*product_inputs): + visitor_objects.append(self.create_list_comprehension(source, *items)) + + 
return visitor_objects + + def visit_VariableNode(self, expr_node: VariableNode, hint: EvaluationHints): + source = expr_node.get_source() + return self.parse_source_code(source, hint) + + def visit_NameExprNode(self, expr_node: NameExprNode, hint: EvaluationHints): + """ + create visitor objects from NameExprNode + :param expr_node: + :param hint: + :return: + """ + source = expr_node.get_source() + return self.parse_source_code(source, hint) + + def visit_ListNode(self, expr_node: ListNode, hint: EvaluationHints): + visitor_objects = [] + source = expr_node.get_source() + + items_objs = [] + for item in expr_node.items: + items_objs.append(self.visit(item, hint)) + + for items in product(*items_objs): + visitor_objects.append(self.create_list(source, + expr_node.first.get_source() if expr_node.first else None, + expr_node.last.get_source() if expr_node.last else None, + items, + expr_node.sep)) + return visitor_objects + + def visit_AndNode(self, expr_node: AndNode, hint: EvaluationHints): + """ + + :param expr_node: + :param hint: + :return: + """ + return self.visit_or_or_and_node("and", expr_node, hint) + + def visit_OrNode(self, expr_node: AndNode, hint: EvaluationHints): + """ + + :param expr_node: + :param hint: + :return: + """ + return self.visit_or_or_and_node("or", expr_node, hint) + + def visit_NotNode(self, expr_node: NotNode, hint: EvaluationHints): + """ + + :param expr_node: + :param hint: + :return: + """ + visitor_objects = [] + + source = expr_node.get_source() + objs = self.visit(expr_node.node, hint) + + for obj in objs: + visitor_objects.append(self.create_not(source, obj)) + + return visitor_objects + + def visit_ComparisonNode(self, expr_node: ComparisonNode, hint: EvaluationHints): + """ + + :param expr_node: + :param hint: + :return: + """ + visitor_objects = [] + source = expr_node.get_source() + + left = self.visit(expr_node.left, hint) + right = self.visit(expr_node.right, hint) + + for left_obj, right_obj in product(left, right): + 
def visit_or_or_and_node(self, node_type, expr_node: ExprNode, hint: EvaluationHints):
    """
    Shared implementation of the ``and`` and ``or`` visitors.

    Every part of the node is visited with the same hint, then one object is
    created for each combination of the parts' results.

    :param node_type: operator keyword forwarded to create_and_or
    :param expr_node: node exposing get_source() and parts
    :param hint: evaluation hints forwarded to every part
    :return: list of the objects returned by create_and_or
    """
    text = expr_node.get_source()
    alternatives_per_part = [self.visit(part, hint) for part in expr_node.parts]

    return [self.create_and_or(node_type, text, combination)
            for combination in product(*alternatives_per_part)]
@staticmethod
def create_list_comprehension(text, *items):
    """
    Assemble the python source of a list comprehension.

    :param text: original text, stored as is in the returned object
    :param items: the element, followed by one (target, iterable, condition)
                  triple per generator; a falsy condition means "no if clause"
    :return: PythonExprVisitorObj whose source is the comprehension and whose
             objects / variables are merged from every truthy item
    """
    parts = list(items)
    merged_objects = {}
    merged_variables = set()

    def absorb(*candidates):
        # falsy entries (e.g. a missing condition) contribute nothing
        for candidate in tuple(filter(None, candidates)):
            merged_objects.update(candidate.objects)
            merged_variables.update(candidate.variables)

    element = parts[0]
    absorb(element)
    chunks = [f"[ {element.source}"]

    # after the element, the items come three by three
    for start in range(1, len(parts), 3):
        target = parts[start]
        iterable = parts[start + 1]
        if_expr = parts[start + 2]
        absorb(target, iterable, if_expr)

        chunks.append(f" for {target.source} in {iterable.source}")
        if if_expr:
            chunks.append(f" if {if_expr.source}")

    chunks.append(" ]")

    return PythonExprVisitorObj(text=text,
                                source="".join(chunks),
                                objects=merged_objects,
                                variables=merged_variables)
@staticmethod
def create_list(text, first, last, items, sep):
    """
    Assemble the python source of a list of items.

    :param text: original text, stored as is in the returned object
    :param first: opening delimiter, or a falsy value when there is none
    :param last: closing delimiter, or a falsy value when there is none
    :param items: visited items; their source, objects and variables are used
    :param sep: separator; its ``value`` followed by a space is put between items
    :return: PythonExprVisitorObj for the whole list
    """
    rendered_items = [entry.source for entry in items]

    code = (first or "") + f"{sep.value} ".join(rendered_items)
    if last:
        code += last

    return PythonExprVisitorObj(text=text,
                                source=code,
                                objects=merge_dicts(*[entry.objects for entry in items]),
                                variables=merge_sets(*[entry.variables for entry in items]))
def is_a_possible_variable(self, name):
    """
    Tell whether or not the name can be a variable.

    A name is rejected when it is a known concept name, or when it can be
    evaluated against ``sheerka_globals`` without raising.

    :param name: candidate name
    :return: True when the name can be a variable, False otherwise
    """
    if self.context.sheerka.is_a_concept_name(name):
        return False

    try:
        # SECURITY NOTE(review): eval() runs ``name`` as a python expression.
        # This is only safe if ``name`` never comes from untrusted input - confirm with callers.
        eval(name, sheerka_globals)
    except Exception:
        # Any evaluation failure means the name is not already bound.
        # ``Exception`` (instead of a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate instead of being reported as "possible variable".
        return True

    return False
@staticmethod
def activate_debug(context, pattern="Sya.*.*"):
    """
    Switch debugging on for the sheerka instance attached to the context.

    Debug is enabled, the given pattern is registered as a debug variable and
    ListDebugLogger is set as the debug logger definition.

    :param context: execution context; its ``sheerka`` attribute is configured
    :param pattern: pattern forwarded to set_debug_var
    :return: None
    """
    context.sheerka.set_debug(context, True)
    context.sheerka.set_debug_var(context, pattern)
    context.sheerka.set_debug_logger_definition(ListDebugLogger)
service.debug_vars_settings == [ - DebugItem("item", "service_name", "method_name", None, False, None, False, False), - DebugItem("item2", "service_name", "method_name", None, False, None, False, True), + DebugItem("vars", "item", "service_name", "method_name", None, False, None, False, False), + DebugItem("vars", "item2", "service_name", "method_name", None, False, None, False, True), ] @pytest.mark.parametrize("settings, expected", [ @@ -407,7 +408,7 @@ class TestSheerkaDebugManager(TestUsingMemoryBasedSheerka): sheerka.set_debug_var(context, "s.m.v", "1+", 10, variable="my_var") assert service.debug_vars_settings == [ - DebugItem("my_var", "s", "m", 1, True, 10, False, True) + DebugItem("vars", "my_var", "s", "m", 1, True, 10, False, True) ] assert service.debug_concepts_settings == [] assert service.debug_rules_settings == [] @@ -418,7 +419,7 @@ class TestSheerkaDebugManager(TestUsingMemoryBasedSheerka): sheerka.set_debug_rule(context, "s.m.v", "1+", 10, rule="my_rule") assert service.debug_rules_settings == [ - DebugItem("my_rule", "s", "m", 1, True, 10, False, True) + DebugItem("rules", "my_rule", "s", "m", 1, True, 10, False, True) ] assert service.debug_concepts_settings == [] assert service.debug_vars_settings == [] @@ -429,7 +430,7 @@ class TestSheerkaDebugManager(TestUsingMemoryBasedSheerka): sheerka.set_debug_concept(context, "s.m.v", "1+", 10, concept="my_concept") assert service.debug_concepts_settings == [ - DebugItem("my_concept", "s", "m", 1, True, 10, False, True) + DebugItem("concepts", "my_concept", "s", "m", 1, True, 10, False, True) ] assert service.debug_rules_settings == [] assert service.debug_vars_settings == [] @@ -449,11 +450,11 @@ class TestSheerkaDebugManager(TestUsingMemoryBasedSheerka): assert another_service.activated assert another_service.debug_vars_settings == [ - DebugItem('var', 'service_name', None, None, False, None, False, True)] + DebugItem("vars", 'var', 'service_name', None, None, False, None, False, True)] assert 
another_service.debug_rules_settings == [ - DebugItem('1', None, None, None, False, None, False, True)] + DebugItem("rules", '1', None, None, None, False, None, False, True)] assert another_service.debug_concepts_settings == [ - DebugItem('1001', None, None, None, False, None, False, True)] + DebugItem("concepts", '1001', None, None, None, False, None, False, True)] def test_i_can_inspect_concept_all_attributes(self): sheerka, context, foo = self.init_concepts("foo") @@ -766,9 +767,12 @@ class TestSheerkaDebugManager(TestUsingMemoryBasedSheerka): sheerka.pop_ontology(context) assert service.activated - assert service.debug_vars_settings == [DebugItem("v_name", "v_service", "v_method", 1, True, 1, False, True)] - assert service.debug_rules_settings == [DebugItem("r_name", "r_service", "r_method", 2, True, 2, False, True)] - assert service.debug_concepts_settings == [DebugItem("c_name", "c_serv", "c_method", 3, True, 3, False, True)] + assert service.debug_vars_settings == [ + DebugItem("vars", "v_name", "v_service", "v_method", 1, True, 1, False, True)] + assert service.debug_rules_settings == [ + DebugItem("rules", "r_name", "r_service", "r_method", 2, True, 2, False, True)] + assert service.debug_concepts_settings == [ + DebugItem("concepts", "c_name", "c_serv", "c_method", 3, True, 3, False, True)] def test_i_can_register_debug_item(self): sheerka, context = self.init_concepts() diff --git a/tests/core/test_SheerkaEvaluateConcept.py b/tests/core/test_SheerkaEvaluateConcept.py index 9746049..d57fe03 100644 --- a/tests/core/test_SheerkaEvaluateConcept.py +++ b/tests/core/test_SheerkaEvaluateConcept.py @@ -4,7 +4,7 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserRes from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, \ DEFINITION_TYPE_DEF from core.global_symbols import NotInit, NotFound -from core.sheerka.services.SheerkaEvaluateConcept import SheerkaEvaluateConcept +from 
core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints, SheerkaEvaluateConcept from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaMemory import SheerkaMemory from parsers.BaseParser import BaseParser @@ -13,7 +13,7 @@ from parsers.ExpressionParser import ExpressionParser from parsers.PythonParser import PythonNode, PythonParser from parsers.SyaNodeParser import SyaNodeParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -from tests.evaluators.EvaluatorTestsUtils import pr_ret_val, python_ret_val +from tests.evaluators.EvaluatorTestsUtils import exact, pr_ret_val, python_ret_val from tests.parsers.parsers_utils import CB, compare_with_test_object @@ -487,7 +487,9 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): eval_where=True, ) - evaluated = sheerka.evaluate_concept(self.get_context(sheerka, False, True), concept, eval_body=False) + evaluated = sheerka.evaluate_concept(self.get_context(sheerka, False, True), + concept, + hints=EvaluationHints(eval_body=False)) if expected: assert evaluated.key == concept.key @@ -532,13 +534,13 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): ) foo_instance = sheerka.new("foo") - evaluated = sheerka.evaluate_concept(context, foo_instance, eval_body=False) + evaluated = sheerka.evaluate_concept(context, foo_instance, hints=EvaluationHints(eval_body=False)) assert ConceptParts.BODY in evaluated.get_compiled() assert evaluated.body == NotInit assert not evaluated.get_hints().is_evaluated - evaluated = sheerka.evaluate_concept(context, foo_instance, eval_body=True) # evaluate the body this time + evaluated = sheerka.evaluate_concept(context, foo_instance, hints=EvaluationHints(eval_body=True)) # evaluate the body this time assert isinstance(evaluated.body, bool) and evaluated.body def test_i_can_apply_intermediate_where_condition_using_python(self): @@ -811,7 +813,7 @@ class 
TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): Concept("bar", pre="print('10')"), # print won't be executed ) - evaluated = sheerka.evaluate_concept(context, foo, eval_body=True) + evaluated = sheerka.evaluate_concept(context, foo, hints=EvaluationHints(eval_body=True)) captured = capsys.readouterr() assert evaluated.key == foo.key assert captured.out == "10\n" @@ -828,7 +830,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): Concept("foo", pre="in_context('foo')", body="print('10')"), ) - evaluated = sheerka.evaluate_concept(context, concept, eval_body=True) + evaluated = sheerka.evaluate_concept(context, concept, hints=EvaluationHints(eval_body=True)) assert sheerka.isinstance(evaluated, BuiltinConcepts.CONDITION_FAILED) assert evaluated.body == "in_context('foo')" assert evaluated.concept == concept @@ -898,7 +900,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): def test_is_evaluated_is_correctly_set(self, concept, expected): sheerka, context, concept = self.init_concepts(concept) - evaluated = sheerka.evaluate_concept(context, concept, eval_body=True) + evaluated = sheerka.evaluate_concept(context, concept, hints=EvaluationHints(eval_body=True)) assert evaluated.key == concept.key assert concept.get_hints().is_evaluated == expected @@ -919,24 +921,24 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): res = sheerka.evaluate_concept(context, bar) assert sheerka.isinstance(res, "bar") - res = sheerka.evaluate_concept(context, bar, eval_body=True) + res = sheerka.evaluate_concept(context, bar, hints=EvaluationHints(eval_body=True)) assert sheerka.isinstance(res, "foo") # And the result is still the same after a second call assert bar.get_hints().is_evaluated - res = sheerka.evaluate_concept(context, bar, eval_body=True) + res = sheerka.evaluate_concept(context, bar, hints=EvaluationHints(eval_body=True)) assert sheerka.isinstance(res, "foo") def test_ret_is_evaluated_only_is_body_is_requested(self): 
sheerka, context, foo, bar = self.init_concepts("foo", Concept("bar", ret="__NOT_FOUND")) - res = sheerka.evaluate_concept(context, bar, eval_body=False) + res = sheerka.evaluate_concept(context, bar, hints=EvaluationHints(eval_body=False)) assert res.id == bar.id def test_i_can_eval_concept_with_rules(self): sheerka, context, foo = self.init_concepts(Concept("foo a", body="a.name").def_var("a", "r:|1:")) - res = sheerka.evaluate_concept(context, foo, eval_body=True) + res = sheerka.evaluate_concept(context, foo, hints=EvaluationHints(eval_body=True)) assert res.body == "Print return values" def test_i_can_manage_python_concept_infinite_recursion_when_initializing_ast(self): @@ -957,18 +959,18 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): evaluator = SheerkaEvaluateConcept(sheerka) # 'def concept foo as foo' - return_values = [pr_ret_val(foo, parser="ExactConcept"), python_ret_val("foo")] + return_values = [pr_ret_val(foo, parser=exact), python_ret_val("foo")] res = evaluator.get_recursive_definitions(context, foo, return_values) - assert list(res) == [BaseParser.get_name("ExactConcept")] + assert list(r.name for r in res) == [BaseParser.get_name("ExactConcept")] def test_i_can_detect_when_no_recursive_definition(self): sheerka, context, foo, bar = self.init_concepts("foo", "bar") evaluator = SheerkaEvaluateConcept(sheerka) # 'def concept foo as bar' - return_values = [pr_ret_val(bar, parser="ExactConcept"), python_ret_val("foo")] + return_values = [pr_ret_val(bar, parser=exact), python_ret_val("foo")] res = evaluator.get_recursive_definitions(context, foo, return_values) @@ -980,7 +982,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): evaluator = SheerkaEvaluateConcept(sheerka) # i dunno how to construct the return value - return_values = [pr_ret_val(q, parser="ExactConcept")] + return_values = [pr_ret_val(q, parser=exact)] res = evaluator.get_recursive_definitions(context, q, return_values) @@ -1004,7 +1006,7 @@ class 
TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert evaluated.get_compiled()["y"][0].body.body.get_hints().use_copy # get the body - evaluated = evaluator.evaluate_concept(context, concept, eval_body=True) + evaluated = evaluator.evaluate_concept(context, concept, hints=EvaluationHints(eval_body=True)) assert evaluated.get_compiled()["x"][0].body.body.get_hints().use_copy assert evaluated.get_compiled()["y"][0].body.body.get_hints().use_copy assert not evaluated.get_value("x").get_hints().use_copy @@ -1025,7 +1027,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): # get the body context.add_to_protected_hints(BuiltinConcepts.EVAL_QUESTION_REQUESTED) - evaluated = evaluator.evaluate_concept(context, concept, eval_body=True) + evaluated = evaluator.evaluate_concept(context, concept, hints=EvaluationHints(eval_body=True)) assert evaluated.get_compiled()["x"][0].body.body.get_hints().use_copy assert evaluated.get_compiled()["y"][0].body.body.get_hints().use_copy assert not evaluated.get_value("x").get_hints().use_copy @@ -1043,7 +1045,9 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): # Sanity check for normal behaviour to_evaluate1 = parsed_ret_val.body.body[0].concept.copy() - evaluated1 = sheerka.evaluate_concept(context, to_evaluate1, eval_body=True, validation_only=False) + evaluated1 = sheerka.evaluate_concept(context, + to_evaluate1, + hints=EvaluationHints(eval_body=True, expression_only=False)) assert sheerka.isinstance(evaluated1, shirt) assert evaluated1.get_value("body_ax_is_evaluated") == True @@ -1053,7 +1057,9 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): # check validation_only behaviour to_evaluate2 = parsed_ret_val.body.body[0].concept.copy() - evaluated2 = sheerka.evaluate_concept(context, to_evaluate2, eval_body=True, validation_only=True) + evaluated2 = sheerka.evaluate_concept(context, + to_evaluate2, + hints=EvaluationHints(eval_body=True, expression_only=True)) assert 
sheerka.isinstance(evaluated2, shirt) assert evaluated2.get_value("body_ax_is_evaluated") == NotInit @@ -1073,7 +1079,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): parsed_ret_val = SyaNodeParser().parse(context, ParserInput("a red shirt")) to_evaluate = parsed_ret_val.body.body[0].concept - evaluated = sheerka.evaluate_concept(context, to_evaluate, eval_body=False) + evaluated = sheerka.evaluate_concept(context, to_evaluate, hints=EvaluationHints(eval_body=False)) assert sheerka.isinstance(evaluated, a_x) assert "x" in evaluated.get_compiled() @@ -1085,7 +1091,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): # sanity check parsed_ret_val = SyaNodeParser().parse(context, ParserInput("a red shirt")) to_evaluate = parsed_ret_val.body.body[0].concept - evaluated = sheerka.evaluate_concept(context, to_evaluate, eval_body=True) + evaluated = sheerka.evaluate_concept(context, to_evaluate, hints=EvaluationHints(eval_body=True)) assert sheerka.isinstance(evaluated, shirt) assert evaluated.get_value("body_ax_is_evaluated") == True @@ -1095,7 +1101,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): def test_concept_is_not_evaluated_when_method_access_error(self): sheerka, context, foo = self.init_concepts(Concept("foo", body="set_attr(self, 'prop_name', 'prop_value')")) - evaluated = sheerka.evaluate_concept(context, foo, eval_body=True, validation_only=True) + evaluated = sheerka.evaluate_concept(context, foo, hints=EvaluationHints(eval_body=True, expression_only=True)) assert sheerka.isinstance(evaluated, foo) assert not foo.get_hints().is_evaluated diff --git a/tests/evaluators/EvaluatorTestsUtils.py b/tests/evaluators/EvaluatorTestsUtils.py index 9aa3a94..9966d78 100644 --- a/tests/evaluators/EvaluatorTestsUtils.py +++ b/tests/evaluators/EvaluatorTestsUtils.py @@ -1,14 +1,22 @@ import ast -from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts, ParserResultConcept +from core.builtin_concepts import 
BuiltinConcepts, ParserResultConcept, ReturnValueConcept from core.concept import Concept from evaluators.BaseEvaluator import BaseEvaluator -from parsers.BaseParser import BaseParser -from parsers.PythonParser import PythonNode +from parsers.BaseNodeParser import ConceptNode +from parsers.ExactConceptParser import ExactConceptParser +from parsers.PythonParser import PythonNode, PythonParser +from parsers.SequenceNodeParser import SequenceNodeParser +from parsers.SyaNodeParser import SyaNodeParser reduced_requested = ReturnValueConcept("Sheerka", True, Concept(name=BuiltinConcepts.REDUCE_REQUESTED, key=BuiltinConcepts.REDUCE_REQUESTED)) +sequence = SequenceNodeParser() +sya = SyaNodeParser() +exact = ExactConceptParser() +python = PythonParser() + def ret_val(value="value", who="who", status=True): """ @@ -21,7 +29,7 @@ def ret_val(value="value", who="who", status=True): return ReturnValueConcept(who, status, value) -def p_ret_val(value="value", parser="parser", status=True): +def p_ret_val(value="value", parser=exact, status=True): """ ReturnValueConcept from parser :param value: @@ -29,7 +37,7 @@ def p_ret_val(value="value", parser="parser", status=True): :param status: :return: """ - return ReturnValueConcept(BaseParser.get_name(parser), status, value) + return ReturnValueConcept(parser.name, status, value) def e_ret_val(value="value", evaluator="evaluator", status=True): @@ -43,7 +51,7 @@ def e_ret_val(value="value", evaluator="evaluator", status=True): return ReturnValueConcept(BaseEvaluator.PREFIX + evaluator, status, value) -def p_ret_val_false(value="value", parser="parser"): +def p_ret_val_false(value="value", parser=exact): """ Failed ReturnValueConcept from parser :param value: @@ -53,7 +61,7 @@ def p_ret_val_false(value="value", parser="parser"): return p_ret_val(value, parser, status=False) -def p_ret_val_true(value="value", parser="parser"): +def p_ret_val_true(value="value", parser=exact): """ Successful ReturnValueConcept from parser :param value: 
@@ -63,24 +71,24 @@ def p_ret_val_true(value="value", parser="parser"): return p_ret_val(value, parser, status=True) -def e_ret_val_false(value="value", parser="parser"): +def e_ret_val_false(value="value", evaluator="evaluator"): """ Failed ReturnValueConcept from evaluator :param value: - :param parser: + :param evaluator: :return: """ - return e_ret_val(value, parser, status=False) + return e_ret_val(value, evaluator, status=False) -def e_ret_val_true(value="value", parser="parser"): +def e_ret_val_true(value="value", evaluator="evaluator"): """ Successful ReturnValueConcept from evaluator :param value: - :param parser: + :param evaluator: :return: """ - return e_ret_val(value, parser, status=True) + return e_ret_val(value, evaluator, status=True) def e_ret_val_new(key, evaluator="evaluator", status=True, **kwargs): @@ -96,7 +104,7 @@ def e_ret_val_new(key, evaluator="evaluator", status=True, **kwargs): return e_ret_val(body, evaluator, status) -def pr_ret_val(value, parser="parser", source=None, status=True): +def pr_ret_val(value, parser=exact, source=None, status=True): """ ParserResult ReturnValue eg: ReturnValue with a ParserResult @@ -107,7 +115,7 @@ def pr_ret_val(value, parser="parser", source=None, status=True): :return: """ source = source or (value.name if isinstance(value, Concept) else "source") - parser_result = ParserResultConcept(BaseParser.get_name(parser), source=source, value=value) + parser_result = ParserResultConcept(parser, source=source, value=value) return p_ret_val(value=parser_result, parser=parser, status=status) @@ -117,8 +125,14 @@ def python_ret_val(source): :param source: :return: """ - python_node = PythonNode(source.strip(), ast.parse(source.strip(), f"", 'eval')) - return pr_ret_val(python_node, parser="Python", source=source) + python_node = PythonNode(source.lstrip(), ast.parse(source.strip(), f"", 'eval')) + return pr_ret_val(python_node, parser=python, source=source) + + +def cnode_ret_val(concept, source=None, parser=sya): 
def new_plural(name):
    """
    Create a plural concept for the tests.

    The returned concept uses ``name`` as key and name, and carries the
    singular concept in its BuiltinConcepts.PLURAL property.

    :param name: plural name (eg: "numbers")
    :return: the plural concept
    """
    # The singular is the plural without its final "s".
    # The previous lstrip("s") removed *leading* "s" characters instead, so
    # "numbers" was left untouched and "sheeps" became "heeps".
    # Only one trailing "s" is removed (rstrip would turn "glasses" into "glasse").
    name_stripped_s = name[:-1] if name.endswith("s") else name

    single = Concept(name_stripped_s)
    concept = Concept(key=name,
                      name=name,
                      id=f"{name}:{BuiltinConcepts.PLURAL}",
                      is_builtin=False,
                      is_unique=False)
    concept.set_prop(BuiltinConcepts.PLURAL, single)
    return concept
class TestResolveMultiplePluralAmbiguityEvaluator(TestUsingMemoryBasedSheerka):
    """
    Tests for ResolveMultiplePluralAmbiguityEvaluator: when it matches, and
    which of the ambiguous return values it keeps.
    """

    @staticmethod
    def _ambiguous_return_values():
        """
        :return: [python return value, plural concept node return value] for "numbers"
        """
        return [
            python_ret_val("numbers"),
            cnode_ret_val(new_plural("numbers"), source="source", parser=sequence)
        ]

    @staticmethod
    def _assert_kept(context, return_values, kept):
        """
        Run the evaluator and check that its single result mirrors ``kept``.

        :param context: execution context
        :param return_values: input of the evaluator
        :param kept: the return value whose status and value must be reproduced
        """
        evaluator = ResolveMultiplePluralAmbiguityEvaluator()
        assert evaluator.matches(context, return_values)

        results = evaluator.eval(context, return_values)
        assert len(results) == 1
        result = results[0]
        assert result.who == evaluator.name
        assert result.status == kept.status
        assert result.value == kept.value
        assert result.parents == return_values

    @pytest.mark.parametrize("return_values, expected", [
        ([python_ret_val("numbers"), cnode_ret_val(new_plural("numbers"), parser=sequence)], True),
        ([python_ret_val("numbers"), cnode_ret_val(new_plural("numbers"), parser=sequence), pr_ret_val("other")], True),
        ([python_ret_val("numbers"), cnode_ret_val(new_plural("numbers"), parser=sya)], False),
        ([python_ret_val("numbers"), cnode_ret_val(Concept("numbers"), parser=sequence)], False),
        ([python_ret_val("numbers"), pr_ret_val(new_plural("numbers"), parser=exact)], False),
    ])
    def test_i_can_match(self, return_values, expected):
        context = self.get_context()
        matched = ResolveMultiplePluralAmbiguityEvaluator().matches(context, return_values)
        assert matched == expected

    def test_i_can_eval_when_nothing_in_memory(self):
        # nothing named "numbers" in memory: the concept node return value is kept
        sheerka, context = self.init_test().unpack()

        return_values = self._ambiguous_return_values()

        self._assert_kept(context, return_values, return_values[1])

    def test_i_can_eval_when_plural_in_memory(self):
        # "numbers" is in memory: the python return value is kept
        sheerka, context = self.init_test().unpack()
        sheerka.add_to_memory(context, "numbers", "something")

        return_values = self._ambiguous_return_values()

        self._assert_kept(context, return_values, return_values[0])
a/tests/non_reg/test_sheerka_non_reg_out.py +++ b/tests/non_reg/test_sheerka_non_reg_out.py @@ -180,3 +180,24 @@ two: (1002)two captured = capsys.readouterr() assert " : test()" in captured.out assert " : history()" in captured.out + + def test_i_can_list_debug_settings(self, capsys): + init = [ + "set_debug_var('Sya.parsers.*', 45)", + "set_debug_concept('c:|1015', '13+')", + "set_debug_rule('Out')", + ] + sheerka = self.init_scenario(init) + capsys.readouterr() + + sheerka.enable_process_return_values = True + res = sheerka.evaluate_user_input(f"list_debug_settings()") + + assert len(res) == 1 + assert res[0].status + + captured = capsys.readouterr() + assert captured.out == """DebugItem(type=vars, setting=Sya.parsers.*, context_id=45, debug_id=None, context_children=False, debug_children=False (enabled=True)) +DebugItem(type=concepts, setting=c:|1015.*.*, context_id=13, debug_id=None, context_children=True, debug_children=False (enabled=True)) +DebugItem(type=rules, setting=Out.*.*, context_id=None, debug_id=None, context_children=False, debug_children=False (enabled=True)) +""" diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index a9f7cdb..09f23b9 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -1,24 +1,23 @@ import ast from dataclasses import dataclass -from typing import Union, List +from typing import List, Union from core.builtin_concepts import ReturnValueConcept from core.builtin_helpers import CreateObjectIdentifiers -from core.concept import Concept, ConceptParts, DoNotResolve, AllConceptParts +from core.concept import AllConceptParts, Concept, ConceptParts, DoNotResolve from core.rule import Rule -from core.tokenizer import Tokenizer, TokenKind, Token -from core.utils import get_text_from_tokens, tokens_index, str_concept -from parsers.BaseExpressionParser import NameExprNode, AndNode, OrNode, NotNode, VariableNode, ComparisonNode, \ - ComparisonType, \ - FunctionParameter -from 
parsers.BaseNodeParser import UnrecognizedTokensNode, SourceCodeNode, RuleNode, ConceptNode, \ - SourceCodeWithConceptNode +from core.tokenizer import Token, TokenKind, Tokenizer +from core.utils import get_text_from_tokens, str_concept, tokens_index +from parsers.BaseExpressionParser import AndNode, ComparisonNode, ComparisonType, Comprehension, FunctionParameter, \ + ListComprehensionNode, ListNode, NameExprNode, \ + NotNode, OrNode, VariableNode, comma +from parsers.BaseNodeParser import ConceptNode, RuleNode, SourceCodeNode, SourceCodeWithConceptNode, \ + UnrecognizedTokensNode from parsers.FunctionParser import FunctionNode from parsers.PythonParser import PythonNode -from parsers.SyaNodeParser import SyaConceptParserHelper from sheerkapython.python_wrapper import sheerka_globals from sheerkarete.common import V -from sheerkarete.conditions import Condition, AndConditions, NegatedCondition, NegatedConjunctiveConditions +from sheerkarete.conditions import AndConditions, Condition, NegatedCondition, NegatedConjunctiveConditions @dataclass @@ -29,104 +28,254 @@ class Obj: parent: object = None -class AND: +class ExprTestObj: + @staticmethod + def get_pos(nodes): + start, end = None, None + for n in nodes: + if start is None or start > n.start: + start = n.start + if end is None or end < n.end: + end = n.end + return start, end + + @staticmethod + def get_pos_from_source(source, full_text_as_tokens): + if isinstance(source, tuple): + source, to_skip = source[0], source[1] + else: + to_skip = 0 + + source_as_node = list(Tokenizer(source, yield_eof=False)) + start = tokens_index(full_text_as_tokens, source_as_node, skip=to_skip) + end = start + len(source_as_node) - 1 + return start, end + + @staticmethod + def as_tokens(source): + if isinstance(source, tuple): + source, to_skip = source + else: + source, to_skip = source, 0 + + return list(Tokenizer(source, yield_eof=False)), to_skip + + def get_expr_node(self, full_text_as_tokens=None): + raise NotImplementedError 
+ + @staticmethod + def safe_get_expr_node(obj, full_text_as_tokens): + if obj is None: + return None + + obj = EXPR(obj) if isinstance(obj, (str, tuple)) else obj + return obj.get_expr_node(full_text_as_tokens) + + +class AND(ExprTestObj): """ Test class for AndNode""" def __init__(self, *parts, source=None): self.parts = parts self.source = source + def get_expr_node(self, full_text_as_tokens=None): + parts = [part.get_expr_node(full_text_as_tokens) for part in self.parts] + start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else self.get_pos(parts) + return AndNode(start, end, full_text_as_tokens[start: end + 1], *parts) -class OR: + +class OR(ExprTestObj): """ Test class for OrNode""" def __init__(self, *parts, source=None): self.parts = parts self.source = source + def get_expr_node(self, full_text_as_tokens=None): + parts = [part.get_expr_node(full_text_as_tokens) for part in self.parts] + start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else self.get_pos(parts) + return OrNode(start, end, full_text_as_tokens[start: end + 1], *parts) + @dataclass -class NOT: +class NOT(ExprTestObj): """ Test class for NotNode""" - expr: object + expr: ExprTestObj source: str = None + def get_expr_node(self, full_text_as_tokens=None): + part = self.expr.get_expr_node(full_text_as_tokens) + start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else ( + part.start - 2, part.end) + return NotNode(start, end, full_text_as_tokens[start: end + 1], part) + @dataclass -class EXPR: - """Test class for NameNode. 
E stands for Expression""" +class EXPR(ExprTestObj): + """Test class for NameNode""" source: str + def get_expr_node(self, full_text_as_tokens=None): + value_as_tokens, to_skip = self.as_tokens(self.source) + start = tokens_index(full_text_as_tokens, value_as_tokens, to_skip) + end = start + len(value_as_tokens) - 1 + return NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + @dataclass -class VAR: +class VAR(ExprTestObj): """Test class for VarNode""" + full_name: str source: str = None + def get_expr_node(self, full_text_as_tokens=None): + value_as_tokens = list(Tokenizer(self.source or self.full_name, yield_eof=False)) + start = tokens_index(full_text_as_tokens, value_as_tokens, 0) + end = start + len(value_as_tokens) - 1 + parts = self.full_name.split(".") + if len(parts) == 1: + return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0]) + else: + return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:]) + @dataclass -class EQ: - left: object - right: object +class CompExprTestObj(ExprTestObj): + """ + Test object for comparison ==, <=, ... 
+ """ + left: ExprTestObj + right: ExprTestObj source: str = None - -@dataclass -class NEQ: - left: object - right: object - source: str = None + def get_expr_node(self, full_text_as_tokens=None): + node_type = comparison_type_mapping[type(self).__name__] + left_node = self.left.get_expr_node(full_text_as_tokens) + right_node = self.right.get_expr_node(full_text_as_tokens) + start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else \ + self.get_pos([left_node, right_node]) + return ComparisonNode(start, end, full_text_as_tokens[start: end + 1], node_type, left_node, right_node) @dataclass -class GT: - left: object - right: object - source: str = None +class EQ(CompExprTestObj): + pass @dataclass -class GTE: - left: object - right: object - source: str = None +class NEQ(CompExprTestObj): + pass @dataclass -class LT: - left: object - right: object - source: str = None +class GT(CompExprTestObj): + pass @dataclass -class LTE: - left: object - right: object - source: str = None +class GTE(CompExprTestObj): + pass @dataclass -class IN: - left: object - right: object - source: str = None +class LT(CompExprTestObj): + pass @dataclass -class NIN: # for NOT INT - left: object - right: object - source: str = None +class LTE(CompExprTestObj): + pass @dataclass -class PAREN: # for parenthesis node +class IN(CompExprTestObj): + pass + + +@dataclass +class NIN(CompExprTestObj): # for NOT INT + pass + + +@dataclass +class PAREN(ExprTestObj): # for parenthesis node node: object source: str = None +class L_EXPR(ExprTestObj): + def __init__(self, first, last, *items, sep=None, source=None): + self.first = first + self.last = last + self.items = items + self.sep = sep or comma + self.source = source + + def get_expr_node(self, full_text_as_tokens=None): + first = self.safe_get_expr_node(self.first, full_text_as_tokens) + last = self.safe_get_expr_node(self.last, full_text_as_tokens) + + items = [self.safe_get_expr_node(item, full_text_as_tokens) for 
item in self.items] + + if self.source is None: + source = self.first if self.first else "" + source += f"{self.sep.value} ".join(item.get_source() for item in items) + if self.last: + source += self.last + else: + source = self.source + + start, end = self.get_pos_from_source(source, full_text_as_tokens) + return ListNode(start, end, full_text_as_tokens[start: end + 1], first, last, items, self.sep) + + +@dataclass +class LCC: + """ + List comprehension comprehension + """ + target: object + iterable: object + if_expr: object + + +@dataclass +class LC(ExprTestObj): # for List Comprehension node + element: object + generators: list + source: str = None + + def get_expr_node(self, full_text_as_tokens=None): + # first transform str into NameExprTestObj (ie EXPR) + if isinstance(self.element, str): + self.element = EXPR(self.element) + + comprehensions = [] + nodes = [] + for comp in self.generators: + target = EXPR(comp[0]) if isinstance(comp[0], (str, tuple)) else comp[0] + iterable = EXPR(comp[1]) if isinstance(comp[1], (str, tuple)) else comp[1] + if_expr = EXPR(comp[2]) if isinstance(comp[2], (str, tuple)) else comp[2] + comprehensions.append(LCC(target, iterable, if_expr)) + self.generators = comprehensions + + # then transform into ListComprehensionNode + element = self.element.get_expr_node(full_text_as_tokens) + nodes.append(element) + comprehensions = [] + for comp in self.generators: + target = comp.target.get_expr_node(full_text_as_tokens) + iterable = comp.iterable.get_expr_node(full_text_as_tokens) + if_expr = comp.if_expr.get_expr_node(full_text_as_tokens) if comp.if_expr else None + comprehensions.append(Comprehension(target, iterable, if_expr)) + nodes.extend([target, iterable, if_expr]) + + start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else self.get_pos(nodes) + return ListComprehensionNode(start, end, full_text_as_tokens[start: end + 1], element, comprehensions) + + class CC: """ Concept class for test 
purpose @@ -320,10 +469,13 @@ class CMV: Test class that only compare the key and the metadata variables """ - def __init__(self, concept, **kwargs): + def __init__(self, concept, source=None, **kwargs): self.concept_key = concept.key if isinstance(concept, Concept) else concept self.concept = concept if isinstance(concept, Concept) else None self.variables = kwargs + self.source = source # to use when the key is different from the sub str to search when filling start and stop + self.start = None # for debug purpose, indicate where the concept starts + self.end = None # for debug purpose, indicate where the concept ends def __eq__(self, other): if id(self) == id(other): @@ -352,6 +504,21 @@ class CMV: txt += f", {k}='{v}'" return txt + ")" + def fix_pos(self, node): + start = node.start if hasattr(node, "start") else \ + node[0] if isinstance(node, tuple) else None + end = node.end if hasattr(node, "end") else \ + node[1] if isinstance(node, tuple) else None + + if start is not None: + if self.start is None or start < self.start: + self.start = start + + if end is not None: + if self.end is None or end > self.end: + self.end = end + return self + def transform_real_obj(self, other, get_test_obj_delegate): if isinstance(other, CMV): return other @@ -730,7 +897,7 @@ class CNC(CN): self_compile_to_use = self.compiled or compiled - compiled = get_test_obj_delegate(self_compile_to_use, compiled, get_test_obj_delegate) + compiled = get_test_obj_delegate(compiled, self_compile_to_use, get_test_obj_delegate) return CNC(other.concept, other.source if self.source is not None else None, other.start if self.start is not None else None, @@ -865,7 +1032,7 @@ class RN(HelperWithPos): raise Exception(f"Expecting RuleNode but received {other=}") -class FN: +class FN(ExprTestObj): """ Test class only It matches with FunctionNode but with less constraints @@ -931,6 +1098,32 @@ class FN: raise Exception(f"Expecting FunctionNode but received {other=}") + def get_expr_node(self, 
full_text_as_tokens=None): + start, end = self.get_pos_from_source(self.first, full_text_as_tokens) + first = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + start, end = self.get_pos_from_source(self.last, full_text_as_tokens) + last = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + parameters = [] + for param_value, sep in self.parameters: + if isinstance(param_value, str): + start, end = self.get_pos_from_source(param_value, full_text_as_tokens) + param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + else: + param_as_expr_node = param_value.get_expr_node(full_text_as_tokens) + + if sep: + sep_tokens = Tokenizer(sep, yield_eof=False) + start = param_as_expr_node.end + 1 + end = start + len(list(sep_tokens)) - 1 + sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + else: + sep_as_expr_node = None + + parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node)) + + start, end = first.start, last.end + return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters) + @dataclass() class NEGCOND: @@ -966,94 +1159,7 @@ def get_expr_node_from_test_node(full_text, test_node): Returns EXPR, OR, NOT, AND object to ease the comparison with the real ExprNode """ full_text_as_tokens = list(Tokenizer(full_text, yield_eof=False)) - - def get_pos(nodes): - start, end = None, None - for n in nodes: - if start is None or start > n.start: - start = n.start - if end is None or end < n.end: - end = n.end - return start, end - - def get_pos_from_source(source): - if isinstance(source, tuple): - source, to_skip = source[0], source[1] - else: - to_skip = 0 - - source_as_node = list(Tokenizer(source, yield_eof=False)) - start = tokens_index(full_text_as_tokens, source_as_node, skip=to_skip) - end = start + len(source_as_node) - 1 - return start, end - - def get_expr_node(node): - - if isinstance(node, EXPR): - value_as_tokens = 
list(Tokenizer(node.source, yield_eof=False)) - start = tokens_index(full_text_as_tokens, value_as_tokens, 0) - end = start + len(value_as_tokens) - 1 - return NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - - if isinstance(node, AND): - parts = [get_expr_node(part) for part in node.parts] - start, end = get_pos_from_source(node.source) if node.source else get_pos(parts) - return AndNode(start, end, full_text_as_tokens[start: end + 1], *parts) - - if isinstance(node, OR): - parts = [get_expr_node(part) for part in node.parts] - start, end = get_pos_from_source(node.source) if node.source else get_pos(parts) - return OrNode(start, end, full_text_as_tokens[start: end + 1], *parts) - - if isinstance(node, NOT): - part = get_expr_node(node.expr) - start, end = get_pos_from_source(node.source) if node.source else (part.start - 2, part.end) - return NotNode(start, end, full_text_as_tokens[start: end + 1], part) - - if isinstance(node, VAR): - value_as_tokens = list(Tokenizer(node.source or node.full_name, yield_eof=False)) - start = tokens_index(full_text_as_tokens, value_as_tokens, 0) - end = start + len(value_as_tokens) - 1 - parts = node.full_name.split(".") - if len(parts) == 1: - return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0]) - else: - return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:]) - - if isinstance(node, (EQ, NEQ, GT, GTE, LT, LTE, IN, NIN)): - node_type = comparison_type_mapping[type(node).__name__] - left_node, right_node = get_expr_node(node.left), get_expr_node(node.right) - start, end = get_pos_from_source(node.source) if node.source else get_pos([left_node, right_node]) - return ComparisonNode(start, end, full_text_as_tokens[start: end + 1], - node_type, left_node, right_node) - - if isinstance(node, FN): - start, end = get_pos_from_source(node.first) - first = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - start, end = get_pos_from_source(node.last) - 
last = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - parameters = [] - for param_value, sep in node.parameters: - if isinstance(param_value, str): - start, end = get_pos_from_source(param_value) - param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - else: - param_as_expr_node = get_expr_node(param_value) - - if sep: - sep_tokens = Tokenizer(sep, yield_eof=False) - start = param_as_expr_node.end + 1 - end = start + len(list(sep_tokens)) - 1 - sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - else: - sep_as_expr_node = None - - parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node)) - - start, end = first.start, last.end - return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters) - - return get_expr_node(test_node) + return test_node.get_expr_node(full_text_as_tokens) def _index(tokens, expr, index): @@ -1096,15 +1202,15 @@ def compute_debug_array(res): def get_node( - concepts_map, - expression_as_tokens, - sub_expr, - concept_key=None, - skip=0, - is_bnf=False, - sya=False, - init_empty_body=False, - exclude_body=False): + concepts_map, + expression_as_tokens, + sub_expr, + concept_key=None, + skip=0, + is_bnf=False, + sya=False, + init_empty_body=False, + exclude_body=False): """ Tries to find sub in expression When found, transform it to its correct type @@ -1157,18 +1263,20 @@ def get_node( sub_expr.end = start + length - 1 return sub_expr - if isinstance(sub_expr, (CNC, CC, CN)): - concept_node = get_node( - concepts_map, - expression_as_tokens, - sub_expr.source or sub_expr.concept_key, - sub_expr.concept_key, sya=sya) - if not hasattr(concept_node, "concept"): - raise Exception(f"'{sub_expr.concept_key}' is not a concept. 
Check your map.") - concept_found = concept_node.concept - sub_expr.concept_key = concept_found.key - sub_expr.concept = concept_found - sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start)) + if isinstance(sub_expr, (CNC, CC, CN, CMV)): + if sub_expr.concept is None or sub_expr.start is None or sub_expr.end is None: + concept_node = get_node( + concepts_map, + expression_as_tokens, + sub_expr.source or sub_expr.concept_key, + sub_expr.concept_key, sya=sya) + if not hasattr(concept_node, "concept"): + raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.") + concept_found = concept_node.concept + sub_expr.concept_key = concept_found.key + sub_expr.concept = concept_found + sub_expr.fix_pos( + (concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start)) if hasattr(sub_expr, "compiled"): for k, v in sub_expr.compiled.items(): node = get_node(concepts_map, expression_as_tokens, v, sya=sya, @@ -1210,9 +1318,9 @@ def get_node( concept_found = concepts_map.get(concept_key, None) if concept_found: concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests - if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf: - return SyaConceptParserHelper(concept_found, start, start + length - 1) - elif init_empty_body: + # if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf: + # return SyaConceptParserHelper(concept_found, start, start + length - 1) + if init_empty_body: node = CNC(concept_found, sub_expr, start, start + length - 1, exclude_body=exclude_body) init_body(node, concept_found, sub_expr) return node @@ -1354,8 +1462,8 @@ def get_test_obj(real_obj, test_obj, get_test_obj_delegate=None): """ From a production object (Concept, ConceptNode, ....) Create a test object (CNC, CC ...) 
that can be used to validate the unit tests - :param test_obj: :param real_obj: + :param test_obj: test object used as a template :param get_test_obj_delegate: :return: """ @@ -1367,13 +1475,25 @@ def get_test_obj(real_obj, test_obj, get_test_obj_delegate=None): if isinstance(test_obj, dict): if len(test_obj) != len(real_obj): raise Exception(f"Not the same size ! {real_obj=}, {test_obj=}") - return {k: get_test_obj(real_obj[k], v) for k, v in test_obj.items()} - if not hasattr(test_obj, "transform_real_obj"): - return real_obj + if hasattr(test_obj, "transform_real_obj"): + return test_obj.transform_real_obj(real_obj, get_test_obj) - return test_obj.transform_real_obj(real_obj, get_test_obj) + return real_obj + + +def prepare_nodes_comparison(concepts_map, expression, real_obj, test_obj): + if isinstance(real_obj, list): + assert len(real_obj) == len( + test_obj), f"The two lists do not have the same size {len(real_obj)} != {len(test_obj)}" + resolved_test_obj = compute_expected_array(concepts_map, expression, test_obj) + real_obj_as_test = [get_test_obj(r, t) for r, t in zip(real_obj, resolved_test_obj)] + return real_obj_as_test, resolved_test_obj + else: + resolved_test_obj = compute_expected_array(concepts_map, expression, [test_obj])[0] + real_obj_as_test = get_test_obj(real_obj, resolved_test_obj) + return real_obj_as_test, resolved_test_obj def compare_with_test_object(actual, expected): diff --git a/tests/parsers/test_BaseCustomGrammarParser.py b/tests/parsers/test_BaseCustomGrammarParser.py index 46bd042..8241b01 100644 --- a/tests/parsers/test_BaseCustomGrammarParser.py +++ b/tests/parsers/test_BaseCustomGrammarParser.py @@ -124,7 +124,7 @@ func(a) assert parser.get_parts(["print", "when"]) is not None assert len(parser.error_sink) == 1 assert isinstance(parser.error_sink[0], UnexpectedEofParsingError) - assert parser.error_sink[0].message == "While parsing keyword 'print'." 
+ assert parser.error_sink[0].message == "while parsing keyword 'print'" def test_i_can_double_quoted_strings_are_expanded(self): """ diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index b6f13d2..0996a33 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -4,19 +4,20 @@ import pytest import tests.parsers.parsers_utils from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_BNF +from core.concept import Concept, ConceptParts, DEFINITION_TYPE_BNF, DoNotResolve from core.global_symbols import NotInit from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager from core.sheerka.services.SheerkaExecute import ParserInput +from core.sheerka.services.SheerkaIsAManager import SheerkaIsAManager from parsers.BaseNodeParser import NoMatchingTokenError from parsers.BnfDefinitionParser import BnfDefinitionParser -from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ - Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \ - BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression +from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor, BnfNodeParser, ConceptExpression, Match, NonTerminalNode, \ + OneOrMore, Optional, OrderedChoice, RegExDef, RegExMatch, Sequence, StrMatch, TerminalNode, UnOrderedChoice, \ + VariableExpression, ZeroOrMore from tests.BaseTest import BaseTest from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.evaluators.EvaluatorTestsUtils import python_ret_val -from tests.parsers.parsers_utils import CNC, CN, UTN, CC, SCN, get_test_obj, compare_with_test_object +from tests.parsers.parsers_utils import CC, CMV, CN, CNC, SCN, UTN, compare_with_test_object, get_test_obj cmap = { "one": Concept("one"), @@ -1027,7 +1028,7 @@ class 
TestBnfNodeParser(TestUsingMemoryBasedSheerka): ("one tiny but beautiful shoe", [CNC("foo", "one tiny but beautiful shoe", - x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]), + x=CMV("but", source="tiny but beautiful", x="tiny ", y="beautiful "))]), ]) def test_i_can_match_variable_in_between(self, expr, expected): my_map = { @@ -1896,7 +1897,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_simplify_unordered_choices_that_refer_to_the_same_isa(self): my_map = { - "light_red": Concept("light red"), + "light_red": Concept("light red", key="light_red"), "dark_red": Concept("dark red"), "red colors": Concept("red colors"), "color": Concept("color"), @@ -1916,6 +1917,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): sheerka.set_isa(global_truth_context, my_map["red colors"], my_map["color"]) sheerka.set_isa(global_truth_context, my_map["red colors"], my_map["adjective"]) + # hack to ease the tests + sheerka.get_by_id(my_map["light_red"].id).get_metadata().key = "light_red" + sheerka.om.clear(SheerkaIsAManager.CONCEPTS_IN_GROUPS_ENTRY) + text = "light red table" expected = CNC("qualified table", @@ -1940,7 +1945,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): "isafoo": Concept("x is an foo", body="False", pre="is_question()").def_var("x"), "q": Concept("q ?", body="question(a)").def_var("q") } - + # I need the concept isafoo to fool SyaNodeParser when parsing the sub text 'is an hex ?'" # The parser will try to recognize 'is an foo', will fail and will revert the result to UTN() # It's this UTN that need to be properly handled @@ -1953,7 +1958,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): res = parser.parse(context, ParserInput(text)) assert not res.status - # @pytest.mark.parametrize("parser_input, expected", [ # ("one", [ # (True, [CNC("bnf_one", source="one", one="one", body="one")]), diff --git a/tests/parsers/test_DefConceptParser.py b/tests/parsers/test_DefConceptParser.py index 
dfc04aa..66141d1 100644 --- a/tests/parsers/test_DefConceptParser.py +++ b/tests/parsers/test_DefConceptParser.py @@ -387,8 +387,8 @@ def concept add one to a as: ("def concept name from def", SyntaxErrorNode([], "Empty 'from' declaration.")), ("def concept name from def ", SyntaxErrorNode([], "Empty 'from' declaration.")), ("def concept name from as True", SyntaxErrorNode([], "Empty 'from' declaration.")), - ("def concept name from", UnexpectedEofParsingError("While parsing keyword 'from'.")), - ("def concept name from ", UnexpectedEofParsingError("While parsing keyword 'from'.")), + ("def concept name from", UnexpectedEofParsingError("while parsing keyword 'from'")), + ("def concept name from ", UnexpectedEofParsingError("while parsing keyword 'from'")), ]) def test_i_can_detect_empty_def_declaration(self, text, error): sheerka, context, parser, *concepts = self.init_parser() diff --git a/tests/parsers/test_DefRuleParser.py b/tests/parsers/test_DefRuleParser.py index 1b2e240..0e00f5d 100644 --- a/tests/parsers/test_DefRuleParser.py +++ b/tests/parsers/test_DefRuleParser.py @@ -190,8 +190,8 @@ class TestDefRuleParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(res.body, expected_error) @pytest.mark.parametrize("text, error_message", [ - ("def rule rule_name as", "While parsing 'when'."), - ("def rule rule_name as ", "While parsing 'when'."), + ("def rule rule_name as", "while parsing 'when'"), + ("def rule rule_name as ", "while parsing 'when'"), ]) def test_i_cannot_parse_when_unexpected_eof(self, text, error_message): sheerka, context, parser = self.init_parser() diff --git a/tests/parsers/test_ListComprehensionParser.py b/tests/parsers/test_ListComprehensionParser.py new file mode 100644 index 0000000..c8288b0 --- /dev/null +++ b/tests/parsers/test_ListComprehensionParser.py @@ -0,0 +1,180 @@ +import pytest + +from core.builtin_concepts_ids import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer 
import Token, TokenKind +from parsers.BaseExpressionParser import ParenthesisMismatchError +from parsers.BaseParser import UnexpectedEofParsingError, UnexpectedTokenParsingError +from parsers.ListComprehensionParser import ElementNotFound, FailedToParse, ForNotFound, LeadingParenthesisNotFound, \ + ListComprehensionParser +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +from tests.parsers.parsers_utils import LC, L_EXPR, get_expr_node_from_test_node + + +class TestListComprehensionParser(TestUsingMemoryBasedSheerka): + + def init_parser(self): + sheerka, context = self.init_concepts() + parser = ListComprehensionParser(auto_compile=False) + return sheerka, context, parser + + @pytest.mark.parametrize("text, reason", [ + ("foo", LeadingParenthesisNotFound()), + ("[]", ForNotFound()), + ("[ x ]", ForNotFound()), + ("[ x for]", FailedToParse("target", 5)), + ("[ x for x]", UnexpectedEofParsingError("while parsing comprehension")), + ("[ x for x in ]", UnexpectedEofParsingError("while parsing comprehension")), + ("[ x for x in lst for]", FailedToParse("target", 13)), + ("[", UnexpectedEofParsingError("when start parsing")), + ("[]", ForNotFound()), + ("[ for x in z ]", ElementNotFound()), + ("[ x for in z ]", FailedToParse("target", 6)), + ("[ x for x in ]", UnexpectedEofParsingError("while parsing comprehension")), + ("[ x for x in z if ]", UnexpectedEofParsingError("while parsing comprehension")), + ("[ x for x in z", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))), + ("[ x for x in z if t", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))), + ("zzz [ x for x in z if t ]", LeadingParenthesisNotFound()), + ("[ x for x in z )", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))), + ("[ x for x in z if t )", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))), + ]) + def test_i_cannot_parse_when_not_for_me(self, text, reason): + sheerka, context, parser = 
self.init_parser() + + res = parser.parse(context, ParserInput(text)) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert res.body.reason == [reason] + + def test_i_cannot_parse_when_trailing_elements(self): + sheerka, context, parser = self.init_parser() + + text = "[ x for x in z if t ] zzz" + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) + assert len(res.body.body) == 1 + error = res.body.body[0] + assert isinstance(error, UnexpectedTokenParsingError) + + def test_i_can_parse_a_simple_expression(self): + sheerka, context, parser = self.init_parser() + + expression = "[x for x in ['a', 'b'] if x == 'a']" + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + lc_node = res.body.body + + assert res.status + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + expected = LC(L_EXPR(None, None, "x", source="x "), [(("x", 1), "['a', 'b']", "x == 'a'")], source=expression) + to_compare_to = get_expr_node_from_test_node(expression, expected) + assert lc_node == to_compare_to + + def test_i_can_parse_when_no_if(self): + sheerka, context, parser = self.init_parser() + + expression = "[x for x in ['a', 'b']]" + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + lc_node = res.body.body + + assert res.status + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + expected = LC(L_EXPR(None, None, "x", source="x "), [(("x", 1), "['a', 'b']", None)], source=expression) + to_compare_to = get_expr_node_from_test_node(expression, expected) + assert lc_node == to_compare_to + + def test_i_can_parse_when_element_is_a_tuple(self): + sheerka, context, parser = self.init_parser() + + expression = "[(x + 1, x + 2) for x in [1, 2]]" + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + lc_node = res.body.body + + assert res.status + assert 
sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + elt = L_EXPR("(", ")", "x + 1", "x + 2") + expected = LC(elt, [(("x", 2), "[1, 2]", None)], source=expression) + to_compare_to = get_expr_node_from_test_node(expression, expected) + + assert lc_node == to_compare_to + + def test_i_can_parse_when_element_is_a_tuple_with_missing_parenthesis(self): + sheerka, context, parser = self.init_parser() + + expression = "[x + 1, x + 2 for x in [1, 2]]" + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + lc_node = res.body.body + + assert res.status + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + elt = L_EXPR(None, None, "x + 1", "x + 2", source="x + 1, x + 2 ") + expected = LC(elt, [(("x", 2), "[1, 2]", None)], source=expression) + to_compare_to = get_expr_node_from_test_node(expression, expected) + + assert lc_node == to_compare_to + + def test_i_can_parse_when_element_is_a_context_that_contains_for(self): + sheerka, context, parser = self.init_parser() + + expression = "[handle x for me and for you for x in [1, 2]]" + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + lc_node = res.body.body + + assert res.status + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + elt = L_EXPR(None, None, "handle x for me and for you", source="handle x for me and for you ") + expected = LC(elt, [(("x", 1), "[1, 2]", None)], source=expression) + to_compare_to = get_expr_node_from_test_node(expression, expected) + + assert lc_node == to_compare_to + + def test_i_can_parse_when_multiple_generators(self): + sheerka, context, parser = self.init_parser() + + expression = "[(x, y) for x in ['a', 'b'] if x == 'a' for y in ['c', 'd'] if y == 'c']" + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + lc_node = res.body.body + + assert res.status + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + elt = L_EXPR("(", ")", "x", "y") + expected = LC(elt, + 
[(("x", 1), "['a', 'b']", "x == 'a'"), + (("y", 1), "['c', 'd']", "y == 'c'")], + source=expression) + to_compare_to = get_expr_node_from_test_node(expression, expected) + + assert lc_node == to_compare_to + + def test_i_can_parse_when_multiple_generators_when_no_if(self): + sheerka, context, parser = self.init_parser() + + expression = "[x, y for x in ['a', 'b'] for y in ['c', 'd'] if y == 'c']" + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + lc_node = res.body.body + + assert res.status + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + elt = L_EXPR(None, None, "x", "y", source="x, y ") + expected = LC(elt, + [(("x", 1), "['a', 'b']", None), + (("y", 1), "['c', 'd']", "y == 'c'")], + source=expression) + to_compare_to = get_expr_node_from_test_node(expression, expected) + + assert lc_node == to_compare_to diff --git a/tests/parsers/test_ListParser.py b/tests/parsers/test_ListParser.py new file mode 100644 index 0000000..100e7f8 --- /dev/null +++ b/tests/parsers/test_ListParser.py @@ -0,0 +1,69 @@ +import pytest + +from core.builtin_concepts_ids import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import Token, TokenKind +from parsers.BaseExpressionParser import ParenthesisMismatchError, end_parenthesis_mapping +from parsers.BaseParser import ErrorSink +from parsers.ListParser import ListParser +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +from tests.parsers.parsers_utils import EXPR, L_EXPR, get_expr_node_from_test_node + +semi_colon = Token(TokenKind.SEMICOLON, ";", -1, -1, -1) +or_token = Token(TokenKind.IDENTIFIER, "or", -1, -1, -1) + + +class TestListParser(TestUsingMemoryBasedSheerka): + def init_parser(self, sep=None): + sheerka, context = self.init_concepts() + parser = ListParser(sep) + return sheerka, context, parser + + @pytest.mark.parametrize("expression, sep, expected", [ + ("()", None, L_EXPR("(", ")")), + ("(x , foo 
y,z)", None, L_EXPR("(", ")", EXPR("x"), EXPR("foo y"), EXPR("z"), source="(x , foo y,z)")), + ("x , foo y,z", None, L_EXPR(None, None, EXPR("x"), EXPR("foo y"), EXPR("z"), source="x , foo y,z")), + ("x", None, L_EXPR(None, None, EXPR("x"))), + ("[x, foo y, z]", None, L_EXPR("[", "]", EXPR("x"), EXPR("foo y"), EXPR("z"))), + ("{x, foo y, z}", None, L_EXPR("{", "}", EXPR("x"), EXPR("foo y"), EXPR("z"))), + ("(x; y; z)", semi_colon, L_EXPR("(", ")", EXPR("x"), EXPR("y"), EXPR("z"), sep=semi_colon, source="(x; y; z)")), + ("x; y; z", semi_colon, L_EXPR(None, None, EXPR("x"), EXPR("y"), EXPR("z"), sep=semi_colon, source="x; y; z")), + ("x or y or z", or_token, L_EXPR(None, None, EXPR("x"), EXPR("y"), EXPR("z"), sep=or_token, source="x or y or z")), + ]) + def test_i_can_parse_expression(self, expression, sep, expected): + sheerka, context, parser = self.init_parser(sep) + + expected = get_expr_node_from_test_node(expression, expected) + res = parser.parse(context, ParserInput(expression)) + wrapper = res.body + expressions = res.body.body + + assert res.status + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert expressions == expected + + @pytest.mark.parametrize("expression, starting", [ + ("(", TokenKind.LPAR), + ("(x, y", TokenKind.LPAR), + ("{x, y", TokenKind.LBRACE), + ("[x, y", TokenKind.LBRACKET), + ]) + def test_i_cannot_parse_when_missing_trailing_parenthesis(self, expression, starting): + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, ParserInput(expression)) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) + assert res.body.body == [ParenthesisMismatchError(end_parenthesis_mapping[starting])] + + def test_none_is_return_when_empty_parser_input(self): + sheerka, context, parser = self.init_parser() + + parser_input = ParserInput(" ").reset() + parser_input.next_token() + error_sink = ErrorSink() + + parsed = parser.parse_input(context, parser_input, error_sink) + + 
assert parsed is None diff --git a/tests/parsers/test_LogicalOperatorParser.py b/tests/parsers/test_LogicalOperatorParser.py index c80192c..38b6e4a 100644 --- a/tests/parsers/test_LogicalOperatorParser.py +++ b/tests/parsers/test_LogicalOperatorParser.py @@ -100,12 +100,12 @@ class TestLogicalOperatorParser(TestUsingMemoryBasedSheerka): assert expressions == expected @pytest.mark.parametrize("expression, expected_errors", [ - ("one or", [UnexpectedEofParsingError("When parsing 'or'")]), - ("one and", [UnexpectedEofParsingError("When parsing 'and'")]), - ("and one", [LeftPartNotFoundError()]), - ("or one", [LeftPartNotFoundError()]), - ("or", [LeftPartNotFoundError(), UnexpectedEofParsingError("When parsing 'or'")]), - ("and", [LeftPartNotFoundError(), UnexpectedEofParsingError("When parsing 'and'")]), + ("one or", [UnexpectedEofParsingError("while parsing 'or'")]), + ("one and", [UnexpectedEofParsingError("while parsing 'and'")]), + ("and one", [LeftPartNotFoundError("and", 0)]), + ("or one", [LeftPartNotFoundError("or", 0)]), + ("or", [LeftPartNotFoundError("or", 0), UnexpectedEofParsingError("while parsing 'or'")]), + ("and", [LeftPartNotFoundError("and", 0), UnexpectedEofParsingError("while parsing 'and'")]), ]) def test_i_can_detect_error(self, expression, expected_errors): sheerka, context, parser = self.init_parser() diff --git a/tests/parsers/test_SequenceNodeParser.py b/tests/parsers/test_SequenceNodeParser.py index 2c40179..c9d4507 100644 --- a/tests/parsers/test_SequenceNodeParser.py +++ b/tests/parsers/test_SequenceNodeParser.py @@ -5,8 +5,7 @@ from core.concept import Concept, DEFINITION_TYPE_DEF from core.sheerka.services.SheerkaExecute import ParserInput from parsers.SequenceNodeParser import SequenceNodeParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -from tests.parsers.parsers_utils import compute_expected_array, CN, SCN, get_test_obj, compare_with_test_object, \ - UTN +from tests.parsers.parsers_utils import CN, 
SCN, UTN, compare_with_test_object, compute_expected_array, get_test_obj class TestSequenceNodeParser(TestUsingMemoryBasedSheerka): @@ -463,3 +462,24 @@ class TestSequenceNodeParser(TestUsingMemoryBasedSheerka): assert concept_found.name == "boys" assert concept_found.key == "boys" assert concept_found.get_prop(BuiltinConcepts.PLURAL) == boy + + def test_i_can_set_body_for_plurals_that_are_a_set(self): + concepts_map = { + "boy": Concept("boy"), + "girl": Concept("girl"), + "human": Concept("human"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + global_truth_concept = self.get_context(sheerka, global_truth=True) + sheerka.set_isa(global_truth_concept, concepts_map["boy"], concepts_map["human"]) + sheerka.set_isa(global_truth_concept, concepts_map["girl"], concepts_map["human"]) + + res = parser.parse(context, ParserInput("humans")) + + assert res.status + lexer_nodes = res.body.body + assert len(lexer_nodes) == 1 + concept_found = lexer_nodes[0].concept + + assert concept_found.get_metadata().body == "get_set_elements(c:|1003:)" diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index 10c5931..52874e4 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -1,47 +1,40 @@ import pytest -import tests.parsers.parsers_utils -from core.builtin_concepts import BuiltinConcepts +from core.builtin_concepts import ReturnValueConcept +from core.builtin_concepts_ids import BuiltinConcepts +from core.builtin_helpers import get_new_variables_definitions from core.concept import Concept from core.global_symbols import CONCEPT_COMPARISON_CONTEXT +from core.sheerka.Sheerka import RECOGNIZED_BY_KEY from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer -from core.utils import NextIdManager +from core.tokenizer import Tokenizer, comparable_tokens +from core.utils import get_text_from_tokens +from parsers.BaseNodeParser import ConceptNode, 
SourceCodeNode, UnrecognizedTokensNode from parsers.PythonParser import PythonNode -from parsers.SyaNodeParser import SyaNodeParser, SyaConceptParserHelper, SyaAssociativity, \ - NoneAssociativeSequenceError, TooManyParametersFoundError, InFixToPostFix, ParenthesisMismatchError +from parsers.SyaNodeParser import FunctionDetected, NoSyaConceptFound, NotEnoughParameters, SyaConceptParser, \ + SyaNodeParser, \ + SyaTokensParser, \ + TokensNotFound, TooManyParameters from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -from tests.parsers.parsers_utils import UTN, SCWC, CNC, SCN, CIO, CN, compute_debug_array, CMV, get_test_obj, \ - compare_with_test_object - - -def compute_expected_array(concepts_map, expression, expected): - return tests.parsers.parsers_utils.compute_expected_array(concepts_map, expression, expected, sya=True) - +from tests.parsers.parsers_utils import CC, CN, CNC, SCN, UTN, compute_expected_array, get_test_obj, \ + prepare_nodes_comparison cmap = { "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), - "four": Concept("four"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), "plus": Concept("a plus b").def_var("a").def_var("b"), - "minus": Concept("a minus b").def_var("a").def_var("b"), "mult": Concept("a mult b").def_var("a").def_var("b"), "prefixed": Concept("a prefixed").def_var("a"), "suffixed": Concept("suffixed a").def_var("a"), - "infix": Concept("a infix b").def_var("a").def_var("b"), - "?": Concept("a ? 
b : c").def_var("a").def_var("b").def_var("c"), - "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), - "square": Concept("square(a)").def_var("a"), - "foo bar": Concept("foo bar(a)").def_var("a"), - "long infixed": Concept("a long infixed b").def_var("a").def_var("b"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - "is a concept": Concept("c is a concept").def_var("c"), + "infixed": Concept("a infixed b").def_var("a").def_var("b"), + "begin": Concept("begin x long end").def_var("x"), } -class TestSyaNodeParser(TestUsingMemoryBasedSheerka): +class TestSyaNodeParser2(TestUsingMemoryBasedSheerka): shared_ontology = None @classmethod @@ -53,996 +46,1274 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): cmap["plus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") cmap["mult"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") - cmap["minus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") - TestSyaNodeParser.sheerka.set_is_greater_than(context, - BuiltinConcepts.PRECEDENCE, - cmap["mult"], - cmap["plus"], - CONCEPT_COMPARISON_CONTEXT) - TestSyaNodeParser.sheerka.set_is_greater_than(context, - BuiltinConcepts.PRECEDENCE, - cmap["mult"], - cmap["minus"], - CONCEPT_COMPARISON_CONTEXT) + TestSyaNodeParser2.sheerka.set_is_greater_than(context, + BuiltinConcepts.PRECEDENCE, + cmap["mult"], + cmap["plus"], + CONCEPT_COMPARISON_CONTEXT) cls.shared_ontology = sheerka.get_ontology(context) sheerka.pop_ontology(context) - def init_parser(self, - my_concepts_map=None, - sya_def=None, - post_init_concepts=None, - **kwargs): - if my_concepts_map is None: + def initialize_test(self, concepts_map=None): + if concepts_map is None: sheerka, context = self.init_test().unpack() sheerka.add_ontology(context, self.shared_ontology) - concepts = cmap.values() - init_from_sheerka = kwargs.get("init_from_sheerka", True) else: - sheerka, context, *concepts = self.init_test().with_concepts(*my_concepts_map.values(), 
**kwargs).unpack() - for i, pair in enumerate(my_concepts_map): - my_concepts_map[pair] = concepts[i] - init_from_sheerka = kwargs.get("init_from_sheerka", False) + sheerka, context, *updated = super().init_test().with_concepts(*concepts_map.values(), + create_new=True).unpack() + for i, concept_name in enumerate(concepts_map): + concepts_map[concept_name] = updated[i] - if post_init_concepts: - post_init_concepts(sheerka, context) + return sheerka, context - if sya_def: - sya_def_to_use = {} - for k, v in sya_def.items(): - sya_def_to_use[k.id] = v - else: - sya_def_to_use = None + def init_parser(self, concepts_map=None): + + sheerka, context = self.initialize_test(concepts_map) + parser = SyaNodeParser() - if init_from_sheerka: - parser = SyaNodeParser(sheerka=sheerka) - else: - parser = SyaNodeParser() - if my_concepts_map: - parser.init_from_concepts(context, concepts, sya=sya_def_to_use) return sheerka, context, parser + def get_real_node(self, concepts_map, expression, source): + + expr_tokens = list(Tokenizer(expression)) + + def _to_real_obj(_test_node): + if isinstance(_test_node, (CN, CNC)): + concept = _test_node.concept if _test_node.concept else concepts_map[_test_node.concept_key] + + if hasattr(_test_node, "compiled"): + for compiled_name, compiled_value in _test_node.compiled.items(): + concept.get_compiled()[compiled_name] = _to_real_obj(compiled_value) + + concept.get_metadata().variables = get_new_variables_definitions(concept) + + tokens = expr_tokens[_test_node.start:_test_node.start + 1] + return ConceptNode(concept, + _test_node.start, + _test_node.end, + tokens=tokens, + source=_test_node.source or get_text_from_tokens(tokens)) + + if isinstance(_test_node, UTN): + tokens = expr_tokens[_test_node.start:_test_node.start + 1] + return UnrecognizedTokensNode(_test_node.start, + _test_node.end, + tokens=tokens) + + if isinstance(_test_node, SCN): + tokens = expr_tokens[_test_node.start:_test_node.start + 1] + return 
SourceCodeNode(_test_node.start, + _test_node.end, + tokens=tokens, ) + + raise NotImplementedError(_test_node) + + test_nodes = compute_expected_array(concepts_map, expression, [source]) + real_nodes = [_to_real_obj(n) for n in test_nodes] + return real_nodes[0] + @staticmethod - def compare_results(res, expected_sequences, concept_map, expression, validate_errors=True): - assert len(res) == len(expected_sequences) - for res_i, expected in zip(res, expected_sequences): - if validate_errors: - assert len(res_i.errors) == 0 - expected_array = compute_expected_array(concept_map, expression, expected) - res_i_as_test_obj = get_test_obj(res_i.out, expected_array) - assert res_i_as_test_obj == expected_array + def resolve_expected(actual, concepts_map, expression, array_of_expected): + resolved_expected = compute_expected_array(concepts_map, expression, array_of_expected) + return get_test_obj(actual, resolved_expected[0]) - @pytest.mark.parametrize("expression, expected_sequences", [ - ("one plus two", [["one", "two", "plus"]]), - ("1 + 1 plus two", [["1 + 1", "two", "plus"]]), - ("one + two plus three", [ - ["one", " + ", "two", "three", "plus"], - ["one + two", "three", "plus"]]), - ("twenty one plus two", [ - ["twenty ", "one", "two", "plus"], - [CN("twenties", "twenty one"), "two", "plus"] - ]), - ("x$!# plus two", [["x$!#", "two", "plus"]]), - - ("one plus 1 + 1", [["one", "1 + 1", "plus"]]), - ("1 + 1 plus 2 + 2", [["1 + 1", "2 + 2", "plus"]]), - ("one + two plus 1 + 1", [ - ["one", " + ", "two", "1 + 1", "plus"], - ["one + two", "1 + 1", "plus"] - ]), - ("twenty one plus 1 + 1", [ - ["twenty ", "one", "1 + 1", "plus"], - [CN("twenties", "twenty one", 0, 2), "1 + 1", "plus"] - ]), - ("x$!# plus 1 + 1", [["x$!#", "1 + 1", "plus"]]), - - ("one plus two + three", [ - ["one", "two", "plus", " + ", "three"], - ["one", "two + three", "plus"], - ]), - ("1 + 1 plus two + three", [ - ["1 + 1", "two", "plus", (" + ", 1), "three"], - ["1 + 1", "two + three", "plus"], - ]), 
- ("one + two plus two + three", [ - ["one", " + ", "two", ("two", 1), "plus", (" + ", 1), "three"], - ["one + two", ("two", 1), "plus", (" + ", 1), "three"], - ["one", " + ", "two", "two + three", "plus"], - ["one + two", "two + three", "plus"], - ]), - ("twenty one plus two + three", [ - ["twenty ", "one", "two", "plus", " + ", "three"], - [CN("twenties", "twenty one", 0, 2), "two", "plus", " + ", "three"], - ["twenty ", "one", "two + three", "plus"], - [CN("twenties", "twenty one", 0, 2), "two + three", "plus"], - ]), - ("x$!# plus two + three", [ - ["x$!#", "two", "plus", " + ", "three"], - ["x$!#", "two + three", "plus"], - ]), - - ("one plus twenty two", [ - ["one", "twenty ", "plus", "two"], - ["one", CN("twenties", "twenty two", 4, 6), "plus"], - ]), - ("1 + 1 plus twenty one", [ - ["1 + 1", "twenty ", "plus", "one"], - ["1 + 1", CN("twenties", "twenty one", 8, 10), "plus"], - ]), - ("one + two plus twenty one", [ - ["one", " + ", "two", "twenty ", "plus", ("one", 1)], - ["one + two", "twenty ", "plus", ("one", 1)], - ["one", " + ", "two", CN("twenties", "twenty one", 8, 10), "plus"], - ["one + two", CN("twenties", "twenty one", 8, 10), "plus"], - ]), - ("twenty one plus twenty two", - [ - ["twenty ", "one", ("twenty ", 1), "plus", "two"], - [CN("twenties", "twenty one", 0, 2), ("twenty ", 1), "plus", "two"], - ["twenty ", "one", CN("twenties", "twenty two", 6, 8), "plus"], - [CN("twenties", "twenty one", 0, 2), CN("twenties", "twenty two", 6, 8), "plus"], - ]), - ("x$!# plus twenty two", [ - ["x$!#", "twenty ", "plus", "two"], - ["x$!#", CN("twenties", "twenty two", 7, 9), "plus"] - ]), - - ("one plus z$!#", [["one", "z$!#", "plus"]]), - ("1 + 1 plus z$!#", [["1 + 1", "z$!#", "plus"]]), - ("one + two plus z$!#", [ - ["one", " + ", "two", "z$!#", "plus"], - ["one + two", "z$!#", "plus"], - ]), - ("twenty one plus z$!#", [ - ["twenty ", "one", "z$!#", "plus"], - [CN("twenties", "twenty one", 0, 2), "z$!#", "plus"], - ]), - ("x$!# plus z$!#", [["x$!#", 
"z$!#", "plus"]]), + @pytest.mark.parametrize("concept_key, expected_list", [ + ["a long token name", [("a long token name", 0)]], + ["__var__0 __var__1 __var__2", [("", 3)]], + ["__var__0 __var__1 prefixed", [(" prefixed", 2)]], + ["suffixed __var__0 __var__1", [("suffixed ", 0), ["", 2]]], + ["__var__0 __var__1 infixed __var__0 __var__1", [(" infixed ", 2), ["", 2]]], + ["if __var__0 __var__1 then __var__2 end", [("if ", 0), (" then ", 2), (" end", 1)]] ]) - def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences): - sheerka, context, parser = self.init_parser() + def test_i_can_initialize_expected_parameters(self, concept_key, expected_list): + resolved_expected_list = [] + for expected in expected_list: + source, nb = expected + tokens = list(Tokenizer(source, yield_eof=False)) + resolved_expected_list.append((tokens, nb)) - res = parser.infix_to_postfix(context, ParserInput(expression)) + with comparable_tokens(): + assert SyaConceptParser.compute_expected_parameters(concept_key) == resolved_expected_list - self.compare_results(res, expected_sequences, cmap, expression) + def test_i_can_concept_parse_simple_infixed_concept(self): + sheerka, context = self.initialize_test() - @pytest.mark.parametrize("expression, expected_sequences", [ - ("one plus plus plus 1 + 1", [["one", "1 + 1", "plus plus plus"]]), - ("x$!# another long name infix twenty two", [ - ["x$!#", "twenty ", "another long name infix", "two"], - ["x$!#", CN("twenties", "twenty two", 13, 15), "another long name infix"], - ]), - ]) - def test_i_can_post_fix_infix_concepts_with_long_name(self, expression, expected_sequences): + expression = "one plus two" + param1 = self.get_real_node(cmap, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, 
cmap["plus"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("plus", a=CNC("one"), b=CNC("two"), source=expression) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two")] + + def test_i_can_concept_parse_simple_prefixed_concept(self): + sheerka, context = self.initialize_test() + + expression = "one prefixed" + param1 = self.get_real_node(cmap, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, cmap["prefixed"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("prefixed", a=CNC("one"), source=expression) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one")] + + def test_i_can_concept_parse_simple_suffixed_concept(self): + sheerka, context = self.initialize_test() + + expression = "suffixed one" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, cmap["suffixed"], 
tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("suffixed", a=CNC("one"), source=expression) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one")] + + def test_i_can_concept_parse_simple_ternary_concept(self): concepts_map = { - "plus plus plus": Concept("a plus plus plus b").def_var("a").def_var("b"), - "another long name infix": Concept("a another long name infix b").def_var("a").def_var("b"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c") } - sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) + sheerka, context = self.initialize_test(concepts_map) - res = parser.infix_to_postfix(context, ParserInput(expression)) + expression = "if one then two else three end" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] - self.compare_results(res, expected_sequences, concepts_map, expression) + concept_parser = SyaConceptParser(tokens_parser, concepts_map["if"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node - @pytest.mark.parametrize("expression, expected_sequences", [ - ("one prefixed", [["one", "prefixed"]]), - ("1 + 1 prefixed", [["1 + 1", "prefixed"]]), - ("one + two prefixed", [ - ["one", " + ", "two", "prefixed"], - ["one + two", "prefixed"], - ]), - ("twenty 
one prefixed", [ - ["twenty ", "one", "prefixed"], - [CN("twenties", "twenty one", 0, 2), "prefixed"], - ]), - ("x$!# prefixed", [["x$!#", "prefixed"]]), - ]) - def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences): - sheerka, context, parser = self.init_parser() + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 - res = parser.infix_to_postfix(context, ParserInput(expression)) + expected = CNC("if", a=CNC("one"), b=CNC("two"), c=CNC("three"), source=expression) + resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) - self.compare_results(res, expected_sequences, cmap, expression) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two"), ("c", "three")] - @pytest.mark.parametrize("expression, expected_sequences", [ - ("one prefixed prefixed", [["one", "prefixed prefixed"]]), - ("1 + 1 prefixed prefixed", [["1 + 1", "prefixed prefixed"]]), - ("one + two prefixed prefixed", [ - ["one", " + ", "two", "prefixed prefixed"], - ["one + two", "prefixed prefixed"], - ]), - ("twenty one prefixed prefixed", [ - ["twenty ", "one", "prefixed prefixed"], - [CN("twenties", "twenty one", 0, 2), "prefixed prefixed"], - ]), - ("x$!# prefixed prefixed", [["x$!#", "prefixed prefixed"]]), - - ("one long name prefixed", [["one", "long name prefixed"]]), - ("1 + 1 long name prefixed", [["1 + 1", "long name prefixed"]]), - ("one + two long name prefixed", [ - ["one", " + ", "two", "long name prefixed"], - ["one + two", "long name prefixed"], - ]), - ("twenty one long name prefixed", [ - ["twenty ", "one", "long name prefixed"], - [CN("twenties", "twenty one", 0, 2), "long name prefixed"], - ]), - ("x$!# long name prefixed", [["x$!#", "long name prefixed"]]), - ]) - def test_i_can_post_fix_prefixed_concepts_with_long_names(self, 
expression, expected_sequences): + def test_i_can_concept_parse_concepts_with_long_names(self): concepts_map = { - "prefixed prefixed": Concept("a prefixed prefixed").def_var("a"), - "long name prefixed": Concept("a long name prefixed").def_var("a"), "one": Concept("one"), "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + "plus": Concept("a plus plus b").def_var("a").def_var("b") } - sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) + sheerka, context = self.initialize_test(concepts_map) - res = parser.infix_to_postfix(context, ParserInput(expression)) + expression = "one plus plus two" + param1 = self.get_real_node(concepts_map, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] - self.compare_results(res, expected_sequences, concepts_map, expression) + concept_parser = SyaConceptParser(tokens_parser, concepts_map["plus"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node - @pytest.mark.parametrize("expression, expected_sequences", [ - ("suffixed one", [["one", "suffixed"]]), - ("suffixed 1 + 1", [["1 + 1", "suffixed"]]), - ("suffixed one + two", [ - ["one", "suffixed", " + ", "two"], - ["one + two", "suffixed"], - ]), - ("suffixed twenty one", [ - ["twenty ", "suffixed", "one"], - [CN("twenties", "twenty one", 2, 4), "suffixed"], - ]), - ("suffixed x$!#", [["x$!#", "suffixed"]]), - ]) - def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences): - sheerka, context, parser = self.init_parser() + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 - res = parser.infix_to_postfix(context, ParserInput(expression)) + expected = CNC("plus", a=CNC("one"), b=CNC("two")) + 
resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) - self.compare_results(res, expected_sequences, cmap, expression) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two")] - @pytest.mark.parametrize("expression, expected", [ - ("suffixed suffixed one", ["one", "suffixed suffixed"]), - ("long name suffixed one", ["one", "long name suffixed"]), - ]) - def test_i_can_post_fix_suffixed_concepts_with_long_names(self, expression, expected): + def test_i_can_concept_parse_infix_when_multiple_variables_are_expected(self): concepts_map = { - "suffixed suffixed": Concept("suffixed suffixed a").def_var("a"), - "long name suffixed": Concept("long name suffixed a").def_var("a"), - "one": Concept("one"), - "two": Concept("two"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected_sequences", [ - ("one ? two : three", [["one", "two", "three", "?"]]), - ("one ? baz qux : two", [["one", "baz qux", "two", "?"]]), - ("1+1 ? one + two : twenty one", [ - ["1+1", "one", " + ", "two"], # error is detected so the parsing has stopped - ["1+1", "one + two", "twenty ", "?", ("one", 1)], - ["1+1", "one + two", CN("twenties", "twenty one"), "?"], - ]), - ("x$!# ? 
y$!# : z$!#", [["x$!#", "y$!#", "z$!#", "?"]]), - - ("if one then two else three end", [["one", "two", "three", "if"]]), - ("if 1+1 then x$!# else twenty one end", [ - ["1+1", "x$!#", "twenty ", "one"], # an error is detected - ["1+1", "x$!#", CN("twenties", "twenty one"), "if"], - ]), - ("if x$!# then one + two else z$!# end", [ - ["x$!#", "one", " + ", "two"], # error is detected so the parsing has stopped - ["x$!#", "one + two", "z$!#", "if"], - ]), - ]) - def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences): - """ - The purpose of this test is to validate concepts - that have at least 3 parameters separated by tokens - Example : - var_0 token var_1 token var_2 - token var_0 token var_1 token var_2 - token var_0 token var_1 token var_2 token - var_0 token var_1 token var_2 token - etc... - :return: - """ - - sheerka, context, parser = self.init_parser() - - res = parser.infix_to_postfix(context, ParserInput(expression)) - - self.compare_results(res, expected_sequences, cmap, expression, validate_errors=False) - - @pytest.mark.parametrize("expression, expected_sequences", [ - ("one ? ? two : : three", [["one", "two", "three", "? ?"]]), - ("1+1 ? ? one + two : : twenty one", [ - ["1+1", "one", " + ", "two"], # error - ["1+1", "one + two", "twenty ", "? ?", ("one", 1)], - ["1+1", "one + two", CN("twenties", "twenty one"), "? ?"], - ]), - - ("if if one then then two else else three end end ", [["one", "two", "three", "if if"]]), - ("if if 1+1 then then x$!# else else twenty one end end ", [ - ["1+1", "x$!#", "twenty ", "one"], # error - ["1+1", "x$!#", CN("twenties", "twenty one"), "if if"]]), - ]) - def test_i_can_post_fix_ternary_concept_with_long_names(self, expression, expected_sequences): - concepts_map = { - "? ?": Concept("a ? ? 
b : : c").def_var("a").def_var("b").def_var("c"), - "if if": Concept("if if a then then b else else c end end").def_var("a").def_var("b").def_var("c"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), - "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + "four": Concept("four"), + "infixed": Concept("a b infixed c d").def_var("a").def_var("b").def_var("c").def_var("d") } - sheerka, context, parser = self.init_parser(concepts_map, create_new=True, init_from_sheerka=True) + sheerka, context = self.initialize_test(concepts_map) - res = parser.infix_to_postfix(context, ParserInput(expression)) + expression = "one two infixed three four" + param1 = self.get_real_node(concepts_map, expression, "one") + param2 = self.get_real_node(concepts_map, expression, "two") + parser_input = ParserInput(expression).reset() + parser_input.seek(4) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1, param2] - self.compare_results(res, expected_sequences, concepts_map, expression, validate_errors=False) + concept_parser = SyaConceptParser(tokens_parser, concepts_map["infixed"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node - @pytest.mark.parametrize("expression, expected", [ - ("foo bar baz", ["baz", "bar", "foo"]), - ("foo bar x$!#", ["x$!#", "bar", "foo"]), - ("foo bar 1 + 1", ["1 + 1", "bar", "foo"]), - ]) - def test_i_can_post_fix_suffixed_unary_composition(self, expression, expected): + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("infixed", a=CNC("one"), b=CNC("two"), c=CNC("three"), d=CNC("four"), source=expression) + resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + + assert concept_node_as_test_obj == resolved_expected + 
assert concept_node.concept.get_metadata().variables == [("a", "one"), + ("b", "two"), + ("c", "three"), + ("d", "four")] + + def test_i_can_concept_parse_ternary_when_multiple_variables_are_expected(self): concepts_map = { - "foo": Concept("foo a").def_var("a"), - "bar": Concept("bar a").def_var("a"), - "baz": Concept("baz"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected", [ - ("baz bar foo", ["baz", "bar", "foo"]), - ("x$!# bar foo", ["x$!#", "bar", "foo"]), - ("1 + 1 bar foo", ["1 + 1", "bar", "foo"]), - ]) - def test_i_can_post_fix_prefixed_unary_composition(self, expression, expected): - concepts_map = { - "foo": Concept("a foo").def_var("a"), - "bar": Concept("a bar").def_var("a"), - "baz": Concept("baz"), - } - sya_def = { - concepts_map["foo"]: (5, SyaAssociativity.Left), - concepts_map["bar"]: (5, SyaAssociativity.Left), # precedence greater than plus - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected", [ - ("one plus two mult three", ["one", "two", "three", "mult", "plus"]), - ("one mult two plus three", ["one", "two", "mult", "three", "plus"]), - ("(one plus two) mult three", ["one", "two", "plus", "three", "mult"]), - ("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]), - ]) - def test_i_can_post_fix_binary_with_precedence(self, expression, 
expected): - sheerka, context, parser = self.init_parser() - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(cmap, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - def test_i_can_post_fix_unary_with_precedence(self): - concepts_map = { - "suffixed": Concept("suffixed a").def_var("a"), - "prefixed": Concept("a prefixed").def_var("a"), - "a": Concept("a"), - } - - sya_def = { - concepts_map["prefixed"]: (10, SyaAssociativity.Left), - concepts_map["suffixed"]: (5, SyaAssociativity.Right), - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) - - expression = "suffixed a prefixed" - expected = ["a", "prefixed", "suffixed"] - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - # change the precedence - sya_def = { - concepts_map["prefixed"]: (5, SyaAssociativity.Left), - concepts_map["suffixed"]: (10, SyaAssociativity.Right), - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) - - expression = "suffixed a prefixed" - expected = ["a", "suffixed", "prefixed"] - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - def test_i_can_post_fix_right_associated_binary(self): - concepts_map = { - "equals": Concept("a equals b").def_var("a").def_var("b"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), + "if": Concept("if a b then c end").def_var("a").def_var("b").def_var("c") } + sheerka, context = 
self.initialize_test(concepts_map) - sya_def = { - concepts_map["equals"]: (None, SyaAssociativity.Right), - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + expression = "if one two then three end" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] - expression = "one equals two equals three" - res = parser.infix_to_postfix(context, ParserInput(expression)) + concept_parser = SyaConceptParser(tokens_parser, concepts_map["if"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node - expected = ["one", "two", "three", ("equals", 1), "equals"] - expected_array = compute_expected_array(concepts_map, expression, expected) + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array + expected = CNC("if", a=CNC("one"), b=CNC("two"), c=CNC("three"), source=expression) + resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) - def test_i_can_post_fix_left_associated_binary(self): + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two"), ("c", "three")] + + def test_i_can_concept_parse_unrecognized(self): concepts_map = { + "plus": Concept("a plus b").def_var("a").def_var("b") + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "one plus two" + param1 = self.get_real_node(concepts_map, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + 
tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, concepts_map["plus"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("plus", a="one", b="two", source=expression) + resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two")] + + def test_i_can_concept_parse_source_code(self): + sheerka, context = self.initialize_test() + + expression = "1 + 1 plus 2 + 2" + param1 = SourceCodeNode(0, 5, source="1 + 1 ") + parser_input = ParserInput(expression).reset() + parser_input.seek(6) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("plus", a=SCN(source="1 + 1 "), b=SCN(source="2 + 2")) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "1 + 1 "), ("b", "2 + 2")] + + def test_i_can_concept_parse_concepts_composition(self): + sheerka, context = self.initialize_test() + + expression = "one plus two mult three" + param1 = self.get_real_node(cmap, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = 
SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("plus", a=CNC("one"), b=CNC("mult", a=CN("two"), b=CN("three")), source=expression) + _stack, _expected = prepare_nodes_comparison(cmap, expression, concept_node, expected) + assert _stack == _expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two mult three")] + + def test_i_can_concept_parse_embedded_concepts(self): + sheerka, context = self.initialize_test() + + expression = "begin one plus two long end" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, cmap["begin"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("begin", x=CNC("plus", a=CNC("one"), b=CNC("two")), source=expression) + _stack, _expected = prepare_nodes_comparison(cmap, expression, concept_node, expected) + assert _stack == _expected + assert concept_node.concept.get_metadata().variables == [("x", "one plus two")] + + def test_i_can_concept_parse_concepts_composition_when_unrecognized_yield_multiple_results(self): + sheerka, context = self.initialize_test() + + expression = "one plus twenty two mult three" + param1 = self.get_real_node(cmap, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = 
[param1] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + # the first part is found as the concept_node + expected = CNC("plus", a=CNC("one"), b=UTN("twenty ")) + _stack, _expected = prepare_nodes_comparison(cmap, expression, concept_node, expected) + assert _stack == _expected + # the second part can be found in parameters + expected = [CNC("mult", a=CNC("two"), b=CNC("three"))] + _stack, _expected = prepare_nodes_comparison(cmap, expression, concept_parser.parameters, expected) + assert _stack == _expected + + # there is a fork + assert len(sya_node_parser.forks) == 1 + forked_parser = sya_node_parser.forks[0].state_context + + forked_parser.parse() + assert not forked_parser.has_error() + expected = CNC("plus", a=CNC("one"), b=CNC("mult", a=CN("twenties", source="twenty two"), b=CN("three"))) + _stack, _expected = prepare_nodes_comparison(cmap, expression, forked_parser.concept_node, expected) + assert _stack == _expected + + def test_i_can_concept_parse_when_too_many_suffix_parameters(self): + sheerka, context = self.initialize_test() + + expression = "one plus two three" + param1 = self.get_real_node(cmap, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + + assert not concept_parser.has_error() + expected = CNC("plus", a=CNC("one"), b=CNC("two"), source="one plus two") + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_parser.concept_node, resolved_expected) + assert concept_node_as_test_obj == 
resolved_expected + + assert not concept_parser.has_error() + assert concept_parser.concept_node.start == 0 + assert concept_parser.concept_node.end == 4 + + def test_i_can_concept_parse_when_too_many_prefix_parameters(self): + sheerka, context = self.initialize_test() + + expression = "one two plus three" + param1 = self.get_real_node(cmap, expression, "one") + param2 = self.get_real_node(cmap, expression, "two") + parser_input = ParserInput(expression).reset() + parser_input.seek(4) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1, param2] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + + assert not concept_parser.has_error() + expected = CNC("plus", a=CNC("two"), b=CNC("three"), source="two plus three") + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_parser.concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + + assert not concept_parser.has_error() + assert concept_parser.concept_node.start == 2 + assert concept_parser.concept_node.end == 6 + + def test_i_can_concept_rollback_when_the_incorrect_concept_is_parsed(self): + concepts_map = { + "one": Concept("one"), + "twenty one": Concept("twenty one"), + "twenty two": Concept("twenty two x").def_var("x"), "plus": Concept("a plus b").def_var("a").def_var("b"), + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "one plus twenty one" + param1 = self.get_real_node(concepts_map, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, concepts_map["plus"], tokens_parser.stack) + 
concept_parser.parse() + + assert not concept_parser.has_error() + concept_node = concept_parser.concept_node + + expected = CNC("plus", a=CNC("one"), b=CNC("twenty one"), source=expression) + resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "twenty one")] + + def test_i_can_concept_parse_when_unrecognized_tokens_yields_multiple_results_ternary_concept(self): + sheerka, context = self.initialize_test() + + expression = "begin twenty two long end" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, cmap["begin"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [TooManyParameters(cmap["begin"], " long end", 5, 1)] + + assert len(sya_node_parser.forks) == 1 + forked_parser = sya_node_parser.forks[0].state_context + + forked_parser.parse() + assert not forked_parser.has_error() + expected = CNC("begin", x=CN("twenties", source="twenty two"), source=expression) + _stack, _expected = prepare_nodes_comparison(cmap, expression, forked_parser.concept_node, expected) + assert _stack == _expected + + def test_i_can_concept_parse_when_multiple_concepts_found(self): + concepts_map = { "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), - } - - sya_def = { - concepts_map["plus"]: (1, SyaAssociativity.Left), - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) - - expression = "one plus two plus three" - res = parser.infix_to_postfix(context, ParserInput(expression)) - - expected = ["one", "two", "plus", "three", ("plus", 1)] 
- expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected", [ - ("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]), - ("x$!# ? y$!# : (z$!# ? two : three)", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]), - - ("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]), - ("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]), - - ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]), - ("one ? two : (x$!# ? y$!# : z$!#)", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]), - ]) - def test_i_can_post_fix_right_associated_ternary(self, expression, expected): - concepts_map = { - "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - } - sya_def = { - concepts_map["?"]: (5, SyaAssociativity.Right), - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected", [ - ("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]), - ("(x$!# ? y$!# : z$!#) ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]), - - # the following one is not possible when Left association - # ("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]), - - ("one ? two : x$!# ? 
y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]), - ("(one ? two : x$!#) ? y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]), - ]) - def test_i_can_post_fix_left_associated_ternary(self, expression, expected): - concepts_map = { - "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), - "one": Concept("one"), - "two": Concept("two"), - "three": Concept("three"), - } - sya_def = { - concepts_map["?"]: (5, SyaAssociativity.Left), - } - sheerka, context, parser = self.init_parser(concepts_map, sya_def) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - def test_i_can_post_fix_when_multiple_concepts_are_found(self): - concepts_map = { - "foo": Concept("foo a").def_var("a"), - "foo bar": Concept("foo bar a").def_var("a"), - "baz": Concept("baz"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) - - expression = "foo bar baz" - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_sequences = [ - [UTN("bar "), "foo", "baz"], - ["baz", "foo bar"] - ] - - self.compare_results(res, expected_sequences, concepts_map, expression) - - @pytest.mark.parametrize("expression, expected", [ - # ("function(one plus three) minus two", - # [SCWC("function(", ")", CNC("plus", a="one", b="three")), "two", "minus"]), - ("two minus function(one plus three)", - ["two", SCWC("function(", ")", CNC("plus", a="one", b="three")), "minus"]), - ("func1() minus func2()", [SCN("func1()"), SCN("func2()"), "minus"]), - ("func1() comes with func2()", [SCN("func1()"), UTN(" comes with "), SCN("func2()")]), - - ("(one plus two) ", ["one", "two", "plus"]), - ("(one prefixed) ", ["one", "prefixed"]), - ("(suffixed one) ", ["one", "suffixed"]), - ("(one ? 
two : three)", ["one", "two", "three", "?"]), - ("square(square(one))", ["one", ("square", 1), "square"]), - ("square ( square ( one ) )", ["one", ("square", 1), "square"]), - - ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]), - ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), - ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), - - ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), - ("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), - ("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]), - - ("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]), - ("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]), - ("suffixed (suffixed square(one))", ["one", "square", ("suffixed", 1), "suffixed"]), - ("suffixed ( suffixed square ( one ) )", ["one", "square", ("suffixed", 1), "suffixed"]), - - ("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]), - ("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]), - ("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]), - ("(( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]), - - ("foo bar(one)", ["one", "foo bar"]), - ("foo bar ( one )", ["one", "foo bar"]), - ]) - def test_i_can_pos_fix_when_parenthesis(self, expression, expected): - sheerka, context, parser = self.init_parser() - context.add_to_protected_hints(BuiltinConcepts.DEBUG) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(cmap, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected_sequences", [ - # composition - ("function(suffixed one)", [[SCWC("function(", ")", 
CNC("suffixed", a="one"))]]), - ("function(one prefixed)", [[SCWC("function(", ")", CNC("prefixed", a="one"))]]), - ("function(if one then two else three end)", - [[SCWC("function(", ")", CNC("if", a="one", b="two", c="three", end=14))]]), - ("function(suffixed twenty two)", - [[SCWC("function(", ")", CNC("suffixed", a=CIO("twenties", source="twenty two")))]]), - ("function(twenty two prefixed)", - [[SCWC("function(", ")", CNC("prefixed", a=CIO("twenties", source="twenty two")))]]), - ("function(if one then twenty two else three end)", - [[SCWC("function(", ")", CNC("if", a="one", b=CIO("twenties", source="twenty two"), c="three", end=16))]]), - - ("twenty two(suffixed one)", [ - ["twenty ", SCWC("two(", ")", CNC("suffixed", a="one"))], - [CN("twenties", "twenty two"), "one", "suffixed"], - ]), - ("twenty two(one prefixed)", [ - ["twenty ", SCWC("two(", ")", CNC("prefixed", a="one"))], - [CN("twenties", "twenty two"), "one", "prefixed"], - ]), - ("f1(one plus two mult three) plus f2(suffixed xxx prefixed)", [ - [SCWC("f1(", ")", CN("plus", "one plus two mult three")), - SCWC("f2(", (")", 1), CN("suffixed", "suffixed xxx prefixed")), - ("plus", 1)] - ]), - - # plus, suffixed, prefixed, ternary - ("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC("func2(", (")", 1), "two"), "plus"]]), - ("suffixed function(one)", [[SCWC("function(", ")", "one"), "suffixed"]]), - ("function(one) prefixed", [[SCWC("function(", ")", "one"), "prefixed"]]), - ("if f1(one) then f2(two) else f3(three) end", [ - [SCWC("f1(", ")", "one"), SCWC("f2(", (")", 1), "two"), SCWC("f3(", (")", 2), "three"), "if"]]), - - # Sequence - ("if one then two else three end function(xxx)", [ - ["one", "two", "three", "if", UTN(" ", start=13, end=13), SCWC("function(", ")", "xxx")]]), - ("one prefixed function(two)", [["one", "prefixed", UTN(" ", start=3, end=3), SCWC("function(", ")", "two")]]), - ("suffixed one function(two)", [["one", "suffixed", UTN(" ", start=3, end=3), SCWC("function(", 
")", "two")]]), - ("func(one, two, three)", [[SCWC("func(", ")", "one", ", ", "two", (", ", 1), "three")]]), - ]) - def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): - sheerka, context, parser = self.init_parser() - - res = parser.infix_to_postfix(context, ParserInput(expression)) - - self.compare_results(res, expected_sequences, cmap, expression) - - @pytest.mark.parametrize("expression, expected", [ - ("suffixed2 a b", ['a', 'b', "suffixed2"]), - ("suffixed3 a b c", ['a', 'b', 'c', "suffixed3"]), - ("a b prefixed2", ['a', 'b', "prefixed2"]), - ("a b c prefixed3", ['a', 'b', 'c', "prefixed3"]), - ("start2 a b stop", ['a', 'b', "start2"]), - ("start3 a b c stop", ['a', 'b', 'c', "start3"]), - ]) - def test_i_can_post_fix_when_multiple_parameters_are_expected(self, expression, expected): - concepts_map = { - "a": Concept("a"), - "b": Concept("b"), - "c": Concept("c"), - "suffixed2": Concept("suffixed2 x y").def_var("x").def_var("y"), - "suffixed3": Concept("suffixed3 x y z").def_var("x").def_var("y").def_var("z"), - "prefixed2": Concept("x y prefixed2").def_var("x").def_var("y"), - "prefixed3": Concept("x y z prefixed3").def_var("x").def_var("y").def_var("z"), - "start2": Concept("start2 x y stop").def_var("x").def_var("y"), - "start3": Concept("start3 x y z stop").def_var("x").def_var("y").def_var("z"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected", [ - ("suffixed3 x y z", ['x', 'y', 'z', "suffixed3"]), - ("suffixed3 a y z", ['a', 'y', 'z', "suffixed3"]), - ("suffixed3 x a z", ['x ', 'a', ' z', "suffixed3"]), # this one was not managed by the second chance - ("suffixed3 x y a", 
['x', 'y', 'a', "suffixed3"]), - ("x y z prefixed3", ['x', 'y', 'z', "prefixed3"]), - ("a y z prefixed3", ['a', 'y', 'z', "prefixed3"]), - ("x a z prefixed3", ['x ', 'a', ' z', "prefixed3"]), - ("x y a prefixed3", ['x', 'y', 'a', "prefixed3"]), - ("start3 x y z stop", ['x', 'y', 'z', "start3"]), - ("start3 a y z stop", ['a', 'y', 'z', "start3"]), - ("start3 x a z stop", ['x ', 'a', ' z ', "start3"]), - ("start3 x y a stop", ['x', 'y', 'a', "start3"]), - ]) - def test_i_can_post_fix_when_multiple_parameters_are_expected_but_unrecognized_tokens(self, expression, expected): - concepts_map = { - "a": Concept("a"), - "suffixed3": Concept("suffixed3 x y z").def_var("x").def_var("y").def_var("z"), - "prefixed3": Concept("x y z prefixed3").def_var("x").def_var("y").def_var("z"), - "start3": Concept("start3 x y z stop").def_var("x").def_var("y").def_var("z"), - } - sheerka, context, parser = self.init_parser(concepts_map, None) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(concepts_map, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression, expected", [ - ("(", ("(", 0)), - ("one plus ( 1 + ", ("(", 4)), - ("one( 1 + ", ("(", 1)), - ("one ( 1 + ", ("(", 2)), - ("function(", ("(", 1)), - ("function( 1 + ", ("(", 1)), - ("function ( 1 + ", ("(", 2)), - ("one plus ) 1 + ", (")", 4)), - ("one ) 1 + ", (")", 2)), - ("function ) 1 + ", (")", 2)), - ("one ? ( : two", ("(", 4)), - ("one ? one plus ( : two", ("(", 8)), - ("one ? ) : two", (")", 4)), - ("one ? 
one plus ) : two", (")", 8)), - ("(one plus ( 1 + )", ("(", 0)), - ]) - def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected): - sheerka, context, parser = self.init_parser() - - res = parser.infix_to_postfix(context, ParserInput(expression)) - - assert len(res) == 1 - assert res[0].errors == [ParenthesisMismatchError(expected)] - - def test_i_can_detect_parenthesis_mismatch_error_special_case(self): - sheerka, context, parser = self.init_parser() - expression = "one ? function( : two" - expected = [ParenthesisMismatchError(("(", 5)), ParenthesisMismatchError(("(", 5))] - res = parser.infix_to_postfix(context, ParserInput(expression)) - - assert len(res) == 1 - assert res[0].errors == expected - - @pytest.mark.parametrize("expression, expected", [ - ("one ? one two : three", ("?", ":")), - ]) - def test_i_can_detected_when_too_many_parameters(self, expression, expected): - sheerka, context, parser = self.init_parser(cmap, None) - - res = parser.infix_to_postfix(context, ParserInput(expression)) - - assert len(res) == 1 - assert len(res[0].errors) == 1 - error = res[0].errors[0] - assert isinstance(error, TooManyParametersFoundError) - assert error.concept == cmap[expected[0]] - assert error.token.value == expected[1] - - @pytest.mark.parametrize("expression, expected", [ - ("one infix two x$!#", ["one", "two", "infix", " x$!#"]), - ("x$!# one infix two", ["x$!# ", "one", "two", "infix"]), - ("one prefixed x$!#", ["one", "prefixed", " x$!#"]), - ("x$!# one prefixed", ["x$!# ", "one", "prefixed"]), - ("suffixed one x$!#", ["one", "suffixed", " x$!#"]), - ("x$!# suffixed one", ["x$!# ", "one", "suffixed"]), - ("one ? two : three x$!#", ["one", "two", "three", "?", " x$!#"]), - ("x$!# one ? 
two : three", ["x$!# ", "one", "two", "three", "?"]), - - ("one infix two three infix four", ["one", "two", "infix", "three", "four", ("infix", 1)]), - ("one infix two three prefixed", ["one", "two", "infix", "three", "prefixed"]), - ("one infix two suffixed three", ["one", "two", "infix", "three", "suffixed"]), - ("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!#", "y$!#", "z$!#", "?"]), - - ("one prefixed two infix three", ["one", "prefixed", "two", "three", "infix"]), - ("one prefixed two prefixed", ["one", "prefixed", "two", ("prefixed", 1)]), - ("one prefixed suffixed two", ["one", "prefixed", "two", "suffixed"]), - ("one prefixed x$!# ? y$!# : z$!#", ["one", "prefixed", " x$!#", "y$!#", "z$!#", "?"]), - - ("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]), - ]) - def test_i_can_post_fix_sequences(self, expression, expected): - sheerka, context, parser = self.init_parser() - - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(cmap, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - @pytest.mark.parametrize("expression", [ - "one ? two : three", - "one?two:three", - "one ?two:three", - "one? two:three", - "one ? two :three", - "one ? 
two: three", - ]) - def test_whitespaces_may_be_omitted_in_some_circumstances(self, expression): - sheerka, context, parser = self.init_parser() - - expected = ["one", "two", "three", "?"] - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_expected_array(cmap, expression, expected) - - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array - - def test_the_more_concepts_the_more_results(self): - concepts_map = { "plus": Concept("a plus b").def_var("a").def_var("b"), - "plus plus": Concept("a plus plus").def_var("a"), - "plus equals": Concept("a plus equals b").def_var("a").def_var("b"), + "minus1": Concept("a minus b").def_var("a").def_var("b"), + "minus2": Concept("x minus y").def_var("x").def_var("y"), } + sheerka, context = self.initialize_test(concepts_map) - sya_def = { - concepts_map["plus"]: (1, SyaAssociativity.Right), - concepts_map["plus plus"]: (1, SyaAssociativity.Right), - concepts_map["plus equals"]: (1, SyaAssociativity.Right), - } + expression = "one plus two minus three" + param1 = self.get_real_node(concepts_map, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + concept_parser = SyaConceptParser(tokens_parser, concepts_map["plus"], tokens_parser.stack) + concept_parser.parse() - expression = "a plus plus equals b" - res = parser.infix_to_postfix(context, ParserInput(expression)) - expected_array = compute_debug_array(res) - assert len(expected_array) == len([ - ["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus b)", "C(a plus plus)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus b)", "C(a plus equals b)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus plus)", 
"T(plus)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus equals b)", "C(a plus b)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus equals b)", "C(a plus plus)", "T(equals)", "T(b)"], - ["T(a)", "C(a plus equals b)", "C(a plus equals b)", "T(equals)", "T(b)"], - ]) + assert not concept_parser.has_error() + expected = CNC("plus", a=CN("one"), b=CNC("minus1", a=CN("two"), b=CN("three"), source="two minus three")) + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, concept_parser.concept_node, expected) + assert _stack == _expected - def test_i_can_use_string_instead_of_identifier(self): + assert len(sya_node_parser.forks) == 1 + forked_parser = sya_node_parser.forks[0].state_context + + forked_parser.parse() + assert not forked_parser.has_error() + expected = CNC("plus", a=CN("one"), b=CNC("minus2", x=CN("two"), y=CN("three"), source="two minus three")) + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, forked_parser.concept_node, expected) + assert _stack == _expected + + def test_i_can_concept_detect_the_end_of_a_concept(self): + sheerka, context = self.initialize_test() + + expression = "begin one long end two" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, cmap["begin"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("begin", x=CNC("one"), source="begin one long end") + _concept_node, _expected = prepare_nodes_comparison(cmap, expression, concept_node, expected) + assert _concept_node == _expected + + def 
test_i_can_concept_early_detect_when_an_incorrect_concept_is_parsed(self): + concepts_map = { + "foo": Concept("foo"), + "car": Concept("beautiful but expensive car"), + "isa": Concept("x is a command").def_var("x"), + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "foo is a beautiful but expensive car" + param1 = self.get_real_node(concepts_map, expression, "foo") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, concepts_map["isa"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert isinstance(concept_parser.errors[0], TokensNotFound) + assert concept_parser.checkpoint == 3 # I stopped before the end of the input + + def test_i_can_tokens_parse_unrecognized(self): + """ + When there is no sya concept, there is no need to manage unrecognized + """ + sheerka, context = self.initialize_test() + + expression = "one two three" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 1 + + expected = [UTN("one two three")] + _stack, _expected = prepare_nodes_comparison(cmap, expression, tokens_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_parse_simple_infixed_concept(self): + sheerka, context = self.initialize_test() + + expression = "one plus two" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 1 + concept_node = tokens_parser.stack[0] + + 
expected = CNC("plus", a=CNC("one"), b=CNC("two"), source=expression) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two")] + + def test_i_can_tokens_parse_concepts_composition(self): + sheerka, context = self.initialize_test() + + expression = "one plus two mult three" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 1 + concept_node = tokens_parser.stack[0] + + expected = CNC("plus", a=CNC("one"), b=CNC("mult", a=CN("two"), b=CN("three")), source=expression) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two mult three")] + + def test_i_can_tokens_parse_concepts_composition_when_pop(self): + sheerka, context = self.initialize_test() + + expression = "one mult two plus three" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 1 + concept_node = tokens_parser.stack[0] + + expected = CNC("plus", a=CNC("mult", a=CN("one"), b=CN("two")), b=CNC("three"), source=expression) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + 
assert concept_node.concept.get_metadata().variables == [("a", "one mult two"), ("b", "three")] + + @pytest.mark.parametrize("expression, expected", [ + ("suffixed foo bar", ["suffixed x", "bar"]), + ("foo suffixed bar", ["foo", "suffixed x"]), + ("foo prefixed bar", ["x prefixed", "bar"]), + ("foo bar prefixed", ["foo", "x prefixed"]), + ("foo infixed bar baz", ["x infixed y", "baz"]), + ("foo bar infixed bar", ["foo", "x infixed y"]), + ("foo infixed bar baz infixed qux", ["x infixed y", "x infixed y"]), + ]) + def test_i_can_tokens_parse_sequences(self, expression, expected): + concepts_map = { + "foo": Concept("foo"), + "bar": Concept("bar"), + "baz": Concept("baz"), + "qux": Concept("qux"), + "suffixed": Concept("suffixed x").def_var("x"), + "prefixed": Concept("x prefixed").def_var("x"), + "infixed": Concept("x infixed y").def_var("x").def_var("y"), + } + sheerka, context = self.initialize_test(concepts_map) + + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 2 + + to_compare_to = [concept_node.concept.name for concept_node in tokens_parser.stack] + assert to_compare_to == expected + + assert len(sya_node_parser.forks) == 0 + + def test_i_can_tokens_parse_sequences_sya_concept_and_concept(self): + concepts_map = { + "foo": Concept("foo"), + "two": Concept("two"), + "one x": Concept("one x").def_var("x"), + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "two one foo" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 2 + + assert isinstance(tokens_parser.stack[0], ConceptNode) + assert 
sheerka.isinstance(tokens_parser.stack[0].concept, concepts_map["two"].key) + assert isinstance(tokens_parser.stack[1], ConceptNode) + assert sheerka.isinstance(tokens_parser.stack[1].concept, concepts_map["one x"].key) + + def test_i_can_tokens_parse_when_trailing_unrecognized_tokens_yields_multiple_results(self): + sheerka, context = self.initialize_test() + + expression = "one plus twenty two" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + expected = [CNC("plus", a=CNC("one"), b=UTN("twenty ")), "two"] + _stack, _expected = prepare_nodes_comparison(cmap, expression, tokens_parser.stack, expected) + assert _stack == _expected + + assert len(sya_node_parser.forks) == 1 + forked_parser = sya_node_parser.forks[0] + + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("plus", a=CNC("one"), b=CN("twenties", source="twenty two"))] + _stack, _expected = prepare_nodes_comparison(cmap, expression, forked_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_parse_when_leading_unrecognized_tokens_yields_multiple_results(self): + sheerka, context = self.initialize_test() + + expression = "twenty one plus two" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + expected = [UTN("twenty "), CNC("plus", a=CNC("one"), b=CNC("two"))] + _stack, _expected = prepare_nodes_comparison(cmap, expression, tokens_parser.stack, expected) + assert _stack == _expected + + assert len(sya_node_parser.forks) == 1 + forked_parser = sya_node_parser.forks[0] + + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("plus", a=CN("twenties", source="twenty one"), 
b=CNC("two"))] + _stack, _expected = prepare_nodes_comparison(cmap, expression, forked_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_parse_when_unrecognized_tokens_yields_multiple_results_ternary_concept(self): + sheerka, context = self.initialize_test() + + expression = "begin twenty two long end" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + concept_parser = SyaTokensParser(context, sya_node_parser, parser_input) + concept_parser.parse() + + assert not concept_parser.has_error() + expected = [UTN("begin twenty two long end")] + _stack, _expected = prepare_nodes_comparison(cmap, expression, concept_parser.stack, expected) + assert _stack == _expected + + assert len(sya_node_parser.forks) == 1 + + forked_parser = sya_node_parser.forks[0] + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("begin", x=CN("twenties", source="twenty two"), source=expression)] + _stack, _expected = prepare_nodes_comparison(cmap, expression, forked_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_rollback_when_the_incorrect_concept_is_parsed(self): + concepts_map = { + "one": Concept("one"), + "two": Concept("two"), + "one and only": Concept("one and only x").def_var("x"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "one plus two" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 1 + concept_node = tokens_parser.stack[0] + + expected = CNC("plus", a=CNC("one"), b=CNC("two"), source=expression) + resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + 
assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two")] + + def test_i_can_tokens_parse_when_tokens_parser_yields_multiple_results(self): concepts_map = { - "ternary": Concept("a ? ? b '::' c").def_var("a").def_var("b").def_var("c"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + "mult1": Concept("a mult b").def_var("a").def_var("b"), + "mult2": Concept("x mult y").def_var("x").def_var("y"), } + sheerka, context = self.initialize_test(concepts_map) + sheerka.set_precedence(context, concepts_map["mult1"], concepts_map["plus"]) + sheerka.set_precedence(context, concepts_map["mult2"], concepts_map["plus"]) - sheerka, context, parser = self.init_parser(concepts_map, None) + expression = "one mult two plus three" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() - res = parser.infix_to_postfix(context, ParserInput("one ? ? 
two '::' three")) - expected_array = [ - CN("one", start=0, end=0, source="one"), - CN("two", start=6, end=6, source="two"), - CN("three", start=10, end=10, source="three"), - SyaConceptParserHelper(concepts_map["ternary"], 2), - ] + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array + assert not tokens_parser.has_error() + expected = [CNC("plus", a=CNC("mult1", a=CN("one"), b=CN("two"), source="one mult two"), b=CN("three"))] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, tokens_parser.stack, expected) + assert _stack == _expected - def test_i_cannot_chain_non_associative(self): + assert len(sya_node_parser.forks) == 1 + + forked_parser = sya_node_parser.forks[0] + assert type(forked_parser) == SyaTokensParser + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("plus", a=CNC("mult2", x=CN("one"), y=CN("two"), source="one mult two"), b=CN("three"))] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, forked_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_parse_when_concept_parsers_yield_multiple_results(self): concepts_map = { - "less than": Concept("a less than b").def_var("a").def_var("b"), "one": Concept("one"), "two": Concept("two"), "three": Concept("three"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + "minus1": Concept("a minus b").def_var("a").def_var("b"), + "minus2": Concept("x minus y").def_var("x").def_var("y"), } - sya_def = { - concepts_map["less than"]: (None, SyaAssociativity.No), + sheerka, context = self.initialize_test(concepts_map) + + expression = "one plus two minus three" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + 
+ assert not tokens_parser.has_error() + expected = [CNC("plus", a=CN("one"), b=CNC("minus1", a=CN("two"), b=CN("three"), source="two minus three"))] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, tokens_parser.stack, expected) + assert _stack == _expected + + assert len(sya_node_parser.forks) == 1 + + forked_parser = sya_node_parser.forks[0] + assert type(forked_parser) == SyaTokensParser + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("plus", a=CN("one"), b=CNC("minus2", x=CN("two"), y=CN("three"), source="two minus three"))] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, forked_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_parse_when_concept_parsers_pop_multiple_results(self): + concepts_map = { + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "plus1": Concept("a plus b").def_var("a").def_var("b"), + "plus2": Concept("x plus y").def_var("x").def_var("y"), + "mult": Concept("a mult b").def_var("a").def_var("b"), } + sheerka, context = self.initialize_test(concepts_map) + sheerka.set_precedence(context, concepts_map["mult"], concepts_map["plus1"]) + sheerka.set_precedence(context, concepts_map["mult"], concepts_map["plus2"]) - sheerka, context, parser = self.init_parser(concepts_map, sya_def) + expression = "one mult two plus three" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() - res = parser.infix_to_postfix(context, ParserInput("one less than two less than three")) - assert len(res) == 1 - assert res[0].errors == [NoneAssociativeSequenceError(concepts_map["less than"], 2, 8)] + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() - def test_i_can_post_fix_bnf_definition(self): - """ - The definition of a BNF concept is considered as an atom concept - Not quite sure why this test is here - :return: - """ - sheerka, context, parser = 
self.init_parser() + assert not tokens_parser.has_error() + expected = [CNC("plus1", a=CNC("mult", a=CN("one"), b=CN("two")), b=CN("three"), source=expression)] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, tokens_parser.stack, expected) + assert _stack == _expected - expression = "suffixed twenties" - res = parser.infix_to_postfix(context, ParserInput(expression)) + assert len(sya_node_parser.forks) == 1 - expected = [CN("twenties", "twenties", 2, 2), "suffixed"] - expected_array = compute_expected_array(cmap, expression, expected) + forked_parser = sya_node_parser.forks[0] + assert type(forked_parser) == SyaTokensParser + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("plus2", x=CNC("mult", a=CN("one"), b=CN("two")), y=CN("three"), source=expression)] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, forked_parser.stack, expected) + assert _stack == _expected - assert len(res) == 1 - transformed_out = get_test_obj(res[0].out, expected_array) - assert transformed_out == expected_array + def test_i_can_tokens_parse_when_concept_parsers_mix_multiple_results(self): + concepts_map = { + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "plus1": Concept("a plus b").def_var("a").def_var("b"), + "plus2": Concept("x plus y").def_var("x").def_var("y"), + "plus3": Concept("u plus v").def_var("u").def_var("v"), + "plus4": Concept("i plus j").def_var("i").def_var("j"), + "mult": Concept("a mult b").def_var("a").def_var("b"), + } + sheerka, context = self.initialize_test(concepts_map) + sheerka.set_precedence(context, concepts_map["mult"], concepts_map["plus1"]) + sheerka.set_precedence(context, concepts_map["mult"], concepts_map["plus2"]) + sheerka.set_precedence(context, concepts_map["plus3"], concepts_map["mult"]) + sheerka.set_precedence(context, concepts_map["plus4"], concepts_map["mult"]) - @pytest.mark.parametrize("expression, expected_debugs", [ - ("one", [[" 
0:one => PUSH_UNREC"]]), - ("one plus two", [[' 0:one => PUSH_UNREC', - ' 1: => PUSH_UNREC', - ' 2:plus ((1005)a plus b, prio=1, assoc=SyaAssociativity.Right) => ??', - ' _: => RECOG [[CN((1001)one)]]', - " _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)", - ' 2:plus ((1005)a plus b, prio=1, assoc=SyaAssociativity.Right) => PUSH', - ' 3: => EAT', - ' 4:two => PUSH_UNREC', - ' 5: => ??', - ' _: => RECOG [[CN((1002)two)]]', - " _: => POP ConceptNode(concept='(1002)two', source='two', start=4, end=4)", - ' _: => POP SyaConceptParserHelper(concept=(1005)a plus b, start=2, ' - 'error=None)']]), - ("suffixed one", [[ - ' 0:suffixed ((1009)suffixed a, prio=1, assoc=SyaAssociativity.Right) => PUSH', - ' 1: => EAT', - ' 2:one => PUSH_UNREC', - ' 3: => ??', - " _: => RECOG [[CN((1001)one)]]", - " _: => POP ConceptNode(concept='(1001)one', source='one', start=2, end=2)", - ' _: => POP SyaConceptParserHelper(concept=(1009)suffixed a, start=0, error=None)' - ]]), - ("one ? twenty one : three", [[ - ' 0:one => PUSH_UNREC', - ' 1: => PUSH_UNREC', - ' 2:? ((1011)a ? b : c, prio=1, assoc=SyaAssociativity.Right) => ??', - ' _: => RECOG [[CN((1001)one)]]', - " _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)", - ' 2:? ((1011)a ? b : c, prio=1, assoc=SyaAssociativity.Right) => PUSH', - ' 3: => EAT', - ' 4:twenty => PUSH_UNREC', - ' 5: => PUSH_UNREC', - ' 6:one => PUSH_UNREC', - ' 7: => PUSH_UNREC', - ' 8:: => ??', - " _: => RECOG [[UTN('twenty '), CN((1001)one)], [CN((1016)twenties)]]", - " _: => POP UnrecognizedTokensNode(source='twenty ', start=4, end=5)", - " _: => POP ConceptNode(concept='(1001)one', source='one', start=6, end=6)", - " _: => => ERROR Too many parameters found for '(1011)a ? b : c' before token 'Token(:)'", - ' 8:: => EAT'], [ - ' 0:one => PUSH_UNREC', - ' 1: => PUSH_UNREC', - ' 2:? ((1011)a ? 
b : c, prio=1, assoc=SyaAssociativity.Right) => ??', - ' _: => RECOG [[CN((1001)one)]]', - " _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)", - ' 2:? ((1011)a ? b : c, prio=1, assoc=SyaAssociativity.Right) => PUSH', - ' 3: => EAT', - ' 4:twenty => PUSH_UNREC', - ' 5: => PUSH_UNREC', - ' 6:one => PUSH_UNREC', - ' 7: => PUSH_UNREC', - ' 8:: => ??', - " _: => RECOG [[UTN('twenty '), CN((1001)one)], [CN((1016)twenties)]]", - " _: => POP ConceptNode(concept='(1016)twenties', source='twenty one', start=4, end=6, #body#='DoNotResolve(value='twenty one')', unit='(1001)one')", - ' 9: => EAT', - ' 10:three => PUSH_UNREC', - ' 11: => ??', - ' _: => RECOG [[CN((1003)three)]]', - " _: => POP ConceptNode(concept='(1003)three', source='three', start=10, end=10)", - ' _: => POP SyaConceptParserHelper(concept=(1011)a ? b : c, start=2, error=None)']]), - ]) - def test_i_can_debug(self, expression, expected_debugs): - sheerka, context, parser = self.init_parser() - sheerka.set_debug(context, True) - sheerka.set_debug_var(context, "Sya") - res = parser.infix_to_postfix(context, ParserInput(expression)) + expression = "one mult two plus three" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() - assert len(res) == len(expected_debugs) - for res_i, expected_debug in zip(res, expected_debugs): - actual_debug = [str(di) for di in res_i.debug] - assert actual_debug == expected_debug + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() - @pytest.mark.parametrize("settings", [ - "Sya.*.*", - "Sya.*.#0.can_pop" - ]) - def test_i_can_debug_can_pop_using_star(self, settings): - sheerka, context, parser = self.init_parser() - sheerka.set_debug(context, True) - sheerka.set_debug_var(context, settings) - res = parser.infix_to_postfix(context, ParserInput("one plus two mult three")) + assert not tokens_parser.has_error() + expected = [CNC("plus1", a=CNC("mult", a=CN("one"), b=CN("two")), 
b=CN("three"), source=expression)] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, tokens_parser.stack, expected) + assert _stack == _expected - debug = [str(di) for di in res[0].debug] - assert debug[5] == ' _: => No stack. CAN_POP false.' + assert len(sya_node_parser.forks) == 3 - def test_i_can_parse_when_concept_atom_only(self): + forked_parser = sya_node_parser.forks[0] + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("plus2", x=CNC("mult", a=CN("one"), b=CN("two")), y=CN("three"), source=expression)] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, forked_parser.stack, expected) + assert _stack == _expected + + forked_parser = sya_node_parser.forks[1] + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("mult", a=CN("one"), b=CNC("plus3", u=CN("two"), v=CN("three"), source="two plus three"))] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, forked_parser.stack, expected) + assert _stack == _expected + + forked_parser = sya_node_parser.forks[2] + forked_parser.parse() + assert not forked_parser.has_error() + expected = [CNC("mult", a=CN("one"), b=CNC("plus4", i=CN("two"), j=CN("three"), source="two plus three"))] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, forked_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_rollback_incorrect_concept_when_prefix_parameters(self): + sheerka, context = self.initialize_test() + + expression = "desc(infixed)" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + + expected = [UTN("desc(infixed)")] + _stack, _expected = prepare_nodes_comparison(cmap, expression, tokens_parser.stack, expected) + assert _stack == _expected + + def 
test_i_can_tokens_parse_when_multiple_levels_of_pop(self): + concepts_map = { + "what": Concept("what is the x of y").def_var("x").def_var("y"), + "?": Concept("q ?").def_var("q"), + "the": Concept("the x").def_var("x"), + "color": Concept("color"), + "short": Concept("short"), + } + sheerka, context = self.initialize_test(concepts_map) + sheerka.set_precedence(context, concepts_map["what"], concepts_map["?"]) + sheerka.set_precedence(context, concepts_map["the"], concepts_map["?"]) + + expression = "what is the color of the short ?" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 1 + + expected = [CNC("?", q=CNC("what", x=CNC("color"), y=CNC("the", x=CNC("short"), source="the short")), + source=expression)] + _stack, _expected = prepare_nodes_comparison(concepts_map, expression, tokens_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_manage_when_leading_parameter_is_a_whitespace(self): + sheerka, context = self.initialize_test() + + expression = " plus two" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + + expected = [UTN(" plus two")] + _stack, _expected = prepare_nodes_comparison(cmap, expression, tokens_parser.stack, expected) + assert _stack == _expected + + def test_i_can_tokens_manage_when_trailing_parameter_is_a_whitespace(self): + sheerka, context = self.initialize_test() + + expression = "one plus " + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + + expected = 
["one", UTN("plus ")] + _stack, _expected = prepare_nodes_comparison(cmap, expression, tokens_parser.stack, expected) + assert _stack == _expected + + def test_i_always_look_for_the_longest_match(self): + concepts_map = { + "one": Concept("one"), + "two": Concept("two"), + "one x": Concept("one x").def_var("x"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "one plus two" + parser_input = ParserInput(expression).reset() + sya_node_parser = SyaNodeParser() + + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.parse() + + assert not tokens_parser.has_error() + assert len(tokens_parser.stack) == 2 + + assert len(sya_node_parser.forks) == 1 + forked_tokens_parser = sya_node_parser.forks[0] + forked_tokens_parser.parse() + concept_node = forked_tokens_parser.stack[0] + + expected = CNC("plus", a=CNC("one"), b=CNC("two"), source=expression) + resolved_expected = compute_expected_array(concepts_map, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, resolved_expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "two")] + + def test_i_cannot_concept_parse_concept_when_not_enough_prefix_parameters(self): + sheerka, context = self.initialize_test() + + expression = "one plus two" + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [NotEnoughParameters(cmap["plus"], " plus ", 2, 1)] + + def test_i_cannot_concept_parse_concept_when_leading_parameters_is_a_white_space(self): + sheerka, context = 
self.initialize_test() + + expression = " plus two" + parser_input = ParserInput(expression).reset() + parser_input.seek(1) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [UTN(" ", start=0, end=0)] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [NotEnoughParameters(cmap["plus"], " plus ", 1, 1)] + + def test_i_cannot_concept_parse_concept_when_trailing_parameters_is_a_white_space(self): + sheerka, context = self.initialize_test() + + expression = "one plus " + param1 = self.get_real_node(cmap, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [NotEnoughParameters(cmap["plus"], "", 4, 1)] + + def test_i_cannot_concept_parse_when_not_enough_suffix_parameters(self): + concepts_map = { + "one": Concept("one"), + "plus": Concept("a plus b").def_var("a").def_var("b") + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "one plus " + param1 = self.get_real_node(concepts_map, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, concepts_map["plus"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [NotEnoughParameters(concepts_map["plus"], '', 4, 1)] + + def 
test_i_cannot_concept_parse_when_not_enough_infix_parameters(self): + concepts_map = { + "one": Concept("one"), + "begin": Concept("begin a b end").def_var("a").def_var("b") + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "begin one end" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, concepts_map["begin"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [NotEnoughParameters(concepts_map["begin"], ' end', 3, 2)] + + def test_i_cannot_concept_parse_when_too_many_infix_parameters(self): + sheerka, context = self.initialize_test() + + expression = "begin one two long end" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, cmap["begin"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [TooManyParameters(cmap["begin"], ' long end', 5, 1)] + + def test_i_cannot_concept_parse_when_missing_parts(self): + sheerka, context = self.initialize_test() + + expression = "begin twenty" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, cmap["begin"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [TokensNotFound(cmap['begin'], " long end")] + + def test_i_can_detect_when_parsing_the_wrong_concept(self): + concepts_map = { 
+ "one": Concept("one"), + "two": Concept("two"), + "if": Concept("if a then b").def_var("a").def_var("b") + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "if one else two" + parser_input = ParserInput(expression).reset() + parser_input.seek(0) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [] + + concept_parser = SyaConceptParser(tokens_parser, concepts_map["if"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + assert concept_parser.errors == [TokensNotFound(concepts_map["if"], " then ")] + + def test_i_can_detect_invalid_long_name_parsing(self): + concepts_map = { + "one": Concept("one"), + "two": Concept("two"), + "plus": Concept("a plus plus b").def_var("a").def_var("b") + } + sheerka, context = self.initialize_test(concepts_map) + + expression = "one plus two" + param1 = self.get_real_node(concepts_map, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, concepts_map["plus"], tokens_parser.stack) + concept_parser.parse() + + assert concept_parser.has_error() + with comparable_tokens(): + assert concept_parser.errors == [TokensNotFound(concepts_map["plus"], " plus ")] + + def test_i_can_parse_simple_concepts_composition(self): sheerka, context, parser = self.init_parser() text = "one plus two mult three" @@ -1052,14 +1323,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - compare_with_test_object(lexer_nodes, [CN(cmap["plus"], text, 0, 8)]) - # check the compiled - expected_concept = lexer_nodes[0].concept - assert expected_concept.get_compiled()["a"] == cmap["one"] - 
compare_with_test_object(expected_concept.get_compiled()["b"], CMV(cmap["mult"], a="two", b="three")) - assert expected_concept.get_compiled()["b"].get_compiled()["a"] == cmap["two"] - assert expected_concept.get_compiled()["b"].get_compiled()["b"] == cmap["three"] + expected = [CNC("plus", a=CC("one"), b=CC("mult", a=CC("two"), b=CC("three")), source=text)] + _stack, _expected = prepare_nodes_comparison(cmap, text, lexer_nodes, expected) + assert _stack == _expected # check the metadata expected_concept = lexer_nodes[0].concept @@ -1076,7 +1343,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - compare_with_test_object(lexer_nodes, [CN(cmap["suffixed"], text, 0, 6)]) + expected = [CN(cmap["suffixed"], text, 0, 6)] + concept_node_as_test_obj = get_test_obj(lexer_nodes, expected) + assert concept_node_as_test_obj == expected # check the compiled expected_concept = lexer_nodes[0].concept @@ -1104,7 +1373,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): lexer_nodes = res[1].body.body assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - compare_with_test_object(lexer_nodes, [CN(cmap["suffixed"], text, 0, 4)]) + expected = [CN(cmap["suffixed"], text, 0, 4)] + concept_node_as_test_obj = get_test_obj(lexer_nodes, expected) + assert concept_node_as_test_obj == expected # check the compiled expected_concept = lexer_nodes[0].concept @@ -1114,6 +1385,77 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): # check metadata assert expected_concept.get_metadata().variables == [("a", "twenty one")] + def test_i_can_parse_sequences(self): + sheerka, context, parser = self.init_parser() + + text = "one plus 1 + 1 suffixed two" + res = parser.parse(context, ParserInput(text)) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + expected = [ + 
CN(cmap["plus"], "one plus 1 + 1 ", 0, 9), + CN(cmap["suffixed"], "suffixed two", 10, 12)] + + _stack, _expected = prepare_nodes_comparison(cmap, text, lexer_nodes, expected) + assert _stack == _expected + + # check the compiled + concept_plus_a = lexer_nodes[0].concept.get_compiled()["a"] + concept_plus_b = lexer_nodes[0].concept.get_compiled()["b"] + concept_suffixed_a = lexer_nodes[1].concept.get_compiled()["a"] + + assert concept_plus_a == cmap["one"] + assert len(concept_plus_b) == 1 + assert sheerka.isinstance(concept_plus_b[0], BuiltinConcepts.RETURN_VALUE) + assert isinstance(concept_plus_b[0].body.body, PythonNode) + assert concept_suffixed_a == cmap["two"] + + def test_i_can_parse_when_mix_bnf_and_python_code(self): + sheerka, context, parser = self.init_parser() + + text = "twenty two plus two mult 3" + res = parser.parse(context, ParserInput(text)) + assert len(res) == 2 + assert res[1].status + + lexer_nodes = res[1].body.body + assert len(lexer_nodes) == 1 + concept_found = lexer_nodes[0].concept + assert sheerka.isinstance(concept_found.get_compiled()["a"], "twenties") + assert sheerka.isinstance(concept_found.get_compiled()["b"], cmap["mult"]) + + concept_mult = concept_found.get_compiled()["b"] + assert sheerka.isinstance(concept_mult.get_compiled()["a"], "two") + assert isinstance(concept_mult.get_compiled()["b"], list) + assert isinstance(concept_mult.get_compiled()["b"][0], ReturnValueConcept) + assert "Python" in concept_mult.get_compiled()["b"][0].who + + @pytest.mark.parametrize("text, expected", [ + ("suffixed3 a b c", [("x", "a"), ("y", "b"), ("z", "c")]), + ("a b c prefixed3", [("x", "a"), ("y", "b"), ("z", "c")]), + ("start3 a b c stop", [("x", "a"), ("y", "b"), ("z", "c")]), + ]) + def test_i_can_parse_when_multiple_parameters_are_expected(self, text, expected): + concepts_map = { + "a": Concept("a"), + "b": Concept("b"), + "c": Concept("c"), + "suffixed3": Concept("suffixed3 x y z").def_var("x").def_var("y").def_var("z"), + "prefixed3": 
Concept("x y z prefixed3").def_var("x").def_var("y").def_var("z"), + "start3": Concept("start3 x y z stop").def_var("x").def_var("y").def_var("z"), + } + sheerka, context, parser = self.init_parser(concepts_map) + + res = parser.parse(context, ParserInput(text)) + lexer_nodes = res.body.body + + assert res.status + assert lexer_nodes[0].concept.get_metadata().variables == expected + def test_i_can_parse_when_all_variables_are_not_parameters(self): my_map = { "foo": Concept("foo a").def_var("a", "'default_a'").def_var("b", "'default_b'"), # 'b' is not a parameter @@ -1141,85 +1483,23 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert "a" not in expected_concept.get_compiled() assert expected_concept.get_metadata().variables == [("a", "'default_a'"), ("b", "baz")] - def test_i_can_parse_sequences(self): - sheerka, context, parser = self.init_parser() - - text = "one plus 1 + 1 suffixed two" - res = parser.parse(context, ParserInput(text)) - wrapper = res.body - lexer_nodes = res.body.body - - assert res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - compare_with_test_object(lexer_nodes, [ - CN(cmap["plus"], "one plus 1 + 1 ", 0, 9), - CN(cmap["suffixed"], "suffixed two", 10, 12)]) - - # check the compiled - concept_plus_a = lexer_nodes[0].concept.get_compiled()["a"] - concept_plus_b = lexer_nodes[0].concept.get_compiled()["b"] - concept_suffixed_a = lexer_nodes[1].concept.get_compiled()["a"] - - assert concept_plus_a == cmap["one"] - assert len(concept_plus_b) == 1 - assert sheerka.isinstance(concept_plus_b[0], BuiltinConcepts.RETURN_VALUE) - assert isinstance(concept_plus_b[0].body.body, PythonNode) - assert concept_suffixed_a == cmap["two"] - - @pytest.mark.parametrize("text, expected", [ - ("suffixed3 a b c", [("x", "a"), ("y", "b"), ("z", "c")]), - ("a b c prefixed3", [("x", "a"), ("y", "b"), ("z", "c")]), - ("start3 a b c stop", [("x", "a"), ("y", "b"), ("z", "c")]), - ]) - def 
test_i_can_parse_when_multiple_parameters_are_expected(self, text, expected): + def test_i_can_parse_when_defining_parenthesis(self): concepts_map = { - "a": Concept("a"), - "b": Concept("b"), - "c": Concept("c"), - "suffixed3": Concept("suffixed3 x y z").def_var("x").def_var("y").def_var("z"), - "prefixed3": Concept("x y z prefixed3").def_var("x").def_var("y").def_var("z"), - "start3": Concept("start3 x y z stop").def_var("x").def_var("y").def_var("z"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + "mult": Concept("a mult b").def_var("a").def_var("b"), + "(": Concept("( x )").def_var("x") } - sheerka, context, parser = self.init_parser(concepts_map, None) - + sheerka, context, parser = self.init_parser(concepts_map) + text = "( one plus two ) mult three" res = parser.parse(context, ParserInput(text)) lexer_nodes = res.body.body - assert res.status - assert lexer_nodes[0].concept.get_metadata().variables == expected - - @pytest.mark.parametrize("text", [ - "function(suffixed one)", - "function(one plus two mult three)", - "function(suffixed x$!#)" - ]) - def test_i_cannot_parse_when_function_only(self, text): - sheerka, context, parser = self.init_parser() - - res = parser.parse(context, ParserInput(text)) - - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - - @pytest.mark.parametrize("text", [ - "foo bar (one", - "foo bar one", - "foo one two", - "foo x$!# one", - ]) - def test_i_cannot_parse_when_concept_almost_found(self, text): - """ - We test that the parsed concept seems like a known one, but it was not. 
- The parser has to detected that the predication was incorrect - :return: - """ - sheerka, context, parser = self.init_parser() - - res = parser.parse(context, ParserInput(text)) - - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - assert res.body.body == text + expected = [CNC("mult", a=CC("(", x=CC("plus"), source="( one plus two )"), b="three")] + _stack, _expected = prepare_nodes_comparison(concepts_map, text, lexer_nodes, expected) + assert _stack == _expected @pytest.mark.parametrize("text, expected_result", [ ("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]), @@ -1231,8 +1511,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_almost_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result): """ - We test that the parsed concept seems like a known one, but it was not. - The parser has to detected that the predication was incorrect + Return value status must be False when there are unrecognized tokens :return: """ sheerka, context, parser = self.init_parser() @@ -1249,8 +1528,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): transformed_nodes = get_test_obj(lexer_nodes, expected_array) assert transformed_nodes == expected_array - # assert lexer_nodes == expected_array - @pytest.mark.parametrize("text, expected_result", [ ("a plus b", [CN("plus", "a plus b")]), ("suffixed a plus b", [CN("suffixed", "suffixed a plus b")]), @@ -1258,7 +1535,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_almost_parse_concept_definition(self, text, expected_result): """ In these examples, 'a' and 'b' are not defined. 
- So the status of the return value cannot be True + But the concepts are recognized :param text: :param expected_result: :return: @@ -1271,20 +1548,19 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): lexer_nodes = res.body.body expected_array = compute_expected_array(cmap, text, expected_result) - assert not res.status + assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) transformed_nodes = get_test_obj(lexer_nodes, expected_array) assert transformed_nodes == expected_array - # assert lexer_nodes == expected_array @pytest.mark.parametrize("text, expected_error", [ - ("x$!# prefixed", "Cannot parse 'x$!#'"), + ("x$!# prefixed", "Cannot parse 'x$!# '"), ("suffixed x$!#", "Cannot parse 'x$!#'"), - ("one infix x$!#", "Cannot parse 'x$!#'"), - ("x$!# infix one", "Cannot parse 'x$!#'"), - ("x$!# infix z$!#", ["Cannot parse 'z$!#'", "Cannot parse 'x$!#'"]), + ("one infixed x$!#", "Cannot parse 'x$!#'"), + ("x$!# infixed one", "Cannot parse 'x$!# '"), + ("x$!# infixed z$!#", ["Cannot parse 'x$!# '", "Cannot parse 'z$!#'"]), ("suffixed alpha beta", "Cannot parse 'alpha beta'"), - ("alpha beta prefixed", "Cannot parse 'alpha beta'"), + ("alpha beta prefixed", "Cannot parse 'alpha beta '"), ("one plus alpha beta", "Cannot parse 'alpha beta'"), ]) def test_i_cannot_parse_when_unrecognized(self, text, expected_error): @@ -1297,65 +1573,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(wrapper, BuiltinConcepts.ERROR) assert wrapper.body == expected_error - @pytest.mark.parametrize("text, expected", [ - ("x$!# suffixed one", [UTN("x$!# ", 0, 4), CN("suffixed __var__0", "suffixed one", 5, 7)]), - ("one prefixed x$!#", [CN("__var__0 prefixed", "one prefixed", 0, 2), UTN(" x$!#", 3, 7)]), - ]) - def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected): - sheerka, context, parser = self.init_parser() - - res = parser.parse(context, ParserInput(text)) - wrapper = 
res.body - lexer_nodes = res.body.body - - assert not res.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - compare_with_test_object(lexer_nodes, expected) - - def test_i_cannot_parse_function_using_short_name(self): - concepts_map = { - "infixed": self.from_def_concept("infixed", "a infixed b", ["a", "b"]), - "suffixed": self.from_def_concept("suffixed", "suffixed a", ["a"]), - "prefixed": self.from_def_concept("prefixed", "a prefixed", ["a"]), - } - sheerka, context, parser = self.init_parser(concepts_map) - - res = parser.parse(context, ParserInput("desc(infixed)")) - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - - res = parser.parse(context, ParserInput("desc(suffixed)")) - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - - res = parser.parse(context, ParserInput("desc(prefixed)")) - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - - @pytest.mark.parametrize("text", [ - "one", - "1 + 1", - "x$!#", - "twenty one" - "", - "function(not an sya concept)", - ]) - def test_i_cannot_parse_when_no_concept_is_recognized(self, text): - """ - it's actually no concept with property - Atoms concepts, source code or BNF concepts alone are discarded by the lexer - :return: - """ - sheerka, context, parser = self.init_parser() - - res = parser.parse(context, ParserInput(text)) - - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - assert res.body.body == text - def test_i_cannot_parse_empty_string(self): - sheerka, context, parser = self.init_parser({}, None) + sheerka, context, parser = self.init_parser() res = parser.parse(context, ParserInput("")) @@ -1375,59 +1594,60 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) assert res.body.body == text - @pytest.mark.parametrize("expression, expected", [ - 
("function(", ([], "function(")), - ("before the function(", (["before the "], "function(")), - ("one two function(", (["one", "two", UTN(" ", 3, 3)], "function(")), - ("one(", ([], "one(")), - ("one before the function(", (["one", " before the "], "function(")), + @pytest.mark.parametrize("text, expected_error", [ + ("one", NoSyaConceptFound()), + ("1 + 1", NoSyaConceptFound()), + ("x$!#", NoSyaConceptFound()), + ("twenty one", NoSyaConceptFound()), + ("function(not an sya concept)", FunctionDetected()), ]) - def test_i_can_get_functions_names_from_unrecognized(self, expression, expected): + def test_i_cannot_parse_when_no_concept_is_recognized(self, text, expected_error): + """ + it's actually no concept with property + Atoms concepts, source code or BNF concepts alone are discarded by the lexer + :return: + """ sheerka, context, parser = self.init_parser() - infix_to_postfix = InFixToPostFix(context, NextIdManager()) - tokens = list(Tokenizer(expression, yield_eof=False)) - for pos, token in enumerate(tokens[:-1]): - infix_to_postfix.eat_unrecognized(token, pos) + res = parser.parse(context, ParserInput(text)) - resolved_to_out = compute_expected_array(cmap, expression, expected[0]) - resolved_function_name = compute_expected_array(cmap, expression, [expected[1]]) - actual = infix_to_postfix.get_functions_names_from_unrecognized(tokens[-1], len(tokens) - 1) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert res.body.body == text + assert res.body.reason == [expected_error] - assert len(actual) == 1 + def test_i_cannot_parse_function_using_short_name(self): + concepts_map = { + "infixed": self.from_def_concept("infixed", "a infixed b", ["a", "b"]), + "suffixed": self.from_def_concept("suffixed", "suffixed a", ["a"]), + "prefixed": self.from_def_concept("prefixed", "a prefixed", ["a"]), + } + sheerka, context, parser = self.init_parser(concepts_map) - compare_with_test_object(actual[0].to_out, resolved_to_out) - 
actual[0].function.fix_source() - compare_with_test_object(actual[0].function, resolved_function_name[0]) + res = parser.parse(context, ParserInput("desc(suffixed)")) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - @pytest.mark.parametrize("expression, expected_list", [ - ("twenty two function(", [(["twenty ", "two", UTN(" ", 3, 3)], "function("), - ([CN("twenties", "twenty two"), UTN(" ", 3, 3)], "function(")]), - ("twenty two(", [(["twenty "], "two("), - ([CN("twenties", "twenty two")], None)]), + res = parser.parse(context, ParserInput("desc(prefixed)")) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + + res = parser.parse(context, ParserInput("desc(infixed)")) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + + @pytest.mark.parametrize("text", [ + "function(suffixed one)", + "function(one plus two mult three)", + "function(suffixed x$!#)" ]) - def test_i_can_get_functions_names_from_unrecognized_when_multiple_results(self, expression, expected_list): + def test_i_cannot_parse_when_function_only(self, text): sheerka, context, parser = self.init_parser() - infix_to_postfix = InFixToPostFix(context, NextIdManager()) - tokens = list(Tokenizer(expression, yield_eof=False)) - for pos, token in enumerate(tokens[:-1]): - infix_to_postfix.eat_unrecognized(token, pos) + res = parser.parse(context, ParserInput(text)) - actual_list = infix_to_postfix.get_functions_names_from_unrecognized(tokens[-1], len(tokens) - 1) - - assert len(actual_list) == len(expected_list) - - for actual, expected in zip(actual_list, expected_list): - resolved_to_out = compute_expected_array(cmap, expression, expected[0]) - - compare_with_test_object(actual.to_out, resolved_to_out) - if actual.function: - actual.function.fix_source() - resolved_function_name = compute_expected_array(cmap, expression, [expected[1]]) - compare_with_test_object(actual.function, 
resolved_function_name[0]) - else: - assert actual.function is None + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) def test_i_can_parse_when_multiple_ontologies(self): sheerka, context, parser = self.init_parser() @@ -1439,7 +1659,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - compare_with_test_object(lexer_nodes, [CN(cmap["suffixed"], text, 0, 6)]) + expected = [CN(cmap["suffixed"], text, 0, 6)] + concept_node_as_test_obj = get_test_obj(lexer_nodes, expected) + assert concept_node_as_test_obj == expected # add an ontology layer and make sure will still can parse sheerka.push_ontology(context, "new ontology") @@ -1451,9 +1673,11 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - compare_with_test_object(lexer_nodes, [CN(cmap["suffixed"], text, 0, 6)]) + expected = [CN(cmap["suffixed"], text, 0, 6)] + concept_node_as_test_obj = get_test_obj(lexer_nodes, expected) + assert concept_node_as_test_obj == expected - def test_i_correctly_set_up_use_copy(self): + def test_i_correctly_set_up_concept_hints(self): my_map = { "shirt": Concept("shirt"), "a x": Concept("a x", ret="x").def_var("x"), @@ -1466,13 +1690,41 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): concept_found = res.body.body[0].concept assert concept_found.get_hints().use_copy + assert concept_found.get_hints().need_validation + assert concept_found.get_hints().recognized_by == RECOGNIZED_BY_KEY concept_found_x = concept_found.get_compiled()["x"] assert concept_found_x.get_hints().use_copy + assert concept_found_x.get_hints().need_validation + assert concept_found_x.get_hints().recognized_by == RECOGNIZED_BY_KEY concept_found_x_x = concept_found_x.get_compiled()["x"] assert concept_found_x_x.get_hints().use_copy + def 
test_i_can_rollback_concept_during_finalize(self): + concepts_map = { + "[": Concept("[z for x in y]").def_var("z").def_var("x").def_var("y"), + "foo": Concept("foo"), + "bar": Concept("bar"), + "baz": Concept("baz"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + text = "[ foo for foo in [bar, baz] ]" + + res = parser.parse(context, ParserInput(text)) + nodes = res.body.body + + assert res.status + assert len(nodes) == 1 + node = nodes[0] + + assert sheerka.isinstance(node.concept, concepts_map["["]) + assert node.concept.get_compiled()["z"] == concepts_map["foo"] + assert node.concept.get_compiled()["x"] == concepts_map["foo"] + assert node.concept.get_compiled()["y"][0].who == "parsers.Python" + assert node.concept.get_compiled()["y"][0].body.source == '[bar, baz]' + class TestFileBaseSyaNodeParser(TestUsingFileBasedSheerka): def test_i_can_parse_after_restart(self): diff --git a/tests/sheerkapython/test_ExprToPython.py b/tests/sheerkapython/test_ExprToPython.py new file mode 100644 index 0000000..523eb31 --- /dev/null +++ b/tests/sheerkapython/test_ExprToPython.py @@ -0,0 +1,422 @@ +import pytest + +from core.builtin_concepts_ids import BuiltinConcepts +from core.concept import Concept +from core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints +from core.sheerka.services.SheerkaExecute import ParserInput +from evaluators.PythonEvaluator import PythonEvaluator +from parsers.BaseParser import ErrorSink +from parsers.ExpressionParser import ExpressionParser +from parsers.ListComprehensionParser import ListComprehensionParser +from parsers.PythonParser import PythonNode +from sheerkapython.ExprToPython import PythonExprVisitor +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + + +class TestExprToPython(TestUsingMemoryBasedSheerka): + + @staticmethod + def get_expr_node(context, expression, parser=None): + parser = parser or ExpressionParser() + error_sink = ErrorSink() + parser_input = 
ParserInput(expression) + parser.reset_parser_input(parser_input, error_sink) + parsed = parser.parse_input(context, parser_input, error_sink) + + assert not error_sink.has_error + + return parsed + + @staticmethod + def eval(context, return_value, namespace=None): + evaluator = PythonEvaluator() + assert evaluator.matches(context, return_value) + + if namespace: + for k, v in namespace.items(): + context.add_to_short_term_memory(k, v) + + res = evaluator.eval(context, return_value) + assert res.status + return res.body + + @pytest.mark.parametrize("expression, source, objects", [ + ("foo w", "call_concept(__o_00__, x=w)", {"__o_00__": "foo"}), + ("foo z + 2", "call_concept(__o_00__, x=z + 2)", {"__o_00__": "foo"}), + ("foo a and bar b", + "call_concept(__o_00__, x=a) and call_concept(__o_01__, y=b)", + {"__o_00__": "foo", "__o_01__": "bar"}), + ("foo a or bar b", + "call_concept(__o_00__, x=a) or call_concept(__o_01__, y=b)", + {"__o_00__": "foo", "__o_01__": "bar"}), + ("not foo w", "not call_concept(__o_00__, x=w)", {"__o_00__": "foo"}), + ("foo a >= bar b", + "call_concept(__o_00__, x=a) >= call_concept(__o_01__, y=b)", + {"__o_00__": "foo", "__o_01__": "bar"}), + ("function(foo a, bar b)", + "function(call_concept(__o_00__, x=a), call_concept(__o_01__, y=b))", + {"__o_00__": "foo", "__o_01__": "bar"}) + + ]) + def test_i_can_compile_concept_when_is_question_is_false(self, expression, source, objects): + sheerka, context, foo, bar = self.init_test().with_concepts( + Concept("foo x", body="x").def_var("x"), + Concept("bar y", body="y").def_var("y"), + create_new=True + ).unpack() + + concepts = { + "foo": foo, + "bar": bar + } + + node = self.get_expr_node(context, expression) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == source + + for obj_name, obj_value in objects.items(): + assert obj_name in 
python_node.objects + + obj = python_node.objects[obj_name] + if isinstance(obj, Concept): + assert sheerka.isinstance(obj, concepts[obj_value]) + assert obj.get_hints().use_copy + assert obj.get_hints().is_evaluated + else: + assert False + + @pytest.mark.parametrize("expression, source, objects", [ + ("foo w", "evaluate_question(__o_00__, x=w)", {"__o_00__": "foo"}), + ("foo z + 2", "evaluate_question(__o_00__, x=z + 2)", {"__o_00__": "foo"}), + ]) + def test_i_can_compile_concept_when_is_question_is_true(self, expression, source, objects): + sheerka, context, foo = self.init_test().with_concepts( + Concept("foo x", body="x", pre="is_question()").def_var("x"), + create_new=True + ).unpack() + + concepts = { + "foo": foo + } + + node = self.get_expr_node(context, expression) + visitor = PythonExprVisitor(context) + ret = visitor.compile(node, EvaluationHints(eval_body=True, eval_question=True)) + + assert len(ret) == 1 + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == source + + for obj_name, obj_value in objects.items(): + assert obj_name in python_node.objects + + obj = python_node.objects[obj_name] + if isinstance(obj, Concept): + assert sheerka.isinstance(obj, concepts[obj_value]) + assert obj.get_hints().use_copy + assert obj.get_hints().is_evaluated + else: + assert False + + def test_i_can_compile_simple_list_comprehension(self): + sheerka, context = self.init_test().unpack() + + expression = "[ x for x in ['a', 'b'] if x == 'a' ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert 
python_node.source == expression + + assert self.eval(context, ret[0]) == ["a"] + + def test_i_can_compile_simple_list_comprehension_when_no_if(self): + sheerka, context = self.init_test().unpack() + + expression = "[ x for x in ['a', 'b'] ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == expression + + assert self.eval(context, ret[0]) == ['a', 'b'] + + def test_i_can_compile_list_comprehension_when_element_is_a_concept(self): + sheerka, context, foo = self.init_test().with_concepts( + Concept("foo x", body="x").def_var("x") + ).unpack() + + expression = "[ foo w for w in ['a', 'b'] ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == "[ call_concept(__o_00__, x=w) for w in ['a', 'b'] ]" + assert "__o_00__" in python_node.objects + + concept = python_node.objects["__o_00__"] + assert sheerka.isinstance(concept, foo) + assert concept.get_hints().use_copy + assert concept.get_hints().is_evaluated + + assert self.eval(context, ret[0]) == ["a", "b"] + + def test_i_can_compile_list_comprehension_when_concept_with_complex_parameter(self): + sheerka, context, foo = self.init_test().with_concepts( + Concept("foo x", 
body="x").def_var("x"), + create_new=True + ).unpack() + + expression = "[ foo w + 1 for w in [1, 2] ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == "[ call_concept(__o_00__, x=w + 1) for w in [1, 2] ]" + assert "__o_00__" in python_node.objects + + assert self.eval(context, ret[0]) == [2, 3] + + def test_i_can_compile_list_comprehension_when_iter_is_a_concept(self): + sheerka, context, red, blue, color, foo = self.init_test().with_concepts( + "red", + "blue", + "color", + Concept("foo x", body="x").def_var("x") + ).unpack() + + global_truth_context = self.get_context(sheerka, global_truth=True) + sheerka.set_isa(global_truth_context, red, color) + sheerka.set_isa(global_truth_context, blue, color) + + expression = "[ foo x for x in colors ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == "[ call_concept(__o_00__, x=x) for x in call_concept(__o_01__) ]" + assert "__o_00__" in python_node.objects + assert "__o_01__" in python_node.objects + + concept0 = python_node.objects["__o_00__"] + assert sheerka.isinstance(concept0, foo) + assert concept0.get_hints().use_copy + assert concept0.get_hints().is_evaluated 
+ + concept1 = python_node.objects["__o_01__"] + assert sheerka.isinstance(concept1, "colors") + assert concept1.get_hints().use_copy + assert concept1.get_hints().is_evaluated + + assert set(self.eval(context, ret[0])) == {red, blue} + + def test_i_can_compile_list_comprehension_when_if_expression_is_a_concept(self): + sheerka, context, red, blue, color, foo, startswith = self.init_test().with_concepts( + "red", + "blue", + "color", + Concept("foo x", body="x").def_var("x"), + Concept("x starts with y", body="x.name.startswith(y)", pre="is_question()").def_var("x").def_var("y") + ).unpack() + global_truth_context = self.get_context(sheerka, global_truth=True) + sheerka.set_isa(global_truth_context, red, color) + sheerka.set_isa(global_truth_context, blue, color) + + expression = "[ foo x for x in colors if x starts with 'b' ]" + + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == "[ call_concept(__o_00__, x=x) for x in call_concept(__o_01__) if evaluate_question(__o_02__, x=x, y='b') ]" + assert "__o_00__" in python_node.objects + assert "__o_01__" in python_node.objects + assert "__o_02__" in python_node.objects + assert visitor.obj_counter == 3 + + assert set(self.eval(context, ret[0])) == {blue} + + def test_i_can_compile_list_comprehension_when_multiple_concepts(self): + sheerka, context, foo1, foo2, bar1, bar2, colors1, colors2, = self.init_test().with_concepts( + Concept("foo x").def_var("x"), + Concept("foo y", ).def_var("y"), + Concept("bar x", pre="is_question()").def_var("x"), + Concept("bar y", pre="is_question()").def_var("y"), 
+ Concept("colors", body="[1]"), + Concept("colors", body="[2]"), + ).unpack() + + expression = "[ foo a for a in colors if bar a ]" + + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 8 + python_node = ret[0].body.body + object_to_compare = {k: v.name for k, v in python_node.objects.items()} + assert python_node.source == '[ call_concept(__o_00__, x=a) for a in call_concept(__o_02__) if evaluate_question(__o_04__, x=a) ]' + assert object_to_compare == {"__o_00__": "foo x", "__o_02__": "colors", "__o_04__": "bar x"} + + # ... + + python_node = ret[7].body.body + object_to_compare = {k: v.name for k, v in python_node.objects.items()} + assert python_node.source == '[ call_concept(__o_01__, y=a) for a in call_concept(__o_03__) if evaluate_question(__o_05__, y=a) ]' + assert object_to_compare == {"__o_01__": "foo y", "__o_03__": "colors", "__o_05__": "bar y"} + + def test_i_can_compile_list_comprehension_when_missing_concept_parameter(self): + sheerka, context, foo = self.init_test().with_concepts( + Concept("foo x y", body="x").def_var("x").def_var("y") + ).unpack() + + expression = "[ foo x k for x in ['a', 'b'] ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + python_node = ret[0].body.body + object_to_compare = {k: v.name for k, v in python_node.objects.items()} + assert python_node.source == "[ call_concept(__o_00__, x=x, y=k) for x in ['a', 'b'] ]" + assert object_to_compare == {"__o_00__": "foo x y"} + + def test_i_can_compile_simple_list_comprehension_when_multiple_for(self): + sheerka, context = self.init_test().unpack() + + expression = "[ (x, y) for x in ['a', 'b'] if x == 'a' for y in ['c', 'd'] if y == 'c' ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + 
visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == expression + + assert self.eval(context, ret[0]) == [("a", "c")] + + def test_i_can_compile_and_when_multiple_results(self): + sheerka, context, foo, foo2, bar, bar2 = self.init_test().with_concepts( + Concept("foo x", body="x").def_var("x"), + Concept("foo y", body="y").def_var("y"), + Concept("bar x", body="x").def_var("x"), + Concept("bar y", body="y").def_var("y"), + create_new=True + ).unpack() + + node = self.get_expr_node(context, "foo a and bar b") + visitor = PythonExprVisitor(context) + ret = visitor.compile(node) + + assert len(ret) == 4 + python_node = ret[0].body.body + object_to_compare = {k: v.name for k, v in python_node.objects.items()} + assert python_node.source == 'call_concept(__o_00__, x=a) and call_concept(__o_02__, x=b)' + assert object_to_compare == {"__o_00__": "foo x", "__o_02__": "bar x"} + + python_node = ret[1].body.body + object_to_compare = {k: v.name for k, v in python_node.objects.items()} + assert python_node.source == 'call_concept(__o_00__, x=a) and call_concept(__o_03__, y=b)' + assert object_to_compare == {"__o_00__": "foo x", "__o_03__": "bar y"} + + python_node = ret[2].body.body + object_to_compare = {k: v.name for k, v in python_node.objects.items()} + assert python_node.source == 'call_concept(__o_01__, y=a) and call_concept(__o_02__, x=b)' + assert object_to_compare == {"__o_01__": "foo y", "__o_02__": "bar x"} + + python_node = ret[3].body.body + object_to_compare = {k: v.name for k, v in python_node.objects.items()} + assert python_node.source == 'call_concept(__o_01__, y=a) and call_concept(__o_03__, y=b)' + assert 
object_to_compare == {"__o_01__": "foo y", "__o_03__": "bar y"} + + def test_i_can_compile_when_element_is_missing_its_parenthesis(self): + sheerka, context, foo = self.init_test().with_concepts( + Concept("foo x", body="x").def_var("x") + ).unpack() + + expression = "[ w, foo w for w in ['a', 'b'] ]" + node = self.get_expr_node(context, expression, parser=ListComprehensionParser()) + visitor = PythonExprVisitor(context) + + ret = visitor.compile(node) + + assert len(ret) == 1 + assert sheerka.isinstance(ret[0], BuiltinConcepts.RETURN_VALUE) + assert sheerka.isinstance(ret[0].body, BuiltinConcepts.PARSER_RESULT) + assert isinstance(ret[0].body.body, PythonNode) + + python_node = ret[0].body.body + assert python_node.original_source == expression + assert python_node.source == "[ (w, call_concept(__o_00__, x=w)) for w in ['a', 'b'] ]" + assert "__o_00__" in python_node.objects + + concept = python_node.objects["__o_00__"] + assert sheerka.isinstance(concept, foo) + assert concept.get_hints().use_copy + assert concept.get_hints().is_evaluated + + assert self.eval(context, ret[0]) == [("a", "a"), ("b", "b")]