From 6c7c52901699f0cf2f9da8ae2c596fa42c284b95 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Thu, 9 Apr 2020 15:42:36 +0200 Subject: [PATCH] Added SyaNodeParser (finally, after one month) --- docs/blog.rst | 10 +- predecence | 1 + src/core/builtin_concepts.py | 29 +- src/core/builtin_helpers.py | 177 +++ src/core/concept.py | 83 +- src/core/sheerka/ExecutionContext.py | 10 +- .../Services/SheerkaCreateNewConcept.py | 39 +- .../Services/SheerkaEvaluateConcept.py | 2 +- src/core/sheerka/Services/SheerkaExecute.py | 39 +- src/core/sheerka/Sheerka.py | 104 +- src/core/utils.py | 38 +- src/evaluators/AddConceptEvaluator.py | 2 +- src/evaluators/LexerNodeEvaluator.py | 3 +- src/evaluators/MultipleErrorsEvaluator.py | 51 + src/evaluators/OneErrorEvaluator.py | 4 + src/evaluators/PythonEvaluator.py | 3 +- src/parsers/AtomNodeParser.py | 369 +++++ src/parsers/BaseNodeParser.py | 669 +++++++++ src/parsers/BaseParser.py | 58 +- ...ConceptLexerParser.py => BnfNodeParser.py} | 185 +-- src/parsers/BnfParser.py | 8 +- src/parsers/ConceptsWithConceptsParser.py | 23 +- src/parsers/DefaultParser.py | 17 +- src/parsers/EmptyStringParser.py | 10 +- src/parsers/ExactConceptParser.py | 25 +- src/parsers/MultipleConceptsParser.py | 25 +- src/parsers/PythonParser.py | 49 +- src/parsers/PythonWithConceptsParser.py | 15 +- src/parsers/SyaNodeParser.py | 1117 +++++++++++++++ src/parsers/UnrecognizedNodeParser.py | 114 ++ src/sdp/sheerkaDataProvider.py | 19 +- src/sdp/sheerkaDataProviderIO.py | 4 + tests/BaseTest.py | 5 +- tests/TestUsingMemoryBasedSheerka.py | 29 +- tests/core/test_SheerkaCreateNewConcept.py | 28 + tests/core/test_SheerkaHistoryManager.py | 2 +- tests/core/test_sheerka.py | 2 +- tests/core/test_sheerka_call_parsers.py | 37 +- tests/core/test_utils.py | 2 +- tests/evaluators/test_AddConceptEvaluator.py | 8 +- .../test_AddConceptInSetEvaluator.py | 8 +- tests/evaluators/test_LexerNodeEvaluator.py | 5 +- .../test_MultipleErrorsEvaluator.py | 98 ++ tests/evaluators/test_OneErrorEvaluator.py | 1 + tests/non_reg/test_sheerka_non_reg.py | 132 +- tests/parsers/parsers_utils.py | 150 +++ tests/parsers/test_AtomsParser.py | 241 ++++ ...arser.py => test_BnfConceptLexerParser.py} | 36 +- tests/parsers/test_BnfParser.py | 7 +- .../test_ConceptsWithConceptsParser.py | 5 +- tests/parsers/test_DefaultParser.py | 4 +- tests/parsers/test_MultipleConceptsParser.py | 5 +- tests/parsers/test_PythonParser.py | 21 +- .../parsers/test_PythonWithConceptsParser.py | 18 +- tests/parsers/test_SyaConceptLexerParser.py | 1197 +++++++++++++++++ tests/parsers/test_UnrecognizedNodeParser.py | 383 ++++++ 56 files changed, 5322 insertions(+), 404 deletions(-) create mode 100644 predecence create mode 100644 src/evaluators/MultipleErrorsEvaluator.py create mode 100644 src/parsers/AtomNodeParser.py create mode 100644 src/parsers/BaseNodeParser.py rename src/parsers/{ConceptLexerParser.py => BnfNodeParser.py} (84%) create mode 100644 src/parsers/SyaNodeParser.py create mode 100644 src/parsers/UnrecognizedNodeParser.py create mode 100644 tests/evaluators/test_MultipleErrorsEvaluator.py create mode 100644 tests/parsers/parsers_utils.py create mode 100644 tests/parsers/test_AtomsParser.py rename tests/parsers/{test_ConceptLexerParser.py => test_BnfConceptLexerParser.py} (98%) create mode 100644 tests/parsers/test_SyaConceptLexerParser.py create mode 100644 tests/parsers/test_UnrecognizedNodeParser.py diff --git a/docs/blog.rst b/docs/blog.rst index 7745237..3c937c8 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -659,7 +659,7 @@ For 
the two questions, I will first try the simple implementations and see there
 
 Going back on BNF implementation. As it's Christmas eve today, I won't stay very long.
 
-So, the implementation lies in the class ConceptLexerParser, a it's a lexer not for token, but for concept.
+So, the implementation lies in the class BnfNodeParser, as it's a lexer, not for tokens, but for concepts.
 The purpose of this class is to recognize a sequence of Concept.
 
 So if we defines the following concepts
@@ -675,7 +675,7 @@ when you input
 
     one two three four five
 
-the list of :code:`[foo, bar]` will be returned by the ConceptLexerParser (as return values)
+the list of :code:`[foo, bar]` will be returned by the BnfNodeParser (as return values)
 
 
 How does it works ?
@@ -696,7 +696,7 @@ Some example :
 and so on...
 
 So when a concept is defined using its bnf definition, I use the **BnfParser** to create the grammar, and then
-I use the **ConceptLexerParser** to recognize the concepts
+I use the **BnfNodeParser** to recognize the concepts
 
 The current implementation to recognize a concept is not very efficient. All the definitions are in a dictionary
 and I go thru the whole dictionary to see if some concepts are recognized. Once a concept is found, I loop again
@@ -713,7 +713,7 @@ So once the parsing is effective, I return a **ConceptNode** object
 
     class ConceptNode(LexerNode):
         """
-        Returned by the ConceptLexerParser
+        Returned by the BnfNodeParser
         It represents a recognized concept
         """
 
@@ -859,7 +859,7 @@ As of now, I have implemented the following parsers:
 
 * DefaultParser (the name is not accurate)
   To recognize builtin syntax (like 'def concept' or 'isa')
-* ConceptLexerParser
+* BnfNodeParser
   To recognize concept defined with BNF language
 
 All theses parsers are executed in the row (the order in not very important)
diff --git a/predecence b/predecence
new file mode 100644
index 0000000..873f5ce
--- /dev/null
+++ b/predecence
@@ -0,0 +1 @@
+ReturnValue(who=evaluators.TooManySuccess, status=False, value=(21)__TOO_MANY_SUCCESS, message=None)
diff --git a/src/core/builtin_concepts.py b/src/core/builtin_concepts.py
index 4399afa..dfb7624 100644
--- a/src/core/builtin_concepts.py
+++ b/src/core/builtin_concepts.py
@@ -37,6 +37,8 @@ class BuiltinConcepts(Enum):
     PARSER_RESULT = "parser result"
     TOO_MANY_SUCCESS = "too many success"  # when expecting a limited number of successful return value
     TOO_MANY_ERRORS = "too many errors"  # when expecting a limited number of successful return value
+    ONLY_SUCCESSFUL = "only successful"  # filter the result, only keep successful ones
+    MULTIPLE_ERRORS = "multiple errors"  # filter the result, only keep evaluators in error
     NOT_FOR_ME = "not for me"  # a parser recognize that the entry is not meant for it
     IS_EMPTY = "is empty"  # when a set is empty
     INVALID_RETURN_VALUE = "invalid return value"  # the return value of an evaluator is not correct
@@ -45,6 +47,7 @@ class BuiltinConcepts(Enum):
     CONCEPT_EVAL_ERROR = "concept evaluation error"  # cannot evaluate a property or metadata of a concept
     ENUMERATION = "enum"  # represents a list or a set
     LIST = "list"  # represents a list
+    FILTERED = "filtered"  # represents the result of a filtering
     CONCEPT_ALREADY_IN_SET = "concept already in set"
     EVALUATOR_PRE_PROCESS = "evaluator pre process"  # used modify / tweak behaviour of evaluators
     EVAL_BODY_REQUESTED = "eval body requested"  # to evaluate the body
@@ -91,6 +94,7 @@ BuiltinErrors = [str(e) for e in {
     BuiltinConcepts.UNKNOWN_PROPERTY,
     BuiltinConcepts.TOO_MANY_SUCCESS,
     BuiltinConcepts.TOO_MANY_ERRORS,
+
BuiltinConcepts.MULTIPLE_ERRORS, BuiltinConcepts.INVALID_RETURN_VALUE, BuiltinConcepts.CONCEPT_ALREADY_DEFINED, BuiltinConcepts.CONCEPT_EVAL_ERROR, @@ -249,11 +253,12 @@ class ParserResultConcept(Concept): Result of a parsing """ - def __init__(self, parser=None, source=None, value=None, try_parsed=None, validate_concept=None): + def __init__(self, parser=None, source=None, tokens=None, value=None, try_parsed=None): super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT) self.set_metadata_value(ConceptParts.BODY, value) self.set_prop("parser", parser) self.set_prop("source", source) + self.set_prop("tokens", tokens) self.set_prop("try_parsed", try_parsed) # in case of error, what was found before the error self.metadata.is_evaluated = True @@ -372,6 +377,14 @@ class ListConcept(Concept): # return item in self.body +class FilteredConcept(Concept): + def __init__(self, filtered=None, iterable=None, predicate=None): + super().__init__(BuiltinConcepts.FILTERED, True, False, BuiltinConcepts.FILTERED) + self.set_metadata_value(ConceptParts.BODY, filtered) + self.def_prop("iterable", iterable) + self.def_prop("predicate", predicate) + + class ConceptAlreadyInSet(Concept): def __init__(self, concept=None, concept_set=None): super().__init__(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, @@ -409,3 +422,17 @@ class WhereClauseFailed(Concept): @property def concept(self): return self.body + + +class NotForMeConcept(Concept): + def __init__(self, source=None, reason=None): + super().__init__(BuiltinConcepts.NOT_FOR_ME, + True, + False, + BuiltinConcepts.NOT_FOR_ME) + self.set_metadata_value(ConceptParts.BODY, source) + self.def_prop("reason", reason) + self.metadata.is_evaluated = True + + def __repr__(self): + return f"NotForMeConcept(source={self.body}, reason={self.get_prop('reason')})" diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index c89972e..6bb5e78 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -6,6 +6,8 @@ from core.ast.nodes import CallNodeConcept, GenericNodeConcept from core.ast.visitors import UnreferencedNamesVisitor from core.builtin_concepts import BuiltinConcepts from core.concept import Concept +from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode +from parsers.BaseParser import BaseParser, ErrorNode def is_same_success(context, return_values): @@ -132,6 +134,181 @@ def expect_one(context, return_values): parents=return_values) +def only_successful(context, return_values): + """ + Removes all return values that are not successful + Return error when no successful return value + :param context: + :param return_values: + :return: + """ + if not isinstance(return_values, list): + return return_values + + sheerka = context.sheerka + + if len(return_values) == 0: + return sheerka.ret( + context.who, + False, + sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values), + parents=return_values) + + successful_results = [item for item in return_values if item.status] + if len(successful_results) == 0: + return sheerka.ret( + context.who, + False, + sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values), + parents=return_values) + + return sheerka.ret( + context.who, + True, + sheerka.new(BuiltinConcepts.ONLY_SUCCESSFUL, body=successful_results), + parents=return_values) + + +def only_parsers_results(context, return_values): + """ + Filters the return_values and returns when the result is a ParserResult + regardless of the status + + So it filters errors + :param 
context: + :param return_values: + :return: + """ + + if not isinstance(return_values, list): + return return_values + + sheerka = context.sheerka + + if len(return_values) == 0: + return sheerka.ret( + context.who, + False, + sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values), + parents=return_values) + + return_values_ok = [item for item in return_values if sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)] + + # hack because some parsers don't follow the NOT_FOR_ME rule + temp_ret_val = [] + for ret_val in return_values_ok: + if isinstance(ret_val.body.body, ErrorNode): + continue + if isinstance(ret_val.body.body, list) and \ + len(ret_val.body.body) == 1 and \ + isinstance(ret_val.body.body[0], UnrecognizedTokensNode): + continue + temp_ret_val.append(ret_val) + return_values_ok = temp_ret_val + + if len(return_values_ok) == 0: + return sheerka.ret( + context.who, + False, + sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values), + parents=return_values) + + return sheerka.ret( + context.who, + True, + sheerka.new(BuiltinConcepts.FILTERED, + body=return_values_ok, + iterable=return_values, + predicate="sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)"), + parents=return_values) + + +def parse_unrecognized(context, tokens, parsers): + """ + Try to recognize concepts or code from tokens using the given parsers + :param context: + :param tokens: + :param parsers: + :return: + """ + steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING] + sheerka = context.sheerka + + with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context: + # disable all parsers but the following ones + sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False) + for parser in parsers: + sub_context.add_preprocess(BaseParser.PREFIX + parser, enabled=True) + + sub_context.add_inputs(source=tokens) + to_parse = sheerka.ret( + context.who, + True, + sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens)) + res = sheerka.execute(sub_context, to_parse, steps) + sub_context.add_values(return_values=res) + + # discard Python response if accepted by AtomNode + is_concept = False + for r in res: + if r.status and r.who == "parsers.AtomNode": + is_concept = True + + if not is_concept: + return res + + filtered = [] + for r in res: + if r.who == "parsers.Python": + continue + filtered.append(r) + + return filtered + + +def get_lexer_nodes(return_values, start, tokens): + """ + From a parser result, return the corresponding LexerNode + either ConceptNode, UnrecognizedTokensNode or SourceCodeNode + :param return_values: + :param start: + :param tokens: + :return: list of list (list of concept node sequence) + """ + + lexer_nodes = [] + for ret_val in return_values: + if ret_val.who == "parsers.Python": + + if ret_val.body.source.strip().isalnum() and not ret_val.body.source.strip().isnumeric(): + # Discard SourceCodeNode which seems to be a concept + # It may be a wrong idea, so let's see + continue + + end = start + len(tokens) - 1 + lexer_nodes.append([SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)]) + + elif ret_val.who == "parsers.ExactConcept": + concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body] + end = start + len(tokens) - 1 + for concept in concepts: + lexer_nodes.append([ConceptNode(concept, start, end, tokens, ret_val.body.source)]) + + elif ret_val.who in ("parsers.BnfNode", "parsers.SyaNode", "parsers.AtomNode"): + nodes = [node for 
node in ret_val.body.body]
+            for node in nodes:
+                node.start += start
+                node.end += start
+
+            # but append the whole sequence when it's a sequence
+            lexer_nodes.append(nodes)
+
+        else:
+            raise NotImplementedError()
+
+    return lexer_nodes
+
+
 def get_names(sheerka, concept_node):
     """
     Finds all the names referenced by the concept_node
diff --git a/src/core/concept.py b/src/core/concept.py
index 5c462c7..8c70b21 100644
--- a/src/core/concept.py
+++ b/src/core/concept.py
@@ -108,11 +108,14 @@ class Concept:
 
     def __eq__(self, other):
 
+        if id(self) == id(other):
+            return True
+
         if isinstance(other, simplec):
             return self.name == other.name and self.body == other.body
 
-        if id(self) == id(other):
-            return True
+        if isinstance(other, CC):
+            return other == self
 
         if not isinstance(other, Concept):
             return False
@@ -346,6 +349,17 @@ class Concept:
         """
         return self.props[prop_name].value
 
+    def set_prop_by_index(self, index: int, value):
+        """
+        Set the value of a property (not the metadata) using its index
+        :param index: index of the property to set
+        :param value:
+        :return:
+        """
+        prop_name = list(self.props.keys())[index]
+        self.props[prop_name].value = value
+        return self
+
     def set_metadata_value(self, metadata: ConceptParts, value):
         """
         Set the resolved value of a metadata (not the metadata itself)
@@ -438,3 +452,68 @@ class InfiniteRecursionResolved:
 
     def get_value(self):
         return self.value
+
+
+class CC:
+    """
+    Concept class for test purpose
+    CC means concept for compiled (or concept with compiled)
+    It matches a concept if the compiled properties are equal
+    """
+
+    # The only properties that are tested are concept_key and compiled
+    # The other properties (concept, source, start and end)
+    # are used in tests/parsers/parsers_utils.py to help create helper objects
+
+    def __init__(self, concept, source=None, **kwargs):
+        self.concept_key = concept.key if isinstance(concept, Concept) else concept
+        self.compiled = kwargs
+        self.concept = concept if isinstance(concept, Concept) else None
+        self.source = source  # used when the key differs from the substring to search for when filling start and end
+        self.start = None  # for debug purpose, indicates where the concept starts
+        self.end = None  # for debug purpose, indicates where the concept ends
+
+    def __eq__(self, other):
+        if id(self) == id(other):
+            return True
+
+        if isinstance(other, Concept):
+            if other.key != self.concept_key:
+                return False
+            return self.compiled == other.compiled
+
+        if not isinstance(other, CC):
+            return False
+
+        return self.concept_key == other.concept_key and \
+               self.compiled == other.compiled
+
+    def __hash__(self):
+        if self.concept:
+            return hash(self.concept)
+        return hash(self.concept_key)
+
+    def __repr__(self):
+        if self.concept:
+            txt = f"CC(concept='{self.concept}'"
+        else:
+            txt = f"CC(concept_key='{self.concept_key}'"
+
+        for k, v in self.compiled.items():
+            txt += f", {k}='{v}'"
+        return txt + ")"
+
+    def fix_pos(self, node):
+        start = node.start if hasattr(node, "start") else \
+            node[0] if isinstance(node, tuple) else None
+        end = node.end if hasattr(node, "end") else \
+            node[1] if isinstance(node, tuple) else None
+
+        if start is not None:
+            if self.start is None or start < self.start:
+                self.start = start
+
+        if end is not None:
+            if self.end is None or end > self.end:
+                self.end = end
+        return self
diff --git a/src/core/sheerka/ExecutionContext.py b/src/core/sheerka/ExecutionContext.py
index 48094f3..ac43658 100644
--- a/src/core/sheerka/ExecutionContext.py
+++ b/src/core/sheerka/ExecutionContext.py
@@ -43,6 +43,7 @@ class ExecutionContext: desc: str = None, logger=None, global_hints=None, + global_errors=None, **kwargs): self._parent = None @@ -61,6 +62,7 @@ class ExecutionContext: self.logger = logger self.local_hints = set() self.global_hints = set() if global_hints is None else global_hints + self.global_errors = [] if global_errors is None else global_errors self.inputs = {} # what was the parameters of the execution context self.values = {} # what was produced by the execution context @@ -146,8 +148,8 @@ class ExecutionContext: preprocess.set_prop(k, v) if not self.preprocess: - self.preprocess = set() - self.preprocess.add(preprocess) + self.preprocess = [] + self.preprocess.append(preprocess) return self def add_inputs(self, **kwargs): @@ -212,6 +214,7 @@ class ExecutionContext: desc, logger, self.global_hints, + self.global_errors, **_kwargs) new._parent = self new._tab = self._tab + " " * DEBUG_TAB_SIZE @@ -230,7 +233,8 @@ class ExecutionContext: if self.logger and not self.logger.disabled: self.logger.debug(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message)) - def log_error(self, message, who=None): + def log_error(self, message, who=None, exc=None): + self.global_errors.append(exc or message) if self.logger and not self.logger.disabled: self.logger.exception(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message)) diff --git a/src/core/sheerka/Services/SheerkaCreateNewConcept.py b/src/core/sheerka/Services/SheerkaCreateNewConcept.py index f933d35..8b5ddf9 100644 --- a/src/core/sheerka/Services/SheerkaCreateNewConcept.py +++ b/src/core/sheerka/Services/SheerkaCreateNewConcept.py @@ -1,8 +1,10 @@ from core.builtin_concepts import BuiltinConcepts, ErrorConcept from core.concept import Concept from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderRef +import core.utils -CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser" +BNF_NODE_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser" +BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser" class SheerkaCreateNewConcept: @@ -13,6 +15,7 @@ class SheerkaCreateNewConcept: def __init__(self, sheerka): self.sheerka = sheerka self.logger_name = self.create_new_concept.__name__ + self.base_lexer_parser = core.utils.get_class(BASE_NODE_PARSER_CLASS)("BaseNodeParser", 0) def create_new_concept(self, context, concept: Concept): """ @@ -25,7 +28,7 @@ class SheerkaCreateNewConcept: concept.init_key() concepts_definitions = None - init_ret_value = None + init_bnf_ret_value = None sdp = self.sheerka.sdp @@ -49,13 +52,19 @@ class SheerkaCreateNewConcept: concepts_definitions[concept] = concept.bnf # check if it's a valid BNF or whether it breaks the known rules - concept_lexer_parser = self.sheerka.parsers[CONCEPT_LEXER_PARSER_CLASS]() + bnf_lexer_parser = self.sheerka.parsers[BNF_NODE_PARSER_CLASS]() with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context: sub_context.concepts[concept.key] = concept # the concept is not in the real cache yet - init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions) - sub_context.add_values(return_values=init_ret_value) - if not init_ret_value.status: - return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) + init_bnf_ret_value = bnf_lexer_parser.initialize(sub_context, concepts_definitions) + sub_context.add_values(return_values=init_bnf_ret_value) + if not init_bnf_ret_value.status: + 
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_bnf_ret_value.value)) + + # update concept definition by key + init_sya_ret_value = self.base_lexer_parser.initialize(context, [concept], use_sheerka=True) + if not init_sya_ret_value.status: + return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value)) + concepts_by_first_keyword = init_sya_ret_value.body concept.freeze_definition_hash() @@ -97,9 +106,15 @@ class SheerkaCreateNewConcept: sdp.set( context.event.get_digest(), self.sheerka.CONCEPTS_DEFINITIONS_ENTRY, - concept_lexer_parser.encode_grammar(init_ret_value.body), + bnf_lexer_parser.encode_grammar(init_bnf_ret_value.body), use_ref=True) self.sheerka.concepts_definitions_cache = None # invalidate cache + + # update the concepts by first keyword + sdp.set(context.event.get_digest(), + self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, + concepts_by_first_keyword) + except SheerkaDataProviderDuplicateKeyError as error: context.log_error("Failed to create a new concept.", who=self.logger_name) return self.sheerka.ret( @@ -109,13 +124,13 @@ class SheerkaCreateNewConcept: error.args[0]) # Updates the caches - self.sheerka.cache_by_key[concept.key] = sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key) self.sheerka.cache_by_name[concept.name] = sdp.get_safe(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name) self.sheerka.cache_by_id[concept.id] = concept - if init_ret_value is not None and init_ret_value.status: - self.sheerka.concepts_grammars = init_ret_value.body + if init_bnf_ret_value is not None and init_bnf_ret_value.status: + self.sheerka.concepts_grammars = init_bnf_ret_value.body + self.sheerka.concepts_by_first_keyword = concepts_by_first_keyword - # process the return in needed + # process the return if needed ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) return ret diff --git a/src/core/sheerka/Services/SheerkaEvaluateConcept.py b/src/core/sheerka/Services/SheerkaEvaluateConcept.py index 8184559..222184f 100644 --- a/src/core/sheerka/Services/SheerkaEvaluateConcept.py +++ b/src/core/sheerka/Services/SheerkaEvaluateConcept.py @@ -1,6 +1,6 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved -from core.builtin_helpers import add_to_ret_val, remove_from_ret_val, expect_one +from core.builtin_helpers import expect_one CONCEPT_EVALUATION_STEPS = [ BuiltinConcepts.BEFORE_EVALUATION, diff --git a/src/core/sheerka/Services/SheerkaExecute.py b/src/core/sheerka/Services/SheerkaExecute.py index 9f0deb3..d182652 100644 --- a/src/core/sheerka/Services/SheerkaExecute.py +++ b/src/core/sheerka/Services/SheerkaExecute.py @@ -33,6 +33,8 @@ class SheerkaExecute: # group the parsers by priorities instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()] + instantiated_parsers = self.preprocess(execution_context, instantiated_parsers) + grouped_parsers = {} for parser in [p for p in instantiated_parsers if p.enabled]: grouped_parsers.setdefault(parser.priority, []).append(parser) @@ -44,7 +46,6 @@ class SheerkaExecute: for parser in grouped_parsers[priority]: - return_value_success_found = False for return_value in inputs_for_this_group: to_parse = return_value.body.body \ @@ -67,22 +68,23 @@ class SheerkaExecute: r.parents = [return_value] result.append(r) if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT): + # if a ParserResultConcept is returned, it 
will be used by the parsers + # of the following groups to_process.append(r) if r.status: - return_value_success_found = True + stop_processing = True else: res.parents = [return_value] result.append(res) if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT): + # if a ParserResultConcept is returned, it will be used by the parsers + # of the following groups to_process.append(res) if res.status: - return_value_success_found = True + stop_processing = True sub_context.add_values(return_values=res) - if return_value_success_found: - stop_processing = True - break # Stop the other return_values (but not the other parsers with the same priority) if stop_processing: break # Do not try the other priorities if a match is found @@ -102,7 +104,7 @@ class SheerkaExecute: instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators] # pre-process evaluators if needed - instantiated_evaluators = self._preprocess_evaluators(execution_context, instantiated_evaluators) + instantiated_evaluators = self.preprocess(execution_context, instantiated_evaluators) for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]: grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator) @@ -123,7 +125,7 @@ class SheerkaExecute: evaluated_items = [] to_delete = [] for evaluator in grouped_evaluators[priority]: - evaluator = self._preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy + evaluator = self.preprocess(execution_context, evaluator.__class__()) # fresh copy sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})" with iteration_context.push(desc=sub_context_desc, logger=evaluator.verbose_log) as sub_context: @@ -215,22 +217,29 @@ class SheerkaExecute: return return_values - def _preprocess_evaluators(self, context, evaluators): + def preprocess(self, context, parsers_or_evaluators): if not context.preprocess: - return evaluators + return parsers_or_evaluators - if not hasattr(evaluators, "__iter__"): + if not hasattr(parsers_or_evaluators, "__iter__"): single_one = True - evaluators = [evaluators] + parsers_or_evaluators = [parsers_or_evaluators] else: single_one = False for preprocess in context.preprocess: - for e in evaluators: - if preprocess.props["name"].value == e.name: + for e in parsers_or_evaluators: + if self.matches(e.name, preprocess.get_prop("name")): for prop, value in preprocess.props.items(): if prop == "name": continue if hasattr(e, prop): setattr(e, prop, value.value) - return evaluators[0] if single_one else evaluators + return parsers_or_evaluators[0] if single_one else parsers_or_evaluators + + @staticmethod + def matches(parser_or_evaluator_name, preprocessor_name): + if preprocessor_name.endswith("*"): + return parser_or_evaluator_name.startswith(preprocessor_name[:-1]) + else: + return parser_or_evaluator_name == preprocessor_name diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index fe55353..8c2d519 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -17,12 +17,7 @@ from core.sheerka_logger import console_handler import logging -# CONCEPT_EVALUATION_STEPS = [ -# BuiltinConcepts.BEFORE_EVALUATION, -# BuiltinConcepts.EVALUATION, -# BuiltinConcepts.AFTER_EVALUATION] - -CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser" +CONCEPT_LEXER_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser" BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser" CONCEPTS_FILE = "_concepts.txt" @@ -37,6 +32,9 @@ class 
Sheerka(Concept): CONCEPTS_BY_NAME_ENTRY = "Concepts_By_Name" CONCEPTS_BY_HASH_ENTRY = "Concepts_By_Hash" # store hash of concepts definitions (not values) CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts + CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Concepts_By_First_Keyword" + CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions" + BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts @@ -65,6 +63,10 @@ class Sheerka(Concept): # a grammar is a resolved BNF self.concepts_grammars = {} + # cache for SYA concepts + self.concepts_by_first_keyword = {} + self.sya_definitions = {} + # a concept can be instantiated # ex: File is a concept, but File('foo.txt') is an instance # TODO: manage contexts @@ -119,7 +121,8 @@ class Sheerka(Concept): self.initialize_builtin_concepts() self.initialize_builtin_parsers() self.initialize_builtin_evaluators() - self.initialize_concepts_definitions(exec_context) + self.initialize_bnf_parsing(exec_context) + self.initialize_sya_parsing() res = ReturnValueConcept(self, True, self) exec_context.add_values(return_values=res) @@ -174,12 +177,25 @@ class Sheerka(Concept): """ core.utils.init_package_import("parsers") base_class = core.utils.get_class("parsers.BaseParser.BaseParser") + modules_to_skip = ["parsers.BaseNodeParser"] + + temp_result = {} for parser in core.utils.get_sub_classes("parsers", base_class): if parser.__module__ == base_class.__module__: continue - self.init_log.debug(f"Adding builtin parser '{parser.__name__}'") - self.parsers[core.utils.get_full_qualified_name(parser)] = parser + if parser.__module__ in modules_to_skip: + continue + + qualified_name = core.utils.get_full_qualified_name(parser) + self.init_log.debug(f"Adding builtin parser '{qualified_name}'") + temp_result[qualified_name] = parser + + # Now we sort the parser by name. + # It's not important for the logic of their usage as they have their priority anyway, + # We do that for the unit tests. 
They are too complicated to write otherwise
+        for name in sorted(temp_result.keys()):
+            self.parsers[name] = temp_result[name]
 
     def initialize_builtin_evaluators(self):
         """
@@ -195,7 +211,7 @@
             self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
             self.evaluators.append(evaluator)
 
-    def initialize_concepts_definitions(self, execution_context):
+    def initialize_bnf_parsing(self, execution_context):
         self.init_log.debug("Initializing concepts grammars.")
 
         definitions = self.get_concepts_definitions(execution_context)
@@ -211,6 +227,25 @@
 
         self.concepts_grammars = lexer_parser.concepts_grammars
 
+    def initialize_sya_parsing(self):
+        self.init_log.debug("Initializing sya definitions.")
+
+        self.concepts_by_first_keyword = self.sdp.get_safe(
+            self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
+            load_origin=False) or {}
+
+        self.sya_definitions = self.sdp.get_safe(
+            self.CONCEPTS_SYA_DEFINITION_ENTRY,
+            load_origin=False) or {}
+
+    def reset(self):
+        self.reset_cache()
+        self.concepts_by_first_keyword = {}
+        self.concepts_grammars = {}
+        self.sya_definitions = {}
+        self.sdp.reset()
+        self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)
+
     def reset_cache(self, filter_to_use=None):
         """
         reset the different cache that exists
@@ -220,6 +255,7 @@
         if filter_to_use is None:
             self.cache_by_key = {}
             self.cache_by_id = {}
+            self.cache_by_name = {}
         else:
             raise NotImplementedError()
 
@@ -324,6 +360,38 @@
         """
         return self.sets_handler.set_isa(context, concept, concept_set)
 
+    def set_sya_def(self, context, list_of_def):
+        """
+        Set the precedence and/or the associativity of a concept
+        :param context:
+        :param list_of_def: list of tuples (concept_id, precedence (int), SyaAssociativity)
+        :return:
+        """
+
+        # validate the entries
+        for concept_id, precedence, associativity in list_of_def:
+            if concept_id == BuiltinConcepts.UNKNOWN_CONCEPT:
+                return self.ret(self.name,
+                                False,
+                                self.new(BuiltinConcepts.ERROR, body=f"Concept {concept_id} is not known"))
+
+        # update the definitions
+        for concept_id, precedence, associativity in list_of_def:
+            if precedence is None and associativity is None:
+                try:
+                    del self.sya_definitions[concept_id]
+                except KeyError:
+                    pass
+            else:
+                self.sya_definitions[concept_id] = (precedence, associativity.value)
+
+        # then save
+        self.sdp.set(context.event.get_digest(),
+                     self.CONCEPTS_SYA_DEFINITION_ENTRY,
+                     self.sya_definitions)
+
+        return self.ret(self.name, True, self.new(BuiltinConcepts.SUCCESS))
+
     def get_set_elements(self, context, concept):
         """
         Concept is supposed to be a set
@@ -571,6 +639,22 @@
 
         return self.value(body_to_use)
 
+    def get_error(self, obj):
+        if isinstance(obj, Concept) and obj.metadata.is_builtin and obj.key in BuiltinErrors:
+            return obj
+
+        if isinstance(obj, list):
+            return obj
+
+        if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
+            if obj.status:
+                return None
+
+            if self.isinstance(obj.body, BuiltinConcepts.PARSER_RESULT):
+                return self.get_error(obj.body.body)
+
+        raise NotImplementedError()
+
     def get_values(self, objs):
         if not (isinstance(objs, list) or
                 self.isinstance(objs, BuiltinConcepts.LIST) or
diff --git a/src/core/utils.py b/src/core/utils.py
index 6554247..86845a7 100644
--- a/src/core/utils.py
+++ b/src/core/utils.py
@@ -163,7 +163,7 @@ def remove_list_from_list(lst, to_remove):
 def product(a, b):
     """
     Kind of cartesian product between lists a and b
-    knowing that a is also a list
+    knowing that a is also a list: a is a list of lists
 !!! So it's a cartesian product between a list of lists and a list
     """
 
@@ -176,7 +176,12 @@
     res = []
     for item_b in b:
         for item_a in a:
-            items = item_a + [item_b]
+            # items = item_a + [item_b]
+            items = item_a[:]
+            if hasattr(item_b, "__iter__"):
+                items.extend(item_b)
+            else:
+                items.append(item_b)
             res.append(items)
     return res
 
@@ -276,6 +281,7 @@ def str_concept(t):
     >>> assert str_concept((None, "id")) == "c:|id:"
     >>> assert str_concept(("key", None)) == "c:key:"
     >>> assert str_concept((None, None)) == ""
+    >>> assert str_concept(Concept(key="foo", id="bar")) == "c:foo|bar:"
     :param t:
     :return:
     """
@@ -297,6 +303,12 @@ def unstr_concept(concept_repr):
     """
     if concept_repr is like :c:key:id: return the key and the id
 
+    >>> assert unstr_concept("c:key:") == "key"
+    >>> assert unstr_concept("c:key|id:") == ("key", "id")
+    >>> assert unstr_concept("c:|id:") == ("None", "id")
+    >>> assert unstr_concept("c:key|:") == ("key", "None")
+    >>> # Otherwise, return (None,None)
+
     :param concept_repr:
     :return:
     """
@@ -371,3 +383,25 @@ def decode_concept(text):
         return key, id_, use_concept
 
     return None, None, None
+
+
+def tokens_index(tokens, sub_tokens, skip=0):
+    """
+    Index of the sub tokens in tokens
+    :param tokens: tokens
+    :param sub_tokens: sub tokens to search
+    :param skip: number of matches to skip
+    :return:
+    """
+    expected = [token.value for token in sub_tokens if token.type != TokenKind.EOF]
+    for i in range(0, len(tokens) - len(expected) + 1):
+        for j in range(len(expected)):
+            if tokens[i + j].value != expected[j]:
+                break
+        else:
+            if skip == 0:
+                return i
+            else:
+                skip -= 1
+
+    raise ValueError(f"sub tokens '{sub_tokens}' not found")
diff --git a/src/evaluators/AddConceptEvaluator.py b/src/evaluators/AddConceptEvaluator.py
index 40b1266..e21c7a6 100644
--- a/src/evaluators/AddConceptEvaluator.py
+++ b/src/evaluators/AddConceptEvaluator.py
@@ -5,7 +5,7 @@ from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
 from core.tokenizer import TokenKind
 from evaluators.BaseEvaluator import OneReturnValueEvaluator
 from parsers.BaseParser import NotInitializedNode
-from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
+from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor
 from parsers.DefaultParser import DefConceptNode, NameNode
 from parsers.PythonParser import PythonNode
 import core.utils
diff --git a/src/evaluators/LexerNodeEvaluator.py b/src/evaluators/LexerNodeEvaluator.py
index 3589ca6..4ccb348 100644
--- a/src/evaluators/LexerNodeEvaluator.py
+++ b/src/evaluators/LexerNodeEvaluator.py
@@ -1,6 +1,7 @@
 from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
 from evaluators.BaseEvaluator import OneReturnValueEvaluator
-from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
+from parsers.BaseNodeParser import SourceCodeNode
+from parsers.BnfNodeParser import ConceptNode
 from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
 
diff --git a/src/evaluators/MultipleErrorsEvaluator.py b/src/evaluators/MultipleErrorsEvaluator.py
new file mode 100644
index 0000000..fb4ac66
--- /dev/null
+++ b/src/evaluators/MultipleErrorsEvaluator.py
@@ -0,0 +1,51 @@
+from core.builtin_concepts import BuiltinConcepts
+from evaluators.BaseEvaluator import AllReturnValuesEvaluator
+from parsers.BaseParser import BaseParser
+
+
+class MultipleErrorsEvaluator(AllReturnValuesEvaluator):
+    """
+    Used to reduce multiple evaluator errors
+    All parser errors will be discarded
+    It cannot match if there is at least one successful evaluator
+    """
+    NAME = "MultipleErrors"
+
+    def __init__(self):
+        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 30)
+        self.return_values_in_error = []
+
+    def matches(self, context, return_values):
+        nb_evaluators_in_error = 0
+        to_process = False
+
+        for ret in return_values:
+            if ret.status and (ret.who.startswith(self.PREFIX) or ret.who.startswith(BaseParser.PREFIX)):
+                return False
+            elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
+                to_process = True
+                self.eaten.append(ret)
+            elif not ret.status and ret.who.startswith(self.PREFIX):
+                nb_evaluators_in_error += 1
+                self.return_values_in_error.append(ret)
+                self.eaten.append(ret)
+            elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
+                self.eaten.append(ret)
+            # else:
+            #     other concepts. We do not care whether they are successful or not
+            #     They won't be part of the result nor part of the parent
+            #     --> So they will be handled by other evaluators
+
+        return to_process and nb_evaluators_in_error > 1
+
+    def eval(self, context, return_values):
+        context.log(f"{len(self.return_values_in_error)} return value(s) in error, {len(self.eaten)} item(s) eaten",
+                    who=self)
+        context.log(f"{self.return_values_in_error}", who=self)
+
+        sheerka = context.sheerka
+        return sheerka.ret(
+            self.name,
+            False,
+            sheerka.new(BuiltinConcepts.MULTIPLE_ERRORS, body=self.return_values_in_error),
+            parents=self.eaten)
diff --git a/src/evaluators/OneErrorEvaluator.py b/src/evaluators/OneErrorEvaluator.py
index faa3ef1..a4121e3 100644
--- a/src/evaluators/OneErrorEvaluator.py
+++ b/src/evaluators/OneErrorEvaluator.py
@@ -31,6 +31,10 @@ class OneErrorEvaluator(AllReturnValuesEvaluator):
                 self.eaten.append(ret)
             elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
                 self.eaten.append(ret)
+            # else:
+            #     other concepts.
 We do not care whether they are successful or not
+            #     They won't be part of the result nor part of the parent
+            #     --> So they will be handled by other evaluators
 
         return to_process and nb_evaluators_in_error == 1
 
diff --git a/src/evaluators/PythonEvaluator.py b/src/evaluators/PythonEvaluator.py
index 19fa13f..84ed070 100644
--- a/src/evaluators/PythonEvaluator.py
+++ b/src/evaluators/PythonEvaluator.py
@@ -1,4 +1,5 @@
 import copy
+import traceback
 from enum import Enum
 
 from core.ast.visitors import UnreferencedNamesVisitor
@@ -59,7 +60,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
             return sheerka.ret(self.name, True, evaluated, parents=[return_value])
 
         except Exception as error:
-            context.log_error(error, self.name)
+            context.log_error(error, who=self.name, exc=traceback.format_exc())
             error = sheerka.new(BuiltinConcepts.ERROR, body=error)
             return sheerka.ret(self.name, False, error, parents=[return_value])
 
diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py
new file mode 100644
index 0000000..09c52f2
--- /dev/null
+++ b/src/parsers/AtomNodeParser.py
@@ -0,0 +1,369 @@
+import copy
+from dataclasses import dataclass
+
+from core import builtin_helpers
+from core.builtin_concepts import BuiltinConcepts
+from core.concept import Concept, DEFINITION_TYPE_BNF
+from core.tokenizer import TokenKind, Tokenizer
+from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
+from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
+
+PARSERS = ["BnfNode", "SyaNode", "Python"]
+
+
+@dataclass()
+class TokensNodeFound(ErrorNode):
+    expected_tokens: list
+
+    def __eq__(self, other):
+        if id(other) == id(self):
+            return True
+
+        if not isinstance(other, UnexpectedTokenErrorNode):
+            return False
+
+        if self.message != other.message:
+            return False
+
+        if self.token.type != other.token.type or self.token.value != other.token.value:
+            return False
+
+        if len(self.expected_tokens) != len(other.expected_tokens):
+            return False
+
+        for i, t in enumerate(self.expected_tokens):
+            if t != other.expected_tokens[i]:
+                return False
+
+        return True
+
+    def __hash__(self):
+        # expected_tokens is a list, so it must be converted to a tuple to be hashable
+        return hash((self.message, self.token, tuple(self.expected_tokens)))
+
+
+class AtomConceptParserHelper:
+    def __init__(self, context):
+
+        self.context = context
+        self.debug = []
+        self.sequence = []  # sequence of concepts already found
+        self.current_concept: ConceptNode = None  # concept being parsed
+        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of token positions
+        self.expected_tokens = None  # expected tokens for this concept
+        self.is_locked = False
+        self.errors = []
+        self.has_unrecognized = False
+        self.forked = []  # used to duplicate an AtomConceptParserHelper.
+        # 
See manage_unrecognized() + + def __eq__(self, other): + if id(other) == id(self): + return True + + if not isinstance(other, AtomConceptParserHelper): + return False + + if len(self.sequence) != len(other.sequence): + return False + + for item_self, item_other in zip(self.sequence, other.sequence): + if item_self != item_other: + return False + + return True + + def __hash__(self): + return hash(len(self.sequence)) + + def __repr__(self): + return f"{self.sequence}" + + def lock(self): + self.is_locked = True + + def reset(self): + self.is_locked = False + + def has_error(self): + return len(self.errors) > 0 + + def eat_token(self, token, pos): + if not self.expected_tokens: + return False + + self.debug.append(token) + + if self.expected_tokens[0] != BaseNodeParser.get_token_value(token): + self.errors.append(UnexpectedTokenErrorNode( + f"Found '{token}' while expecting '{self.expected_tokens[0]}'", + token, + [self.expected_tokens[0]])) + return False + + self.current_concept.end = pos + del self.expected_tokens[0] + + if not self.expected_tokens: + # the concept is fully matched + self.sequence.append(self.current_concept) + self.expected_tokens = None + + return True + + def eat_concept(self, concept, pos): + if self.is_locked: + return + + self.debug.append(concept) + self.manage_unrecognized() + for forked in self.forked: + # manage that some clones may have been forked + forked.eat_concept(concept, pos) + + concept_node = ConceptNode(concept, pos, pos) + expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1] + + if not expected: + # the concept is already matched + self.sequence.append(concept_node) + else: + self.current_concept = concept_node + self.expected_tokens = expected + + def manage_unrecognized(self): + if self.unrecognized_tokens.is_empty(): + return + + # do not put empty UnrecognizedToken in out + if self.unrecognized_tokens.is_whitespace(): + self.unrecognized_tokens.reset() + return + + self.unrecognized_tokens.fix_source() + + # try to recognize concepts + nodes_sequences = self._get_lexer_nodes_from_unrecognized() + if nodes_sequences: + instances = [self] + for i in range(len(nodes_sequences) - 1): + clone = self.clone() + instances.append(clone) + self.forked.append(clone) + + for instance, node_sequence in zip(instances, nodes_sequences): + for node in node_sequence: + instance.sequence.append(node) + if isinstance(node, UnrecognizedTokensNode) or \ + hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens: + instance.has_unrecognized = True + instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + + else: + self.sequence.append(self.unrecognized_tokens) + self.has_unrecognized = True + + # create another instance + self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + + def eat_unrecognized(self, token, pos): + if self.is_locked: + return + + self.debug.append(token) + self.unrecognized_tokens.add_token(token, pos) + + def finalize(self): + if len(self.sequence) > 0: + self.manage_unrecognized() + for forked in self.forked: + # manage that some clones may have been forked + forked.finalize() + + if self.expected_tokens: + self.errors.append(TokensNodeFound(self.expected_tokens)) + + def clone(self): + clone = AtomConceptParserHelper(self.context) + clone.debug = self.debug[:] + clone.sequence = self.sequence[:] + clone.current_concept = self.current_concept.clone() if self.current_concept else None + clone.unrecognized_tokens = self.unrecognized_tokens.clone() + clone.expected_tokens = 
self.expected_tokens[:] if self.expected_tokens else None
+        clone.is_locked = self.is_locked
+        clone.errors = self.errors[:]
+        clone.has_unrecognized = self.has_unrecognized
+        return clone
+
+    def _get_lexer_nodes_from_unrecognized(self):
+        """
+        Use the source of self.unrecognized_tokens to find concepts or source code
+        :return:
+        """
+
+        res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
+        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
+
+        if not only_parsers_results.status:
+            return None
+
+        return builtin_helpers.get_lexer_nodes(
+            only_parsers_results.body.body,
+            self.unrecognized_tokens.start,
+            self.unrecognized_tokens.tokens)
+
+
+class AtomNodeParser(BaseNodeParser):
+    """
+    Parser used to recognize atom concepts or sequences of atom concepts
+    An atom concept is a concept that does not have any property, though it may have a body
+
+    So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter)
+    This parser can recognize the sequence 'one two three'
+    as [ConceptNode(one), ConceptNode(two), ConceptNode(three)]
+    It can partly recognize 'one x$1!! two three'
+    as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)]
+    It cannot recognize concepts with parameters (non atom)
+    ex: 'one plus two' won't be recognized as ConceptNode(plus, one, two)
+    it will be [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)]
+
+    Note: 'one plus two' will be recognized by the SyaParser
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__("AtomNode", 50, **kwargs)
+        self.enabled = False
+
+    @staticmethod
+    def _is_eligible(concept):
+        """
+        Predicate that selects the concepts that must be handled by the AtomNodeParser
+        :param concept:
+        :return:
+        """
+        return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF
+
+    def get_concepts_sequences(self):
+
+        forked = []
+
+        def _add_forked_to_concept_parser_helpers():
+            # check whether some new helpers were forked
+            for parser in concept_parser_helpers:
+                if len(parser.forked) > 0:
+                    forked.extend(parser.forked)
+                    parser.forked.clear()
+            if len(forked) > 0:
+                concept_parser_helpers.extend(forked)
+                forked.clear()
+
+        concept_parser_helpers = [AtomConceptParserHelper(self.context)]
+
+        while self.next_token(False):
+            for concept_parser in concept_parser_helpers:
+                concept_parser.reset()
+
+            token = self.token
+
+            try:
+                for concept_parser in concept_parser_helpers:
+                    if concept_parser.eat_token(self.token, self.pos):
+                        concept_parser.lock()
+
+                concepts = self.get_concepts(token, self._is_eligible)
+                if not concepts:
+                    for concept_parser in concept_parser_helpers:
+                        concept_parser.eat_unrecognized(token, self.pos)
+                    continue
+
+                if len(concepts) == 1:
+                    for concept_parser in concept_parser_helpers:
+                        concept_parser.eat_concept(concepts[0], self.pos)
+                    continue
+
+                # make the cartesian product
+                temp_res = []
+                for concept_parser in concept_parser_helpers:
+                    if concept_parser.is_locked:
+                        # It means that it has already eaten the token
+                        # so simply add it, do not clone
+                        temp_res.append(concept_parser)
+                        continue
+
+                    for concept in concepts:
+                        clone = concept_parser.clone()
+                        temp_res.append(clone)
+                        clone.eat_concept(concept, self.pos)
+
+                concept_parser_helpers = temp_res
+            finally:
+                _add_forked_to_concept_parser_helpers()
+
+        # make sure that remaining items in the stack are moved to out
+        for concept_parser in concept_parser_helpers:
+            concept_parser.reset()
+            
concept_parser.finalize() + _add_forked_to_concept_parser_helpers() + + return concept_parser_helpers + + def get_valid(self, concept_parser_helpers): + valid_parser_helpers = [] # be careful, it will be a list of list + for parser_helper in concept_parser_helpers: + if parser_helper.has_error(): + continue + + if len(parser_helper.sequence) == 0: + continue + + for node in parser_helper.sequence: + node.tokens = self.tokens[node.start:node.end + 1] + node.fix_source() + + if parser_helper in valid_parser_helpers: + continue + + valid_parser_helpers.append(parser_helper) + + return valid_parser_helpers + + def parse(self, context, parser_input): + if parser_input == "": + return context.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.IS_EMPTY) + ) + + if not self.reset_parser(context, parser_input): + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + + parser_helpers = self.get_valid(self.get_concepts_sequences()) + + if len(parser_helpers): + ret = [] + for parser_helper in parser_helpers: + ret.append( + self.sheerka.ret( + self.name, + not parser_helper.has_unrecognized, + self.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=parser_input, + body=parser_helper.sequence, + try_parsed=parser_helper.sequence))) + + if len(ret) == 1: + self.log_result(context, parser_input, ret[0]) + return ret[0] + else: + self.log_multiple_results(context, parser_input, ret) + return ret + + else: + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py new file mode 100644 index 0000000..39833ac --- /dev/null +++ b/src/parsers/BaseNodeParser.py @@ -0,0 +1,669 @@ +from collections import namedtuple +from dataclasses import dataclass +from enum import Enum + +from core.builtin_concepts import BuiltinConcepts +from core.concept import VARIABLE_PREFIX, Concept +from core.sheerka.ExecutionContext import ExecutionContext +from core.tokenizer import TokenKind, LexerError, Token +from parsers.BaseParser import Node, BaseParser, ErrorNode + +DEBUG_COMPILED = True + + +@dataclass() +class LexerNode(Node): + start: int # starting index in the tokens list + end: int # ending index in the tokens list + tokens: list = None # tokens + source: str = None # string representation of what was parsed + + def __post_init__(self): + if self.source is None: + self.source = BaseParser.get_text_from_tokens(self.tokens) + + def __eq__(self, other): + if not isinstance(other, LexerNode): + return False + + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source and \ + self.tokens == other.tokens + + def fix_source(self, force=True): + if force or self.source is None: + self.source = BaseParser.get_text_from_tokens(self.tokens) + return self + + +class UnrecognizedTokensNode(LexerNode): + def __init__(self, start, end, tokens): + super().__init__(start, end, tokens) + self.is_frozen = False + self.parenthesis_count = 0 + + def freeze(self): + self.is_frozen = True + + def reset(self): + self.start = self.end = -1 + self.tokens.clear() + self.is_frozen = False + self.parenthesis_count = 0 + + def has_open_paren(self): + return self.parenthesis_count > 0 + + def add_token(self, token, pos): + if self.is_frozen: + raise Exception("The node is frozen") + + if self.end != -1 and pos == self.end + 2: + # add the missing whitespace + p = 
self.tokens[-1] # previous token + self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1)) + + self.tokens.append(token) + self.end = pos + if self.start == -1: + self.start = pos + + if token.type == TokenKind.LPAR: + self.parenthesis_count += 1 + + if token.type == TokenKind.RPAR: + self.parenthesis_count -= 1 + + return self + + def not_whitespace(self): + return not self.is_whitespace() + + def is_whitespace(self): + for t in self.tokens: + if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE): + return False + return True + + def is_empty(self): + return len(self.tokens) == 0 + + def __eq__(self, other): + if isinstance(other, utnode): + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + if isinstance(other, UTN): + return other == self + + if not isinstance(other, UnrecognizedTokensNode): + return False + + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + def __hash__(self): + return hash((self.start, self.end, self.source)) + + def __repr__(self): + return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')" + + def clone(self): + clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:]) + clone.is_frozen = self.is_frozen + clone.parenthesis_count = self.parenthesis_count + return clone + + +class ConceptNode(LexerNode): + """ + Returned by the BnfNodeParser + It represents a recognized concept + """ + + def __init__(self, concept, start, end, tokens=None, source=None, underlying=None): + super().__init__(start, end, tokens, source) + self.concept = concept + self.underlying = underlying + self.fix_source(False) + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, (CN, CNC)): + return other == self + + if isinstance(other, cnode): + return self.concept.key == other.concept_key and \ + self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + if isinstance(other, short_cnode): + return self.concept.key == other.concept_key and self.source == other.source + + if not isinstance(other, ConceptNode): + return False + + return self.concept == other.concept and \ + self.start == other.start and \ + self.end == other.end and \ + self.source == other.source and \ + self.underlying == other.underlying + + def __hash__(self): + return hash((self.concept, self.start, self.end, self.source, self.underlying)) + + def __repr__(self): + text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}" + if DEBUG_COMPILED: + for k, v in self.concept.compiled.items(): + text += f", {k}='{v}'" + return text + ")" + + def clone(self): + # do we need to clone the concept as well ? 
+ clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying) + return clone + + +class SourceCodeNode(LexerNode): + """ + Returned when some source code (like Python source code is recognized) + """ + + def __init__(self, node, start, end, tokens=None, source=None, return_value=None): + super().__init__(start, end, tokens, source) + self.node = node # The PythonNode (or whatever language node) that is found + self.return_value = return_value # original result of the parsing + + def __eq__(self, other): + if isinstance(other, scnode): + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + if not isinstance(other, SourceCodeNode): + return False + + return self.node == other.node and \ + self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + def __hash__(self): + return hash((self.start, self.end, self.source)) + + def __repr__(self): + return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')" + + +class SourceCodeWithConceptNode(LexerNode): + """ + Kind of temporary version for SourceCodeNode + I know that there is some code, + I know that there are some concepts + I just don't want to make the glue yet + + So I push all the nodes into one big bag + """ + + def __init__(self, first_node, last_node, content_nodes=None): + super().__init__(9999, -1, None) # why not sys.maxint ? + self.first = first_node + self.last = last_node + self.nodes = content_nodes or [] + self.has_unrecognized = False + self.fix_all_pos() + + def add_node(self, node): + self.nodes.append(node) + self.fix_pos(node) + + return self + + def __eq__(self, other): + if id(self) == id(other): + return True + + if not isinstance(other, SourceCodeWithConceptNode): + return False + + if self.start != other.start or self.end != other.end: + return False + + if self.first != other.first: + return False + + if self.last != other.last: + return False + + if len(self.nodes) != len(other.nodes): + return False + + for self_node, other_node in zip(self.nodes, other.nodes): + if self_node != other_node: + return False + + # at last + return True + + def __hash__(self): + return hash((self.first, self.last, len(self.nodes))) + + def __repr__(self): + return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')" + + def fix_all_pos(self): + for n in [self.first, self.last] + self.nodes: + self.fix_pos(n) + + def fix_pos(self, node): + if hasattr(node, "start") and node.start is not None: + if node.start < self.start: + self.start = node.start + + if hasattr(node, "end") and node.end is not None: + if node.end > self.end: + self.end = node.end + return self + + def pseudo_fix_source(self): + self.source = self.first.source + for n in self.nodes: + self.source += " " + if hasattr(n, "source"): + self.source += n.source + elif hasattr(n, "concept"): + self.source += str(n.concept) + else: + self.source += " unknown" + self.source += self.last.source + return self + + def clone(self): + clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes) + return clone + + +@dataclass() +class GrammarErrorNode(ErrorNode): + message: str + + +class SyaAssociativity(Enum): + Left = "left" + Right = "right" + No = "No" + + def __repr__(self): + return self.value + + +cnode = namedtuple("ConceptNode", "concept_key start end source") +short_cnode = namedtuple("ConceptNode", "concept_key source") +utnode = namedtuple("utnode", "start end source") +scnode = 
namedtuple("scnode", "start end source") + + +@dataclass(init=False) +class SCWC: + """ + SourceNodeWithConcept tester class + It matches with a SourceNodeWithConcept + but it's easier to instantiate during the tests + """ + first: LexerNode + last: LexerNode + content: tuple + + def __init__(self, first, last, *args): + self.first = first + self.last = last + self.content = args + + +class HelperWithPos: + def __init__(self, start=None, end=None): + self.start = start + self.end = end + + self.start_is_fixed = start is not None + self.end_is_fixed = end is not None + + def fix_pos(self, node): + if not self.start_is_fixed: + start = node.start if hasattr(node, "start") else \ + node[0] if isinstance(node, tuple) else None + + if start is not None and (self.start is None or start < self.start): + self.start = start + + if not self.end_is_fixed: + end = node.end if hasattr(node, "end") else \ + node[1] if isinstance(node, tuple) else None + + if end is not None and (self.end is None or end > self.end): + self.end = end + return self + + +class CN(HelperWithPos): + """ + ConceptNode tester class + It matches with ConceptNode but with less constraints + + CNC == ConceptNode if concept key, start, end and source are the same + """ + + def __init__(self, concept, start=None, end=None, source=None): + """ + + :param concept: Concept or concept_key (only the key is used anyway) + :param start: + :param end: + :param source: + """ + super().__init__(start, end) + self.concept_key = concept.key if isinstance(concept, Concept) else concept + self.source = source + self.concept = concept if isinstance(concept, Concept) else None + + def fix_source(self, str_tokens): + self.source = "".join(str_tokens) + return self + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, ConceptNode): + if other.concept is None: + return False + if other.concept.key != self.concept_key: + return False + if self.start is not None and self.start != other.start: + return False + if self.end is not None and self.end != other.end: + return False + return True + + if not isinstance(other, CN): + return False + + return self.concept_key == other.concept_key and \ + self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + def __hash__(self): + return hash((self.concept_key, self.start, self.end, self.source)) + + def __repr__(self): + if self.concept: + txt = f"CN(concept='{self.concept}'" + else: + txt = f"CN(concept_key='{self.concept_key}'" + txt += f", source='{self.source}'" + if self.start is not None: + txt += f", start={self.start}" + if self.end is not None: + txt += f", end={self.end}" + return txt + ")" + + +class CNC(CN): + """ + ConceptNode for Compiled tester class + It matches with ConceptNode + But focuses on the 'compiled' property of the concept + + CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled + """ + + def __init__(self, concept_key, start=None, end=None, source=None, **kwargs): + super().__init__(concept_key, start, end, source) + self.compiled = kwargs + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, ConceptNode): + if other.concept is None: + return False + if other.concept.key != self.concept_key: + return False + if self.start is not None and self.start != other.start: + return False + if self.end is not None and self.end != other.end: + return False + return self.compiled == other.concept.compiled # assert instead of return to help debugging tests + + if 
+ if not isinstance(other, CNC): + return False + + return self.concept_key == other.concept_key and \ + self.start == other.start and \ + self.end == other.end and \ + self.source == other.source and \ + self.compiled == other.compiled + + def __repr__(self): + if self.concept: + txt = f"CNC(concept='{self.concept}'" + else: + txt = f"CNC(concept_key='{self.concept_key}'" + txt += f", source='{self.source}'" + if self.start is not None: + txt += f", start={self.start}" + if self.end is not None: + txt += f", end={self.end}" + + for k, v in self.compiled.items(): + txt += f", {k}='{v}'" + return txt + ")" + + +class BaseNodeParser(BaseParser): + def __init__(self, name, priority, **kwargs): + super().__init__(name, priority) + if 'sheerka' in kwargs: + sheerka = kwargs.get("sheerka") + self.init_from_sheerka(sheerka) + + else: + self.concepts_by_first_keyword = None + self.sya_definitions = None + + self.token = None + self.pos = -1 + self.tokens = None + + self.context: ExecutionContext = None + self.text = None + self.sheerka = None + + def init_from_sheerka(self, sheerka): + """ + Use the definitions from Sheerka to initialize + :param sheerka: + :return: + """ + self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword + self.sya_definitions = {} # always set, even when Sheerka has no sya definitions + if sheerka.sya_definitions: + for k, v in sheerka.sya_definitions.items(): + self.sya_definitions[k] = (v[0], SyaAssociativity(v[1])) + + def reset_parser(self, context, text): + self.context = context + self.sheerka = context.sheerka + self.text = text + + try: + self.tokens = list(self.get_input_as_tokens(text)) + except LexerError as e: + self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) + return False + + self.token = None + self.pos = -1 + return True + + def add_error(self, error, next_token=True): + self.error_sink.append(error) + if next_token: + self.next_token() + return error + + def get_token(self) -> Token: + return self.token + + def next_token(self, skip_whitespace=True): + if self.token and self.token.type == TokenKind.EOF: + return False + + self.pos += 1 + self.token = self.tokens[self.pos] + + if skip_whitespace: + while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: + self.pos += 1 + self.token = self.tokens[self.pos] + + return self.token.type != TokenKind.EOF + + def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False): + """ + To quickly find a concept, we store them all in a hash where the key is the first keyword of the concept + example: + Concept("foo a").def_prop("a"), "foo" is a keyword, "a" is a variable + So the key to use will be "foo" + + Concept("a foo").def_prop("a") -> first keyword is "foo" + + Concept("Hello my dear a").def_prop("a") -> first keyword is "Hello" + Note that under the same key, there can be multiple entries + a B-Tree may be a better implementation in the future + + We also store sya_definitions, where each value is a tuple (concept_precedence:int, concept_associativity:SyaAssociativity) + :param context: + :param concepts: list[Concept] + :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)] + :param use_sheerka: first init with the definitions from Sheerka + :return: + """ + self.context = context + self.sheerka = context.sheerka + + if use_sheerka: + self.init_from_sheerka(self.sheerka) + + if sya_definitions: + if self.sya_definitions: + self.sya_definitions.update(sya_definitions) + else: + self.sya_definitions = sya_definitions + + if self.concepts_by_first_keyword is None:
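+ # Small illustration (hypothetical concepts) of the index built below: + # Concept("foo a") -> {"foo": [<id of 'foo a'>]} + # Concept("a foo") -> {"foo": [<id of 'foo a'>, <id of 'a foo'>]} + # Concept("Hello my dear a") additionally maps "Hello" to its own id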
self.concepts_by_first_keyword = {} + + for concept in concepts: + keywords = concept.key.split() + for keyword in keywords: + if keyword.startswith(VARIABLE_PREFIX): + continue + + self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id) + break + + return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword) + + def get_concepts(self, token, to_keep, to_map=None): + """ + Tries to find if there are concepts that match the value of the token + :param token: + :param to_keep: predicate to tell if the concept is eligible + :param to_map: + :return: + """ + + if token.type == TokenKind.STRING: + name = token.value[1:-1] + elif token.type == TokenKind.KEYWORD: + name = token.value.value + else: + name = token.value + + result = [] + if name in self.concepts_by_first_keyword: + for concept_id in self.concepts_by_first_keyword[name]: + + concept = self.sheerka.get_by_id(concept_id) + + if not to_keep(concept): + continue + + concept = to_map(concept) if to_map else concept + result.append(concept) + return result + + return None + + @staticmethod + def get_token_value(token): + if token.type == TokenKind.STRING: + return token.value[1:-1] + elif token.type == TokenKind.KEYWORD: + return token.value.value + else: + return token.value + + +class UTN(HelperWithPos): + """ + Tester class for UnrecognizedTokensNode + Compares the source, and start/end if defined + """ + + def __init__(self, source, start=None, end=None): + """ + :param source: string representation of the unrecognized tokens + :param start: + :param end: + """ + super().__init__(start, end) + self.source = source + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, UnrecognizedTokensNode): + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + if not isinstance(other, UTN): + return False + + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + def __hash__(self): + return hash((self.source, self.start, self.end)) + + def __repr__(self): + txt = f"UTN(source='{self.source}'" + if self.start is not None: + txt += f", start={self.start}" + if self.end is not None: + txt += f", end={self.end}" + return txt + ")" diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index 877d0ab..179967a 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -1,8 +1,8 @@ from dataclasses import dataclass -from core.builtin_concepts import BuiltinConcepts +from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.concept import Concept -from core.tokenizer import TokenKind, Keywords, Token +from core.tokenizer import TokenKind, Keywords, Token, Tokenizer from core.sheerka_logger import get_logger import core.utils import logging @@ -77,7 +77,6 @@ class BaseParser: self.priority = priority self.enabled = enabled - self.has_error = False self.error_sink = [] def __eq__(self, other): @@ -91,9 +90,13 @@ class BaseParser: def __repr__(self): return self.name - def parse(self, context, text): + def parse(self, context, parser_input): pass + @property + def has_error(self): + return len(self.error_sink) > 0 + def log_result(self, context, source, ret): if not self.log.isEnabledFor(logging.DEBUG): return @@ -132,6 +135,53 @@ class BaseParser: body=self.error_sink if self.has_error else tree, try_parsed=try_parse) + def get_input_as_text(self, parser_input, custom_switcher=None): + if isinstance(parser_input, list):
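+ # parser_input can be a raw string, a list of tokens or a ParserResultConcept; + # a token list is rebuilt into plain text here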
return self.get_text_from_tokens(parser_input, custom_switcher) + + if isinstance(parser_input, ParserResultConcept): + parser_input = parser_input.source + + if "c:" in parser_input: + return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher) + + return parser_input + + def get_input_as_tokens(self, parser_input): + if isinstance(parser_input, list): + return self.add_eof_if_needed(parser_input) + + if isinstance(parser_input, ParserResultConcept): + if parser_input.tokens: + return self.add_eof_if_needed(parser_input.tokens) + else: + return Tokenizer(parser_input.source) + + return Tokenizer(parser_input) + + def get_input_as_lexer_nodes(self, parser_input, expected_parser=None): + if not isinstance(parser_input, ParserResultConcept): + return None + + if expected_parser and parser_input.parser != expected_parser: + return None + + if len(parser_input.value) == 0: + return None + + for node in parser_input.value: + from parsers.BaseNodeParser import LexerNode + if not isinstance(node, LexerNode): + return None + + return parser_input.value + + @staticmethod + def add_eof_if_needed(lst): + if len(lst) == 0 or not lst[-1].type == TokenKind.EOF: + lst.append(Token(TokenKind.EOF, "", -1, -1, -1)) + return lst + @staticmethod def get_text_from_tokens(tokens, custom_switcher=None): if tokens is None: diff --git a/src/parsers/ConceptLexerParser.py b/src/parsers/BnfNodeParser.py similarity index 84% rename from src/parsers/ConceptLexerParser.py rename to src/parsers/BnfNodeParser.py index f43a2a8..31cbeaa 100644 --- a/src/parsers/ConceptLexerParser.py +++ b/src/parsers/BnfNodeParser.py @@ -9,147 +9,17 @@ from collections import namedtuple from dataclasses import dataclass from collections import defaultdict -from core.builtin_concepts import BuiltinConcepts +from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.concept import Concept, ConceptParts, DoNotResolve from core.tokenizer import TokenKind, Tokenizer, Token -from parsers.BaseParser import BaseParser, Node, ErrorNode +from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode +from parsers.BaseParser import BaseParser, ErrorNode import core.utils -@dataclass() -class LexerNode(Node): - start: int # starting index in the tokens list - end: int # ending index in the tokens list - tokens: list = None # tokens - source: str = None # string representation of what was parsed - - def __post_init__(self): - if self.source is None: - self.source = BaseParser.get_text_from_tokens(self.tokens) - - def __eq__(self, other): - if not isinstance(other, LexerNode): - return False - - return self.start == other.start and \ - self.end == other.end and \ - self.source == other.source and \ - self.tokens == other.tokens - - -class UnrecognizedTokensNode(LexerNode): - def __init__(self, start, end, tokens): - super().__init__(start, end, tokens) - - def add_token(self, token, pos): - self.tokens.append(token) - self.end = pos - - def fix_source(self): - self.source = BaseParser.get_text_from_tokens(self.tokens) - - def not_whitespace(self): - return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)) - - def __eq__(self, other): - if isinstance(other, utnode): - return self.start == other.start and \ - self.end == other.end and \ - self.source == other.source - - if not isinstance(other, UnrecognizedTokensNode): - return False - - return self.start == other.start and \ - self.end == other.end and \ - self.source == 
other.source - - def __hash__(self): - return hash((self.start, self.end, self.source)) - - def __repr__(self): - return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')" - - -class ConceptNode(LexerNode): - """ - Returned by the ConceptLexerParser - It represents a recognized concept - """ - - def __init__(self, concept, start, end, tokens=None, source=None, underlying=None): - super().__init__(start, end, tokens, source) - self.concept = concept - self.underlying = underlying - - if self.source is None: - self.source = BaseParser.get_text_from_tokens(self.tokens) - - def __eq__(self, other): - if isinstance(other, cnode): - return self.concept.key == other.concept_key and \ - self.start == other.start and \ - self.end == other.end and \ - self.source == other.source - - if isinstance(other, short_cnode): - return self.concept.key == other.concept_key and self.source == other.source - - if not isinstance(other, ConceptNode): - return False - - return self.concept == other.concept and \ - self.start == other.start and \ - self.end == other.end and \ - self.source == other.source and \ - self.underlying == other.underlying - - def __hash__(self): - return hash((self.concept, self.start, self.end, self.source, self.underlying)) - - def __repr__(self): - return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')" - - -class SourceCodeNode(LexerNode): - """ - Returned when some source code (like Python source code is recognized) - """ - - def __init__(self, node, start, end, tokens=None, source=None): - super().__init__(start, end, tokens, source) - self.node = node # The PythonNode (or whatever language node) that is found - - def __eq__(self, other): - if isinstance(other, scnode): - return self.start == other.start and \ - self.end == other.end and \ - self.source == other.source - - if not isinstance(other, SourceCodeNode): - return False - - return self.node == other.node and \ - self.start == other.start and \ - self.end == other.end and \ - self.source == other.source - - def __hash__(self): - return hash((self.start, self.end, self.source)) - - def __repr__(self): - return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')" - - -cnode = namedtuple("ConceptNode", "concept_key start end source") -short_cnode = namedtuple("ConceptNode", "concept_key source") -utnode = namedtuple("UnrecognizedTokensNode", "start end source") -scnode = namedtuple("SourceCodeNode", "start end source") - - class NonTerminalNode(LexerNode): """ - Returned by the ConceptLexerParser + Returned by the BnfNodeParser """ def __init__(self, parsing_expression, start, end, tokens, children=None): @@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode): class TerminalNode(LexerNode): """ - Returned by the ConceptLexerParser + Returned by the BnfNodeParser """ def __init__(self, parsing_expression, start, end, value): @@ -205,11 +75,6 @@ class TerminalNode(LexerNode): return hash((self.parsing_expression, self.start, self.end, self.value)) -@dataclass() -class GrammarErrorNode(ErrorNode): - message: str - - @dataclass() class UnknownConceptNode(ErrorNode): concept_key: str @@ -574,9 +439,9 @@ class StrMatch(Match): return None -class ConceptLexerParser(BaseParser): +class BnfNodeParser(BaseParser): def __init__(self, **kwargs): - super().__init__("ConceptLexer", 50) + super().__init__("BnfNode", 50) if 'grammars' in kwargs: self.concepts_grammars = kwargs.get("grammars") elif 'sheerka' in kwargs: @@ -595,7 +460,6 
@@ class ConceptLexerParser(BaseParser): self.sheerka = None def add_error(self, error, next_token=True): - self.has_error = True self.error_sink.append(error) if next_token: self.next_token() @@ -606,16 +470,11 @@ class ConceptLexerParser(BaseParser): self.sheerka = context.sheerka self.text = text - if isinstance(text, str): - try: - self.tokens = list(Tokenizer(text)) - except core.tokenizer.LexerError as e: - self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) - return False - - else: - self.tokens = list(text) - self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token + try: + self.tokens = list(self.get_input_as_tokens(text)) + except core.tokenizer.LexerError as e: + self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) + return False self.token = None self.pos = -1 @@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser): removed_concepts.append(e) return removed_concepts - def parse(self, context, text): - if text == "": + def parse(self, context, parser_input): + if parser_input == "": return context.sheerka.ret( self.name, False, context.sheerka.new(BuiltinConcepts.IS_EMPTY) ) - if not self.reset_parser(context, text): + if not self.reset_parser(context, parser_input): return self.sheerka.ret( self.name, False, @@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser): self.sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=text, + source=parser_input, body=choice, try_parsed=choice))) if len(ret) == 1: - self.log_result(context, text, ret[0]) + self.log_result(context, parser_input, ret[0]) return ret[0] else: - self.log_multiple_results(context, text, ret) + self.log_multiple_results(context, parser_input, ret) return ret def finalize_concept(self, sheerka, template, underlying, init_empty_body=True): @@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser): _concept.compiled[prop_name] = new_value def _look_for_concept_match(_underlying): + """ + At some point, there is either an StrMatch or a ConceptMatch, + that allowed the recognition. + Look for the ConceptMatch, with recursion if needed + """ if isinstance(_underlying.parsing_expression, ConceptExpression): return _underlying @@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser): def _get_underlying_value(_underlying): concept_match_node = _look_for_concept_match(_underlying) if concept_match_node: + # the value is a concept if id(concept_match_node) in _underlying_value_cache: result = _underlying_value_cache[id(concept_match_node)] else: @@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser): result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body) _underlying_value_cache[id(concept_match_node)] = result else: + # the value is a string result = DoNotResolve(_underlying.source) return result @@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser): concept.compiled[ConceptParts.BODY] = value if underlying.parsing_expression.rule_name: _add_prop(concept, underlying.parsing_expression.rule_name, value) + # KSI : Why don't we set concept.metadata.need_validation to True ? 
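+ # walk the children of a NonTerminalNode so that nested matches are processed too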
if isinstance(underlying, NonTerminalNode): for node in underlying.children: diff --git a/src/parsers/BnfParser.py b/src/parsers/BnfParser.py index 333d369..1d3dc6d 100644 --- a/src/parsers/BnfParser.py +++ b/src/parsers/BnfParser.py @@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts from core.sheerka.Sheerka import ExecutionContext from core.tokenizer import Tokenizer, Token, TokenKind, LexerError from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode -from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \ +from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \ StrMatch, ConceptGroupExpression @@ -30,7 +30,6 @@ class BnfParser(BaseParser): def __init__(self, **kwargs): super().__init__("Bnf", 50, False) - # self.has_error = False # self.error_sink = [] # self.name = BaseParser.PREFIX + "Bnf" @@ -61,7 +60,6 @@ class BnfParser(BaseParser): self.eat_white_space() def add_error(self, error, next_token=True): - self.has_error = True self.error_sink.append(error) if next_token: self.next_token() @@ -115,11 +113,11 @@ class BnfParser(BaseParser): token = self.get_token() return token.type == second or token.type == first and self.next_after().type == second - def parse(self, context: ExecutionContext, text): + def parse(self, context: ExecutionContext, parser_input): tree = None try: - self.reset_parser(context, text) + self.reset_parser(context, parser_input) tree = self.parse_choice() token = self.get_token() diff --git a/src/parsers/ConceptsWithConceptsParser.py b/src/parsers/ConceptsWithConceptsParser.py index 7d8a3c6..5ba3543 100644 --- a/src/parsers/ConceptsWithConceptsParser.py +++ b/src/parsers/ConceptsWithConceptsParser.py @@ -1,10 +1,14 @@ +# try to match something like +# ConceptNode 'plus' ConceptNode +# +# Replaced by SyaNodeParser from core.builtin_concepts import BuiltinConcepts from core.tokenizer import TokenKind, Token +from parsers.BaseNodeParser import SourceCodeNode from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode +from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode from parsers.MultipleConceptsParser import MultipleConceptsParser from core.concept import VARIABLE_PREFIX -import logging multiple_concepts_parser = MultipleConceptsParser() @@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser() class ConceptsWithConceptsParser(BaseParser): def __init__(self, **kwargs): super().__init__("ConceptsWithConcepts", 25) + self.enabled = False @staticmethod def get_tokens(nodes): @@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser): return concept - def parse(self, context, text): + def parse(self, context, parser_input): sheerka = context.sheerka - if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT): + nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser) + if not nodes: return None - if not text.parser == multiple_concepts_parser: - return None - - nodes = text.body - concept_key = self.get_key(nodes) concept = sheerka.new(concept_key) if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): return sheerka.ret( self.name, False, - sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body)) + sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body)) concepts = concept if hasattr(concept, "__iter__") else [concept] for concept in concepts: @@ -101,7 
+102,7 @@ class ConceptsWithConceptsParser(BaseParser): sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=text.source, + source=parser_input.source, body=concept, try_parsed=None))) diff --git a/src/parsers/DefaultParser.py b/src/parsers/DefaultParser.py index 032b7ae..dc22b67 100644 --- a/src/parsers/DefaultParser.py +++ b/src/parsers/DefaultParser.py @@ -110,7 +110,7 @@ class DefaultParser(BaseParser): """ def __init__(self, **kwargs): - BaseParser.__init__(self, "Default", 50) + BaseParser.__init__(self, "Default", 60) self.lexer_iter = None self._current = None self.context: ExecutionContext = None @@ -168,7 +168,6 @@ class DefaultParser(BaseParser): self.next_token() def add_error(self, error, next_token=True): - self.has_error = True self.error_sink.append(error) if next_token: self.next_token() @@ -188,19 +187,19 @@ class DefaultParser(BaseParser): return - def parse(self, context, text): + def parse(self, context, parser_input): # default parser can only manage string text - if not isinstance(text, str): + if not isinstance(parser_input, str): ret = context.sheerka.ret( self.name, False, - context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text)) - self.log_result(context, text, ret) + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) + self.log_result(context, parser_input, ret) return ret tree = None try: - self.reset_parser(context, text) + self.reset_parser(context, parser_input) tree = self.parse_statement() except core.tokenizer.LexerError as e: self.add_error(e, False) @@ -211,7 +210,7 @@ class DefaultParser(BaseParser): if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode): body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink) else: - body = self.get_return_value_body(context.sheerka, text, tree, tree) + body = self.get_return_value_body(context.sheerka, parser_input, tree, tree) # body = self.sheerka.new( # BuiltinConcepts.PARSER_RESULT, # parser=self, @@ -224,7 +223,7 @@ class DefaultParser(BaseParser): not self.has_error, body) - self.log_result(context, text, ret) + self.log_result(context, parser_input, ret) return ret def parse_statement(self): diff --git a/src/parsers/EmptyStringParser.py b/src/parsers/EmptyStringParser.py index 84e4f21..7663377 100644 --- a/src/parsers/EmptyStringParser.py +++ b/src/parsers/EmptyStringParser.py @@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser): def __init__(self, **kwargs): BaseParser.__init__(self, "EmptyString", 90) - def parse(self, context, text): + def parse(self, context, parser_input): sheerka = context.sheerka - if isinstance(text, str) and text.strip() == "" or \ - isinstance(text, list) and text == [] or \ - text is None: + if isinstance(parser_input, str) and parser_input.strip() == "" or \ + isinstance(parser_input, list) and parser_input == [] or \ + parser_input is None: ret = sheerka.ret(self.name, True, sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, @@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser): else: ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME)) - self.log_result(context, text, ret) + self.log_result(context, parser_input, ret) return ret diff --git a/src/parsers/ExactConceptParser.py b/src/parsers/ExactConceptParser.py index 66e1bb8..10dc09d 100644 --- a/src/parsers/ExactConceptParser.py +++ b/src/parsers/ExactConceptParser.py @@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser): def __init__(self, **kwargs): BaseParser.__init__(self, "ExactConcept", 80) - def parse(self, context, 
text): + def parse(self, context, parser_input): """ text can be string, but text can also be an list of tokens :param context: - :param text: + :param parser_input: :return: """ - context.log(f"Parsing '{text}'", self.name) + context.log(f"Parsing '{parser_input}'", self.name) res = [] sheerka = context.sheerka try: - words = self.get_words(text) + words = self.get_words(parser_input) except LexerError as e: context.log(f"Error found in tokenizer {e}", self.name) return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e)) if len(words) > self.MAX_WORDS_SIZE: context.log(f"Max words reached. Stopping.", self.name) - return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text)) + return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input)) recognized = False for combination in self.combinations(words): @@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser): context.sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=text if isinstance(text, str) else self.get_text_from_tokens(text), + source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input), body=concept, try_parsed=concept))) recognized = True if recognized: if len(res) == 1: - self.log_result(context, text, res[0]) + self.log_result(context, parser_input, res[0]) else: - self.log_multiple_results(context, text, res) + self.log_multiple_results(context, parser_input, res) return res return res - ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text)) - self.log_result(context, text, ret) + ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input)) + self.log_result(context, parser_input, ret) return ret - @staticmethod - def get_words(text): - tokens = iter(Tokenizer(text)) if isinstance(text, str) else text + def get_words(self, text): + tokens = self.get_input_as_tokens(text) res = [] for t in tokens: if t.type == TokenKind.EOF: diff --git a/src/parsers/MultipleConceptsParser.py b/src/parsers/MultipleConceptsParser.py index 5f4c47a..e02c6c2 100644 --- a/src/parsers/MultipleConceptsParser.py +++ b/src/parsers/MultipleConceptsParser.py @@ -1,18 +1,20 @@ +# to be replaced by SyaNodeParser import ast from core.builtin_concepts import BuiltinConcepts from core.tokenizer import TokenKind +from parsers.BaseNodeParser import SourceCodeNode from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode +from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode import core.utils from parsers.PythonParser import PythonParser -concept_lexer_parser = ConceptLexerParser() +concept_lexer_parser = BnfNodeParser() class MultipleConceptsParser(BaseParser): """ - Parser that will take the result of ConceptLexerParser and + Parser that will take the result of BnfNodeParser and try to resolve the unrecognized tokens token by token It is a success when it returns a list ConceptNode exclusively @@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser): def __init__(self, **kwargs): BaseParser.__init__(self, "MultipleConcepts", 45) + self.enabled = False @staticmethod def finalize(nodes_found, unrecognized_tokens): @@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser): unrecognized_tokens = UnrecognizedTokensNode(index, index, [token]) return unrecognized_tokens - def parse(self, context, text): + def 
parse(self, context, parser_input): sheerka = context.sheerka - if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT): + nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser) + if not nodes: return None - if not text.parser == concept_lexer_parser: - return None - - sheerka = context.sheerka - nodes = text.value nodes_found = [[]] concepts_only = True @@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser): sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=text.source, + source=parser_input.source, body=choice, try_parsed=None)) ) if len(ret) == 1: - self.log_result(context, text.source, ret[0]) + self.log_result(context, parser_input.source, ret[0]) return ret[0] else: - self.log_multiple_results(context, text.source, ret) + self.log_multiple_results(context, parser_input.source, ret) return ret @staticmethod diff --git a/src/parsers/PythonParser.py b/src/parsers/PythonParser.py index 713af12..56fa525 100644 --- a/src/parsers/PythonParser.py +++ b/src/parsers/PythonParser.py @@ -1,4 +1,4 @@ -from core.builtin_concepts import BuiltinConcepts +from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.tokenizer import Tokenizer, LexerError, TokenKind from parsers.BaseParser import BaseParser, Node, ErrorNode from dataclasses import dataclass @@ -6,7 +6,7 @@ import ast import logging import core.utils -from parsers.ConceptLexerParser import ConceptNode +from parsers.BnfNodeParser import ConceptNode log = logging.getLogger(__name__) @@ -67,7 +67,7 @@ class PythonParser(BaseParser): BaseParser.__init__(self, "Python", 50) self.source = kwargs.get("source", "") - def parse(self, context, text): + def parse(self, context, parser_input): sheerka = context.sheerka tree = None @@ -76,15 +76,9 @@ class PythonParser(BaseParser): } try: - if isinstance(text, str) and "c:" in text: - source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher) - elif isinstance(text, str): - source = text - else: - source = self.get_text_from_tokens(text, python_switcher) + source = self.get_input_as_text(parser_input, python_switcher) source = source.strip() - - text = text if isinstance(text, str) else source + parser_input = parser_input if isinstance(parser_input, str) else source # first, try to parse an expression res, tree, error = self.try_parse_expression(source) @@ -92,25 +86,32 @@ class PythonParser(BaseParser): # then try to parse a statement res, tree, error = self.try_parse_statement(source) if not res: - self.has_error = True - error_node = PythonErrorNode(text, error) + error_node = PythonErrorNode(parser_input, error) self.error_sink.append(error_node) except LexerError as e: - self.has_error = True self.error_sink.append(e) - ret = sheerka.ret( - self.name, - not self.has_error, - sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=text, - body=self.error_sink if self.has_error else PythonNode(text, tree), - try_parsed=None)) + if self.has_error: + ret = sheerka.ret( + self.name, + False, + sheerka.new( + BuiltinConcepts.NOT_FOR_ME, + body=parser_input, + reason=self.error_sink)) + else: + ret = sheerka.ret( + self.name, + True, + sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=parser_input, + body=PythonNode(parser_input, tree), + try_parsed=None)) - self.log_result(context, text, ret) + self.log_result(context, parser_input, ret) return ret def try_parse_expression(self, text): diff --git a/src/parsers/PythonWithConceptsParser.py b/src/parsers/PythonWithConceptsParser.py 
index 2d84781..131a21a 100644 --- a/src/parsers/PythonWithConceptsParser.py +++ b/src/parsers/PythonWithConceptsParser.py @@ -1,10 +1,11 @@ from core.builtin_concepts import BuiltinConcepts from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import ConceptNode +from parsers.BnfNodeParser import ConceptNode from parsers.MultipleConceptsParser import MultipleConceptsParser from parsers.PythonParser import PythonParser +from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser -multiple_concepts_parser = MultipleConceptsParser() +unrecognized_nodes_parser = UnrecognizedNodeParser() class PythonWithConceptsParser(BaseParser): @@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser): res += c if c.isalnum() else "0" return res - def parse(self, context, text): + def parse(self, context, parser_input): sheerka = context.sheerka - if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT): + nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser) + if not nodes: return None - if not text.parser == multiple_concepts_parser: - return None - - nodes = text.body source = "" to_parse = "" identifiers = {} @@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser): python_id = _get_identifier(concept) to_parse += python_id python_ids_mappings[python_id] = concept + else: source += node.source to_parse += node.source diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py new file mode 100644 index 0000000..a6bb839 --- /dev/null +++ b/src/parsers/SyaNodeParser.py @@ -0,0 +1,1117 @@ +import copy +from collections import namedtuple +from dataclasses import dataclass, field +from typing import List + +from core import builtin_helpers +from core.builtin_concepts import BuiltinConcepts +from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF +from core.sheerka.ExecutionContext import ExecutionContext +from core.tokenizer import LexerError, Token, TokenKind +from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \ + SourceCodeWithConceptNode +from parsers.BaseParser import BaseParser, ErrorNode + +PARSERS = ["BnfNode", "AtomNode", "Python"] + +function_parser_res = namedtuple("FunctionParserRes", 'to_out function') + + +class ParenthesisMismatchErrorNode(ErrorNode): + + def __init__(self, error_int): + if isinstance(error_int, tuple): + self.token = error_int[0] + self.pos = error_int[1] + elif isinstance(error_int, Token): + self.token = error_int + self.pos = -1 + else: # isinstance(UnrecognizedTokensNode) + for i, t in reversed(list(enumerate(error_int.tokens))): + if t.type == TokenKind.LPAR: + self.token = t + self.pos = i + error_int.start + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, tuple): + return other[0] == self.token.value and other[1] == self.pos + + if not isinstance(other, ParenthesisMismatchErrorNode): + return False + + return self.token == other.token and self.pos == other.pos + + def __hash__(self): + return hash(self.pos) + + def __repr__(self): + return f"ParenthesisMismatchErrorNode('{self.token.value}', {self.pos}" + + +@dataclass() +class NoneAssociativeSequenceErrorNode(ErrorNode): + concept: Concept + first: int + second: int + tokens: List[Token] = None + + +@dataclass() +class TooManyParametersFound(ErrorNode): + concept: Concept + pos: int # position of the concept + token: Token # token of the concept where the error was noticed + parameters: list # list of unmatched parameters + + 
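+ # Produced by InFixToPostFix.handle_expected_token() when more parameters have + # accumulated in parameters_list than the concept can consume between two + # of its expected tokens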
def __repr__(self): + return f"Too many parameters found for '{self.concept}' before token '{self.token}'" + + +@dataclass() +class SyaConceptDef: + """ + Wrapper around a concept + It gives the precedence and the associativity for the concept + """ + concept: Concept + precedence: int = 0 + associativity: SyaAssociativity = SyaAssociativity.Right + + +@dataclass() +class SyaConceptParserHelper: + """ + Used because there is not enough information yet to create the final ConceptNode + """ + concept: Concept + start: int # position of the token in the tokenizer (Caution, it is not token.index) + end: int = field(default=-1, repr=False, compare=False, hash=None) + expected: List[str] = field(default_factory=list, repr=False, compare=False, hash=None) + expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None) + potential_pos: int = field(default=-1, repr=False, compare=False, hash=None) + parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None) + error: str = None + + def __post_init__(self): + concept = self.concept.concept if isinstance(self.concept, SyaConceptDef) else self.concept + if self.end == -1: + self.end = self.start + + first_keyword_found = False + for name in concept.key.split(): + if not name.startswith(VARIABLE_PREFIX) and not first_keyword_found: + first_keyword_found = True + + if first_keyword_found: + self.expected.append(name) + else: + self.expected_parameters_before_first_token += 1 + + self.eat_token() # remove the first token + + def is_matched(self): + return len(self.expected) == 0 + + def is_atom(self): + return len(self.concept.concept.metadata.props) == 0 and len(self.expected) == 0 + + def is_expected(self, token): + if self.is_matched(): + return False + + token_value = self._get_token_value(token) + + for expected in self.expected: + if not expected.startswith(VARIABLE_PREFIX) and expected == token_value: + return True + + return False + + def expected_parameters(self): + return sum(map(lambda e: e.startswith(VARIABLE_PREFIX), self.expected)) + + def eat_token(self): + # No check, as it is used only after is_expected + del self.expected[0] + + # return True if a whole sequence of keywords is eaten + # example + # Concept("foo a bar baz qux b").def_prop("a").def_prop("b")
+ # 'bar' is just eaten. We will return False because 'baz' and 'qux' are still waiting + if len(self.expected) == 0: + return True + + return self.expected[0].startswith(VARIABLE_PREFIX) + + def eat_parameter(self, parameter): + if self.is_matched() and parameter == self: + return # not an error + + if self.is_matched(): + self.error = "No more parameter expected" + return + + if not self.expected[0].startswith(VARIABLE_PREFIX): + self.error = "Parameter was not expected" + return + + del self.expected[0] + + def fix_concept(self): + """ + When the SYA is done, we only need the concept, not the sya concept + :return: + """ + if isinstance(self.concept, SyaConceptDef): + self.concept = self.concept.concept + return self + + @staticmethod + def _get_token_value(token): + if token.type == TokenKind.STRING: + return token.value[1:-1] + elif token.type == TokenKind.KEYWORD: + return token.value.value + else: + return token.value + + def clone(self): + clone = SyaConceptParserHelper(self.concept, self.start, self.end) + clone.expected = self.expected[:] + clone.expected_parameters_before_first_token = self.expected_parameters_before_first_token + clone.potential_pos = self.potential_pos + clone.parameters_list_at_init = self.parameters_list_at_init + clone.error = self.error + + return clone + + +class InFixToPostFix: + def __init__(self, context): + self.context = context + + self.is_locked = False # when locked, cannot process input + + self.out = [] # shunting yard algo out + self.stack = [] # shunting yard algo stack + self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # buffer that keeps track of token positions + + self.parameters_list = [] # list of the parameters that need to be associated to a concept + self.errors = [] # Not quite sure that I can handle more than one error + + self.debug = [] + self.forked = [] # used to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens + + def __repr__(self): + return f"InFixToPostFix({self.debug})" + + def __eq__(self, other): + if id(self) == id(other): + return True + + if not isinstance(other, InFixToPostFix): + return False + + return self.out == other.out + + def _add_error(self, error): + self.errors.append(error) + + def _is_lpar(self, token): + """ + True if the token is a left parenthesis '(' + Note that when we are parsing unrecognized tokens, + we consider that the parentheses are part of the unrecognized input + :param token: + :return: + """ + # return isinstance(token, Token) and token.type == TokenKind.LPAR + if isinstance(token, Token) and token.type == TokenKind.LPAR: + return True + if isinstance(token, tuple) and token[0].type == TokenKind.LPAR: + return True + if isinstance(token, UnrecognizedTokensNode) and token.parenthesis_count > 0: + return True + return False + + def _is_rpar(self, token): + """ + True if the token is a right parenthesis ')' + Note that when we are parsing unrecognized tokens, + we consider that the parentheses are part of the unrecognized input + :param token: + :return: + """ + return isinstance(token, Token) and token.type == TokenKind.RPAR + + def _concepts(self): + """ + Return the concepts currently being parsed + :return: + """ + res = [] + for item in self.stack: + if isinstance(item, SyaConceptParserHelper): + res.append(item) + return res + + def _put_to_out(self, item): + """ + Helper function that puts an item into the out list + :param item: + :return: + """ + if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
+ if item.expected[0].startswith(VARIABLE_PREFIX): + item.error = "Not enough suffix parameters" + else: + item.error = f"token '{item.expected[0]}' not found" + + if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1: + self.out.insert(item.potential_pos, item) + else: + self.out.append(item) + + # put the item to the list of awaiting parameters + self.parameters_list.append(item) + + if len(self._concepts()) > 0: + # try to predict the final position of the current concept + # This position can be altered by concept associativity and precedence + # So it's only a prediction + current = self._concepts()[-1] + if current.expected_parameters() == len(self.parameters_list) - len(current.parameters_list_at_init): + self._concepts()[-1].potential_pos = len(self.out) + + def _stack_isinstance(self, type): + """ + Check the type of the top item in the stack + :param type: + :return: + """ + return len(self.stack) > 0 and isinstance(self.stack[-1], type) + + def _get_lexer_nodes_from_unrecognized(self): + """ + Use the source of self.unrecognized_tokens to find concepts or source code + :return: + """ + + res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS) + only_parsers_results = builtin_helpers.only_parsers_results(self.context, res) + if not only_parsers_results.status: + return None + + return builtin_helpers.get_lexer_nodes( + only_parsers_results.body.body, + self.unrecognized_tokens.start, + self.unrecognized_tokens.tokens) + + def _make_source_code_with_concept(self, start, rpar_token, end): + """ + Build a SourceCodeWithConceptNode from the function node on the stack, + the out items found after 'start' and the closing parenthesis token + :param start: + :param rpar_token: + :param end: + :return: + """ + source_code = SourceCodeWithConceptNode( + self.stack.pop(), + UnrecognizedTokensNode(end, end, [rpar_token]), + self.out[start + 1:] + ).pseudo_fix_source() + return source_code + + def get_errors(self): + res = [] + res.extend(self.errors) + res.extend([item for item in self.out if isinstance(item, SyaConceptParserHelper) and item.error]) + return res + + def lock(self): + self.is_locked = True + + def reset(self): + if len(self.errors) > 0: + return + + self.is_locked = False + + def manage_parameters_when_new_concept(self, temp_concept_node): + """ + When a new concept is created, we need to check what to do with the parameters + that were queued + :param temp_concept_node: new concept + :return: + """ + if len(self.parameters_list) < temp_concept_node.expected_parameters_before_first_token: + # The new concept expects some prefix parameters, but there are not enough + temp_concept_node.error = "Not enough prefix parameters" + return + + if len(self.parameters_list) > temp_concept_node.expected_parameters_before_first_token: + # There are more parameters than needed by the new concept + # The others are either + # - parameters for the previous concept (if any) + # - concepts on their own + # - syntax error + # In all the cases, the only thing that matters is to pop what is expected by the new concept + for i in range(temp_concept_node.expected_parameters_before_first_token): + self.parameters_list.pop() + temp_concept_node.parameters_list_at_init.extend(self.parameters_list) + return + + # len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token + # => We consider that the parameters are part of the new concept + + if len(self._concepts()) > 1: + # The new concept is a parameter of the previous one.
+ # So reset the potential_pos of the previous concept + self._concepts()[-2].potential_pos = -1 + + # eat them all + self.parameters_list.clear() + + def manage_parameters(self): + """ + Some new parameters were added to the list. + What to do with them? + :return: + """ + + # The parameters must be part of the current concept being parsed + assert len(self._concepts()) != 0 # sanity check + + current_concept = self._concepts()[-1] + while len(current_concept.expected) > 0 and current_concept.expected[0].startswith(VARIABLE_PREFIX): + # eat everything that was expected + if len(self.parameters_list) == 0: + # current_concept.error = f"Failed to match parameter '{current_concept.expected[0]}'" + return + del self.parameters_list[0] + del current_concept.expected[0] + + def manage_unrecognized(self): + if self.unrecognized_tokens.is_empty(): + return + + # do not put an empty UnrecognizedTokensNode in out + if self.unrecognized_tokens.is_whitespace(): + self.unrecognized_tokens.reset() + return + + self.unrecognized_tokens.fix_source() + + # try to recognize concepts + nodes_sequences = self._get_lexer_nodes_from_unrecognized() + + if nodes_sequences: + # More than one solution may have been found + # In that case, we create a new InFixToPostFix for each extra possibility + if len(nodes_sequences) > 1: + for node_sequence in nodes_sequences[1:]: + clone = self.clone() + for node in node_sequence: + clone._put_to_out(node) + clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + self.forked.append(clone) + + # Do not forget the first result that will go with the current InFixToPostFix + for node in nodes_sequences[0]: + self._put_to_out(node) + else: + self._put_to_out(self.unrecognized_tokens) + + # # try to recognize concepts + # nodes = self._get_lexer_nodes_from_unrecognized() + # if nodes: + # for node in nodes: + # self._put_to_out(node) + # else: + # self._put_to_out(self.unrecognized_tokens) + + # create another instance + self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + + def get_functions_from_unrecognized(self, token, pos): + """ + The unrecognized input ends with an lpar '(' + It means that it's a function call like foo(something) + The problem is that we need to know if there are other concepts before the function + ex : suffix one function(x) + suffix and one are not / may not be part of the name of the function + + We need to parse the unrecognized tokens to identify the parts and act accordingly + :return: list of function_parser_res + """ + self.unrecognized_tokens.fix_source() + nodes_sequences = self._get_lexer_nodes_from_unrecognized() + if nodes_sequences is None: + return None + + res = [] + for sequence in nodes_sequences: + if isinstance(sequence[-1], UnrecognizedTokensNode): + function = sequence[-1] + else: + function = UnrecognizedTokensNode(sequence[-1].start, sequence[-1].end, sequence[-1].tokens) + function.add_token(token, pos).fix_source() + + res.append(function_parser_res(sequence[:-1], function)) + + return res + + def pop_stack_to_out(self): + """ + Helper function that pops the stack and puts the item to the output, if needed + :return: + """ + item = self.stack[-1] + + # fix the concept if needed + if isinstance(item, SyaConceptParserHelper): + # make sure the expected parameters of this item are eaten + if 0 < len(item.expected) <= len(self.parameters_list): + self.manage_parameters() + item.fix_concept() + + self.stack.pop() + self._put_to_out(item) + + def i_can_pop(self, concept_node): + """ + Validate the Shunting Yard Algorithm conditions to pop out from the stack +
Note that it's a custom implementation as I need to manage UnrecognizedTokensNode + :param concept_node: + :return: + """ + if len(self.stack) == 0: + return False + + stack_head = self.stack[-1] + + if not isinstance(stack_head, SyaConceptParserHelper): # mostly left parenthesis + return False + + current = concept_node.concept + stack = stack_head.concept + + if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No: + self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start)) + + if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence: + return True + + if current.associativity == SyaAssociativity.Right and current.precedence < stack.precedence: + return True + + return False + + def handle_expected_token(self, token, pos): + """ + True if the token is part of the concept being parsed and the last token in a sequence is eaten + Example : Concept("foo a bar b").def_prop("a").def_prop("b") + The expected tokens are 'foo' and 'bar' (as a and b are parameters) + + Example: Concept("foo a bar baz b").def_prop("a").def_prop("b") + If the token is 'bar', it will be eaten but handle_expected_token() will return False + as we still expect 'baz' + :param token: + :param pos: + :return: + """ + + for current_concept in reversed(self._concepts()): + + if current_concept.is_expected(token): + current_concept.end = pos + self.manage_unrecognized() + # manage that some clones may have been forked + for forked in self.forked: + forked.handle_expected_token(token, pos) + + # manage concepts found between tokens (of ternary for example) + self.manage_parameters() + + if len(self.parameters_list) > len(current_concept.parameters_list_at_init): + # we have eaten the parameters expected between two expected tokens + # But there are some remaining parameters + self._add_error(TooManyParametersFound( + current_concept.concept.concept, + current_concept.start, + token, + self.parameters_list[:])) + return True # no need to continue + + while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched(): + self.pop_stack_to_out() + self.manage_parameters() + + if current_concept.eat_token(): + while self.stack[-1] != current_concept and not self._is_lpar(current_concept): + self.pop_stack_to_out() + + if self._is_lpar(self.stack[-1]): + self._add_error(ParenthesisMismatchErrorNode(self.stack[-1])) + return False + + # Manage concepts ending with long names + if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched(): + self.pop_stack_to_out() + + return True + + return False + + def eat_token(self, token, pos): + """ + Receive at token. 
+ It will be processed if it's expected by a concept or if it's a parenthesis + :param token: + :param pos: + :return: + """ + + if self.is_locked: + return + + if self.handle_expected_token(token, pos): + # a token is found, let's check if it's part of a concepts being parsed + # example Concept(name="foo", definition="foo a bar b").def_prop("a").def_prop("b") + # if the token 'bar' is found, it has to be considered as part of the concept foo + self.debug.append(token) + return True + + elif self._is_lpar(token): + self.debug.append(token) + + if self.unrecognized_tokens.is_empty() or self.unrecognized_tokens.is_whitespace(): + # first, remove what was in the buffer + self.manage_unrecognized() + for forked in self.forked: + # manage that some clones may have been forked + forked.eat_token(token, pos) + + self.stack.append((token, pos)) + else: + # the parenthesis is part of the unrecognized + # So it's a function + + list_of_results = self.get_functions_from_unrecognized(token, pos) + if list_of_results: + instances = [self] + for i in range(len(list_of_results) - 1): + clone = self.clone() + self.forked.append(clone) + instances.append(clone) + + # Manage the result for self and its clones + for instance, parsing_res in zip(instances, list_of_results): + for to_out in parsing_res.to_out: + instance._put_to_out(to_out) + + instance._put_to_out(")") # mark where the function should end + instance.stack.append(parsing_res.function) + instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized + else: + self._put_to_out(")") # mark where the function should end + self.eat_unrecognized(token, pos) # add the '(' to the rest of the unknown + self.stack.append(self.unrecognized_tokens.fix_source()) + self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + + return True + + elif self._is_rpar(token): + self.debug.append(token) + + # first, remove what was in the buffer + self.manage_unrecognized() + for forked in self.forked: + # manage that some clones may have been forked + forked.eat_token(token, pos) + + # pop everything but the lpar from stack to 'out' + while len(self.stack) > 0 and not self._is_lpar(self.stack[-1]): + self.pop_stack_to_out() + + # checks consistency if an lpar is found + if len(self.stack) == 0: + self._add_error(ParenthesisMismatchErrorNode((token, pos))) + return None + + if self._stack_isinstance(UnrecognizedTokensNode): + # the parenthesis was a function + # we need to return a SourceCodeWithConceptNode + for i in range(len(self.out) - 1, -1, -1): + if self.out[i] == ')': + start = i + break + else: + self._add_error(ParenthesisMismatchErrorNode((token, pos))) + return None + + source_code = self._make_source_code_with_concept(start, token, pos) + + for item in self.out[start:]: + # update the parameter list + try: + self.parameters_list.remove(item) + except ValueError: + pass + del self.out[start:] + self._put_to_out(source_code) + + # self.pop_stack_to_out() + # # Replace the ')' marker by its real position + # for i in range(len(self.out) - 1, -1, -1): + # if self.out[i] == ')': + # self.out[i] = UnrecognizedTokensNode(pos, pos, [token]) + + else: + self.stack.pop() # discard the lpar + return True + + return False + + def eat_concept(self, sya_concept_def, pos): + """ + a concept is found + :param sya_concept_def: + :param pos: + :return: + """ + + if self.is_locked: + return + self.debug.append(sya_concept_def) + + temp_concept_node = SyaConceptParserHelper(sya_concept_def, pos) + + # First, try to recognize the tokens that are 
+        self.manage_unrecognized()
+        for forked in self.forked:
+            # manage the fact that some clones may have been forked
+            forked.eat_concept(sya_concept_def, pos)
+
+        # then, check if this new concept is linked to the previous ones
+        # i.e., is the previous concept fully matched?
+        if temp_concept_node.expected_parameters_before_first_token == 0:
+            # => does not expect a pending parameter (it's a suffixed concept)
+            while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
+                # => the previous one seems to have everything it needs in the parameter list
+                self.pop_stack_to_out()
+
+        if temp_concept_node.is_atom():
+            self._put_to_out(temp_concept_node.fix_concept())
+        else:
+            # call the shunting-yard algorithm
+            while self.i_can_pop(temp_concept_node):
+                self.pop_stack_to_out()
+
+            if temp_concept_node.is_matched():
+                # case of a prefix concept whose parameters are all satisfied by self.parameters_list:
+                # directly put it in out
+                self.manage_parameters_when_new_concept(temp_concept_node)
+                self._put_to_out(temp_concept_node.fix_concept())
+            else:
+                self.stack.append(temp_concept_node)
+                self.manage_parameters_when_new_concept(temp_concept_node)
+
+    def eat_unrecognized(self, token, pos):
+        """
+        The token was not recognized; add it to the current UnrecognizedTokensNode
+        :param token:
+        :param pos:
+        :return:
+        """
+        if self.is_locked:
+            return
+
+        self.debug.append(token)
+
+        self.unrecognized_tokens.add_token(token, pos)
+
+    def finalize(self):
+        """
+        Put the remaining items from the stack to out
+        :return:
+        """
+
+        if self.is_locked:
+            return
+
+        if len(self.stack) == 0 and len(self.out) == 0:
+            return  # no need to pop the buffer, as no concept was found
+
+        self.manage_unrecognized()
+        for forked in self.forked:
+            # manage the fact that some clones may have been forked
+            forked.finalize()
+
+        while len(self.stack) > 0:
+            if self._is_lpar(self.stack[-1]) or self._is_rpar(self.stack[-1]):
+                self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
+                return None
+
+            self.pop_stack_to_out()
+
+    def clone(self):
+        clone = InFixToPostFix(self.context)
+        clone.is_locked = self.is_locked
+        clone.out = self.out[:]
+        clone.stack = [i.clone() if hasattr(i, "clone") else i for i in self.stack]
+        clone.unrecognized_tokens = self.unrecognized_tokens.clone()
+        clone.parameters_list = self.parameters_list[:]
+        clone.errors = self.errors[:]
+        clone.debug = self.debug[:]
+        # clone.forked = self.forked
+        return clone
+
+
+@dataclass()
+class PostFixToItem:
+    concept: Concept
+    start: int
+    end: int
+    has_unrecognized: bool
+
+
+class SyaNodeParser(BaseParser):
+    def __init__(self, **kwargs):
+        BaseParser.__init__(self, "SyaNode", 50)
+        if 'sheerka' in kwargs:
+            sheerka = kwargs.get("sheerka")
+            self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
+            self.sya_definitions = {}
+            if sheerka.sya_definitions:
+                for k, v in sheerka.sya_definitions.items():
+                    self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
+
+        else:
+            self.concepts_by_first_keyword = {}
+            self.sya_definitions = {}
+
+        self.token = None
+        self.pos = -1
+        self.tokens = None
+
+        self.context: ExecutionContext = None
+        self.text = None
+        self.sheerka = None
+
+    def reset_parser(self, context, text):
+        self.context = context
+        self.sheerka = context.sheerka
+        self.text = text
+
+        try:
+            self.tokens = list(self.get_input_as_tokens(text))
+        except LexerError as e:
+            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
+            return False
+
+        self.token = None
+        self.pos = -1
+        return True
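+
+    # Worked example: with 'plus' (precedence 10) and 'mult' (precedence 20)
+    # registered as sya definitions, infix_to_postfix("one plus two mult three")
+    # produces the postfix sequence [one, two, three, mult, plus];
+    # postfix_to_item() then rebuilds plus(one, mult(two, three)), i.e. 1 + (2 * 3) = 7.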
+
+    def add_error(self, error, next_token=True):
+        self.error_sink.append(error)
+        if next_token:
+            self.next_token()
+        return error
+
+    def get_token(self) -> Token:
+        return self.token
+
+    def next_token(self, skip_whitespace=True):
+        if self.token and self.token.type == TokenKind.EOF:
+            return False
+
+        self.pos += 1
+        self.token = self.tokens[self.pos]
+
+        if skip_whitespace:
+            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
+                self.pos += 1
+                self.token = self.tokens[self.pos]
+
+        return self.token.type != TokenKind.EOF
+
+    def initialize(self, context, concepts=None, sya_definitions=None):
+        self.context = context
+        self.sheerka = context.sheerka
+
+        if sya_definitions:
+            self.sya_definitions = sya_definitions
+
+        if concepts:
+            for concept in concepts:
+                keywords = concept.key.split()
+                for keyword in keywords:
+                    if keyword.startswith(VARIABLE_PREFIX):
+                        continue
+
+                    self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
+                    break
+
+        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
+
+    def get_concepts(self, token):
+        """
+        Tries to find concepts that match the token's value
+        :param token:
+        :return:
+        """
+
+        if token.type == TokenKind.STRING:
+            name = token.value[1:-1]
+        elif token.type == TokenKind.KEYWORD:
+            name = token.value.value
+        else:
+            name = token.value
+
+        result = []
+        if name in self.concepts_by_first_keyword:
+            for concept_id in self.concepts_by_first_keyword[name]:
+
+                concept = self.sheerka.get_by_id(concept_id)
+
+                if len(concept.metadata.props) == 0:
+                    # keep only concepts that have parameters (reject atoms)
+                    # Note: this test is needed in case the definition of the concept has changed
+                    continue
+
+                if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
+                    # bnf definitions are not supposed to be managed by this parser
+                    continue
+
+                sya_concept_def = SyaConceptDef(concept)
+                if concept.id in self.sya_definitions:
+                    sya_def = self.sya_definitions[concept.id]
+                    if sya_def[0] is not None:
+                        sya_concept_def.precedence = sya_def[0]
+                    if sya_def[1] is not None:
+                        sya_concept_def.associativity = sya_def[1]
+
+                result.append(sya_concept_def)
+            return result
+
+        return None
+
+    def infix_to_postfix(self, context, text):
+        """
+        Implements the shunting-yard algorithm
+        :param context:
+        :param text:
+        :return:
+        """
+
+        if not self.reset_parser(context, text):
+            return None
+
+        forked = []
+
+        def _add_forked_to_res():
+            # check whether new InFixToPostFix instances have been forked
+            for in_to_post in res:
+                if len(in_to_post.forked) > 0:
+                    forked.extend(in_to_post.forked)
+                    in_to_post.forked.clear()
+            if len(forked) > 0:
+                res.extend(forked)
+                forked.clear()
+
+        res = [InFixToPostFix(context)]
+        while self.next_token(False):
+            for infix_to_postfix in res:
+                infix_to_postfix.reset()
+
+            token = self.get_token()
+
+            try:
+                if token.type in (TokenKind.LPAR, TokenKind.RPAR):
+                    # small optimization: no concept lookup is needed for parentheses
+                    for infix_to_postfix in res:
+                        infix_to_postfix.eat_token(token, self.pos)
+                    continue
+
+                for infix_to_postfix in res:
+                    if infix_to_postfix.eat_token(token, self.pos):
+                        infix_to_postfix.lock()
+
+                concepts = self.get_concepts(token)
+                if not concepts:
+                    for infix_to_postfix in res:
+                        infix_to_postfix.eat_unrecognized(token, self.pos)
+                    continue
+
+                if len(concepts) == 1:
+                    for infix_to_postfix in res:
+                        infix_to_postfix.eat_concept(concepts[0], self.pos)
+                    continue
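+
+                # Ambiguity handling: when several concepts start with the same
+                # keyword, every current InFixToPostFix is cloned once per
+                # candidate concept so each clone pursues one interpretation;
+                # get_valid() filters out the losing interpretations afterwards.
+
+                # make the cartesian product
+                temp_res = []
+                for infix_to_postfix in res:
+                    for concept in concepts:
+                        clone = infix_to_postfix.clone()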
+                        temp_res.append(clone)
+                        clone.eat_concept(concept, self.pos)
+                res = temp_res
+
+            finally:
+                _add_forked_to_res()
+
+        # make sure that remaining items in the stack are moved to out
+        for infix_to_postfix in res:
+            infix_to_postfix.reset()
+            infix_to_postfix.finalize()
+        _add_forked_to_res()
+
+        return res
+
+    def postfix_to_item(self, sheerka, postfixed):
+        item = postfixed.pop()
+        if isinstance(item, (UnrecognizedTokensNode, SourceCodeNode, ConceptNode)):
+            return item
+
+        if isinstance(item, SourceCodeWithConceptNode):
+            items = []
+            while len(item.nodes) > 0:
+                res = self.postfix_to_item(sheerka, item.nodes)
+                if isinstance(res, PostFixToItem):
+                    items.append(ConceptNode(res.concept, res.start, res.end, self.tokens[res.start: res.end + 1]))
+                else:
+                    items.append(res)
+                item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \
+                    isinstance(res, UnrecognizedTokensNode)
+            item.nodes = items
+            item.fix_all_pos()
+            item.tokens = self.tokens[item.start:item.end + 1]
+            item.fix_source(True)
+            return item
+
+        # ParserHelper
+        start = item.start
+        end = item.end
+        has_unrecognized = False
+        concept = sheerka.new_from_template(item.concept, item.concept.id)
+        for param_index in reversed(range(len(concept.metadata.props))):
+            inner_item = self.postfix_to_item(sheerka, postfixed)
+            if inner_item.start < start:
+                start = inner_item.start
+            if inner_item.end > end:
+                end = inner_item.end
+            has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode)
+
+            param_name = concept.metadata.props[param_index][0]
+            param_value = inner_item.concept if hasattr(inner_item, "concept") else \
+                [inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \
+                inner_item
+
+            concept.compiled[param_name] = param_value
+
+        return PostFixToItem(concept, start, end, has_unrecognized)
+
+    def parse(self, context, parser_input):
+        """
+
+        :param context:
+        :param parser_input:
+        :return:
+        """
+        if parser_input == "":
+            return context.sheerka.ret(
+                self.name,
+                False,
+                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
+            )
+
+        ret = []
+        valid_infix_to_postfixs = self.get_valid(self.infix_to_postfix(context, parser_input))
+        if valid_infix_to_postfixs is None:
+            # token error
+            return self.sheerka.ret(
+                self.name,
+                False,
+                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
+
+        if len(valid_infix_to_postfixs) == 0:
+            return self.sheerka.ret(
+                self.name,
+                False,
+                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+
+        for infix_to_postfix in valid_infix_to_postfixs:
+            sequence = []
+            has_unrecognized = False
+            while len(infix_to_postfix.out) > 0:
+                item = self.postfix_to_item(context.sheerka, infix_to_postfix.out)
+                has_unrecognized |= hasattr(item, "has_unrecognized") and item.has_unrecognized or \
+                    isinstance(item, UnrecognizedTokensNode)
+                if isinstance(item, PostFixToItem):
+                    to_insert = ConceptNode(item.concept, item.start, item.end, self.tokens[item.start: item.end + 1])
+                else:
+                    to_insert = item
+                sequence.insert(0, to_insert)
+
+            ret.append(
+                self.sheerka.ret(
+                    self.name,
+                    not has_unrecognized,
+                    self.sheerka.new(
+                        BuiltinConcepts.PARSER_RESULT,
+                        parser=self,
+                        source=parser_input,
+                        body=sequence,
+                        try_parsed=sequence)))
+
+        if len(ret) == 1:
+            self.log_result(context, parser_input, ret[0])
+            return ret[0]
+        else:
+            self.log_multiple_results(context, parser_input, ret)
+            return ret
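+
+    # postfix_to_item() consumes infix_to_postfix.out from the end: each
+    # PostFixToItem pops as many inner items as its concept has properties,
+    # which is why parse() rebuilds the sequence by inserting resolved items
+    # at index 0.
+
+    @staticmethod
+    def get_valid(infix_to_postfixs):
+        """
+        Gets the valid InFixToPostFix instances
+        :param infix_to_postfixs: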
+        :return:
+        """
+
+        def _has_sya(items):
+            for item in items:
+                if isinstance(item, SourceCodeWithConceptNode):
+                    if _has_sya(item.nodes):
+                        return True
+                if isinstance(item, SyaConceptParserHelper):
+                    return True
+            return False
+
+        if infix_to_postfixs is None:
+            return None
+
+        result = []
+        for infix_to_postfix in infix_to_postfixs:
+            if len(infix_to_postfix.get_errors()) > 0:
+                continue
+
+            if len(infix_to_postfix.out) == 0:
+                continue
+
+            if infix_to_postfix in result:
+                continue
+
+            if not _has_sya(infix_to_postfix.out):
+                # reject the result if it does not involve SYA
+                continue
+
+            result.append(infix_to_postfix)
+
+        return result
diff --git a/src/parsers/UnrecognizedNodeParser.py b/src/parsers/UnrecognizedNodeParser.py
new file mode 100644
index 0000000..7e589ec
--- /dev/null
+++ b/src/parsers/UnrecognizedNodeParser.py
@@ -0,0 +1,114 @@
+from dataclasses import dataclass
+
+from core.builtin_concepts import BuiltinConcepts
+from core.concept import Concept
+from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
+from parsers.BaseParser import BaseParser, ErrorNode
+from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
+import core.utils
+
+PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]
+
+
+@dataclass()
+class CannotParseNode(ErrorNode):
+    unrecognized: UnrecognizedTokensNode
+
+
+class UnrecognizedNodeParser(BaseParser):
+    """
+    This parser comes after the other NodeParsers (Atom, Bnf or Sya).
+    It tries to resolve all remaining UnrecognizedTokensNode instances.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__("UnrecognizedNode", 45)  # lower than AtomNode, BnfNode and SyaNode
+
+    def add_error(self, error):
+        if hasattr(error, "__iter__"):
+            self.error_sink.extend(error)
+        else:
+            self.error_sink.append(error)
+
+    def parse(self, context, parser_input):
+        sheerka = context.sheerka
+        nodes = self.get_input_as_lexer_nodes(parser_input, None)
+        if not nodes:
+            return None
+
+        sequences_found = [[]]
+        has_unrecognized = False
+
+        for node in nodes:
+            if isinstance(node, ConceptNode):
+                res = self.validate_concept_node(context, node)
+                if not res.status:
+                    self.add_error(res.body)
+                else:
+                    sequences_found = core.utils.product(sequences_found, [res.body])
+
+            elif isinstance(node, UnrecognizedTokensNode):
+                res = parse_unrecognized(context, node.source, PARSERS)
+                res = only_successful(context, res)
+                if res.status:
+                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
+                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
+                else:
+                    sequences_found = core.utils.product(sequences_found, [node])
+                    has_unrecognized = True
+
+            else:  # cannot happen as of today :-)
+                raise NotImplementedError()
+
+        # a concept with an UnrecognizedTokensNode in its properties is considered a fatal error
+        if self.has_error:
+            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
+
+        ret = []
+        for choice in sequences_found:
+            ret.append(
+                sheerka.ret(
+                    self.name,
+                    not has_unrecognized,
+                    sheerka.new(
+                        BuiltinConcepts.PARSER_RESULT,
+                        parser=self,
+                        source=parser_input,
+                        body=choice,
+                        try_parsed=choice)))
+
+        if len(ret) == 1:
+            self.log_result(context, parser_input, ret[0])
+            return ret[0]
+        else:
+            self.log_multiple_results(context, parser_input, ret)
+            return ret
+
+    def validate_concept_node(self, context, concept_node):
+
+        sheerka = context.sheerka
+        errors = []
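+
+        # The nested helper below walks the compiled properties recursively: any
+        # UnrecognizedTokensNode it finds is re-submitted to the PARSERS chain
+        # and replaced in place on success, otherwise reported as an error.
+
+        def _validate_concept(concept):
+            """
+            Recursively browse the compiled properties in order to find unrecognized tokens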
+            :param concept:
+            :return:
+            """
+            for name, value in concept.compiled.items():
+                if isinstance(value, Concept):
+                    _validate_concept(value)
+
+                elif isinstance(value, UnrecognizedTokensNode):
+                    res = parse_unrecognized(context, value.tokens, PARSERS)
+                    res = only_successful(context, res)  # keep only successful parsers
+                    if res.status:
+                        concept.compiled[name] = res.body.body
+                    else:
+                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))
+
+        _validate_concept(concept_node.concept)
+        if len(errors) > 0:
+            return context.sheerka.ret(self.name, False, errors)
+        else:
+            return context.sheerka.ret(self.name, True, concept_node)
diff --git a/src/sdp/sheerkaDataProvider.py b/src/sdp/sheerkaDataProvider.py
index 919bf80..f74d930 100644
--- a/src/sdp/sheerkaDataProvider.py
+++ b/src/sdp/sheerkaDataProvider.py
@@ -20,6 +20,9 @@ def json_default_converter(o):
     if isinstance(o, (date, datetime)):
         return o.isoformat()
 
+    if isinstance(o, SheerkaDataProviderRef):
+        return f"##XREF##:{o.target}"
+
 
 class Event(object):
     """
@@ -389,7 +392,7 @@ class SheerkaDataProvider:
             return getattr(obj, Serializer.ORIGIN)
 
         if isinstance(obj, SheerkaDataProviderRef):
-            return obj.original_target
+            return obj.original_target
 
         return None
 
@@ -406,6 +409,11 @@ class SheerkaDataProvider:
     def is_reference(obj):
         return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
 
+    def reset(self):
+        self.first_time = self.io.first_time
+        if hasattr(self.io, "reset"):
+            self.io.reset()
+
     def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False):
         """
        Adds obj to the entry 'entry'
@@ -999,3 +1007,12 @@ class SheerkaDataProvider:
             keys[entry] = value
         self.save_keys(keys)
         return str(value)
+
+    def dump_state(self, digest=None):
+        digest = digest or self.get_snapshot(SheerkaDataProvider.HeadFile)
+        state = self.load_state(digest)
+        print(json.dumps(state.data, sort_keys=True, default=json_default_converter, indent=True))
+
+    def dump_obj(self, digest):
+        obj = self.load_obj(digest)
+        print(json.dumps(obj.__dict__, sort_keys=True, default=json_default_converter, indent=True))
diff --git a/src/sdp/sheerkaDataProviderIO.py b/src/sdp/sheerkaDataProviderIO.py
index 21e5f4a..c7c3df9 100644
--- a/src/sdp/sheerkaDataProviderIO.py
+++ b/src/sdp/sheerkaDataProviderIO.py
@@ -170,6 +170,10 @@ class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):
 
         return io.BytesIO(self.cache[file_path]) if "b" in mode else io.StringIO(self.cache[file_path])
 
+    def reset(self):
+        self.cache.clear()
+        self.first_time = True
+
 
 def on_close(dictionary_io, file_path, stream):
     """
diff --git a/tests/BaseTest.py b/tests/BaseTest.py
index bc95606..d7306c9 100644
--- a/tests/BaseTest.py
+++ b/tests/BaseTest.py
@@ -1,7 +1,7 @@
 import ast
 
 from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
-from core.concept import Concept
+from core.concept import Concept, DEFINITION_TYPE_BNF
 from core.sheerka.ExecutionContext import ExecutionContext
 from parsers.BnfParser import BnfParser
 from sdp.sheerkaDataProvider import Event
@@ -54,6 +54,9 @@ class BaseTest:
                 res = bnf_parser.parse(context, c.metadata.definition)
                 if res.status:
                     c.bnf = res.value.value
+                    c.metadata.definition_type = DEFINITION_TYPE_BNF
+                else:
+                    raise Exception(f"Error in bnf definition '{c.metadata.definition}'", sheerka.get_error(res))
                 sheerka.create_new_concept(context, c)
             elif create_new:
                 sheerka.create_new_concept(context, c)
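A minimal sketch of the dump pattern added above, assuming only a converter in the spirit of json_default_converter (the helper names below are illustrative, not part of the patch):

import json
from datetime import date, datetime

def default_converter(o):
    # like json_default_converter: ISO-format dates, otherwise refuse explicitly
    if isinstance(o, (date, datetime)):
        return o.isoformat()
    raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")

state = {"created": datetime(2020, 4, 9, 15, 42), "concept_id": "1001"}
print(json.dumps(state, sort_keys=True, default=default_converter, indent=True))
# {
#  "concept_id": "1001",
#  "created": "2020-04-09T15:42:00"
# }

diff --git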
a/tests/TestUsingMemoryBasedSheerka.py b/tests/TestUsingMemoryBasedSheerka.py index 01deeaf..fcf4738 100644 --- a/tests/TestUsingMemoryBasedSheerka.py +++ b/tests/TestUsingMemoryBasedSheerka.py @@ -3,9 +3,34 @@ from tests.BaseTest import BaseTest class TestUsingMemoryBasedSheerka(BaseTest): + singleton_instance = None + builtin_concepts = {} - def get_sheerka(self, **kwargs): - skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True) + @staticmethod + def _inner_get_sheerka(skip_builtins_in_db): sheerka = Sheerka(skip_builtins_in_db=skip_builtins_in_db) sheerka.initialize("mem://") return sheerka + + def get_sheerka(self, **kwargs): + skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True) + use_singleton = kwargs.get("singleton", False) + + sheerka = kwargs.get("sheerka", False) + if sheerka: + return sheerka + + if use_singleton: + singleton_instance = TestUsingMemoryBasedSheerka.singleton_instance + if singleton_instance: + singleton_instance.reset() + singleton_instance.cache_by_key.update(TestUsingMemoryBasedSheerka.builtin_concepts) # quicker ? + # singleton_instance.cache_by_key = TestUsingMemoryBasedSheerka.builtin_concepts + return singleton_instance + else: + new_instance = self._inner_get_sheerka(skip_builtins_in_db) + TestUsingMemoryBasedSheerka.builtin_concepts.update(new_instance.cache_by_key) + TestUsingMemoryBasedSheerka.singleton_instance = new_instance + return TestUsingMemoryBasedSheerka.singleton_instance + + return self._inner_get_sheerka(skip_builtins_in_db) diff --git a/tests/core/test_SheerkaCreateNewConcept.py b/tests/core/test_SheerkaCreateNewConcept.py index 0ed940c..bc37167 100644 --- a/tests/core/test_SheerkaCreateNewConcept.py +++ b/tests/core/test_SheerkaCreateNewConcept.py @@ -3,6 +3,7 @@ from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF from core.sheerka.Sheerka import Sheerka from sdp.sheerkaDataProvider import SheerkaDataProvider +from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -32,6 +33,7 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, concept.id) assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name) assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, concept.key) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") def test_i_can_add_a_concept_when_name_differs_from_the_key(self): sheerka = self.get_sheerka() @@ -229,3 +231,29 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): assert res.status +class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka): + def test_i_can_add_several_concepts(self): + sheerka, context, hello, greeting = self.init_concepts( + Concept("Hello world a").def_prop("a"), + Concept("Greeting a").def_prop("a"), + use_dict=False + ) + + res = sheerka.create_new_concept(self.get_context(sheerka), hello) + assert res.status + + sheerka = self.get_sheerka(use_dict=False) + res = sheerka.create_new_concept(self.get_context(sheerka), greeting) + assert res.status + + assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, hello.key) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, greeting.key) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, hello.id) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, greeting.id) + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, "Hello world a") + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, 
"Greeting a") + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Hello") + assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Greeting") + + + diff --git a/tests/core/test_SheerkaHistoryManager.py b/tests/core/test_SheerkaHistoryManager.py index c62ba23..cf9c73c 100644 --- a/tests/core/test_SheerkaHistoryManager.py +++ b/tests/core/test_SheerkaHistoryManager.py @@ -4,7 +4,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class TestSheerkaHistoryManager(TestUsingMemoryBasedSheerka): def test_i_can_retrieve_history(self): - sheerka = self.get_sheerka(skip_builtins_in_db=False) + sheerka = self.get_sheerka(skip_builtins_in_db=False, singleton=False) sheerka.evaluate_user_input("def concept one as 1") sheerka.evaluate_user_input("one") diff --git a/tests/core/test_sheerka.py b/tests/core/test_sheerka.py index c6ab4ea..b11b5cb 100644 --- a/tests/core/test_sheerka.py +++ b/tests/core/test_sheerka.py @@ -244,7 +244,7 @@ class TestSheerka(TestUsingFileBasedSheerka): assert not sheerka.is_success(sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS)) def test_cache_is_updated_after_get(self): - sheerka = self.get_sheerka() + sheerka = self.get_sheerka(skip_builtins_in_db=False) # updated when by_key returns one element sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="1")) diff --git a/tests/core/test_sheerka_call_parsers.py b/tests/core/test_sheerka_call_parsers.py index eff4549..a4cbe2b 100644 --- a/tests/core/test_sheerka_call_parsers.py +++ b/tests/core/test_sheerka_call_parsers.py @@ -171,22 +171,22 @@ class TestSheerkaExecuteParsers(TestUsingMemoryBasedSheerka): 'name=Enabled50True, priority=50, status=True, source=Enabled80False:Enabled90False:hello world', ] - def test_parsing_stop_at_the_first_success(self): - sheerka = self.get_sheerka() - sheerka.parsers = { - "Enabled80False": Enabled80FalseParser, - "Enabled50bisTrue": Enabled50bisTrueParser, - "Enabled10True": Enabled10TrueParser, - } - - user_input = [get_ret_val("hello world")] - BaseTestParser.debug_out = [] - sheerka.execute(self.get_context(sheerka), user_input, [BuiltinConcepts.PARSING]) - - assert BaseTestParser.debug_out == [ - 'name=Enabled80False, priority=80, status=False, source=hello world', - 'name=Enabled50BisTrue, priority=50, status=True, source=hello world', - ] + # def test_parsing_stop_at_the_first_success(self): + # sheerka = self.get_sheerka() + # sheerka.parsers = { + # "Enabled80False": Enabled80FalseParser, + # "Enabled50bisTrue": Enabled50bisTrueParser, + # "Enabled10True": Enabled10TrueParser, + # } + # + # user_input = [get_ret_val("hello world")] + # BaseTestParser.debug_out = [] + # sheerka.execute(self.get_context(sheerka), user_input, [BuiltinConcepts.PARSING]) + # + # assert BaseTestParser.debug_out == [ + # 'name=Enabled80False, priority=80, status=False, source=hello world', + # 'name=Enabled50BisTrue, priority=50, status=True, source=hello world', + # ] def test_parsing_stop_at_the_first_success_2(self): """ @@ -243,10 +243,13 @@ class TestSheerkaExecuteParsers(TestUsingMemoryBasedSheerka): 'name=Enabled50True, priority=50, status=False, source=Enabled80False:hello world', 'name=Enabled50True, priority=50, status=True, source=Enabled80False:Enabled90False:hello world', 'name=Enabled50BisTrue, priority=50, status=True, source=hello world', + 'name=Enabled50BisTrue, priority=50, status=True, source=Enabled90False:hello world', + 'name=Enabled50BisTrue, priority=50, status=True, source=Enabled80False:hello world', + 
'name=Enabled50BisTrue, priority=50, status=True, source=Enabled80False:Enabled90False:hello world', 'name=Enabled50False, priority=50, status=False, source=hello world', 'name=Enabled50False, priority=50, status=False, source=Enabled90False:hello world', 'name=Enabled50False, priority=50, status=False, source=Enabled80False:hello world', - 'name=Enabled50False, priority=50, status=False, source=Enabled80False:Enabled90False:hello world', + 'name=Enabled50False, priority=50, status=False, source=Enabled80False:Enabled90False:hello world' ] def test_a_parser_has_access_to_the_output_of_its_predecessors(self): diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 5b73082..c556146 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -69,7 +69,7 @@ def test_i_can_get_sub_classes(): default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser") exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser") python_parser = core.utils.get_class("parsers.PythonParser.PythonParser") - concept_lexer_parser = core.utils.get_class("parsers.ConceptLexerParser.ConceptLexerParser") + concept_lexer_parser = core.utils.get_class("parsers.BnfNodeParser.BnfNodeParser") assert base_parser not in sub_classes assert default_parser in sub_classes diff --git a/tests/evaluators/test_AddConceptEvaluator.py b/tests/evaluators/test_AddConceptEvaluator.py index 6d9110b..a74d7f3 100644 --- a/tests/evaluators/test_AddConceptEvaluator.py +++ b/tests/evaluators/test_AddConceptEvaluator.py @@ -2,11 +2,11 @@ import ast import pytest from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts -from core.concept import VARIABLE_PREFIX, Concept +from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF from core.tokenizer import Tokenizer from evaluators.AddConceptEvaluator import AddConceptEvaluator from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression +from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression from parsers.BnfParser import BnfParser from parsers.DefaultParser import DefConceptNode, NameNode from parsers.PythonParser import PythonNode, PythonParser @@ -65,10 +65,10 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): def_concept.post = self.get_concept_part(post) if bnf_def: def_concept.definition = bnf_def - def_concept.definition_type = "bnf" + def_concept.definition_type = DEFINITION_TYPE_BNF if definition: def_concept.definition = NameNode(list(Tokenizer(definition))) - def_concept.definition_type = "def" + def_concept.definition_type = DEFINITION_TYPE_DEF return ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=def_concept)) diff --git a/tests/evaluators/test_AddConceptInSetEvaluator.py b/tests/evaluators/test_AddConceptInSetEvaluator.py index a193a6f..56cb2d2 100644 --- a/tests/evaluators/test_AddConceptInSetEvaluator.py +++ b/tests/evaluators/test_AddConceptInSetEvaluator.py @@ -69,12 +69,14 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka): def test_i_can_add_bnf_concept_to_a_set_of_concept(self): """ This test is the reason why I have started the whole eval on demand stuff - Sheerka tries to evaluate the body but it can (as a and b are not defined) + Sheerka tries to evaluate the body but it can't (as a and b are not defined) So 'foo' cannot be put is set :return: """ - sheerka, context, foo, bar 
= self.init_concepts( - Concept("foo", definition="a plus b", body="a + b").def_prop("a").def_prop("b"), + sheerka, context, one, two, foo, bar = self.init_concepts( + "one", + "two", + Concept("foo", definition="(one|two)=a 'plus' (one|two)=b", body="a + b").def_prop("a").def_prop("b"), "bar", create_new=True) diff --git a/tests/evaluators/test_LexerNodeEvaluator.py b/tests/evaluators/test_LexerNodeEvaluator.py index 39b1dc5..8306364 100644 --- a/tests/evaluators/test_LexerNodeEvaluator.py +++ b/tests/evaluators/test_LexerNodeEvaluator.py @@ -4,7 +4,8 @@ import pytest from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve from evaluators.LexerNodeEvaluator import LexerNodeEvaluator -from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, StrMatch, UnrecognizedTokensNode, SourceCodeNode +from parsers.BaseNodeParser import SourceCodeNode +from parsers.BnfNodeParser import ConceptNode, BnfNodeParser, StrMatch, UnrecognizedTokensNode from parsers.PythonParser import PythonNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -12,7 +13,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): def from_parsing(self, context, grammar, expression): - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) ret_val = parser.parse(context, expression) diff --git a/tests/evaluators/test_MultipleErrorsEvaluator.py b/tests/evaluators/test_MultipleErrorsEvaluator.py new file mode 100644 index 0000000..ca3039d --- /dev/null +++ b/tests/evaluators/test_MultipleErrorsEvaluator.py @@ -0,0 +1,98 @@ +import pytest + +from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts +from core.concept import Concept +from evaluators.BaseEvaluator import BaseEvaluator +from evaluators.MultipleErrorsEvaluator import MultipleErrorsEvaluator +from parsers.BaseParser import BaseParser + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + + +def r(value, status=True): + return ReturnValueConcept(value, status, value) + + +def eval_false(name): + return ReturnValueConcept(BaseEvaluator.PREFIX + name, False, "value") + + +def eval_true(name): + return ReturnValueConcept(BaseEvaluator.PREFIX + name, True, "value") + + +def parser_false(name): + return ReturnValueConcept(BaseParser.PREFIX + name, False, "value") + + +def parser_true(name): + return ReturnValueConcept(BaseParser.PREFIX + name, True, "value") + + +reduce_requested = ReturnValueConcept( + "some_name", + True, + Concept(name=BuiltinConcepts.REDUCE_REQUESTED, key=BuiltinConcepts.REDUCE_REQUESTED)) + + +class TestMultipleErrorsEvaluator(TestUsingMemoryBasedSheerka): + @pytest.mark.parametrize("return_values, expected", [ + ([eval_false("one"), reduce_requested], False), + ([eval_false("one"), eval_false("two"), reduce_requested], True), + ([eval_false("one"), eval_false("two"), eval_false("three"), reduce_requested], True), + ([eval_false("one"), eval_false("two"), parser_false("one"), reduce_requested], True), + ([eval_false("one"), eval_false("two"), parser_true("one"), reduce_requested], False), + ([eval_false("one"), eval_false("two"), eval_true("three"), reduce_requested], False), + ([eval_false("one"), eval_false("two"), r("other concept", False), reduce_requested], True), + ([eval_false("one"), eval_false("two"), r("other concept", True), reduce_requested], True), + 
([eval_false("reduce not required 1"), eval_false("reduce not required 2")], False), + ]) + def test_i_can_match(self, return_values, expected): + context = self.get_context() + assert MultipleErrorsEvaluator().matches(context, return_values) == expected + + def test_i_can_eval(self): + context = self.get_context() + + return_values = [ + eval_false("one"), + eval_false("two"), + eval_false("three"), + parser_false("one"), + parser_false("two"), + reduce_requested + ] + + evaluator = MultipleErrorsEvaluator() + evaluator.matches(context, return_values) + res = evaluator.eval(context, return_values) + + assert not res.status + assert context.sheerka.isinstance(res.body, BuiltinConcepts.MULTIPLE_ERRORS) + assert res.body.body == [eval_false("one"), eval_false("two"), eval_false("three")] + assert len(res.parents) == 6 + + def test_unwanted_return_values_are_not_eaten(self): + context = self.get_context() + + a_successful_concept = r("successful concept") + a_concept_in_error = r("concept in error", False) + return_values = [ + eval_false("one"), + eval_false("two"), + parser_false("one"), + a_successful_concept, + a_concept_in_error, + reduce_requested + ] + + evaluator = MultipleErrorsEvaluator() + evaluator.matches(context, return_values) + res = evaluator.eval(context, return_values) + + assert not res.status + assert res.body.body == [eval_false("one"), eval_false("two")] + assert len(res.parents) == 4 + + assert a_successful_concept not in res.parents + assert a_concept_in_error not in res.parents diff --git a/tests/evaluators/test_OneErrorEvaluator.py b/tests/evaluators/test_OneErrorEvaluator.py index 8d1728d..8860605 100644 --- a/tests/evaluators/test_OneErrorEvaluator.py +++ b/tests/evaluators/test_OneErrorEvaluator.py @@ -71,3 +71,4 @@ class TestOneErrorEvaluator(TestUsingMemoryBasedSheerka): assert len(res.parents) == 4 assert a_successful_concept not in res.parents + assert a_concept_in_error not in res.parents diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index b948eac..59f8f17 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -1,9 +1,9 @@ import pytest - from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property, simplec from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator -from parsers.ConceptLexerParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression +from parsers.BaseNodeParser import SyaAssociativity +from parsers.BnfNodeParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression from sdp.sheerkaDataProvider import SheerkaDataProvider from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka @@ -125,6 +125,17 @@ as: assert sheerka.sdp.io.exists( sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_origin())) + def test_i_can_def_several_concepts(self): + sheerka = self.get_sheerka(use_dict=False) + sheerka.evaluate_user_input("def concept foo") + + sheerka = self.get_sheerka(use_dict=False) + res = sheerka.evaluate_user_input("def concept bar") + + assert len(res) == 1 + assert res[0].status + assert res[0].body.body.id == "1002" + def test_i_can_evaluate_def_concept_part_when_one_part_is_a_ref_of_another_concept(self): """ In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b') @@ -393,6 +404,7 @@ as: assert concept_found.get_prop("a") is None assert not 
 concept_found.metadata.need_validation
 
+    # @pytest.mark.xfail
     @pytest.mark.parametrize("desc, definitions", [
         ("Simple form", [
             "def concept one as 1",
@@ -467,6 +479,7 @@ as:
         assert res[0].status
         assert res[0].body == 23
 
+    # @pytest.mark.xfail
     def test_i_can_mix_bnf_and_isa(self):
         """
         if 'one' isa 'number, twenty number should be recognized
@@ -531,7 +544,44 @@ as:
         assert res[0].status
         assert res[0].body == 21
 
-    def test_i_can_mix_concept_of_concept(self):
+    # @pytest.mark.xfail
+    def test_i_can_use_concepts_defined_with_from(self):
+        sheerka = self.get_sheerka()
+
+        init = [
+            "def concept plus from a plus b as a + b",
+            "def concept one as 1",
+        ]
+
+        for exp in init:
+            sheerka.evaluate_user_input(exp)
+
+        res = sheerka.evaluate_user_input("eval one plus one")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 2
+
+        res = sheerka.evaluate_user_input("eval 1 plus one")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 2
+
+        res = sheerka.evaluate_user_input("eval one plus 1")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 2
+
+        res = sheerka.evaluate_user_input("eval 1 plus 2")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 3
+
+        res = sheerka.evaluate_user_input("eval 1 plus 1")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 2
+
+    def test_i_can_mix_bnf_concept_and_concept(self):
         definitions = [
             "def concept one as 1",
             "def concept two as 2",
@@ -631,24 +681,6 @@ as:
         assert res[1].status
         assert res[1].body == "little blue(house)"
 
-    @pytest.mark.xfail
-    def test_i_can_recognize_composition_of_concept_with_priority(self):
-        sheerka = self.get_sheerka()
-
-        definitions = [
-            "def concept a plus b where a,b",
-            "def concept a times b where a,b",
-            "modify concept 1001 set priority = 1",
-            "modify concept 1002 set priority = 2",
-        ]
-
-        for definition in definitions:
-            sheerka.evaluate_user_input(definition)
-
-        res = sheerka.evaluate_user_input("1 plus 2 times 3")
-        assert res[0].status
-        # check that the priority is applied
-
     def test_i_can_say_that_a_concept_isa_another_concept(self):
         sheerka = self.get_sheerka()
         sheerka.evaluate_user_input("def concept foo")
@@ -768,6 +800,7 @@ as:
         assert not res[0].status
         assert sheerka.isinstance(res[0].body, BuiltinConcepts.WHERE_CLAUSE_FAILED)
 
+
     # def test_i_can_detect_when_only_one_evaluator_is_in_error(self):
     #     sheerka = self.get_sheerka()
     #
@@ -864,3 +897,60 @@ as:
         twenties = sheerka.get("twenties")
         number = sheerka.get("number")
         assert sheerka.isa(twenties, number)
+
+    def test_i_can_mix_sya_concepts_and_bnf_concept(self):
+        definitions = [
+            "def concept one as 1",
+            "def concept two as 2",
+            "def concept three as 3",
+            "def concept plus from a plus b as a + b",
+            "def concept mult from a mult b as a * b",
+            "def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit",
+        ]
+
+        sheerka = self.init_scenario(definitions)
+        context = self.get_context(sheerka)
+        sheerka.set_sya_def(context, [
+            (sheerka.get("mult").id, 20, SyaAssociativity.Right),
+            (sheerka.get("plus").id, 10, SyaAssociativity.Right),
+        ])
+
+        res = sheerka.evaluate_user_input("eval one plus two mult three")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 7
+
+        res = sheerka.evaluate_user_input("eval two mult three plus one")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 7
+
+        res = sheerka.evaluate_user_input("eval 1 plus two mult 3")
+        assert len(res) == 1
+        assert res[0].status
+        assert res[0].body == 7
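+
+        # With these precedences (mult=20 binds tighter than plus=10), the
+        # shunting yard parses "one plus two mult three" as 1 + (2 * 3) = 7 and
+        # "two mult three plus one" as (2 * 3) + 1 = 7, whether the operands
+        # are literals or concepts.
+
+        res = 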
sheerka.evaluate_user_input("eval 2 mult 3 plus one") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 7 + + res = sheerka.evaluate_user_input("eval twenty two plus 1") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 23 + + res = sheerka.evaluate_user_input("eval 1 plus twenty two") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 23 + + res = sheerka.evaluate_user_input("eval twenty one plus twenty two") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 43 + + res = sheerka.evaluate_user_input("eval twenty two plus twenty one mult two") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 64 diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py new file mode 100644 index 0000000..3772c75 --- /dev/null +++ b/tests/parsers/parsers_utils.py @@ -0,0 +1,150 @@ +from core.concept import CC, Concept +from core.tokenizer import Tokenizer, TokenKind, Token +from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN +from parsers.SyaNodeParser import SyaConceptParserHelper + + +def _index(tokens, expr, index): + """ + Finds a sub list in a bigger list + :param tokens: + :param expr: + :param index: + :return: + """ + expected = [token.value for token in Tokenizer(expr) if token.type != TokenKind.EOF] + for i in range(0, len(tokens) - len(expected) + 1): + for j in range(len(expected)): + if tokens[i + j] != expected[j]: + break + else: + if index == 0: + return i, len(expected) + else: + index -= 1 + + raise ValueError(f"substring '{expr}' not found") + + +def compute_debug_array(res): + to_compare = [] + for r in res: + res_debug = [] + for token in r.debug: + if isinstance(token, Token): + if token.type == TokenKind.WHITESPACE: + continue + else: + res_debug.append(token.value) + else: + res_debug.append(token.concept.name) + to_compare.append(res_debug) + + return to_compare + + +def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, skip=0, is_bnf=False, sya=False): + """ + Tries to find sub in expression + When found, transform it to its correct type + :param expression_as_tokens: full expression + :param sub_expr: sub expression to search in the full expression + :param concepts_map: hash of the known concepts + :param concept_key: key of the concept if different from sub_expr + :param skip: number of occurrences of sub_expr to skip + :param is_bnf: True if the concept to search is a bnf definition + :param sya: Return SyaConceptParserHelper instead of a ConceptNode when needed + :return: + """ + if sub_expr == "')'": + return ")" + + if isinstance(sub_expr, (scnode, utnode)): + return sub_expr + + if isinstance(sub_expr, cnode): + # for cnode, map the concept key to the one from concepts_maps if needed + if sub_expr.concept_key.startswith("#"): + return cnode( + concepts_map[sub_expr.concept_key[1:]].key, + sub_expr.start, + sub_expr.end, + sub_expr.source + ) + else: + return sub_expr + + if isinstance(sub_expr, SCWC): + first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya) + last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya) + content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content] + return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source() + + if isinstance(sub_expr, (CNC, CC, CN)): + concept_node = get_node( + concepts_map, + expression_as_tokens, + sub_expr.source or 
sub_expr.concept_key, + sub_expr.concept_key, sya=sya) + concept_found = concept_node.concept + sub_expr.concept_key = concept_found.key + sub_expr.concept = concept_found + sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start)) + if hasattr(sub_expr, "compiled"): + for k, v in sub_expr.compiled.items(): + node = get_node(concepts_map, expression_as_tokens, v, sya=sya) # need to get start and end positions + new_value = CC(Concept().update_from(concepts_map[v])) if (isinstance(v, str) and v in concepts_map) \ + else node + sub_expr.compiled[k] = new_value + sub_expr.fix_pos(node) + + if hasattr(sub_expr, "fix_source"): + sub_expr.fix_source(expression_as_tokens[sub_expr.start: sub_expr.end + 1]) + return sub_expr + + if isinstance(sub_expr, UTN): + node = get_node(concepts_map, expression_as_tokens, sub_expr.source) + sub_expr.fix_pos(node) + return sub_expr + + if isinstance(sub_expr, short_cnode): + return get_node(concepts_map, expression_as_tokens, sub_expr.source, + concept_key=sub_expr.concept_key, skip=skip, is_bnf=True, sya=sya) + + if isinstance(sub_expr, tuple): + return get_node(concepts_map, expression_as_tokens, sub_expr[0], + concept_key=concept_key, skip=sub_expr[1], is_bnf=is_bnf, sya=sya) + + start, length = _index(expression_as_tokens, sub_expr, skip) + + # special case of python source code + if "+" in sub_expr and sub_expr.strip() != "+": + return scnode(start, start + length - 1, sub_expr) + + # try to match one of the concept from the map + concept_key = concept_key or sub_expr + concept_found = concepts_map.get(concept_key, None) + if concept_found: + concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests + if not sya or len(concept_found.metadata.props) == 0 or is_bnf: + # if it's an atom, then return a ConceptNode + return CN(concept_found, start, start + length - 1, source=sub_expr) + else: + # else return a ParserHelper + return SyaConceptParserHelper(concept_found, start) + else: + # else an UnrecognizedTokensNode + return utnode(start, start + length - 1, sub_expr) + + +def compute_expected_array(concepts_map, expression, expected, sya=False): + """ + Computes a simple but sufficient version of the result of infix_to_postfix() + :param concepts_map: + :param expression: + :param expected: + :param sya: if true, generate an SyaConceptParserHelper instead of a cnode + :return: + """ + expression_as_tokens = [token.value for token in Tokenizer(expression) if token.type != TokenKind.EOF] + return [get_node(concepts_map, expression_as_tokens, sub_expr, sya=sya) for sub_expr in expected] diff --git a/tests/parsers/test_AtomsParser.py b/tests/parsers/test_AtomsParser.py new file mode 100644 index 0000000..a18a980 --- /dev/null +++ b/tests/parsers/test_AtomsParser.py @@ -0,0 +1,241 @@ +import pytest +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept +from parsers.AtomNodeParser import AtomNodeParser +from parsers.BaseNodeParser import cnode, utnode, CNC + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +from tests.parsers.parsers_utils import compute_expected_array + + +class TestAtomsParser(TestUsingMemoryBasedSheerka): + def init_parser(self, concepts_map, **kwargs): + sheerka, context, *updated_concepts = self.init_concepts(singleton=True, *concepts_map.values(), **kwargs) + + parser = AtomNodeParser() + parser.initialize(context, updated_concepts) + + return sheerka, context, parser + + def 
test_i_cannot_parse_empty_string(self): + sheerka, context, parser = self.init_parser({}) + + res = parser.parse(context, "") + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) + + @pytest.mark.parametrize("text, expected", [ + ("foo", ["foo"]), + ("foo bar", ["foo", "bar"]), + ("foo bar twenties", ["foo", "bar", "twenties"]), + ]) + def test_i_can_parse_simple_sequences(self, text, expected): + concepts_map = { + "foo": Concept("foo"), + "bar": Concept("bar"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + + expected_array = compute_expected_array(concepts_map, text, expected) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + + @pytest.mark.parametrize("text, expected", [ + ("foo bar", ["foo bar"]), + ("one two three", ["one two three"]), + ("foo bar twenties one two three", ["foo bar", "twenties", "one two three"]), + ]) + def test_i_can_parse_long_names(self, text, expected): + concepts_map = { + "foo bar": Concept("foo bar"), + "one two three": Concept("one two three"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + + expected_array = compute_expected_array(concepts_map, text, expected) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + + @pytest.mark.parametrize("text, expected_status, expected", [ + ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]), + ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]), + ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]), + ("foo bar 1 + 1", True, ["foo bar", " 1 + 1"]), + ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]), + ("foo bar x$!#", False, ["foo bar", " x$!#"]), + + ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]), + ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]), + ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]), + ("1 + 1 foo bar", True, ["1 + 1 ", "foo bar"]), + ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]), + ("x$!# foo bar", False, ["x$!# ", "foo bar"]), + ("func(one)", False, ["func(", "one", ")"]), + ]) + def test_i_can_parse_when_unrecognized(self, text, expected_status, expected): + concepts_map = { + "prefixed": Concept("a prefixed").def_prop("a"), + "suffixed": Concept("prefixed a").def_prop("a"), + "infix": Concept("a infix b").def_prop("a").def_prop("b"), + "foo bar": Concept("foo bar"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status == expected_status + + expected_array = compute_expected_array(concepts_map, text, expected) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + + @pytest.mark.parametrize("text, expected_status, expected", [ + (" one two ", True, [cnode("one", 1, 1, 
"one"), cnode("two", 3, 3, "two")]), + (" one x$!# ", False, [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]), + (" foo bar x$!# ", False, [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]), + ]) + def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected): + concepts_map = { + "prefixed": Concept("a prefixed").def_prop("a"), + "suffixed": Concept("prefixed a").def_prop("a"), + "infix": Concept("a infix b").def_prop("a").def_prop("b"), + "foo bar": Concept("foo bar"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status == expected_status + + expected_array = compute_expected_array(concepts_map, text, expected) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + + @pytest.mark.parametrize("text, expected", [ + ("one two", [["one", "two"], ["one two"]]) + ]) + def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(self, text, expected): + concepts_map = { + "one": Concept("one"), + "two": Concept("two"), + "one two": Concept("one two"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + list_of_res = parser.parse(context, text) + assert len(list_of_res) == len(expected) + + for i, res in enumerate(list_of_res): + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + expected_array = compute_expected_array(concepts_map, text, expected[i]) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + + def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(self): + concepts_map = { + "one": Concept("one"), + "two": Concept("two"), + "one two": Concept("one two"), + "one two x$!# one two": Concept("one two x$!# one two"), + } + + text = "one two x$!# one two" + sheerka, context, parser = self.init_parser(concepts_map) + list_of_res = parser.parse(context, text) + + expected = [ + (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]), + (False, ["one", "two", " x$!# ", ("one two", 1)]), + (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]), + (False, ["one two", " x$!# ", ("one two", 1)]), + (True, ["one two x$!# one two"]), + ] + + assert len(list_of_res) == len(expected) + + for res, expected in zip(list_of_res, expected): + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status == expected[0] + expected_array = compute_expected_array(concepts_map, text, expected[1]) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + + @pytest.mark.parametrize("text", [ + "foo", + f"foo one", + "x$!#", + "twenty one", + "1 + 1", + "foo x$!#", + "1 + 1 twenty one", + ]) + def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text): + concepts_map = { + "foo": Concept("foo a").def_prop("a"), + "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + res = parser.parse(context, text) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert res.body.body == text + + @pytest.mark.parametrize("text, expected", [ + ("hello foo bar", + [ + (True, [CNC("hello1", source="hello foo ", a=" foo "), "bar"]), 
+ (True, [CNC("hello2", source="hello foo ", b=" foo "), "bar"]), + ]), + ]) + def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected): + concepts_map = { + "hello1": Concept("hello a").def_prop("a"), + "hello2": Concept("hello b").def_prop("b"), + "bar": Concept("bar") + } + + sheerka, context, parser = self.init_parser(concepts_map, create_new=True) + list_of_res = parser.parse(context, text) + + assert len(list_of_res) == len(expected) + + for res, expected in zip(list_of_res, expected): + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status == expected[0] + expected_array = compute_expected_array(concepts_map, text, expected[1]) + assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array diff --git a/tests/parsers/test_ConceptLexerParser.py b/tests/parsers/test_BnfConceptLexerParser.py similarity index 98% rename from tests/parsers/test_ConceptLexerParser.py rename to tests/parsers/test_BnfConceptLexerParser.py index 19afc2e..ef3e810 100644 --- a/tests/parsers/test_ConceptLexerParser.py +++ b/tests/parsers/test_BnfConceptLexerParser.py @@ -4,10 +4,11 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve from core.tokenizer import Tokenizer, TokenKind, Token +from parsers.BaseNodeParser import cnode, short_cnode from parsers.BnfParser import BnfParser -from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ +from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \ - UnrecognizedTokensNode, cnode, short_cnode, ConceptExpression, ConceptGroupExpression + UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -72,15 +73,16 @@ def cprop(concept, prop_name): return concept.compiled[prop_name] -class TestConceptLexerParser(TestUsingMemoryBasedSheerka): +class TestBnfConceptLexerParser(TestUsingMemoryBasedSheerka): def init(self, concepts, grammar): - context = self.get_context() + sheerka = self.get_sheerka(singleton=True) + context = self.get_context(sheerka) for c in concepts: context.sheerka.add_in_cache(c) context.sheerka.set_id_if_needed(c, False) - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) return context, parser @@ -602,7 +604,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): grammar = {foo: Optional("one", ConceptExpression("foo"))} context = self.get_context() - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo")) @@ -612,7 +614,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): context = self.get_context() context.concepts["foo"] = foo - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo")) @@ -636,7 +638,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): grammar = {foo: Sequence("twenty", number)} - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) res = parser.parse(context, "twenty two") @@ -686,7 +688,7 @@ class 
TestConceptLexerParser(TestUsingMemoryBasedSheerka): grammar = {foo: ZeroOrMore("one")} context, parser = self.init([foo], grammar) - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) res = parser.parse(context, "one two") @@ -779,7 +781,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): bar: foo, foo: bar } - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(self.get_context(), grammar) assert bar not in parser.concepts_grammars @@ -793,7 +795,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): foo: OrderedChoice(bar, "foo") } - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(self.get_context(), grammar) assert foo not in parser.concepts_grammars # removed because of the infinite recursion @@ -824,7 +826,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): bar: foo, foo: Sequence("one", bar, "two") } - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(self.get_context(), grammar) assert foo not in parser.concepts_grammars # removed because of the infinite recursion @@ -838,7 +840,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): bar: foo, foo: Sequence("one", OrderedChoice(bar, "other"), "two") } - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(self.get_context(), grammar) assert foo not in parser.concepts_grammars # removed because of the infinite recursion @@ -851,7 +853,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): grammar = { foo: bar } - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(self.get_context(), grammar) assert foo in parser.concepts_grammars @@ -883,7 +885,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): bar = Concept(name="bar") grammar = {foo: Sequence("one", "two"), bar: foo} - parser = ConceptLexerParser() + parser = BnfNodeParser() ret = parser.initialize(context, grammar) return_value = ret.body @@ -1209,7 +1211,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): context.sheerka.set_id_if_needed(c, False) context.sheerka.add_concept_to_set(context, baz, bar) - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) encoded = parser.encode_grammar(parser.concepts_grammars) @@ -1260,7 +1262,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka): # atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')), # } # - # parser = ConceptLexerParser() + # parser = BnfNodeParser() # parser.register(grammar) # # # res = parser.parse(context, "1") diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index 43298e4..b0ffc3b 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -3,10 +3,11 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from core.tokenizer import Tokenizer, TokenKind, LexerError, Token +from parsers.BaseNodeParser import cnode from parsers.BaseParser import UnexpectedTokenErrorNode from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError -from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ - ConceptLexerParser, ConceptExpression, cnode +from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ + BnfNodeParser, ConceptExpression from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -155,7 +156,7 @@ 
class TestBnfParser(TestUsingMemoryBasedSheerka): bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value concepts = {bar: bar_definition, foo: foo_definition} - concept_parser = ConceptLexerParser() + concept_parser = BnfNodeParser() concept_parser.initialize(context, concepts) res = concept_parser.parse(context, "twenty two") diff --git a/tests/parsers/test_ConceptsWithConceptsParser.py b/tests/parsers/test_ConceptsWithConceptsParser.py index 17fd1ea..97cc2d3 100644 --- a/tests/parsers/test_ConceptsWithConceptsParser.py +++ b/tests/parsers/test_ConceptsWithConceptsParser.py @@ -5,7 +5,8 @@ import pytest from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts from core.concept import Concept from core.tokenizer import Token, TokenKind, Tokenizer -from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode +from parsers.BaseNodeParser import SourceCodeNode +from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser from parsers.MultipleConceptsParser import MultipleConceptsParser from parsers.PythonParser import PythonNode @@ -65,7 +66,7 @@ class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("text, interested", [ ("not parser result", False), (ParserResultConcept(parser="not multiple_concepts_parser"), False), - (ParserResultConcept(parser=multiple_concepts_parser, value=[]), True), + (ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True), ]) def test_not_interested(self, text, interested): context = self.get_context() diff --git a/tests/parsers/test_DefaultParser.py b/tests/parsers/test_DefaultParser.py index dc3e02b..df0b136 100644 --- a/tests/parsers/test_DefaultParser.py +++ b/tests/parsers/test_DefaultParser.py @@ -3,7 +3,7 @@ import ast from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF -from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptExpression +from parsers.BnfNodeParser import OrderedChoice, StrMatch, ConceptExpression from parsers.PythonParser import PythonParser, PythonNode from core.tokenizer import Keywords, Tokenizer, LexerError from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode @@ -251,7 +251,7 @@ def concept add one to a as res = parser.parse(context, text) node = res.value.value definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string")) - parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition) + parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", None, definition, definition) expected = get_def_concept(name="name", body="__definition[0]", bnf_def=parser_result) assert res.status diff --git a/tests/parsers/test_MultipleConceptsParser.py b/tests/parsers/test_MultipleConceptsParser.py index 87325dc..39a57b3 100644 --- a/tests/parsers/test_MultipleConceptsParser.py +++ b/tests/parsers/test_MultipleConceptsParser.py @@ -3,7 +3,8 @@ import pytest from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from core.concept import Concept from core.tokenizer import Tokenizer, TokenKind, Token -from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, 
SourceCodeNode +from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode +from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence from parsers.MultipleConceptsParser import MultipleConceptsParser from parsers.PythonParser import PythonNode @@ -11,7 +12,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka def get_return_value(context, grammar, expression): - parser = ConceptLexerParser() + parser = BnfNodeParser() parser.initialize(context, grammar) ret_val = parser.parse(context, expression) diff --git a/tests/parsers/test_PythonParser.py b/tests/parsers/test_PythonParser.py index 945c820..70c4850 100644 --- a/tests/parsers/test_PythonParser.py +++ b/tests/parsers/test_PythonParser.py @@ -1,6 +1,6 @@ import ast import pytest -from core.builtin_concepts import ParserResultConcept +from core.builtin_concepts import ParserResultConcept, NotForMeConcept from core.tokenizer import Tokenizer, LexerError from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode import core.utils @@ -48,9 +48,11 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): assert not res.status assert res.who == parser.name - assert isinstance(res.value, ParserResultConcept) - assert isinstance(res.value.value[0], PythonErrorNode) - assert isinstance(res.value.value[0].exception, SyntaxError) + assert isinstance(res.value, NotForMeConcept) + assert res.value.body == text + assert len(res.value.get_prop("reason")) == 1 + assert isinstance(res.value.get_prop("reason")[0], PythonErrorNode) + assert isinstance(res.value.get_prop("reason")[0].exception, SyntaxError) @pytest.mark.parametrize("text, error_msg, error_text", [ ("c::", "Concept identifiers not found", ""), @@ -61,10 +63,13 @@ class TestPythonParser(TestUsingMemoryBasedSheerka): res = parser.parse(self.get_context(), text) assert not res.status - assert isinstance(res.body, ParserResultConcept) - assert isinstance(res.body.body[0], LexerError) - assert res.body.body[0].message == error_msg - assert res.body.body[0].text == error_text + assert isinstance(res.value, NotForMeConcept) + assert res.value.body == text + + assert len(res.value.get_prop("reason")) == 1 + assert isinstance(res.value.get_prop("reason")[0], LexerError) + assert res.value.get_prop("reason")[0].message == error_msg + assert res.value.get_prop("reason")[0].text == error_text def test_i_can_parse_a_concept(self): text = "c:name|key: + 1" diff --git a/tests/parsers/test_PythonWithConceptsParser.py b/tests/parsers/test_PythonWithConceptsParser.py index 2ff54c5..6da6cf3 100644 --- a/tests/parsers/test_PythonWithConceptsParser.py +++ b/tests/parsers/test_PythonWithConceptsParser.py @@ -1,18 +1,17 @@ import ast import pytest - from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.concept import Concept from core.tokenizer import Token, TokenKind, Tokenizer -from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode -from parsers.MultipleConceptsParser import MultipleConceptsParser -from parsers.PythonParser import PythonNode, PythonErrorNode +from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode +from parsers.PythonParser import PythonNode from parsers.PythonWithConceptsParser import PythonWithConceptsParser +from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -multiple_concepts_parser = MultipleConceptsParser() +unrecognized_nodes_parser = 
UnrecognizedNodeParser() def ret_val(*args): @@ -28,7 +27,7 @@ def ret_val(*args): result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens)) index += len(tokens) - return ReturnValueConcept("who", False, ParserResultConcept(parser=multiple_concepts_parser, value=result)) + return ReturnValueConcept("who", False, ParserResultConcept(parser=unrecognized_nodes_parser, value=result)) def to_str_ast(expression): @@ -40,7 +39,7 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("text, interested", [ ("not parser result", False), (ParserResultConcept(parser="not multiple_concepts_parser"), False), - (ParserResultConcept(parser=multiple_concepts_parser, value=[]), True), + (ParserResultConcept(parser=unrecognized_nodes_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True), ]) def test_not_interested(self, text, interested): context = self.get_context() @@ -130,9 +129,6 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka): parser = PythonWithConceptsParser() result = parser.parse(context, input_return_value.body) - wrapper = result.value - return_value = result.value.value assert not result.status - assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert isinstance(return_value[0], PythonErrorNode) + assert context.sheerka.isinstance(result.value, BuiltinConcepts.NOT_FOR_ME) diff --git a/tests/parsers/test_SyaConceptLexerParser.py b/tests/parsers/test_SyaConceptLexerParser.py new file mode 100644 index 0000000..f660db6 --- /dev/null +++ b/tests/parsers/test_SyaConceptLexerParser.py @@ -0,0 +1,1197 @@ +import pytest +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept, CC +from core.tokenizer import Tokenizer +from parsers.BaseNodeParser import utnode, ConceptNode, cnode, short_cnode, UnrecognizedTokensNode, \ + SCWC, CNC, CN, UTN, scnode +from parsers.PythonParser import PythonNode +from parsers.SyaNodeParser import SyaNodeParser, SyaConceptParserHelper, SyaAssociativity, \ + NoneAssociativeSequenceErrorNode, TooManyParametersFound + +import tests.parsers.parsers_utils +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + + +def compute_expected_array(concepts_map, expression, expected): + return tests.parsers.parsers_utils.compute_expected_array(concepts_map, expression, expected, sya=True) + + +class TestSyaConceptLexerParser(TestUsingMemoryBasedSheerka): + + def init_parser(self, concepts_map, sya_def, **kwargs): + sheerka, context, *updated_concepts = self.init_concepts( + *concepts_map.values(), + singleton=True, + create_new=True, + **kwargs) + + if sya_def: + sya_def_to_use = {} + for k, v in sya_def.items(): + sya_def_to_use[k.id] = v + else: + sya_def_to_use = None + + parser = SyaNodeParser() + parser.initialize(context, updated_concepts, sya_def_to_use) + + return sheerka, context, parser + + def test_i_can_initialize(self): + sheerka, context, c1, c2, c3, c4, c5 = self.init_concepts( + "foo", + Concept("bar a").def_prop("a"), + Concept("a baz").def_prop("a"), + Concept("baz a qux b").def_prop("a").def_prop("b"), + Concept("foo a bar b").def_prop("a").def_prop("b"), + ) + + parser = SyaNodeParser() + parser.initialize(context, [c1, c2, c3, c4, c5]) + + assert parser.concepts_by_first_keyword == { + "foo": [c1.id, c5.id], + "bar": [c2.id], + "baz": [c3.id, c4.id], + } + + @pytest.mark.parametrize("expression, expected_sequences", [ + ("one plus two", [["one", "two", "plus"]]), + ("1 + 1 plus two", [["1 + 1 ", "two", "plus"]]), + 
("one + two plus three", [ + ["one", " + ", "two", "three", "plus"], + ["one + two ", "three", "plus"]]), + ("twenty one plus two", [ + ["twenty ", "one", "two", "plus"], + [short_cnode("twenties", "twenty one"), "two", "plus"] + ]), + ("x$!# plus two", [["x$!# ", "two", "plus"]]), + + ("one plus 1 + 1", [["one", " 1 + 1", "plus"]]), + ("1 + 1 plus 2 + 2", [["1 + 1 ", " 2 + 2", "plus"]]), + ("one + two plus 1 + 1", [ + ["one", " + ", "two", " 1 + 1", "plus"], + ["one + two ", " 1 + 1", "plus"] + ]), + ("twenty one plus 1 + 1", [ + ["twenty ", "one", " 1 + 1", "plus"], + [cnode("twenties", 0, 2, "twenty one"), " 1 + 1", "plus"] + ]), + ("x$!# plus 1 + 1", [["x$!# ", " 1 + 1", "plus"]]), + + ("one plus two + three", [ + ["one", "two", "plus", " + ", "three"], + ["one", " two + three", "plus"], + ]), + ("1 + 1 plus two + three", [ + ["1 + 1 ", "two", "plus", (" + ", 1), "three"], + ["1 + 1 ", " two + three", "plus"], + ]), + ("one + two plus two + three", [ + ["one", " + ", "two", ("two", 1), "plus", (" + ", 1), "three"], + ["one + two ", ("two", 1), "plus", (" + ", 1), "three"], + ["one", " + ", "two", " two + three", "plus"], + ["one + two ", " two + three", "plus"], + ]), + ("twenty one plus two + three", [ + ["twenty ", "one", "two", "plus", " + ", "three"], + [cnode("twenties", 0, 2, "twenty one"), "two", "plus", " + ", "three"], + ["twenty ", "one", " two + three", "plus"], + [cnode("twenties", 0, 2, "twenty one"), " two + three", "plus"], + ]), + ("x$!# plus two + three", [ + ["x$!# ", "two", "plus", " + ", "three"], + ["x$!# ", " two + three", "plus"], + ]), + + ("one plus twenty two", [ + ["one", " twenty ", "plus", "two"], + ["one", cnode("twenties", 4, 6, "twenty two"), "plus"], + ]), + ("1 + 1 plus twenty one", [ + ["1 + 1 ", " twenty ", "plus", "one"], + ["1 + 1 ", cnode("twenties", 8, 10, "twenty one"), "plus"], + ]), + ("one + two plus twenty one", [ + ["one", " + ", "two", " twenty ", "plus", ("one", 1)], + ["one + two ", " twenty ", "plus", ("one", 1)], + ["one", " + ", "two", cnode("twenties", 8, 10, "twenty one"), "plus"], + ["one + two ", cnode("twenties", 8, 10, "twenty one"), "plus"], + ]), + ("twenty one plus twenty two", + [ + ["twenty ", "one", " twenty ", "plus", "two"], + [cnode("twenties", 0, 2, "twenty one"), " twenty ", "plus", "two"], + ["twenty ", "one", cnode("twenties", 6, 8, "twenty two"), "plus"], + [cnode("twenties", 0, 2, "twenty one"), cnode("twenties", 6, 8, "twenty two"), "plus"], + ]), + ("x$!# plus twenty two", [ + ["x$!# ", " twenty ", "plus", "two"], + ["x$!# ", cnode("twenties", 7, 9, "twenty two"), "plus"] + ]), + + ("one plus z$!#", [["one", " z$!#", "plus"]]), + ("1 + 1 plus z$!#", [["1 + 1 ", " z$!#", "plus"]]), + ("one + two plus z$!#", [ + ["one", " + ", "two", " z$!#", "plus"], + ["one + two ", " z$!#", "plus"], + ]), + ("twenty one plus z$!#", [ + ["twenty ", "one", " z$!#", "plus"], + [cnode("twenties", 0, 2, "twenty one"), " z$!#", "plus"], + ]), + ("x$!# plus z$!#", [["x$!# ", " z$!#", "plus"]]), + ]) + def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences): + concepts_map = { + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, 
expected_sequences): + assert len(res_i.errors) == 0 + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected_sequences", [ + ("one plus plus plus 1 + 1", [["one", " 1 + 1", "plus plus plus"]]), + ("x$!# another long name infix twenty two", [ + ["x$!# ", " twenty ", "another long name infix", "two"], + ["x$!# ", cnode("twenties", 13, 15, "twenty two"), "another long name infix"], + ]), + ]) + def test_i_can_post_fix_infix_concepts_with_long_name(self, expression, expected_sequences): + concepts_map = { + "plus plus plus": Concept("a plus plus plus b").def_prop("a").def_prop("b"), + "another long name infix": Concept("a another long name infix b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, expected_sequences): + assert len(res_i.errors) == 0 + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected_sequences", [ + ("one prefixed", [["one", "prefixed"]]), + ("1 + 1 prefixed", [["1 + 1 ", "prefixed"]]), + ("one + two prefixed", [ + ["one", " + ", "two", "prefixed"], + ["one + two ", "prefixed"], + ]), + ("twenty one prefixed", [ + ["twenty ", "one", "prefixed"], + [cnode("twenties", 0, 2, "twenty one"), "prefixed"], + ]), + ("x$!# prefixed", [["x$!# ", "prefixed"]]), + ]) + def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences): + concepts_map = { + "prefixed": Concept("a prefixed").def_prop("a"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, expected_sequences): + assert len(res_i.errors) == 0 + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected_sequences", [ + ("one prefixed prefixed", [["one", "prefixed prefixed"]]), + ("1 + 1 prefixed prefixed", [["1 + 1 ", "prefixed prefixed"]]), + ("one + two prefixed prefixed", [ + ["one", " + ", "two", "prefixed prefixed"], + ["one + two ", "prefixed prefixed"], + ]), + ("twenty one prefixed prefixed", [ + ["twenty ", "one", "prefixed prefixed"], + [cnode("twenties", 0, 2, "twenty one"), "prefixed prefixed"], + ]), + ("x$!# prefixed prefixed", [["x$!# ", "prefixed prefixed"]]), + + ("one long name prefixed", [["one", "long name prefixed"]]), + ("1 + 1 long name prefixed", [["1 + 1 ", "long name prefixed"]]), + ("one + two long name prefixed", [ + ["one", " + ", "two", "long name prefixed"], + ["one + two ", "long name prefixed"], + ]), + ("twenty one long name prefixed", [ + ["twenty ", "one", "long name prefixed"], + [cnode("twenties", 0, 2, "twenty one"), "long name prefixed"], + ]), + ("x$!# long name prefixed", [["x$!# ", "long name prefixed"]]), + ]) + def test_i_can_post_fix_prefixed_concepts_with_long_names(self, expression, 
expected_sequences): + concepts_map = { + "prefixed prefixed": Concept("a prefixed prefixed").def_prop("a"), + "long name prefixed": Concept("a long name prefixed").def_prop("a"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, expected_sequences): + assert len(res_i.errors) == 0 + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected_sequences", [ + ("suffixed one", [["one", "suffixed"]]), + ("suffixed 1 + 1", [[" 1 + 1", "suffixed"]]), + ("suffixed one + two", [ + ["one", "suffixed", " + ", "two"], + [" one + two", "suffixed"], + ]), + ("suffixed twenty one", [ + [" twenty ", "suffixed", "one"], + [cnode("twenties", 2, 4, "twenty one"), "suffixed"], + ]), + ("suffixed x$!#", [[" x$!#", "suffixed"]]), + ]) + def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences): + concepts_map = { + "suffixed": Concept("suffixed a").def_prop("a"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, expected_sequences): + assert len(res_i.errors) == 0 + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected", [ + ("suffixed suffixed one", ["one", "suffixed suffixed"]), + ("long name suffixed one", ["one", "long name suffixed"]), + ]) + def test_i_can_post_fix_suffixed_concepts_with_long_names(self, expression, expected): + concepts_map = { + "suffixed suffixed": Concept("suffixed suffixed a").def_prop("a"), + "long name suffixed": Concept("long name suffixed a").def_prop("a"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + assert len(res) == 1 + assert res[0].out == expected_array + + @pytest.mark.parametrize("expression, expected_sequences", [ + ("one ? two : three", [["one", "two", "three", "?"]]), + ("1+1 ? one + two : twenty one", [ + ["1+1 ", "one", " + ", "two"], # an error is detected + ["1+1 ", " one + two ", " twenty ", "?", ("one", 1)], + ["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "?"], + ]), + ("x$!# ? 
y$!# : z$!#", [["x$!# ", " y$!# ", " z$!#", "?"]]), + + ("if one then two else three end", [["one", "two", "three", "if"]]), + ("if 1+1 then x$!# else twenty one end", [ + [" 1+1 ", " x$!# ", " twenty ", "one"], # an error is detected + [" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if"], + ]), + ("if x$!# then one + two else z$!# end", [ + [" x$!# ", "one", " + ", "two"], # an error is detected + [" x$!# ", " one + two ", " z$!# ", "if"], + ]), + ]) + def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences): + """ + The purpose of this test is to validate concepts like + that have at least 3 parameters separated by tokens + Example : + var_0 token var_1 token var_2 + token var_0 token var_1 token var_2 + token var_0 token var_1 token var_2 token + var_0 token var_1 token var_2 token + etc... + :return: + """ + + concepts_map = { + "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), + "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, expected_sequences): + # assert len(res_i.errors) == 0 # Do not validate errors + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected_sequences", [ + ("one ? ? two : : three", [["one", "two", "three", "? ?"]]), + ("1+1 ? ? one + two : : twenty one", [ + ["1+1 ", "one", " + ", "two"], # error + ["1+1 ", " one + two ", " twenty ", "? ?", ("one", 1)], + ["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "? ?"], + ]), + + ("if if one then then two else else three end end ", [["one", "two", "three", "if if"]]), + ("if if 1+1 then then x$!# else else twenty one end end ", [ + [" 1+1 ", " x$!# ", " twenty ", "one"], # error + [" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if if"]]), + ]) + def test_i_can_post_fix_ternary_concept_with_long_names(self, expression, expected_sequences): + concepts_map = { + "? ?": Concept("a ? ? 
b : : c").def_prop("a").def_prop("b").def_prop("c"), + "if if": Concept("if if a then then b else else c end end").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, expected_sequences): + # assert len(res_i.errors) == 0 # Do not validate errors + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected", [ + ("foo bar baz", ["baz", "bar", "foo"]), + ("foo bar x$!#", [" x$!#", "bar", "foo"]), + ("foo bar 1 + 1", [" 1 + 1", "bar", "foo"]), + ]) + def test_i_can_post_fix_suffixed_unary_composition(self, expression, expected): + concepts_map = { + "foo": Concept("foo a").def_prop("a"), + "bar": Concept("bar a").def_prop("a"), + "baz": Concept("baz"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + @pytest.mark.parametrize("expression, expected", [ + ("baz bar foo", ["baz", "bar", "foo"]), + ("x$!# bar foo", ["x$!# ", "bar", "foo"]), + ("1 + 1 bar foo", ["1 + 1 ", "bar", "foo"]), + ]) + def test_i_can_post_fix_prefixed_unary_composition(self, expression, expected): + concepts_map = { + "foo": Concept("a foo").def_prop("a"), + "bar": Concept("a bar").def_prop("a"), + "baz": Concept("baz"), + } + sya_def = { + concepts_map["foo"]: (5, SyaAssociativity.Left), + concepts_map["bar"]: (5, SyaAssociativity.Left), # precedence greater than plus + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + @pytest.mark.parametrize("expression, expected", [ + ("one plus two mult three", ["one", "two", "three", "mult", "plus"]), + ("one mult two plus three", ["one", "two", "mult", "three", "plus"]), + ("(one plus two) mult three", ["one", "two", "plus", "three", "mult"]), + ("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]), + ]) + def test_i_can_post_fix_binary_with_precedence(self, expression, expected): + concepts_map = { + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "mult": Concept("a mult b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + sya_def = { + concepts_map["plus"]: (5, SyaAssociativity.Right), + concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + def test_i_can_post_fix_unary_with_precedence(self): + concepts_map = { + "suffixed": Concept("suffixed a").def_prop("a"), + "prefixed": Concept("a prefixed").def_prop("a"), + "a": Concept("a"), + } + + sya_def = { + concepts_map["prefixed"]: (10, 
SyaAssociativity.Left), + concepts_map["suffixed"]: (5, SyaAssociativity.Right), + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + expression = "suffixed a prefixed" + expected = ["a", "prefixed", "suffixed"] + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + # change the precedence + sya_def = { + concepts_map["prefixed"]: (5, SyaAssociativity.Left), + concepts_map["suffixed"]: (10, SyaAssociativity.Right), + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + expression = "suffixed a prefixed" + expected = ["a", "suffixed", "prefixed"] + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + def test_i_can_post_fix_right_associated_binary(self): + concepts_map = { + "equals": Concept("a equals b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + + sya_def = { + concepts_map["equals"]: (None, SyaAssociativity.Right), + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + expression = "one equals two equals three" + res = parser.infix_to_postfix(context, expression) + + expected = ["one", "two", "three", ("equals", 1), "equals"] + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + def test_i_can_post_fix_left_associated_binary(self): + concepts_map = { + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + + sya_def = { + concepts_map["plus"]: (None, SyaAssociativity.Left), + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + expression = "one plus two plus three" + res = parser.infix_to_postfix(context, expression) + + expected = ["one", "two", "plus", "three", ("plus", 1)] + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + @pytest.mark.parametrize("expression, expected", [ + ("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "two", "three", ("?", 1), "?"]), + ("x$!# ? y$!# : (z$!# ? two : three)", ["x$!# ", " y$!# ", "z$!# ", "two", "three", ("?", 1), "?"]), + + ("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]), + ("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!# ", " y$!# ", " z$!#", ("?", 1), "three", "?"]), + + ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]), + ("one ? two : (x$!# ? y$!# : z$!#)", ["one", "two", "x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]), + ]) + def test_i_can_post_fix_right_associated_ternary(self, expression, expected): + concepts_map = { + "?": Concept("a ? 
b : c").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + sya_def = { + concepts_map["?"]: (5, SyaAssociativity.Right), + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + assert len(res) == 1 + assert res[0].out == expected_array + + @pytest.mark.parametrize("expression, expected", [ + ("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "?", "two", "three", ("?", 1)]), + ("(x$!# ? y$!# : z$!#) ? two : three", ["x$!# ", " y$!# ", " z$!#", "?", "two", "three", ("?", 1)]), + + # the following one is not possible when Left association + # ("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]), + + ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", "?", " y$!# ", " z$!#", ("?", 1)]), + ("(one ? two : x$!#) ? y$!# : z$!#", ["one", "two", " x$!#", "?", " y$!# ", " z$!#", ("?", 1)]), + ]) + def test_i_can_post_fix_left_associated_ternary(self, expression, expected): + concepts_map = { + "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + sya_def = { + concepts_map["?"]: (5, SyaAssociativity.Left), + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + assert len(res) == 1 + assert res[0].out == expected_array + + def test_i_can_post_fix_when_multiple_concepts_are_found(self): + concepts_map = { + "foo": Concept("foo a").def_prop("a"), + "foo bar": Concept("foo bar a").def_prop("a"), + "baz": Concept("baz"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + expression = "foo bar baz" + res = parser.infix_to_postfix(context, expression) + expected_sequences = [ + [" bar ", "foo", "baz"], + ["baz", "foo bar"] + ] + + assert len(res) == len(expected_sequences) + for res_i, expected in zip(res, expected_sequences): + assert len(res_i.errors) == 0 + expected_array = compute_expected_array(concepts_map, expression, expected) + assert res_i.out == expected_array + + @pytest.mark.parametrize("expression, expected", [ + # I can't manage source code functions :-( + # ("function(one plus three) minus two", []), + + ("(one plus two) ", ["one", "two", "plus"]), + ("(one prefixed) ", ["one", "prefixed"]), + ("(suffixed one) ", ["one", "suffixed"]), + ("(one ? 
two : three)", ["one", "two", "three", "?"]), + ("square(square(one))", ["one", ("square", 1), "square"]), + ("square ( square ( one ) )", ["one", ("square", 1), "square"]), + + ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]), + ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), + ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), + + ("(one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), + ("( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), + ("( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]), + + ("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]), + ("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]), + ("suffixed (suffixed square(one))", ["one", "square", ("suffixed", 1), "suffixed"]), + ("suffixed ( suffixed square ( one ) )", ["one", "square", ("suffixed", 1), "suffixed"]), + + ("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]), + ("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]), + ("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]), + ("( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]), + + ("foo bar (one)", ["one", "foo bar"]), + ("foo bar ( one )", ["one", "foo bar"]), + ]) + def test_i_can_pos_fix_when_parenthesis(self, expression, expected): + concepts_map = { + "prefixed": Concept("a prefixed").def_prop("a"), + "suffixed": Concept("suffixed a").def_prop("a"), + "square": Concept("square(a)").def_prop("a"), + "foo bar": Concept("foo bar(a)").def_prop("a"), + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "minus": Concept("a minus b").def_prop("a").def_prop("b"), + "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), + "?": Concept("a ? 
b : c").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + + sya_def = { + concepts_map["square"]: (None, SyaAssociativity.No), + concepts_map["plus"]: (10, SyaAssociativity.Right), + concepts_map["minus"]: (10, SyaAssociativity.Right), + } + + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + @pytest.mark.parametrize("expression, expected_sequences", [ + # composition + ("function(suffixed one)", [[SCWC("function(", ")", "one", "suffixed")]]), + ("function(one prefixed)", [[SCWC("function(", ")", "one", "prefixed")]]), + ("function(if one then two else three end)", [[SCWC("function(", ")", "one", "two", "three", "if")]]), + ("function(suffixed twenty two)", [ + [SCWC("function(", ")", " twenty ", "suffixed", "two")], + [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "suffixed")]]), + ("function(twenty two prefixed)", [ + [SCWC("function(", ")", "twenty ", "two", "prefixed")], + [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "prefixed")], + ]), + ("function(if one then twenty two else three end)", [ + ["')'", "one", " twenty ", "two"], # error + [SCWC("function(", ")", "one", short_cnode("twenties", "twenty two"), "three", "if")] + ]), + ("func1(func2(one two) three)", [ + [SCWC("func1(", (")", 1), SCWC("func2(", ")", "one", "two"), "three")]]), + + ("twenty two(suffixed one)", [ + ["twenty ", SCWC("two(", ")", "one", "suffixed")], + [SCWC("twenty two(", ")", "one", "suffixed")], + ]), + ("twenty two(one prefixed)", [ + ["twenty ", SCWC("two(", ")", "one", "prefixed")], + [SCWC("twenty two(", ")", "one", "prefixed")], + ]), + ("f1(one plus two mult three) plus f2(suffixed x$!# prefixed)", [ + [SCWC("f1(", ")", "one", "two", "three", "mult", "plus"), + SCWC(" f2(", (")", 1), " x$!# ", "prefixed", "suffixed"), + ("plus", 1)] + ]), + + # plus, suffixed, prefixed, ternary + ("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC(" func2(", (")", 1), "two"), "plus"]]), + ("suffixed function(one)", [[SCWC(" function(", ")", "one"), "suffixed"]]), + ("function(one) prefixed", [[SCWC("function(", ")", "one"), "prefixed"]]), + ("if f1(one) then f2(two) else f3(three) end", [ + [SCWC(" f1(", ")", "one"), SCWC(" f2(", (")", 1), "two"), SCWC(" f3(", (")", 2), "three"), "if"]]), + + # Sequence + ("if one then two else three end function(x$!#)", [ + ["one", "two", "three", "if", SCWC(" function(", ")", "x$!#")]]), + ("one prefixed function(two)", [["one", "prefixed", SCWC(" function(", ")", "two")]]), + ("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]), + ]) + def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): + concepts_map = { + "prefixed": Concept("a prefixed").def_prop("a"), + "suffixed": Concept("suffixed a").def_prop("a"), + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "mult": Concept("a mult b").def_prop("a").def_prop("b"), + "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sya_def = { + concepts_map["plus"]: (5, SyaAssociativity.Right), + concepts_map["mult"]: (10, 
SyaAssociativity.Right),  # precedence greater than plus
+        }
+        sheerka, context, parser = self.init_parser(concepts_map, sya_def)
+
+        res = parser.infix_to_postfix(context, expression)
+
+        assert len(res) == len(expected_sequences)
+        for res_i, expected in zip(res, expected_sequences):
+            expected_array = compute_expected_array(concepts_map, expression, expected)
+            assert res_i.out == expected_array
+
+    @pytest.mark.parametrize("expression, expected", [
+        ("(", ("(", 0)),
+        ("one plus ( 1 + ", ("(", 4)),
+        ("one( 1 + ", ("(", 1)),
+        ("one ( 1 + ", ("(", 2)),
+        ("function( 1 + ", ("(", 1)),
+        ("function ( 1 + ", ("(", 2)),
+        ("one plus ) 1 + ", (")", 4)),
+        ("one ) 1 + ", (")", 2)),
+        ("function ) 1 + ", (")", 2)),
+        ("one ? ( : two", ("(", 4)),
+        ("one ? one plus ( : two", ("(", 8)),
+        ("one ? ) : two", (")", 4)),
+        ("one ? one plus ) : two", (")", 8)),
+        ("(one plus ( 1 + )", ("(", 0)),
+    ])
+    def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected):
+        concepts_map = {
+            "one": Concept("one"),
+            "two": Concept("two"),
+            "plus": Concept("a plus b").def_prop("a").def_prop("b"),
+            "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"),
+        }
+        sheerka, context, parser = self.init_parser(concepts_map, None)
+
+        res = parser.infix_to_postfix(context, expression)
+
+        assert len(res) == 1
+        assert res[0].errors == [expected]
+
+    @pytest.mark.parametrize("expression, expected", [
+        ("one ? one two : three", ("?", ":")),
+    ])
+    def test_i_can_detect_when_too_many_parameters(self, expression, expected):
+        concepts_map = {
+            "one": Concept("one"),
+            "two": Concept("two"),
+            "plus": Concept("a plus b").def_prop("a").def_prop("b"),
+            "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"),
+        }
+        sheerka, context, parser = self.init_parser(concepts_map, None)
+
+        res = parser.infix_to_postfix(context, expression)
+
+        assert len(res) == 1
+        assert len(res[0].errors) == 1
+        error = res[0].errors[0]
+        assert isinstance(error, TooManyParametersFound)
+        assert error.concept == concepts_map[expected[0]]
+        assert error.token.value == expected[1]
+
+    @pytest.mark.parametrize("expression, expected", [
+        ("one infix two x$!#", ["one", "two", "infix", " x$!#"]),
+        ("x$!# one infix two", ["x$!# ", "one", "two", "infix"]),
+        ("one prefixed x$!#", ["one", "prefixed", " x$!#"]),
+        ("x$!# one prefixed", ["x$!# ", "one", "prefixed"]),
+        ("suffixed one x$!#", ["one", "suffixed", " x$!#"]),
+        ("x$!# suffixed one", ["x$!# ", "one", "suffixed"]),
+        ("one ? two : three x$!#", ["one", "two", "three", "?", " x$!#"]),
+        ("x$!# one ? two : three", ["x$!# ", "one", "two", "three", "?"]),
+
+        ("one infix two three infix four", ["one", "two", "infix", "three", "four", ("infix", 1)]),
+        ("one infix two three prefixed", ["one", "two", "infix", "three", "prefixed"]),
+        ("one infix two suffixed three", ["one", "two", "infix", "three", "suffixed"]),
+        ("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!# ", " y$!# ", " z$!#", "?"]),
+
+        ("one prefixed two infix three", ["one", "prefixed", "two", "three", "infix"]),
+        ("one prefixed two prefixed", ["one", "prefixed", "two", ("prefixed", 1)]),
+        ("one prefixed suffixed two", ["one", "prefixed", "two", "suffixed"]),
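+        # (Illustrative note: operands like x$!#, y$!# and z$!# never match a
+        # declared concept keyword, so they flow through the conversion as
+        # unrecognized-token operands, keeping the spacing of the original
+        # expression, e.g. " x$!# ".)
+        ("one prefixed x$!# ? 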
y$!# : z$!#", ["one", "prefixed", " x$!# ", " y$!# ", " z$!#", "?"]), + + ("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]), + ]) + def test_i_can_post_fix_sequences(self, expression, expected): + concepts_map = { + "prefixed": Concept("a prefixed").def_prop("a"), + "suffixed": Concept("suffixed a").def_prop("a"), + "infix": Concept("a infix b").def_prop("a").def_prop("b"), + "?": Concept("a ? b : c").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "four": Concept("four"), + } + + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, expression) + expected_array = compute_expected_array(concepts_map, expression, expected) + + assert len(res) == 1 + assert res[0].out == expected_array + + def test_the_more_concepts_the_more_results(self): + concepts_map = { + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "plus plus": Concept("a plus plus").def_prop("a"), + "plus equals": Concept("a plus equals b").def_prop("a").def_prop("b"), + } + + sheerka, context, parser = self.init_parser(concepts_map, None) + + expression = "a plus plus equals b" + res = parser.infix_to_postfix(context, expression) + expected_array = tests.parsers.parsers_utils.compute_debug_array(res) + assert expected_array == [ + ["a", "a plus b", "a plus b", "equals", "b"], + ["a", "a plus b", "a plus plus", "equals", "b"], + ["a", "a plus b", "a plus equals b", "equals", "b"], + ["a", "a plus plus", "plus", "equals", "b"], + ["a", "a plus plus", "plus", "equals", "b"], + ["a", "a plus plus", "plus", "equals", "b"], + ["a", "a plus equals b", "a plus b", "equals", "b"], + ["a", "a plus equals b", "a plus plus", "equals", "b"], + ["a", "a plus equals b", "a plus equals b", "equals", "b"], + ] + + def test_i_can_use_string_instead_of_identifier(self): + concepts_map = { + "ternary": Concept("a ? ? b '::' c").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.infix_to_postfix(context, "one ? ? 
two '::' three") + assert len(res) == 1 + assert res[0].out == [ + cnode("one", start=0, end=0, source="one"), + cnode("two", start=6, end=6, source="two"), + cnode("three", start=10, end=10, source="three"), + SyaConceptParserHelper(concepts_map["ternary"], 2), + ] + + def test_i_cannot_chain_non_associative(self): + concepts_map = { + "less than": Concept("a less than b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + sya_def = { + concepts_map["less than"]: (None, SyaAssociativity.No), + } + + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + res = parser.infix_to_postfix(context, "one less than two less than three") + assert len(res) == 1 + assert res[0].errors == [NoneAssociativeSequenceErrorNode(concepts_map["less than"], 2, 8)] + + def test_i_can_post_fix_bnf_definition(self): + """ + The definition of a BNF concept is considered as an atom concept + Not quite sure why this test is here + :return: + """ + concepts_map = { + "foo": Concept("foo a").def_prop("a"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + expression = "foo twenties" + res = parser.infix_to_postfix(context, expression) + + expected = [cnode("twenties", 2, 2, "twenties"), "foo"] + expected_array = compute_expected_array(concepts_map, expression, expected) + assert len(res) == 1 + assert res[0].out == expected_array + + def test_i_can_parse_when_concept_atom_only(self): + concepts_map = { + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "mult": Concept("a mult b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + } + sya_def = { + concepts_map["plus"]: (5, SyaAssociativity.Right), + concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + text = "one plus two mult three" + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == [ConceptNode(concepts_map["plus"], 0, 8, source=text)] + + # check the compiled + expected_concept = lexer_nodes[0].concept + assert expected_concept.compiled["a"] == concepts_map["one"] + assert expected_concept.compiled["b"] == concepts_map["mult"] + assert expected_concept.compiled["b"].compiled["a"] == concepts_map["two"] + assert expected_concept.compiled["b"].compiled["b"] == concepts_map["three"] + + def test_i_can_parse_when_python_code(self): + concepts_map = { + "foo": Concept("foo a").def_prop("a") + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + text = "foo 1 + 1" + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == [ConceptNode(concepts_map["foo"], 0, 6, source=text)] + + # check the compiled + expected_concept = lexer_nodes[0].concept + assert len(expected_concept.compiled["a"]) == 1 + + return_value_a = expected_concept.compiled["a"][0] + assert sheerka.isinstance(return_value_a, BuiltinConcepts.RETURN_VALUE) + assert return_value_a.status + assert sheerka.isinstance(return_value_a.body, BuiltinConcepts.PARSER_RESULT) + assert 
return_value_a.body.source == " 1 + 1" + assert isinstance(return_value_a.body.body, PythonNode) + + def test_i_can_parse_when_bnf_concept(self): + concepts_map = { + "foo": Concept("foo a").def_prop("a"), + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + text = "foo twenty one" + res = parser.parse(context, text) + assert len(res) == 2 + assert res[1].status + + wrapper = res[1].body + lexer_nodes = res[1].body.body + + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == [ConceptNode(concepts_map["foo"], 0, 4, source=text)] + + # check the compiled + expected_concept = lexer_nodes[0].concept + assert sheerka.isinstance(expected_concept.compiled["a"], "twenties") + assert expected_concept.compiled["a"].compiled["unit"] == concepts_map["one"] + + def test_i_can_parse_sequences(self): + concepts_map = { + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "foo": Concept("foo a").def_prop("a"), + "one": Concept("one"), + "two": Concept("two"), + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + text = "one plus 1 + 1 foo two" + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == [ + ConceptNode(concepts_map["plus"], 0, 9, source="one plus 1 + 1 "), + ConceptNode(concepts_map["foo"], 10, 12, source="foo two")] + + # check the compiled + concept_plus_a = lexer_nodes[0].concept.compiled["a"] + concept_plus_b = lexer_nodes[0].concept.compiled["b"] + concept_foo_a = lexer_nodes[1].concept.compiled["a"] + + assert concept_plus_a == concepts_map["one"] + assert len(concept_plus_b) == 1 + assert sheerka.isinstance(concept_plus_b[0], BuiltinConcepts.RETURN_VALUE) + assert isinstance(concept_plus_b[0].body.body, PythonNode) + assert concept_foo_a == concepts_map["two"] + + @pytest.mark.parametrize("text, expected_status, expected_result", [ + ("function(suffixed one)", True, [ + SCWC("function(", ")", CNC("suffixed", 2, 4, a="one"))]), + ("function(one plus two mult three)", True, [ + SCWC("function(", ")", CNC("plus", 2, 10, a="one", b=CC("mult", a="two", b="three")))]), + ("f1(one prefixed) plus f2(suffixed two)", True, [ + CNC("plus", + a=SCWC("f1(", ")", CNC("prefixed", a="one")), + b=SCWC(" f2(", (")", 1), CNC("suffixed", a="two"))) + ]), + ("function(suffixed x$!#)", False, [ + SCWC("function(", ")", CNC("suffixed", 2, 7, a=" x$!#"))]), + ]) + def test_i_can_parse_when_one_result(self, text, expected_status, expected_result): + concepts_map = { + "prefixed": Concept("a prefixed").def_prop("a"), + "suffixed": Concept("suffixed a").def_prop("a"), + "mult": Concept("a mult b").def_prop("a").def_prop("b"), + "plus": Concept("a plus b").def_prop("a").def_prop("b"), + "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), + "one": Concept("one"), + "two": Concept("two"), + "three": Concept("three"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + } + sya_def = { + concepts_map["plus"]: (5, SyaAssociativity.Right), + concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + } + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = 
res.body.body + + expected_array = compute_expected_array(concepts_map, text, expected_result) + assert res.status == expected_status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array + + # @pytest.mark.parametrize("text, list_of_expected", [ + # ("1 plus twenty one", [ + # (False, [CNC("plus", a=scnode(0, 0, "1"), b=UTN(" twenty ")), CN("one")]), + # (True, [CNC("plus", a=scnode(0, 0, "1"), b=CN("twenties", source="twenty one"))]) + # ]) + # ]) + # def test_i_can_parse_when_multiple_results(self, text, list_of_expected): + # concepts_map = { + # "prefixed": Concept("a prefixed").def_prop("a"), + # "suffixed": Concept("suffixed a").def_prop("a"), + # "mult": Concept("a mult b").def_prop("a").def_prop("b"), + # "plus": Concept("a plus b").def_prop("a").def_prop("b"), + # "if": Concept("if a then b else c end").def_prop("a").def_prop("b").def_prop("c"), + # "one": Concept("one"), + # "two": Concept("two"), + # "three": Concept("three"), + # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"), + # } + # sya_def = { + # concepts_map["plus"]: (5, SyaAssociativity.Right), + # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus + # } + # sheerka, context, parser = self.init_parser(concepts_map, sya_def) + # + # list_of_res = parser.parse(context, text) + # assert len(list_of_res) == len(list_of_expected) + # + # for res, expected in zip(list_of_res, list_of_expected): + # wrapper = res.body + # lexer_nodes = res.body.body + # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + # + # expected_array = compute_expected_array(concepts_map, text, expected[1]) + # assert res.status == expected[0] + # assert lexer_nodes == expected_array + + @pytest.mark.parametrize("text, expected_concept, expected_unrecognized", [ + ("x$!# prefixed", "prefixed", ["a"]), + ("suffixed x$!#", "suffixed", ["a"]), + ("one infix x$!#", "infix", ["b"]), + ("x$!# infix one", "infix", ["a"]), + ("x$!# infix z$!#", "infix", ["a", "b"]), + ]) + def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized): + concepts_map = { + "suffixed": Concept("suffixed a").def_prop("a"), + "prefixed": Concept("a prefixed").def_prop("a"), + "infix": Concept("a infix b").def_prop("a").def_prop("b"), + "one": Concept("one") + } + sheerka, context, parser = self.init_parser(concepts_map, None) + + res = parser.parse(context, text) + wrapper = res.body + lexer_nodes = res.body.body + expected_end = len(list(Tokenizer(text))) - 2 + + assert not res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == [ConceptNode(concepts_map[expected_concept], 0, expected_end, source=text)] + + concept_found = lexer_nodes[0].concept + for unrecognized in expected_unrecognized: + assert isinstance(concept_found.compiled[unrecognized], UnrecognizedTokensNode) + + @pytest.mark.parametrize("text, expected", [ + ("x$!# suffixed one", [utnode(0, 4, "x$!# "), cnode("suffixed __var__0", 5, 7, "suffixed one")]), + ("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]), + ]) + def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected): + concepts_map = { + "suffixed": Concept("suffixed a").def_prop("a"), + "prefixed": Concept("a prefixed").def_prop("a"), + "infix": Concept("a infix b").def_prop("a").def_prop("b"), + "one": Concept("one"), + "two": Concept("two"), 
+        }
+        sheerka, context, parser = self.init_parser(concepts_map, None)
+
+        res = parser.parse(context, text)
+        wrapper = res.body
+        lexer_nodes = res.body.body
+
+        assert not res.status
+        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
+        assert lexer_nodes == expected
+
+    @pytest.mark.parametrize("text", [
+        "one",
+        "1 + 1",
+        "x$!# ",
+        "twenty one",
+        "function(not an sya concept)",
+    ])
+    def test_i_cannot_parse_when_no_concept_is_recognized(self, text):
+        """
+        "No concept recognized" actually means no concept with properties:
+        atom concepts, source code or BNF concepts alone are discarded by the lexer.
+        :return:
+        """
+        concepts_map = {
+            "plus": Concept("a plus b").def_prop("a").def_prop("b"),
+            "one": Concept("one"),
+            "two": Concept("two"),
+            "three": Concept("three"),
+            "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_prop("unit"),
+        }
+        sheerka, context, parser = self.init_parser(concepts_map, None)
+
+        res = parser.parse(context, text)
+
+        assert not res.status
+        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
+        assert res.body.body == text
+
+    def test_i_cannot_parse_empty_string(self):
+        sheerka, context, parser = self.init_parser({}, None)
+
+        res = parser.parse(context, "")
+
+        assert not res.status
+        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
diff --git a/tests/parsers/test_UnrecognizedNodeParser.py b/tests/parsers/test_UnrecognizedNodeParser.py
new file mode 100644
index 0000000..db3cd48
--- /dev/null
+++ b/tests/parsers/test_UnrecognizedNodeParser.py
@@ -0,0 +1,383 @@
+from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
+from core.concept import Concept, CC
+from core.tokenizer import Tokenizer, TokenKind
+from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, scnode, cnode, \
+    utnode, SyaAssociativity, CN, CNC, UTN
+from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
+
+from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
+from tests.parsers.parsers_utils import compute_expected_array, get_node
+
+
+def get_input_nodes_from(my_concepts_map, full_expr, *args):
+    def _get_real_node(n):
+        if isinstance(n, CC):
+            concept = n.concept or Concept().update_from(my_concepts_map[n.concept_key])
+            for k, v in n.compiled.items():
+                concept.compiled[k] = _get_real_node(v)
+            return concept
+
+        if isinstance(n, (utnode, UTN)):
+            return UnrecognizedTokensNode(n.start, n.end, full_expr_as_tokens[n.start: n.end + 1])
+
+        if isinstance(n, (CNC, CN, cnode)):
+            concept = n.concept if hasattr(n, "concept") and n.concept else \
+                Concept().update_from(my_concepts_map[n.concept_key])
+            tokens = full_expr_as_tokens[n.start: n.end + 1]
+            if hasattr(n, "compiled"):
+                for k, v in n.compiled.items():
+                    concept.compiled[k] = _get_real_node(v)
+            return ConceptNode(concept, n.start, n.end, tokens)
+
+        raise NotImplementedError()
+
+    res = []
+    full_expr_as_tokens = list(Tokenizer(full_expr))
+    tokens_for_get_node = [token.value for token in full_expr_as_tokens if token.type != TokenKind.EOF]
+    for arg in args:
+        node = get_node(my_concepts_map, tokens_for_get_node, arg)
+        res.append(_get_real_node(node))
+
+    return res
+
+
+concepts_map = {
+    "5params": Concept("5params").def_prop("a").def_prop("b").def_prop("c").def_prop("d").def_prop("e"),
+    "plus": Concept("a plus b", body="a + b").def_prop("a").def_prop("b"),
+    "mult": Concept("a mult b", body="a * b").def_prop("a").def_prop("b"),
+    "one": Concept("one", body="1"),
+    "two": Concept("two", body="2"),
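+    # The body= strings above are presumably evaluated as plain Python once a
+    # concept's parameters are compiled (cf. the commented-out
+    # evaluate_concept checks further down, where "one plus 1 + 1" is
+    # expected to evaluate to 3).
+    "three": 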
Concept("three", body="3"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit", body="20 + unit").def_prop("unit"), + "hello_atom": Concept("hello one"), + "hello_sya": Concept("hello a").def_prop("a"), + "greetings_a": Concept("greetings a").def_prop("a"), + "greetings_b": Concept("greetings b").def_prop("b"), + +} + + +class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): + sheerka = None + + @classmethod + def setup_class(cls): + t = TestUnrecognizedNodeParser() + TestUnrecognizedNodeParser.sheerka, context, _ = t.init_parser(concepts_map, create_new=True) + TestUnrecognizedNodeParser.sheerka.set_sya_def(context, [ + (concepts_map["mult"].id, 20, SyaAssociativity.Right), + (concepts_map["plus"].id, 10, SyaAssociativity.Right), + ]) + + def init_parser(self, my_concepts_map=None, **kwargs): + if my_concepts_map: + sheerka, context, *updated_concepts = self.init_concepts(*my_concepts_map.values(), **kwargs) + for i, pair in enumerate(my_concepts_map): + my_concepts_map[pair] = updated_concepts[i] + else: + sheerka = TestUnrecognizedNodeParser.sheerka + context = self.get_context(sheerka) + + parser = UnrecognizedNodeParser() + return sheerka, context, parser + + def test_i_can_validate_a_valid_concept_node(self): + sheerka, context, parser = self.init_parser() + node = get_input_nodes_from(concepts_map, "one", "one")[0] + + res = UnrecognizedNodeParser().validate_concept_node(context, node) + + assert res.status + assert res.body.concept == concepts_map["one"] + + def test_i_can_validate_concept_unrecognized_tokens(self): + sheerka, context, parser = self.init_parser() + node = get_input_nodes_from( + concepts_map, + "5params one two three twenty one 1 + 2 one plus two mult three", + CNC("5params", + a=" one ", + b=" two three ", + c=" twenty one ", + d=utnode(12, 18, " 1 + 2 "), + e=" one plus two mult three"))[0] + + res = UnrecognizedNodeParser().validate_concept_node(context, node) + + assert res.status + + concept = res.body.concept + assert concept == concepts_map["5params"] + + assert len(concept.compiled["a"]) == 1 + assert sheerka.isinstance(concept.compiled["a"][0], BuiltinConcepts.RETURN_VALUE) + assert concept.compiled["a"][0].status + assert concept.compiled["a"][0].who == "parsers.AtomNode" + assert concept.compiled["a"][0].body.body == [cnode("one", 1, 1, "one")] + + assert len(concept.compiled["b"]) == 1 + assert sheerka.isinstance(concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE) + assert concept.compiled["b"][0].status + assert concept.compiled["b"][0].who == "parsers.AtomNode" + assert concept.compiled["b"][0].body.body == [cnode("two", 1, 1, "two"), cnode("three", 3, 3, "three")] + + assert len(concept.compiled["c"]) == 1 + assert sheerka.isinstance(concept.compiled["c"][0], BuiltinConcepts.RETURN_VALUE) + assert concept.compiled["c"][0].status + assert concept.compiled["c"][0].who == "parsers.BnfNode" + expected_nodes = compute_expected_array( + concepts_map, + " twenty one ", + [CNC("twenties", source="twenty one", unit="one", one="one")]) + assert concept.compiled["c"][0].body.body == expected_nodes + + assert len(concept.compiled["d"]) == 1 + assert sheerka.isinstance(concept.compiled["d"][0], BuiltinConcepts.RETURN_VALUE) + assert concept.compiled["d"][0].status + assert concept.compiled["d"][0].who == "parsers.Python" + assert concept.compiled["d"][0].body.source == "1 + 2" + + assert len(concept.compiled["e"]) == 1 + assert sheerka.isinstance(concept.compiled["e"][0], BuiltinConcepts.RETURN_VALUE) + assert 
+        assert concept.compiled["e"][0].who == "parsers.SyaNode"
+        expected_nodes = compute_expected_array(
+            concepts_map,
+            " one plus two mult three ",
+            [CNC("plus", a="one", b=CC("mult", a="two", b="three"))])
+        assert concept.compiled["e"][0].body.body == expected_nodes
+
+        # # sanity check, I can evaluate the concept
+        # evaluated = sheerka.evaluate_concept(self.get_context(sheerka, eval_body=True), concept)
+        # assert evaluated.key == concept.key
+        # assert evaluated.get_prop("a") ==
+
+    def test_i_can_validate_with_recursion(self):
+        sheerka, context, parser = self.init_parser()
+
+        node = get_input_nodes_from(
+            concepts_map,
+            "1 plus 2 mult twenty two",
+            CNC("plus",
+                a="1 ",
+                b=CC("mult", a=" 2 ", b=" twenty two")))[0]
+
+        res = parser.validate_concept_node(context, node)
+
+        assert res.status
+        assert res.body.concept == concepts_map["plus"]
+        assert len(res.body.concept.compiled["a"]) == 1
+        assert res.body.concept.compiled["a"][0].status
+        assert res.body.concept.compiled["a"][0].who == "parsers.Python"
+        assert res.body.concept.compiled["a"][0].body.source == "1"
+
+        assert res.body.concept.compiled["b"] == concepts_map["mult"]
+        assert sheerka.isinstance(res.body.concept.compiled["b"].compiled["a"][0], BuiltinConcepts.RETURN_VALUE)
+        assert res.body.concept.compiled["b"].compiled["a"][0].status
+        assert res.body.concept.compiled["b"].compiled["a"][0].who == "parsers.Python"
+        assert res.body.concept.compiled["b"].compiled["a"][0].body.source == "2"
+
+        assert sheerka.isinstance(res.body.concept.compiled["b"].compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
+        assert res.body.concept.compiled["b"].compiled["b"][0].status
+        assert res.body.concept.compiled["b"].compiled["b"][0].who == "parsers.BnfNode"
+        expected_nodes = compute_expected_array(
+            concepts_map,
+            " twenty two",
+            [CNC("twenties", source="twenty two", unit="two", two="two")])
+        assert res.body.concept.compiled["b"].compiled["b"][0].body.body == expected_nodes
+
+    # def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
+    #     sheerka, context, parser = self.init_parser()
+    #
+    #     node = get_input_nodes_from(
+    #         concepts_map,
+    #         "one plus 1 + 1",
+    #         CNC("plus",
+    #             a=UTN("one "),
+    #             b=UTN("1 + 1")))[0]
+    #
+    #     res = UnrecognizedNodeParser().validate_concept_node(context, node)
+    #
+    #     assert res.status
+    #     assert res.body.concept == concepts_map["plus"]
+    #     assert res.body.concept.compiled["a"] == concepts_map["one"]
+    #     assert len(res.body.concept.compiled["b"]) == 1
+    #     assert sheerka.isinstance(res.body.concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
+    #     assert res.body.concept.compiled["b"][0].status
+    #     assert res.body.concept.compiled["b"][0].who == "parsers.Python"
+    #     assert res.body.concept.compiled["b"][0].body.source == "1 + 1"
+    #
+    #     # # evaluate
+    #     # context = self.get_context(sheerka, eval_body=True)
+    #     # evaluated = sheerka.evaluate_concept(context, res.body.concept)
+    #     # assert evaluated.body == 3
+
+    # def test_i_can_validate_and_evaluate_concept_when_bnf_concept(self):
+    #     sheerka, context, parser = self.init_parser()
+    #     node = get_concept_node(concepts_map, "one plus twenty one", "plus", "one", "twenty one")
+    #
+    #     res = UnrecognizedNodeParser().validate_concept_node(context, node)
+    #
+    #     assert res.status
+    #     assert res.body.concept == concepts_map["plus"]
+    #     assert res.body.concept.compiled["a"] == concepts_map["one"]
+    #     assert len(res.body.concept.compiled["b"]) == 1
+    #     assert res.body.concept.compiled["b"][0].status
+    #     assert res.body.concept.compiled["b"][0].who == "parsers.BnfNode"
+    #
+    #     # evaluate
+    #     context = self.get_context(sheerka, eval_body=True)
+    #     evaluated = sheerka.evaluate_concept(context, res.body.concept)
+    #     assert evaluated.body == 22
+
+    def test_i_can_parse_unrecognized_python_node(self):
+        sheerka, context, parser = self.init_parser()
+
+        expression = "1 + 1"
+        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
+        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)
+
+        res = parser.parse(context, parser_input)
+        parser_result = res.body
+        actual_nodes = res.body.body
+
+        assert res.status
+        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
+        assert len(actual_nodes) == 1
+        assert actual_nodes[0] == scnode(0, 4, expression)
+
+    def test_i_can_parse_unrecognized_bnf_concept_node(self):
+        sheerka, context, parser = self.init_parser()
+
+        expression = "twenty one"
+        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
+        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)
+
+        res = parser.parse(context, parser_input)
+        parser_result = res.body
+        actual_nodes = res.body.body
+
+        assert res.status
+        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
+        assert len(actual_nodes) == 1
+        expected_array = compute_expected_array(
+            concepts_map,
+            expression, [CNC("twenties", source=expression, unit="one", one="one")])
+        assert actual_nodes == expected_array
+
+    def test_i_can_parse_unrecognized_sya_concept_node(self):
+        sheerka, context, parser = self.init_parser()
+
+        expression = "one plus two mult three"
+        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
+        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)
+
+        res = parser.parse(context, parser_input)
+        parser_result = res.body
+        actual_nodes = res.body.body
+
+        assert res.status
+        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
+        assert len(actual_nodes) == 1
+
+        expected_array = compute_expected_array(
+            concepts_map,
+            expression, [CNC("plus",
+                             a="one",
+                             b=CC("mult", source="two mult three", a="two", b="three"))])
+        assert actual_nodes == expected_array
+
+    def test_i_can_parse_sequences(self):
+        sheerka, context, parser = self.init_parser()
+
+        expression = "one plus two three"
+        sequence = get_input_nodes_from(concepts_map, expression,
+                                        CNC("plus", a="one", b="two"),
+                                        utnode(5, 6, " three"))
+        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=sequence)
+
+        res = parser.parse(context, parser_input)
+        actual_nodes = res.body.body
+
+        assert res.status
+
+        expected_array = compute_expected_array(
+            concepts_map,
+            expression, [
+                CNC("plus", a="one", b="two"),
+                CN("three", start=6, end=6)])
+        assert actual_nodes == expected_array
+
+    def test_i_can_parse_when_multiple_atom_and_sya(self):
+        sheerka, context, parser = self.init_parser()
+        expression = "two hello one three"
+        nodes = get_input_nodes_from(concepts_map, expression,
+                                     "two", UTN("hello one"), "three")
+        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)
+
+        res = parser.parse(context, parser_input)
+        assert len(res) == 2
+        assert res[0].status
+        assert res[1].status
+
+        actual_nodes0 = res[0].body.body
+        expected_0 = compute_expected_array(concepts_map, expression, [
+            CN("two", 0, 0),
+            CN("hello_atom", source="hello one", start=2, end=4),
one", start=2, end=4), + CN("three", 6, 6)]) + assert actual_nodes0 == expected_0 + + actual_nodes1 = res[1].body.body + expected_1 = compute_expected_array(concepts_map, expression, [ + CN("two", 0, 0), + CNC("hello_sya", source="hello one", start=2, end=4, a="one"), + CN("three", 6, 6)]) + + assert actual_nodes1 == expected_1 + + def test_i_can_parse_when_multiple_sya_concepts(self): + sheerka, context, parser = self.init_parser() + expression = "greetings two" + nodes = get_input_nodes_from(concepts_map, expression, UTN("greetings two")) + + parser_input = ParserResultConcept("parsers.xxx", source="greetings two", value=nodes) + + res = parser.parse(context, parser_input) + assert len(res) == 2 + assert res[0].status + assert res[1].status + + actual_nodes0 = res[0].body.body + expected_0 = compute_expected_array(concepts_map, expression, [ + CNC("greetings_a", source="greetings two", start=0, end=2, a="two")]) + assert actual_nodes0 == expected_0 + + actual_nodes1 = res[1].body.body + expected_1 = compute_expected_array(concepts_map, expression, [ + CNC("greetings_b", source="greetings two", start=0, end=2, b="two")]) + assert actual_nodes1 == expected_1 + + def test_i_cannot_parse_when_i_cannot_validate(self): + sheerka, context, parser = self.init_parser(concepts_map, create_new=True) + expression = "one plus unknown tokens" + nodes = get_input_nodes_from(concepts_map, expression, + CNC("plus", a="one ", b=" unknown tokens")) + + parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes) + res = parser.parse(context, parser_input) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) + + def test_i_cannot_parse_when_unrecognized(self): + sheerka, context, parser = self.init_parser(concepts_map, create_new=True) + expression = "unknown tokens" + nodes = get_input_nodes_from(concepts_map, expression, UTN(expression)) + + parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes) + res = parser.parse(context, parser_input) + actual_nodes = res.body.body + + assert not res.status + assert actual_nodes == nodes