diff --git a/_concepts.txt b/_concepts.txt index 6ae9331..396d006 100644 --- a/_concepts.txt +++ b/_concepts.txt @@ -102,4 +102,6 @@ set_isa(c:x is a command:, __COMMAND) def concept q from q ? as question(q) pre is_question() set_is_lesser(__PRECEDENCE, q) def concept x is a 'concept' as isinstance(x, Concept) pre is_question() -def concept x is a y as isa(x,y) pre is_question() \ No newline at end of file +def concept x is a y as isa(x,y) pre is_question() +def concept explain x values where x as get_results() | filter(f"id=={x}") | format_d +set_isa(c:explain x values:, __COMMAND) \ No newline at end of file diff --git a/_concepts_lite.txt b/_concepts_lite.txt index c4a5d82..2f9310b 100644 --- a/_concepts_lite.txt +++ b/_concepts_lite.txt @@ -1,13 +1,8 @@ def concept one as 1 def concept two as 2 -def concept plus from a plus b as a + b def concept explain as get_results() | filter("id == 0") | recurse(2) set_isa(c:explain:, __COMMAND) def concept explain last as get_last_results() | filter("id == 0") | recurse(2) set_isa(c:explain last:, __COMMAND) -def concept precedence a > precedence b as set_is_greater_than(BuiltinConcepts.PRECEDENCE, a, b) -set_isa(c:precedence a > precedence b:, __COMMAND) -def concept x is a command as set_isa(x, __COMMAND) -set_isa(c:x is a command:, __COMMAND) -def concept q from q ? 
as question(q) pre in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED) -def concept x is a 'concept' as isinstance(x, Concept) pre in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED) \ No newline at end of file +def concept explain x as get_results() | filter(f"id == {x}") | recurse(3) where x +set_isa(c:explain x:, __COMMAND) \ No newline at end of file diff --git a/src/core/builtin_concepts.py b/src/core/builtin_concepts.py index 8ff5a8b..7fb2436 100644 --- a/src/core/builtin_concepts.py +++ b/src/core/builtin_concepts.py @@ -16,6 +16,8 @@ class BuiltinConcepts(Enum): SHEERKA = "sheerka" # processing instructions during sheerka.execute() + # The instruction may alter how the actions work + DEBUG = "debug" # activate all debug information EVAL_BODY_REQUESTED = "eval body" # to evaluate the body EVAL_WHERE_REQUESTED = "eval where" # to evaluate the where clause RETURN_BODY_REQUESTED = "return body" # returns the body of the concept instead of the concept itself diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index a69f1ce..8b08617 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -6,14 +6,16 @@ from core.ast.nodes import CallNodeConcept from core.ast.visitors import UnreferencedNamesVisitor from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, NotInit, ConceptParts +from core.sheerka.services.SheerkaExecute import SheerkaExecute from core.tokenizer import Keywords # from evaluators.BaseEvaluator import BaseEvaluator -from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode +from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode, SourceCodeWithConceptNode from parsers.BaseParser import BaseParser, ErrorNode PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING] EVAL_STEPS = PARSE_STEPS + [BuiltinConcepts.BEFORE_EVALUATION, BuiltinConcepts.EVALUATION, 
BuiltinConcepts.AFTER_EVALUATION] +PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"] def is_same_success(context, return_values): @@ -342,6 +344,37 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun return no_python +def parse_function(context, source, tokens=None, start=0): + """ + Helper function to parse what is supposed to be a function + :param context: + :param source: + :param tokens: + :param start: start index for the source code node + :return: + """ + sheerka = context.sheerka + from parsers.FunctionParser import FunctionParser + parser = FunctionParser() + desc = f"Parsing function '{source}'" + with context.push(BuiltinConcepts.PARSE_CODE, source, desc=desc) as sub_context: + sheerka_execution = sheerka.services[SheerkaExecute.NAME] + res = parser.parse(sub_context, sheerka_execution.get_parser_input(source, tokens)) + + if not isinstance(res, list): + res = [res] + + for r in [r for r in res if sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT)]: + r.body.body.start += start + r.body.body.end += start + if isinstance(r.body.body, SourceCodeWithConceptNode): + for n in [r.body.body.first, r.body.body.last] + r.body.body.nodes: + n.start += start + n.end += start + + return res + + def evaluate(context, source, evaluators="all", @@ -415,7 +448,12 @@ def get_lexer_nodes(return_values, start, tokens): end = start + len(tokens) - 1 lexer_nodes.append( - [SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)]) + [SourceCodeNode(start, + end, + tokens, + ret_val.body.source, + python_node=ret_val.body.body, + return_value=ret_val)]) elif ret_val.who == "parsers.ExactConcept": concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body] @@ -479,6 +517,81 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers return get_lexer_nodes(res.body.body, unrecognized_tokens_node.start, 
unrecognized_tokens_node.tokens) +def update_compiled(context, concept, errors, parsers=None): + """ + recursively iterate thru concept.compiled to replace LexerNode into concepts or list of ReturnValueConcept + When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...) + the result will be a LexerNode. + In the specific case of a ConceptNode, the compiled variables will also be LexerNode (UnrecognizedTokensNode...) + This function iterate thru the compile to transform these nodes into concept of compiled AST + :param context: + :param concept: + :param errors: a list the must be initialized by the caller + :param parsers: to customize the parsers to use + :return: + """ + + sheerka = context.sheerka + parsers = parsers or PARSERS + + def _validate_concept(c): + """ + Recursively browse the compiled properties in order to find unrecognized + :param c: + :return: + """ + for k, v in c.compiled.items(): + if isinstance(v, Concept): + _validate_concept(v) + + elif isinstance(v, SourceCodeWithConceptNode): + from parsers.PythonWithConceptsParser import PythonWithConceptsParser + parser_helper = PythonWithConceptsParser() + res = parser_helper.parse_nodes(context, v.get_all_nodes()) + if res.status: + c.compiled[k] = [res] + else: + errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'")) + + elif isinstance(v, UnrecognizedTokensNode): + res = parse_unrecognized(context, v.source, parsers) + res = only_successful(context, res) # only key successful parsers + if res.status: + c.compiled[k] = res.body.body + else: + errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'")) + + def _get_source(compiled, var_name): + if var_name not in compiled: + return None + if not isinstance(compiled[var_name], list): + return None + if not len(compiled[var_name]) == 1: + return None + if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE): + return None + if not 
sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT): + return None + if compiled[var_name][0].body.name == "parsers.ShortTermMemory": + return None + + return compiled[var_name][0].body.source + + _validate_concept(concept) + + # Special case where the values of the variables are the names of the variable + # example : Concept("a plus b").def_var("a").def_var("b") + # and the user has entered 'a plus b' + # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2') + # This means that 'a' and 'b' don't have any real value + if len(concept.metadata.variables) > 0: + for name, value in concept.metadata.variables: + if _get_source(concept.compiled, name) != name: + break + else: + concept.metadata.is_evaluated = True + + def get_names(sheerka, concept_node): """ Finds all the names referenced by the concept_node @@ -603,10 +716,11 @@ def remove_from_ret_val(sheerka, return_values, concept_key): return return_values -def set_is_evaluated(concepts): +def set_is_evaluated(concepts, check_nb_variables=False): """ set is_evaluated to True :param concepts: + :param check_nb_variables: only set is_evaluated if the concept has variables :return: """ if concepts is None: @@ -614,6 +728,8 @@ def set_is_evaluated(concepts): if hasattr(concepts, "__iter__"): for c in concepts: - c.metadata.is_evaluated = True + if not check_nb_variables or check_nb_variables and len(c.metadata.variables) > 0: + c.metadata.is_evaluated = True else: - concepts.metadata.is_evaluated = True + if not check_nb_variables or check_nb_variables and len(concepts.metadata.variables) > 0: + concepts.metadata.is_evaluated = True diff --git a/src/core/concept.py b/src/core/concept.py index ffc0a1d..eb84024 100644 --- a/src/core/concept.py +++ b/src/core/concept.py @@ -130,7 +130,7 @@ class Concept: if isinstance(other, simplec): return self.name == other.name and self.body == other.body - if isinstance(other, (CC, CB, CV, CMV)): + if 
isinstance(other, (CC, CB, CV, CMV, CIO)): return other == self if not isinstance(other, Concept): @@ -726,4 +726,45 @@ class CMV: return txt + ")" +class CIO: + """ + Concept id only + only test the id + """ + + def __init__(self, concept, source=None): + if isinstance(concept, str): + self.concept_name = concept + self.concept_id = None + self.concept = None + elif isinstance(concept, Concept): + self.concept_id = concept.id + self.concept = concept + self.source = source + self.start = -1 + self.end = -1 + + def set_concept(self, concept): + self.concept = concept + self.concept_id = concept.id + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, Concept): + return self.concept_id == other.id + + if not isinstance(other, CIO): + return False + + return self.concept_id == other.concept_id + + def __hash__(self): + return hash(self.concept_id) + + def __repr__(self): + return f"CIO(concept='{self.concept}')" if self.concept else f"CIO(name='{self.concept_name}')" + + simplec = namedtuple("concept", "name body") # for simple concept (tests purposes only) diff --git a/src/core/sheerka/ExecutionContext.py b/src/core/sheerka/ExecutionContext.py index 063cda8..b573529 100644 --- a/src/core/sheerka/ExecutionContext.py +++ b/src/core/sheerka/ExecutionContext.py @@ -1,7 +1,7 @@ import logging import time -from core.builtin_concepts import BuiltinConcepts +from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.concept import Concept from core.sheerka.services.SheerkaExecute import NO_MATCH from core.sheerka.services.SheerkaShortTermMemory import SheerkaShortTermMemory @@ -309,6 +309,15 @@ class ExecutionContext: def in_private_context(self, concept_key): return concept_key in self.private_hints + def add_to_private_hints (self, concept_key): + self.private_hints.add(concept_key) + + def add_to_protected_hints(self, concept_key): + self.protected_hints.add(concept_key) + + def add_to_global_hints(self, 
concept_key): + self.global_hints.add(concept_key) + @staticmethod def _is_return_value(obj): return isinstance(obj, Concept) and obj.key == str(BuiltinConcepts.RETURN_VALUE) @@ -358,7 +367,11 @@ class ExecutionContext: ret_val = self.values["return_values"] if not isinstance(ret_val, Concept) or not ret_val.key == str(BuiltinConcepts.RETURN_VALUE): return None - return ret_val.status + if ret_val.status: + return True + if isinstance(ret_val.body, ParserResultConcept): + return "Almost" + return False def as_bag(self): """ diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index bc38d3e..aaba5d5 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -558,6 +558,12 @@ class Sheerka(Concept): return self._get_unknown(metadata) def resolve(self, concept): + """ + Try to find a concept by its name, id, or c:: definition + A new instance (using new_from_template()) is returned when it's possible + :param concept: + :return: + """ def new_instances(concepts): if hasattr(concepts, "__iter__"): @@ -567,6 +573,9 @@ class Sheerka(Concept): if concept is None: return None + # ############## + # PREPROCESS + # ############## # if the entry is a concept token, use its values. 
if isinstance(concept, Token): if concept.type != TokenKind.CONCEPT: @@ -578,6 +587,9 @@ class Sheerka(Concept): (tmp := core.utils.unstr_concept(concept)) != (None, None): concept = tmp + # ############## + # PROCESS + # ############## # if the entry is a tuple # concept[0] is the name # concept[1] is the id @@ -599,7 +611,7 @@ class Sheerka(Concept): if isinstance(concept, str): if self.is_known(found := self.get_by_name(concept)): instances = new_instances(found) - core.builtin_helpers.set_is_evaluated(instances) + core.builtin_helpers.set_is_evaluated(instances, check_nb_variables=True) return instances return None diff --git a/src/core/sheerka/services/SheerkaAdmin.py b/src/core/sheerka/services/SheerkaAdmin.py index 71ffb0b..47f576e 100644 --- a/src/core/sheerka/services/SheerkaAdmin.py +++ b/src/core/sheerka/services/SheerkaAdmin.py @@ -5,7 +5,7 @@ from core.sheerka.services.sheerka_service import BaseService CONCEPTS_FILE = "_concepts_lite.txt" CONCEPTS_FILE_ALL_CONCEPTS = "_concepts.txt" -CONCEPTS_FILE_TO_USE = CONCEPTS_FILE_ALL_CONCEPTS +CONCEPTS_FILE_TO_USE = CONCEPTS_FILE class SheerkaAdmin(BaseService): NAME = "Admin" @@ -47,6 +47,9 @@ class SheerkaAdmin(BaseService): if concept_file == "full": concept_file = CONCEPTS_FILE_ALL_CONCEPTS + elif not concept_file.startswith("_concepts"): + concept_file = f"_concepts_{concept_file}.txt" + try: start = time.time_ns() nb_lines = 0 diff --git a/src/core/sheerka/services/SheerkaExecute.py b/src/core/sheerka/services/SheerkaExecute.py index c9bfabf..245d834 100644 --- a/src/core/sheerka/services/SheerkaExecute.py +++ b/src/core/sheerka/services/SheerkaExecute.py @@ -2,7 +2,7 @@ import core.utils from cache.Cache import Cache from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.sheerka.services.sheerka_service import BaseService -from core.tokenizer import Tokenizer, TokenKind, Keywords, Token +from core.tokenizer import Tokenizer, TokenKind, Token NO_MATCH = "** No Match **" @@ -88,6 
+88,20 @@ class ParserInput: return self.pos < self.end + def seek(self, pos): + """ + Move the token offset to position pos + :param pos: + :return: True is pos is a valid position False otherwise + """ + if pos < 0 or pos >= self.end: + self.token = None + return False + + self.pos = pos + self.token = self.tokens[self.pos] + return True + def is_empty(self): if self.text.strip() == "": return True @@ -116,7 +130,6 @@ class ParserInput: tokens = [tokens] switcher = { - TokenKind.KEYWORD: lambda t: Keywords(t.value).value, TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), } diff --git a/src/core/sheerka/services/SheerkaModifyConcept.py b/src/core/sheerka/services/SheerkaModifyConcept.py index d85ed37..d81bb32 100644 --- a/src/core/sheerka/services/SheerkaModifyConcept.py +++ b/src/core/sheerka/services/SheerkaModifyConcept.py @@ -31,6 +31,7 @@ class SheerkaModifyConcept(BaseService): if old_version == concept: # the concept is not modified + # This is an important sanity check. 
Do no remove because you don't understand it return self.sheerka.ret( self.NAME, False, self.sheerka.new( diff --git a/src/core/sheerka/services/SheerkaVariableManager.py b/src/core/sheerka/services/SheerkaVariableManager.py index ecd3cb7..4feb7c1 100644 --- a/src/core/sheerka/services/SheerkaVariableManager.py +++ b/src/core/sheerka/services/SheerkaVariableManager.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import List from cache.Cache import Cache +from core.builtin_concepts import BuiltinConcepts from core.sheerka.services.sheerka_service import ServiceObj, BaseService @@ -48,6 +49,7 @@ class SheerkaVariableManager(BaseService): variable = Variable(context.event.get_digest(), who, key, value, None) self.sheerka.cache_manager.put(self.VARIABLES_ENTRY, variable.get_key(), variable) + return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS)) def load(self, who, key): variable = self.sheerka.cache_manager.get(self.VARIABLES_ENTRY, who + "|" + key) diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index 72147ed..bb467e4 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -62,6 +62,7 @@ class Token: _strip_quote: str = field(default=None, repr=False, compare=False, hash=None) _str_value: str = field(default=None, repr=False, compare=False, hash=None) + _repr_value: str = field(default=None, repr=False, compare=False, hash=None) def __repr__(self): if self.type == TokenKind.IDENTIFIER: @@ -82,7 +83,7 @@ class Token: if self._strip_quote: return self._strip_quote - self._strip_quote = self._to_str(True) + self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value return self._strip_quote @property @@ -90,18 +91,36 @@ class Token: if self._str_value: return self._str_value - self._str_value = self._to_str(False) + self._str_value = self.to_str(False) return self._str_value + @property + def repr_value(self): + if self._repr_value: + return self._repr_value + + if self.type == 
TokenKind.EOF: + self._repr_value = "" + elif self.type == TokenKind.WHITESPACE: + self._repr_value = "" + elif self.type == TokenKind.NEWLINE: + self._repr_value = "" + else: + self._repr_value = self.str_value + return self._repr_value + @staticmethod def is_whitespace(token): return token and token.type == TokenKind.WHITESPACE - def _to_str(self, strip_quote): + def to_str(self, strip_quote): if strip_quote and self.type == TokenKind.STRING: return self.value[1:-1] elif self.type == TokenKind.KEYWORD: return self.value.value + elif self.type == TokenKind.CONCEPT: + from core.utils import str_concept + return str_concept(self.value) else: return str(self.value) @@ -136,8 +155,6 @@ class Tokenizer: Class that can iterate on the tokens """ - KEYWORDS = set(x.value for x in Keywords) - def __init__(self, text, yield_eof=True, parse_word=False): self.text = text self.text_len = len(text) @@ -175,9 +192,7 @@ class Tokenizer: from core.concept import VARIABLE_PREFIX if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha(): identifier = self.eat_identifier(self.i) - token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER - value = Keywords(identifier) if identifier in self.KEYWORDS else identifier - yield Token(token_type, value, self.i, self.line, self.column) + yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column) self.i += len(identifier) self.column += len(identifier) elif self.i + 7 < self.text_len and \ @@ -335,11 +350,9 @@ class Tokenizer: yield Token(TokenKind.WORD, word, self.i, self.line, self.column) self.i += len(word) self.column += len(word) - elif c.isalpha() or c == "_": + elif c.isalpha(): identifier = self.eat_identifier(self.i) - token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER - value = Keywords(identifier) if identifier in self.KEYWORDS else identifier - yield Token(token_type, value, self.i, self.line, self.column) + yield 
Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column) self.i += len(identifier) self.column += len(identifier) elif c.isdigit(): @@ -457,7 +470,7 @@ class Tokenizer: i = start_index + 1 escape = False - #newline = None + # newline = None while i < self.text_len: c = self.text[i] result += c diff --git a/src/core/utils.py b/src/core/utils.py index 34bf093..4db3e7a 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -296,6 +296,28 @@ def dict_product(a, b): return res +def get_n_clones(obj, n): + objs = [obj] + for i in range(n - 1): + objs.append(obj.clone()) + return objs + + +def obj_product(list_of_objs, new_items, add_item): + if list_of_objs is None or len(list_of_objs) == 0: + return list_of_objs + + res = [] + + for obj in list_of_objs: + instances = get_n_clones(obj, len(new_items)) + res.extend(instances) + for instance, item in zip(instances, new_items): + add_item(instance, item) + + return res + + def strip_quotes(text): if not isinstance(text, str): return text diff --git a/src/evaluators/AddConceptEvaluator.py b/src/evaluators/AddConceptEvaluator.py index f3802a4..3718814 100644 --- a/src/evaluators/AddConceptEvaluator.py +++ b/src/evaluators/AddConceptEvaluator.py @@ -1,6 +1,7 @@ import core.utils from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind, Tokenizer from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.BaseParser import NotInitializedNode @@ -67,7 +68,8 @@ class AddConceptEvaluator(OneReturnValueEvaluator): elif isinstance(part_ret_val, NameNode): source = str(part_ret_val) elif isinstance(part_ret_val, ReturnValueConcept) and part_ret_val.status: - source = part_ret_val.value.source + source = part_ret_val.value.source.as_text() if isinstance(part_ret_val.value.source, + ParserInput) else 
part_ret_val.value.source else: raise Exception("Unexpected") setattr(concept.metadata, prop, source) @@ -143,7 +145,9 @@ class AddConceptEvaluator(OneReturnValueEvaluator): # if isinstance(ret_value.value, ParserResultConcept) and len(concept_name) > 1: variables = set() - tokens = ret_value.value.tokens or list(Tokenizer(ret_value.value.source, yield_eof=False)) + source = ret_value.value.source.as_text() if isinstance(ret_value.value.source, + ParserInput) else ret_value.value.source + tokens = ret_value.value.tokens or list(Tokenizer(source, yield_eof=False)) tokens = [t.str_value for t in tokens] for identifier in [i for i in concept_name if str(i).isalnum()]: if identifier in tokens: diff --git a/src/evaluators/LexerNodeEvaluator.py b/src/evaluators/LexerNodeEvaluator.py index 065ef6a..7c3e6cc 100644 --- a/src/evaluators/LexerNodeEvaluator.py +++ b/src/evaluators/LexerNodeEvaluator.py @@ -1,7 +1,7 @@ from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.BaseNodeParser import SourceCodeNode, ConceptNode -from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode +from parsers.PythonWithConceptsParser import PythonWithConceptsParser class LexerNodeEvaluator(OneReturnValueEvaluator): @@ -82,19 +82,10 @@ class LexerNodeEvaluator(OneReturnValueEvaluator): def evaluate_python_code(self, context, nodes): sheerka = context.sheerka - helper = LexerNodeParserHelperForPython() - result = helper.parse(context, nodes) - - if isinstance(result, PythonNode): - return sheerka.ret( - self.name, - True, - sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=result.source, - body=result, - try_parsed=None)) + parser = PythonWithConceptsParser() + result = parser.parse_nodes(context, nodes) + if result: + return result else: return sheerka.ret( self.name, diff --git a/src/evaluators/PythonEvaluator.py b/src/evaluators/PythonEvaluator.py index 
837bec7..bdd0237 100644 --- a/src/evaluators/PythonEvaluator.py +++ b/src/evaluators/PythonEvaluator.py @@ -40,6 +40,7 @@ class Expando: def __repr__(self): return f"{dir(self)}" + @dataclass class PythonEvalError: error: Exception @@ -59,13 +60,19 @@ class PythonEvaluator(OneReturnValueEvaluator): self.globals = {} def matches(self, context, return_value): - return return_value.status and \ - isinstance(return_value.value, ParserResultConcept) and \ - isinstance(return_value.value.value, PythonNode) + if not return_value.status or not isinstance(return_value.value, ParserResultConcept): + return False + body = return_value.value.value + return isinstance(body, PythonNode) or ( + hasattr(body, "python_node") and isinstance(body.python_node, PythonNode)) + # return return_value.status and \ + # isinstance(return_value.value, ParserResultConcept) and \ + # isinstance(return_value.value.value, PythonNode) def eval(self, context, return_value): sheerka = context.sheerka - node = return_value.value.value + node = return_value.value.value if isinstance(return_value.value.value, PythonNode) else \ + return_value.value.value.python_node context.log(f"Evaluating python node {node}.", self.name) diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index 6aead37..0186dfb 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -4,8 +4,8 @@ from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts from core.concept import DEFINITION_TYPE_BNF, Concept from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer -from core.utils import strip_tokens +from core.tokenizer import Tokenizer, TokenKind +from core.utils import strip_tokens, make_unique from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode @@ -228,6 +228,34 @@ class AtomNodeParser(BaseNodeParser): 
""" return len(concept.metadata.variables) == 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF + def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False): + + def new_instances(list_of_concepts): + if list_of_concepts is None: + return None + + return [self.context.sheerka.new_from_template(c, c.id) for c in list_of_concepts] + + if token.type == TokenKind.WHITESPACE: + return None + + def as_list(a): + if a is None: + return a + + return a if isinstance(a, list) else [a] + + concepts_by_name = as_list(self.sheerka.resolve(token.value)) + concepts_by_first_keyword = new_instances(super().get_concepts(token, self._is_eligible)) + + if concepts_by_name is None: + return concepts_by_first_keyword + + if concepts_by_first_keyword is None: + return concepts_by_name + + return make_unique(concepts_by_name + concepts_by_first_keyword, lambda c: c.id) + def get_concepts_sequences(self): forked = [] @@ -242,13 +270,6 @@ class AtomNodeParser(BaseNodeParser): concept_parser_helpers.extend(forked) forked.clear() - def _get_concepts_by_name(name): - other_concepts = self.sheerka.get_by_name(name) - if isinstance(other_concepts, list): - return other_concepts - - return [other_concepts] if self.sheerka.is_known(other_concepts) else [] - concept_parser_helpers = [AtomConceptParserHelper(self.context)] while self.parser_input.next_token(False): @@ -263,8 +284,8 @@ class AtomNodeParser(BaseNodeParser): if concept_parser.eat_token(token, pos): concept_parser.lock() - concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name) - #self.context.log(f"concepts found for {token=}: {concepts}", who=self.name) + concepts = self.get_concepts(token, self._is_eligible) + # self.context.log(f"concepts found for {token=}: {concepts}", who=self.name) if not concepts: for concept_parser in concept_parser_helpers: concept_parser.eat_unrecognized(token, pos) @@ -303,12 +324,13 @@ class AtomNodeParser(BaseNodeParser): def 
get_by_name(self): """ - Try to recognize the full parser input as a concept name + Use the whole input to recognize the concepts + It will use the name of the concept, but also its compact form (c::) :return: """ source = self.parser_input.as_text() - concepts = self.sheerka.get_by_name(source.strip()) - if not self.sheerka.is_known(concepts): + concepts = self.sheerka.resolve(source.strip()) + if concepts is None: return None concepts = [concepts] if isinstance(concepts, Concept) else concepts @@ -316,17 +338,27 @@ class AtomNodeParser(BaseNodeParser): start, end = self.get_tokens_boundaries(self.parser_input.as_tokens()) for concept in concepts: parser_helper = AtomConceptParserHelper(None) - parser_helper.sequence.append(ConceptNode( - concept, - start, - end, - strip_tokens(self.parser_input.as_tokens(), True), source)) + parser_helper.sequence.append(ConceptNode(concept, + start, + end, + strip_tokens(self.parser_input.as_tokens(), True), source)) res.append(parser_helper) return res def get_valid(self, concept_parser_helpers): valid_parser_helpers = [] # be careful, it will be a list of list + already_seen = set() + + def compute_hash_code(ph): + """ + compute a hash code for already seen parser helper + :param ph: + :return: + """ + return "#".join( + [f"c:|{n.concept.id}:" if isinstance(n, ConceptNode) else n.source for n in ph.sequence]) + for parser_helper in concept_parser_helpers: if parser_helper.has_error(): continue @@ -335,16 +367,18 @@ class AtomNodeParser(BaseNodeParser): continue for node in parser_helper.sequence: - if isinstance(node, ConceptNode): - if len(node.concept.metadata.variables) > 0: - node.concept.metadata.is_evaluated = True # Do not try to evaluate those concepts + # if isinstance(node, ConceptNode): + # if len(node.concept.metadata.variables) > 0: + # node.concept.metadata.is_evaluated = True # Do not try to evaluate those concepts node.tokens = self.parser_input.tokens[node.start:node.end + 1] node.fix_source() - if 
parser_helper in valid_parser_helpers: + parser_helper_hash_code = compute_hash_code(parser_helper) + if parser_helper_hash_code in already_seen: continue valid_parser_helpers.append(parser_helper) + already_seen.add(parser_helper_hash_code) return valid_parser_helpers diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index 58877d0..f3bf9c8 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -7,7 +7,7 @@ import core.utils from core.builtin_concepts import BuiltinConcepts from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import TokenKind, LexerError, Token, Keywords +from core.tokenizer import TokenKind, LexerError, Token from parsers.BaseParser import Node, BaseParser, ErrorNode DEBUG_COMPILED = True @@ -46,14 +46,18 @@ class LexerNode(Node): def clone(self): pass + def to_short_str(self): + raise NotImplementedError + class UnrecognizedTokensNode(LexerNode): def __init__(self, start, end, tokens): super().__init__(start, end, tokens) - self.is_frozen = False + self.is_frozen = False # TODO: Remove as it seems to now be useless self.parenthesis_count = 0 def freeze(self): + # TODO: Remove as it seems to now be useless self.is_frozen = True def reset(self): @@ -61,6 +65,7 @@ class UnrecognizedTokensNode(LexerNode): self.tokens.clear() self.is_frozen = False self.parenthesis_count = 0 + self.source = "" def add_token(self, token, pos): if self.is_frozen: @@ -135,7 +140,7 @@ class UnrecognizedTokensNode(LexerNode): return hash((self.start, self.end, self.source)) def __repr__(self): - return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')" + return f"UnrecognizedTokensNode(source='{self.source}', start={self.start}, end={self.end})" def clone(self): clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:]) @@ -143,6 +148,9 @@ class 
UnrecognizedTokensNode(LexerNode): clone.parenthesis_count = self.parenthesis_count return clone + def to_short_str(self): + return f"UTN('{self.source}')" + class ConceptNode(LexerNode): """ @@ -209,15 +217,30 @@ class ConceptNode(LexerNode): # bag["compiled"] = self.concept.compiled return bag + def to_short_str(self): + return f'CN({self.concept})' + class SourceCodeNode(LexerNode): """ Returned when some source code (like Python source code is recognized) """ - def __init__(self, node, start, end, tokens=None, source=None, return_value=None): + def __init__(self, start, end, tokens=None, source=None, python_node=None, return_value=None): + """ + + :param start: start position (index of the first token) + :param end: end position (index of the last token) + :param tokens: + :param source: tokens as string + :param python_node: PythonNode found (when the SourceCodeNode is validated) + :param return_value: ReturnValueConcept returned when the source was validated + + When return_value is provided, + You should have return_value.body.body == node + """ super().__init__(start, end, tokens, source) - self.node = node # The PythonNode (or whatever language node) that is found + self.python_node = python_node # The PythonNode (or whatever language node) that is found self.return_value = return_value # original result of the parsing def __eq__(self, other): @@ -232,7 +255,7 @@ class SourceCodeNode(LexerNode): if not isinstance(other, SourceCodeNode): return False - return self.node == other.node and \ + return self.python_node == other.python_node and \ self.start == other.start and \ self.end == other.end and \ self.source == other.source @@ -243,6 +266,9 @@ class SourceCodeNode(LexerNode): def __repr__(self): return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')" + def to_short_str(self): + return f"SCN('{self.source}')" + class SourceCodeWithConceptNode(LexerNode): """ @@ -254,17 +280,22 @@ class SourceCodeWithConceptNode(LexerNode): So 
I push all the nodes into one big bag """ - def __init__(self, first_node, last_node, content_nodes=None): + def __init__(self, first_node, last_node, content_nodes=None, has_unrecognized=False): super().__init__(9999, -1, None) # why not sys.maxint ? self.first = first_node self.last = last_node self.nodes = content_nodes or [] - self.has_unrecognized = False + self.has_unrecognized = has_unrecognized + self._all_nodes = None self.fix_all_pos() + self.python_node = None # if the source code node is validated against a python parse, here is the PythonNode + self.return_value = None # return_value that produced the PythonNode + def add_node(self, node): self.nodes.append(node) self.fix_pos(node) + self._all_nodes = None return self @@ -304,6 +335,9 @@ class SourceCodeWithConceptNode(LexerNode): return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')" def fix_all_pos(self): + if self.first is None: # to ease some unit test where only the python_node is necessary + return + for n in [self.first, self.last] + self.nodes: self.fix_pos(n) @@ -334,10 +368,20 @@ class SourceCodeWithConceptNode(LexerNode): self.source += self.last.source return self + def get_all_nodes(self): + if self._all_nodes: + return self._all_nodes + + self._all_nodes = [self.first, *self.nodes, self.last] + return self._all_nodes + def clone(self): - clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes) + clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes.copy(), self.has_unrecognized) return clone + def to_short_str(self): + return f"SCWC({self.first}" + ", ".join(n.to_short_str for n in self.nodes) + f"{self.last})" + @dataclass() class GrammarErrorNode(ErrorNode): @@ -479,7 +523,7 @@ class SCWC(HelperWithPos): TODO: create a common function or whatever... 
:return: """ - source = self.first.source + source = self.first.source if hasattr(self.first, "source") else self.first for n in self.content: source += " " if hasattr(n, "source"): @@ -488,7 +532,7 @@ class SCWC(HelperWithPos): source += str(n.concept) else: source += " unknown" - source += self.last.source + source += self.last.source if hasattr(self.last, "source") else self.last return source @@ -514,7 +558,7 @@ class CN(HelperWithPos): self.concept = concept if isinstance(concept, Concept) else None def fix_source(self, str_tokens): - self.source = "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens]) + self.source = "".join(str_tokens) return self def __eq__(self, other): @@ -660,7 +704,7 @@ class UTN(HelperWithPos): return hash((self.source, self.start, self.end)) def __repr__(self): - txt = f"UTN( source='{self.source}'" + txt = f"UTN(source='{self.source}'" if self.start is not None: txt += f", start={self.start}" if self.end is not None: @@ -733,7 +777,7 @@ class BaseNodeParser(BaseParser): else: name = token.value - custom_concepts = custom(name) if custom else [] + custom_concepts = custom(name) if custom else [] # to get extra concepts using an alternative method result = [] if name in self.concepts_by_first_keyword: @@ -746,6 +790,7 @@ class BaseNodeParser(BaseParser): concept = to_map(self, concept) if to_map else concept result.append(concept) + return core.utils.make_unique(result + custom_concepts, lambda c: c.concept.id if hasattr(c, "concept") else c.id) diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index b68b9b5..66410c2 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -5,8 +5,9 @@ import core.utils from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.concept import Concept from core.sheerka.ExecutionContext import ExecutionContext +from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka_logger import get_logger -from 
core.tokenizer import TokenKind, Keywords, Token, Tokenizer +from core.tokenizer import TokenKind, Token, Tokenizer, LexerError # # keep a cache for the parser input @@ -118,6 +119,20 @@ class BaseParser: def __repr__(self): return self.name + def reset_parser(self, context, parser_input: ParserInput): + self.context = context + self.sheerka = context.sheerka + self.parser_input = parser_input + self.error_sink.clear() + + try: + self.parser_input.reset(False) + self.parser_input.next_token() + except LexerError as e: + self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) + return False + return True + def parse(self, context, parser_input): pass @@ -227,15 +242,14 @@ class BaseParser: tokens = [tokens] switcher = { - TokenKind.KEYWORD: lambda t: Keywords(t.value).value, - TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), + # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), } if custom_switcher: switcher.update(custom_switcher) for token in tokens: - value = switcher.get(token.type, lambda t: t.value)(token) + value = switcher.get(token.type, lambda t: t.str_value)(token) res += value if tracker is not None and token.type in custom_switcher: tracker[value] = token.value diff --git a/src/parsers/DefaultParser.py b/src/parsers/DefaultParser.py index 6227660..f6a9405 100644 --- a/src/parsers/DefaultParser.py +++ b/src/parsers/DefaultParser.py @@ -201,12 +201,12 @@ class DefaultParser(BaseParser): def parse_statement(self): token = self.parser_input.token - if token.value == Keywords.DEF: + if token.value == Keywords.DEF.value: self.parser_input.next_token() self.context.log("Keyword DEF found.", self.name) return self.parse_def_concept(token) - else: - return self.parse_isa_concept() + + return self.add_error(CannotHandleErrorNode([token], "")) def parse_def_concept(self, def_token): """ @@ -250,44 +250,15 @@ class DefaultParser(BaseParser): return concept_found - def parse_isa_concept(self): - concept_name = 
self.parse_concept_name() - if isinstance(concept_name, DefaultParserErrorNode): - return concept_name - - keyword = [] - token = self.parser_input.token - if token.value != Keywords.ISA: - return self.add_error(CannotHandleErrorNode([token], "")) - keyword.append(token) - self.parser_input.next_token() - - set_name = self.parse_concept_name() - return IsaConceptNode(keyword, concept_name, set_name) - - def parse_concept_name(self): - tokens = [] - token = self.parser_input.token - - while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD): - tokens.append(token) - self.parser_input.next_token() - token = self.parser_input.token - - if len(tokens) == 0: - return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", [])) - else: - return NameNode(tokens) - def regroup_tokens_by_parts(self, keywords_tokens): - def_concept_parts = [Keywords.CONCEPT, - Keywords.FROM, - Keywords.AS, - Keywords.WHERE, - Keywords.PRE, - Keywords.POST, - Keywords.RET] + def_concept_parts = [Keywords.CONCEPT.value, + Keywords.FROM.value, + Keywords.AS.value, + Keywords.WHERE.value, + Keywords.PRE.value, + Keywords.POST.value, + Keywords.RET.value] # tokens found, when trying to recognize the parts tokens_found_by_parts = { @@ -307,7 +278,7 @@ class DefaultParser(BaseParser): while token.type != TokenKind.EOF: if token.value in def_concept_parts: keywords_tokens.append(token) # keep track of the keywords - keyword = token.value + keyword = Keywords(token.value) if tokens_found_by_parts[keyword]: # a part is defined more than once self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations.")) @@ -327,7 +298,7 @@ class DefaultParser(BaseParser): def get_concept_name(self, first_token, tokens_found_by_parts): name_first_token_index = 1 token = self.parser_input.token - if first_token.value != Keywords.CONCEPT: + if first_token.value != Keywords.CONCEPT.value: self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", 
[Keywords.CONCEPT])) name_first_token_index = 0 @@ -353,7 +324,7 @@ class DefaultParser(BaseParser): self.add_error(SyntaxErrorNode([], "Empty declaration"), False) return None, NotInitializedNode() - if definition_tokens[1].value == Keywords.BNF: + if definition_tokens[1].value == Keywords.BNF.value: return self.get_concept_bnf_definition(current_concept_def, definition_tokens) return self.get_concept_simple_definition(definition_tokens) @@ -381,7 +352,7 @@ class DefaultParser(BaseParser): return DEFINITION_TYPE_BNF, parsing_result def get_concept_simple_definition(self, definition_tokens): - start = 2 if definition_tokens[1].value == Keywords.DEF else 1 + start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1 tokens = core.utils.strip_tokens(definition_tokens[start:]) if len(tokens) == 0: self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False) diff --git a/src/parsers/ExactConceptParser.py b/src/parsers/ExactConceptParser.py index 5176a3b..70868a1 100644 --- a/src/parsers/ExactConceptParser.py +++ b/src/parsers/ExactConceptParser.py @@ -2,9 +2,9 @@ import logging import core.builtin_helpers from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts -from core.concept import VARIABLE_PREFIX, ConceptParts +from core.concept import VARIABLE_PREFIX from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Keywords, TokenKind, LexerError +from core.tokenizer import TokenKind, LexerError from core.utils import str_concept from parsers.BaseParser import BaseParser @@ -56,6 +56,7 @@ class ExactConceptParser(BaseParser): concepts = result if isinstance(result, list) else [result] for concept in concepts: + # update the variables of the freshly recognized concept if concept in already_recognized: context.log(f"Recognized concept {concept} again. 
Skipping.", self.name) # example @@ -105,7 +106,7 @@ class ExactConceptParser(BaseParser): break if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE: continue - res.append(t.value.value if isinstance(t.value, Keywords) else t.value) + res.append(t.value) return res def combinations(self, iterable): diff --git a/src/parsers/ExpressionParser.py b/src/parsers/ExpressionParser.py index 349a5d5..ed2d617 100644 --- a/src/parsers/ExpressionParser.py +++ b/src/parsers/ExpressionParser.py @@ -191,23 +191,8 @@ class ExpressionParser(BaseParser): def __init__(self, **kwargs): super().__init__("Expression", 50, False) - def reset_parser(self, context, parser_input: ParserInput): - self.context = context - self.sheerka = context.sheerka - self.parser_input = parser_input - self.error_sink.clear() - - try: - self.parser_input.reset(False) - self.parser_input.next_token() - except LexerError as e: - self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) - return False - return True - def parse(self, context, parser_input: ParserInput): """ - parser_input can be string, but text can also be an list of tokens :param context: :param parser_input: :return: diff --git a/src/parsers/FunctionParser.py b/src/parsers/FunctionParser.py new file mode 100644 index 0000000..ec15036 --- /dev/null +++ b/src/parsers/FunctionParser.py @@ -0,0 +1,407 @@ +from dataclasses import dataclass +from typing import List + +from core.builtin_concepts import BuiltinConcepts +from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import TokenKind, Token +from core.utils import get_n_clones +from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode +from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, Node +from parsers.PythonWithConceptsParser import PythonWithConceptsParser + +# No need to check 
for Python code as the source code node will resolve to python code anyway +# I only look for concepts, so +PARSERS = ["BnfNode", "SyaNode", "AtomNode"] + + +@dataclass +class FunctionParserNode(Node): + pass + + +@dataclass() +class NamesNode(FunctionParserNode): + start: int # index of the first token + end: int # index of the last token + tokens: List[Token] + + def __repr__(self): + return f"NameNode('{self.str_value()}')" + + def str_value(self): + if self.tokens is None: + return None + + return "".join([t.str_value for t in self.tokens]) + + def to_unrecognized(self): + return UnrecognizedTokensNode(self.start, self.end, self.tokens).fix_source() + + +@dataclass() +class FunctionParameter: + """ + class the represent result of the parameter parsing + """ + value: NamesNode # value parsed + separator: NamesNode = None # holds the value and the position of the separator + + def add_sep(self, start, end, tokens): + self.separator = NamesNode(start, end, tokens) + + def value_to_unrecognized(self): + return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source() + + def separator_to_unrecognized(self): + if self.separator is None: + return None + return UnrecognizedTokensNode(self.separator.start, self.separator.end, self.separator.tokens).fix_source() + + +@dataclass +class FunctionNode(FunctionParserNode): + first: NamesNode # beginning of the function (it should represent the name of the function) + last: NamesNode # last part of the function (it should be the trailing parenthesis) + parameters: list + + +class FN(FunctionNode): + """ + Test class only + It matches with FunctionNode but with less constraints + + Thereby, + FN("first", "last", ["param1," ...]) can be compared to + FunctionNode(NamesNode("first"), NamesNode("second"), [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")]) + + Note that FunctionParameter can easily be defined with a single string + * "param" -> FunctionParameter(NamesNode("param"), None) + * 
"param, " -> FunctionParameter(NamesNode("param"), NamesNode(", ")) + For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part + """ + + def __init__(self, first, last, parameters): + self.first = first + self.last = last + self.parameters = [] + for param in parameters: + if isinstance(param, tuple): + self.parameters.append(param) + elif isinstance(param, str) and (pos := param.find(",")) != -1: + self.parameters.append((param[:pos], param[pos:])) + else: + self.parameters.append((param, None)) + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, FN): + return self.first == other.first and self.last == other.last and self.parameters == other.parameters + + if isinstance(other, FunctionNode): + if self.first != other.first.str_value() or self.last != other.last.str_value(): + return False + if len(self.parameters) != len(other.parameters): + return False + for self_parameter, other_parameter in zip(self.parameters, other.parameters): + value = other_parameter.value.str_value() if isinstance(self_parameter[0], + str) else other_parameter.value + sep = other_parameter.separator.str_value() if other_parameter.separator else None + if self_parameter[0] != value or self_parameter[1] != sep: + return False + + return True + + return False + + def __hash__(self): + return hash((self.first, self.last, self.parameters)) + + +class FunctionParser(BaseParser): + """ + The parser will be used to parse func(x, y, z) + where x, y and z can be source code, concepts or other functions + It will return a SourceCodeNode or SourceCodeNodeWithConcept + """ + + def __init__(self, sep=",", longest_concepts_only=True, **kwargs): + """ + + :param sep: + :param longest_concepts_only: When multiples concepts are found, only keep the longest one + so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]] + :param kwargs: + """ + super().__init__("Function", 
55, True) + self.sep = sep + self.longest_concepts_only = longest_concepts_only + self.record_errors = True + + def add_error(self, error, next_token=True): + if not self.record_errors: + return + + return super().add_error(error, next_token) + + def parse(self, context, parser_input: ParserInput): + """ + + :param context: + :param parser_input: + :return: + """ + + if not isinstance(parser_input, ParserInput): + return None + + context.log(f"Parsing '{parser_input}' with FunctionParser", self.name) + sheerka = context.sheerka + + if parser_input.is_empty(): + return sheerka.ret(self.name, + False, + sheerka.new(BuiltinConcepts.IS_EMPTY)) + + if not self.reset_parser(context, parser_input): + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + + node = self.parse_function() + + if self.parser_input.next_token(): + self.add_error(UnexpectedTokenErrorNode("Only one function supported", + self.parser_input.token, + [TokenKind.EOF])) + + if self.has_error: + if node is None: + body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, + body=parser_input.as_text(), + reason=self.error_sink) + else: + body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink) + return self.sheerka.ret(self.name, False, body) + + source_code_nodes = self.to_source_code_node(node) + + res = [] + for source_code_node in source_code_nodes: + value = self.get_return_value_body(context.sheerka, + self.parser_input.as_text(), + source_code_node, + source_code_node) + + res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, value)) + + return res[0] if len(res) == 1 else res + + def parse_function(self): + + start = self.parser_input.pos + token = self.parser_input.token + if token.type != TokenKind.IDENTIFIER: + self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a identifier", + token, + [TokenKind.IDENTIFIER])) + return None + + if not self.parser_input.next_token(): + 
self.add_error(UnexpectedEof(f"Unexpected EOF while parsing left parenthesis")) + return None + + token = self.parser_input.token + if token.type != TokenKind.LPAR: + self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a left parenthesis", + token, + [TokenKind.LPAR])) + return None + + start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2]) + if not self.parser_input.next_token(): + self.add_error(UnexpectedEof(f"Unexpected EOF after left parenthesis")) + return FunctionNode(start_node, None, None) + + params = self.parse_parameters() + if self.has_error: + return FunctionNode(start_node, None, params) + + token = self.parser_input.token + if token.type != TokenKind.RPAR: + self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found", + token, + [TokenKind.RPAR])) + return FunctionNode(start_node, None, params) + + return FunctionNode(start_node, + NamesNode(self.parser_input.pos, self.parser_input.pos, [token]), + params) + + def parse_parameters(self): + nodes = [] + while True: + param_value = self.parse_parameter_value() + if not param_value: + break + + function_parameter = FunctionParameter(param_value) + nodes.append(function_parameter) + + token = self.parser_input.token + if token.type == TokenKind.EOF: + self.add_error(UnexpectedEof(f"Unexpected EOF while parsing parameters")) + return None + + if token.type == TokenKind.RPAR: + break + + if token.value == self.sep: + sep_pos = self.parser_input.pos + self.parser_input.next_token() + function_parameter.add_sep(sep_pos, + self.parser_input.pos - 1, + self.parser_input.tokens[sep_pos: self.parser_input.pos]) + + return nodes + + def parse_parameter_value(self): + # check if the parameter is a function + start_pos = self.parser_input.pos + self.record_errors = False + func = self.parse_function() + self.record_errors = True + if func: + self.parser_input.next_token() + return func + + # otherwise, eat until LPAR or separator + 
self.parser_input.seek(start_pos) + self.record_errors = True + tokens = [] + while True: + token = self.parser_input.token + # if token is None: + # break + + if token.value == self.sep or token.type == TokenKind.RPAR: + break + + tokens.append(token) + if not self.parser_input.next_token(skip_whitespace=False): + break + + return NamesNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None + + def to_source_code_node(self, function_node: FunctionNode): + python_parser = PythonWithConceptsParser() + + if len(function_node.parameters) == 0: + # validate the source + nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()] + python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse) + python_node = python_parsing_res.body.body if python_parsing_res.status else None + + return [SourceCodeNode(start=function_node.first.start, + end=function_node.last.end, + tokens=function_node.first.tokens + function_node.last.tokens, + python_node=python_node, + return_value=python_parsing_res)] + + def update_source_code_node(scn, nodes, sep): + if hasattr(nodes, "__iter__"): + for n in nodes: + scn.add_node(n) + else: + scn.add_node(nodes) + + if sep: + scn.add_node(sep.to_unrecognized()) + + res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(), function_node.last.to_unrecognized())] + for param in function_node.parameters: + if isinstance(param.value, NamesNode): + unrecognized = param.value.to_unrecognized() + # try to recognize concepts + nodes_sequences = get_lexer_nodes_from_unrecognized(self.context, + unrecognized, + PARSERS) + else: + # the parameter is also a function + nodes_sequences = self.to_source_code_node(param.value) + + if self.longest_concepts_only: + nodes_sequences = self.get_longest_concepts(nodes_sequences) + + if nodes_sequences is None: + # no concept found + for source_code_node in res: + update_source_code_node(source_code_node, unrecognized, param.separator) + 
+ elif len(nodes_sequences) == 1: + # only one result + # It is the same code than when there are multiple results + # But here, we save the creation of the tmp_res object (not sure it worth it) + for source_code_node in res: + update_source_code_node(source_code_node, nodes_sequences[0], param.separator) + else: + # multiple result, make the cartesian product + tmp_res = [] + for source_code_node in res: + instances = get_n_clones(source_code_node, len(nodes_sequences)) + tmp_res.extend(instances) + for instance, node_sequence in zip(instances, nodes_sequences): + update_source_code_node(instance, node_sequence, param.separator) + res = tmp_res + + # check if it is a valid source code + for source_code_node in res: + source_code_node.fix_all_pos() + source_code_node.pseudo_fix_source() + + python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes()) + if python_parsing_res.status: + source_code_node.python_node = python_parsing_res.body.body + source_code_node.return_value = python_parsing_res + + # make sure that concepts found can be evaluated + errors = [] + for c in source_code_node.python_node.concepts.values(): + update_compiled(self.context, c, errors) + + return res + + @staticmethod + def get_longest_concepts(nodes_sequences): + """ + The longest sequences are the ones that have the less number of concepts + For example + 'twenty one' resolves to + [c:twenty one:] + [c:twenty:, c:one:] + [c:twenty one:] has only one concept, so it's the longest one (two tokens against one token twice) + :param nodes_sequences: + :return: + """ + if nodes_sequences is None: + return None + + res = [] + min_len = -1 + for current_sequence in nodes_sequences: + # awful hack to remove when NodeSequence and ConceptSequence will be implemented + current_len = len(current_sequence) if hasattr(current_sequence, "__len__") else 1 + if len(res) == 0: + res.append(current_sequence) + min_len = current_len + elif current_len == min_len: + 
res.append(current_sequence) + elif current_len < min_len: + res.clear() + res.append(current_sequence) + min_len = current_len + + return res diff --git a/src/parsers/PythonParser.py b/src/parsers/PythonParser.py index a00c8b2..1c2728f 100644 --- a/src/parsers/PythonParser.py +++ b/src/parsers/PythonParser.py @@ -4,9 +4,8 @@ from dataclasses import dataclass import core.utils from core.builtin_concepts import BuiltinConcepts -from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute +from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import LexerError, TokenKind -from parsers.BaseNodeParser import ConceptNode from parsers.BaseParser import BaseParser, Node, ErrorNode log = logging.getLogger(__name__) @@ -70,87 +69,6 @@ class PythonGetNamesVisitor(ast.NodeVisitor): self.names.add(node.id) -class LexerNodeParserHelperForPython: - """Helper class to parse mix of concepts and Python""" - - def __init__(self): - self.identifiers = {} # cache for already created identifier (the key is id(concept)) - self.identifiers_key = {} # number of identifiers with the same root (prefix) - - def _get_identifier(self, concept): - """ - Get an identifier for a concept. 
- Make sure to return the same identifier if the same concept - Make sure to return a different identifier if same name but different concept - - Internal function because I don't want identifiers, identifiers_key and python_ids_mappings - to be instance variables - I would like to keep this parser as stateless as possible - :param concept: - :return: - """ - if id(concept) in self.identifiers: - return self.identifiers[id(concept)] - - identifier = "__C__" + self._sanitize(concept.key or concept.name) - if concept.id: - identifier += "__" + concept.id - - if identifier in self.identifiers_key: - self.identifiers_key[identifier] += 1 - identifier += f"_{self.identifiers_key[identifier]}" - else: - self.identifiers_key[identifier] = 0 - - identifier += "__C__" - - self.identifiers[id(concept)] = identifier - return identifier - - @staticmethod - def _sanitize(identifier): - res = "" - for c in identifier: - res += c if c.isalnum() else "0" - return res - - def parse(self, context, nodes): - source = "" - to_parse = "" - - concepts = {} # the key is the Python identifier - - for node in nodes: - if isinstance(node, ConceptNode): - source += node.source - if to_parse: - to_parse += " " - concept = node.concept - python_id = self._get_identifier(concept) - to_parse += python_id - concepts[python_id] = concept - else: - source += node.source - to_parse += node.source - - with context.push(BuiltinConcepts.PARSE_CODE, - {"language": "Python", "source": to_parse}, - desc="Trying Python for '" + to_parse + "'") as sub_context: - sub_context.add_inputs(to_parse=to_parse) - python_parser = PythonParser() - parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse) - result = python_parser.parse(sub_context, parser_input) - sub_context.add_values(return_values=result) - - if result.status: - python_node = result.body.body - python_node.source = source - python_node.concepts = concepts - return python_node - - return result.body # the error - - 
class PythonParser(BaseParser): """ Parse Python scripts diff --git a/src/parsers/PythonWithConceptsParser.py b/src/parsers/PythonWithConceptsParser.py index 57a1cff..6551d41 100644 --- a/src/parsers/PythonWithConceptsParser.py +++ b/src/parsers/PythonWithConceptsParser.py @@ -1,8 +1,8 @@ from core.builtin_concepts import BuiltinConcepts from core.sheerka.services.SheerkaExecute import SheerkaExecute +from parsers.BaseNodeParser import ConceptNode from parsers.BaseNodeParser import SourceCodeWithConceptNode from parsers.BaseParser import BaseParser -from parsers.BaseNodeParser import ConceptNode from parsers.PythonParser import PythonParser from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser @@ -12,8 +12,6 @@ unrecognized_nodes_parser = UnrecognizedNodeParser() class PythonWithConceptsParser(BaseParser): def __init__(self, **kwargs): super().__init__("PythonWithConcepts", 20) - self.identifiers = None - self.identifiers_key = None @staticmethod def sanitize(identifier): @@ -33,11 +31,15 @@ class PythonWithConceptsParser(BaseParser): yield node def parse(self, context, parser_input): - sheerka = context.sheerka nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser) + return self.parse_nodes(context, nodes) + + def parse_nodes(self, context, nodes): if not nodes: return None + sheerka = context.sheerka + source = "" to_parse = "" identifiers = {} diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index daf7530..d65e63c 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -5,10 +5,12 @@ from typing import List from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts +from core.builtin_helpers import parse_function from core.concept import Concept, DEFINITION_TYPE_BNF from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Token, TokenKind, 
Tokenizer +from core.utils import get_n_clones from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \ SourceCodeWithConceptNode, BaseNodeParser from parsers.BaseParser import ErrorNode @@ -17,39 +19,73 @@ PARSERS = ["BnfNode", "AtomNode", "Python"] function_parser_res = namedtuple("FunctionParserRes", 'to_out function') +DEBUG_PUSH = "PUSH" +DEBUG_PUSH_UNREC = "PUSH_UNREC" +DEBUG_POP = "POP" +DEBUG_EAT = "EAT" +DEBUG_RECOG = "RECOG" + + +@dataclass() +class DebugInfo: + """ + Debug item to trace how the sya parser worked + Possible action: + PUSH: push the token or the concept to the stack + PUSH_UNREC: push the token to the UnrecognizedTokensNode + POP: pop item to out + EAT: eat the current token (it means that it was part of the concept currently being parsed) + RECOG: when tokens from UnrecognizedTokensNode are parsed and recognized + """ + pos: int = -1 # position of the parser input + token: Token = None # current token + concept: Concept = None # current concept if any + action: str = None # action taken + + def __repr__(self): + token_repr = self.token.repr_value if isinstance(self.token, Token) else self.token + msg = f"{self.pos:3}:{token_repr}" if self.pos != -1 else " _:" + if self.concept: + msg += f"({self.concept})" + return msg + f" => {self.action}" + class ParenthesisMismatchErrorNode(ErrorNode): def __init__(self, error_int): if isinstance(error_int, tuple): - self.token = error_int[0] + if isinstance(error_int[0], Token): + self.token_value = error_int[0].value + self.token = error_int[0] + else: + self.token_value = error_int[0] + self.token = None self.pos = error_int[1] elif isinstance(error_int, Token): self.token = error_int + self.token_value = error_int.value self.pos = -1 else: # isinstance(UnrecognizedTokensNode) for i, t in reversed(list(enumerate(error_int.tokens))): if t.type == TokenKind.LPAR: self.token = t + self.token_value = t.value self.pos = i + error_int.start def 
__eq__(self, other): if id(self) == id(other): return True - if isinstance(other, tuple): - return other[0] == self.token.value and other[1] == self.pos - if not isinstance(other, ParenthesisMismatchErrorNode): return False - return self.token == other.token and self.pos == other.pos + return self.token_value == other.token_value and self.pos == other.pos def __hash__(self): return hash(self.pos) def __repr__(self): - return f"ParenthesisMismatchErrorNode('{self.token.value}', {self.pos}" + return f"ParenthesisMismatchErrorNode('{self.token_value}', {self.pos})" @dataclass() @@ -211,8 +247,9 @@ class SyaConceptParserHelper: class InFixToPostFix: - def __init__(self, context): + def __init__(self, context, debug_enabled=False): self.context = context + self.debug_enabled = debug_enabled self.is_locked = False # when locked, cannot process input @@ -227,6 +264,8 @@ class InFixToPostFix: self.false_positives = [] # concepts that looks like known one, but not (for debug purpose) self.forked = [] # use to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens + self.parsing_function = False # indicate that we are currently parsing a function + def __repr__(self): return f"InFixToPostFix({self.debug})" @@ -243,6 +282,8 @@ class InFixToPostFix: return len(self.sequence) + len(self.errors) def _add_error(self, error): + if self.debug_enabled: + self.debug.append(DebugInfo(action=f"=> ERROR {error}")) self.errors.append(error) def _is_lpar(self, token): @@ -294,7 +335,11 @@ class InFixToPostFix: item.error = "Not enough suffix parameters" else: item.error = f"token '{item.expected[0].strip_quote}' not found" + if self.debug_enabled: + self.debug.append(DebugInfo(action=f"ERROR {item.error}")) + if self.debug_enabled: + self.debug.append(DebugInfo(action=f"{DEBUG_POP} {item}")) if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1: self.out.insert(item.potential_pos, item) else: @@ -345,6 +390,26 @@ class InFixToPostFix: for i, token in 
enumerate(parser_helper.tokens): self.unrecognized_tokens.add_token(token, parser_helper.start + i) + def _remove_debug_info_if_needed(self): + """ + Before trying to manage the unrecognized, a line is added to explain the token which has triggered + the recognition try + This line is useless if self.unrecognized_tokens was irrelevant + :return: + """ + if len(self.debug) > 0 and self.debug[-1].action == "??": + self.debug.pop() + + def _debug_nodes(self, nodes_sequences): + res = "[" + first = True + for sequence in nodes_sequences: + if not first: + res += ", " + res += "[" + ", ".join([n.to_short_str() for n in sequence]) + "]" + first = False + return res + "]" + def get_errors(self): def has_error(item): if isinstance(item, SyaConceptParserHelper) and item.error: @@ -439,41 +504,40 @@ class InFixToPostFix: self.unrecognized_tokens.fix_source() - # try to recognize concepts - nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( - self.context, - self.unrecognized_tokens, - PARSERS) - - if nodes_sequences: - # There are more than one solution found - # In the case, we create a new InfixToPostfix for each new possibility - if len(nodes_sequences) > 1: - for node_sequence in nodes_sequences[1:]: - clone = self.clone() - for node in node_sequence: - clone._put_to_out(node) - clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) - self.forked.append(clone) - - # Do not forget the first result that will go with the current InfixToPostfix - for node in nodes_sequences[0]: - self._put_to_out(node) - else: + if self.unrecognized_tokens.parenthesis_count > 0: + # parenthesis mismatch detected, do not try to resolve the unrecognized + self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens)) self._put_to_out(self.unrecognized_tokens) + else: + # try to recognize concepts + nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( + self.context, + self.unrecognized_tokens, + PARSERS) - # # try to recognize concepts - # 
nodes = self._get_lexer_nodes_from_unrecognized() - # if nodes: - # for node in nodes: - # self._put_to_out(node) - # else: - # self._put_to_out(self.unrecognized_tokens) + if nodes_sequences: + # There are more than one solution found + # In the case, we create a new InfixToPostfix for each new possibility + if self.debug_enabled: + self.debug.append(DebugInfo(action=f"{DEBUG_RECOG} {self._debug_nodes(nodes_sequences)}")) + if len(nodes_sequences) > 1: + for node_sequence in nodes_sequences[1:]: + clone = self.clone() + for node in node_sequence: + clone._put_to_out(node) + clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + self.forked.append(clone) + + # Do not forget the first result that will go with the current InfixToPostfix + for node in nodes_sequences[0]: + self._put_to_out(node) + else: + self._put_to_out(self.unrecognized_tokens) # create another instance self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) - def get_functions_from_unrecognized(self, token, pos): + def get_functions_names_from_unrecognized(self, token, pos): """ The unrecognized ends with an lpar '(' It means that its a function like foo(something) @@ -489,19 +553,32 @@ class InFixToPostFix: self.context, self.unrecognized_tokens, PARSERS) - if nodes_sequences is None: - return None + + if not nodes_sequences: + nodes_sequences = [[self.unrecognized_tokens.clone()]] res = [] for sequence in nodes_sequences: - if isinstance(sequence[-1], UnrecognizedTokensNode): - function = sequence[-1] - else: - function = UnrecognizedTokensNode(sequence[-1].start, sequence[-1].end, sequence[-1].tokens) - function.add_token(token, pos).fix_source() + last_node = sequence[-1] - res.append(function_parser_res(sequence[:-1], function)) + if len(last_node.tokens) > 1: + if isinstance(last_node, UnrecognizedTokensNode): + to_out = [UnrecognizedTokensNode(last_node.start, pos - 2, last_node.tokens[:-1]).fix_source()] + function_name = UnrecognizedTokensNode(pos - 1, pos - 1, 
[last_node.tokens[-1]]) + function_name.add_token(token, pos) + else: + to_out = [last_node.fix_source()] + function_name = None + else: # len(last_node.tokens) == 1 + if not isinstance(last_node, UnrecognizedTokensNode): + function_name = UnrecognizedTokensNode(last_node.start, last_node.end, last_node.tokens) + else: + function_name = last_node + function_name.add_token(token, pos) + to_out = [] + + res.append(function_parser_res(sequence[:-1] + to_out, function_name)) return res def pop_stack_to_out(self): @@ -614,6 +691,8 @@ class InFixToPostFix: self.unrecognized_tokens.pop(TokenKind.WHITESPACE) current_concept.end = pos + if self.debug_enabled: + self.debug.append(DebugInfo(pos, token, None, "??")) self.manage_unrecognized() # manage that some clones may have been forked for forked in self.forked: @@ -673,17 +752,53 @@ class InFixToPostFix: if self.is_locked: return + if self.parsing_function: + if self.debug_enabled: + self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC)) + + self.unrecognized_tokens.add_token(token, pos) + + if self.unrecognized_tokens.parenthesis_count == 0: + self.unrecognized_tokens.fix_source() + res = parse_function(self.context, + self.unrecognized_tokens.source, + self.unrecognized_tokens.tokens[:], + self.unrecognized_tokens.start) + + instances = get_n_clones(self, len(res)) + self.forked.extend(instances[1:]) + for instance, res_i in zip(instances, res): + + if res_i.status or instance.context.sheerka.isinstance(res_i.body, BuiltinConcepts.PARSER_RESULT): + # 1. we manage to recognize a function + # 2. we almost manage, ex func(one two). 
It's not a function but almost + instance._put_to_out(res_i.body.body) + instance.unrecognized_tokens.reset() + else: + # it is not a function, try to recognize the token + # This situation is unlikely to occur + instance.manage_unrecognized() + + instance.parsing_function = False + + return True + if self.handle_expected_token(token, pos): # a token is found, let's check if it's part of a concepts being parsed # example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b") # if the token 'bar' is found, it has to be considered as part of the concept foo - self.debug.append(token) + if self.debug_enabled: + self._remove_debug_info_if_needed() + self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT)) return True elif self._is_lpar(token): + + if self.debug_enabled: + self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC)) if self.unrecognized_tokens.is_empty() or self.unrecognized_tokens.is_whitespace(): + # first, remove what was in the buffer self.manage_unrecognized() for forked in self.forked: @@ -691,40 +806,65 @@ forked.eat_token(token, pos) self.stack.append((token, pos)) + else: # the parenthesis is part of the unrecognized - # So it's a function + # So it's maybe a function call - list_of_results = self.get_functions_from_unrecognized(token, pos) - if list_of_results: - instances = [self] - for i in range(len(list_of_results) - 1): - clone = self.clone() - self.forked.append(clone) - instances.append(clone) + list_of_results = self.get_functions_names_from_unrecognized(token, pos) + instances = [self] + for i in range(len(list_of_results) - 1): + clone = self.clone() + self.forked.append(clone) + instances.append(clone) - # Manage the result for self and its clones - for instance, parsing_res in zip(instances, list_of_results): - for to_out in parsing_res.to_out: - instance._put_to_out(to_out) + # Manage the result for self and its clones + for instance, parsing_res in 
zip(instances, list_of_results): + + for to_out in parsing_res.to_out: + instance._put_to_out(to_out) + + if parsing_res.function: + instance.unrecognized_tokens = parsing_res.function + instance.parsing_function = True + else: + # special case of "twenty two(". It's not considered as a function + # The manage_unrecognized() what somewhat done by get_functions_names_from_unrecognized() + # So we just put the unrecognized to out + + instance.unrecognized_tokens.reset() # make sure to pop the current concept if self._stack_isinstance(SyaConceptParserHelper): self.pop_stack_to_out() - instance._put_to_out(")") # mark where the function should end - instance.stack.append(parsing_res.function) - instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized - else: - self._put_to_out(")") # mark where the function should end - self.eat_unrecognized(token, pos) # add the '(' to the rest of the unknown - self.stack.append(self.unrecognized_tokens.fix_source()) - self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) + instance.stack.append((token, pos)) + + # # instance._put_to_out(")") # mark where the function should end + # # instance.stack.append(parsing_res.function) + # # instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized + # else: + # # handle when there are multiple pending tokens + # if len(self.unrecognized_tokens.tokens) > 1: + # unrecognized = UnrecognizedTokensNode(self.unrecognized_tokens.start, + # pos - 2, + # self.unrecognized_tokens.tokens[:-1]) + # unrecognized.fix_source() + # self._put_to_out(unrecognized) + # last_token = self.unrecognized_tokens.tokens[-1] + # self.unrecognized_tokens.reset() + # self.unrecognized_tokens.add_token(last_token, pos - 1) + # + # self.eat_unrecognized(token, pos) # add the '(' to the rest of the unknown + # self.parsing_function = True + # # self.stack.append(self.unrecognized_tokens.fix_source()) + # # self.unrecognized_tokens = 
UnrecognizedTokensNode(-1, -1, []) return True elif self._is_rpar(token): - self.debug.append(token) + if self.debug_enabled: + self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT)) # first, remove what was in the buffer self.manage_unrecognized() @@ -775,32 +915,36 @@ class InFixToPostFix: return False - def eat_concept(self, sya_concept_def, token, pos): + def eat_concept(self, sya_concept_def, token, pos, first_pass=True): """ a concept is found :param sya_concept_def: :param token: :param pos: + :param first_pass: When not called from a fork after manage_unrecognized() :return: """ if self.is_locked: return - self.debug.append(sya_concept_def) parser_helper = SyaConceptParserHelper(sya_concept_def, pos) - if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE: - parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1] + if first_pass: + if self.debug_enabled: + self.debug.append(DebugInfo(pos, token, sya_concept_def, "??")) - if Token.is_whitespace(parser_helper.last_token_before_first_token): - self.unrecognized_tokens.pop(TokenKind.WHITESPACE) + if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE: + parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1] - # First, try to recognize the tokens that are waiting - self.manage_unrecognized() - for forked in self.forked: - # manage the fact that some clone may have been forked - forked.eat_concept(sya_concept_def, token, pos) + if Token.is_whitespace(parser_helper.last_token_before_first_token): + self.unrecognized_tokens.pop(TokenKind.WHITESPACE) + + # First, try to recognize the tokens that are waiting + self.manage_unrecognized() + for forked in self.forked: + # manage the fact that some clone may have been forked + forked.eat_concept(sya_concept_def, token, pos, first_pass=False) # then, check if this new concept is linked to the previous ones # ie, is the previous concept fully matched ? 
@@ -823,6 +967,9 @@ class InFixToPostFix: self.manage_parameters_when_new_concept(parser_helper) self._put_to_out(parser_helper.fix_concept()) else: + if self.debug_enabled: + self._remove_debug_info_if_needed() + self.debug.append(DebugInfo(pos, token, sya_concept_def, DEBUG_PUSH)) self.stack.append(parser_helper) self.manage_parameters_when_new_concept(parser_helper) @@ -836,11 +983,12 @@ class InFixToPostFix: if self.is_locked: return - self.debug.append(token) + if self.debug_enabled: + self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC)) self.unrecognized_tokens.add_token(token, pos) - def finalize(self): + def finalize(self, pos): """ Put the remaining items from the stack to out :return: @@ -850,8 +998,14 @@ class InFixToPostFix: return if len(self.stack) == 0 and len(self.out) == 0: + # check for parenthesis mismatch + if self.unrecognized_tokens.parenthesis_count > 0: + self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens)) return # no need to pop the buffer, as no concept is found + if self.debug_enabled: + self.debug.append(DebugInfo(pos, "", None, "??")) + while len(self.stack) > 0: parser_helper = self.stack[-1] @@ -863,7 +1017,7 @@ class InFixToPostFix: self.manage_unrecognized() for forked in self.forked: # manage that some clones may have been forked - forked.finalize() + forked.finalize(pos) failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected)) if failed_to_match > 0: @@ -878,10 +1032,10 @@ class InFixToPostFix: self.manage_unrecognized() for forked in self.forked: # manage that some clones may have been forked - forked.finalize() + forked.finalize(pos) def clone(self): - clone = InFixToPostFix(self.context) + clone = InFixToPostFix(self.context, self.debug_enabled) clone.is_locked = self.is_locked clone.out = self.out[:] clone.stack = [i.clone() if hasattr(i, "clone") else i for i in self.stack] @@ -983,7 +1137,7 @@ class SyaNodeParser(BaseNodeParser): res.extend(forked) 
forked.clear() - res = [InFixToPostFix(context)] + res = [InFixToPostFix(context, context.in_context(BuiltinConcepts.DEBUG))] while self.parser_input.next_token(False): for infix_to_postfix in res: infix_to_postfix.reset() @@ -1027,7 +1181,7 @@ class SyaNodeParser(BaseNodeParser): # make sure that remaining items in stack are moved to out for infix_to_postfix in res: infix_to_postfix.reset() - infix_to_postfix.finalize() + infix_to_postfix.finalize(self.parser_input.pos) _add_forked_to_res() return res @@ -1058,14 +1212,14 @@ class SyaNodeParser(BaseNodeParser): start = item.start end = item.end has_unrecognized = False - concept = sheerka.new_from_template(item.concept, item.concept.id) + concept = sheerka.new_from_template(item.concept, item.concept.key) for param_index in reversed(range(len(concept.metadata.variables))): inner_item = self.postfix_to_item(sheerka, postfixed) if inner_item.start < start: start = inner_item.start if inner_item.end > end: end = inner_item.end - has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode) + has_unrecognized |= isinstance(inner_item, (UnrecognizedTokensNode, SourceCodeWithConceptNode)) param_name = concept.metadata.variables[param_index][0] param_value = inner_item.concept if hasattr(inner_item, "concept") else \ @@ -1128,6 +1282,7 @@ class SyaNodeParser(BaseNodeParser): if has_unrecognized: # Manage some sick cases where missing parenthesis mess the order or the sequence # example "foo bar(one plus two" + # too lazy to fix the why... 
sequence.sort(key=attrgetter("start")) ret.append( diff --git a/src/parsers/UnrecognizedNodeParser.py b/src/parsers/UnrecognizedNodeParser.py index b1b0c0e..4577122 100644 --- a/src/parsers/UnrecognizedNodeParser.py +++ b/src/parsers/UnrecognizedNodeParser.py @@ -2,7 +2,7 @@ from dataclasses import dataclass import core.utils from core.builtin_concepts import BuiltinConcepts -from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes +from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes, update_compiled from core.concept import Concept from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode from parsers.BaseParser import BaseParser, ErrorNode @@ -38,6 +38,7 @@ class UnrecognizedNodeParser(BaseParser): sequences_found = [[]] has_unrecognized = False + self.error_sink.clear() for node in nodes: if isinstance(node, ConceptNode): @@ -93,7 +94,7 @@ class UnrecognizedNodeParser(BaseParser): sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=parser_input, + source=parser_input.source, body=choice, try_parsed=choice))) @@ -105,56 +106,8 @@ class UnrecognizedNodeParser(BaseParser): return ret def validate_concept_node(self, context, concept_node): - - sheerka = context.sheerka errors = [] - - def _validate_concept(concept): - """ - Recursively browse the compiled properties in order to find unrecognized - :param concept: - :return: - """ - for k, v in concept.compiled.items(): - if isinstance(v, Concept): - _validate_concept(v) - - elif isinstance(v, UnrecognizedTokensNode): - res = parse_unrecognized(context, v.source, PARSERS) - res = only_successful(context, res) # only key successful parsers - if res.status: - concept.compiled[k] = res.body.body - else: - errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'")) - - def _get_source(compiled, var_name): - if var_name not in compiled: - return None - if not 
isinstance(compiled[var_name], list): - return None - if not len(compiled[var_name]) == 1: - return None - if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE): - return None - if not sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT): - return None - if compiled[var_name][0].body.name == "parsers.ShortTermMemory": - return None - - return compiled[var_name][0].body.source - - _validate_concept(concept_node.concept) - - # Special case where the values of the variables are the names of the variable - # example : Concept("a plus b").def_var("a").def_var("b") - # and the user has entered 'a plus b' - # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2') - # This means that 'a' and 'b' don't have any real value - for name, value in concept_node.concept.metadata.variables: - if not _get_source(concept_node.concept.compiled, name) == name: - break - else: - concept_node.concept.metadata.is_evaluated = True + update_compiled(context, concept_node.concept, errors) if len(errors) > 0: return context.sheerka.ret(self.name, False, errors) diff --git a/src/repl/SheerkaPromptCompleter.py b/src/repl/SheerkaPromptCompleter.py index 4876667..79103e4 100644 --- a/src/repl/SheerkaPromptCompleter.py +++ b/src/repl/SheerkaPromptCompleter.py @@ -173,8 +173,11 @@ class SheerkaPromptCompleter(Completer): break m = NAME.match(text[:i][::-1]) - func_name = m.group(0)[::-1] - return FuncFound(func_name, i - len(func_name), paren_index) if m else None + if m: + func_name = m.group(0)[::-1] + return FuncFound(func_name, i - len(func_name), paren_index) + + return None @staticmethod def after_pipe(text, pos): diff --git a/tests/core/test_SheerkaEvaluateConcept.py b/tests/core/test_SheerkaEvaluateConcept.py index 5c7d028..bc80a3b 100644 --- a/tests/core/test_SheerkaEvaluateConcept.py +++ b/tests/core/test_SheerkaEvaluateConcept.py @@ -88,6 +88,15 @@ class 
TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert evaluated.variables() == {"a": Property("a", expected)} assert evaluated.metadata.is_evaluated + def test_i_can_evaluate_when_the_body_is_the_name_of_the_concept(self): + # to prove that I can distinguish from a string + sheerka, context, concept = self.init_concepts(Concept("foo", body="'foo'"), eval_body=True, create_new=True) + + evaluated = sheerka.evaluate_concept(context, concept) + + assert evaluated.key == concept.key + assert evaluated.body == "foo" + def test_i_can_evaluate_metadata_using_do_not_resolve(self): sheerka, context, concept = self.init_concepts(Concept("foo"), eval_body=True) concept.compiled[ConceptParts.BODY] = DoNotResolve("do not resolve") diff --git a/tests/core/test_tokenizer.py b/tests/core/test_tokenizer.py index 01d6106..f53fd4e 100644 --- a/tests/core/test_tokenizer.py +++ b/tests/core/test_tokenizer.py @@ -1,5 +1,5 @@ import pytest -from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords +from core.tokenizer import Tokenizer, Token, TokenKind, LexerError def test_i_can_tokenize(): @@ -156,19 +156,6 @@ def test_i_can_parse_numbers(text): assert tokens[0].value == text -@pytest.mark.parametrize("text, expected", [ - ("def", Keywords.DEF), - ("concept", Keywords.CONCEPT), - ("as", Keywords.AS), - ("pre", Keywords.PRE), - ("post", Keywords.POST) -]) -def test_i_can_recognize_keywords(text, expected): - tokens = list(Tokenizer(text)) - assert tokens[0].type == TokenKind.KEYWORD - assert tokens[0].value == expected - - @pytest.mark.parametrize("text, expected", [ ("c:key:", ("key", None)), ("c:key|id:", ("key", "id")), diff --git a/tests/evaluators/test_LexerNodeEvaluator.py b/tests/evaluators/test_LexerNodeEvaluator.py index 748b578..bfeefbc 100644 --- a/tests/evaluators/test_LexerNodeEvaluator.py +++ b/tests/evaluators/test_LexerNodeEvaluator.py @@ -27,7 +27,7 @@ class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): for fragment in fragments: if 
isinstance(fragment, str): node = PythonNode(fragment, ast.parse(fragment.strip(), mode="eval")) - nodes.append(SourceCodeNode(node, 0, 0, [], fragment)) + nodes.append(SourceCodeNode(0, 0, [], fragment, node)) else: nodes.append(ConceptNode(fragment, 0, 0, [], fragment.name)) @@ -82,10 +82,9 @@ class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka): wrapper = result.body return_value = result.body.body - assert result.who == evaluator.name + assert result.who == "parsers.PythonWithConcepts" assert result.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert wrapper.parser == evaluator assert wrapper.source == "foo + 1" assert return_value == PythonNode('foo + 1', ast.parse("__C__foo__C__ + 1", mode="eval")) diff --git a/tests/evaluators/test_PythonEvaluator.py b/tests/evaluators/test_PythonEvaluator.py index dac0861..028b06d 100644 --- a/tests/evaluators/test_PythonEvaluator.py +++ b/tests/evaluators/test_PythonEvaluator.py @@ -1,8 +1,12 @@ +import ast + import pytest from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept, CB, NotInit from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import Tokenizer from evaluators.PythonEvaluator import PythonEvaluator, PythonEvalError +from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode from parsers.PythonParser import PythonNode, PythonParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -12,10 +16,28 @@ def get_concept_name(concept): return concept.name +def get_source_code_node(source_code, concepts=None): + if source_code: + python_node = PythonNode(source_code, ast.parse(source_code, f"", 'eval')) + else: + python_node = PythonNode("", None) + + if concepts is None: + tokens = list(Tokenizer(source_code, yield_eof=False)) + return SourceCodeNode(0, len(tokens), tokens, python_node=python_node) + else: + python_node.concepts = 
concepts + scwcn = SourceCodeWithConceptNode(None, None) + scwcn.python_node = python_node + return scwcn + + class TestPythonEvaluator(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("ret_val, expected", [ (ReturnValueConcept("some_name", True, ParserResultConcept(value=PythonNode("", None))), True), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=get_source_code_node(""))), True), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=get_source_code_node("", {}))), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value="other thing")), False), (ReturnValueConcept("some_name", False, "not relevant"), False), (ReturnValueConcept("some_name", True, Concept()), False) @@ -39,6 +61,19 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): assert evaluated.status assert evaluated.value == expected + @pytest.mark.parametrize("source_code_node, expected", [ + (get_source_code_node("1 + 1"), 2), + (get_source_code_node("one + one", {"one": Concept("one", body="1")}), 2) + ]) + def test_i_can_eval_source_code_node(self, source_code_node, expected): + context = self.get_context() + return_value = context.sheerka.ret("parsers.??", True, ParserResultConcept(value=source_code_node)) + + evaluated = PythonEvaluator().eval(context, return_value) + + assert evaluated.status + assert evaluated.value == expected + def test_i_can_eval_using_context(self): context = self.get_context() parsed = PythonParser().parse(context, ParserInput("test_using_context('value for param1', 10)")) @@ -239,3 +274,18 @@ class TestPythonEvaluator(TestUsingMemoryBasedSheerka): PythonEvaluator().update_globals_with_context(my_globals, context) assert my_globals == {"self": foo, "b": "'Initialized!'"} + + def test_i_can_use_sheerka_locals(self): + sheerka, context = self.init_concepts() + + def func(i): + return i + 1 + + sheerka.locals["func"] = func + + parsed = PythonParser().parse(context, ParserInput("func(10)")) + python_evaluator = 
PythonEvaluator() + evaluated = python_evaluator.eval(context, parsed) + + assert evaluated.status + assert evaluated.value == 11 diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index 2b6a2be..aaa469c 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -348,8 +348,8 @@ as: "def concept one as 1", "def concept two as 2", "def concept number", - "one isa number", - "two isa number", + "set_isa(one, number)", + "set_isa(two, number)", "def concept twenties from bnf 'twenty' number as 20 + number" ]), ("When using isa and concept twenty", [ @@ -357,8 +357,8 @@ as: "def concept two as 2", "def concept twenty as 20", "def concept number", - "one isa number", - "two isa number", + "set_isa(one, number)", + "set_isa(two, number)", "def concept twenties from bnf twenty number as 20 + number" ]), ]) @@ -408,8 +408,8 @@ as: sheerka.evaluate_user_input("def concept one as 1") sheerka.evaluate_user_input("def concept two as 2") sheerka.evaluate_user_input("def concept number") - sheerka.evaluate_user_input("one isa number") - sheerka.evaluate_user_input("two isa number") + sheerka.evaluate_user_input("set_isa(one, number)") + sheerka.evaluate_user_input("set_isa(two, number)") sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' number as 20 + number") res = sheerka.evaluate_user_input("twenty one") @@ -450,8 +450,8 @@ as: "def concept one as 1", "def concept twenty as 20", "def concept number", - "one isa number", - "twenty isa number", + "set_isa(one, number)", + "set_isa(twenty, number)", "def concept twenties from bnf twenty number as twenty + number" ] @@ -563,7 +563,7 @@ as: definitions = [ "def concept two as 2", "def concept number", - "two isa number", + "set_isa(two, number)", "def concept plus_one from bnf number=n1 'plus_one' as n1 + 1", ] @@ -574,15 +574,6 @@ as: assert res[0].status assert res[0].body == 3 - def 
test_i_can_say_that_a_concept_isa_another_concept(self): - sheerka = self.get_sheerka() - sheerka.evaluate_user_input("def concept foo") - sheerka.evaluate_user_input("def concept bar") - - res = sheerka.evaluate_user_input("foo isa bar") - assert len(res) == 1 - assert res[0].status - assert sheerka.isinstance(res[0].body, BuiltinConcepts.SUCCESS) def test_eval_does_not_break_valid_result(self): sheerka = self.get_sheerka() @@ -662,9 +653,9 @@ as: "def concept three as 3", "def concept twenty as 20", "def concept number", - "one isa number", - "two isa number", - "three isa number", + "set_isa(one, number)", + "set_isa(two, number)", + "set_isa(three, number)", "def concept twenties from bnf twenty number where number <= 2 as twenty + number" ] @@ -759,7 +750,7 @@ as: definitions = [ "def concept one as 1", "def concept number", - "one isa number", + "set_isa(one, number)", "def concept hundreds from bnf number=n1 'hundred' ('and' number=n2)? where n1<10 and n2<100 as n1 * 100 + n2", ] @@ -782,7 +773,7 @@ as: sheerka.evaluate_user_input("def concept two as 2") sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit") - res = sheerka.evaluate_user_input("twenties isa number") + res = sheerka.evaluate_user_input("set_isa(twenties, number)") assert len(res) == 1 assert res[0].status @@ -950,11 +941,11 @@ as: "def concept two as 2", "def concept twenty as 20", "def concept number", - "one isa number", - "two isa number", - "twenty isa number", + "set_isa(one, number)", + "set_isa(two, number)", + "set_isa(twenty, number)", "def concept twenties from bnf twenty number where number < 10 as twenty + number", - "twenties isa number", + "set_isa(twenties, number)", ] sheerka = self.init_scenario(init) @@ -975,7 +966,7 @@ as: sheerka = self.init_scenario(init) - res = sheerka.evaluate_user_input("last_created_concept() isa number") + res = sheerka.evaluate_user_input("set_isa(last_created_concept(), number)") assert res[0].status 
assert sheerka.isa(sheerka.new("one"), sheerka.new("number")) @@ -1021,7 +1012,7 @@ as: "def concept one", "def concept foo", "def concept number", - "one isa number", + "set_isa(one, number)", "def concept x is a y as isa(x,y) pre in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)", "def concept x is a y as set_isa(x,y)", ] @@ -1041,7 +1032,7 @@ as: init = [ "def concept one as 1", "def concept number", - "one isa number", + "set_isa(one, number)", "def concept one as 10", # to make sure that it won't be rejected because of the cast "def concept x is a y as isa(x,y) pre in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)", "def concept x is a y as set_isa(x,y)", @@ -1069,7 +1060,7 @@ as: "def concept one", "def concept foo", "def concept number", - "one isa number", + "set_isa(one, number)", "def concept q from q ? as question(q)", "def concept is_a from x is a y as isa(x,y) pre in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)", "set_is_greater_than(BuiltinConcepts.PRECEDENCE, c:is_a:, c:q:)" @@ -1125,6 +1116,34 @@ as: assert len(res) == 1 assert res[0].status + def test_i_can_eval_concepts_fed_with_functions(self): + init = [ + "def concept inc a as a + 1", + "def concept one as 1" + ] + + def times_five(i): + return i * 5 + + sheerka = self.init_scenario(init) + sheerka.locals["times_five"] = times_five + + res = sheerka.evaluate_user_input("eval inc times_five(one)") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 6 + + def test_i_can_define_a_concept_when_where_clause_contains_the_name_of_the_variable(self): + init = [ + "def concept x is a y as isa(x,y) pre is_question()", + ] + sheerka = self.init_scenario(init) + + res = sheerka.evaluate_user_input("def concept a x b where a is a number as a + b") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].body, BuiltinConcepts.NEW_CONCEPT) + class TestSheerkaNonRegFile(TestUsingFileBasedSheerka): def test_i_can_def_several_concepts(self): @@ -1197,15 +1216,15 
@@ class TestSheerkaNonRegFile(TestUsingFileBasedSheerka): self.init_scenario([ "def concept one as 1", "def concept number", - "one isa number", + "set_isa(one, number)", "def concept twenty as 20", - "twenty isa number", + "set_isa(twenty, number)", "def concept twenties from bnf twenty number where number < 10 as twenty + number", - "twenties isa number", + "set_isa(twenties, number)", "def concept thirty as 30", - "thirty isa number", + "set_isa(thirty, number)", "def concept thirties from bnf thirty number where number < 10 as thirty + number", - "thirties isa number", + "set_isa(thirties, number)", ]) sheerka = self.get_sheerka() # another instance diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index d59db84..187b146 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -1,4 +1,4 @@ -from core.concept import CC, Concept, ConceptParts, DoNotResolve +from core.concept import CC, Concept, ConceptParts, DoNotResolve, CIO from core.tokenizer import Tokenizer, TokenKind, Token from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN, \ SCN @@ -13,7 +13,7 @@ def _index(tokens, expr, index): :param index: :return: """ - expected = [token.value for token in Tokenizer(expr) if token.type != TokenKind.EOF] + expected = [token.str_value for token in Tokenizer(expr) if token.type != TokenKind.EOF] for i in range(0, len(tokens) - len(expected) + 1): for j in range(len(expected)): if tokens[i + j] != expected[j]: @@ -74,6 +74,14 @@ def get_node( if isinstance(sub_expr, (scnode, utnode, DoNotResolve)): return sub_expr + if isinstance(sub_expr, CIO): + sub_expr.set_concept(concepts_map[sub_expr.concept_name]) + if sub_expr.source: + node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya) + sub_expr.start = node.start + sub_expr.end = node.end + return sub_expr + if isinstance(sub_expr, cnode): # for cnode, map the concept key to the one 
from concepts_maps if needed if sub_expr.concept_key.startswith("#"): @@ -192,7 +200,7 @@ def compute_expected_array(concepts_map, expression, expected, sya=False, init_e :param exclude_body: do not include ConceptParts.BODY in comparison :return: """ - expression_as_tokens = [token.value for token in Tokenizer(expression) if token.type != TokenKind.EOF] + expression_as_tokens = [token.str_value for token in Tokenizer(expression) if token.type != TokenKind.EOF] return [get_node( concepts_map, expression_as_tokens, diff --git a/tests/parsers/test_AtomsParser.py b/tests/parsers/test_AtomsParser.py index 1c91284..56a6a4f 100644 --- a/tests/parsers/test_AtomsParser.py +++ b/tests/parsers/test_AtomsParser.py @@ -34,6 +34,11 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("text, expected", [ ("foo", ["foo"]), + ("c:foo:", [CN("foo", source="c:foo:")]), + ("c:|1001:", [CN("foo", source="c:|1001:")]), + (" foo", ["foo"]), + ("foo ", ["foo"]), + (" foo ", ["foo"]), ("foo bar", ["foo", "bar"]), ("foo bar twenties", ["foo", "bar", "twenties"]), ("a plus b", [CN("plus", 0, 4)]), @@ -347,3 +352,27 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): assert res.status assert lexer_nodes[0].concept.metadata.is_evaluated == expected_is_evaluated + + def test_the_parser_always_return_a_new_instance_of_the_concept(self): + concepts_map = { + "foo": Concept("foo"), + } + + sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True) + res = parser.parse(context, ParserInput("foo")) + + assert res.status + assert id(res.body.body[0].concept) != id(sheerka.get_by_name("foo")) + + def test_i_can_only_parse_when_the_name_is_an_identifier(self): + # to prove that I can distinguish string from actual concept name + concepts_map = { + "foo": Concept("foo"), + } + + sheerka, context, parser = self.init_parser(concepts_map, create_new=True, use_sheerka=True) + res = parser.parse(context, ParserInput("'foo'")) + + assert not 
res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + diff --git a/tests/parsers/test_DefaultParser.py b/tests/parsers/test_DefaultParser.py index 7419987..8e1e705 100644 --- a/tests/parsers/test_DefaultParser.py +++ b/tests/parsers/test_DefaultParser.py @@ -6,13 +6,16 @@ from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnVa from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Keywords, Tokenizer, LexerError +from parsers.BaseNodeParser import SCN, SCWC from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch from parsers.BnfParser import BnfParser -from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode +from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode +from parsers.FunctionParser import FunctionParser from parsers.PythonParser import PythonParser, PythonNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +from tests.parsers.parsers_utils import get_node, compute_expected_array def get_def_concept(name, where=None, pre=None, post=None, body=None, definition=None, bnf_def=None, ret=None): @@ -52,6 +55,18 @@ def get_concept_part(part): parser=PythonParser(), value=node)) + if isinstance(part, FN): + # node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval")) + nodes = compute_expected_array({}, part.source, [SCWC(part.first, part.last, *part.content)]) + return ReturnValueConcept( + who="parsers.Default", + status=True, + value=ParserResultConcept( + source=part.source, + parser=FunctionParser(), + value=nodes[0], + try_parsed=nodes[0])) + if isinstance(part, PN): node = PythonNode(part.source.strip(), ast.parse(part.source.strip(), mode=part.mode)) return 
ReturnValueConcept( @@ -84,6 +99,17 @@ class PN: mode: str # compilation mode +@dataclass +class FN: + """ + Function Node + """ + source: str + first: str + last: str + content: list + + class TestDefaultParser(TestUsingMemoryBasedSheerka): def init_parser(self, *concepts): @@ -117,7 +143,7 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka): def test_i_can_parse_complex_def_concept_statement(self): text = """def concept a mult b where a,b -pre isinstance(b, int) +pre isinstance(a, int) and isinstance(b, int) post isinstance(res, a) as res = a * b ret a if isinstance(a, Concept) else self @@ -128,8 +154,8 @@ ret a if isinstance(a, Concept) else self expected_concept = get_def_concept( name="a mult b", where="a,b\n", - pre="isinstance(b, int)\n", - post="isinstance(res, a)\n", + pre="isinstance(a, int) and isinstance(b, int)\n", + post=FN("isinstance(res, a)\n", "isinstance(", ")", ["res", ", ", "a"]), body=PN("res = a * b\n", "exec"), ret="a if isinstance(a, Concept) else self\n" ) @@ -354,24 +380,21 @@ def concept add one to a as assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME) assert isinstance(res.value.body[0], CannotHandleErrorNode) - def test_i_can_parse_is_a(self): - text = "the name of my 'concept' isa the name of the set" - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - expected = IsaConceptNode([], - concept=NameNode(list(Tokenizer("the name of my 'concept'"))), - set=NameNode(list(Tokenizer("the name of the set")))) - - assert res.status - assert res.who == parser.name - assert res.value.source == text - assert isinstance(res.value, ParserResultConcept) - assert res.value.value == expected + # def test_i_can_parse_is_a(self): + # text = "the name of my 'concept' isa the name of the set" + # sheerka, context, parser = self.init_parser() + # res = parser.parse(context, ParserInput(text)) + # expected = IsaConceptNode([], + # concept=NameNode(list(Tokenizer("the name of my 
'concept'"))), + # set=NameNode(list(Tokenizer("the name of the set")))) + # + # assert res.status + # assert res.who == parser.name + # assert res.value.source == text + # assert isinstance(res.value, ParserResultConcept) + # assert res.value.value == expected @pytest.mark.parametrize("text", [ - "concept", - "isa number", - "name isa", "def", "def concept_name" ]) @@ -383,6 +406,19 @@ def concept add one to a as assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert isinstance(res.body.body[0], UnexpectedTokenErrorNode) + @pytest.mark.parametrize("text", [ + "concept", + "isa number", + "name isa", + ]) + def test_i_cannot_parse_not_for_me_entries(self, text): + sheerka, context, parser = self.init_parser() + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert isinstance(res.body.body[0], CannotHandleErrorNode) + @pytest.mark.parametrize("text, error_msg, error_text", [ ("'name", "Missing Trailing quote", "'name"), ("foo isa 'name", "Missing Trailing quote", "'name"), diff --git a/tests/parsers/test_FunctionParser.py b/tests/parsers/test_FunctionParser.py new file mode 100644 index 0000000..35c3428 --- /dev/null +++ b/tests/parsers/test_FunctionParser.py @@ -0,0 +1,176 @@ +import pytest +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept +from core.sheerka.services.SheerkaExecute import ParserInput +from parsers.BaseNodeParser import SCN, SCWC, CN, UTN, CNC +from parsers.FunctionParser import FunctionParser, FN + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +from tests.parsers.parsers_utils import compute_expected_array + +cmap = { + "one": Concept("one"), + "two": Concept("two"), + "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + "plus": Concept("a plus b").def_var("a").def_var("b"), +} + + +class TestFunctionParser(TestUsingMemoryBasedSheerka): + sheerka 
= None + + @classmethod + def setup_class(cls): + t = cls() + cls.sheerka, context, _ = t.init_parser(cmap) + + def init_parser(self, concepts_map=None): + if concepts_map is not None: + sheerka, context, *concepts = self.init_concepts(*concepts_map.values(), create_new=True) + else: + sheerka = TestFunctionParser.sheerka + context = self.get_context(sheerka) + + parser = FunctionParser() + return sheerka, context, parser + + def test_i_can_detect_empty_expression(self): + sheerka, context, parser = self.init_parser() + res = parser.parse(context, ParserInput("")) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) + + def test_input_must_be_a_parser_input(self): + sheerka, context, parser = self.init_parser() + parser.parse(context, "not a parser input") is None + + def test_i_cannot_parse_when_not_a_function(self): + sheerka, context, parser = self.init_parser() + res = parser.parse(context, ParserInput("not a function")) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + + @pytest.mark.parametrize("expression, expected", [ + ("func()", FN("func(", ")", [])), + ("concept(one)", FN("concept(", ")", ["one"])), + ("func(one)", FN("func(", ")", ["one"])), + ("func(a long two, 'three', ;:$*)", FN("func(", ")", ["a long two, ", "'three', ", ";:$*"])), + ("func(func1(one), two, func2(func3(), func4(three)))", FN("func(", ")", [ + (FN("func1(", ")", ["one"]), ", "), + "two, ", + (FN("func2(", ")", [ + (FN("func3(", ")", []), ", "), + (FN("func4(", ")", ["three"]), None), + ]), None) + ])), + ]) + def test_i_can_parse_function(self, expression, expected): + sheerka, context, parser = self.init_parser() + + parser.reset_parser(context, ParserInput(expression)) + res = parser.parse_function() + + assert res == expected + + @pytest.mark.parametrize("text, expected", [ + ("func()", SCN("func()")), + (" func()", SCN("func()")), + ("func(one)", SCWC("func(", ")", CN("one"))), + ("func(one, unknown, 
two)", SCWC("func(", ")", CN("one"), ", ", UTN("unknown"), (", ", 1), CN("two"))), + ("func(one, twenty two)", SCWC("func(", ")", "one", ", ", CN("twenties", source="twenty two"))), + ("func(one plus two, three)", SCWC("func(", ")", CNC("plus", a="one", b="two"), ", ", UTN("three"))), + ("func(func1(one), two)", SCWC("func(", (")", 1), SCWC("func1(", ")", "one"), ", ", "two")) + ]) + def test_i_can_parse(self, text, expected): + sheerka, context, parser = self.init_parser() + resolved_expected = compute_expected_array(cmap, text, [expected])[0] + + res = parser.parse(context, ParserInput(text)) + parser_result = res.body + expression = res.body.body + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert expression == resolved_expected + assert expression.python_node is not None + assert expression.return_value is not None + + def test_i_can_parse_when_multiple_results_when_requested(self): + sheerka, context, parser = self.init_parser() + parser.longest_concepts_only = False + text = "func(one, twenty two)" + expected = [SCWC("func(", ")", "one", ", ", "twenty ", "two"), + SCWC("func(", ")", "one", ", ", CN("twenties", source="twenty two"))] + all_resolved_expected = compute_expected_array(cmap, text, expected) + + results = parser.parse(context, ParserInput(text)) + + assert len(results) == 2 + + for res, resolved_expected in zip(results, all_resolved_expected): + parser_result = res.body + expressions = res.body.body + + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert expressions == resolved_expected + + @pytest.mark.parametrize("text, expected_error_type", [ + ("one", BuiltinConcepts.NOT_FOR_ME), + ("$*!", BuiltinConcepts.NOT_FOR_ME), + ("func(", BuiltinConcepts.ERROR), + ("func(one", BuiltinConcepts.ERROR), + ("func(one, two, ", BuiltinConcepts.ERROR), + ("func(one) and func(two)", BuiltinConcepts.ERROR), + ("one func(one)", BuiltinConcepts.NOT_FOR_ME), + ]) + def 
test_i_cannot_parse(self, text, expected_error_type): + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.body, expected_error_type) + + @pytest.mark.parametrize("text, expected", [ + ("func(one two)", SCWC("func(", ")", "one", "two")), + ]) + def test_i_can_detect_non_function(self, text, expected): + sheerka, context, parser = self.init_parser() + resolved_expected = compute_expected_array(cmap, text, [expected])[0] + + res = parser.parse(context, ParserInput(text)) + parser_result = res.body + expression = res.body.body + + assert not res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert expression == resolved_expected + assert expression.python_node is None + assert expression.return_value is None + + @pytest.mark.parametrize("sequence, expected", [ + (None, None), + ([["a"]], [["a"]]), + ([["a"], ["b", "c"]], [["a"]]), + ([["b", "c"], ["a"]], [["a"]]), + ([["b", "c"], ["a"], ["d", "e"], ["f"]], [["a"], ["f"]]), + ]) + def test_i_can_get_the_longest_concept_sequence(self, sequence, expected): + assert FunctionParser.get_longest_concepts(sequence) == expected + + def test_concepts_found_are_fully_initialized(self): + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, ParserInput("func(one plus three)")) + concept = res.body.body.nodes[0].concept + + assert res.status + assert isinstance(concept.compiled["a"], Concept) + + # three is not recognized, + # so it will be transformed into list of ReturnValueConcept that indicate how to recognized it + assert isinstance(concept.compiled["b"], list) + for item in concept.compiled["b"]: + assert sheerka.isinstance(item, BuiltinConcepts.RETURN_VALUE) diff --git a/tests/parsers/test_PythonWithConceptsParser.py b/tests/parsers/test_PythonWithConceptsParser.py index 8ea43bf..9e741db 100644 --- a/tests/parsers/test_PythonWithConceptsParser.py +++ 
b/tests/parsers/test_PythonWithConceptsParser.py @@ -104,6 +104,25 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka): assert result.status assert return_value.concepts["__C__foo0et000000__1001__C__"] == foo + def test_i_can_parse_when_multiple_concepts(self): + sheerka, context, foo, bar = self.init_concepts("foo", "bar") + input_return_value = ret_val("func(", foo, ", ", bar, ")") + + parser = PythonWithConceptsParser() + result = parser.parse(context, input_return_value.body) + parser_result = result.value + return_value = result.value.value + + assert result.status + assert result.who == parser.name + assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == "func(foo, bar)" + assert isinstance(return_value, PythonNode) + assert return_value.source == "func(foo, bar)" + assert return_value.get_dump(return_value.ast_) == to_str_ast("func(__C__foo__1001__C__, __C__bar__1002__C__)") + assert return_value.concepts["__C__foo__1001__C__"] == foo + assert return_value.concepts["__C__bar__1002__C__"] == bar + def test_python_ids_mappings_are_correct_when_concepts_with_the_same_name(self): context = self.get_context() foo1 = Concept("foo") diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index 9cbdd82..e63e41d 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -1,14 +1,14 @@ import pytest from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, CC +from core.concept import Concept, CIO from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer from parsers.BaseNodeParser import utnode, ConceptNode, cnode, short_cnode, UnrecognizedTokensNode, \ - SCWC, CNC, UTN, SourceCodeWithConceptNode + SCWC, CNC, UTN, SCN, CN from parsers.PythonParser import PythonNode from 
parsers.SyaNodeParser import SyaNodeParser, SyaConceptParserHelper, SyaAssociativity, \ - NoneAssociativeSequenceErrorNode, TooManyParametersFound + NoneAssociativeSequenceErrorNode, TooManyParametersFound, InFixToPostFix, ParenthesisMismatchErrorNode import tests.parsers.parsers_utils from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -633,21 +633,25 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected", [ - # I can't manage source code functions :-( - # ("function(one plus three) minus two", []), + # ("function(one plus three) minus two", + # [SCWC("function(", ")", CNC("plus", a="one", b="three")), "two", "minus"]), + ("two minus function(one plus three)", + ["two", SCWC("function(", ")", CNC("plus", a="one", b="three")), "minus"]), + ("func1() minus func2()", [SCN("func1()"), SCN("func2()"), "minus"]), + ("func1() comes with func2()", [SCN("func1()"), UTN(" comes with "), SCN("func2()")]), - # ("(one plus two) ", ["one", "two", "plus"]), - # ("(one prefixed) ", ["one", "prefixed"]), - # ("(suffixed one) ", ["one", "suffixed"]), - # ("(one ? two : three)", ["one", "two", "three", "?"]), - # ("square(square(one))", ["one", ("square", 1), "square"]), - # ("square ( square ( one ) )", ["one", ("square", 1), "square"]), - # - # ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]), - # ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), - # ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), - # - # ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), + ("(one plus two) ", ["one", "two", "plus"]), + ("(one prefixed) ", ["one", "prefixed"]), + ("(suffixed one) ", ["one", "suffixed"]), + ("(one ? 
two : three)", ["one", "two", "three", "?"]), + ("square(square(one))", ["one", ("square", 1), "square"]), + ("square ( square ( one ) )", ["one", ("square", 1), "square"]), + + ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]), + ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), + ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), + + ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), ("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), ("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]), @@ -666,6 +670,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ]) def test_i_can_pos_fix_when_parenthesis(self, expression, expected): sheerka, context, parser = self.init_parser() + context.add_to_protected_hints(BuiltinConcepts.DEBUG) res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = compute_expected_array(cmap, expression, expected) @@ -675,34 +680,30 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected_sequences", [ # composition - ("function(suffixed one)", [[SCWC("function(", ")", "one", "suffixed")]]), - ("function(one prefixed)", [[SCWC("function(", ")", "one", "prefixed")]]), - ("function(if one then two else three end)", [[SCWC("function(", ")", "one", "two", "three", "if")]]), - ("function(suffixed twenty two)", [ - [SCWC("function(", ")", "twenty ", "suffixed", "two")], - [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "suffixed")]]), - ("function(twenty two prefixed)", [ - [SCWC("function(", ")", "twenty ", "two", "prefixed")], - [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "prefixed")], - ]), - ("function(if one then twenty two else three end)", [ - ["')'", "one", "twenty ", "two"], # error - [SCWC("function(", ")", "one", short_cnode("twenties", 
"twenty two"), "three", "if")] - ]), - ("func1(func2(one two) three)", [ - [SCWC("func1(", (")", 1), SCWC("func2(", ")", "one", "two"), "three")]]), + ("function(suffixed one)", [[SCWC("function(", ")", CNC("suffixed", a="one"))]]), + ("function(one prefixed)", [[SCWC("function(", ")", CNC("prefixed", a="one"))]]), + ("function(if one then two else three end)", + [[SCWC("function(", ")", CNC("if", a="one", b="two", c="three", end=14))]]), + ("function(suffixed twenty two)", + [[SCWC("function(", ")", CNC("suffixed", a=CIO("twenties", source="twenty two")))]]), + ("function(twenty two prefixed)", + [[SCWC("function(", ")", CNC("prefixed", a=CIO("twenties", source="twenty two")))]]), + ("function(if one then twenty two else three end)", + [[SCWC("function(", ")", CNC("if", a="one", b=CIO("twenties", source="twenty two"), c="three", end=16))]]), + ("func1(func2(one two) three)", + [[SCWC("func1(", (")", 1), SCWC("func2(", ")", "one", "two"), "three")]]), ("twenty two(suffixed one)", [ - ["twenty ", SCWC("two(", ")", "one", "suffixed")], - [SCWC("twenty two(", ")", "one", "suffixed")], + ["twenty ", SCWC("two(", ")", CNC("suffixed", a="one"))], + [CN("twenties", source="twenty two"), "one", "suffixed"], ]), ("twenty two(one prefixed)", [ - ["twenty ", SCWC("two(", ")", "one", "prefixed")], - [SCWC("twenty two(", ")", "one", "prefixed")], + ["twenty ", SCWC("two(", ")", CNC("prefixed", a="one"))], + [CN("twenties", source="twenty two"), "one", "prefixed"], ]), ("f1(one plus two mult three) plus f2(suffixed x$!# prefixed)", [ - [SCWC("f1(", ")", "one", "two", "three", "mult", "plus"), - SCWC("f2(", (")", 1), "x$!#", "prefixed", "suffixed"), + [SCWC("f1(", ")", CN("plus", source="one plus two mult three")), + SCWC("f2(", (")", 1), CN("suffixed", source="suffixed x$!# prefixed")), ("plus", 1)] ]), @@ -715,12 +716,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): # Sequence ("if one then two else three end function(x$!#)", [ - ["one", "two", "three", "if", SCWC(" 
function(", ")", "x$!#")]]), - ("one prefixed function(two)", [["one", "prefixed", SCWC(" function(", ")", "two")]]), - ("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]), - ( - "func1(suffixed one func2(two))", - [[SCWC("func1(", (")", 1), "one", "suffixed", SCWC(" func2(", ")", "two"))]]), + ["one", "two", "three", "if", UTN(" ", start=13, end=13), SCWC("function(", ")", "x$!#")]]), + ("one prefixed function(two)", [["one", "prefixed", UTN(" ", start=3, end=3), SCWC("function(", ")", "two")]]), + ("suffixed one function(two)", [["one", "suffixed", UTN(" ", start=3, end=3), SCWC("function(", ")", "two")]]), + ("func(one, two, three)", [[SCWC("func(", ")", "one", ", ", "two", (", ", 1), "three")]]), ]) def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): sheerka, context, parser = self.init_parser() @@ -737,6 +736,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("one plus ( 1 + ", ("(", 4)), ("one( 1 + ", ("(", 1)), ("one ( 1 + ", ("(", 2)), + ("function(", ("(", 1)), ("function( 1 + ", ("(", 1)), ("function ( 1 + ", ("(", 2)), ("one plus ) 1 + ", (")", 4)), @@ -754,7 +754,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): res = parser.infix_to_postfix(context, ParserInput(expression)) assert len(res) == 1 - assert res[0].errors == [expected] + assert res[0].errors == [ParenthesisMismatchErrorNode(expected)] + + def test_i_can_detect_parenthesis_mismatch_error_special_case(self): + sheerka, context, parser = self.init_parser() + expression = "one ? function( : two" + expected = [ParenthesisMismatchErrorNode(("(", 5)), ParenthesisMismatchErrorNode(("(", 5))] + res = parser.infix_to_postfix(context, ParserInput(expression)) + + assert len(res) == 1 + assert res[0].errors == expected @pytest.mark.parametrize("expression, expected", [ ("one ? 
one two : three", ("?", ":")), @@ -802,29 +811,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert len(res) == 1 assert res[0].out == expected_array - def test_i_cannot_post_fix_using_concept_short_name(self): - concepts_map = { - "infixed": self.from_def_concept("infixed", "a infixed b", ["a", "b"]), - "suffixed": self.from_def_concept("suffixed", "suffixed a", ["a"]), - "prefixed": self.from_def_concept("prefixed", "a prefixed", ["a"]), - } - sheerka, context, parser = self.init_parser(concepts_map) - - res = parser.infix_to_postfix(context, ParserInput("desc(infixed)")) - assert len(res) == 1 - assert isinstance(res[0].out[0], SourceCodeWithConceptNode) - assert res[0].out[0].nodes[0].error == 'Not enough prefix parameters' - - res = parser.infix_to_postfix(context, ParserInput("desc(suffixed)")) - assert len(res) == 1 - assert isinstance(res[0].out[0], SourceCodeWithConceptNode) - assert res[0].out[0].nodes[0].error == 'Not enough suffix parameters' - - res = parser.infix_to_postfix(context, ParserInput("desc(prefixed)")) - assert len(res) == 1 - assert isinstance(res[0].out[0], SourceCodeWithConceptNode) - assert res[0].out[0].nodes[0].error == 'Not enough prefix parameters' - @pytest.mark.parametrize("expression", [ "one ? 
two : three", "one?two:three", @@ -861,7 +847,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): expression = "a plus plus equals b" res = parser.infix_to_postfix(context, ParserInput(expression)) expected_array = tests.parsers.parsers_utils.compute_debug_array(res) - assert expected_array == [ + assert len(expected_array) == len([ ["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"], ["T(a)", "C(a plus b)", "C(a plus plus)", "T(equals)", "T(b)"], ["T(a)", "C(a plus b)", "C(a plus equals b)", "T(equals)", "T(b)"], @@ -871,27 +857,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ["T(a)", "C(a plus equals b)", "C(a plus b)", "T(equals)", "T(b)"], ["T(a)", "C(a plus equals b)", "C(a plus plus)", "T(equals)", "T(b)"], ["T(a)", "C(a plus equals b)", "C(a plus equals b)", "T(equals)", "T(b)"], - ] - - def test_non_reg(self): - concepts_map = { - "plus": Concept("a plus b").def_var("a").def_var("b"), - "complex infix": Concept("a complex infix b ").def_var("a").def_var("b"), - } - - sya_def = { - # concepts_map["plus"]: (1, SyaAssociativity.Right), - # concepts_map["plus plus"]: (1, SyaAssociativity.Right), - # concepts_map["plus equals"]: (1, SyaAssociativity.Right), - } - - sheerka, context, parser = self.init_parser(concepts_map, sya_def) - - expression = "a plus complex infix b" - res = parser.infix_to_postfix(context, ParserInput(expression)) - - res = parser.parse(context, ParserInput(expression)) - pass + ]) def test_i_can_use_string_instead_of_identifier(self): concepts_map = { @@ -945,6 +911,81 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert len(res) == 1 assert res[0].out == expected_array + @pytest.mark.parametrize("expression, expected_debugs", [ + ("one", [[" 0:one => PUSH_UNREC"]]), + ("one plus two", [[ + ' 0:one => PUSH_UNREC', + ' 1: => PUSH_UNREC', + ' 2:plus(SyaConceptDef(concept=(1005)a plus b, precedence=1, associativity=right)) => ??', + " _: => RECOG [[CN((1001)one)]]", + " _: => POP 
ConceptNode(concept='(1001)one', source='one', start=0, end=0)", + ' 2:plus(SyaConceptDef(concept=(1005)a plus b, precedence=1, associativity=right)) => PUSH', + ' 3: => EAT', + ' 4:two => PUSH_UNREC', + ' 5: => ??', + " _: => RECOG [[CN((1002)two)]]", + " _: => POP ConceptNode(concept='(1002)two', source='two', start=4, end=4)", + ' _: => POP SyaConceptParserHelper(concept=(1005)a plus b, start=2, error=None)']]), + ("suffixed one", [[ + ' 0:suffixed(SyaConceptDef(concept=(1009)suffixed a, precedence=1, associativity=right)) => PUSH', + ' 1: => EAT', + ' 2:one => PUSH_UNREC', + ' 3: => ??', + " _: => RECOG [[CN((1001)one)]]", + " _: => POP ConceptNode(concept='(1001)one', source='one', start=2, end=2)", + ' _: => POP SyaConceptParserHelper(concept=(1009)suffixed a, start=0, error=None)' + ]]), + ("one ? twenty one : three", [[ + ' 0:one => PUSH_UNREC', + ' 1: => PUSH_UNREC', + ' 2:?(SyaConceptDef(concept=(1011)a ? b : c, precedence=1, associativity=right)) => ??', + " _: => RECOG [[CN((1001)one)]]", + " _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)", + ' 2:?(SyaConceptDef(concept=(1011)a ? b : c, precedence=1, associativity=right)) => PUSH', + ' 3: => EAT', + ' 4:twenty => PUSH_UNREC', + ' 5: => PUSH_UNREC', + ' 6:one => PUSH_UNREC', + ' 7: => PUSH_UNREC', + ' 8:: => ??', + " _: => RECOG [[UTN('twenty '), CN((1001)one)], [CN((1016)twenties)]]", + " _: => POP UnrecognizedTokensNode(source='twenty ', start=4, end=5)", + " _: => POP ConceptNode(concept='(1001)one', source='one', start=6, end=6)", + " _: => => ERROR Too many parameters found for '(1011)a ? b : c' before token 'Token(:)'", + ' 8:: => EAT', + ], [ + ' 0:one => PUSH_UNREC', + ' 1: => PUSH_UNREC', + ' 2:?(SyaConceptDef(concept=(1011)a ? b : c, precedence=1, associativity=right)) => ??', + ' _: => RECOG [[CN((1001)one)]]', + " _: => POP ConceptNode(concept='(1001)one', source='one', start=0, end=0)", + ' 2:?(SyaConceptDef(concept=(1011)a ? 
b : c, precedence=1, associativity=right)) => PUSH', + ' 3: => EAT', + ' 4:twenty => PUSH_UNREC', + ' 5: => PUSH_UNREC', + ' 6:one => PUSH_UNREC', + ' 7: => PUSH_UNREC', + ' 8:: => ??', + " _: => RECOG [[UTN('twenty '), CN((1001)one)], [CN((1016)twenties)]]", + " _: => POP ConceptNode(concept='(1016)twenties', source='twenty one', start=4, end=6, ConceptParts.BODY='DoNotResolve(value='twenty one')', unit='(1001)one')", + ' 9: => EAT', + ' 10:three => PUSH_UNREC', + ' 11: => ??', + ' _: => RECOG [[CN((1003)three)]]', + " _: => POP ConceptNode(concept='(1003)three', source='three', start=10, end=10)", + ' _: => POP SyaConceptParserHelper(concept=(1011)a ? b : c, start=2, error=None)' + ]]), + ]) + def test_i_can_debug(self, expression, expected_debugs): + sheerka, context, parser = self.init_parser() + context.add_to_private_hints(BuiltinConcepts.DEBUG) + res = parser.infix_to_postfix(context, ParserInput(expression)) + + assert len(res) == len(expected_debugs) + for res_i, expected_debug in zip(res, expected_debugs): + actual_debug = [str(di) for di in res_i.debug] + assert actual_debug == expected_debug + def test_i_can_parse_when_concept_atom_only(self): sheerka, context, parser = self.init_parser() @@ -1032,17 +1073,11 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert concept_suffixed_a == cmap["two"] @pytest.mark.parametrize("text, expected_status, expected_result", [ - ("function(suffixed one)", True, [ - SCWC("function(", ")", CNC("suffixed", 2, 4, a="one"))]), - ("function(one plus two mult three)", True, [ - SCWC("function(", ")", CNC("plus", 2, 10, a="one", b=CC("mult", a="two", b="three")))]), - ("f1(one prefixed) plus f2(suffixed two)", True, [ + ("f1(one prefixed) plus f2(suffixed two)", False, [ CNC("plus", a=SCWC("f1(", ")", CNC("prefixed", a="one")), b=SCWC("f2(", (")", 1), CNC("suffixed", a="two"))) ]), - ("function(suffixed x$!#)", False, [ - SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]), ("one is a concept", True, [CNC("is 
a concept", c="one")]), ("a is a concept", False, [CNC("is a concept", c=UTN("a"))]), ]) @@ -1058,6 +1093,19 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert lexer_nodes == expected_array + @pytest.mark.parametrize("text", [ + "function(suffixed one)", + "function(one plus two mult three)", + "function(suffixed x$!#)" + ]) + def test_i_cannot_parse_when_function_only(self, text): + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + @pytest.mark.parametrize("text", [ "foo bar (one", "foo bar one", @@ -1082,14 +1130,13 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]), ("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]), ("foo bar one plus two", [UTN("foo bar "), CNC("plus", a="one", b="two")]), - ("foo bar (one plus two", [UTN("foo bar ("), CNC("plus", a="one", b="two")]), ("one plus two a long other b", [CNC("plus", a="one", b="two"), UTN(" a long other b")]), ("one plus two a long infixed", [CNC("plus", a="one", b="two"), UTN(" a long infixed")]), ("one plus two a long", [CNC("plus", a="one", b="two"), UTN(" a long")]), ("one ? a long infixed : two", [CNC("?", a="one", b=UTN("a long infixed"), c="two")]), ("one ? a long infix : two", [CNC("?", a="one", b=UTN("a long infix"), c="two")]), ]) - def test_i_cannot_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result): + def test_i_can_almost_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result): """ We test that the parsed concept seems like a known one, but it was not. 
The parser has to detected that the predication was incorrect @@ -1194,3 +1241,57 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) + + @pytest.mark.parametrize("expression, expected", [ + ("function(", ([], "function(")), + ("before the function(", (["before the "], "function(")), + ("one two function(", (["one", "two", UTN(" ", 3, 3)], "function(")), + ("one(", ([], "one(")), + ("one before the function(", (["one", " before the "], "function(")), + ]) + def test_i_can_get_functions_names_from_unrecognized(self, expression, expected): + sheerka, context, parser = self.init_parser() + infix_to_postfix = InFixToPostFix(context) + + tokens = list(Tokenizer(expression, yield_eof=False)) + for pos, token in enumerate(tokens[:-1]): + infix_to_postfix.eat_unrecognized(token, pos) + + resolved_to_out = compute_expected_array(cmap, expression, expected[0]) + resolved_function_name = compute_expected_array(cmap, expression, [expected[1]]) + actual = infix_to_postfix.get_functions_names_from_unrecognized(tokens[-1], len(tokens) - 1) + + assert len(actual) == 1 + + assert actual[0].to_out == resolved_to_out + actual[0].function.fix_source() + assert actual[0].function == resolved_function_name[0] + + @pytest.mark.parametrize("expression, expected_list", [ + ("twenty two function(", [(["twenty ", "two", UTN(" ", 3, 3)], "function("), + ([CN("twenties", source="twenty two"), UTN(" ", 3, 3)], "function(")]), + ("twenty two(", [(["twenty "], "two("), + ([CN("twenties", source="twenty two")], None)]), + ]) + def test_i_can_get_functions_names_from_unrecognized_when_multiple_results(self, expression, expected_list): + sheerka, context, parser = self.init_parser() + infix_to_postfix = InFixToPostFix(context) + + tokens = list(Tokenizer(expression, yield_eof=False)) + for pos, token in enumerate(tokens[:-1]): + infix_to_postfix.eat_unrecognized(token, pos) + + actual_list = 
infix_to_postfix.get_functions_names_from_unrecognized(tokens[-1], len(tokens) - 1) + + assert len(actual_list) == len(expected_list) + + for actual, expected in zip(actual_list, expected_list): + resolved_to_out = compute_expected_array(cmap, expression, expected[0]) + + assert actual.to_out == resolved_to_out + if actual.function: + actual.function.fix_source() + resolved_function_name = compute_expected_array(cmap, expression, [expected[1]]) + assert actual.function == resolved_function_name[0] + else: + assert actual.function is None diff --git a/tests/parsers/test_UnrecognizedNodeParser.py b/tests/parsers/test_UnrecognizedNodeParser.py index 2a83606..72597a2 100644 --- a/tests/parsers/test_UnrecognizedNodeParser.py +++ b/tests/parsers/test_UnrecognizedNodeParser.py @@ -31,9 +31,9 @@ def get_input_nodes_from(my_concepts_map, full_expr, *args): if isinstance(n, SCWC): n.first = _get_real_node(n.first) - n.last = _get_real_node(n.first) + n.last = _get_real_node(n.last) n.content = tuple(_get_real_node(nn) for nn in n.content) - return SourceCodeWithConceptNode(n.first, n.last, list(n.content)) + return SourceCodeWithConceptNode(n.first, n.last, list(n.content)).pseudo_fix_source() if isinstance(n, (UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SourceCodeWithConceptNode)): return n @@ -254,6 +254,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert res.status assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == expression assert len(actual_nodes) == 1 assert actual_nodes[0] == scnode(0, 4, expression) @@ -270,6 +271,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert not res.status assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == expression assert len(actual_nodes) == 1 assert actual_nodes[0] == nodes[0] @@ -287,6 +289,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert res.status 
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == expression assert len(actual_nodes) == 1 expected_array = compute_expected_array( concepts_map, @@ -306,6 +309,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert res.status assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == expression assert len(actual_nodes) == 1 expected_array = compute_expected_array( @@ -328,8 +332,9 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): parser_result = res.body actual_nodes = res.body.body - assert not res.status # status is False to let PythonWithConceptParser validate the code + assert not res.status # status is False to let PythonWithConceptParser validate the code assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == expression assert len(actual_nodes) == 1 assert actual_nodes[0].nodes[0].concept.metadata.is_evaluated # 'a plus b' is recognized as concept definition @@ -348,9 +353,37 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert not res.status # status is False to let PythonWithConceptParser validate the code assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == expression assert len(actual_nodes) == 1 assert not actual_nodes[0].nodes[0].concept.metadata.is_evaluated # 'a plus b' need to be evaluated + def test_i_can_parse_unrecognized_sya_concept_that_references_source_code(self): + sheerka, context, parser = self.init_parser() + + expression = "hello get_user_name(twenty one)" + tmp_node = CNC("hello_sya", + source="hello get_user_name(twenty one)", + a=SCWC("get_user_name(", ")", CNC("twenties", source="twenty one", unit="one"))) + nodes = get_input_nodes_from(concepts_map, expression, tmp_node) + parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes) + + res = 
parser.parse(context, parser_input) + parser_result = res.body + actual_nodes = res.body.body + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert parser_result.source == expression + assert len(actual_nodes) == 1 + + expected_array = compute_expected_array( + concepts_map, + expression, [CN("hello_sya", source="hello get_user_name(twenty one)")], + exclude_body=True) + assert actual_nodes == expected_array + assert isinstance(actual_nodes[0].concept.compiled["a"], list) + assert sheerka.isinstance(actual_nodes[0].concept.compiled["a"][0], BuiltinConcepts.RETURN_VALUE) + def test_i_can_parse_sequences(self): sheerka, context, parser = self.init_parser()