Implemented FunctionParser

2020-09-17 14:11:09 +02:00
parent 8a866880bc
commit 177a6b1d5f
40 changed files with 1752 additions and 561 deletions
@@ -16,6 +16,8 @@ class BuiltinConcepts(Enum):
    SHEERKA = "sheerka"

    # processing instructions during sheerka.execute()
+    # The instruction may alter how the actions work
+    DEBUG = "debug" # activate all debug information
    EVAL_BODY_REQUESTED = "eval body"  # to evaluate the body
    EVAL_WHERE_REQUESTED = "eval where"  # to evaluate the where clause
    RETURN_BODY_REQUESTED = "return body"  # returns the body of the concept instead of the concept itself
@@ -6,14 +6,16 @@ from core.ast.nodes import CallNodeConcept
 from core.ast.visitors import UnreferencedNamesVisitor
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import Concept, NotInit, ConceptParts
+from core.sheerka.services.SheerkaExecute import SheerkaExecute
 from core.tokenizer import Keywords
 # from evaluators.BaseEvaluator import BaseEvaluator
-from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
+from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode, SourceCodeWithConceptNode
 from parsers.BaseParser import BaseParser, ErrorNode

 PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
 EVAL_STEPS = PARSE_STEPS + [BuiltinConcepts.BEFORE_EVALUATION, BuiltinConcepts.EVALUATION,
                            BuiltinConcepts.AFTER_EVALUATION]
+PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"]


 def is_same_success(context, return_values):
@@ -342,6 +344,37 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
        return no_python


+def parse_function(context, source, tokens=None, start=0):
+    """
+    Helper function to parse what is supposed to be a function
+    :param context:
+    :param source:
+    :param tokens:
+    :param start: start index for the source code node
+    :return:
+    """
+    sheerka = context.sheerka
+    from parsers.FunctionParser import FunctionParser
+    parser = FunctionParser()
+    desc = f"Parsing function '{source}'"
+    with context.push(BuiltinConcepts.PARSE_CODE, source, desc=desc) as sub_context:
+        sheerka_execution = sheerka.services[SheerkaExecute.NAME]
+        res = parser.parse(sub_context, sheerka_execution.get_parser_input(source, tokens))
+
+        if not isinstance(res, list):
+            res = [res]
+
+        for r in [r for r in res if sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT)]:
+            r.body.body.start += start
+            r.body.body.end += start
+            if isinstance(r.body.body, SourceCodeWithConceptNode):
+                for n in [r.body.body.first, r.body.body.last] + r.body.body.nodes:
+                    n.start += start
+                    n.end += start
+
+    return res
+
+
 def evaluate(context,
             source,
             evaluators="all",
@@ -415,7 +448,12 @@ def get_lexer_nodes(return_values, start, tokens):

            end = start + len(tokens) - 1
            lexer_nodes.append(
-                [SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
+                [SourceCodeNode(start,
+                                end,
+                                tokens,
+                                ret_val.body.source,
+                                python_node=ret_val.body.body,
+                                return_value=ret_val)])

        elif ret_val.who == "parsers.ExactConcept":
            concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -479,6 +517,81 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
    return get_lexer_nodes(res.body.body, unrecognized_tokens_node.start, unrecognized_tokens_node.tokens)


+def update_compiled(context, concept, errors, parsers=None):
+    """
+    Recursively iterate through concept.compiled to replace the LexerNodes by concepts
+    or by lists of ReturnValueConcept
+    When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...)
+    the result will be a LexerNode.
+    In the specific case of a ConceptNode, the compiled variables will also be LexerNodes (UnrecognizedTokensNode...)
+    This function iterates through the compiled variables to transform these nodes into concepts or compiled AST
+    Once done, the concept is flagged as evaluated when the source of every variable is the name of the variable
+    :param context:
+    :param concept: concept whose compiled entries are updated in place
+    :param errors: a list that must be initialized by the caller (an ERROR concept is appended for each failure)
+    :param parsers: to customize the parsers to use (PARSERS is used when not provided)
+    :return:
+    """
+
+    sheerka = context.sheerka
+    parsers = parsers or PARSERS
+
+    def _validate_concept(c):
+        """
+        Recursively browse the compiled properties in order to find the unrecognized nodes
+        and replace them by the result of their parsing
+        :param c:
+        :return:
+        """
+        # entries are replaced under their existing key, so the size of the dict does not change while iterating
+        for k, v in c.compiled.items():
+            if isinstance(v, Concept):
+                _validate_concept(v)
+
+            elif isinstance(v, SourceCodeWithConceptNode):
+                from parsers.PythonWithConceptsParser import PythonWithConceptsParser
+                parser_helper = PythonWithConceptsParser()
+                res = parser_helper.parse_nodes(context, v.get_all_nodes())
+                if res.status:
+                    # the return value itself is kept, wrapped into a list
+                    c.compiled[k] = [res]
+                else:
+                    errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))
+
+            elif isinstance(v, UnrecognizedTokensNode):
+                res = parse_unrecognized(context, v.source, parsers)
+                res = only_successful(context, res)  # only keep the successful parsers
+                if res.status:
+                    c.compiled[k] = res.body.body
+                else:
+                    errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))
+
+    def _get_source(compiled, var_name):
+        """
+        Return the source of the parser result stored in compiled[var_name]
+        None is returned unless the entry is a list of exactly one RETURN_VALUE whose body is a PARSER_RESULT
+        that is not named 'parsers.ShortTermMemory'
+        :param compiled:
+        :param var_name:
+        :return:
+        """
+        if var_name not in compiled:
+            return None
+        if not isinstance(compiled[var_name], list):
+            return None
+        if not len(compiled[var_name]) == 1:
+            return None
+        if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE):
+            return None
+        if not sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT):
+            return None
+        if compiled[var_name][0].body.name == "parsers.ShortTermMemory":
+            return None
+
+        return compiled[var_name][0].body.source
+
+    _validate_concept(concept)
+
+    # Special case where the values of the variables are the names of the variables
+    # example : Concept("a plus b").def_var("a").def_var("b")
+    # and the user has entered 'a plus b'
+    # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
+    # This means that 'a' and 'b' don't have any real value
+    if len(concept.metadata.variables) > 0:
+        for name, value in concept.metadata.variables:
+            if _get_source(concept.compiled, name) != name:
+                break
+        else:
+            # the loop was not interrupted : every variable source is the variable name
+            concept.metadata.is_evaluated = True
+
+
 def get_names(sheerka, concept_node):
    """
    Finds all the names referenced by the concept_node
@@ -603,10 +716,11 @@ def remove_from_ret_val(sheerka, return_values, concept_key):
    return return_values


-def set_is_evaluated(concepts):
+def set_is_evaluated(concepts, check_nb_variables=False):
    """
    set is_evaluated to True
    :param concepts:
+    :param check_nb_variables: only set is_evaluated if the concept has variables
    :return:
    """
    if concepts is None:
@@ -614,6 +728,8 @@ def set_is_evaluated(concepts):

    if hasattr(concepts, "__iter__"):
        for c in concepts:
-            c.metadata.is_evaluated = True
+            if not check_nb_variables or check_nb_variables and len(c.metadata.variables) > 0:
+                c.metadata.is_evaluated = True
    else:
-        concepts.metadata.is_evaluated = True
+        if not check_nb_variables or check_nb_variables and len(concepts.metadata.variables) > 0:
+            concepts.metadata.is_evaluated = True
@@ -130,7 +130,7 @@ class Concept:
        if isinstance(other, simplec):
            return self.name == other.name and self.body == other.body

-        if isinstance(other, (CC, CB, CV, CMV)):
+        if isinstance(other, (CC, CB, CV, CMV, CIO)):
            return other == self

        if not isinstance(other, Concept):
@@ -726,4 +726,45 @@ class CMV:
        return txt + ")"


+class CIO:
+    """
+    Concept id only
+    only test the id
+    """
+
+    def __init__(self, concept, source=None):
+        if isinstance(concept, str):
+            self.concept_name = concept
+            self.concept_id = None
+            self.concept = None
+        elif isinstance(concept, Concept):
+            self.concept_id = concept.id
+            self.concept = concept
+        self.source = source
+        self.start = -1
+        self.end = -1
+
+    def set_concept(self, concept):
+        self.concept = concept
+        self.concept_id = concept.id
+
+    def __eq__(self, other):
+        if id(self) == id(other):
+            return True
+
+        if isinstance(other, Concept):
+            return self.concept_id == other.id
+
+        if not isinstance(other, CIO):
+            return False
+
+        return self.concept_id == other.concept_id
+
+    def __hash__(self):
+        return hash(self.concept_id)
+
+    def __repr__(self):
+        return f"CIO(concept='{self.concept}')" if self.concept else f"CIO(name='{self.concept_name}')"
+
+
 simplec = namedtuple("concept", "name body")  # for simple concept (tests purposes only)
@@ -1,7 +1,7 @@
 import logging
 import time

-from core.builtin_concepts import BuiltinConcepts
+from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
 from core.concept import Concept
 from core.sheerka.services.SheerkaExecute import NO_MATCH
 from core.sheerka.services.SheerkaShortTermMemory import SheerkaShortTermMemory
@@ -309,6 +309,15 @@ class ExecutionContext:
    def in_private_context(self, concept_key):
        return concept_key in self.private_hints

+    def add_to_private_hints (self, concept_key):
+        self.private_hints.add(concept_key)
+
+    def add_to_protected_hints(self, concept_key):
+        """
+        Add concept_key to the protected hints of this context
+        :param concept_key:
+        :return:
+        """
+        self.protected_hints.add(concept_key)
+
+    def add_to_global_hints(self, concept_key):
+        """
+        Add concept_key to the global hints of this context
+        :param concept_key:
+        :return:
+        """
+        self.global_hints.add(concept_key)
+
    @staticmethod
    def _is_return_value(obj):
        return isinstance(obj, Concept) and obj.key == str(BuiltinConcepts.RETURN_VALUE)
@@ -358,7 +367,11 @@ class ExecutionContext:
            ret_val = self.values["return_values"]
            if not isinstance(ret_val, Concept) or not ret_val.key == str(BuiltinConcepts.RETURN_VALUE):
                return None
-            return ret_val.status
+            if ret_val.status:
+                return True
+            if isinstance(ret_val.body, ParserResultConcept):
+                return "Almost"
+            return False

    def as_bag(self):
        """
@@ -558,6 +558,12 @@ class Sheerka(Concept):
        return self._get_unknown(metadata)

    def resolve(self, concept):
+        """
+        Try to find a concept by its name, id, or c:: definition
+        A new instance (using new_from_template()) is returned when it's possible
+        :param concept:
+        :return:
+        """

        def new_instances(concepts):
            if hasattr(concepts, "__iter__"):
@@ -567,6 +573,9 @@ class Sheerka(Concept):
        if concept is None:
            return None

+        # ##############
+        # PREPROCESS
+        # ##############
        # if the entry is a concept token, use its values.
        if isinstance(concept, Token):
            if concept.type != TokenKind.CONCEPT:
@@ -578,6 +587,9 @@ class Sheerka(Concept):
            (tmp := core.utils.unstr_concept(concept)) != (None, None):
            concept = tmp

+        # ##############
+        # PROCESS
+        # ##############
        # if the entry is a tuple
        # concept[0] is the name
        # concept[1] is the id
@@ -599,7 +611,7 @@ class Sheerka(Concept):
        if isinstance(concept, str):
            if self.is_known(found := self.get_by_name(concept)):
                instances = new_instances(found)
-                core.builtin_helpers.set_is_evaluated(instances)
+                core.builtin_helpers.set_is_evaluated(instances, check_nb_variables=True)
                return instances

        return None
@@ -5,7 +5,7 @@ from core.sheerka.services.sheerka_service import BaseService

 CONCEPTS_FILE = "_concepts_lite.txt"
 CONCEPTS_FILE_ALL_CONCEPTS = "_concepts.txt"
-CONCEPTS_FILE_TO_USE = CONCEPTS_FILE_ALL_CONCEPTS
+CONCEPTS_FILE_TO_USE = CONCEPTS_FILE

 class SheerkaAdmin(BaseService):
    NAME = "Admin"
@@ -47,6 +47,9 @@ class SheerkaAdmin(BaseService):
        if concept_file == "full":
            concept_file = CONCEPTS_FILE_ALL_CONCEPTS

+        elif not concept_file.startswith("_concepts"):
+            concept_file = f"_concepts_{concept_file}.txt"
+
        try:
            start = time.time_ns()
            nb_lines = 0
@@ -2,7 +2,7 @@ import core.utils
 from cache.Cache import Cache
 from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
 from core.sheerka.services.sheerka_service import BaseService
-from core.tokenizer import Tokenizer, TokenKind, Keywords, Token
+from core.tokenizer import Tokenizer, TokenKind, Token

 NO_MATCH = "** No Match **"

@@ -88,6 +88,20 @@ class ParserInput:

        return self.pos < self.end

+    def seek(self, pos):
+        """
+        Move the token offset to position pos
+        :param pos:
+        :return: True is pos is a valid position False otherwise
+        """
+        if pos < 0 or pos >= self.end:
+            self.token = None
+            return False
+
+        self.pos = pos
+        self.token = self.tokens[self.pos]
+        return True
+
    def is_empty(self):
        if self.text.strip() == "":
            return True
@@ -116,7 +130,6 @@ class ParserInput:
            tokens = [tokens]

        switcher = {
-            TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
            TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
        }

@@ -31,6 +31,7 @@ class SheerkaModifyConcept(BaseService):

        if old_version == concept:
            # the concept is not modified
+            # This is an important sanity check. Do not remove it just because you don't understand it
            return self.sheerka.ret(
                self.NAME, False,
                self.sheerka.new(
@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from typing import List

 from cache.Cache import Cache
+from core.builtin_concepts import BuiltinConcepts
 from core.sheerka.services.sheerka_service import ServiceObj, BaseService


@@ -48,6 +49,7 @@ class SheerkaVariableManager(BaseService):

        variable = Variable(context.event.get_digest(), who, key, value, None)
        self.sheerka.cache_manager.put(self.VARIABLES_ENTRY, variable.get_key(), variable)
+        return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))

    def load(self, who, key):
        variable = self.sheerka.cache_manager.get(self.VARIABLES_ENTRY, who + "|" + key)
@@ -62,6 +62,7 @@ class Token:

    _strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
    _str_value: str = field(default=None, repr=False, compare=False, hash=None)
+    _repr_value: str = field(default=None, repr=False, compare=False, hash=None)

    def __repr__(self):
        if self.type == TokenKind.IDENTIFIER:
@@ -82,7 +83,7 @@ class Token:
        if self._strip_quote:
            return self._strip_quote

-        self._strip_quote = self._to_str(True)
+        self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
        return self._strip_quote

    @property
@@ -90,18 +91,36 @@ class Token:
        if self._str_value:
            return self._str_value

-        self._str_value = self._to_str(False)
+        self._str_value = self.to_str(False)
        return self._str_value

+    @property
+    def repr_value(self):
+        if self._repr_value:
+            return self._repr_value
+
+        if self.type == TokenKind.EOF:
+            self._repr_value = "<EOF>"
+        elif self.type == TokenKind.WHITESPACE:
+            self._repr_value = "<ws>"
+        elif self.type == TokenKind.NEWLINE:
+            self._repr_value = "<nl>"
+        else:
+            self._repr_value = self.str_value
+        return self._repr_value
+
    @staticmethod
    def is_whitespace(token):
        return token and token.type == TokenKind.WHITESPACE

-    def _to_str(self, strip_quote):
+    def to_str(self, strip_quote):
+        """
+        Text of the token value
+        :param strip_quote: when True, the first and last characters of a STRING token are removed
+        :return: value.value for a KEYWORD, str_concept(value) for a CONCEPT, str(value) otherwise
+        """
        if strip_quote and self.type == TokenKind.STRING:
            return self.value[1:-1]
        elif self.type == TokenKind.KEYWORD:
            return self.value.value
+        elif self.type == TokenKind.CONCEPT:
+            from core.utils import str_concept
+            return str_concept(self.value)
        else:
            return str(self.value)

@@ -136,8 +155,6 @@ class Tokenizer:
    Class that can iterate on the tokens
    """

-    KEYWORDS = set(x.value for x in Keywords)
-
    def __init__(self, text, yield_eof=True, parse_word=False):
        self.text = text
        self.text_len = len(text)
@@ -175,9 +192,7 @@ class Tokenizer:
                from core.concept import VARIABLE_PREFIX
                if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
                    identifier = self.eat_identifier(self.i)
-                    token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
-                    value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
-                    yield Token(token_type, value, self.i, self.line, self.column)
+                    yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
                    self.i += len(identifier)
                    self.column += len(identifier)
                elif self.i + 7 < self.text_len and \
@@ -335,11 +350,9 @@ class Tokenizer:
                yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
                self.i += len(word)
                self.column += len(word)
-            elif c.isalpha() or c == "_":
+            elif c.isalpha():
                identifier = self.eat_identifier(self.i)
-                token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
-                value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
-                yield Token(token_type, value, self.i, self.line, self.column)
+                yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
                self.i += len(identifier)
                self.column += len(identifier)
            elif c.isdigit():
@@ -457,7 +470,7 @@ class Tokenizer:

        i = start_index + 1
        escape = False
-        #newline = None
+        # newline = None
        while i < self.text_len:
            c = self.text[i]
            result += c
@@ -296,6 +296,28 @@ def dict_product(a, b):
    return res


+def get_n_clones(obj, n):
+    objs = [obj]
+    for i in range(n - 1):
+        objs.append(obj.clone())
+    return objs
+
+
+def obj_product(list_of_objs, new_items, add_item):
+    if list_of_objs is None or len(list_of_objs) == 0:
+        return list_of_objs
+
+    res = []
+
+    for obj in list_of_objs:
+        instances = get_n_clones(obj, len(new_items))
+        res.extend(instances)
+        for instance, item in zip(instances, new_items):
+            add_item(instance, item)
+
+    return res
+
+
 def strip_quotes(text):
    if not isinstance(text, str):
        return text