Sheerka-Old/src/evaluators/DefConceptEvaluator.py

from dataclasses import dataclass

import core.utils
from core.ast_helpers import UnreferencedVariablesVisitor
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.global_symbols import NotInit
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Tokenizer
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefConceptParser import DefConceptNode, NameNode
from parsers.PythonParser import get_python_node


@dataclass(frozen=True, eq=True)
class MandatoryVariable:
    """
    Marker for a variable name that MUST be matched in the concept name.

    Variable detection only yields *potential* variables, so a detected name
    that has no match in the concept definition is normally harmless.
    For example:
        def concept foo x as isinstance(x, str)
        {x, str} are detected as potential variables, and 'str' simply finds no match.

    In some cases however the detected variable has to exist, otherwise it is an error.
    For example:
        def concept foo from bnf xxx
        'xxx' is detected as a variable (assuming that no concept is named 'xxx'),
        so a match must be found in the name of the concept.

    Wrapping the name in a MandatoryVariable is how the two situations are told apart.
    """
    name: str

    def __hash__(self):
        # The class tag is part of the hashed tuple, so the hash is not the one of the bare name
        tagged_name = ("MandatoryVariable", self.name)
        return hash(tagged_name)


class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
    """
    Collects, in ``self.names``, the names referenced by a BNF parsing expression.

    When a node carries a rule_name, that rule_name is recorded as a potential property.
    Variable expressions are recorded wrapped in a MandatoryVariable.
    """

    def __init__(self):
        super().__init__()
        self.names = set()

    def visit_ConceptExpression(self, node):
        # An explicit rule name takes precedence over the referenced concept
        if node.rule_name:
            found = node.rule_name
        else:
            target = node.concept
            # 'target' is either a Concept (keep its name) or something else, recorded as is
            found = target.name if isinstance(target, Concept) else target
        self.names.add(found)

    def visit_VariableExpression(self, node):
        # A variable expression always yields a mandatory variable
        mandatory = MandatoryVariable(node.rule_name)
        self.names.add(mandatory)

    def visit_all(self, node):
        # NOTE(review): presumably the handler used for the other node kinds - confirm
        # against ParsingExpressionVisitor
        rule_name = node.rule_name
        if not rule_name:
            return
        self.names.add(rule_name)


class DefConceptEvaluator(OneReturnValueEvaluator):
    """
    Used to add a new concept

    It handles the successful return values whose payload is a ParserResultConcept
    wrapping a DefConceptNode, builds the corresponding Concept (sources, variables,
    key, bnf) and asks sheerka to create it.
    """
    NAME = "DefConcept"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 50)

    def matches(self, context, return_value):
        """
        Tells whether this evaluator can process the return value.

        :param context: execution context (used here only to get a debugger)
        :param return_value: return value to inspect
        :return: a truthy value when return_value is successful and wraps a DefConceptNode
        """
        debugger = context.get_debugger(self.NAME, "matches")
        debugger.debug_entering(return_value=return_value)
        return (return_value.status
                and isinstance(return_value.value, ParserResultConcept)
                and isinstance(return_value.value.value, DefConceptNode))

    def eval(self, context, return_value):
        """
        Builds a new concept from the DefConceptNode and registers it.

        :param context: execution context, gives access to sheerka, logging and debugging
        :param return_value: return value whose ``value.value`` is the DefConceptNode
        :return: the result of ``sheerka.ret``: either the outcome of
            ``sheerka.create_new_concept`` or an ERROR concept listing the mandatory
            variables that have no match in the name
        :raises ValueError: when a part of the definition is neither NotInit, a NameNode
            nor a successful ReturnValueConcept
        """
        context.log("Adding a new concept", self.name)
        def_concept_node = return_value.value.value
        sheerka = context.sheerka

        debugger = context.get_debugger(self.NAME, "eval")
        debugger.debug_entering(def_concept=def_concept_node)

        # validate the node
        variables_found = set()
        mandatory_variables = set()  # these variables MUST have a match in the name (if the name is not None)

        concept = Concept(str(def_concept_node.name))
        concept.get_metadata().definition_type = def_concept_node.definition_type
        name_to_use = self.get_name_to_use(def_concept_node)

        # get variables
        for prop in ("definition", "where", "pre", "post", "body", "ret"):

            part_ret_val = getattr(def_concept_node, prop)

            # put back the sources
            if part_ret_val is NotInit:
                continue
            elif isinstance(part_ret_val, NameNode):
                source = str(part_ret_val)
            elif isinstance(part_ret_val, ReturnValueConcept) and part_ret_val.status:
                source = self._source_text(part_ret_val.value)
            else:
                # ValueError is still an Exception, so existing handlers keep working
                raise ValueError(f"Unexpected value for '{prop}': {part_ret_val!r}")
            setattr(concept.get_metadata(), prop, source)

            # Do not try to resolve variables from itself
            if prop == "definition" and concept.get_metadata().definition_type == DEFINITION_TYPE_DEF:
                continue

            # try to find what can be a property
            for p in self.get_variables(context, part_ret_val, name_to_use):
                if isinstance(p, MandatoryVariable):
                    variables_found.add(p.name)
                    mandatory_variables.add(p.name)
                else:
                    variables_found.add(p)

        # add variables by order of appearance when possible
        for name_part in name_to_use:
            if name_part in variables_found:
                concept.def_var(name_part, None)

        # check that all mandatory variables are defined in the name
        # KSI: 2021-02-17
        # The mandatory variables come from the bnf definition where it was not possible to resolve to a concept
        # So rather than issuing a 'UnresolvedVariableError' I prefer UNKNOWN_CONCEPT
        if diff := mandatory_variables.difference(name_to_use):
            unknown_concepts = [sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": c}) for c in sorted(diff)]
            error = sheerka.new(BuiltinConcepts.ERROR, body=unknown_concepts)
            return sheerka.ret(self.name, False, error, parents=[return_value])

        # add the remaining properties
        # They mainly come from BNF definition
        # A set has no stable iteration order (string hashes are randomized per process),
        # so the names are sorted to always define the variables in the same order
        for p in sorted(variables_found, key=str):
            if p not in concept.values():
                concept.def_var(p, None)

        # initialize the key (same choice of source as in get_name_to_use)
        if def_concept_node.definition_type == DEFINITION_TYPE_DEF:
            key_source = def_concept_node.definition.tokens
        else:
            key_source = def_concept_node.name.tokens
        concept.init_key(key_source)

        # update the bnf definition if needed
        if def_concept_node.definition is not NotInit and \
                def_concept_node.definition_type == DEFINITION_TYPE_BNF:
            concept.set_bnf(def_concept_node.definition.value.value)

        ret = sheerka.create_new_concept(context, concept)
        if not ret.status:
            error_cause = sheerka.objvalue(ret.body)
            context.log(f"Failed to add concept '{concept.name}'. Reason: {error_cause}", self.name)
        return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value])

    @staticmethod
    def _source_text(parser_result):
        """
        Returns the source of a parser result as text.

        :param parser_result: object exposing a ``source`` attribute
        :return: ``source.as_text()`` when the source is a ParserInput, the source itself otherwise
        """
        source = parser_result.source
        return source.as_text() if isinstance(source, ParserInput) else source

    @staticmethod
    def get_name_to_use(node):
        """
        Returns the parts of the name in which the variables are searched.

        :param node: DefConceptNode
        :return: list of the ``str_value`` of the stripped tokens of the definition
            (when the definition type is DEFINITION_TYPE_DEF) or of the name (otherwise)
        """
        source = node.definition if node.definition_type == DEFINITION_TYPE_DEF else node.name
        return [part.str_value for part in core.utils.strip_tokens(source.tokens, True)]

    @staticmethod
    def get_variables(context, ret_value, concept_name):
        """
        Try to find out the variables
        This function can only be a draft, as there may be tons of different situations
        I guess that it can only be complete when we will have access to Sheerka memory

        :param context: execution context, gives access to sheerka and to the debugger
        :param ret_value: part of the concept definition (NameNode or return value of a parser)
        :param concept_name: parts of the concept name, as returned by get_name_to_use
        :return: set of potential variable names; names coming from a BNF definition
            may be wrapped in a MandatoryVariable
        """
        debugger = context.get_debugger(DefConceptEvaluator.NAME, "get_variables")
        #
        # Case of NameNode
        #
        if isinstance(ret_value, NameNode):
            names = [str(t.value) for t in ret_value.tokens if t.type in (
                TokenKind.IDENTIFIER, TokenKind.STRING, TokenKind.KEYWORD)]
            debugger.debug_var("names", names, hint="from NameNode")
            # keep only the names that appear in the concept name and pass the sheerka check
            return {x for x in names if x in concept_name and context.sheerka.is_not_a_variable(x)}

        #
        # case of BNF
        #
        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression):
            visitor = ConceptOrRuleNameVisitor()
            visitor.visit(ret_value.value.value)
            debugger.debug_var("names", visitor.names, hint="from BNF")
            return set(visitor.names)

        #
        # Case of python code
        #
        if (python_node := get_python_node(ret_value.value.value)) is not None:
            # a name made of a single part is not searched for variables
            if len(concept_name) <= 1:
                return set()

            visitor = UnreferencedVariablesVisitor(context)
            names = visitor.get_names(python_node.ast_)
            debugger.debug_var("names", names, hint="from python node")
            return {x for x in names if x in concept_name and context.sheerka.is_not_a_variable(x)}

        #
        # Concept
        #
        if isinstance(ret_value.value, ParserResultConcept) and len(concept_name) > 1:
            source = DefConceptEvaluator._source_text(ret_value.value)
            # tokenize the source only when the parser result does not provide the tokens
            tokens = ret_value.value.tokens or list(Tokenizer(source, yield_eof=False))
            possible_vars = set()
            for t in tokens:
                if t.type == TokenKind.RULE:
                    # the value of a RULE token is iterated, its None items are skipped
                    possible_vars.update(v for v in t.value if v is not None)
                else:
                    possible_vars.add(t.str_value)

            # only the alphanumeric parts of the name can be variables
            variables = {i for i in concept_name if str(i).isalnum() and i in possible_vars}
            debugger.debug_var("names", variables, hint="from concept")
            return variables

        return set()