Sheerka-Old/src/parsers/BaseNodeParser.py

from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
from typing import Set

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.rule import Rule
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import Node, BaseParser, ParsingError

DEBUG_COMPILED = True


@dataclass
class ChickenAndEggError(Exception):
    concepts: Set[str]


@dataclass
class NoFirstTokenError(ParsingError):
    concept: Concept
    key: str


@dataclass()
class LexerNode(Node):
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        if self.source is None:
            self.source = core.utils.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.tokens == other.tokens

    def fix_source(self, force=True):
        if force or self.source is None:
            self.source = core.utils.get_text_from_tokens(self.tokens)
        return self

    def clone(self):
        pass

    def to_short_str(self):
        raise NotImplementedError


class UnrecognizedTokensNode(LexerNode):
    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False  # TODO: Remove as it seems to now be useless
        self.parenthesis_count = 0

    def freeze(self):
        # TODO: Remove as it seems to now be useless
        self.is_frozen = True

    def reset(self):
        self.start = self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0
        self.source = ""

    def add_token(self, token, pos):
        if self.is_frozen:
            raise Exception("The node is frozen")

        if self.end != -1 and pos == self.end + 2:
            # add the missing whitespace
            p = self.tokens[-1]  # previous token
            self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1))

        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            self.start = pos

        if token.type == TokenKind.LPAR:
            self.parenthesis_count += 1

        if token.type == TokenKind.RPAR:
            self.parenthesis_count -= 1

        return self

    def pop(self, token_kind):
        if self.is_frozen:
            raise Exception("The node is frozen")

        if len(self.tokens) > 0 and self.tokens[-1].type == token_kind:
            self.tokens.pop()
            if len(self.tokens) == 0:
                self.reset()
            else:
                self.end -= 1

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        for t in self.tokens:
            if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                return False
        return True

    def is_empty(self):
        return len(self.tokens) == 0

    def last_token_type(self):
        if len(self.tokens) == 0:
            return None
        return self.tokens[-1].type

    def __eq__(self, other):
        if isinstance(other, utnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, UTN):
            return other == self

        if not isinstance(other, UnrecognizedTokensNode):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(source='{self.source}', start={self.start}, end={self.end})"

    def clone(self):
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        clone.is_frozen = self.is_frozen
        clone.parenthesis_count = self.parenthesis_count
        return clone

    def to_short_str(self):
        return f"UTN('{self.source}')"


class RuleNode(LexerNode):
    def __init__(self, rule, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        self.rule = rule
        self.fix_source(False)

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, RN):
            return other == self

        if not isinstance(other, RuleNode):
            return False

        return self.rule == other.rule and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.rule, self.start, self.end, self.source))

    def __repr__(self):
        return f"RuleNode(rule='{self.rule}', source='{self.source}', start={self.start}, end={self.end})"

    def clone(self):
        return RuleNode(self.rule, self.start, self.end, self.tokens, self.source)

    def to_short_str(self):
        return f'RN({self.rule})'


class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying
        self.fix_source(False)

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, (CN, CNC)):
            return other == self

        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                   self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return self.concept == other.concept and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
        if DEBUG_COMPILED:
            for k, v in self.concept.get_compiled().items():
                text += f", {k}='{v}'"
        return text + ")"

    def clone(self):
        # do we need to clone the concept as well ?
        clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
        return clone

    def as_bag(self):
        """
        Creates a dictionary with the useful properties of the ConceptNode
        see Concept.as_bag() for extra informations
        """
        bag = {}
        for k, v in self.__dict__.items():
            bag[k] = v

        # if isinstance(self.concept, Concept):
        #     bag["compiled"] = self.concept.get_compiled()
        return bag

    def to_short_str(self):
        return f'CN({self.concept})'


class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code is recognized)
    """

    def __init__(self, start, end, tokens=None, source=None, python_node=None, return_value=None):
        """

        :param start: start position (index of the first token)
        :param end: end position (index of the last token)
        :param tokens:
        :param source: tokens as string
        :param python_node: PythonNode found (when the SourceCodeNode is validated)
        :param return_value: ReturnValueConcept returned when the source was validated

        When return_value is provided,
        You should have return_value.body.body == node
        """
        super().__init__(start, end, tokens, source)
        self.python_node = python_node  # The PythonNode (or whatever language node) that is found
        self.return_value = return_value  # original result of the parsing

    def __eq__(self, other):
        if isinstance(other, scnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, SCN):
            return other == self

        if not isinstance(other, SourceCodeNode):
            return False

        return self.python_node == other.python_node and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"

    def to_short_str(self):
        return f"SCN('{self.source}')"


class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of temporary version for SourceCodeNode
    I know that there is some code,
    I know that there are some concepts
    I just don't want to make the glue yet

    So I push all the nodes into one big bag
    """

    def __init__(self, first_node, last_node, content_nodes=None, has_unrecognized=False):
        super().__init__(9999, -1, None)  # why not sys.maxint ?
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = has_unrecognized
        self._all_nodes = None
        self.fix_all_pos()

        self.python_node = None  # if the source code node is validated against a python parse, here is the PythonNode
        self.return_value = None  # return_value that produced the PythonNode

    def add_node(self, node):
        self.nodes.append(node)
        self.fix_pos(node)
        self._all_nodes = None

        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, SCWC):
            return other == self

        if not isinstance(other, SourceCodeWithConceptNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if self.first != other.first:
            return False

        if self.last != other.last:
            return False

        if len(self.nodes) != len(other.nodes):
            return False

        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False

        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        if self.first is None:  # to ease some unit test where only the python_node is necessary
            return

        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start

        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end
        return self

    def pseudo_fix_source(self):
        """
        pseudo because the code is not that clean !
        :return:
        """
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def get_all_nodes(self):
        if self._all_nodes:
            return self._all_nodes

        self._all_nodes = [self.first, *self.nodes, self.last]
        return self._all_nodes

    def clone(self):
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes.copy(), self.has_unrecognized)
        return clone

    def to_short_str(self):
        return f"SCWC({self.first}" + ", ".join(n.to_short_str for n in self.nodes) + f"{self.last})"


@dataclass()
class GrammarErrorNode(ParsingError):
    message: str


class SyaAssociativity(Enum):
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        return self.value


cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")


class HelperWithPos:
    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end

        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    def fix_pos(self, node):
        if not self.start_is_fixed:
            start = node.start if hasattr(node, "start") else \
                node[0] if isinstance(node, tuple) else None

            if start is not None and (self.start is None or start < self.start):
                self.start = start

        if not self.end_is_fixed:
            end = node.end if hasattr(node, "end") else \
                node[1] if isinstance(node, tuple) else None

            if end is not None and (self.end is None or end > self.end):
                self.end = end
        return self


class SCN(HelperWithPos):
    """
    SourceCodeNode tester class
    It matches with SourceCodeNode but with less constraints

    SCN == SourceCodeNode if source, start, end (start and end are not validated when None)
    """

    def __init__(self, source, start=None, end=None):
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, SourceCodeNode):
            if self.source != other.source:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False

            return True

        if not isinstance(other, CN):
            return False

        return self.source == other.source and \
               self.start == other.start and \
               self.end == other.end

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"SCN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class SCWC(HelperWithPos):
    """
    SourceNodeWithConcept tester class
    It matches with a SourceNodeWithConcept
    but it's easier to instantiate during the tests
    """

    def __init__(self, first, last, *args):
        super().__init__(None, None)
        self.first = first
        self.last = last
        self.content = args

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, SourceCodeWithConceptNode):
            if self.first != other.first:
                return False

            if self.last != other.last:
                return False

            if len(self.content) != len(other.nodes):
                return False

            for self_node, other_node in zip(self.content, other.nodes):
                if self_node != other_node:
                    return False

            # at last
            return True

    def __repr__(self):
        txt = "SCWC("
        if self.start is not None:
            txt += f"start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        txt += f", source='{self.source}'"
        return txt + ")"

    @property
    def source(self):
        """
        this code is a copy and paste from SourceCodeWithConceptNode.pseudo_fix_source
        TODO: create a common function or whatever...
        :return:
        """
        source = self.first.source if hasattr(self.first, "source") else self.first
        for n in self.content:
            source += " "
            if hasattr(n, "source"):
                source += n.source
            elif hasattr(n, "concept"):
                source += str(n.concept)
            else:
                source += " unknown"
        source += self.last.source if hasattr(self.last, "source") else self.last
        return source


class CN(HelperWithPos):
    """
    ConceptNode tester class
    It matches with ConceptNode but with less constraints

    CN == ConceptNode if concept key, start, end and source are the same
    """

    def __init__(self, concept, start=None, end=None, source=None):
        """

        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.source = source
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            if self.source is not None and self.source != other.source:
                return False
            return True

        if not isinstance(other, CN):
            return False

        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CN(concept='{self.concept}'"
        else:
            txt = f"CN(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class CNC(CN):
    """
    ConceptNode for Compiled tester class
    It matches with ConceptNode
    But focuses on the 'compiled' property of the concept

    CNC == ConceptNode if CNC.get_compiled() == ConceptNode.concept.get_compiled()
    """

    def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs
        self.exclude_body = exclude_body
        if "body" in self.compiled:
            self.compiled[ConceptParts.BODY] = self.compiled["body"]
            del self.compiled["body"]

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            if self.source is not None and self.source != other.source:
                return False
            if self.exclude_body:
                to_compare = {k: v for k, v in other.concept.get_compiled().items() if k != ConceptParts.BODY}
            else:
                to_compare = other.concept.get_compiled()
            if self.compiled == to_compare:  # expanded form to ease the debug
                return True
            else:
                return False

        if not isinstance(other, CNC):
            return False

        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.compiled == other.compiled

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"

        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"


class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokenNode
    compare the source, and start, end  if defined
    """

    def __init__(self, source, start=None, end=None):
        """
        :param source:
        :param start:
        :param end:
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, UnrecognizedTokensNode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if not isinstance(other, UTN):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class RN(HelperWithPos):
    """
    Helper class to test RuleNode
    """

    def __init__(self, rule, start=None, end=None, source=None):
        """

        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.rule_id = rule.id if isinstance(rule, Rule) else rule
        self.source = source or core.utils.str_concept((None, self.rule_id), prefix="r:")
        self.rule = rule if isinstance(rule, Rule) else None

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, RuleNode):
            if other.rule is None:
                return False
            if other.rule.id != self.rule_id:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            if self.source is not None and self.source != other.source:
                return False
            return True

        if not isinstance(other, RN):
            return False

        return self.rule_id == other.rule_id and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.rule_id, self.start, self.end, self.source))

    def __repr__(self):
        if self.rule:
            txt = f"RN(rule='{self.rule}'"
        else:
            txt = f"RN(rule_id='{self.rule_id}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class BaseNodeParser(BaseParser):
    """
    Parser that return LexerNode
    """

    def __init__(self, name, priority, **kwargs):
        super().__init__(name, priority, yield_eof=True)
        if 'sheerka' in kwargs:
            sheerka = kwargs.get("sheerka")
            self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword

        else:
            self.concepts_by_first_keyword = None

    def init_from_concepts(self, context, concepts, **kwargs):
        """
        Initialize the parser with a list of concepts
        For unit tests convenience
        :param context
        :param concepts
        :return:
        """
        concepts_by_first_keyword = self.get_concepts_by_first_token(context, concepts).body
        self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body

    def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
        """
        Tries to find if there are concepts that match the value of the token
        Caution: Returns the actual cache, not a copy
        :param token:
        :param to_keep: predicate to tell if the concept is eligible
        :param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
        :param to_map:
        :param strip_quotes: Remove quotes from strings
        :return:
        """

        if token.type == TokenKind.WHITESPACE:
            return None

        if token.type == TokenKind.STRING:
            name = token.value[1:-1] if strip_quotes else token.value
        else:
            name = token.value

        custom_concepts = custom(name) if custom else []  # to get extra concepts using an alternative method

        result = []
        if name in self.concepts_by_first_keyword:
            for concept_id in self.concepts_by_first_keyword.get(name):

                concept = self.sheerka.get_by_id(concept_id)

                if not to_keep(concept):
                    continue

                concept = to_map(concept, self, self.sheerka) if to_map else concept
                result.append(concept)

            return core.utils.make_unique(result + custom_concepts,
                                          lambda c: c.concept.id if hasattr(c, "concept") else c.id)

        return custom_concepts if custom else None

    @staticmethod
    def get_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
        """
        Create the map describing the first token expected by a concept
        :param context:
        :param concepts: lists of concepts to parse
        :param use_sheerka: if True, update concepts_by_first_keyword from sheerka
        :param previous_entries:
        :return:
        """
        sheerka = context.sheerka
        res = sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) if use_sheerka else (previous_entries or {})
        for concept in concepts:
            keywords = BaseNodeParser.get_first_tokens(sheerka, concept)

            if keywords is None:
                # no first token found for a concept ?
                return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

            for keyword in keywords:
                res.setdefault(keyword, []).append(concept.id)

        # 'uniquify' the lists
        for k, v in res.items():
            res[k] = core.utils.make_unique(v)

        return sheerka.ret("BaseNodeParser", True, res)

    @staticmethod
    def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword, modified_concepts=None):
        sheerka = context.sheerka
        res = {}

        def get_by_id(c_id):
            if modified_concepts and c_id in modified_concepts:
                return modified_concepts[c_id]
            return sheerka.get_by_id(c_id)

        def resolve_concepts(concept_str):
            c_key, c_id = core.utils.unstr_concept(concept_str)
            if c_id in already_seen:
                return ChickenAndEggError(already_seen)

            already_seen.add(c_id)

            resolved = set()
            to_resolve = set()
            chicken_and_egg = set()

            concept = get_by_id(c_id)

            if sheerka.isaset(context, concept):
                concepts = sheerka.get_set_elements(context, concept)
            else:
                concepts = [concept]

            for concept in concepts:
                BaseNodeParser.ensure_bnf(context, concept)  # need to make sure that it cannot fail
                keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
                for keyword in keywords:
                    (to_resolve if keyword.startswith("c:|") else resolved).add(keyword)

                for concept_to_resolve_str in to_resolve:
                    res = resolve_concepts(concept_to_resolve_str)
                    if isinstance(res, ChickenAndEggError):
                        chicken_and_egg |= res.concepts
                    else:
                        resolved |= res
                to_resolve.clear()

            if len(resolved) == 0 and len(chicken_and_egg) > 0:
                raise ChickenAndEggError(chicken_and_egg)
            else:
                return resolved

        for k, v in concepts_by_first_keyword.items():
            if k.startswith("c:|"):
                try:
                    already_seen = set()
                    resolved_keywords = resolve_concepts(k)
                    for resolved in resolved_keywords:
                        res.setdefault(resolved, []).extend(v)
                except ChickenAndEggError as ex:
                    context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
                    concepts_in_recursion = ex.concepts
                    # make sure to have all the parents
                    for parent in v:
                        concepts_in_recursion.add(parent)

                    for concept_id in concepts_in_recursion:
                        # make sure we keep the longest chain
                        old = sheerka.chicken_and_eggs.get(concept_id)
                        if old is None or len(old) < len(ex.concepts):
                            sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)
            else:
                res.setdefault(k, []).extend(v)

        # 'uniquify' the lists
        for k, v in res.items():
            res[k] = core.utils.make_unique(v)

        return sheerka.ret("BaseNodeParser", True, res)

    @staticmethod
    def get_referenced_concepts(context, concept_id, already_seen):
        """
        Gets all the tokens that may allow to recognize concept concept_id
        Basically, it returns all the starting tokens for concept concept_id
        CHICKEN_AND_EGG is returned when a circular references are found
        :param context:
        :param concept_id:
        :param already_seen:
        :return:
        """
        if concept_id in already_seen:
            return ChickenAndEggError(already_seen)

        already_seen.add(concept_id)

        resolved = set()
        to_resolve = set()
        chicken_and_egg = set()
        sheerka = context.sheerka
        concept = sheerka.get_by_id(concept_id)

        if sheerka.isaset(context, concept):
            concepts = sheerka.get_set_elements(context, concept)
        else:
            concepts = [concept]

        for concept in concepts:
            BaseNodeParser.ensure_bnf(context, concept)  # need to make sure that it cannot fail
            keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
            for keyword in keywords:
                (to_resolve if keyword.startswith("c:|") else resolved).add(keyword)

            for concept_to_resolve_str in to_resolve:
                c_key, c_id = core.utils.unstr_concept(concept_to_resolve_str)
                res = BaseNodeParser.get_referenced_concepts(context, c_id, already_seen)
                if isinstance(res, ChickenAndEggError):
                    chicken_and_egg |= res.concepts
                else:
                    resolved |= res
            to_resolve.clear()

        if len(resolved) == 0 and len(chicken_and_egg) > 0:
            raise ChickenAndEggError(chicken_and_egg)
        else:
            return resolved

    @staticmethod
    def resolve_sya_associativity_and_precedence(context, sya):
        pass

    @staticmethod
    def get_first_tokens(sheerka, concept):
        """

        :param sheerka:
        :param concept:
        :return:
        """
        if concept.get_bnf():
            from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
            bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
            bnf_visitor.visit(concept.get_bnf())
            return bnf_visitor.first_tokens
        else:
            keywords = concept.key.split()
            for keyword in keywords:
                if keyword.startswith(VARIABLE_PREFIX):
                    continue

                return [keyword]

        return None

    @staticmethod
    def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
        if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF and not concept.get_bnf():
            from parsers.BnfDefinitionParser import BnfDefinitionParser
            regex_parser = BnfDefinitionParser()
            desc = f"Resolving BNF '{concept.get_metadata().definition}'"
            with context.push(BuiltinConcepts.INIT_BNF,
                              concept,
                              who=parser_name,
                              obj=concept,
                              desc=desc) as sub_context:
                sub_context.add_inputs(parser_input=concept.get_metadata().definition)
                bnf_parsing_ret_val = regex_parser.parse(sub_context, concept.get_metadata().definition)
                sub_context.add_values(return_values=bnf_parsing_ret_val)

                if not bnf_parsing_ret_val.status:
                    raise Exception(bnf_parsing_ret_val.value)

                concept.set_bnf(bnf_parsing_ret_val.body.body)
                if concept.id:
                    context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf())  # update bnf in cache