Sheerka-Old/src/parsers/BnfNodeParser.py

#####################################################################################################
# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# I don't directly use the project, but it helped me figure out
# what to do.
#       Dejanović I., Milosavljević G., Vaderna R.:
#       Arpeggio: A flexible PEG parser for Python,
#       Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
#####################################################################################################
import re
from collections import defaultdict
from dataclasses import dataclass, field
from operator import attrgetter
from typing import List

import core.builtin_helpers
import core.utils
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DEFINITION_TYPE_BNF, DoNotResolve
from core.global_symbols import NotFound
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import CONSOLE_COLORS_MAP as CCM
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, GrammarErrorNode, NoMatchingTokenError, RuleNode, \
    SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensCache, UnrecognizedTokensNode

PARSERS = ["Sequence", "Sya", "Python"]
VARIABLE_EXPR_PARSER = ["Sequence", "Sya", "Python", "Bnf"]


@dataclass(eq=True)
class RegExDef:
    to_match: str = None
    ignore_case: bool = True
    multiline: bool = None
    explicit_flags: int = re.MULTILINE

    def __hash__(self):
        return hash((self.to_match, self.ignore_case, self.multiline, self.explicit_flags))

    @staticmethod
    def compile_flags(ignore_case, multiline, explicit_flags):
        flags = explicit_flags
        if multiline is True:
            flags |= re.DOTALL
        if multiline is False and flags & re.DOTALL:
            flags -= re.DOTALL
        if ignore_case is True:
            flags |= re.IGNORECASE
        if ignore_case is False and flags & re.IGNORECASE:
            flags -= re.IGNORECASE
        return flags

    def serialize(self):
        return f"{self.to_match}__!##ZZSEPZZ##!__{self.ignore_case}|{self.multiline}|{int(self.explicit_flags)}"

    def deserialize(self, txt):
        parts = txt.split("__!##ZZSEPZZ##!__")
        parts2 = parts[1].split("|")
        self.to_match = parts[0]
        self.ignore_case = None if parts2[0] == "None" else True if parts2[0] == "True" else False
        self.multiline = None if parts2[1] == "None" else True if parts2[1] == "True" else False
        self.explicit_flags = int(parts2[2])

        return self


class ParseTreeNode:
    def __init__(self, parsing_expression, start: int, end: int, tokens: List[Token] = None, source: str = None):
        self.parsing_expression = parsing_expression
        self.start = start
        self.end = end
        self.tokens = tokens
        self.source = source

        if self.source is None:
            self.source = core.utils.get_text_from_tokens(self.tokens)


class NonTerminalNode(ParseTreeNode):
    """
    A ParseTreeNode is the result of the parsing of a parsing expression (pexpression)
    NonTerminalNode when parsing a pexpression which has children (Sequence, OrderedChoice, Optional, Repetition...)
    """

    def __init__(self, parsing_expression, start, end, tokens, children=None):
        super().__init__(parsing_expression, start, end, tokens)
        self.children = children

    def __repr__(self):
        name = "Node:" + (self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__)
        if len(self.children) > 0:
            sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
        else:
            sub_names = ""
        return name + sub_names

    def __eq__(self, other):
        if not isinstance(other, NonTerminalNode):
            return False

        return self.parsing_expression == other.parsing_expression and \
               self.start == other.start and \
               self.end == other.end and \
               self.children == other.children

    def __hash__(self):
        return hash((self.parsing_expression, self.start, self.end, self.children))

    def clone(self):
        clone = NonTerminalNode(self.parsing_expression, self.start, self.end, self.tokens, self.children.copy())
        return clone

    def get_debug(self):
        res = f"{self.parsing_expression.concept}=>" if isinstance(self.parsing_expression, ConceptExpression) else ""
        return res + ".".join([c.get_debug() for c in self.children])

    def get_depth(self):
        if isinstance(self.parsing_expression, ConceptExpression):
            return 1 + max([c.get_depth() for c in self.children])
        else:
            return max([c.get_depth() for c in self.children])


class TerminalNode(ParseTreeNode):
    """
    A ParseTreeNode is the result of the parsing of a parsing expression (pexpression)
    TerminalNode for StrMatch
    """

    def __init__(self, parsing_expression, start, end, source, value):
        super().__init__(parsing_expression, start, end, source=source)
        self.value = value

    def __repr__(self):
        name = "Node:" + (self.parsing_expression.rule_name or "")
        return name + f"'{self.value}'"

    def __eq__(self, other):
        if not isinstance(other, TerminalNode):
            return False

        return self.parsing_expression == other.parsing_expression and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.value == other.value

    def __hash__(self):
        return hash((self.parsing_expression, self.start, self.end, self.source, self.value))

    def clone(self):
        clone = TerminalNode(self.parsing_expression, self.start, self.end, self.source, self.value)
        return clone

    def get_debug(self):
        return str(self.value)

    def get_depth(self):
        return 0


class MultiNode:
    """"
    MultiNode is used by the UnorderedChoice parsing expression when multiple choices are found
    It should inherit from ParseTreeNode, like its siblings TerminalNode and NonTerminal node
    but I am too lazy to bother with start and end positions
    """

    def __init__(self, results):
        self.results = results

    def __repr__(self):
        text = "MultiNode("
        sources = [r.node.source for r in self.results]
        text += f"{sources})"
        return text

    def combine(self, parsing_expression):
        for i in range(len(self.results)):
            node = self.results[i].node
            self.results[i].node = NonTerminalNode(parsing_expression,
                                                   node.start,
                                                   node.end,
                                                   node.tokens,
                                                   [node])

        return self


@dataclass
class ParsingContext:
    """
    Class used to allow backtracking when parsing UnOrderedChoice pexpression
    It keeps the ParseTreeNode parsed and the position of the parser right after the parsing
    """
    node: ParseTreeNode  # node or nodes parsed
    pos: int  # position of the parser after the parsing
    next_results: List[ParseTreeNode] = None  # other node parsed, when known
    variables: dict = field(default_factory=dict)  # variables already seen
    to_remove: bool = False  # an error/inconsistency is detected. Remove this parsing context ASAP

    def clone(self):
        next_result_clones = [n.clone() for n in self.next_results] if self.next_results else None
        return ParsingContext(self.node.clone(), self.pos, next_result_clones, self.variables.copy(), False)

    def fix_tokens(self, parser_helper):
        """
        When the nodes are fully created, make sure that their sources and tokens are correct
        :param parser_helper:
        :return:
        """
        self.node.tokens = parser_helper.parser.parser_input.tokens[self.node.start: self.node.end + 1]
        self.node.source = core.utils.get_text_from_tokens(self.node.tokens)

    def update_with_ptree_node(self, ptree_node, pos):
        next_results = None

        if isinstance(ptree_node, list):
            next_results = ptree_node[1:]
            ptree_node = ptree_node[0]

        if ptree_node.end == -1:
            # means that the node must not be added, but the parsing context is not in error
            return

        if isinstance(ptree_node.parsing_expression, VariableExpression):
            # check the variables consistency
            var_name = ptree_node.parsing_expression.rule_name
            if var_name in self.variables and self.variables[var_name] != ptree_node.source:
                self.to_remove = True
                return
            self.variables[var_name] = ptree_node.source

        self.pos = pos
        self.node.children.append(ptree_node)
        self.node.end = ptree_node.end
        if ptree_node.start < self.node.start:
            # fix start pos when sequence stars with VariableExpression
            self.node.start = ptree_node.start

        if next_results is not None:
            self.next_results = next_results

    def __mul__(self, other):
        res = [self]
        for i in range(other - 1):
            res.append(self.clone())
        return res

    def __repr__(self):
        if isinstance(self.node, list):
            res = f"ParsingContext('{[n.get_debug() for n in self.node]}', pos={self.pos})"
        else:
            res = f"ParsingContext('{self.node.get_debug()}', pos={self.pos})"
        return res

    def get_depth(self):
        if isinstance(self.node, list):
            return max([n.get_depth() for n in self.node])
        else:
            return self.node.get_depth()


class ParsingExpression:
    log_sink = []

    @classmethod
    def reset_logs(cls):
        cls.log_sink.clear()

    def __init__(self, *args, **kwargs):
        self.elements = args

        nodes = kwargs.get('nodes', []) or []
        if not hasattr(nodes, '__iter__'):
            nodes = [nodes]
        self.nodes = nodes

        self.rule_name = kwargs.get('rule_name', '')

    def __eq__(self, other):
        if not isinstance(other, ParsingExpression):
            return False

        if self.rule_name != other.rule_name:
            return False

        if len(self.elements) != len(other.elements):
            return False

        for self_element, other_element in zip(self.elements, other.elements):
            if self_element != other_element:
                return False

        return True

    def __hash__(self):
        return hash((self.rule_name, self.elements))

    def parse(self, parser_helper):
        # TODO : add memoization

        # parser_helper.debugger.debug_log(f">> {parser_helper.pos:3d} : {self}")
        # if self.debug_enabled:
        #     self.debug(f">> {parser_helper.pos:3d} : {self}")

        res = self._parse(parser_helper)
        return res

    def add_rule_name_if_needed(self, text):
        return text + "=" + self.rule_name if self.rule_name else text

    def inner_get_debug(self, n, tab=""):
        """

        :param n: line number
        :param tab: current indentation
        :return:
        """

        if not self.debug:
            return None

        id_self = id(self)

        def add_debug_for_current(_n, _debug):
            if n >= len(self.log_sink):
                return _n, _debug

            _l = self.log_sink[_n]
            while _l[0] == id_self:
                _debug += tab + _l[1] + "\n"
                _n += 1
                if _n == len(self.log_sink):
                    return _n, _debug
                _l = self.log_sink[_n]
            return _n, _debug

        # if n >= len(self.log_sink):
        #     return n, None
        #
        # line = self.log_sink[n]
        #
        # if line[0] != id_self:
        #     # return n, f"{tab}>> No log for {self}\n"
        #     return n, None

        debug = ""
        n, debug = add_debug_for_current(n, debug)
        # while line[0] == id_self:
        #     debug += tab + line[1] + "\n"
        #     n += 1
        #     if n == len(self.log_sink):
        #         return n, debug
        #     line = self.log_sink[n]

        for node in self.nodes:
            n, node_debug = node.inner_get_debug(n, tab + "  ")
            if node_debug:
                debug += node_debug
            n, debug = add_debug_for_current(n, debug)

        return n, debug

    @staticmethod
    def debug_prefix(self_name, parser_helper):
        current_rule_name = parser_helper.get_current_rule_name()
        current_concept = parser_helper.concepts[-1]
        str_rule_name = f":{current_rule_name}" if current_rule_name not in (None, current_concept.name) else ""
        return f"{self_name}({current_concept}{str_rule_name})"

    @staticmethod
    def debug_remaining_text(parser_helper):
        remaining_text = parser_helper.get_parsing_text()[parser_helper.token.index:]
        if len(remaining_text) > 50:
            remaining_text = remaining_text[:47] + "..."
        return remaining_text

    @staticmethod
    def debug_to_raw(variables):
        res = ""
        first = True
        for k, v in variables.items():
            if not first:
                res += ", "
            res += f"{k}={v}"
            first = False

        return res


class ConceptExpression(ParsingExpression):
    """
    Will match a concept
    It used only for rule definition

    When the grammar is created, it is replaced by the actual concept
    """

    def __init__(self, concept, rule_name="", nodes=None):
        super().__init__(rule_name=rule_name, nodes=nodes)
        self.concept = concept

    def __repr__(self):
        return self.add_rule_name_if_needed(f"{self.concept}")

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, ConceptExpression):
            return False

        if isinstance(self.concept, Concept):
            return self.concept.id == other.concept.id

        # when it's only the name of the concept
        return self.concept == other.concept

    def __hash__(self):
        return hash((self.concept, self.rule_name))

    def _parse(self, parser_helper):
        parser_helper.rules_names.append(self.rule_name)
        parser_helper.push_concept(self.concept)
        # parser_helper.debug_concept(self.debug_prefix("ConceptExpression", parser_helper) + "=start")

        node = self.nodes[0].parse(parser_helper)

        # parser_helper.debug_concept(self.debug_prefix("ConceptExpression", parser_helper) + "=end")

        parser_helper.pop_concept()
        parser_helper.rules_names.pop()

        if node is None:
            return None

        if isinstance(node, MultiNode):
            return node.combine(self)

        return NonTerminalNode(self,
                               node.start,
                               node.end,
                               parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
                               [node])

    @staticmethod
    def get_recursion_id(parent_id, concept_id, rule_name):
        return f"{parent_id}#{concept_id}({rule_name})"


class VariableExpression(ParsingExpression):
    def __init__(self, rule_name):
        super().__init__(rule_name=rule_name)
        self.before_first_token_node = False
        self.expected_variables = [self]
        self.next_node_to_parse = None

    def __repr__(self):
        return self.add_rule_name_if_needed(f"Var")

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        return isinstance(other, VariableExpression)

    def __hash__(self):
        return hash(("VariableExpression", self.rule_name))

    def init_parsing(self):
        """
        Get the instance of the following VariableExpression if they exists,
        :return:
        """

        next_node_to_parse = self.nodes[0] if len(self.nodes) > 0 else None
        while isinstance(next_node_to_parse, VariableExpression):
            self.expected_variables.append(next_node_to_parse)
            next_node_to_parse = next_node_to_parse.nodes[0] if len(next_node_to_parse.nodes) > 0 else None

        self.next_node_to_parse = next_node_to_parse

    def get_nodes_sequences_when_variables_are_first(self, parser_helper):
        if len(parser_helper.sequence) < len(self.expected_variables):
            # variable(s) is/are expected. But nothing found
            return None

        # only take the requested number of variables
        nodes_sequence = parser_helper.sequence[-len(self.expected_variables):]
        nodes_sequences = [nodes_sequence]

        return nodes_sequences

    def get_nodes_sequences_when_variables_are_last(self, parser_helper):
        tokens = parser_helper.get_remaining_tokens()
        start = parser_helper.pos
        end = parser_helper.get_last_token_pos()
        nodes_sequences = self.get_nodes_sequences_from_tokens(parser_helper, start, end, tokens)
        if not nodes_sequences:
            return nodes_sequences

        # only take the requested number of variables
        sequences_to_keep = []
        for sequence in nodes_sequences:
            if len(sequence) < len(self.expected_variables):
                continue  # not enough parameters to feed the VariableExpression
            sequences_to_keep.append(sequence[:len(self.expected_variables)])

        return sequences_to_keep

    def get_nodes_sequences_when_variables_are_in_between(self, parser_helper):
        start = parser_helper.pos
        end = parser_helper.get_last_token_pos()
        # start by the end, to be the greediest
        while end >= start:
            parser_helper.seek(end)
            node = self.next_node_to_parse.parse(parser_helper)
            if node and node.end != -1:
                break
            end -= 1
        tokens = parser_helper.parser.parser_input.tokens[start:end]
        parser_helper.seek(end)  # for the next node
        return self.get_nodes_sequences_from_tokens(parser_helper, start, end, tokens)

    def _parse(self, parser_helper):

        if parser_helper.debugger.is_enabled():
            debug_prefix = self.debug_prefix("VariableExpression", parser_helper)
            debug_vars = {"pos": parser_helper.pos,
                          "expected variables": self.expected_variables,
                          "next to match": self.next_node_to_parse}
            debug_text = self.debug_to_raw(debug_vars)
            parser_helper.debug_concept(debug_prefix, color="cyan", raw=debug_text)

        if self.before_first_token_node:
            nodes_sequences = self.get_nodes_sequences_when_variables_are_first(parser_helper)
        elif not self.next_node_to_parse:
            nodes_sequences = self.get_nodes_sequences_when_variables_are_last(parser_helper)
        else:
            nodes_sequences = self.get_nodes_sequences_when_variables_are_in_between(parser_helper)

        if nodes_sequences is None or self.has_unrecognized(nodes_sequences):
            # nothing is recognized or only part is recognized
            return None

        all_results = []

        for nodes_sequence in nodes_sequences:
            # this outer loop deals with when there a multiple choices
            # ie, the result in either nodes_sequence_1 or nodes_sequence_2, etc..

            ptree_nodes = []
            for variable_expr, node in zip(self.expected_variables, nodes_sequence):
                # this inner loop deals with results with multiples concepts in a row
                # ie the result is a sequence of node_1, then node_2, etc...

                resolved = self.get_resolved(node)
                if resolved is None:
                    parser_helper.errors.append(f"Failed to recognize {node.source}")
                    break

                ptree_nodes.append(TerminalNode(variable_expr, node.start, node.end, node.source, resolved))

            if len(ptree_nodes) != len(nodes_sequence):
                # it means that we did not recognize all the nodes
                # So it's a mismatch
                continue

            # finally adds the results
            if len(ptree_nodes) == 1:
                all_results.append(ptree_nodes[0])
            else:
                all_results.append(ptree_nodes)

        if len(all_results) == 0:
            return None

        # every seems to be fine. We can pop the nodes from parser_helper used as variable
        if self.before_first_token_node:
            for i in range(len(self.expected_variables)):
                parser_helper.sequence.pop()

        if len(all_results) == 1:
            return all_results[0]

        #  all results are valid, let's return them
        parsing_contexts = [ParsingContext(ptree_node, parser_helper.pos) for ptree_node in all_results]
        return MultiNode(parsing_contexts)

    @staticmethod
    def get_resolved(node):
        """
        Turn Lexer node into Concept, Rule or List[ReturnValueConcept], (basically what is
            expected by SheerkaEvaluateConcept.resolve())
        May be merged with builtin_helpers.update_compiled() ?
        :param node:
        :return:
        """
        if isinstance(node, UnrecognizedTokensNode):
            return None

        if isinstance(node, RuleNode):
            return node.rule

        if isinstance(node, ConceptNode):
            return node.concept

        if isinstance(node, (SourceCodeNode, SourceCodeWithConceptNode)):
            return node.return_value

        raise NotImplementedError()

    @staticmethod
    def get_nodes_sequences_from_tokens(parser_helper, start, end, tokens):
        if len(tokens) == 0:
            return None

        utn = UnrecognizedTokensNode(start, end, tokens)
        nodes_sequences = parser_helper.parser.cache2.get_lexer_nodes_from_unrecognized(parser_helper.parser.context,
                                                                                        utn)
        return nodes_sequences

    @staticmethod
    def has_unrecognized(nodes_sequence: list):
        for n in nodes_sequence:
            if isinstance(n, UnrecognizedTokensNode):
                return True

        return False


class Sequence(ParsingExpression):
    """
    Will match sequence of parser expressions in exact order they are defined.
    """

    def _parse(self, parser_helper):
        init_pos = parser_helper.pos
        end_pos = parser_helper.pos

        if parser_helper.debugger.is_enabled():
            debug_prefix = self.debug_prefix("Sequence", parser_helper)
            debug_vars = {"pos": parser_helper.pos,
                          "nodes": self.nodes,
                          "to_match": self.debug_remaining_text(parser_helper)}
            debug_text = self.debug_to_raw(debug_vars)
            parser_helper.debug_concept(debug_prefix, color="cyan", raw=debug_text)

        ntn = NonTerminalNode(self,
                              init_pos,
                              end_pos,
                              None,
                              [])
        parsing_contexts = [ParsingContext(ntn, parser_helper.pos)]
        to_append = []
        for e in self.nodes:

            for pcontext in parsing_contexts:
                if parser_helper.debugger.is_enabled():
                    parser_helper.debug_concept(debug_prefix, node=e, pcontext=pcontext)

                if pcontext.next_results:
                    node = pcontext.next_results
                else:
                    parser_helper.seek(pcontext.pos)
                    node = e.parse(parser_helper)

                if node is None:
                    pcontext.to_remove = True

                elif isinstance(node, MultiNode):
                    clones = pcontext * len(node.results)  # clones pcontext (and first item is current pcontext)
                    to_append.extend(clones[1:])
                    for clone, node_pcontext in zip(clones, node.results):
                        clone.update_with_ptree_node(node_pcontext.node, node_pcontext.pos)

                else:
                    pcontext.update_with_ptree_node(node, parser_helper.pos)

            # clean up and reorganize list of parsing_contexts
            parsing_contexts.extend(to_append)
            core.utils.remove_from_list(parsing_contexts, lambda pc: pc.to_remove)

            if len(parsing_contexts) == 0:
                if parser_helper.debugger.is_enabled():
                    parser_helper.debug_concept(debug_prefix,
                                                raw="All pcontexts are failed. Sequence failed",
                                                color="red")
                return None

            to_append.clear()

        # reset tokenizer for the following pexpression
        parser_helper.seek(parsing_contexts[0].pos)

        # update nodes sources and tokens
        for pcontext in parsing_contexts:
            pcontext.fix_tokens(parser_helper)

        if len(parsing_contexts) == 1:
            # parser_helper.debugger.debug_log(f"<< Found match '{parsing_contexts[0].node.source}'")
            return parsing_contexts[0].node

        # parser_helper.debugger.debug_log(f"<< Found matches {[r.node.source for r in parsing_contexts]}")
        return MultiNode(parsing_contexts)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})")


class OrderedChoice(ParsingExpression):
    """
    Will match the first one among multiple
    It will stop at the first match (so the order of definition is important)
    TODO : implement MultiNode support
    """

    def _parse(self, parser_helper):
        init_pos = parser_helper.pos

        for e in self.nodes:
            node = e.parse(parser_helper)
            if node:
                return NonTerminalNode(self,
                                       init_pos,
                                       node.end,
                                       parser_helper.parser.parser_input.tokens[init_pos: node.end + 1],
                                       [node])

            parser_helper.seek(init_pos)  # backtrack

        return None

    def __repr__(self):
        to_str = "| ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})")


class UnOrderedChoice(ParsingExpression):
    """
    May match many nodes. It will return nodes sorted by length
    All elements will be tested, so the order is not important
    The behaviour when multiple candidates with same length are found is not defined yet
    """

    def _parse(self, parser_helper):
        init_pos = parser_helper.pos
        parsing_contexts = []

        if parser_helper.debugger.is_enabled():
            debug_prefix = self.debug_prefix("UnOrderedChoice", parser_helper)
            debug_vars = {"pos": parser_helper.pos, "text": self.debug_remaining_text(parser_helper)}
            debug_text = self.debug_to_raw(debug_vars)
            parser_helper.debug_concept(debug_prefix, color="cyan", raw=debug_text)

        debug_text = ""
        for e in self.nodes:
            if isinstance(e, ConceptExpression) and e.concept.id in parser_helper.get_concepts_ids():
                # avoid circular reference
                continue

            node = e.parse(parser_helper)
            if node:
                debug_text += CCM["green"] + str(e) + CCM["reset"] + ", "
                if isinstance(node, MultiNode):
                    node.combine(self)
                    parsing_contexts.extend(node.results)
                else:
                    tn = NonTerminalNode(self,
                                         init_pos,
                                         node.end,
                                         parser_helper.parser.parser_input.tokens[init_pos: node.end + 1],
                                         [node])
                    parsing_contexts.append(ParsingContext(tn, parser_helper.pos))
            else:
                debug_text += f"{e}, "
            parser_helper.seek(init_pos)  # backtrack

        if parser_helper.debugger.is_enabled():
            parser_helper.debug_concept(debug_prefix, raw=f"[{debug_text}]")

        if len(parsing_contexts) == 0:
            return None

        parser_helper.seek(parsing_contexts[0].pos)

        # Try to simplify the parsing_context
        simplified_parsing_contexts = self.simplify(parsing_contexts)

        if parser_helper.debugger.is_enabled() and len(simplified_parsing_contexts) != len(parsing_contexts):
            parser_helper.debug_concept(debug_prefix, simplified=simplified_parsing_contexts)

        if len(simplified_parsing_contexts) == 1:
            return simplified_parsing_contexts[0].node
        else:
            simplified_parsing_contexts.sort(key=attrgetter("pos"), reverse=True)
            return MultiNode(simplified_parsing_contexts)

    def __repr__(self):
        to_str = "# ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})")

    @staticmethod
    def simplify(parsing_contexts: List[ParsingContext]):
        """
        Try to remove redundant parsing context
        for example, if
            color is an adjective
            red is an adjective
            red is a color
        when parsing 'red' we will receive two parsing context
            one for 'red'
            one for 'color' -> 'red'

        The second one should be discarded
        :param parsing_contexts:
        :return:
        """
        if len(parsing_contexts) == 1:
            return parsing_contexts

        by_target = {}
        for pc in parsing_contexts:
            by_target.setdefault(pc.node.source, []).append((pc, pc.get_depth()))

        res = []
        for k, tuple_pc_pc_depth in by_target.items():
            min_depth = min([pc_depth for pc, pc_depth in tuple_pc_pc_depth])
            res.extend([pc for pc, pc_depth in tuple_pc_pc_depth if pc_depth == min_depth])

        return res


class Optional(ParsingExpression):
    """
    Will match or not the elements
    if many matches, will choose longest one
    If you need order, use Optional(OrderedChoice)
    TODO : implement MultiNode support
    """

    def _parse(self, parser_helper):
        init_pos = parser_helper.pos
        selected_node = NonTerminalNode(self, parser_helper.pos, -1, [], [])  # means that nothing is found

        for e in self.nodes:
            node = e.parse(parser_helper)
            if node:
                if node.end > selected_node.end:
                    selected_node = NonTerminalNode(
                        self,
                        node.start,
                        node.end,
                        parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
                        [node])

            parser_helper.seek(init_pos)  # backtrack

        if selected_node.end != -1:
            parser_helper.seek(selected_node.end)
            parser_helper.next_token()  # eat the tokens found

        return selected_node

    def __repr__(self):
        if len(self.elements) == 1:
            return f"{self.elements[0]}?"
        else:
            to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})?")


class Repetition(ParsingExpression):
    """
    Base class for all repetition-like parser expressions (?,*,+)
    Args:
        eolterm(bool): Flag that indicates that end of line should
            terminate repetition match.
    """

    def __init__(self, *elements, **kwargs):
        super(Repetition, self).__init__(*elements, **kwargs)
        self.sep = kwargs.get('sep', None)

    def clone(self):
        return Repetition(self.elements,
                          rule_name=self.rule_name,
                          nodes=self.nodes,
                          sep=self.sep)


class ZeroOrMore(Repetition):
    """
    ZeroOrMore will try to match parser expression specified zero or more
    times. It will never fail.
    TODO : implement MultiNode support
    """

    def _parse(self, parser_helper):
        init_pos = parser_helper.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser_helper.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser_helper)
                if sep_result is None:
                    parser_helper.seek(current_pos)
                    break

            # eat the ZeroOrMore
            node = self.nodes[0].parse(parser_helper)
            if node is None:
                parser_helper.seek(current_pos)
                break
            else:
                if node.end != -1:  # because returns -1 when no match
                    children.append(node)
                    end_pos = node.end

        if len(children) == 0:
            return NonTerminalNode(self, init_pos, -1, [], [])

        return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
                               children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})*")


class OneOrMore(Repetition):
    """
    OneOrMore will try to match parser expression specified one or more times.
    TODO : implement MultiNode support
    """

    def _parse(self, parser_helper):
        init_pos = parser_helper.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser_helper.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser_helper)
                if sep_result is None:
                    parser_helper.seek(current_pos)
                    break

            # eat the ZeroOrMore
            node = self.nodes[0].parse(parser_helper)
            if node is None:
                parser_helper.seek(current_pos)
                break
            else:
                if node.end != -1:  # because returns -1 when no match
                    children.append(node)
                    end_pos = node.end

        if len(children) == 0:  # if nothing is found, it's an error
            return None

        return NonTerminalNode(self,
                               init_pos,
                               end_pos,
                               parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
                               children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})+")


class UnorderedGroup(Repetition):
    """
    Will try to match all of the parsing expression in any order.
    """

    def _parse(self, parser):
        raise NotImplementedError()

    # def __repr__(self):
    #     to_str = ", ".join(repr(n) for n in self.elements)
    #     return f"({to_str})#"


class Match(ParsingExpression):
    """
    Base class for all classes that will try to match something from the input.
    """

    def __init__(self, rule_name, root=False):
        super(Match, self).__init__(rule_name=rule_name, root=root)

    def parse(self, parser):
        return self._parse(parser)


class StrMatch(Match):
    """
    Matches a literal
    """

    def __init__(self, to_match, rule_name="", ignore_case=True, skip_whitespace=True):
        super(Match, self).__init__(rule_name=rule_name)
        self.to_match = to_match
        self.ignore_case = ignore_case
        self.skip_white_space = skip_whitespace

    def __repr__(self):
        text = self.to_match
        if not self.ignore_case:
            text += "#!ic"
        if not self.skip_white_space:
            text += "#!sw"
        return self.add_rule_name_if_needed(f"'{text}'")

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, StrMatch):
            return False

        return self.to_match == other.to_match and \
               self.ignore_case == other.ignore_case and \
               self.skip_white_space == other.skip_white_space

    def __hash__(self):
        return hash(("StrMatch", self.to_match, self.ignore_case, self.skip_white_space))

    def _parse(self, parser_helper):
        token = parser_helper.get_token()

        if parser_helper.debugger.is_enabled():
            debug_prefix = self.debug_prefix("StrMatch", parser_helper)
            debug_text = f"pos={parser_helper.pos}, to_match={self.to_match}, token={token.str_value}"
            parser_helper.debug_concept(debug_prefix, raw=f"{CCM['green']}{debug_text}{CCM['reset']}")

        m = token.str_value.lower() == self.to_match.lower() if self.ignore_case \
            else token.strip_quote == self.to_match

        if m:
            if parser_helper.debugger.is_enabled():
                parser_helper.debug_concept(debug_prefix, raw=f"{CCM['green']}{debug_text}{CCM['reset']}")
            node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value, token.str_value)
            parser_helper.next_token(self.skip_white_space)
            return node

        if parser_helper.debugger.is_enabled():
            parser_helper.debug_concept(debug_prefix, raw=f"{CCM['red']}{debug_text}{CCM['reset']}")
        return None


class RegExMatch(Match):
    """
    Matches regular expression
    """

    def __init__(self, to_match, rule_name="", ignore_case=True, multiline=None):
        super(Match, self).__init__(rule_name=rule_name)
        self.to_match = to_match
        self.ignore_case = ignore_case
        self.multiline = multiline
        self.explicit_flags = re.MULTILINE
        self.regex = None

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, RegExMatch):
            return False

        return self.to_match == other.to_match and \
               self.ignore_case == other.ignore_case and \
               self.multiline == other.multiline and \
               self.explicit_flags == other.explicit_flags

    def __hash__(self):
        return hash(("RegExMatch", self.to_match, self.ignore_case, self.multiline, self.explicit_flags))

    def __repr__(self):
        text = self.to_match
        if not self.ignore_case:
            text += "#!ic"
        if self.multiline:
            text += "#ml"
        return self.add_rule_name_if_needed(f"r'{text}'")

    def compile(self):
        flags = RegExDef.compile_flags(self.ignore_case, self.multiline, self.explicit_flags)
        self.regex = re.compile(self.to_match, flags)

    def _parse(self, parser_helper):
        text = parser_helper.get_parsing_text()

        # if parser_helper.debugger.is_enabled():
        #     debug_prefix = self.debug_prefix("StrMatch", parser_helper)
        #     text_debug = text[:12] + "..." if len(text) > 12 else text
        #     debug_text = f"pos={parser_helper.pos}, to_match={self.to_match}, text={text_debug}"
        #     parser_helper.debug_concept(debug_prefix, raw=f"{CCM['green']}{debug_text}{CCM['reset']}")

        m = self.regex.match(text, parser_helper.token.index)
        if m:
            matched = m.group()

            # TODO: Add debug info here

            if matched:
                # the match is only valid if it fits with the actual tokens
                next_pos = parser_helper.get_next_matching_pos(m.end())
                if next_pos is NotFound:
                    parser_helper.errors.append(NoMatchingTokenError(m.end()))
                    return None

                node = TerminalNode(self, parser_helper.pos, next_pos - 1, matched, matched)
                parser_helper.seek(next_pos - 1)
                parser_helper.next_token()
                return node

        # if parser_helper.debugger.is_enabled():
        #     parser_helper.debug_concept(debug_prefix, raw=f"{CCM['red']}{debug_text}{CCM['reset']}")
        return None


class ParsingExpressionVisitor:
    """
    visit ParsingExpression
    """

    STOP = "##_Stop_##"

    def __init__(self, get_nodes=None, circular_ref_strategy=None):
        self.get_nodes = get_nodes or (lambda pe: pe.elements)

        self.circular_ref_strategy = circular_ref_strategy
        self.seen = set() if circular_ref_strategy else None

    def visit(self, parsing_expression):
        name = parsing_expression.__class__.__name__

        if self.circular_ref_strategy:
            if id(parsing_expression) in self.seen:
                if self.circular_ref_strategy == "skip":
                    return
                raise RecursionError(f"circular ref detected : {self}")
            self.seen.add(id(parsing_expression))

        method = 'visit_' + name
        visitor = getattr(self, method, self.generic_visit)
        return visitor(parsing_expression)

    def generic_visit(self, parsing_expression):
        if hasattr(self, "visit_all"):
            self.visit_all(parsing_expression)

        self.visit_children(parsing_expression)

    def visit_children(self, parsing_expression):
        for node in self.get_nodes(parsing_expression):
            if isinstance(node, Concept):
                res = self.visit(ConceptExpression(node.key or node.name))
            elif isinstance(node, str):
                res = self.visit(StrMatch(node))
            else:
                res = self.visit(node)

            if res == self.STOP:
                return


class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
    def __init__(self, sheerka):
        super().__init__()
        self.sheerka = sheerka
        self.first_tokens = None

    def add_first_token(self, first_token):
        if not self.first_tokens:
            self.first_tokens = [first_token]
        else:
            self.first_tokens.append(first_token)

    def visit_ConceptExpression(self, pe):
        concept = self.sheerka.get_by_key(pe.concept) if isinstance(pe.concept, str) else pe.concept
        if self.sheerka.is_known(concept):
            self.add_first_token(core.utils.str_concept(concept, drop_name=True))
        return self.STOP

    def visit_StrMatch(self, pe):
        if not pe.to_match:
            return
        self.add_first_token(pe.to_match)
        return self.STOP

    def visit_RegExMatch(self, pe):
        if not pe.to_match:
            return
        self.add_first_token(RegExDef(pe.to_match, pe.ignore_case, pe.multiline, pe.explicit_flags))
        return self.STOP

    def visit_OrderedChoice(self, parsing_expression):
        for node in parsing_expression.elements:
            self.visit(node)
        return self.STOP

    def visit_UnOrderedChoice(self, parsing_expression):
        for node in parsing_expression.elements:
            self.visit(node)
        return self.STOP


class BnfNodeConceptExpressionVisitor(ParsingExpressionVisitor):
    def __init__(self):
        super().__init__()
        self.references = []

    def visit_ConceptExpression(self, pe):
        self.references.append(pe.concept)


class HasAChoiceExpressionVisitor(ParsingExpressionVisitor):
    def __init__(self):
        super().__init__()
        self.result = False

    def visit_OrderedChoice(self, parsing_expression):
        self.result = True

    def visit_UnOrderedChoice(self, parsing_expression):
        self.result = True


class BnfConceptParserHelper:
    def __init__(self, parser, debugger):
        self.parser = parser
        self.debugger = debugger
        self.debug = []  # keep track of the tokens
        self.errors = []  # sink of errors
        self.sequence = []  # output. List of lexer nodes correctly parsed
        self.concepts = []  # stack of concepts being processed (fed by ConceptExpression)
        self.concepts_ids = []  # ids if the concept to increase speed
        self.rules_names = []  # stack of concepts rules names
        self.concept_depth = 0  # depth of concept (+1 for each ConceptExpression which is not an OrderedChoice)

        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
        self.has_unrecognized = False
        self.bnf_parsed = False

        self.forked = []

        self.token = None
        self.pos = -1

    def __repr__(self):
        nodes = core.builtin_helpers.debug_nodes(self.sequence)
        return f"BnfConceptParserHelper({nodes})"

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, BnfConceptParserHelper):
            return False

        return self.sequence == other.sequence and self.errors == other.errors

    def __hash__(self):
        return len(self.sequence) + len(self.errors)

    def debug_concept(self, text, **kwargs):
        if len(self.concepts) <= 2:
            self.debugger.debug_concept(self.concepts[0], text, **kwargs)

    def get_current_rule_name(self):
        for rule_name in reversed(self.rules_names):
            if rule_name:
                return rule_name

    def push_concept(self, concept):
        self.concepts.append(concept)
        self.concepts_ids.append(concept.id)

    def pop_concept(self):
        self.concepts.pop()
        self.concepts_ids.pop()

    def get_concepts_ids(self):
        return self.concepts_ids

    def get_token(self) -> Token:
        return self.token

    def get_parsing_text(self) -> str:
        return self.parser.parser_input.sub_text

    def get_remaining_tokens(self):
        return self.parser.parser_input.tokens[self.pos:-1]  # do not return the trailing EOF

    def get_last_token_pos(self):
        last_token = self.parser.parser_input.tokens[self.parser.parser_input.end]
        return self.parser.parser_input.end - 1 if last_token.type == TokenKind.EOF else self.parser.parser_input.end

    def next_token(self, skip_whitespace=True):
        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.pos += 1
        self.token = self.parser.parser_input.tokens[self.pos]

        if skip_whitespace:
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.parser.parser_input.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def get_next_matching_pos(self, token_index):
        """
        Given the token, tries to find a token (within the remaining tokens) that matches the index
        :param token_index:
        :return:
        """
        current = self.pos
        while current <= self.parser.parser_input.end:
            if self.parser.parser_input.tokens[current].index == token_index:
                return current
            current += 1

        # No matching token
        return NotFound

    def seek(self, pos):
        self.pos = pos
        self.token = self.parser.parser_input.tokens[self.pos]

    def has_error(self):
        return len(self.errors) > 0

    def is_locked(self):
        return self.parser.parser_input.pos <= self.pos or self.has_error()

    def eat_concept(self, concept, token):
        def _get_longest_valid_node(multi_node):
            res = []
            longest = -1
            for node_res in multi_node.results:
                if node_res.node is None or node_res.node.end == -1:
                    continue
                if longest == -1 or node_res.pos == longest:
                    res.append(node_res.node)
                    longest = node_res.pos
                else:
                    break

            return None if len(res) == 0 else res[0] if len(res) == 1 else res

        if self.is_locked():
            return

        try:
            self.push_concept(concept)
            self.debug.append(concept)

            self.manage_unrecognized()
            for forked in self.forked:
                # manage the fact that some clone may have been forked
                forked.eat_concept(concept, token)

            # init
            parsing_expression = self.parser.get_parsing_expression(self.parser.context, concept)
            if not isinstance(parsing_expression, ParsingExpression):
                self.debug.append(concept)
                error_msg = f"Failed to parse concept '{concept}'"
                if parsing_expression is not None:
                    error_msg += f". Reason: '{parsing_expression}'"
                self.errors.append(GrammarErrorNode(error_msg))
                return

            self.pos = self.parser.parser_input.pos
            self.token = self.parser.parser_input.tokens[self.pos]

            # parse
            self.debugger.debug_concept(concept, parsing_expression=parsing_expression)
            node = parsing_expression.parse(self)

            if isinstance(node, MultiNode):
                node = _get_longest_valid_node(node)

            if isinstance(node, list):
                # multiple results are found.
                # add the nodes to the forks
                instances = [self]
                for i in range(len(node) - 1):
                    clone = self.clone()
                    instances.append(clone)
                    self.forked.append(clone)

                for instance, n in zip(instances, node):
                    instance.sequence.append(instance.create_concept_node(concept, n))
                    instance.pos = n.end
                    instance.bnf_parsed = True

            elif isinstance(node, ParseTreeNode) and node.end != -1:
                self.sequence.append(self.create_concept_node(concept, node))
                self.pos = node.end
                self.bnf_parsed = True

            else:
                self.debug.append(("Rewind", token))
                self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)
                self.pos = self.parser.parser_input.pos  # reset position
        finally:
            self.concepts.pop()

    def eat_unrecognized(self, token):
        if self.is_locked():
            return

        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)

    def manage_unrecognized(self):
        if self.unrecognized_tokens.is_empty():
            return

        # do not put empty UnrecognizedToken in out
        if self.unrecognized_tokens.is_whitespace():
            self.unrecognized_tokens.reset()
            return

        self.unrecognized_tokens.fix_source()

        # try to recognize concepts
        nodes_sequences = self.parser.cache.get_lexer_nodes_from_unrecognized(self.parser.context,
                                                                              self.unrecognized_tokens)

        if nodes_sequences:
            instances = [self]
            for i in range(len(nodes_sequences) - 1):
                clone = self.clone()
                instances.append(clone)
                self.forked.append(clone)

            for instance, node_sequence in zip(instances, nodes_sequences):
                for node in node_sequence:
                    instance.sequence.append(node)
                    if isinstance(node, UnrecognizedTokensNode) or \
                            hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
                        instance.has_unrecognized = True
                instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

        else:
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True

        # create another instance
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def clone(self):
        clone = BnfConceptParserHelper(self.parser, self.debugger)
        clone.debug = self.debug[:]
        clone.errors = self.errors[:]
        clone.sequence = self.sequence[:]

        clone.unrecognized_tokens = self.unrecognized_tokens.clone()
        clone.has_unrecognized = self.has_unrecognized
        clone.bnf_parsed = self.bnf_parsed

        clone.pos = self.pos

        return clone

    def finalize(self):
        if self.bnf_parsed:
            self.manage_unrecognized()
            for forked in self.forked:
                # manage that some clones may have been forked
                forked.finalize()

    def create_concept_node(self, template, underlying):
        sheerka = self.parser.context.sheerka
        key = (template.key, template.id) if template.id else template.key
        concept = sheerka.new(key)
        concept = self.finalize_concept(sheerka, concept, underlying)
        concept.get_hints().use_copy = True
        concept_node = ConceptNode(concept,
                                   underlying.start,
                                   underlying.end,
                                   self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
                                   None,
                                   underlying)
        return concept_node

    def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True):
        """
        Updates the properties of the concept
        Goes in recursion if the property is a concept
        """

        # this cache is to make sure that we return the same concept for the same ConceptExpression
        _underlying_value_cache = {}

        def _add_compiled(_concept, prop_name, value):
            """
            Adds a new entry,
            makes a list if the property already exists
            """
            if prop_name not in _concept.get_compiled() or _concept.get_compiled()[prop_name] is None:
                # new entry
                _concept.get_compiled()[prop_name] = value
            else:
                # make a list if there was a value
                previous_value = _concept.get_compiled()[prop_name]
                if isinstance(previous_value, list):
                    previous_value.append(value)
                else:
                    new_value = [previous_value, value]
                    _concept.get_compiled()[prop_name] = new_value

        def _look_for_concept_match(_underlying):
            """
            At some point, there is either an StrMatch or a ConceptMatch,
            that allowed the recognition.
            Look for the ConceptMatch, with recursion if needed
            """
            if isinstance(_underlying.parsing_expression, ConceptExpression):
                return _underlying

            if not isinstance(_underlying, NonTerminalNode):
                return None

            if len(_underlying.children) != 1:
                return None

            return _look_for_concept_match(_underlying.children[0])

        def _get_underlying_value(_underlying):
            if (concept_match_node := _look_for_concept_match(_underlying)) is not None:
                # the value is a concept
                if id(concept_match_node) in _underlying_value_cache:
                    result = _underlying_value_cache[id(concept_match_node)]
                else:
                    ref_tpl = concept_match_node.parsing_expression.concept
                    new = sheerka.new_from_template(ref_tpl, ref_tpl.key)
                    result = self.finalize_concept(sheerka, new, concept_match_node.children[0], init_empty_body)
                    _underlying_value_cache[id(concept_match_node)] = result
            elif not hasattr(_underlying, "value") or isinstance(_underlying.value, str):
                result = DoNotResolve(_underlying.source)
            else:
                result = _underlying.value

            return result

        def _process_rule_name(_concept, _underlying):
            if _underlying.parsing_expression.rule_name:
                # make sure VariableExpression are only added once
                if (not isinstance(_underlying.parsing_expression, VariableExpression) or
                        _underlying.parsing_expression.rule_name not in _concept.get_compiled()):
                    var_value = _get_underlying_value(_underlying)
                    _add_compiled(_concept, _underlying.parsing_expression.rule_name, var_value)
                    _concept.get_hints().need_validation = True

            elif isinstance(_underlying, NonTerminalNode):
                for child in _underlying.children:
                    _process_rule_name(_concept, child)

        # first set the body to something if it is required
        if init_empty_body and concept.get_metadata().body is None:
            value = _get_underlying_value(underlying)
            concept.get_compiled()[ConceptParts.BODY] = value
            if underlying.parsing_expression.rule_name:
                _add_compiled(concept, underlying.parsing_expression.rule_name, value)
                # KSI : Why don't we set concept.get_hints().need_validation to True ?

        # then recursively browse children to update concept variables
        if isinstance(underlying, NonTerminalNode) and not isinstance(underlying.parsing_expression, ConceptExpression):
            for node in underlying.children:
                _process_rule_name(concept, node)

        return concept

    def get_node_value(self, node):
        """
        Try to evaluate the value of a given ParseTreeNode (TerminalNode or NonTerminalNode)
        :param node:
        :return:
        """

        if isinstance(node, TerminalNode):
            return node.value

        if isinstance(node.parsing_expression, ConceptExpression):
            concept = node.parsing_expression.concept
            finalized = self.finalize_concept(self.parser.sheerka, concept, node)
            evaluated = core.builtin_helpers.ensure_evaluated(self.parser.context, finalized)
            return evaluated.body

        return None


@dataclass
class UnderConstruction:
    concept_id: str


@dataclass()
class ToUpdate:
    instance_id: int
    parsing_expression: ParsingExpression

    def __hash__(self):
        return hash(self.instance_id)


class BnfNodeParser(BaseNodeParser):
    NAME = "Bnf"

    def __init__(self, **kwargs):
        super().__init__(BnfNodeParser.NAME, 50, **kwargs)

        if 'sheerka' in kwargs:
            sheerka = kwargs.get("sheerka")
            self.concepts_grammars = sheerka.get_concepts_bnf_definitions()
            self.sheerka = sheerka
        else:
            self.concepts_grammars = Cache()

        self.cache = UnrecognizedTokensCache(PARSERS)
        self.cache2 = UnrecognizedTokensCache(VARIABLE_EXPR_PARSER)
        self.ignore_case = True

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that select concepts that must handled by BnfNodeParser
        :param concept:
        :return:
        """
        return concept.get_metadata().definition_type == DEFINITION_TYPE_BNF

    @staticmethod
    def get_expression_from_concept_name(name):
        """
        Create the parsing expression from the name
        This function differs from BNFParser.parse() as it does not try to resolve identifiers into concepts
        >>> assert get_expression_from_concept_name('one hundred') == Sequence(StrMatch("one"), StrMatch("hundred"))
        while BNFParser.parse("one hundred") will look for concept 'one' and concept 'hundred'
        :param name:
        :return:
        """
        if name is None or name.strip() == "":
            return []

        res = []
        tokens = Tokenizer(name, yield_eof=False)
        for token in tokens:
            if token.type == TokenKind.WHITESPACE:
                continue
            elif token.type == TokenKind.STRING:
                sub_tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
                for sub_token in sub_tokens[:-1]:
                    res.append(StrMatch(sub_token.str_value, skip_whitespace=False))
                res.append(StrMatch(sub_tokens[-1].str_value))
            else:
                res.append(StrMatch(token.str_value))

        return res[0] if len(res) == 1 else Sequence(*res)

    def get_valid(self, parsers_helpers):
        bnf_found = False
        valid_parser_helpers = []
        for parser_helper in parsers_helpers:
            if parser_helper.bnf_parsed:
                bnf_found = True

            if parser_helper.has_error():
                self.error_sink.extend(parser_helper.errors)

            if not parser_helper.bnf_parsed or parser_helper.has_error():
                continue

            if parser_helper in valid_parser_helpers:
                continue

            valid_parser_helpers.append(parser_helper)

        return valid_parser_helpers if bnf_found else None

    def get_concepts_sequences(self, context):
        """
        Main method that parses the tokens and extract the concepts
        :return:
        """

        def _add_forked_to_concept_parser_helpers():
            # check that if some new InfixToPostfix are created
            for parser in concept_parser_helpers:
                if len(parser.forked) > 0:
                    forked.extend(parser.forked)
                    parser.forked.clear()
            if len(forked) > 0:
                concept_parser_helpers.extend(forked)
                forked.clear()

        def _get_longest(parser_helpers):
            # when there is a match with several concepts
            # on keep the ones that eat the more tokens
            by_end_pos = defaultdict(list)
            for helper in parser_helpers:
                by_end_pos[helper.pos].append(helper)

            return by_end_pos[max(by_end_pos)]

        def _merge(list1, list2):
            if not list1:
                return list2
            if not list2:
                return list1

            return list1 + list2

        forked = []
        debugger = context.get_debugger(self.NAME, "parse")
        debugger.debug_entering(source=self.parser_input.as_text())
        concept_parser_helpers = [BnfConceptParserHelper(self, debugger)]

        while self.parser_input.next_token(False):

            token = self.parser_input.token
            if debugger.is_enabled():
                debug_prefix = f"pos={self.parser_input.pos}, {token=}, {len(concept_parser_helpers)} parser(s)"

            try:
                # KSI 2021-02-13. I am not quite sure of the reason why we want to stop the processing
                # if all the parsers are locked.
                # It means that if we have two concepts 'foo bar baz' and 'bar baz'
                # we are going to miss the sequence '[UTN('foo'), CN('bar baz')]
                # ...
                not_locked = [p for p in concept_parser_helpers if not p.is_locked()]
                if len(not_locked) == 0:
                    if debugger.is_enabled():
                        debugger.debug_log(debug_prefix + ", all parsers are locked. Nothing to do.")
                    continue

                by_token = context.sheerka.get_concepts_by_first_token(token, self._is_eligible, strip_quotes=False)
                by_regex = context.sheerka.get_concepts_by_first_regex(self.parser_input.sub_text, token.index)

                concepts = _merge(by_token, by_regex)

                if not concepts:
                    if debugger.is_enabled():
                        debugger.debug_log(debug_prefix + ", no concept found.")

                    for concept_parser in not_locked:
                        concept_parser.eat_unrecognized(token)
                    continue

                if debugger.is_enabled():
                    debugger.debug_log(debug_prefix + f", concept(s) found={concepts}")

                if len(concepts) == 1:
                    for concept_parser in not_locked:
                        concept_parser.eat_concept(concepts[0], token)
                    continue

                # make the cartesian product
                temp_res = []
                for concept_parser in concept_parser_helpers:
                    if concept_parser.is_locked():
                        # It means that it already eat the token
                        # so simply add it, do not clone
                        temp_res.append(concept_parser)
                        continue

                    for concept in concepts:
                        clone = concept_parser.clone()
                        temp_res.append(clone)
                        clone.eat_concept(concept, token)
                        if debugger.is_enabled():
                            debugger.debug_log(f"..{concept}, parsed={clone.bnf_parsed}, length={clone.pos}")

                # only keep the longest
                concept_parser_helpers = _get_longest(temp_res)
                if debugger.is_enabled() and len(temp_res) > 1:
                    debugger.debug_log(f"Only keep longest -> {len(concept_parser_helpers)} parser(s) left")

            finally:
                _add_forked_to_concept_parser_helpers()

        # make sure that remaining items in stack are moved to out
        for concept_parser in concept_parser_helpers:
            concept_parser.finalize()
            _add_forked_to_concept_parser_helpers()

        debugger.debug_var("result", concept_parser_helpers)
        return concept_parser_helpers

    def check_for_infinite_recursion(self, parsing_expression, already_found, in_recursion, only_first=False):

        if isinstance(parsing_expression, ConceptExpression):
            if parsing_expression.concept.id in already_found:
                already_found.append(parsing_expression.concept.id)  # add the id again, to know where the cycle starts
                in_recursion.extend(already_found)
                return True

            already_found.append(parsing_expression.concept.id)
            return self.check_for_infinite_recursion(parsing_expression.nodes[0],
                                                     already_found,
                                                     in_recursion,
                                                     only_first)

        already_found_for_current_node = []

        if isinstance(parsing_expression, Sequence):
            # for sequence, we need to check all nodes (unless, only first)
            if only_first:
                nodes = [] if len(parsing_expression.nodes) == 0 else [parsing_expression.nodes[0]]
            else:
                nodes = parsing_expression.nodes
            for node in nodes:
                already_found_for_current_node.clear()
                already_found_for_current_node.extend(already_found)
                if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, False):
                    return True
            return False

        if isinstance(parsing_expression, OrderedChoice):
            # for ordered choice, if there is at least one node that does not resolved to a recursion
            # we are safe
            for node in parsing_expression.nodes:
                already_found_for_current_node.clear()
                already_found_for_current_node.extend(already_found)
                if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
                    return True
                else:
                    return False
            return False

        # if isinstance(parsing_expression, UnOrderedChoice):
        #     for node in parsing_expression.nodes:
        #         already_found_for_current_node.clear()
        #         already_found_for_current_node.extend(already_found.copy())
        #         if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
        #             return True
        #     return False

        return False

    def get_parsing_expression(self, context, concept):
        """
        Compute the parsing expression for a given concept
        :param context:
        :param concept:
        :return:
        """
        if concept.id in self.concepts_grammars:
            return self.concepts_grammars.get(concept.id)

        # internal cache of already computed parsing expression to use during the recursion
        grammar = {}

        # concept that are not totally resolved, because they reference parsing expression under construction
        to_update = set()  # the key is the instance id of the parsing expression

        desc = f"Get parsing expression for concept {concept}"
        with context.push(BuiltinConcepts.INIT_BNF, concept,
                          who=self.name,
                          obj=concept,
                          desc=desc) as sub_context:
            # get the parsing expression
            to_skip = {concept.id}
            presult = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_skip, to_update)

            # check and update parsing expression that are still under construction
            for item in to_update:
                pe = item.parsing_expression
                for i, node in enumerate(pe.nodes):
                    if isinstance(node, UnderConstruction):
                        pe.nodes[i] = grammar.get(node.concept_id)

            # check for infinite recursion definitions
            already_seen = [concept.id]
            in_recursion = []  # there may be cases where in_recursion is less than already_seen
            concepts_in_recursion = self.check_for_infinite_recursion(presult, already_seen, in_recursion)
            if concepts_in_recursion:
                chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=in_recursion)
                for concept_id in in_recursion:
                    grammar[concept_id] = chicken_anf_egg

            # update, in case of infinite recursion
            presult = grammar[concept.id]

            # finally, update the list of the known pexpression (self.concepts_grammars) for latter use
            for k, v in grammar.items():
                self.concepts_grammars.put(k, v)

            sub_context.add_values(return_values=presult)

        return presult

    def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_skip, to_update):
        """

        :param context:
        :param concept: concept
        :param name: rule_name of the concept if exists
        :param grammar: already resolved parsing expressions
        :param to_skip: list of concepts to skip in order to avoid circular references (only for UnOrderedChoice pe)
        :param to_update: parsing expressions that contains unresolved parsing expression
        :return:
        """
        sheerka = context.sheerka

        # if sheerka.isaset(context, concept) and hasattr(context, "obj"):
        #     key_to_use = ConceptExpression.get_recursion_id(context.obj.id, concept.id, name)
        # else:
        #     key_to_use = concept.id
        key_to_use = concept.id

        if key_to_use in self.concepts_grammars:
            return self.concepts_grammars.get(key_to_use)
            # # Use the global pexpression only if it does not contains UnOrderedChoice
            # pe = self.concepts_grammars.get(key_to_use)
            # if not pe.has_unordered_choice():

        if key_to_use in grammar:  # under construction entry
            return grammar.get(key_to_use)

        desc = f"Resolve concept parsing expression for '{concept}'. {key_to_use=}"
        with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
            if not concept.get_bnf():  # 'if' is done outside to save a function call. Not sure it worth it.
                core.builtin_helpers.ensure_bnf(sub_context, concept, self.name)

            grammar[key_to_use] = UnderConstruction(concept.id)

            if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
                expression = concept.get_bnf()
                desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
                with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
                    ssc.add_inputs(expression=expression)
                    resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update)
                    ssc.add_values(return_values=resolved)

            elif sheerka.isaset(context, concept):
                desc = f"Concept is a group. Resolving parsing expression using 'isa'"
                with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
                    ssc.add_inputs(concept=concept)
                    concepts_in_group = self.sheerka.get_set_elements(ssc, concept)

                    # valid_concepts = [c for c in concepts_in_group if c.id not in to_skip]
                    valid_concepts = concepts_in_group

                    nodes = []
                    for c in valid_concepts:
                        nodes.append(ConceptExpression(c, rule_name=c.key))

                    resolved = self.resolve_parsing_expression(ssc,
                                                               UnOrderedChoice(*nodes),
                                                               grammar,
                                                               to_skip,
                                                               to_update)
                    ssc.add_values(concepts_in_group=concepts_in_group)
                    ssc.add_values(return_values=resolved)

            else:
                desc = f"Concept is a simple concept."
                with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
                    expression = self.get_expression_from_concept_name(concept.name)
                    resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update)

            grammar[key_to_use] = resolved

            if self.has_error:
                sub_context.add_values(errors=self.error_sink)
                return None

            sub_context.add_values(return_values=resolved)
            return resolved

    def resolve_parsing_expression(self, context, expression, grammar, to_skip, to_update):

        if isinstance(expression, str):
            ret = StrMatch(expression, ignore_case=self.ignore_case)

        elif not isinstance(expression, ParsingExpression):
            return expression  # escalate the error

        elif isinstance(expression, ConceptExpression):
            concept = self.get_concept(context, expression.concept)
            expression.concept = concept

            if not self.sheerka.is_known(concept):
                unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
                return self.add_error(unknown_concept)

            inner_to_skip = to_skip.copy()
            inner_to_skip.add(concept.id)
            pe = self.resolve_concept_parsing_expression(context,
                                                         concept,
                                                         expression.rule_name,
                                                         grammar,
                                                         inner_to_skip,
                                                         to_update)

            if not isinstance(pe, (ParsingExpression, UnderConstruction)):
                return pe  # an error is detected, escalate it

            if isinstance(pe, UnderConstruction):
                to_update.add(ToUpdate(id(expression), expression))

            expression.nodes = [pe]
            expression.rule_name = expression.rule_name or concept.name
            ret = expression

        elif isinstance(expression, StrMatch):
            ret = expression
            if ret.ignore_case is None:
                ret.ignore_case = self.ignore_case

        elif isinstance(expression, RegExMatch):
            # Regular expression are not compiled yet
            # to support global settings propagation from
            # parser.
            ret = expression
            if ret.ignore_case is None:
                ret.ignore_case = self.ignore_case
            ret.compile()

        elif isinstance(expression, (Sequence, OrderedChoice, UnOrderedChoice, ZeroOrMore, OneOrMore, Optional)):
            ret = expression
            ret.nodes = []
            for e in ret.elements:
                if not isinstance(e, VariableExpression):
                    pe = self.resolve_parsing_expression(context, e, grammar, to_skip, to_update)
                    if not isinstance(pe, (ParsingExpression, UnderConstruction)):
                        return pe  # an error is detected, escalate it
                    if isinstance(pe, UnderConstruction):
                        to_update.add(ToUpdate(id(expression), ret))
                    ret.nodes.append(pe)
                else:
                    ret.nodes.append(e)

            # manage VariableExpression
            start_node = None  # first non VariableExpression node
            variable_expr_nodes = []
            for i, e in enumerate(ret.nodes):
                if isinstance(e, VariableExpression):
                    variable_expr_nodes.append(e)
                    e.before_first_token_node = start_node is None
                    if i < len(ret.nodes) - 1:
                        e.nodes.append(ret.nodes[i + 1])
                else:
                    start_node = e

            for variable_expr in variable_expr_nodes:
                variable_expr.init_parsing()

        else:
            ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)

        # Translate separator expression.
        if isinstance(ret, Repetition) and expression.sep:
            expression.sep = self.resolve_parsing_expression(context,
                                                             expression.sep,
                                                             grammar,
                                                             to_skip,
                                                             to_update)

        return ret

    def get_concept(self, context, concept):
        if isinstance(concept, Concept):
            return concept

        if context.concepts and concept in context.concepts:
            return context.concepts[concept]
        return self.sheerka.get_by_key(concept)

    def parse(self, context, parser_input: ParserInput):
        """
        parser_input can be string, but text can also be an list of tokens
        :param context:
        :param parser_input:
        :return:
        """

        if not isinstance(parser_input, ParserInput):
            return None

        context.log(f"Parsing '{parser_input}' with BnfNode", self.name)
        sheerka = context.sheerka

        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                                           body=parser_input.as_text(),
                                           reason=BuiltinConcepts.IS_EMPTY))

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        sequences = self.get_concepts_sequences(context)
        valid_parser_helpers = self.get_valid(sequences)

        debugger = context.get_debugger(self.NAME, "parse")
        if debugger.is_enabled:
            debugger.debug_var("stats", self.cache.to_dict())
            #debugger.debug_var("stats", self.cache2.to_dict())

        if valid_parser_helpers is None:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(), reason=self.error_sink))

        if len(valid_parser_helpers) == 0:
            # token error
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        ret = []
        for parser_helper in valid_parser_helpers:
            ret.append(
                self.sheerka.ret(
                    self.name,
                    not parser_helper.has_unrecognized,
                    self.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input.as_text(),
                        body=parser_helper.sequence,
                        try_parsed=parser_helper.sequence)))

        if len(ret) == 1:
            self.log_result(context, parser_input.as_text(), ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input.as_text(), ret)
            return ret