Added SyaNodeParser (finally, after one month)

This commit is contained in:
2020-04-09 15:42:36 +02:00
parent c9acfa99a1
commit 6c7c529016
56 changed files with 5322 additions and 404 deletions
+369
View File
@@ -0,0 +1,369 @@
import copy
from dataclasses import dataclass
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
PARSERS = ["BnfNode", "SyaNode", "Python"]
@dataclass()
class TokensNodeFound(ErrorNode):
    """
    Error recorded when the input ends while a concept is still expecting
    more tokens (see AtomConceptParserHelper.finalize).

    expected_tokens: the token values that were still expected when parsing stopped.
    """
    expected_tokens: list

    def __eq__(self, other):
        # NOTE(review): this deliberately(?) compares against any
        # UnexpectedTokenErrorNode rather than only TokensNodeFound — confirm.
        if id(other) == id(self):
            return True
        if not isinstance(other, UnexpectedTokenErrorNode):
            return False
        if self.message != other.message:
            return False
        if self.token.type != other.token.type or self.token.value != other.token.value:
            return False
        if len(self.expected_tokens) != len(other.expected_tokens):
            return False
        for i, t in enumerate(self.expected_tokens):
            if t != other.expected_tokens[i]:
                return False
        return True

    def __hash__(self):
        # Bug fix: expected_tokens is a list, and lists are unhashable, so
        # hashing it directly raised TypeError; hash a tuple copy instead.
        return hash((self.message, self.token, tuple(self.expected_tokens)))
class AtomConceptParserHelper:
    """
    Accumulates a sequence of nodes (ConceptNode / UnrecognizedTokensNode)
    while AtomNodeParser walks the token stream.

    Because a token may start several candidate concepts, helpers can be
    cloned by AtomNodeParser.get_concepts_sequences(), and can fork copies of
    themselves while re-parsing buffered unrecognized tokens
    (see manage_unrecognized()).
    """
    def __init__(self, context):
        self.context = context
        self.debug = []  # every token/concept fed to this helper, kept for debugging
        self.sequence = []  # sequence of concepts already found
        self.current_concept: ConceptNode = None  # concept currently being matched token by token
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of unmatched tokens and their positions
        self.expected_tokens = None  # remaining token values expected to complete current_concept
        self.is_locked = False  # when locked, eat_concept()/eat_unrecognized() are no-ops (token already consumed)
        self.errors = []
        self.has_unrecognized = False  # True as soon as an UnrecognizedTokensNode lands in the sequence
        self.forked = []  # used to duplicate AtomConceptParserHelper. See manage_unrecognized()

    def __eq__(self, other):
        # Two helpers are equal when their recognized sequences compare equal,
        # element by element; all other state is ignored.
        if id(other) == id(self):
            return True
        if not isinstance(other, AtomConceptParserHelper):
            return False
        if len(self.sequence) != len(other.sequence):
            return False
        for item_self, item_other in zip(self.sequence, other.sequence):
            if item_self != item_other:
                return False
        return True

    def __hash__(self):
        # Coarse hash (sequence length only); consistent with __eq__ above.
        return hash(len(self.sequence))

    def __repr__(self):
        return f"{self.sequence}"

    def lock(self):
        # Mark the current token as already consumed by eat_token().
        self.is_locked = True

    def reset(self):
        self.is_locked = False

    def has_error(self):
        return len(self.errors) > 0

    def eat_token(self, token, pos):
        """
        Try to match *token* against the next expected token of current_concept.
        Returns True when the token matched (the concept may or may not be
        complete yet), False on mismatch or when no concept is pending.
        """
        if not self.expected_tokens:
            return False
        self.debug.append(token)
        if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
            self.errors.append(UnexpectedTokenErrorNode(
                f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
                token,
                [self.expected_tokens[0]]))
            return False
        # extend the concept's span to cover the newly matched token
        self.current_concept.end = pos
        del self.expected_tokens[0]
        if not self.expected_tokens:
            # the concept is fully matched
            self.sequence.append(self.current_concept)
            self.expected_tokens = None
        return True

    def eat_concept(self, concept, pos):
        """
        Start (or immediately complete) matching *concept* at position *pos*.
        Flushes the unrecognized-token buffer first and forwards the concept
        to any forked clones.
        """
        if self.is_locked:
            return
        self.debug.append(concept)
        self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.eat_concept(concept, pos)
        concept_node = ConceptNode(concept, pos, pos)
        # Remaining tokens of the concept's name; [1:-1] drops the first token
        # (the one that just matched) and the last one (presumably EOF — TODO confirm).
        expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]
        if not expected:
            # the concept is already matched
            self.sequence.append(concept_node)
        else:
            self.current_concept = concept_node
            self.expected_tokens = expected

    def manage_unrecognized(self):
        """
        Flush the unrecognized-token buffer into the sequence.

        Before giving up, the buffered source is re-parsed with the PARSERS
        list; when that yields one or more node sequences, the first one is
        appended to this helper and one forked clone is created per extra
        alternative.
        """
        if self.unrecognized_tokens.is_empty():
            return
        # do not put empty UnrecognizedToken in out
        if self.unrecognized_tokens.is_whitespace():
            self.unrecognized_tokens.reset()
            return
        self.unrecognized_tokens.fix_source()
        # try to recognize concepts
        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
        if nodes_sequences:
            instances = [self]
            # one clone per alternative beyond the first
            for i in range(len(nodes_sequences) - 1):
                clone = self.clone()
                instances.append(clone)
                self.forked.append(clone)
            for instance, node_sequence in zip(instances, nodes_sequences):
                for node in node_sequence:
                    instance.sequence.append(node)
                    if isinstance(node, UnrecognizedTokensNode) or \
                            hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
                        instance.has_unrecognized = True
                # each instance starts over with an empty buffer
                instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
        else:
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True
            # create another instance
            self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def eat_unrecognized(self, token, pos):
        # Buffer a token that did not match any concept.
        if self.is_locked:
            return
        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
        """
        Called once the token stream is exhausted: flush pending unrecognized
        tokens, finalize forked clones, and record an error when a concept was
        still waiting for more tokens.
        """
        if len(self.sequence) > 0:
            self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.finalize()
        if self.expected_tokens:
            # NOTE(review): TokensNodeFound is a dataclass extending ErrorNode;
            # confirm this single-argument construction matches its field list.
            self.errors.append(TokensNodeFound(self.expected_tokens))

    def clone(self):
        """Copy this helper: lists are copied, nodes cloned, context shared."""
        clone = AtomConceptParserHelper(self.context)
        clone.debug = self.debug[:]
        clone.sequence = self.sequence[:]
        clone.current_concept = self.current_concept.clone() if self.current_concept else None
        clone.unrecognized_tokens = self.unrecognized_tokens.clone()
        clone.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
        clone.is_locked = self.is_locked
        clone.errors = self.errors[:]
        clone.has_unrecognized = self.has_unrecognized
        # NOTE(review): self.forked is intentionally(?) not copied — confirm.
        return clone

    def _get_lexer_nodes_from_unrecognized(self):
        """
        Use the source of self.unrecognized_tokens to find concepts or source code
        :return: a list of node sequences, or None when nothing was recognized
        """
        res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
        if not only_parsers_results.status:
            return None
        return builtin_helpers.get_lexer_nodes(
            only_parsers_results.body.body,
            self.unrecognized_tokens.start,
            self.unrecognized_tokens.tokens)
class AtomNodeParser(BaseNodeParser):
    """
    Parser used to recognize atom concepts or sequences of atom concepts.
    An atom concept is a concept that does not have any property, though it may have a body.
    So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter),
    this parser can recognize the sequence 'one two three'
    as [ConceptNode(one), ConceptNode(two), ConceptNode(three)].
    It can partly recognize 'one x$1!! two three'
    as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)].
    It cannot recognize concepts with parameters (non atom),
    ex: 'one plus two' won't be recognized as ConceptNode(plus, one, two),
    it will be [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)].
    Note 'one plus two' will be recognized by the SyaParser.
    """
    def __init__(self, **kwargs):
        super().__init__("AtomNode", 50, **kwargs)
        self.enabled = False  # disabled by default — TODO confirm who enables it

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that selects concepts that must be handled by AtomNodeParser
        :param concept:
        :return: True for property-less concepts and BNF-defined concepts
        """
        return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF

    def get_concepts_sequences(self):
        """
        Walk the token stream and return a list of AtomConceptParserHelper,
        one per candidate interpretation of the input.
        """
        forked = []

        def _add_forked_to_concept_parser_helpers():
            # check if some new forked helpers were created during this step
            for parser in concept_parser_helpers:
                if len(parser.forked) > 0:
                    forked.extend(parser.forked)
                    parser.forked.clear()
            if len(forked) > 0:
                concept_parser_helpers.extend(forked)
                forked.clear()

        concept_parser_helpers = [AtomConceptParserHelper(self.context)]
        while self.next_token(False):
            for concept_parser in concept_parser_helpers:
                concept_parser.reset()
            token = self.token
            try:
                # first, let helpers that are mid-concept try to consume the token
                for concept_parser in concept_parser_helpers:
                    if concept_parser.eat_token(self.token, self.pos):
                        concept_parser.lock()
                concepts = self.get_concepts(token, self._is_eligible)
                if not concepts:
                    # token starts no known concept: buffer it as unrecognized
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_unrecognized(token, self.pos)
                    continue
                if len(concepts) == 1:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_concept(concepts[0], self.pos)
                    continue
                # make the cartesian product
                temp_res = []
                for concept_parser in concept_parser_helpers:
                    if concept_parser.is_locked:
                        # It means that it already ate the token
                        # so simply add it, do not clone
                        temp_res.append(concept_parser)
                        continue
                    for concept in concepts:
                        clone = concept_parser.clone()
                        temp_res.append(clone)
                        clone.eat_concept(concept, self.pos)
                concept_parser_helpers = temp_res
            finally:
                _add_forked_to_concept_parser_helpers()
        # make sure that remaining items in stack are moved to out
        for concept_parser in concept_parser_helpers:
            concept_parser.reset()
            concept_parser.finalize()
        _add_forked_to_concept_parser_helpers()
        return concept_parser_helpers

    def get_valid(self, concept_parser_helpers):
        """
        Filter helpers: drop those with errors or empty sequences, re-attach
        real token slices to each node, and deduplicate (helpers compare by
        their sequence).
        """
        valid_parser_helpers = []  # each entry is a helper carrying a whole sequence (list) of nodes
        for parser_helper in concept_parser_helpers:
            if parser_helper.has_error():
                continue
            if len(parser_helper.sequence) == 0:
                continue
            for node in parser_helper.sequence:
                # rebuild each node's token slice and source text from the real stream
                node.tokens = self.tokens[node.start:node.end + 1]
                node.fix_source()
            if parser_helper in valid_parser_helpers:
                continue
            valid_parser_helpers.append(parser_helper)
        return valid_parser_helpers

    def parse(self, context, parser_input):
        """
        Parse *parser_input* into sequences of atom ConceptNodes.
        Returns one parser result, a list of results when several alternatives
        are valid, or an IS_EMPTY / ERROR / NOT_FOR_ME result.
        """
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        parser_helpers = self.get_valid(self.get_concepts_sequences())
        if len(parser_helpers):
            ret = []
            for parser_helper in parser_helpers:
                # a result is marked successful only when nothing was left unrecognized
                ret.append(
                    self.sheerka.ret(
                        self.name,
                        not parser_helper.has_unrecognized,
                        self.sheerka.new(
                            BuiltinConcepts.PARSER_RESULT,
                            parser=self,
                            source=parser_input,
                            body=parser_helper.sequence,
                            try_parsed=parser_helper.sequence)))
            if len(ret) == 1:
                self.log_result(context, parser_input, ret[0])
                return ret[0]
            else:
                self.log_multiple_results(context, parser_input, ret)
                return ret
        else:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+669
View File
@@ -0,0 +1,669 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True
@dataclass()
class LexerNode(Node):
    """
    Base node produced by the node parsers: a span [start, end] over the
    token stream, the tokens themselves, and their textual source.
    """
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        # Derive the source text from the tokens when the caller did not supply it.
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False
        return (self.start, self.end, self.source, self.tokens) == \
               (other.start, other.end, other.source, other.tokens)

    def fix_source(self, force=True):
        """Recompute `source` from `tokens`; skipped only when force=False and source is already set."""
        if not force and self.source is not None:
            return self
        self.source = BaseParser.get_text_from_tokens(self.tokens)
        return self
class UnrecognizedTokensNode(LexerNode):
    """
    Buffer node that collects consecutive tokens no parser recognized.
    start/end == -1 means the buffer is empty.
    """
    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False  # when frozen, add_token() refuses new tokens
        self.parenthesis_count = 0  # open-minus-closed parenthesis balance of the buffered tokens

    def freeze(self):
        self.is_frozen = True

    def reset(self):
        # Back to the empty state (positions -1, no tokens).
        self.start = self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def add_token(self, token, pos):
        """
        Append *token* located at stream index *pos*; raises when frozen.
        A gap of exactly one position since the previous token is filled with
        a synthetic whitespace token so the source text stays coherent.
        """
        if self.is_frozen:
            raise Exception("The node is frozen")
        if self.end != -1 and pos == self.end + 2:
            # add the missing whitespace
            p = self.tokens[-1]  # previous token
            self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1))
        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            self.start = pos
        if token.type == TokenKind.LPAR:
            self.parenthesis_count += 1
        if token.type == TokenKind.RPAR:
            self.parenthesis_count -= 1
        return self

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        # True when every buffered token is whitespace or newline.
        for t in self.tokens:
            if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                return False
        return True

    def is_empty(self):
        return len(self.tokens) == 0

    def __eq__(self, other):
        # utnode and UTN are the lightweight tester helpers defined later in
        # this module (resolved at call time, so the forward reference is fine).
        if isinstance(other, utnode):
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source
        if isinstance(other, UTN):
            # delegate to the tester's own comparison
            return other == self
        if not isinstance(other, UnrecognizedTokensNode):
            return False
        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"

    def clone(self):
        # Copy with its own tokens list so mutations don't leak between copies.
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        clone.is_frozen = self.is_frozen
        clone.parenthesis_count = self.parenthesis_count
        return clone
class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept
    """
    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying  # NOTE(review): presumably the node this concept was built from — confirm
        self.fix_source(False)  # only compute source when it is still unset

    def __eq__(self, other):
        # Also supports comparison against the tester helpers
        # (CN, CNC, cnode, short_cnode) defined elsewhere in this module.
        if id(self) == id(other):
            return True
        if isinstance(other, (CN, CNC)):
            # delegate to the tester's looser comparison
            return other == self
        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                self.start == other.start and \
                self.end == other.end and \
                self.source == other.source
        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source
        if not isinstance(other, ConceptNode):
            return False
        return self.concept == other.concept and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
        if DEBUG_COMPILED:
            # also dump the concept's compiled properties when debugging
            for k, v in self.concept.compiled.items():
                text += f", {k}='{v}'"
        return text + ")"

    def clone(self):
        # do we need to clone the concept as well ?
        clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
        return clone
class SourceCodeNode(LexerNode):
    """
    Node wrapping a piece of recognized source code (e.g. Python code).
    """
    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
        super().__init__(start, end, tokens, source)
        # The PythonNode (or whatever language node) that is found.
        self.node = node
        # Original result of the parsing.
        self.return_value = return_value

    def __eq__(self, other):
        # scnode is the lightweight positional tester tuple defined in this module.
        if isinstance(other, scnode):
            return (self.start, self.end, self.source) == \
                   (other.start, other.end, other.source)
        if isinstance(other, SourceCodeNode):
            return self.node == other.node and \
                   (self.start, self.end, self.source) == \
                   (other.start, other.end, other.source)
        return False

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of temporary version for SourceCodeNode.
    We know that there is some code and that there are some concepts,
    but the glue between them is not made yet,
    so all the nodes are pushed into one big bag.
    """
    def __init__(self, first_node, last_node, content_nodes=None):
        # NOTE(review): 9999 acts as a "larger than any real index" sentinel
        # for start (sys.maxsize would be safer for very long inputs — confirm).
        super().__init__(9999, -1, None)
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = False
        self.fix_all_pos()

    def add_node(self, node):
        """Append *node* to the bag and widen the start/end span accordingly."""
        self.nodes.append(node)
        self.fix_pos(node)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if not isinstance(other, SourceCodeWithConceptNode):
            return False
        if self.start != other.start or self.end != other.end:
            return False
        if self.first != other.first:
            return False
        if self.last != other.last:
            return False
        if len(self.nodes) != len(other.nodes):
            return False
        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False
        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        # Widen the span with first, last and every content node.
        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        """Extend [start, end] so it covers *node*'s position, when it has one."""
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start
        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end
        return self

    def pseudo_fix_source(self):
        """Best-effort reconstruction of `source` from the bagged nodes."""
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def clone(self):
        # Bug fix: copy the nodes list — it used to be shared with the clone,
        # so add_node() on one instance silently mutated the other (every other
        # clone() in this module copies its lists). Also preserve the
        # has_unrecognized flag, which was previously reset to False.
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes[:])
        clone.has_unrecognized = self.has_unrecognized
        return clone
@dataclass()
class GrammarErrorNode(ErrorNode):
    """Error node that carries a grammar-related error message."""
    message: str
class SyaAssociativity(Enum):
    """
    Operator associativity used in sya_definitions
    (stored as (precedence, associativity) tuples — see BaseNodeParser).
    """
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        # repr shows just the raw value, e.g. left / right / No
        return self.value
# Lightweight positional tester tuples: the node classes above accept them in
# their __eq__ implementations so tests can compare against plain tuples.
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")
@dataclass(init=False)
class SCWC:
    """
    SourceNodeWithConcept tester class.
    It matches with a SourceNodeWithConcept
    but it's easier to instantiate during the tests.
    """
    first: LexerNode
    last: LexerNode
    content: tuple

    def __init__(self, first, last, *args):
        # Any extra positional arguments become the expected content tuple.
        self.first, self.last, self.content = first, last, args
class HelperWithPos:
    """
    Mixin for tester classes that track a (start, end) span.

    A bound given at construction time is "fixed" and never updated;
    an unset bound is widened by successive fix_pos() calls.
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end
        # Bounds supplied by the caller are frozen for the object's lifetime.
        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    @staticmethod
    def _coord(node, attr, index):
        # Accept either an object exposing start/end or a plain (start, end) tuple.
        if hasattr(node, attr):
            return getattr(node, attr)
        if isinstance(node, tuple):
            return node[index]
        return None

    def fix_pos(self, node):
        """Widen the tracked span with *node*'s position; returns self for chaining."""
        if not self.start_is_fixed:
            candidate = self._coord(node, "start", 0)
            if candidate is not None and (self.start is None or candidate < self.start):
                self.start = candidate
        if not self.end_is_fixed:
            candidate = self._coord(node, "end", 1)
            if candidate is not None and (self.end is None or candidate > self.end):
                self.end = candidate
        return self
class CN(HelperWithPos):
    """
    ConceptNode tester class
    It matches with ConceptNode but with less constraints:
    CN == ConceptNode if the concept key matches, and start/end match
    when they were given.
    """
    def __init__(self, concept, start=None, end=None, source=None):
        """
        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.source = source
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        # Rebuild the expected source from a list of token strings.
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            # NOTE(review): source is deliberately(?) not compared against a
            # real ConceptNode — confirm this looser match is intended.
            return True
        if not isinstance(other, CN):
            return False
        return self.concept_key == other.concept_key and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CN(concept='{self.concept}'"
        else:
            txt = f"CN(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
class CNC(CN):
    """
    ConceptNode for Compiled tester class
    It matches with ConceptNode
    But focuses on the 'compiled' property of the concept
    CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled

    NOTE(review): CNC defines __eq__ without __hash__, which makes it
    unhashable (unlike CN) — confirm that is acceptable for a tester class.
    """
    def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
        """
        :param concept_key: Concept or concept key (see CN)
        :param kwargs: the expected entries of concept.compiled
        """
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.compiled == other.concept.compiled  # assert instead of return to help debugging tests
        if not isinstance(other, CNC):
            return False
        return self.concept_key == other.concept_key and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.compiled == other.compiled

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"
class BaseNodeParser(BaseParser):
    """
    Base class for parsers that produce LexerNode sequences.
    Holds the token stream state (token/pos/tokens) and an index of concepts
    keyed by their first non-variable keyword.
    """
    def __init__(self, name, priority, **kwargs):
        super().__init__(name, priority)
        if 'sheerka' in kwargs:
            sheerka = kwargs.get("sheerka")
            self.init_from_sheerka(sheerka)
        else:
            self.concepts_by_first_keyword = None
            self.sya_definitions = None
        self.token = None  # current token
        self.pos = -1  # index of the current token in self.tokens
        self.tokens = None
        self.context: ExecutionContext = None
        self.text = None  # raw input of the current parse
        self.sheerka = None

    def init_from_sheerka(self, sheerka):
        """
        Use the definitions from Sheerka to initialize
        :param sheerka:
        :return:
        """
        self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
        # NOTE(review): when sheerka.sya_definitions is falsy, self.sya_definitions
        # is left untouched on this path — confirm callers tolerate that.
        if sheerka.sya_definitions:
            self.sya_definitions = {}
            for k, v in sheerka.sya_definitions.items():
                # stored as (precedence:int, associativity:SyaAssociativity)
                self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))

    def reset_parser(self, context, text):
        """
        Prepare a new parse of *text*; returns False (after recording the
        error) when tokenization fails.
        """
        self.context = context
        self.sheerka = context.sheerka
        self.text = text
        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False
        self.token = None
        self.pos = -1
        return True

    def add_error(self, error, next_token=True):
        # Record the error and optionally skip past the offending token.
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self.token

    def next_token(self, skip_whitespace=True):
        """
        Advance to the next token (optionally skipping whitespace/newlines);
        returns False once EOF is reached.
        """
        if self.token and self.token.type == TokenKind.EOF:
            return False
        self.pos += 1
        self.token = self.tokens[self.pos]
        if skip_whitespace:
            # assumes the stream ends with an EOF token, which stops this loop
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]
        return self.token.type != TokenKind.EOF

    def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
        """
        To quickly find a concept, we store them in a hash where the key is the first token of the concept
        example :
        Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
        So the key to use will be "foo"
        Concept("a foo").def_prop("a") -> first token is "foo"
        Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
        Note that under the same key, there can be multiple entries
        a B-Tree may be a better implementation in the future
        We also store sya_definitions where each value is a tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
        :param context:
        :param concepts: list[Concept]
        :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
        :param use_sheerka: first init with the definitions from Sheerka
        :return:
        """
        self.context = context
        self.sheerka = context.sheerka
        if use_sheerka:
            self.init_from_sheerka(self.sheerka)
        if sya_definitions:
            if self.sya_definitions:
                self.sya_definitions.update(sya_definitions)
            else:
                self.sya_definitions = sya_definitions
        if self.concepts_by_first_keyword is None:
            self.concepts_by_first_keyword = {}
        for concept in concepts:
            keywords = concept.key.split()
            for keyword in keywords:
                # register under the first keyword that is not a variable
                if keyword.startswith(VARIABLE_PREFIX):
                    continue
                self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
                break
        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)

    def get_concepts(self, token, to_keep, to_map=None):
        """
        Tries to find if there are concepts that match the value of the token
        :param token:
        :param to_keep: predicate to tell if the concept is eligible
        :param to_map: optional transform applied to each kept concept
        :return: the (possibly empty) list of matching concepts, or None when
                 the token's value is not indexed at all
        """
        if token.type == TokenKind.STRING:
            # strip the surrounding quotes kept in the token value
            name = token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            # keyword token values wrap the string — presumably an enum member; confirm
            name = token.value.value
        else:
            name = token.value
        result = []
        if name in self.concepts_by_first_keyword:
            for concept_id in self.concepts_by_first_keyword[name]:
                concept = self.sheerka.get_by_id(concept_id)
                if not to_keep(concept):
                    continue
                concept = to_map(concept) if to_map else concept
                result.append(concept)
            return result
        return None

    @staticmethod
    def get_token_value(token):
        # Normalized string value of a token (quotes stripped for strings,
        # underlying value for keywords).
        if token.type == TokenKind.STRING:
            return token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            return token.value.value
        else:
            return token.value
class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode
    compare the source, and start, end if defined
    """
    def __init__(self, source, start=None, end=None):
        """
        :param source: expected source string
        :param start: expected start index (optional)
        :param end: expected end index (optional)
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, UnrecognizedTokensNode):
            # NOTE(review): start/end are compared even when left as None here,
            # unlike CN's looser matching — confirm this is intended.
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source
        if not isinstance(other, UTN):
            return False
        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN( source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
+54 -4
View File
@@ -1,8 +1,8 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords, Token
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.sheerka_logger import get_logger
import core.utils
import logging
@@ -77,7 +77,6 @@ class BaseParser:
self.priority = priority
self.enabled = enabled
self.has_error = False
self.error_sink = []
def __eq__(self, other):
@@ -91,9 +90,13 @@ class BaseParser:
def __repr__(self):
return self.name
def parse(self, context, text):
def parse(self, context, parser_input):
pass
@property
def has_error(self):
return len(self.error_sink) > 0
def log_result(self, context, source, ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
@@ -132,6 +135,53 @@ class BaseParser:
body=self.error_sink if self.has_error else tree,
try_parsed=try_parse)
def get_input_as_text(self, parser_input, custom_switcher=None):
if isinstance(parser_input, list):
return self.get_text_from_tokens(parser_input, custom_switcher)
if isinstance(parser_input, ParserResultConcept):
parser_input = parser_input.source
if "c:" in parser_input:
return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher)
return parser_input
def get_input_as_tokens(self, parser_input):
if isinstance(parser_input, list):
return self.add_eof_if_needed(parser_input)
if isinstance(parser_input, ParserResultConcept):
if parser_input.tokens:
return self.add_eof_if_needed(parser_input.tokens)
else:
return Tokenizer(parser_input.source)
return Tokenizer(parser_input)
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
if not isinstance(parser_input, ParserResultConcept):
return None
if expected_parser and parser_input.parser != expected_parser:
return None
if len(parser_input.value) == 0:
return None
for node in parser_input.value:
from parsers.BaseNodeParser import LexerNode
if not isinstance(node, LexerNode):
return None
return parser_input.value
@staticmethod
def add_eof_if_needed(lst):
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
return lst
@staticmethod
def get_text_from_tokens(tokens, custom_switcher=None):
if tokens is None:
@@ -9,147 +9,17 @@
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
import core.utils
@dataclass()
class LexerNode(Node):
start: int # starting index in the tokens list
end: int # ending index in the tokens list
tokens: list = None # tokens
source: str = None # string representation of what was parsed
def __post_init__(self):
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if not isinstance(other, LexerNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.tokens == other.tokens
class UnrecognizedTokensNode(LexerNode):
def __init__(self, start, end, tokens):
super().__init__(start, end, tokens)
def add_token(self, token, pos):
self.tokens.append(token)
self.end = pos
def fix_source(self):
self.source = BaseParser.get_text_from_tokens(self.tokens)
def not_whitespace(self):
return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
def __eq__(self, other):
if isinstance(other, utnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, UnrecognizedTokensNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
class ConceptNode(LexerNode):
"""
Returned by the ConceptLexerParser
It represents a recognized concept
"""
def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
super().__init__(start, end, tokens, source)
self.concept = concept
self.underlying = underlying
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if isinstance(other, cnode):
return self.concept.key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
if isinstance(other, short_cnode):
return self.concept.key == other.concept_key and self.source == other.source
if not isinstance(other, ConceptNode):
return False
return self.concept == other.concept and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.underlying == other.underlying
def __hash__(self):
return hash((self.concept, self.start, self.end, self.source, self.underlying))
def __repr__(self):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized.
    """

    def __init__(self, node, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        # The PythonNode (or whatever language node) that is found
        self.node = node

    def __eq__(self, other):
        # scnode shorthand: compare span and source only.
        if isinstance(other, scnode):
            return (self.start, self.end, self.source) == \
                   (other.start, other.end, other.source)
        if isinstance(other, SourceCodeNode):
            return (self.node, self.start, self.end, self.source) == \
                   (other.node, other.start, other.end, other.source)
        return False

    def __hash__(self):
        # self.node is not part of the hash; __eq__-equal objects always share
        # (start, end, source), so the eq/hash contract still holds.
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
# Lightweight namedtuple shorthands accepted by the node classes' __eq__
# methods (ConceptNode, UnrecognizedTokensNode, SourceCodeNode) for quick
# structural comparison without constructing full node objects.
# NOTE(review): the typename passed to namedtuple deliberately mirrors the
# node class (e.g. "ConceptNode"), not the variable name, so reprs line up
# with the real nodes — presumably for readable test/diff output; confirm.
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, tokens, children=None):
@@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode):
class TerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, value):
@@ -205,11 +75,6 @@ class TerminalNode(LexerNode):
return hash((self.parsing_expression, self.start, self.end, self.value))
@dataclass()
class GrammarErrorNode(ErrorNode):
message: str
@dataclass()
class UnknownConceptNode(ErrorNode):
concept_key: str
@@ -574,9 +439,9 @@ class StrMatch(Match):
return None
class ConceptLexerParser(BaseParser):
class BnfNodeParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptLexer", 50)
super().__init__("BnfNode", 50)
if 'grammars' in kwargs:
self.concepts_grammars = kwargs.get("grammars")
elif 'sheerka' in kwargs:
@@ -595,7 +460,6 @@ class ConceptLexerParser(BaseParser):
self.sheerka = None
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -606,16 +470,11 @@ class ConceptLexerParser(BaseParser):
self.sheerka = context.sheerka
self.text = text
if isinstance(text, str):
try:
self.tokens = list(Tokenizer(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
else:
self.tokens = list(text)
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
try:
self.tokens = list(self.get_input_as_tokens(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
self.token = None
self.pos = -1
@@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser):
removed_concepts.append(e)
return removed_concepts
def parse(self, context, text):
if text == "":
def parse(self, context, parser_input):
if parser_input == "":
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)
if not self.reset_parser(context, text):
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
self.name,
False,
@@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser):
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
source=parser_input,
body=choice,
try_parsed=choice)))
if len(ret) == 1:
self.log_result(context, text, ret[0])
self.log_result(context, parser_input, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text, ret)
self.log_multiple_results(context, parser_input, ret)
return ret
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
@@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser):
_concept.compiled[prop_name] = new_value
def _look_for_concept_match(_underlying):
"""
At some point, there is either an StrMatch or a ConceptMatch,
that allowed the recognition.
Look for the ConceptMatch, with recursion if needed
"""
if isinstance(_underlying.parsing_expression, ConceptExpression):
return _underlying
@@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser):
def _get_underlying_value(_underlying):
concept_match_node = _look_for_concept_match(_underlying)
if concept_match_node:
# the value is a concept
if id(concept_match_node) in _underlying_value_cache:
result = _underlying_value_cache[id(concept_match_node)]
else:
@@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser):
result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
_underlying_value_cache[id(concept_match_node)] = result
else:
# the value is a string
result = DoNotResolve(_underlying.source)
return result
@@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser):
concept.compiled[ConceptParts.BODY] = value
if underlying.parsing_expression.rule_name:
_add_prop(concept, underlying.parsing_expression.rule_name, value)
# KSI : Why don't we set concept.metadata.need_validation to True ?
if isinstance(underlying, NonTerminalNode):
for node in underlying.children:
+3 -5
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
@@ -30,7 +30,6 @@ class BnfParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("Bnf", 50, False)
# self.has_error = False
# self.error_sink = []
# self.name = BaseParser.PREFIX + "Bnf"
@@ -61,7 +60,6 @@ class BnfParser(BaseParser):
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -115,11 +113,11 @@ class BnfParser(BaseParser):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
def parse(self, context: ExecutionContext, parser_input):
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_choice()
token = self.get_token()
+12 -11
View File
@@ -1,10 +1,14 @@
# try to match something like
# ConceptNode 'plus' ConceptNode
#
# Replaced by SyaNodeParser
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging
multiple_concepts_parser = MultipleConceptsParser()
@@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptsWithConcepts", 25)
self.enabled = False
@staticmethod
def get_tokens(nodes):
@@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser):
return concept
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
concept_key = self.get_key(nodes)
concept = sheerka.new(concept_key)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
concepts = concept if hasattr(concept, "__iter__") else [concept]
for concept in concepts:
@@ -101,7 +102,7 @@ class ConceptsWithConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=concept,
try_parsed=None)))
+8 -9
View File
@@ -110,7 +110,7 @@ class DefaultParser(BaseParser):
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "Default", 50)
BaseParser.__init__(self, "Default", 60)
self.lexer_iter = None
self._current = None
self.context: ExecutionContext = None
@@ -168,7 +168,6 @@ class DefaultParser(BaseParser):
self.next_token()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -188,19 +187,19 @@ class DefaultParser(BaseParser):
return
def parse(self, context, text):
def parse(self, context, parser_input):
# default parser can only manage string text
if not isinstance(text, str):
if not isinstance(parser_input, str):
ret = context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
self.log_result(context, text, ret)
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
self.add_error(e, False)
@@ -211,7 +210,7 @@ class DefaultParser(BaseParser):
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.get_return_value_body(context.sheerka, text, tree, tree)
body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
# body = self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
@@ -224,7 +223,7 @@ class DefaultParser(BaseParser):
not self.has_error,
body)
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def parse_statement(self):
+5 -5
View File
@@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "EmptyString", 90)
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if isinstance(text, str) and text.strip() == "" or \
isinstance(text, list) and text == [] or \
text is None:
if isinstance(parser_input, str) and parser_input.strip() == "" or \
isinstance(parser_input, list) and parser_input == [] or \
parser_input is None:
ret = sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
@@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser):
else:
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
+12 -13
View File
@@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
def parse(self, context, text):
def parse(self, context, parser_input):
"""
text can be string, but text can also be an list of tokens
:param context:
:param text:
:param parser_input:
:return:
"""
context.log(f"Parsing '{text}'", self.name)
context.log(f"Parsing '{parser_input}'", self.name)
res = []
sheerka = context.sheerka
try:
words = self.get_words(text)
words = self.get_words(parser_input)
except LexerError as e:
context.log(f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
if len(words) > self.MAX_WORDS_SIZE:
context.log(f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
recognized = False
for combination in self.combinations(words):
@@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser):
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text if isinstance(text, str) else self.get_text_from_tokens(text),
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
body=concept,
try_parsed=concept)))
recognized = True
if recognized:
if len(res) == 1:
self.log_result(context, text, res[0])
self.log_result(context, parser_input, res[0])
else:
self.log_multiple_results(context, text, res)
self.log_multiple_results(context, parser_input, res)
return res
return res
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
self.log_result(context, text, ret)
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
@staticmethod
def get_words(text):
tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
def get_words(self, text):
tokens = self.get_input_as_tokens(text)
res = []
for t in tokens:
if t.type == TokenKind.EOF:
+12 -13
View File
@@ -1,18 +1,20 @@
# to be replaced by SyaNodeParser
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser()
concept_lexer_parser = BnfNodeParser()
class MultipleConceptsParser(BaseParser):
"""
Parser that will take the result of ConceptLexerParser and
Parser that will take the result of BnfNodeParser and
try to resolve the unrecognized tokens token by token
It is a success when it returns a list ConceptNode exclusively
@@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
self.enabled = False
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
@@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser):
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
if not nodes:
return None
if not text.parser == concept_lexer_parser:
return None
sheerka = context.sheerka
nodes = text.value
nodes_found = [[]]
concepts_only = True
@@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, text.source, ret[0])
self.log_result(context, parser_input.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text.source, ret)
self.log_multiple_results(context, parser_input.source, ret)
return ret
@staticmethod
+25 -24
View File
@@ -1,4 +1,4 @@
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
@@ -6,7 +6,7 @@ import ast
import logging
import core.utils
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
log = logging.getLogger(__name__)
@@ -67,7 +67,7 @@ class PythonParser(BaseParser):
BaseParser.__init__(self, "Python", 50)
self.source = kwargs.get("source", "<undef>")
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
tree = None
@@ -76,15 +76,9 @@ class PythonParser(BaseParser):
}
try:
if isinstance(text, str) and "c:" in text:
source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text, python_switcher)
source = self.get_input_as_text(parser_input, python_switcher)
source = source.strip()
text = text if isinstance(text, str) else source
parser_input = parser_input if isinstance(parser_input, str) else source
# first, try to parse an expression
res, tree, error = self.try_parse_expression(source)
@@ -92,25 +86,32 @@ class PythonParser(BaseParser):
# then try to parse a statement
res, tree, error = self.try_parse_statement(source)
if not res:
self.has_error = True
error_node = PythonErrorNode(text, error)
error_node = PythonErrorNode(parser_input, error)
self.error_sink.append(error_node)
except LexerError as e:
self.has_error = True
self.error_sink.append(e)
ret = sheerka.ret(
self.name,
not self.has_error,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=self.error_sink if self.has_error else PythonNode(text, tree),
try_parsed=None))
if self.has_error:
ret = sheerka.ret(
self.name,
False,
sheerka.new(
BuiltinConcepts.NOT_FOR_ME,
body=parser_input,
reason=self.error_sink))
else:
ret = sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
body=PythonNode(parser_input, tree),
try_parsed=None))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def try_parse_expression(self, text):
+7 -8
View File
@@ -1,10 +1,11 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
multiple_concepts_parser = MultipleConceptsParser()
unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser):
@@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser):
res += c if c.isalnum() else "0"
return res
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
source = ""
to_parse = ""
identifiers = {}
@@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser):
python_id = _get_identifier(concept)
to_parse += python_id
python_ids_mappings[python_id] = concept
else:
source += node.source
to_parse += node.source
File diff suppressed because it is too large Load Diff
+114
View File
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils
# Parser names handed to builtin_helpers.parse_unrecognized when trying to
# resolve runs of unrecognized tokens (see parse / validate_concept_node).
PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]
@dataclass()
class CannotParseNode(ErrorNode):
    """Error node carrying a span of tokens that could not be parsed."""
    # the UnrecognizedTokensNode that resisted every parser
    unrecognized: UnrecognizedTokensNode
class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya)
    It will try to resolve all UnrecognizedTokensNode.
    """
    def __init__(self, **kwargs):
        super().__init__("UnrecognizedNode", 45)  # lower than AtomNode, BnfNode and SyaNode

    def add_error(self, error):
        # Accept either a single error or any iterable of errors.
        # NOTE(review): unlike other parsers' add_error, this one never sets
        # self.has_error, yet parse() branches on it — presumably has_error is
        # derived from error_sink in BaseParser; confirm against BaseParser.
        if hasattr(error, "__iter__"):
            self.error_sink.extend(error)
        else:
            self.error_sink.append(error)

    def parse(self, context, parser_input):
        """
        Resolve the UnrecognizedTokensNode entries in a lexer-node sequence.

        ConceptNode entries are validated (their compiled properties may hide
        unrecognized tokens); UnrecognizedTokensNode entries are re-parsed via
        PARSERS. Returns None when the input is not lexer nodes, a single ret
        concept for one resulting sequence, or a list of rets when several
        alternative sequences were produced.
        """
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            # Not our kind of input; let another parser handle it.
            return None
        # Cartesian accumulation: each node may contribute several alternative
        # lexer nodes, so sequences_found holds every combination so far.
        sequences_found = [[]]
        has_unrecognized = False
        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if not res.status:
                    self.add_error(res.body)
                else:
                    sequences_found = core.utils.product(sequences_found, [res.body])
            elif isinstance(node, UnrecognizedTokensNode):
                # Try the fallback parsers on the raw source of this node.
                res = parse_unrecognized(context, node.source, PARSERS)
                res = only_successful(context, res)
                if res.status:
                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
                else:
                    # Keep the unresolved node; the overall status becomes False.
                    sequences_found = core.utils.product(sequences_found, [node])
                    has_unrecognized = True
            else:  # cannot happen as of today :-)
                raise NotImplementedError()
        # concept with UnrecognizedToken in their properties is considered as fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        ret = []
        for choice in sequences_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input,
                        body=choice,
                        try_parsed=choice)))
        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret

    def validate_concept_node(self, context, concept_node):
        """
        Check a ConceptNode for UnrecognizedTokensNode values hidden in its
        concept's compiled properties, resolving them in place via PARSERS.

        Returns a failing ret with the collected errors, or a successful ret
        wrapping the (possibly mutated) concept_node.
        """
        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find unrecognized
            :param concept:
            :return:
            """
            for name, value in concept.compiled.items():
                if isinstance(value, Concept):
                    _validate_concept(value)
                elif isinstance(value, UnrecognizedTokensNode):
                    res = parse_unrecognized(context, value.tokens, PARSERS)
                    res = only_successful(context, res)  # keep only successful parsers
                    if res.status:
                        # Replace the unrecognized tokens with the parsed value.
                        concept.compiled[name] = res.body.body
                    else:
                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))
        _validate_concept(concept_node.concept)
        if len(errors) > 0:
            return context.sheerka.ret(self.name, False, errors)
        else:
            return context.sheerka.ret(self.name, True, concept_node)