Added SyaNodeParser (finally, after one month)
This commit is contained in:
@@ -0,0 +1,911 @@
|
||||
#####################################################################################################
|
||||
# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
||||
# I don't directly use the project, but it helped me figure out
|
||||
# what to do.
|
||||
# Dejanović I., Milosavljević G., Vaderna R.:
|
||||
# Arpeggio: A flexible PEG parser for Python,
|
||||
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
|
||||
#####################################################################################################
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from collections import defaultdict
|
||||
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
from core.tokenizer import TokenKind, Tokenizer, Token
|
||||
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.BaseParser import BaseParser, ErrorNode
|
||||
import core.utils
|
||||
|
||||
|
||||
class NonTerminalNode(LexerNode):
    """
    Inner parse-tree node returned by the BnfNodeParser.

    Wraps the ParsingExpression that matched together with the child
    nodes produced while matching it.
    """

    def __init__(self, parsing_expression, start, end, tokens, children=None):
        """
        :param parsing_expression: the expression that produced this node
        :param start: index of the first matched token
        :param end: index of the last matched token (-1 means "no match")
        :param tokens: the matched tokens
        :param children: sub-nodes produced by the sub-expressions
        """
        super().__init__(start, end, tokens)
        self.parsing_expression = parsing_expression
        # Normalize to a list: the previous default (None) crashed
        # __repr__ (len(None)) and made __eq__ treat "no children" and
        # "empty children" as different.
        self.children = list(children) if children is not None else []

    def __repr__(self):
        name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
        if len(self.children) > 0:
            sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
        else:
            sub_names = ""
        return name + sub_names

    def __eq__(self, other):
        if not isinstance(other, NonTerminalNode):
            return False

        return self.parsing_expression == other.parsing_expression and \
            self.start == other.start and \
            self.end == other.end and \
            self.children == other.children

    def __hash__(self):
        # children is a list (unhashable) -- hash a tuple snapshot instead.
        # Equal lists give equal tuples, so the __eq__/__hash__ contract holds.
        # (Previously this raised TypeError on every call.)
        return hash((self.parsing_expression, self.start, self.end, tuple(self.children)))
||||
|
||||
|
||||
class TerminalNode(LexerNode):
    """
    Leaf parse-tree node returned by the BnfNodeParser.

    Holds the literal value matched by a terminal expression.
    """

    def __init__(self, parsing_expression, start, end, value):
        super().__init__(start, end, source=value)
        self.parsing_expression = parsing_expression
        self.value = value

    def __repr__(self):
        prefix = self.parsing_expression.rule_name or ""
        return f"{prefix}'{self.value}'"

    def __eq__(self, other):
        if not isinstance(other, TerminalNode):
            return False

        return (self.parsing_expression, self.start, self.end, self.value) == \
               (other.parsing_expression, other.start, other.end, other.value)

    def __hash__(self):
        return hash((self.parsing_expression, self.start, self.end, self.value))
|
||||
|
||||
|
||||
@dataclass()
class UnknownConceptNode(ErrorNode):
    """Error node emitted when a referenced concept cannot be resolved."""
    # key of the concept that could not be found
    concept_key: str
|
||||
|
||||
|
||||
@dataclass()
class TooManyConceptNode(ErrorNode):
    """Error node emitted when a concept key matches more than one concept."""
    # key that resolved ambiguously
    concept_key: str
|
||||
|
||||
|
||||
class ParsingExpression:
    """
    Base class of every grammar expression.

    ``elements`` keeps the raw sub-expressions exactly as given at
    construction time, while ``nodes`` holds the (possibly resolved)
    expressions actually used when parsing. ``rule_name`` labels the
    expression so its match can be stored as a concept property.
    """

    def __init__(self, *args, **kwargs):
        self.elements = args

        raw_nodes = kwargs.get('nodes', [])
        # a single node may be passed directly; normalize to a list
        self.nodes = raw_nodes if hasattr(raw_nodes, '__iter__') else [raw_nodes]

        self.rule_name = kwargs.get('rule_name', '')

    def __eq__(self, other):
        return isinstance(other, ParsingExpression) \
            and self.rule_name == other.rule_name \
            and self.elements == other.elements

    def __hash__(self):
        return hash((self.rule_name, self.elements))

    def parse(self, parser):
        """Delegate to the subclass-specific ``_parse`` implementation."""
        return self._parse(parser)

    def add_rule_name_if_needed(self, text):
        """Append ``=rule_name`` to *text* when this expression is named."""
        if self.rule_name:
            return text + "=" + self.rule_name
        return text
|
||||
|
||||
|
||||
class ConceptExpression(ParsingExpression):
    """
    Matches a concept.

    Used only for rule definitions: ``concept`` may initially be the
    concept's name (str) and is replaced by the actual Concept instance
    the first time it is resolved (see ``_parse``).
    """

    def __init__(self, concept, rule_name=""):
        super().__init__(rule_name=rule_name)
        # either a Concept instance or the concept's name (str)
        self.concept = concept

    def __repr__(self):
        return self.add_rule_name_if_needed(f"{self.concept}")

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, ConceptExpression):
            return False

        if isinstance(self.concept, Concept):
            # NOTE(review): assumes other.concept is also a Concept here;
            # a str on the other side would raise AttributeError - confirm
            return self.concept.name == other.concept.name

        # when it's only the name of the concept
        return self.concept == other.concept

    def __hash__(self):
        return hash((self.concept, self.rule_name))

    @staticmethod
    def get_parsing_expression_from_name(name):
        """
        Build an expression that matches the concept's own name, token by
        token. Used as a fallback when no grammar is registered for it.
        """
        tokens = Tokenizer(name)
        # drops the last token - presumably EOF; confirm with Tokenizer
        nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
        if len(nodes) == 1:
            return nodes[0]
        else:
            sequence = Sequence(nodes)
            # nodes is assigned directly because the constructor stored the
            # list as a single element of `elements`
            sequence.nodes = nodes
            return sequence

    def _parse(self, parser):
        """Return a NonTerminalNode wrapping the concept match, or None."""
        # resolve the concept by name on first use
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # Memoize

        if to_match not in parser.concepts_grammars:
            # Try to match the concept using its name
            expr = self.get_parsing_expression_from_name(to_match.name)
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||
|
||||
|
||||
class ConceptGroupExpression(ConceptExpression):
    """
    Matches a concept that is a *set* of concepts: when no grammar is
    registered for the set itself, it tries each member concept as an
    ordered choice.
    """

    def _parse(self, parser):
        # resolve the concept by name on first use
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # Memoize

        if to_match not in parser.concepts_grammars:
            # no grammar for the set: try each member concept in turn
            concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
            nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
            expr = OrderedChoice(nodes)
            # nodes assigned directly (constructor stored the list as one element)
            expr.nodes = nodes
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||
|
||||
|
||||
class Sequence(ParsingExpression):
    """
    Matches its sub-expressions one after another, in definition order.
    Fails (returns None) as soon as any sub-expression fails.
    """

    def _parse(self, parser):
        start_pos = parser.pos
        last_end = parser.pos
        matched = []

        for expression in self.nodes:
            result = expression.parse(parser)
            if result is None:
                # one element failed -> the whole sequence fails
                return None
            if result.end != -1:  # end == -1 flags an empty (optional) match
                matched.append(result)
                last_end = result.end

        return NonTerminalNode(self, start_pos, last_end,
                               parser.tokens[start_pos: last_end + 1], matched)

    def __repr__(self):
        inner = ", ".join(repr(element) for element in self.elements)
        return self.add_rule_name_if_needed(f"({inner})")
|
||||
|
||||
|
||||
class OrderedChoice(ParsingExpression):
    """
    Matches the first of its alternatives that succeeds.
    It stops at the first match, so definition order matters.
    """

    def _parse(self, parser):
        start_pos = parser.pos

        for alternative in self.nodes:
            result = alternative.parse(parser)
            if result:
                return NonTerminalNode(self, start_pos, result.end,
                                       parser.tokens[start_pos: result.end + 1],
                                       [result])
            # backtrack so the next alternative starts from the same position
            parser.seek(start_pos)

        return None

    def __repr__(self):
        inner = "| ".join(repr(alternative) for alternative in self.elements)
        return self.add_rule_name_if_needed(f"({inner})")
|
||||
|
||||
|
||||
class Optional(ParsingExpression):
    """
    Matches its elements, or matches nothing (never fails).

    When several elements match, the longest match wins.
    If you need first-match ordering instead, use Optional(OrderedChoice).
    """

    def _parse(self, parser):
        init_pos = parser.pos
        selected_node = NonTerminalNode(self, parser.pos, -1, [], [])  # means that nothing is found

        for e in self.nodes:
            node = e.parse(parser)
            if node:
                # keep only the longest match seen so far
                if node.end > selected_node.end:
                    selected_node = NonTerminalNode(
                        self,
                        node.start,
                        node.end,
                        parser.tokens[node.start: node.end + 1],
                        [node])
            # backtrack so every alternative starts from the same position
            parser.seek(init_pos)

        if selected_node.end != -1:
            parser.seek(selected_node.end)
            parser.next_token()  # eat the tokens found
        # when end == -1, the parser position is untouched (no match consumed)

        return selected_node

    def __repr__(self):
        if len(self.elements) == 1:
            return f"{self.elements[0]}?"
        else:
            to_str = ", ".join(repr(n) for n in self.elements)
            return self.add_rule_name_if_needed(f"({to_str})?")
|
||||
|
||||
|
||||
class Repetition(ParsingExpression):
    """
    Common base for the repetition-like expressions (?, *, +).

    Keyword Args:
        sep: optional expression used as a separator between repeated
            matches (None when no separator is expected).
    """

    def __init__(self, *elements, **kwargs):
        super().__init__(*elements, **kwargs)
        self.sep = kwargs.get('sep', None)
|
||||
|
||||
|
||||
class ZeroOrMore(Repetition):
    """
    Matches its sub-expression zero or more times (with an optional
    separator between matches). It never fails: with zero matches it
    returns an empty node whose ``end`` is -1.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # consume the separator between two consecutive matches
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # match the repeated expression
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break
            if node.end != -1:  # because returns -1 when no match
                children.append(node)
                end_pos = node.end
            else:
                # Empty match: no input was consumed, so looping again would
                # produce the same result forever. Stop here (bug fix: the
                # previous code hung on e.g. ZeroOrMore(Optional(...))).
                break

        if len(children) == 0:
            # zero matches: empty node, end == -1
            return NonTerminalNode(self, init_pos, -1, [], [])

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})*")
|
||||
|
||||
|
||||
class OneOrMore(Repetition):
    """
    Matches its sub-expression one or more times (with an optional
    separator between matches). Fails (returns None) when not even one
    match is found.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # consume the separator between two consecutive matches
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # match the repeated expression
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break
            if node.end != -1:  # because returns -1 when no match
                children.append(node)
                end_pos = node.end
            else:
                # Empty match: no input was consumed, so looping again would
                # produce the same result forever. Stop here (bug fix: the
                # previous code hung on empty inner matches).
                break

        if len(children) == 0:  # if nothing is found, it's an error
            return None

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})+")
|
||||
|
||||
|
||||
class UnorderedGroup(Repetition):
    """
    Will try to match all of the parsing expressions in any order.

    Not implemented yet: _parse always raises.
    """

    def _parse(self, parser):
        raise NotImplementedError()

    # def __repr__(self):
    #     to_str = ", ".join(repr(n) for n in self.elements)
    #     return f"({to_str})#"
|
||||
|
||||
|
||||
class Match(ParsingExpression):
    """
    Base class for expressions that consume input directly
    (as opposed to combining other expressions).
    """

    def __init__(self, rule_name, root=False):
        super().__init__(rule_name=rule_name, root=root)

    def parse(self, parser):
        return self._parse(parser)
|
||||
|
||||
|
||||
class StrMatch(Match):
    """
    Matches a literal string against the current token.
    """

    def __init__(self, to_match, rule_name="", ignore_case=True):
        # NOTE: deliberately skips Match.__init__ (which takes rule_name
        # positionally) and goes straight to ParsingExpression.__init__.
        super(Match, self).__init__(rule_name=rule_name)
        self.to_match = to_match
        self.ignore_case = ignore_case

    def __repr__(self):
        return self.add_rule_name_if_needed(f"'{self.to_match}'")

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, StrMatch):
            return False

        return self.to_match == other.to_match and self.ignore_case == other.ignore_case

    def __hash__(self):
        # Bug fix: defining __eq__ without __hash__ sets __hash__ to None
        # (Python data model), making StrMatch unhashable -- which broke
        # TerminalNode/NonTerminalNode hashing of their parsing_expression.
        # Keep the hash consistent with __eq__.
        return hash((self.rule_name, self.elements, self.to_match, self.ignore_case))

    def _parse(self, parser):
        """Return a TerminalNode for the current token, or None on mismatch."""
        token = parser.get_token()
        if self.ignore_case:
            m = str(token.value).lower() == self.to_match.lower()
        else:
            m = token.value == self.to_match

        if m:
            node = TerminalNode(self, parser.pos, parser.pos, token.value)
            parser.next_token()
            return node

        return None
|
||||
|
||||
|
||||
class BnfNodeParser(BaseParser):
    """
    Parser that recognizes concepts in a token stream using BNF-like
    grammars built from ParsingExpression trees.
    """

    def __init__(self, **kwargs):
        super().__init__("BnfNode", 50)
        # Grammar lookup table: concept -> ParsingExpression.
        # May be supplied directly or shared from a sheerka instance.
        if 'grammars' in kwargs:
            self.concepts_grammars = kwargs.get("grammars")
        elif 'sheerka' in kwargs:
            self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
        else:
            self.concepts_grammars = {}

        self.ignore_case = True

        # Tokenization cursor state (set by reset_parser).
        self.token = None
        self.pos = -1
        self.tokens = None

        # Execution environment (set by reset_parser/initialize).
        self.context = None
        self.text = None
        self.sheerka = None

    def add_error(self, error, next_token=True):
        """Record *error* in the sink, optionally advancing past the bad token."""
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def reset_parser(self, context, text):
        """
        Prepare the parser for a new input string.

        Returns False when tokenization fails (the lexer error is wrapped
        in an ERROR concept and recorded in the sink).
        """
        self.context = context
        self.sheerka = context.sheerka
        self.text = text

        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except core.tokenizer.LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False

        self.token = None
        self.pos = -1
        self.next_token(False)
        return True

    def get_token(self) -> Token:
        """Return the current token without consuming it."""
        return self.token

    def next_token(self, skip_whitespace=True):
        """
        Advance the cursor to the next token, optionally skipping
        whitespace and newlines. Returns False once EOF is reached.
        """
        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.pos += 1
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            # safe because the token list always ends with an EOF token
            # (presumably -- confirm with the tokenizer)
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def seek(self, pos):
        """Move the cursor to an absolute token position."""
        self.pos = pos
        self.token = self.tokens[self.pos]
        return True

    def rewind(self, offset, skip_whitespace=True):
        """Move the cursor by *offset*, then skip whitespace backwards."""
        self.pos += offset
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
                self.pos -= 1
                self.token = self.tokens[self.pos]

    def initialize(self, context, concepts_definitions):
        """
        Adds a bunch of concepts, and how they can be recognized.

        :param context: execution context
        :param concepts_definitions: dictionary of concept, concept_definition
        :return: a sheerka return value (success flag + grammars or errors)
        """

        self.context = context
        self.sheerka = context.sheerka
        concepts_to_resolve = set()

        for concept, concept_def in concepts_definitions.items():
            # ## Gets the grammars
            context.log(f"Resolving grammar for '{concept}'", context.who)
            concept.init_key()  # make sure that the key is initialized
            grammar = self.get_model(concept_def, concepts_to_resolve)
            self.concepts_grammars[concept] = grammar

        if self.has_error:
            return self.sheerka.ret(self.name, False, self.error_sink)

        # ## Removes concepts with infinite recursions
        concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
        for concept in concepts_to_remove:
            concepts_to_resolve.remove(concept)
            del self.concepts_grammars[concept]

        if self.has_error:
            return self.sheerka.ret(self.name, False, self.error_sink)
        else:
            return self.sheerka.ret(self.name, True, self.concepts_grammars)

    def get_concept(self, concept_name):
        """Resolve a concept by name, preferring the local context."""
        if concept_name in self.context.concepts:
            return self.context.concepts[concept_name]
        return self.sheerka.get(concept_name)

    def get_model(self, concept_def, concepts_to_resolve):
        """
        Turn a raw concept definition (strings, Concepts, nested
        ParsingExpressions) into a resolved ParsingExpression model.

        Side effect: every concept encountered is added to
        *concepts_to_resolve* for the later infinite-recursion check.
        """

        # TODO
        # inner_get_model must not modify the initial ParsingExpression
        # A copy must be created
        def inner_get_model(expression):
            if isinstance(expression, Concept):
                # concept sets become an ordered choice over their members
                if self.sheerka.isaset(self.context, expression):
                    ret = ConceptGroupExpression(expression, rule_name=expression.name)
                else:
                    ret = ConceptExpression(expression, rule_name=expression.name)
                concepts_to_resolve.add(expression)
            elif isinstance(expression, ConceptExpression):  # it includes ConceptGroupExpression
                # default the rule name to the concept name
                if expression.rule_name is None or expression.rule_name == "":
                    expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
                        else expression.concept
                if isinstance(expression.concept, str):
                    # resolve the concept name now when possible
                    concept = self.get_concept(expression.concept)
                    if self.sheerka.is_known(concept):
                        expression.concept = concept
                concepts_to_resolve.add(expression.concept)
                ret = expression
            elif isinstance(expression, str):
                ret = StrMatch(expression, ignore_case=self.ignore_case)
            elif isinstance(expression, StrMatch):
                ret = expression
                if ret.ignore_case is None:
                    ret.ignore_case = self.ignore_case
            elif isinstance(expression, Sequence) or \
                    isinstance(expression, OrderedChoice) or \
                    isinstance(expression, ZeroOrMore) or \
                    isinstance(expression, OneOrMore) or \
                    isinstance(expression, Optional):
                ret = expression
                # recursively resolve the sub-expressions
                ret.nodes = [inner_get_model(e) for e in ret.elements]
            else:
                ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)

            # Translate separator expression.
            if isinstance(expression, Repetition) and expression.sep:
                expression.sep = inner_get_model(expression.sep)

            return ret

        model = inner_get_model(concept_def)

        return model

    def detect_infinite_recursion(self, concepts_to_resolve):
        """
        Return the concepts whose grammar starts by referring back to
        themselves (left recursion), which would never terminate.
        """

        # infinite recursion matcher
        def _is_infinite_recursion(ref_concept, node):
            if isinstance(node, ConceptExpression):
                if node.concept == ref_concept:
                    return True

                if isinstance(node.concept, str):
                    to_match = self.get_concept(node.concept)
                    if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
                        return False
                else:
                    to_match = node.concept

                if to_match not in self.concepts_grammars:
                    return False

                return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])

            if isinstance(node, OrderedChoice):
                # only the first alternative can start the recursion
                return _is_infinite_recursion(ref_concept, node.nodes[0])

            if isinstance(node, Sequence):
                # NOTE(review): checks every element, not just the leftmost
                # one -- confirm this is the intended (conservative) behavior
                for node in node.nodes:
                    if _is_infinite_recursion(ref_concept, node):
                        return True
                return False

            return False

        removed_concepts = []
        for e in concepts_to_resolve:
            if isinstance(e, str):
                e = self.get_concept(e)
                if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
                    continue

            if e not in self.concepts_grammars:
                continue

            to_resolve = self.concepts_grammars[e]
            if _is_infinite_recursion(e, to_resolve):
                removed_concepts.append(e)
        return removed_concepts

    def parse(self, context, parser_input):
        """
        Parse *parser_input* and return one (or a list of) PARSER_RESULT
        return value(s) -- one per ambiguity found.
        """
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        concepts_found = [[]]
        unrecognized_tokens = None
        has_unrecognized = False

        # actually list of list
        # The first dimension is the number of possibilities found
        # The second dimension is the number of concepts found, under one possibility
        #
        # Example 1
        # concept foo : 'one' 'two'
        # concept bar : 'one' 'two'
        # input 'one two' -> will produce two possibilities (foo and bar).
        #
        # Example 2
        # concept foo : 'one'
        # concept bar : 'two'
        # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)

        while True:
            init_pos = self.pos
            res = []

            # try every registered grammar at the current position
            for concept, grammar in self.concepts_grammars.items():
                self.seek(init_pos)
                node = grammar.parse(self)  # a node is TerminalNode or NonTerminalNode
                if node is not None and node.end != -1:
                    updated_concept = self.finalize_concept(context.sheerka, concept, node)
                    concept_node = ConceptNode(
                        updated_concept,
                        node.start,
                        node.end,
                        self.tokens[node.start: node.end + 1],
                        None,
                        node)
                    res.append(concept_node)

            if len(res) == 0:  # not recognized
                # accumulate the token in the "unrecognized" run and move on
                self.seek(init_pos)
                if unrecognized_tokens:
                    unrecognized_tokens.add_token(self.get_token(), init_pos)
                else:
                    unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])

                if not self.next_token(False):
                    break

            else:  # some concepts are recognized
                # flush any pending unrecognized run first
                if unrecognized_tokens and unrecognized_tokens.not_whitespace():
                    unrecognized_tokens.fix_source()
                    concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
                    has_unrecognized = True
                    unrecognized_tokens = None

                res = self.get_bests(res)  # only keep the concepts that eat the more tokens
                concepts_found = core.utils.product(concepts_found, res)

                # loop
                self.seek(res[0].end)
                if not self.next_token(False):
                    break

        # Fix the source for unrecognized tokens
        if unrecognized_tokens and unrecognized_tokens.not_whitespace():
            unrecognized_tokens.fix_source()
            concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
            has_unrecognized = True

        # else
        # returns as many ReturnValue than choices found
        ret = []
        for choice in concepts_found:
            ret.append(
                self.sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    self.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input,
                        body=choice,
                        try_parsed=choice)))

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret

    def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
        """
        Updates the properties of the concept.
        Goes in recursion if the property is a concept.

        :param sheerka: factory used to create the new concept instance
        :param template: concept whose key/id is used to create the instance
        :param underlying: parse-tree node the values are extracted from
        :param init_empty_body: also set the BODY property when the new
            concept has no body yet
        """

        # this cache is to make sure that we return the same concept for the same ConceptExpression
        _underlying_value_cache = {}

        def _add_prop(_concept, prop_name, value):
            """
            Adds a new entry,
            makes a list if the property already exists
            """
            if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
                # new entry
                _concept.compiled[prop_name] = value
            else:
                # make a list if there was a value
                previous_value = _concept.compiled[prop_name]
                if isinstance(previous_value, list):
                    previous_value.append(value)
                else:
                    new_value = [previous_value, value]
                    _concept.compiled[prop_name] = new_value

        def _look_for_concept_match(_underlying):
            """
            At some point, there is either an StrMatch or a ConceptMatch,
            that allowed the recognition.
            Look for the ConceptMatch, with recursion if needed.
            """
            if isinstance(_underlying.parsing_expression, ConceptExpression):
                return _underlying

            if not isinstance(_underlying, NonTerminalNode):
                return None

            # only follow unambiguous single-child chains
            if len(_underlying.children) != 1:
                return None

            return _look_for_concept_match(_underlying.children[0])

        def _get_underlying_value(_underlying):
            # value is either a finalized sub-concept or the raw matched text
            concept_match_node = _look_for_concept_match(_underlying)
            if concept_match_node:
                # the value is a concept
                if id(concept_match_node) in _underlying_value_cache:
                    result = _underlying_value_cache[id(concept_match_node)]
                else:
                    ref_tpl = concept_match_node.parsing_expression.concept
                    result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
                    _underlying_value_cache[id(concept_match_node)] = result
            else:
                # the value is a string
                result = DoNotResolve(_underlying.source)

            return result

        def _process_rule_name(_concept, _underlying):
            # store each named sub-match as a property of the concept
            if _underlying.parsing_expression.rule_name:
                value = _get_underlying_value(_underlying)
                _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
                _concept.metadata.need_validation = True

            if isinstance(_underlying, NonTerminalNode):
                for child in _underlying.children:
                    _process_rule_name(_concept, child)

        key = (template.key, template.id) if template.id else template.key
        concept = sheerka.new(key)
        if init_empty_body and concept.metadata.body is None:
            value = _get_underlying_value(underlying)
            concept.compiled[ConceptParts.BODY] = value
            if underlying.parsing_expression.rule_name:
                _add_prop(concept, underlying.parsing_expression.rule_name, value)
                # KSI : Why don't we set concept.metadata.need_validation to True ?

        if isinstance(underlying, NonTerminalNode):
            for node in underlying.children:
                _process_rule_name(concept, node)

        return concept

    def encode_grammar(self, grammar):
        """
        Transform the grammar into something that can easily be serialized.

        :param grammar: dict of concept -> ParsingExpression
        :return: dict of str -> str (BNF-like textual form)
        """

        def _encode(expression):
            # NOTE(review): `res` stays unbound for expression types not
            # listed below (e.g. UnorderedGroup) -> UnboundLocalError; confirm
            # those never reach serialization.
            if isinstance(expression, StrMatch):
                res = f"'{expression.to_match}'"

            elif isinstance(expression, ConceptExpression):
                res = core.utils.str_concept(expression.concept)

            elif isinstance(expression, Sequence):
                res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"

            elif isinstance(expression, OrderedChoice):
                res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"

            elif isinstance(expression, Optional):
                res = _encode(expression.nodes[0]) + "?"

            elif isinstance(expression, ZeroOrMore):
                res = _encode(expression.nodes[0]) + "*"

            elif isinstance(expression, OneOrMore):
                res = _encode(expression.nodes[0]) + "+"

            if expression.rule_name:
                res += "=" + expression.rule_name

            return res

        result = {}
        for k, v in grammar.items():
            key = core.utils.str_concept(k)
            value = _encode(v)
            result[key] = value
        return result

    @staticmethod
    def get_bests(results):
        """
        Returns the results that are the longest (all those sharing the
        greatest ``end`` position).

        :param results: list of nodes with an ``end`` attribute
        :return: list of the longest results
        """
        by_end_pos = defaultdict(list)
        for result in results:
            by_end_pos[result.end].append(result)

        return by_end_pos[max(by_end_pos)]
|
||||
|
||||
|
||||
class ParsingExpressionVisitor:
    """
    Generic visitor over ParsingExpression trees.

    Dispatches to a ``visit_<ClassName>`` method when the subclass
    defines one, falling back to ``generic_visit``.
    """

    def visit(self, parsing_expression):
        """Dispatch on the expression's concrete class name."""
        name = parsing_expression.__class__.__name__

        method = 'visit_' + name
        visitor = getattr(self, method, self.generic_visit)
        return visitor(parsing_expression)

    def generic_visit(self, parsing_expression):
        """Visit every element, wrapping raw Concepts/strings on the fly."""
        # optional hook invoked for every expression, if the subclass has one
        if hasattr(self, "visit_all"):
            self.visit_all(parsing_expression)

        for node in parsing_expression.elements:
            if isinstance(node, Concept):
                self.visit(ConceptExpression(node.key or node.name))
            elif isinstance(node, str):
                self.visit(StrMatch(node))
            else:
                self.visit(node)
|
||||
Reference in New Issue
Block a user