# Sheerka-Old/src/parsers/BnfNodeParser.py

#####################################################################################################
# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# I don't directly use the project, but it helped me figure out
# what to do.
#       Dejanović I., Milosavljević G., Vaderna R.:
#       Arpeggio: A flexible PEG parser for Python,
#       Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
#####################################################################################################
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
import core.utils


class NonTerminalNode(LexerNode):
    """
    Node returned by the BnfNodeParser for a composite parsing expression.

    It records the parsing expression that produced it, the token range it covers
    (``start`` and ``end`` are both inclusive, callers slice ``tokens[start: end + 1]``)
    and the child nodes that were matched.
    The expressions of this module use ``end == -1`` to mean "nothing was matched".
    """

    def __init__(self, parsing_expression, start, end, tokens, children=None):
        """
        :param parsing_expression: expression that produced this node
        :param start: position of the first token covered
        :param end: position of the last token covered (-1 when nothing is matched)
        :param tokens: tokens covered by the node
        :param children: nodes matched by the sub expressions (may be None)
        """
        super().__init__(start, end, tokens)
        self.parsing_expression = parsing_expression
        self.children = children

    def __repr__(self):
        name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
        # children defaults to None: test truthiness rather than calling len() on it
        if self.children:
            return name + "(" + ",".join(repr(child) for child in self.children) + ")"
        return name

    def __eq__(self, other):
        if not isinstance(other, NonTerminalNode):
            return False

        return self.parsing_expression == other.parsing_expression and \
               self.start == other.start and \
               self.end == other.end and \
               self.children == other.children

    def __hash__(self):
        # children is normally a list (unhashable) or None: hash an immutable snapshot
        return hash((self.parsing_expression, self.start, self.end, tuple(self.children or ())))


class TerminalNode(LexerNode):
    """
    Leaf node returned by the BnfNodeParser.

    Holds the matched value together with the parsing expression that matched it
    and the token positions it covers.
    """

    def __init__(self, parsing_expression, start, end, value):
        # the matched value is also handed to the base class as the node source
        super().__init__(start, end, source=value)
        self.value = value
        self.parsing_expression = parsing_expression

    def __repr__(self):
        # rule name (when there is one) immediately followed by the quoted value
        prefix = self.parsing_expression.rule_name or ""
        return f"{prefix}'{self.value}'"

    def __eq__(self, other):
        if not isinstance(other, TerminalNode):
            return False

        compared = ("parsing_expression", "start", "end", "value")
        return all(getattr(self, attr) == getattr(other, attr) for attr in compared)

    def __hash__(self):
        identity = (self.parsing_expression, self.start, self.end, self.value)
        return hash(identity)


@dataclass
class UnknownConceptNode(ErrorNode):
    """
    Error node carrying the key of a concept.

    NOTE(review): presumably reported when the concept cannot be resolved;
    it is not used in this module, confirm against the callers.
    """
    concept_key: str


@dataclass
class TooManyConceptNode(ErrorNode):
    """
    Error node carrying the key of a concept.

    NOTE(review): presumably reported when a key resolves to several concepts;
    it is not used in this module, confirm against the callers.
    """
    concept_key: str


class ParsingExpression:
    """
    Base class of the grammar elements.

    Attributes:
        elements: the positional arguments, i.e. the raw definition of the expression
        nodes: the sub expressions actually used when parsing
               (taken from the 'nodes' keyword, wrapped in a list when not iterable)
        rule_name: optional name of the rule ('' when there is none)

    Subclasses implement ``_parse(parser)``, which returns a node on success and None on failure.
    """

    def __init__(self, *args, **kwargs):
        self.elements = args

        nodes = kwargs.get('nodes', [])
        if not hasattr(nodes, '__iter__'):
            nodes = [nodes]
        self.nodes = nodes

        self.rule_name = kwargs.get('rule_name', '')

    def __eq__(self, other):
        if not isinstance(other, ParsingExpression):
            return False

        # Two unrelated kinds of expression (a sequence and a choice for instance) are never equal,
        # even when they hold the same elements.
        # Classes on the same inheritance line remain comparable, subclasses refine the comparison.
        if not (isinstance(other, type(self)) or isinstance(self, type(other))):
            return False

        return self.rule_name == other.rule_name and self.elements == other.elements

    def __hash__(self):
        return hash((self.rule_name, self.elements))

    def parse(self, parser):
        """
        Tries to match the expression at the current position of the parser
        :param parser: parser that holds the tokens and the current position
        :return: whatever the _parse() of the subclass returns
        """
        return self._parse(parser)

    def _parse(self, parser):
        """
        Actual matching, to be provided by the subclasses
        :raises NotImplementedError: when the subclass does not override it
        """
        raise NotImplementedError(f"{type(self).__name__} does not implement _parse()")

    def add_rule_name_if_needed(self, text):
        """
        Appends '=<rule_name>' to the text when the expression has a rule name
        """
        return text + "=" + self.rule_name if self.rule_name else text


class ConceptExpression(ParsingExpression):
    """
    Will match a concept
    It used only for rule definition

    The concept is given either as a Concept or as a name.
    A name is resolved the first time the expression is parsed,
    and the resolved concept is then kept in place of the name.
    """

    def __init__(self, concept, rule_name=""):
        super().__init__(rule_name=rule_name)
        self.concept = concept

    def __repr__(self):
        return self.add_rule_name_if_needed(f"{self.concept}")

    @staticmethod
    def _concept_name(concept):
        """
        Name used to compare and hash the expression,
        whether the concept is still a name or already a Concept
        """
        return concept.name if isinstance(concept, Concept) else concept

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, ConceptExpression):
            return False

        # Compare by name on both sides: one expression may still hold the name
        # while the other one already holds the resolved Concept
        return self._concept_name(self.concept) == self._concept_name(other.concept)

    def __hash__(self):
        # Must agree with __eq__ (comparison by name),
        # and must not change when _parse() swaps the name for the Concept
        return hash((self._concept_name(self.concept), self.rule_name))

    @staticmethod
    def get_parsing_expression_from_name(name):
        """
        Builds the expression that matches the name of a concept, token by token
        :param name: name of the concept
        :return: a StrMatch when the name is a single token, a Sequence of StrMatch otherwise
        """
        tokens = Tokenizer(name)
        # the last token is left out (NOTE(review): presumably the EOF token, confirm with the Tokenizer)
        nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
        if len(nodes) == 1:
            return nodes[0]
        else:
            sequence = Sequence(nodes)
            sequence.nodes = nodes
            return sequence

    def _parse(self, parser):
        """
        Matches the concept, using its grammar when it has one, its name otherwise
        :return: a NonTerminalNode that wraps the matched node, None when there is no match
                 or when the concept is unknown
        """
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # Memoize

        if to_match not in parser.concepts_grammars:
            # Try to match the concept using its name
            expr = self.get_parsing_expression_from_name(to_match.name)
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])


class ConceptGroupExpression(ConceptExpression):
    """
    Will match a concept that is a group (a set) of concepts

    When the group has no grammar of its own, its elements are tried one after the other,
    in the order they are returned by sheerka.get_set_elements()
    """

    def _parse(self, parser):
        if isinstance(self.concept, str):
            group = parser.get_concept(self.concept)
        else:
            group = self.concept

        if parser.sheerka.isinstance(group, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = group  # keep the resolved concept for the next calls

        if group in parser.concepts_grammars:
            expr = parser.concepts_grammars[group]
        else:
            # no grammar for the group itself: any of its elements will do
            members = parser.sheerka.get_set_elements(parser.context, self.concept)
            alternatives = [ConceptExpression(c, rule_name=c.name) for c in members]
            expr = OrderedChoice(alternatives)
            expr.nodes = alternatives

        matched = expr.parse(parser)
        if matched is None:
            return None

        covered = parser.tokens[matched.start: matched.end + 1]
        return NonTerminalNode(self, matched.start, matched.end, covered, [matched])


class Sequence(ParsingExpression):
    """
    Matches all of its sub expressions, one after the other, in the order of definition.
    Fails as soon as one of them fails.
    """

    def _parse(self, parser):
        first_pos = last_pos = parser.pos
        matched = []

        for expression in self.nodes:
            node = expression.parse(parser)
            if node is None:
                return None

            if node.end == -1:
                # the sub expression succeeded without matching anything, nothing to keep
                continue

            matched.append(node)
            last_pos = node.end

        return NonTerminalNode(self, first_pos, last_pos, parser.tokens[first_pos: last_pos + 1], matched)

    def __repr__(self):
        parts = [repr(n) for n in self.elements]
        return self.add_rule_name_if_needed("(" + ", ".join(parts) + ")")


class OrderedChoice(ParsingExpression):
    """
    Matches the first alternative that succeeds.
    The alternatives are tried in the order of definition, so this order matters.
    """

    def _parse(self, parser):
        start = parser.pos

        for alternative in self.nodes:
            node = alternative.parse(parser)
            if not node:
                parser.seek(start)  # back to where we started before trying the next one
                continue

            return NonTerminalNode(self, start, node.end, parser.tokens[start: node.end + 1], [node])

        return None

    def __repr__(self):
        parts = [repr(n) for n in self.elements]
        return self.add_rule_name_if_needed("(" + "| ".join(parts) + ")")


class Optional(ParsingExpression):
    """
    Matches its elements when possible, and succeeds anyway when it is not.
    All the elements are tried and the match that ends the furthest is kept.
    Use Optional(OrderedChoice) when the order of the elements must decide.
    """

    def _parse(self, parser):
        start = parser.pos
        best = NonTerminalNode(self, parser.pos, -1, [], [])  # end == -1 : nothing is found

        for candidate in self.nodes:
            node = candidate.parse(parser)
            if node and node.end > best.end:
                covered = parser.tokens[node.start: node.end + 1]
                best = NonTerminalNode(self, node.start, node.end, covered, [node])

            parser.seek(start)  # every candidate is tried from the same position

        if best.end == -1:
            return best

        # consume the tokens of the selected match
        parser.seek(best.end)
        parser.next_token()
        return best

    def __repr__(self):
        if len(self.elements) == 1:
            return f"{self.elements[0]}?"

        parts = [repr(n) for n in self.elements]
        joined = ", ".join(parts)
        return self.add_rule_name_if_needed(f"({joined})?")


class Repetition(ParsingExpression):
    """
    Base class for the repetition-like parser expressions (*, +)

    On top of the arguments of ParsingExpression, it reads the 'sep' keyword:
    the optional separator that ZeroOrMore and OneOrMore expect between two occurrences.
    It is None when no separator is given.
    """

    def __init__(self, *elements, **kwargs):
        super().__init__(*elements, **kwargs)
        self.sep = kwargs.get('sep')


class ZeroOrMore(Repetition):
    """
    ZeroOrMore will try to match parser expression specified zero or more
    times. It will never fail.

    When a separator is defined, it must be found between two occurrences.
    When nothing is matched, the returned node has its end set to -1.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # eat one more occurrence
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break

            if node.end != -1:  # because returns -1 when no match
                children.append(node)
                end_pos = node.end

            if parser.pos == current_pos:
                # The expression succeeded without consuming anything (an Optional that
                # matches nothing for instance). Trying again from the same position
                # would give the same result forever, so stop here.
                break

        if len(children) == 0:
            return NonTerminalNode(self, init_pos, -1, [], [])

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})*")


class OneOrMore(Repetition):
    """
    OneOrMore will try to match parser expression specified one or more times.

    When a separator is defined, it must be found between two occurrences.
    Fails (returns None) when not a single occurrence is matched.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # eat one more occurrence
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break

            if node.end != -1:  # because returns -1 when no match
                children.append(node)
                end_pos = node.end

            if parser.pos == current_pos:
                # The expression succeeded without consuming anything (an Optional that
                # matches nothing for instance). Trying again from the same position
                # would give the same result forever, so stop here.
                break

        if len(children) == 0:  # if nothing is found, it's an error
            return None

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return self.add_rule_name_if_needed(f"({to_str})+")


class UnorderedGroup(Repetition):
    """
    Is meant to match all of its parsing expressions, whatever their order.
    The matching is not implemented yet.
    """

    def _parse(self, parser):
        raise NotImplementedError

    # TODO: add a __repr__ that renders the elements as "(a, b)#"


class Match(ParsingExpression):
    """
    Base class of the expressions that match something directly from the input.
    """

    def __init__(self, rule_name, root=False):
        # 'root' is only forwarded as a keyword argument to ParsingExpression
        super().__init__(rule_name=rule_name, root=root)

    def parse(self, parser):
        return self._parse(parser)


class StrMatch(Match):
    """
    Matches a literal

    The literal is compared with the value of the current token,
    without taking the case into account when ignore_case is set.
    """

    def __init__(self, to_match, rule_name="", ignore_case=True):
        super().__init__(rule_name=rule_name)
        self.to_match = to_match
        self.ignore_case = ignore_case

    def __repr__(self):
        return self.add_rule_name_if_needed(f"'{self.to_match}'")

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, StrMatch):
            return False

        return self.to_match == other.to_match and self.ignore_case == other.ignore_case

    def __hash__(self):
        # Defining __eq__ alone makes the class unhashable: Python then sets __hash__ to None.
        # The nodes and the expressions that hold a StrMatch need to hash it.
        return hash((self.rule_name, self.elements, self.to_match, self.ignore_case))

    def _parse(self, parser):
        """
        Compares the literal with the current token
        :return: a TerminalNode (and the parser moves to the next token) on match, None otherwise
        """
        token = parser.get_token()
        m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
            else token.value == self.to_match

        if m:
            node = TerminalNode(self, parser.pos, parser.pos, token.value)
            parser.next_token()
            return node

        return None


class BnfNodeParser(BaseParser):
    def __init__(self, **kwargs):
        """
        Creates the parser

        The grammars (dictionary of concept, parsing expression) come from,
        by order of priority:
        - the 'grammars' keyword
        - the concepts_grammars of the object given as 'sheerka' keyword
        - a new empty dictionary
        """
        super().__init__("BnfNode", 50)

        if 'grammars' in kwargs:
            grammars = kwargs["grammars"]
        elif 'sheerka' in kwargs:
            grammars = kwargs["sheerka"].concepts_grammars
        else:
            grammars = {}
        self.concepts_grammars = grammars

        self.ignore_case = True

        # position in the input, set by reset_parser()
        self.token = None
        self.pos = -1
        self.tokens = None

        # execution environment, set by reset_parser() or initialize()
        self.context = None
        self.text = None
        self.sheerka = None

    def add_error(self, error, next_token=True):
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def reset_parser(self, context, text):
        """
        Prepares the parser for a new input
        :param context: execution context (its sheerka is kept as well)
        :param text: input to parse
        :return: False when the input cannot be tokenized (the error is recorded), True otherwise
        """
        self.context, self.sheerka, self.text = context, context.sheerka, text

        try:
            tokens = list(self.get_input_as_tokens(text))
        except core.tokenizer.LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False
        self.tokens = tokens

        # start over, on the very first token (even if it's a whitespace)
        self.token = None
        self.pos = -1
        self.next_token(False)
        return True

    def get_token(self) -> Token:
        """
        Returns the current token (None as long as the parser is not positioned on the input)
        """
        return self.token

    def next_token(self, skip_whitespace=True):
        """
        Moves to the next token
        :param skip_whitespace: when True, the whitespaces and the newlines that follow are skipped
        :return: False when the parser is on the end of the input (before or after the move),
                 True otherwise
        """
        current = self.token
        if current and current.type == TokenKind.EOF:
            return False  # already at the end, do not move

        self.pos += 1
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            blanks = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
            while self.token.type in blanks:
                self.pos += 1
                self.token = self.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def seek(self, pos):
        self.pos = pos
        self.token = self.tokens[self.pos]
        return True

    def rewind(self, offset, skip_whitespace=True):
        self.pos += offset
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
                self.pos -= 1
                self.token = self.tokens[self.pos]

    def initialize(self, context, concepts_definitions):
        """
        Adds a bunch of concepts, and how they can be recognized
        The concepts whose grammar leads to an infinite recursion are discarded
        :param context: execution context
        :param concepts_definitions: dictionary of concept, concept_definition
        :return: sheerka return value. On success, its value is the grammars (concepts_grammars),
                 on failure it is the list of errors
        """

        self.context = context
        self.sheerka = context.sheerka
        concepts_to_resolve = set()

        for concept, concept_def in concepts_definitions.items():
            # ## Gets the grammars
            context.log(f"Resolving grammar for '{concept}'", context.who)
            concept.init_key()  # make sure that the key is initialized
            grammar = self.get_model(concept_def, concepts_to_resolve)
            self.concepts_grammars[concept] = grammar

        if self.has_error:
            return self.sheerka.ret(self.name, False, self.error_sink)

        # ## Removes concepts with infinite recursions
        # detect_infinite_recursion() returns resolved concepts, whereas concepts_to_resolve
        # may only hold their names, and the same concept may be reported more than once.
        # So remove leniently instead of failing with a KeyError.
        for concept in self.detect_infinite_recursion(concepts_to_resolve):
            concepts_to_resolve.discard(concept)
            self.concepts_grammars.pop(concept, None)

        if self.has_error:
            return self.sheerka.ret(self.name, False, self.error_sink)
        else:
            return self.sheerka.ret(self.name, True, self.concepts_grammars)

    def get_concept(self, concept_name):
        if concept_name in self.context.concepts:
            return self.context.concepts[concept_name]
        return self.sheerka.get(concept_name)

    def get_model(self, concept_def, concepts_to_resolve):
        """
        Transforms the definition of a concept into the parsing expressions used by the parser
        :param concept_def: definition (Concept, str or ParsingExpression)
        :param concepts_to_resolve: set that receives the concepts referenced by the definition
        :return: the parsing expression. When an element is not recognized,
                 the error is recorded and takes the place of the element
        """
        composite_types = (Sequence, OrderedChoice, ZeroOrMore, OneOrMore, Optional)

        # TODO
        # translate must not modify the initial ParsingExpression
        # A copy must be created
        def translate(expression):
            if isinstance(expression, Concept):
                # a concept : a set of concepts does not match like a single concept
                if self.sheerka.isaset(self.context, expression):
                    expression_class = ConceptGroupExpression
                else:
                    expression_class = ConceptExpression
                model = expression_class(expression, rule_name=expression.name)
                concepts_to_resolve.add(expression)

            elif isinstance(expression, ConceptExpression):  # it includes ConceptGroupExpression
                # the name of the concept is the default rule name
                if expression.rule_name is None or expression.rule_name == "":
                    if isinstance(expression.concept, Concept):
                        expression.rule_name = expression.concept.name
                    else:
                        expression.rule_name = expression.concept

                # replace the name by the concept, when it's already known
                if isinstance(expression.concept, str):
                    resolved = self.get_concept(expression.concept)
                    if self.sheerka.is_known(resolved):
                        expression.concept = resolved

                concepts_to_resolve.add(expression.concept)
                model = expression

            elif isinstance(expression, str):
                model = StrMatch(expression, ignore_case=self.ignore_case)

            elif isinstance(expression, StrMatch):
                if expression.ignore_case is None:
                    expression.ignore_case = self.ignore_case
                model = expression

            elif isinstance(expression, composite_types):
                expression.nodes = [translate(e) for e in expression.elements]
                model = expression

            else:
                model = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)

            # Translate separator expression.
            if isinstance(expression, Repetition) and expression.sep:
                expression.sep = translate(expression.sep)

            return model

        return translate(concept_def)

    def detect_infinite_recursion(self, concepts_to_resolve):
        """
        Looks for the concepts whose grammar ends up referencing the concept itself
        :param concepts_to_resolve: concepts (or names of concepts) to check
        :return: list of the concepts that are recursive, without duplicates
        """

        # infinite recursion matcher
        def _is_infinite_recursion(ref_concept, node, visited):
            """
            :param ref_concept: concept that must not be reached
            :param node: parsing expression to inspect
            :param visited: concepts whose grammar is already inspected (or being inspected)
            """
            if isinstance(node, ConceptExpression):
                if node.concept == ref_concept:
                    return True

                if isinstance(node.concept, str):
                    to_match = self.get_concept(node.concept)
                    if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
                        return False
                    if to_match == ref_concept:
                        # the reference concept was designated by its name
                        return True
                else:
                    to_match = node.concept

                if to_match not in self.concepts_grammars:
                    return False

                # A grammar is inspected only once. This is what stops the search
                # when other concepts reference each other without reaching ref_concept.
                if to_match in visited:
                    return False
                visited.add(to_match)

                return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match], visited)

            if isinstance(node, OrderedChoice):
                # only the first alternative is inspected
                if not node.nodes:
                    return False
                return _is_infinite_recursion(ref_concept, node.nodes[0], visited)

            if isinstance(node, Sequence):
                return any(_is_infinite_recursion(ref_concept, child, visited) for child in node.nodes)

            return False

        removed_concepts = []
        for e in concepts_to_resolve:
            if isinstance(e, str):
                e = self.get_concept(e)
            if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
                continue

            if e not in self.concepts_grammars:
                continue

            if e in removed_concepts:
                # the same concept can be listed twice (by name and as a concept)
                continue

            to_resolve = self.concepts_grammars[e]
            if _is_infinite_recursion(e, to_resolve, set()):
                removed_concepts.append(e)
        return removed_concepts

    def parse(self, context, parser_input):
        """
        Recognizes the concepts of the input, using the known grammars

        From the current position, all the grammars are tried and the concepts that eat
        the most tokens are kept. The parsing then resumes after them.
        When no grammar matches, the current token is put aside as unrecognized
        and the parsing resumes on the next token.

        :param context: execution context
        :param parser_input: input to parse
        :return: a failed return value when the input is empty or cannot be tokenized.
                 Otherwise one return value per possibility found (a single return value,
                 not a list, when there is only one possibility).
                 Their status is False as soon as some non whitespace tokens are not recognized
        """
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        concepts_found = [[]]
        unrecognized_tokens = None
        has_unrecognized = False

        # concepts_found is actually a list of list
        # The first dimension is the number of possibilities found
        # The second dimension is the number of concepts found, under one possibility
        #
        # Example 1
        # concept foo : 'one' 'two'
        # concept bar : 'one' 'two'
        # input 'one two' -> will produce two possibilities (foo and bar).
        #
        # Example 2
        # concept foo : 'one'
        # concept bar : 'two'
        # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)

        while True:
            init_pos = self.pos
            res = []

            # try all the grammars, always from the same position
            for concept, grammar in self.concepts_grammars.items():
                self.seek(init_pos)
                node = grammar.parse(self)  # a node is TerminalNode or NonTerminalNode
                if node is not None and node.end != -1:
                    updated_concept = self.finalize_concept(context.sheerka, concept, node)
                    concept_node = ConceptNode(
                        updated_concept,
                        node.start,
                        node.end,
                        self.tokens[node.start: node.end + 1],
                        None,
                        node)
                    res.append(concept_node)

            if len(res) == 0:  # not recognized
                # put the current token aside, with the other unrecognized tokens that precede
                self.seek(init_pos)
                if unrecognized_tokens:
                    unrecognized_tokens.add_token(self.get_token(), init_pos)
                else:
                    unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])

                if not self.next_token(False):
                    break

            else:  # some concepts are recognized
                # first, flush the unrecognized tokens that come before the concepts
                if unrecognized_tokens and unrecognized_tokens.not_whitespace():
                    unrecognized_tokens.fix_source()
                    concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
                    has_unrecognized = True
                unrecognized_tokens = None

                res = self.get_bests(res)  # only keep the concepts that eat the more tokens
                concepts_found = core.utils.product(concepts_found, res)

                # loop
                # all the best concepts end at the same position, resume after it
                self.seek(res[0].end)
                if not self.next_token(False):
                    break

        # Fix the source for unrecognized tokens
        if unrecognized_tokens and unrecognized_tokens.not_whitespace():
            unrecognized_tokens.fix_source()
            concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
            has_unrecognized = True

        # returns as many ReturnValue as choices found
        ret = []
        for choice in concepts_found:
            ret.append(
                self.sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    self.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input,
                        body=choice,
                        try_parsed=choice)))

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret

    def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
        """
        Creates a new concept from the template and updates its properties
        using the nodes that were matched.
        Goes in recursion if the property is a concept

        :param sheerka: used to create the new concept
        :param template: concept that was recognized (its key, and its id if any, are used)
        :param underlying: node (TerminalNode or NonTerminalNode) matched by the grammar of the concept
        :param init_empty_body: when True, and when the new concept has no body,
                                the value of the underlying node is used as the compiled body
        :return: the new concept
        """

        # this cache is to make sure that we return the same concept for the same ConceptExpression
        # its keys are the id() of the nodes
        _underlying_value_cache = {}

        def _add_prop(_concept, prop_name, value):
            """
            Adds a new entry,
            makes a list if the property already exists
            """
            if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
                # new entry
                _concept.compiled[prop_name] = value
            else:
                # make a list if there was a value
                previous_value = _concept.compiled[prop_name]
                if isinstance(previous_value, list):
                    previous_value.append(value)
                else:
                    new_value = [previous_value, value]
                    _concept.compiled[prop_name] = new_value

        def _look_for_concept_match(_underlying):
            """
            At some point, there is either an StrMatch or a ConceptMatch,
            that allowed the recognition.
            Look for the ConceptMatch, with recursion if needed
            The search only goes down through the nodes that have exactly one child
            """
            if isinstance(_underlying.parsing_expression, ConceptExpression):
                return _underlying

            if not isinstance(_underlying, NonTerminalNode):
                return None

            if len(_underlying.children) != 1:
                return None

            return _look_for_concept_match(_underlying.children[0])

        def _get_underlying_value(_underlying):
            """
            Returns the value of a node: a finalized concept when the node comes down
            to the match of a concept, the source of the node (not to be resolved) otherwise
            """
            concept_match_node = _look_for_concept_match(_underlying)
            if concept_match_node:
                # the value is a concept
                if id(concept_match_node) in _underlying_value_cache:
                    result = _underlying_value_cache[id(concept_match_node)]
                else:
                    ref_tpl = concept_match_node.parsing_expression.concept
                    result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
                    _underlying_value_cache[id(concept_match_node)] = result
            else:
                # the value is a string
                result = DoNotResolve(_underlying.source)

            return result

        def _process_rule_name(_concept, _underlying):
            """
            Adds a property to the concept for every node that has a rule name,
            the node itself first, then all its descendants
            """
            if _underlying.parsing_expression.rule_name:
                value = _get_underlying_value(_underlying)
                _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
                _concept.metadata.need_validation = True

            if isinstance(_underlying, NonTerminalNode):
                for child in _underlying.children:
                    _process_rule_name(_concept, child)

        # the new concept is created from the key of the template (and its id when it has one)
        key = (template.key, template.id) if template.id else template.key
        concept = sheerka.new(key)
        if init_empty_body and concept.metadata.body is None:
            value = _get_underlying_value(underlying)
            concept.compiled[ConceptParts.BODY] = value
            if underlying.parsing_expression.rule_name:
                _add_prop(concept, underlying.parsing_expression.rule_name, value)
                # KSI : Why don't we set concept.metadata.need_validation to True ?

        # the rule names of the descendants become properties of the concept
        if isinstance(underlying, NonTerminalNode):
            for node in underlying.children:
                _process_rule_name(concept, node)

        return concept

    def encode_grammar(self, grammar):
        """
        Transform the grammar into something that can easily can be serialized
        :param grammar:
        :return:
        """

        def _encode(expression):
            if isinstance(expression, StrMatch):
                res = f"'{expression.to_match}'"

            elif isinstance(expression, ConceptExpression):
                res = core.utils.str_concept(expression.concept)

            elif isinstance(expression, Sequence):
                res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"

            elif isinstance(expression, OrderedChoice):
                res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"

            elif isinstance(expression, Optional):
                res = _encode(expression.nodes[0]) + "?"

            elif isinstance(expression, ZeroOrMore):
                res = _encode(expression.nodes[0]) + "*"

            elif isinstance(expression, OneOrMore):
                res = _encode(expression.nodes[0]) + "+"

            if expression.rule_name:
                res += "=" + expression.rule_name

            return res

        result = {}
        for k, v in grammar.items():
            key = core.utils.str_concept(k)
            value = _encode(v)
            result[key] = value
        return result

    @staticmethod
    def get_bests(results):
        """
        Returns the result that is the longest
        :param results:
        :return:
        """
        by_end_pos = defaultdict(list)
        for result in results:
            by_end_pos[result.end].append(result)

        return by_end_pos[max(by_end_pos)]


class ParsingExpressionVisitor:
    """
    Base class to walk through a ParsingExpression

    A subclass defines visit_<ClassName>() methods for the expressions it is interested in.
    The other expressions go to generic_visit(), which calls visit_all() when the subclass
    defines it, then visits the elements of the expression.
    """

    def visit(self, parsing_expression):
        handler_name = 'visit_' + parsing_expression.__class__.__name__
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(parsing_expression)

    def generic_visit(self, parsing_expression):
        if hasattr(self, "visit_all"):
            self.visit_all(parsing_expression)

        for element in parsing_expression.elements:
            # the raw elements (concepts and strings) are wrapped before being visited
            if isinstance(element, Concept):
                to_visit = ConceptExpression(element.key or element.name)
            elif isinstance(element, str):
                to_visit = StrMatch(element)
            else:
                to_visit = element
            self.visit(to_visit)