from core.concept import DefinitionType
from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, PrepareReadTokens, ReadConcept, \
    ReadTokens, Start, \
    StateMachine, StateMachineContext
from parsers.tokenizer import Token, TokenKind, Tokenizer


class SyaConceptsParser:
    """
    Parses concepts that take parameters,
    ex : def concept a plus b as a + b
    It parses a sequence of such concept definitions.
    """
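
    # Typical driving code (illustrative; `context` and `parser_input` are
    # supplied by the caller, see parse() below):
    #   parser = SyaConceptsParser()
    #   parser.parse(context, parser_input)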

    def __init__(self):
        # Workflow that scans the input token by token until the start of a
        # concept is recognized (or the end of the input is reached).
        tokens_wkf = {
            Start("start", next_states=["prepare read tokens"]),
            PrepareReadTokens("prepare read tokens", next_states=["read tokens"]),
            ReadTokens("read tokens", next_states=["read tokens", "eof", "concepts found"]),
            ManageUnrecognized("eof", next_states=["end"]),
            ManageUnrecognized("concepts found", next_states=["#concept_wkf"]),
            End("end", next_states=None)
        }

        # Workflow that reads a single concept, then hands control back to
        # the token-scanning workflow.
        concept_wkf = {
            Start("start", next_states=["read concept"]),
            ReadConcept("read concept", next_states=["#tokens_wkf"]),
        }

        self.workflows = {
            "#tokens_wkf": {t.name: t for t in tokens_wkf},
            "#concept_wkf": {t.name: t for t in concept_wkf},
        }
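
        # Note: the two workflows reference each other by name: "concepts
        # found" jumps to "#concept_wkf", and "read concept" jumps back to
        # "#tokens_wkf", so parsing alternates between scanning tokens and
        # reading concepts until the input is exhausted.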

        self.error_sink = []

    @staticmethod
    def _get_expected_tokens(concept_key):
        """
        Return a list of pairs of (expected tokens, number of expected variables before these tokens).

        ex:
        'if x y then z end' => ('if', 0), ('then', 2), ('end', 1)

        :param concept_key: the key of the concept to analyze
        :type concept_key: str
        :return: the (expected tokens, number of variables) pairs
        :rtype: list
        """

        def custom_strip_tokens(_tokens):
            """
            Collapses each run of consecutive whitespace tokens into a single token.
            Returns an empty list if the input contains only whitespace tokens.

            :param _tokens: the tokens to filter
            :type _tokens: list
            :return: the filtered tokens
            :rtype: list
            """
            res = []
            buffer = None
            for t in _tokens:
                if t.type == TokenKind.WHITESPACE:
                    # remember only the last whitespace token of the current run
                    buffer = t
                else:
                    if buffer:
                        res.append(buffer)
                        buffer = None
                    res.append(t)

            if res and buffer:  # add the buffer only if the result is not empty
                res.append(buffer)

            return res
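
        # Illustrative behaviour of custom_strip_tokens (WS = a whitespace token):
        #   [WS, 'then', WS, WS] -> [WS, 'then', WS]
        #   [WS, WS]             -> []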

        expected = []  # pairs of (expected tokens, number of expected variables before them)
        tokens = []
        nb_variables = 0
        parsing_tokens = None  # True while parsing literal tokens (not VAR_DEF); None before any literal token

        for token in Tokenizer(concept_key, yield_eof=False):
            if token.type == TokenKind.WHITESPACE:
                tokens.append(token)
            elif token.type == TokenKind.VAR_DEF:
                if parsing_tokens is not None and parsing_tokens:
                    # a variable closes the current run of literal tokens
                    expected.append((custom_strip_tokens(tokens), nb_variables))
                    nb_variables = 1
                    tokens = []
                    parsing_tokens = False
                else:
                    nb_variables += 1
            else:
                tokens.append(token)
                parsing_tokens = True

        # do not forget the remaining tokens
        if tokens or nb_variables:
            expected.append((custom_strip_tokens(tokens), nb_variables))

        return expected

    def get_metadata_from_first_token(self, context, token: Token):
        # Build the list of candidate concepts whose key starts with the given
        # token, keeping only default definitions that take parameters.
        return [ConceptToRecognize(m, self._get_expected_tokens(m.key), "key")
                for m in context.sheerka.get_metadatas_from_first_token("key", token.value)
                if m.definition_type == DefinitionType.DEFAULT and len(m.parameters) > 0]

    def parse(self, context, parser_input):
        sm = StateMachine(self.workflows)
        sm_context = StateMachineContext(context, parser_input, self.get_metadata_from_first_token)
        sm.run("#tokens_wkf", "start", sm_context)
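

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the parser): the pairing logic of
# _get_expected_tokens, re-implemented on plain whitespace-separated strings.
# For illustration only, single-letter words stand in for variables; the real
# implementation relies on Tokenizer and TokenKind.VAR_DEF instead.
# ---------------------------------------------------------------------------
def _sketch_expected_tokens(concept_key):
    expected = []  # pairs of (literal words, number of variables before them)
    literals = []
    nb_variables = 0
    for word in concept_key.split():
        if len(word) == 1:  # assumed to be a variable
            if literals:
                # a variable closes the current run of literal words
                expected.append((" ".join(literals), nb_variables))
                literals = []
                nb_variables = 1
            else:
                nb_variables += 1
        else:
            literals.append(word)
    # do not forget the remaining run
    if literals or nb_variables:
        expected.append((" ".join(literals), nb_variables))
    return expected


# Expected behaviour of the sketch, matching the docstring example:
#   _sketch_expected_tokens('if x y then z end')
#   -> [('if', 0), ('then', 2), ('end', 1)]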