from core.concept import DefinitionType
from parsers.state_machine import (ConceptToRecognize, End, ManageUnrecognized, PrepareReadTokens,
                                   ReadConcept, ReadTokens, Start, StateMachine, StateMachineContext)
from parsers.tokenizer import Token, TokenKind, Tokenizer


class SyaConceptsParser:
    """
    Parses concepts with parameters, e.g.:

        def concept a plus b as a + b

    It parses a sequence of such concept definitions.
    """

    def __init__(self):
        # Workflow that scans the input token stream; when a concept is found,
        # it hands control over to the concept workflow ("#concept_wkf").
        tokens_wkf = {
            Start("start", next_states=["prepare read tokens"]),
            PrepareReadTokens("prepare read tokens", next_states=["read tokens"]),
            ReadTokens("read tokens", next_states=["read tokens", "eof", "concepts found"]),
            ManageUnrecognized("eof", next_states=["end"]),
            ManageUnrecognized("concepts found", next_states=["#concept_wkf"]),
            End("end", next_states=None),
        }
        # Workflow that reads a single concept, then returns to the token workflow.
        concept_wkf = {
            Start("start", next_states=["read concept"]),
            ReadConcept("read concept", next_states=["#tokens_wkf"]),
        }
        self.workflows = {
            "#tokens_wkf": {t.name: t for t in tokens_wkf},
            "#concept_wkf": {t.name: t for t in concept_wkf},
        }
        self.error_sink = []

    @staticmethod
    def _get_expected_tokens(concept_key):
        """
        Return a list of pairs of (expected tokens, number of expected variables before these tokens).

        ex: 'if x y then z end' => ('if', 0), ('then', 2), ('end', 1)

        :param concept_key: the concept key to split into expected tokens and variables
        :type concept_key: str
        :return: list of (token list, variable count) pairs
        :rtype: list
        """

        def custom_strip_tokens(_tokens):
            """
            Collapses each run of consecutive whitespace tokens into its last token.
            Returns an empty list if _tokens contains only whitespace tokens.

            :param _tokens: tokens to strip
            :type _tokens: list
            :return: stripped tokens
            :rtype: list
            """
            res = []
            buffer = None
            for t in _tokens:
                if t.type == TokenKind.WHITESPACE:
                    buffer = t  # keep only the last whitespace token of a run
                else:
                    if buffer:
                        res.append(buffer)
                        buffer = None
                    res.append(t)
            if res and buffer:
                # add the trailing buffer only if the result is not empty
                res.append(buffer)
            return res

        expected = []  # pairs of (expected tokens, number of expected variables before them)
        tokens = []
        nb_variables = 0
        # None until the first token; True while reading literal tokens,
        # False right after a VAR_DEF token
        parsing_tokens = None
        for token in Tokenizer(concept_key, yield_eof=False):
            if token.type == TokenKind.WHITESPACE:
                tokens.append(token)
            elif token.type == TokenKind.VAR_DEF:
                if parsing_tokens:
                    # a variable follows literal tokens: flush the current group
                    expected.append((custom_strip_tokens(tokens), nb_variables))
                    nb_variables = 1
                    tokens = []
                    parsing_tokens = False
                else:
                    nb_variables += 1
            else:
                tokens.append(token)
                parsing_tokens = True
        # do not forget the remaining ones
        if tokens or nb_variables:
            expected.append((custom_strip_tokens(tokens), nb_variables))
        return expected

    def get_metadata_from_first_token(self, context, token: Token):
        """Return the concepts whose key starts with the given token and that take parameters."""
        return [ConceptToRecognize(m, self._get_expected_tokens(m.key), "key")
                for m in context.sheerka.get_metadatas_from_first_token("key", token.value)
                if m.definition_type == DefinitionType.DEFAULT and len(m.parameters) > 0]

    def parse(self, context, parser_input):
        sm = StateMachine(self.workflows)
        sm_context = StateMachineContext(context, parser_input, self.get_metadata_from_first_token)
        sm.run("#tokens_wkf", "start", sm_context)
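

# ---------------------------------------------------------------------------
# Illustration only: a self-contained sketch of the grouping performed by
# `_get_expected_tokens`. It replaces the project's `Tokenizer` with naive
# whitespace splitting and treats the names listed in `variables` as VAR_DEF
# tokens; both simplifications are assumptions made for this demo and are not
# how `parsers.tokenizer.Tokenizer` actually classifies tokens.

def _expected_tokens_sketch(concept_key, variables):
    """Group a concept key into (literal words, nb of variables seen before them) pairs."""
    expected = []
    words = []
    nb_variables = 0
    parsing_words = None
    for word in concept_key.split():
        if word in variables:
            if parsing_words:
                # a variable follows literal words: flush the current group
                expected.append((words, nb_variables))
                nb_variables = 1
                words = []
                parsing_words = False
            else:
                nb_variables += 1
        else:
            words.append(word)
            parsing_words = True
    if words or nb_variables:
        expected.append((words, nb_variables))
    return expected


if __name__ == "__main__":
    # Mirrors the docstring example: 'if x y then z end' with variables x, y, z
    # yields [(['if'], 0), (['then'], 2), (['end'], 1)].
    print(_expected_tokens_sketch("if x y then z end", {"x", "y", "z"}))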