from core.concept import DefinitionType
from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, PrepareReadTokens, ReadConcept, \
    ReadTokens, Start, \
    StateMachine, StateMachineContext
from parsers.tokenizer import Token, TokenKind, Tokenizer


class SyaConceptsParser:
    """
    Parses concepts that take parameters,
    ex : def concept a plus b as a + b
    It parses a sequence of such concept definitions.
    """
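
    # Typical driving code (illustrative; `context` and `parser_input` are
    # supplied by the caller, see parse() below):
    #   parser = SyaConceptsParser()
    #   parser.parse(context, parser_input)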

    def __init__(self):
        # Workflow that scans the input token by token until the start of a
        # concept is recognized (or the end of the input is reached).
        tokens_wkf = {
            Start("start", next_states=["prepare read tokens"]),
            PrepareReadTokens("prepare read tokens", next_states=["read tokens"]),
            ReadTokens("read tokens", next_states=["read tokens", "eof", "concepts found"]),
            ManageUnrecognized("eof", next_states=["end"]),
            ManageUnrecognized("concepts found", next_states=["#concept_wkf"]),
            End("end", next_states=None)
        }

        # Workflow that reads a single concept, then hands control back to
        # the token-scanning workflow.
        concept_wkf = {
            Start("start", next_states=["read concept"]),
            ReadConcept("read concept", next_states=["#tokens_wkf"]),
        }

        self.workflows = {
            "#tokens_wkf": {t.name: t for t in tokens_wkf},
            "#concept_wkf": {t.name: t for t in concept_wkf},
        }
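
        # Note: the two workflows reference each other by name: "concepts
        # found" jumps to "#concept_wkf", and "read concept" jumps back to
        # "#tokens_wkf", so parsing alternates between scanning tokens and
        # reading concepts until the input is exhausted.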

        self.error_sink = []

    @staticmethod
    def _get_expected_tokens(concept_key):
        """
        Return a list of pairs of (expected tokens, number of expected variables before these tokens).

        ex:
        'if x y then z end' => ('if', 0), ('then', 2), ('end', 1)

        :param concept_key: the key of the concept to analyze
        :type concept_key: str
        :return: the (expected tokens, number of variables) pairs
        :rtype: list
        """

        def custom_strip_tokens(_tokens):
            """
            Collapses each run of consecutive whitespace tokens into a single token.
            Returns an empty list if the input contains only whitespace tokens.

            :param _tokens: the tokens to filter
            :type _tokens: list
            :return: the filtered tokens
            :rtype: list
            """
            res = []
            buffer = None
            for t in _tokens:
                if t.type == TokenKind.WHITESPACE:
                    # remember only the last whitespace token of the current run
                    buffer = t
                else:
                    if buffer:
                        res.append(buffer)
                        buffer = None
                    res.append(t)

            if res and buffer:  # add the buffer only if the result is not empty
                res.append(buffer)

            return res
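
        # Illustrative behaviour of custom_strip_tokens (WS = a whitespace token):
        #   [WS, 'then', WS, WS] -> [WS, 'then', WS]
        #   [WS, WS]             -> []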

        expected = []  # pairs of (expected tokens, number of expected variables before them)
        tokens = []
        nb_variables = 0
        parsing_tokens = None  # True while parsing literal tokens (not VAR_DEF); None before any literal token

        for token in Tokenizer(concept_key, yield_eof=False):
            if token.type == TokenKind.WHITESPACE:
                tokens.append(token)
            elif token.type == TokenKind.VAR_DEF:
                if parsing_tokens is not None and parsing_tokens:
                    # a variable closes the current run of literal tokens
                    expected.append((custom_strip_tokens(tokens), nb_variables))
                    nb_variables = 1
                    tokens = []
                    parsing_tokens = False
                else:
                    nb_variables += 1
            else:
                tokens.append(token)
                parsing_tokens = True

        # do not forget the remaining tokens
        if tokens or nb_variables:
            expected.append((custom_strip_tokens(tokens), nb_variables))

        return expected

    def get_metadata_from_first_token(self, context, token: Token):
        # Build the list of candidate concepts whose key starts with the given
        # token, keeping only default definitions that take parameters.
        return [ConceptToRecognize(m, self._get_expected_tokens(m.key), "key")
                for m in context.sheerka.get_metadatas_from_first_token("key", token.value)
                if m.definition_type == DefinitionType.DEFAULT and len(m.parameters) > 0]

    def parse(self, context, parser_input):
        sm = StateMachine(self.workflows)
        sm_context = StateMachineContext(context, parser_input, self.get_metadata_from_first_token)
        sm.run("#tokens_wkf", "start", sm_context)
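

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the parser): the pairing logic of
# _get_expected_tokens, re-implemented on plain whitespace-separated strings.
# For illustration only, single-letter words stand in for variables; the real
# implementation relies on Tokenizer and TokenKind.VAR_DEF instead.
# ---------------------------------------------------------------------------
def _sketch_expected_tokens(concept_key):
    expected = []  # pairs of (literal words, number of variables before them)
    literals = []
    nb_variables = 0
    for word in concept_key.split():
        if len(word) == 1:  # assumed to be a variable
            if literals:
                # a variable closes the current run of literal words
                expected.append((" ".join(literals), nb_variables))
                literals = []
                nb_variables = 1
            else:
                nb_variables += 1
        else:
            literals.append(word)
    # do not forget the remaining run
    if literals or nb_variables:
        expected.append((" ".join(literals), nb_variables))
    return expected


# Expected behaviour of the sketch, matching the docstring example:
#   _sketch_expected_tokens('if x y then z end')
#   -> [('if', 0), ('then', 2), ('end', 1)]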