Working on #21: SyaConceptsParser.py

This commit is contained in:
2023-07-09 20:16:58 +02:00
parent a7043b1dd8
commit a729d98a0d
3 changed files with 110 additions and 8 deletions
+73 -4
View File
@@ -1,6 +1,8 @@
from parsers.state_machine import End, ManageUnrecognized, PrepareReadTokens, ReadConcept, ReadTokens, Start, \
from core.concept import DefinitionType
from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, PrepareReadTokens, ReadConcept, \
ReadTokens, Start, \
StateMachine, StateMachineContext
from parsers.tokenizer import Token
from parsers.tokenizer import Token, TokenKind, Tokenizer
class SyaConceptsParser:
@@ -32,8 +34,75 @@ class SyaConceptsParser:
self.error_sink = []
@staticmethod
def get_metadata_from_first_token(context, token: Token):
    # NOTE(review): empty stub from the previous revision of this diff; the new
    # revision replaces it with an instance method of the same name further down.
    pass
def _get_expected_tokens(concept_key):
    """
    Return a list of pairs of (expected tokens, number of expected variables before these tokens).

    ex:
        'if x y then z end' => ('if', 0), ('then', 2), ('end', 1)

    :param concept_key: the concept key to tokenize
    :type concept_key: str
    :return: list of (tokens, nb_variables) pairs
    :rtype: list
    """

    def custom_strip_tokens(_tokens):
        """
        Removes consecutive whitespace tokens.
        Returns an empty list if there are only whitespace tokens.

        :param _tokens: the tokens to strip
        :type _tokens: list
        :return: the tokens, with each run of whitespace collapsed to its last token
        :rtype: list
        """
        res = []
        buffer = None  # last pending whitespace token of the current run, if any
        for t in _tokens:
            if t.type == TokenKind.WHITESPACE:
                # keep only the most recent whitespace of a consecutive run
                buffer = t
            else:
                if buffer:
                    res.append(buffer)
                    buffer = None
                res.append(t)
        if res and buffer:  # add the buffer only if the result is not empty
            res.append(buffer)
        return res

    expected = []  # pairs of (expected tokens, number of expected variables before them)
    tokens = []
    nb_variables = 0
    parsing_tokens = None  # True if we are parsing tokens (and not VAR_DEF); None before the first significant token
    for token in Tokenizer(concept_key, yield_eof=False):
        if token.type == TokenKind.WHITESPACE:
            tokens.append(token)
        elif token.type == TokenKind.VAR_DEF:
            # `None` is falsy, so plain truthiness replaces the original
            # `parsing_tokens is not None and parsing_tokens` check.
            if parsing_tokens:
                # a variable ends the current token group: flush it with the
                # count of variables seen before it, then start counting anew
                expected.append((custom_strip_tokens(tokens), nb_variables))
                nb_variables = 1
                tokens = []
                parsing_tokens = False
            else:
                nb_variables += 1
        else:
            tokens.append(token)
            parsing_tokens = True
    # do not forget the remaining ones
    if tokens or nb_variables:
        expected.append((custom_strip_tokens(tokens), nb_variables))
    return expected
def get_metadata_from_first_token(self, context, token: Token):
    """
    Build the concepts to recognize from the metadatas matching a first token.

    Only metadatas with a default definition type and at least one parameter
    are retained; each kept metadata is wrapped in a ConceptToRecognize
    together with its expected-token layout.

    :param context: parsing context giving access to `sheerka`
    :param token: the candidate first token of a concept
    :type token: Token
    :return: the matching ConceptToRecognize instances
    :rtype: list
    """
    candidates = context.sheerka.get_metadatas_from_first_token("key", token.value)
    recognizable = []
    for metadata in candidates:
        if metadata.definition_type != DefinitionType.DEFAULT:
            continue
        if not len(metadata.parameters) > 0:
            continue
        expected = self._get_expected_tokens(metadata.key)
        recognizable.append(ConceptToRecognize(metadata, expected, "key"))
    return recognizable
def parse(self, context, parser_input):
sm = StateMachine(self.workflows)