Hardened DefaultParser

This commit is contained in:
2020-09-22 17:39:42 +02:00
parent 310c9ae839
commit 9b965105e9
5 changed files with 220 additions and 40 deletions
+95 -15
View File
@@ -10,6 +10,11 @@ from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from parsers.BnfParser import BnfParser
class ParsingException(Exception):
    """Raised when parsing fails.

    Carries the error node describing the failure so callers can
    recover it via ``add_error(ex.error)``.

    :param error: the error node (e.g. UnexpectedTokenErrorNode /
        SyntaxErrorNode) describing what went wrong.
    """

    def __init__(self, error):
        # Forward to Exception so str(exc) / exc.args carry the error
        # (otherwise tracebacks show an empty message).
        super().__init__(error)
        self.error = error
@dataclass()
class DefaultParserNode(Node):
"""
@@ -125,24 +130,35 @@ class DefaultParser(BaseParser):
:param tokens:
:return:
"""
if len(tokens) == 0:
return tokens
tokens = tokens.copy() # do not modify ParserInput.tokens
if tokens[0].type != TokenKind.COLON:
return tokens
if len(tokens) < 3:
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
raise ParsingException(UnexpectedTokenErrorNode(tokens[0:2],
"Unexpected end of file",
[TokenKind.NEWLINE]))
pos = DefaultParser.eat_white_space(tokens, 1)
if tokens[pos].type != TokenKind.NEWLINE:
raise ParsingException(UnexpectedTokenErrorNode([tokens[pos]],
"Unexpected token after colon",
[TokenKind.NEWLINE]))
pos += 1
if tokens[1].type != TokenKind.NEWLINE:
return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])
if tokens[2].type != TokenKind.WHITESPACE:
return SyntaxErrorNode([tokens[2]], "Indentation not found.")
indent_size = len(tokens[2].value)
if tokens[pos].type != TokenKind.WHITESPACE:
raise ParsingException(SyntaxErrorNode([tokens[pos]],
"Indentation not found."))
indent_size = len(tokens[pos].value)
pos += 1
# now fix the other indentations
# KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant,
# as I now have an editor in interactive mode
i = 3
i = pos
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
if tokens[i + 1].type != TokenKind.WHITESPACE:
@@ -155,7 +171,17 @@ class DefaultParser(BaseParser):
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
i += 1
return tokens[3:]
return tokens[pos:]
@staticmethod
def eat_white_space(tokens, index):
    """Advance *index* past consecutive whitespace tokens.

    :param tokens: token sequence whose items expose a ``.type`` attribute
    :param index: starting position; may already be past the end of ``tokens``
    :return: the first position >= ``index`` whose token is not WHITESPACE,
        or ``index`` unchanged when it is already out of range
    """
    # The while condition already bounds-checks, so no separate
    # "index >= len(tokens)" guard is needed.
    while index < len(tokens) and tokens[index].type == TokenKind.WHITESPACE:
        index += 1
    return index
def reset_parser(self, context, parser_input):
self.context = context
@@ -252,6 +278,22 @@ class DefaultParser(BaseParser):
def regroup_tokens_by_parts(self, keywords_tokens):
def new_part(t, cma, p):
"""
:param t: token
:param cma: concept_mode_activated
:param p: previous token
:return:
"""
if not t.value in def_concept_parts:
return False
if not cma or not p:
return True
return p.line != t.line
def_concept_parts = [Keywords.CONCEPT.value,
Keywords.FROM.value,
Keywords.AS.value,
@@ -273,10 +315,34 @@ class DefaultParser(BaseParser):
current_part = Keywords.CONCEPT
token = self.parser_input.token
first_token = token
colon_mode_activated = False # if activated, use keyword + colon to start a new keyword definition
previous_token = None
# more explanation on colon_mode_activated
# You can use the pattern
# def concept <name> as:
# <tab> xxx
# <tab> yyy
# ...
#
# It allows readability and usage of other keywords inside the block.
# Example
# def concept give the date as:
# from datetime import date
# return date.today()
#
# 'from datetime' will not be considered as a keyword because it is led by a tab
# whereas in
# def concept in x days as:
# from datetime import date
# return date.today() - x
# where x > 0
#
# where will be recognized as the keyword because it is the first word of the line
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry
while token.type != TokenKind.EOF:
if token.value in def_concept_parts:
if new_part(token, colon_mode_activated, previous_token):
keywords_tokens.append(token) # keep track of the keywords
keyword = Keywords(token.value)
if tokens_found_by_parts[keyword]:
@@ -286,11 +352,14 @@ class DefaultParser(BaseParser):
else:
tokens_found_by_parts[keyword] = [token]
current_part = keyword
colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON
self.parser_input.next_token()
else:
tokens_found_by_parts[current_part].append(token)
self.parser_input.next_token(False)
previous_token = token
token = self.parser_input.token
return first_token, tokens_found_by_parts
@@ -335,7 +404,12 @@ class DefaultParser(BaseParser):
return self.get_concept_simple_definition(definition_tokens)
def get_concept_bnf_definition(self, current_concept_def, definition_tokens):
tokens = core.utils.strip_tokens(definition_tokens[2:])
try:
tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[2:]))
except ParsingException as ex:
self.add_error(ex.error)
return None, NotInitializedNode()
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return None, NotInitializedNode()
@@ -358,7 +432,12 @@ class DefaultParser(BaseParser):
def get_concept_simple_definition(self, definition_tokens):
start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1
tokens = core.utils.strip_tokens(definition_tokens[start:])
try:
tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[start:]))
except ParsingException as ex:
self.add_error(ex.error)
return None, NotInitializedNode()
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False)
return None, NotInitializedNode()
@@ -386,9 +465,10 @@ class DefaultParser(BaseParser):
self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
continue
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
if isinstance(tokens, ErrorNode):
self.add_error(tokens)
try:
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
except ParsingException as ex:
self.add_error(ex.error)
continue
# ask the other parsers if they recognize the tokens