Refactored parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser to DefConceptParser.

2020-10-02 04:45:47 +02:00
parent d100b7e8b3
commit e8f2705dcf
28 changed files with 1411 additions and 872 deletions
@@ -7,7 +7,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import TokenKind, Token
 from core.utils import get_n_clones
 from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
-from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, Node
+from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, Node
 from parsers.PythonWithConceptsParser import PythonWithConceptsParser

 # No need to check for Python code as the source code node will resolve to python code anyway
@@ -143,7 +143,7 @@ class FunctionParser(BaseParser):
        so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
        :param kwargs:
        """
-        super().__init__("Function", 55, True)
+        super().__init__("Function", 55)
        self.sep = sep
        self.longest_concepts_only = longest_concepts_only
        self.record_errors = True
@@ -179,6 +179,7 @@ class FunctionParser(BaseParser):
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

+        self.parser_input.next_token()
        node = self.parse_function()

        if self.parser_input.next_token():
@@ -219,7 +220,7 @@ class FunctionParser(BaseParser):
            return None

        if not self.parser_input.next_token():
-            self.add_error(UnexpectedEof(f"Unexpected EOF while parsing left parenthesis"))
+            self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing left parenthesis"))
            return None

        token = self.parser_input.token
@@ -231,7 +232,7 @@ class FunctionParser(BaseParser):

        start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2])
        if not self.parser_input.next_token():
-            self.add_error(UnexpectedEof(f"Unexpected EOF after left parenthesis"))
+            self.add_error(UnexpectedEofNode(f"Unexpected EOF after left parenthesis"))
            return FunctionNode(start_node, None, None)

        params = self.parse_parameters()
@@ -239,7 +240,7 @@ class FunctionParser(BaseParser):
            return FunctionNode(start_node, None, params)

        token = self.parser_input.token
-        if token.type != TokenKind.RPAR:
+        if not token or token.type != TokenKind.RPAR:
            self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found",
                                                    token,
                                                    [TokenKind.RPAR]))
@@ -261,7 +262,7 @@ class FunctionParser(BaseParser):

            token = self.parser_input.token
            if token.type == TokenKind.EOF:
-                self.add_error(UnexpectedEof(f"Unexpected EOF while parsing parameters"))
+                self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing parameters"))
                return None

            if token.type == TokenKind.RPAR:
@@ -269,10 +270,12 @@ class FunctionParser(BaseParser):

            if token.value == self.sep:
                sep_pos = self.parser_input.pos
-                self.parser_input.next_token()
+                has_next = self.parser_input.next_token()  # it's before add_sep() to capture trailing whitespace
                function_parameter.add_sep(sep_pos,
                                           self.parser_input.pos - 1,
                                           self.parser_input.tokens[sep_pos: self.parser_input.pos])
+                if not has_next:
+                    break

        return nodes

@@ -292,8 +295,8 @@ class FunctionParser(BaseParser):
        tokens = []
        while True:
            token = self.parser_input.token
-            # if token is None:
-            #     break
+            if token is None:
+                break

            if token.value == self.sep or token.type == TokenKind.RPAR:
                break