Hardened DefaultParser

This commit is contained in:
2020-09-22 17:39:42 +02:00
parent 310c9ae839
commit 9b965105e9
5 changed files with 220 additions and 40 deletions
+95 -15
View File
@@ -10,6 +10,11 @@ from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from parsers.BnfParser import BnfParser
class ParsingException(Exception):
    """Raised when parsing fails.

    Carries the error node describing the failure so callers can
    recover it via ``add_error(ex.error)``.

    :param error: the error node (e.g. UnexpectedTokenErrorNode /
        SyntaxErrorNode) describing what went wrong.
    """

    def __init__(self, error):
        # Forward to Exception so str(exc) / exc.args carry the error
        # (otherwise tracebacks show an empty message).
        super().__init__(error)
        self.error = error
@dataclass()
class DefaultParserNode(Node):
"""
@@ -125,24 +130,35 @@ class DefaultParser(BaseParser):
:param tokens:
:return:
"""
if len(tokens) == 0:
return tokens
tokens = tokens.copy() # do not modify ParserInput.tokens
if tokens[0].type != TokenKind.COLON:
return tokens
if len(tokens) < 3:
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
raise ParsingException(UnexpectedTokenErrorNode(tokens[0:2],
"Unexpected end of file",
[TokenKind.NEWLINE]))
pos = DefaultParser.eat_white_space(tokens, 1)
if tokens[pos].type != TokenKind.NEWLINE:
raise ParsingException(UnexpectedTokenErrorNode([tokens[pos]],
"Unexpected token after colon",
[TokenKind.NEWLINE]))
pos += 1
if tokens[1].type != TokenKind.NEWLINE:
return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])
if tokens[2].type != TokenKind.WHITESPACE:
return SyntaxErrorNode([tokens[2]], "Indentation not found.")
indent_size = len(tokens[2].value)
if tokens[pos].type != TokenKind.WHITESPACE:
raise ParsingException(SyntaxErrorNode([tokens[pos]],
"Indentation not found."))
indent_size = len(tokens[pos].value)
pos += 1
# now fix the other indentations
# KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant,
# as I now have an editor in interactive mode
i = 3
i = pos
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
if tokens[i + 1].type != TokenKind.WHITESPACE:
@@ -155,7 +171,17 @@ class DefaultParser(BaseParser):
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
i += 1
return tokens[3:]
return tokens[pos:]
@staticmethod
def eat_white_space(tokens, index):
    """Advance *index* past consecutive whitespace tokens.

    :param tokens: token sequence whose items expose a ``.type`` attribute
    :param index: starting position; may already be past the end of ``tokens``
    :return: the first position >= ``index`` whose token is not WHITESPACE,
        or ``index`` unchanged when it is already out of range
    """
    # The while condition already bounds-checks, so no separate
    # "index >= len(tokens)" guard is needed.
    while index < len(tokens) and tokens[index].type == TokenKind.WHITESPACE:
        index += 1
    return index
def reset_parser(self, context, parser_input):
self.context = context
@@ -252,6 +278,22 @@ class DefaultParser(BaseParser):
def regroup_tokens_by_parts(self, keywords_tokens):
def new_part(t, cma, p):
"""
:param t: token
:param cma: concept_mode_activated
:param p: previous token
:return:
"""
if not t.value in def_concept_parts:
return False
if not cma or not p:
return True
return p.line != t.line
def_concept_parts = [Keywords.CONCEPT.value,
Keywords.FROM.value,
Keywords.AS.value,
@@ -273,10 +315,34 @@ class DefaultParser(BaseParser):
current_part = Keywords.CONCEPT
token = self.parser_input.token
first_token = token
colon_mode_activated = False # if activated, use keyword + colon to start a new keyword definition
previous_token = None
# more explanation on colon_mode_activated
# You can use the pattern
# def concept <name> as:
# <tab> xxx
# <tab> yyy
# ...
#
# It allows readability and usage of other keywords inside the block.
# Example
# def concept give the date as:
# from datetime import date
# return date.today()
#
# 'from datetime' will not be considered as a keyword because it is led by a tab
# whereas in
# def concept in x days as:
# from datetime import date
# return date.today() - x
# where x > 0
#
# where will be recognized as the keyword because it is the first word of the line
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry
while token.type != TokenKind.EOF:
if token.value in def_concept_parts:
if new_part(token, colon_mode_activated, previous_token):
keywords_tokens.append(token) # keep track of the keywords
keyword = Keywords(token.value)
if tokens_found_by_parts[keyword]:
@@ -286,11 +352,14 @@ class DefaultParser(BaseParser):
else:
tokens_found_by_parts[keyword] = [token]
current_part = keyword
colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON
self.parser_input.next_token()
else:
tokens_found_by_parts[current_part].append(token)
self.parser_input.next_token(False)
previous_token = token
token = self.parser_input.token
return first_token, tokens_found_by_parts
@@ -335,7 +404,12 @@ class DefaultParser(BaseParser):
return self.get_concept_simple_definition(definition_tokens)
def get_concept_bnf_definition(self, current_concept_def, definition_tokens):
tokens = core.utils.strip_tokens(definition_tokens[2:])
try:
tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[2:]))
except ParsingException as ex:
self.add_error(ex.error)
return None, NotInitializedNode()
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return None, NotInitializedNode()
@@ -358,7 +432,12 @@ class DefaultParser(BaseParser):
def get_concept_simple_definition(self, definition_tokens):
start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1
tokens = core.utils.strip_tokens(definition_tokens[start:])
try:
tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[start:]))
except ParsingException as ex:
self.add_error(ex.error)
return None, NotInitializedNode()
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False)
return None, NotInitializedNode()
@@ -386,9 +465,10 @@ class DefaultParser(BaseParser):
self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
continue
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
if isinstance(tokens, ErrorNode):
self.add_error(tokens)
try:
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
except ParsingException as ex:
self.add_error(ex.error)
continue
# ask the other parsers if they recognize the tokens