Hardened DefaultParser
This commit is contained in:
@@ -10,6 +10,11 @@ from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
|
||||
from parsers.BnfParser import BnfParser
|
||||
|
||||
|
||||
class ParsingException(Exception):
    """Raised when parsing fails; wraps the error node describing the failure."""

    def __init__(self, error):
        # Forward the error to Exception so str(ex)/ex.args carry the message
        # (the original left them empty), while keeping the .error attribute
        # that except-handlers in this module read via ex.error.
        super().__init__(error)
        self.error = error
|
||||
|
||||
|
||||
@dataclass()
|
||||
class DefaultParserNode(Node):
|
||||
"""
|
||||
@@ -125,24 +130,35 @@ class DefaultParser(BaseParser):
|
||||
:param tokens:
|
||||
:return:
|
||||
"""
|
||||
if len(tokens) == 0:
|
||||
return tokens
|
||||
|
||||
tokens = tokens.copy() # do not modify ParserInput.tokens
|
||||
|
||||
if tokens[0].type != TokenKind.COLON:
|
||||
return tokens
|
||||
|
||||
if len(tokens) < 3:
|
||||
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
|
||||
raise ParsingException(UnexpectedTokenErrorNode(tokens[0:2],
|
||||
"Unexpected end of file",
|
||||
[TokenKind.NEWLINE]))
|
||||
pos = DefaultParser.eat_white_space(tokens, 1)
|
||||
if tokens[pos].type != TokenKind.NEWLINE:
|
||||
raise ParsingException(UnexpectedTokenErrorNode([tokens[pos]],
|
||||
"Unexpected token after colon",
|
||||
[TokenKind.NEWLINE]))
|
||||
pos += 1
|
||||
|
||||
if tokens[1].type != TokenKind.NEWLINE:
|
||||
return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])
|
||||
|
||||
if tokens[2].type != TokenKind.WHITESPACE:
|
||||
return SyntaxErrorNode([tokens[2]], "Indentation not found.")
|
||||
indent_size = len(tokens[2].value)
|
||||
if tokens[pos].type != TokenKind.WHITESPACE:
|
||||
raise ParsingException(SyntaxErrorNode([tokens[pos]],
|
||||
"Indentation not found."))
|
||||
indent_size = len(tokens[pos].value)
|
||||
pos += 1
|
||||
|
||||
# now fix the other indentations
|
||||
# KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant,
|
||||
# as I now have an editor in interactive mode
|
||||
i = 3
|
||||
i = pos
|
||||
while i < len(tokens) - 1:
|
||||
if tokens[i].type == TokenKind.NEWLINE:
|
||||
if tokens[i + 1].type != TokenKind.WHITESPACE:
|
||||
@@ -155,7 +171,17 @@ class DefaultParser(BaseParser):
|
||||
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
|
||||
i += 1
|
||||
|
||||
return tokens[3:]
|
||||
return tokens[pos:]
|
||||
|
||||
@staticmethod
|
||||
def eat_white_space(tokens, index):
|
||||
if index >= len(tokens):
|
||||
return index
|
||||
|
||||
while index < len(tokens) and tokens[index].type == TokenKind.WHITESPACE:
|
||||
index += 1
|
||||
|
||||
return index
|
||||
|
||||
def reset_parser(self, context, parser_input):
|
||||
self.context = context
|
||||
@@ -252,6 +278,22 @@ class DefaultParser(BaseParser):
|
||||
|
||||
def regroup_tokens_by_parts(self, keywords_tokens):
|
||||
|
||||
def new_part(t, cma, p):
    """Decide whether token *t* starts a new keyword part of a definition.

    :param t: token under inspection
    :param cma: concept_mode_activated (colon-block mode is active)
    :param p: previous token, or None at the start of the definition
    :return: True when *t* begins a new part
    """
    # Only recognized part keywords can ever open a new part.
    # ('x not in y' is the idiomatic form of the original 'not x in y'.)
    if t.value not in def_concept_parts:
        return False

    # Outside colon mode, or with no previous token, any keyword opens a part.
    if not cma or not p:
        return True

    # In colon mode a keyword only counts when it starts a new line.
    return p.line != t.line
|
||||
|
||||
def_concept_parts = [Keywords.CONCEPT.value,
|
||||
Keywords.FROM.value,
|
||||
Keywords.AS.value,
|
||||
@@ -273,10 +315,34 @@ class DefaultParser(BaseParser):
|
||||
current_part = Keywords.CONCEPT
|
||||
token = self.parser_input.token
|
||||
first_token = token
|
||||
colon_mode_activated = False # if activate, use keyword + colon to start a new keyword definition
|
||||
previous_token = None
|
||||
|
||||
# more explanation on colon_mode_activated
|
||||
# You can use the pattern
|
||||
# def concept <name> as:
|
||||
# <tab> xxx
|
||||
# <tab> yyy
|
||||
# ...
|
||||
#
|
||||
# It improves readability and allows use of other keywords inside the block.
|
||||
# Example
|
||||
# def concept give the the date as:
|
||||
# from datetime import date
|
||||
# return date.today()
|
||||
#
|
||||
# 'from datetime' will not be considered as a keyword because it's lead by a tab
|
||||
# whereas in
|
||||
# def concept in x days as:
|
||||
# from datetime import date
|
||||
# return date.today() - x
|
||||
# where x > 0
|
||||
#
|
||||
# where will be recognized as the keyword because it is the first word of the line
|
||||
|
||||
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry
|
||||
while token.type != TokenKind.EOF:
|
||||
if token.value in def_concept_parts:
|
||||
if new_part(token, colon_mode_activated, previous_token):
|
||||
keywords_tokens.append(token) # keep track of the keywords
|
||||
keyword = Keywords(token.value)
|
||||
if tokens_found_by_parts[keyword]:
|
||||
@@ -286,11 +352,14 @@ class DefaultParser(BaseParser):
|
||||
else:
|
||||
tokens_found_by_parts[keyword] = [token]
|
||||
current_part = keyword
|
||||
colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON
|
||||
|
||||
self.parser_input.next_token()
|
||||
else:
|
||||
tokens_found_by_parts[current_part].append(token)
|
||||
self.parser_input.next_token(False)
|
||||
|
||||
previous_token = token
|
||||
token = self.parser_input.token
|
||||
|
||||
return first_token, tokens_found_by_parts
|
||||
@@ -335,7 +404,12 @@ class DefaultParser(BaseParser):
|
||||
return self.get_concept_simple_definition(definition_tokens)
|
||||
|
||||
def get_concept_bnf_definition(self, current_concept_def, definition_tokens):
|
||||
tokens = core.utils.strip_tokens(definition_tokens[2:])
|
||||
try:
|
||||
tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[2:]))
|
||||
except ParsingException as ex:
|
||||
self.add_error(ex.error)
|
||||
return None, NotInitializedNode()
|
||||
|
||||
if len(tokens) == 0:
|
||||
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
|
||||
return None, NotInitializedNode()
|
||||
@@ -358,7 +432,12 @@ class DefaultParser(BaseParser):
|
||||
|
||||
def get_concept_simple_definition(self, definition_tokens):
|
||||
start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1
|
||||
tokens = core.utils.strip_tokens(definition_tokens[start:])
|
||||
try:
|
||||
tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[start:]))
|
||||
except ParsingException as ex:
|
||||
self.add_error(ex.error)
|
||||
return None, NotInitializedNode()
|
||||
|
||||
if len(tokens) == 0:
|
||||
self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False)
|
||||
return None, NotInitializedNode()
|
||||
@@ -386,9 +465,10 @@ class DefaultParser(BaseParser):
|
||||
self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
|
||||
continue
|
||||
|
||||
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
|
||||
if isinstance(tokens, ErrorNode):
|
||||
self.add_error(tokens)
|
||||
try:
|
||||
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
|
||||
except ParsingException as ex:
|
||||
self.add_error(ex.error)
|
||||
continue
|
||||
|
||||
# ask the other parsers if they recognize the tokens
|
||||
|
||||
Reference in New Issue
Block a user