"""Default parser: builds AST nodes for 'def concept' statements and simple expressions."""
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
|
|
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
|
from dataclasses import dataclass, field
|
|
import logging
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass()
class DefaultParserNode(Node):
    """Base class for every node the DefaultParser produces.

    Carries the raw tokens the node was built from; the token list is
    excluded from dataclass equality comparisons.
    """
    # Raw source tokens backing this node (ignored by __eq__).
    tokens: list = field(compare=False)

    def is_same(self, other):
        """Structural comparison: same concrete type and, for leaf nodes
        that expose a ``value`` attribute, the same value."""
        if type(self) is not type(other):
            return False
        # Nodes without a `value` attribute compare equal by type alone.
        return not hasattr(self, "value") or self.value == other.value
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    # Marker base for all DefaultParser error nodes: combines the
    # token-carrying DefaultParserNode with the ErrorNode marker type.
    pass
@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    """Error node emitted when a token other than one of the
    ``expected_tokens`` is encountered."""
    # Human-readable description of the problem.
    message: str
    # Tokens/keywords that would have been valid at this position.
    expected_tokens: list

    def __post_init__(self):
        # Lazy %-style args: the message is only interpolated when DEBUG
        # logging is actually enabled (was eager string concatenation).
        log.debug("-> UnexpectedTokenErrorNode: %s", self.message)
@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but there is a syntax error
    """
    # Human-readable description of the syntax error.
    message: str

    def __post_init__(self):
        # Lazy %-style args: interpolation only happens when DEBUG
        # logging is enabled (was eager string concatenation).
        log.debug("-> SyntaxErrorNode: %s", self.message)
@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    # The raw input text that could not be handled.
    text: str

    def __post_init__(self):
        # Lazy %-style args: interpolation only happens when DEBUG
        # logging is enabled (was eager string concatenation).
        log.debug("-> CannotHandleErrorNode: %s", self.text)
@dataclass()
class DefConceptNode(DefaultParserNode):
    """AST node for a ``def concept`` statement with its optional
    where/pre/post declarations and its body."""
    name: str
    where: Node = None
    pre: Node = None
    post: Node = None
    # NOTE(review): default is the NopNode *class*, not an instance, while
    # the other parts default to None — preserved as-is.
    body: Node = NopNode

    def get_codes(self):
        """Return a dict mapping each declaration name ('where', 'pre',
        'post', 'body') to its node's ``ast``, skipping parts whose node
        has no ``ast`` attribute."""
        return {
            part: getattr(self, part).ast
            for part in ("where", "pre", "post", "body")
            if hasattr(getattr(self, part), "ast")
        }
@dataclass()
class NumberNode(DefaultParserNode):
    """Numeric literal (int or float)."""
    value: object

    def __repr__(self):
        return f"{self.value}"
@dataclass()
class StringNode(DefaultParserNode):
    """String literal; remembers which quote character delimited it."""
    value: str
    quote: str

    def is_same(self, other):
        # Base check (same type, same value) plus identical quoting.
        return super().is_same(other) and self.quote == other.quote

    def __repr__(self):
        return f"{self.quote}{self.value}{self.quote}"
@dataclass()
class VariableNode(DefaultParserNode):
    """Reference to a variable, stored by name."""
    value: str

    def __repr__(self):
        return f"{self.value}"
@dataclass()
class TrueNode(DefaultParserNode):
    """Boolean literal ``true``."""

    def __repr__(self):
        return "true"
@dataclass()
class FalseNode(DefaultParserNode):
    """Boolean literal ``false``."""

    def __repr__(self):
        return "false"
@dataclass()
class NullNode(DefaultParserNode):
    """Literal ``null``."""

    def __repr__(self):
        return "null"
@dataclass()
class BinaryNode(DefaultParserNode):
    """Infix binary operation: ``left <operator> right``."""
    operator: TokenKind
    left: Node
    right: Node

    def is_same(self, other):
        """Structural equality: same operator and structurally equal
        operands (short-circuits in the same order as separate checks)."""
        return (
            super().is_same(other)
            and self.operator == other.operator
            and self.left.is_same(other.left)
            and self.right.is_same(other.right)
        )

    def __repr__(self):
        return f"({self.left} {self.operator} {self.right})"
class DefaultParser(BaseParser):
    """Top-level parser for ``def concept`` statements.

    Tokenizes the input text and hands the bodies of the
    ``as``/``where``/``pre``/``post`` declarations to *sub_parser* for
    parsing.  Also implements a small recursive-descent expression
    grammar (addition, multiplication, atoms).
    """

    def __init__(self, text, sub_parser):
        """
        :param text: source text to parse
        :param sub_parser: callable ``(tokens, source=...)`` returning a parser
        """
        BaseParser.__init__(self, "DefaultParser", text)
        self.sub_parser = sub_parser
        self.lexer = Tokenizer(text)
        # Fix: iterate the tokenizer created above instead of constructing
        # a second, independent Tokenizer over the same text.
        self.lexer_iter = iter(self.lexer)
        self._current = None

        # Prime the first token.
        self.next_token()

    def collect_tokens(self, *args):
        """Flatten nodes and bare tokens into a single token list.

        Nodes contribute their ``tokens`` list; anything else is appended
        as-is.
        """
        result = []
        for item in args:
            if isinstance(item, Node):
                result.extend(item.tokens)
            else:
                result.append(item)
        return result

    def add_error(self, error, next_token=True):
        """Record *error* in the error sink and return it.

        :param next_token: when True, also advance past the offending token.
        """
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        """Return the current token, or None once the stream is exhausted."""
        return self._current

    def next_token(self, skip_whitespace=True):
        """Advance to the next token; sets the current token to None when
        the token stream is exhausted.

        :param skip_whitespace: when True, WHITESPACE and NEWLINE tokens
            are skipped.
        """
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None
        return

    @staticmethod
    def get_concept_name(tokens, variables=None):
        """Build a normalized concept name from *tokens*.

        Token values are joined with single spaces, stopping at EOF.
        String tokens lose their surrounding quotes.  When *variables* is
        given, a token whose value appears in it is replaced with
        ``__var__<index>``.
        """
        name = ""
        first = True
        for token in tokens:
            if token.type == TokenKind.EOF:
                break
            if not first:
                name += " "
            if variables is not None and token.value in variables:
                name += "__var__" + str(variables.index(token.value))
            else:
                # Strip the quote characters from string tokens.
                name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
            first = False

        return name

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example
        def concept add one to a as:
            def func(x):
                return x+1
            func(a)
        indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error

        :param tokens: declaration tokens, keyword first
        :return: the de-indented tokens (keyword and header stripped), or
            an ErrorNode describing the problem
        """
        # Fix: guard the tokens[1] access; the original raised IndexError
        # on a single-token input.
        if len(tokens) < 2:
            return UnexpectedTokenErrorNode(tokens[:], "Unexpected end of file", [TokenKind.COLON])

        if tokens[1].type != TokenKind.COLON:
            # Inline declaration (no colon header): just strip the keyword.
            return tokens[1:]

        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])

        if tokens[2].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[2]], "Unexpected token after colon", [TokenKind.NEWLINE])

        # Fix: the original indexed tokens[3] after only checking
        # len(tokens) < 3, raising IndexError when the declaration ends
        # right after the newline.
        if len(tokens) < 4:
            return UnexpectedTokenErrorNode(tokens[0:3], "Unexpected end of file", [TokenKind.WHITESPACE])

        if tokens[3].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[3]], "Indentation not found")
        # The first body line's indentation defines the base indent level.
        indent_size = len(tokens[3].value)

        # now fix the other indentations
        i = 4
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])

                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")

                # Strip the base indentation so the embedded code starts at
                # column 0 and nested code keeps its relative indent.
                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1

        # Drop the keyword, colon, newline and base indent tokens.
        return tokens[4:]

    def parse(self):
        """Parse the input and return the resulting node (or error node)."""
        return self.parse_statement()

    def parse_statement(self):
        """Parse one statement; only ``def`` statements are recognized."""
        token = self.get_token()
        # Fix: guard against an exhausted token stream (token is None);
        # the original raised AttributeError on token.value.
        if token is not None and token.value == Keywords.DEF:
            self.next_token()
            return self.parse_def_concept()
        else:
            return self.add_error(CannotHandleErrorNode([], self.text))

    def parse_def_concept(self):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        """

        def_concept_parts = [Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]

        tokens_found = {}  # Node token is supposed to be a list, but here, it will be a dict

        token = self.get_token()
        # Fix: None guard before reading token.value.
        if token is None or token.value != Keywords.CONCEPT:
            return self.add_error(UnexpectedTokenErrorNode([token] if token else [], "Syntax error.", [Keywords.CONCEPT]))

        self.next_token()
        token = self.get_token()

        if token is not None and token.value in (Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST):
            return self.add_error(UnexpectedTokenErrorNode([token], "Concept name is missing.", ["<name>"]))

        # Collect the concept-name tokens up to EOF or the first part keyword.
        name_as_tokens = []
        while token is not None and token.type != TokenKind.EOF and token.value not in def_concept_parts:
            name_as_tokens.append(token)
            self.next_token()
            token = self.get_token()
        name = self.get_concept_name(name_as_tokens)
        tokens_found["name"] = name_as_tokens

        # try to parse as, where, pre and post declarations
        tokens = {
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = None
        while token is not None and token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                keyword = token.value
                if tokens[keyword]:
                    return self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                tokens[keyword] = [token]  # first element of the list is the keyword
                current_part = keyword
                self.next_token()
            else:
                if current_part is None:
                    return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", def_concept_parts))
                else:
                    # Keep whitespace inside declarations: the sub-parser
                    # needs the original layout.
                    tokens[current_part].append(token)
                    self.next_token(False)

            token = self.get_token()
        for t in tokens:
            tokens_found[t.value] = tokens[t]

        asts = {
            Keywords.AS: NopNode(),
            Keywords.WHERE: NopNode(),
            Keywords.PRE: NopNode(),
            Keywords.POST: NopNode(),
        }

        # check for empty declarations
        for keyword in tokens:
            current_tokens = tokens[keyword]
            if current_tokens is not None:
                # Fix: the keyword token is always element 0, so a
                # single-element list means an empty declaration.  The
                # original tested len == 0 (never true, since the keyword
                # is always present) and would have indexed [0] of an
                # empty list if it had been.
                if len(current_tokens) == 1:  # only one element means empty decl
                    return self.add_error(SyntaxErrorNode([current_tokens[0]], "Empty declaration"), False)
                else:
                    current_tokens = self.fix_indentation(current_tokens)
                    if isinstance(current_tokens, ErrorNode):
                        self.add_error(current_tokens)
                        continue

                    # Delegate the declaration body to the sub-parser.
                    sub_parser = self.sub_parser(current_tokens, source=keyword.value)
                    sub_tree = sub_parser.parse()
                    if isinstance(sub_tree, ErrorNode):
                        self.add_error(sub_tree, False)
                    asts[keyword] = sub_tree

        def_concept_node = DefConceptNode(tokens_found,  # dict instead of list is wanted.
                                          name,
                                          asts[Keywords.WHERE],
                                          asts[Keywords.PRE],
                                          asts[Keywords.POST],
                                          asts[Keywords.AS])

        # Lazy %-args: only formatted when DEBUG logging is enabled.
        log.debug("Found DefConcept node '%s'", def_concept_node)
        return def_concept_node

    def parse_expression(self):
        """Entry point of the expression grammar."""
        return self.parse_addition()

    def parse_addition(self):
        """Parse ``multiply (('+'|'-') multiply)*`` (right-associative)."""
        left = self.parse_multiply()
        token = self.get_token()
        if token is None or token.type == TokenKind.EOF:
            return left

        if token.type == TokenKind.NUMBER:  # example 15 +5 or 15 -5
            # The tokenizer lexed a signed number; treat it as an addition
            # of the (possibly negative) literal.
            right = self.parse_addition()
            return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)

        if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
            return left

        self.next_token()
        right = self.parse_addition()
        return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)

    def parse_multiply(self):
        """Parse ``atom (('*'|'/') atom)*`` (right-associative)."""
        left = self.parse_atom()
        token = self.get_token()
        if token is None or token.type == TokenKind.EOF:
            return left

        if token.type not in (TokenKind.STAR, TokenKind.SLASH):
            return left

        self.next_token()
        right = self.parse_multiply()
        return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)

    def parse_atom(self):
        """Parse a literal, identifier or parenthesized expression."""
        token = self.get_token()
        # Fix: None guard; the original raised AttributeError when the
        # token stream was already exhausted.
        if token is None:
            return self.add_error(
                UnexpectedTokenErrorNode([], "Unexpected token",
                                         [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
                                          "null", TokenKind.LPAR]), False)
        if token.type == TokenKind.NUMBER:
            self.next_token()
            # A '.' in the lexeme distinguishes float from int literals.
            return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
        elif token.type == TokenKind.STRING:
            self.next_token()
            # Strip the quotes; remember which quote character was used.
            return StringNode([token], token.value[1:-1], token.value[0])
        elif token.type == TokenKind.IDENTIFIER:
            if token.value == "true":
                self.next_token()
                return TrueNode([token])
            elif token.value == "false":
                self.next_token()
                return FalseNode([token])
            elif token.value == "null":
                self.next_token()
                return NullNode([token])
            else:
                self.next_token()
                return VariableNode([token], token.value)
        elif token.type == TokenKind.LPAR:
            self.next_token()
            exp = self.parse_expression()
            token = self.get_token()
            self.next_token()

            # Fix: None guard before reading token.type.
            if token is None or token.type != TokenKind.RPAR:
                error = UnexpectedTokenErrorNode([token] if token else [], "Right parenthesis not found.", [TokenKind.RPAR])
                self.add_error(error)
                return error

            return exp
        else:
            error = UnexpectedTokenErrorNode([token], "Unexpected token",
                                             [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
                                              "null", TokenKind.LPAR])
            return self.add_error(error)