import logging
from dataclasses import dataclass, field

from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import ConceptParts
import core.builtin_helpers
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords

log = logging.getLogger(__name__)


@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    # Tokens the node was built from; excluded from the generated
    # equality comparison and from the generated repr.
    tokens: list = field(compare=False, repr=False)


@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    """
    Base class of the error nodes produced by the default parser
    """
    pass


@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    """
    A token was found where another kind of token was expected
    """
    message: str  # human readable description of the problem
    expected_tokens: list  # token kinds / keywords that would have been accepted

    # def __post_init__(self):
    #     log.debug("-> UnexpectedTokenErrorNode: " + self.message)


@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but there is a syntax error
    """
    message: str  # human readable description of the problem

    # def __post_init__(self):
    #     log.debug("-> SyntaxErrorNode: " + self.message)


@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    text: str  # the input that was not recognized

    # def __post_init__(self):
    #     log.debug("-> CannotHandleErrorNode: " + self.text)

#
# @dataclass()
# class NumberNode(DefaultParserNode):
#     value: object
#
#     def __repr__(self):
#         return str(self.value)
#
#
# @dataclass()
# class StringNode(DefaultParserNode):
#     value: str
#     quote: str
#
#     def is_same(self, other):
#         if not super(StringNode, self).is_same(other):
#             return False
#         return self.quote == other.quote
#
#     def __repr__(self):
#         return self.quote + self.value + self.quote
#
#
# @dataclass()
# class VariableNode(DefaultParserNode):
#     value: str
#
#     def __repr__(self):
#         return self.value
#
#
# @dataclass()
# class TrueNode(DefaultParserNode):
#     pass
#
#     def __repr__(self):
#         return "true"
#
#
# @dataclass()
# class FalseNode(DefaultParserNode):
#     pass
#
#     def __repr__(self):
#         return "false"
#
#
# @dataclass()
# class NullNode(DefaultParserNode):
#     pass
#
#     def __repr__(self):
#         return "null"
#
#
# @dataclass()
# class BinaryNode(DefaultParserNode):
#     operator: TokenKind
#     left: Node
#     right: Node
#
#     def is_same(self, other):
#         if not super(BinaryNode, self).is_same(other):
#             return False
#         if self.operator != other.operator:
#             return False
#         if not self.left.is_same(other.left):
#             return False
#         return self.right.is_same(other.right)
#
#     def __repr__(self):
#         return f"({self.left} {self.operator} {self.right})"
#


def get_concept_key(tokens, variables=None):
    """
    Build the textual key of a concept from its tokens.

    Whitespace tokens are dropped, the scan stops at the first EOF token,
    string tokens lose their first and last character (the quotes) and the
    remaining values are joined with a single space.

    :param tokens: iterable of tokens (objects exposing ``type`` and ``value``)
    :param variables: optional list of variable names; a token whose value is
        in this list is replaced by ``__var__<index in the list>``
    :return: the key, as a string (empty when no token contributes)
    """
    # NOTE(review): this function previously carried a @staticmethod decorator
    # although it is not inside a class; a staticmethod object is not callable
    # before Python 3.10, so the decorator was dropped.
    parts = []
    for token in tokens:
        if token.type == TokenKind.EOF:
            break
        if token.type == TokenKind.WHITESPACE:
            continue

        if variables is not None and token.value in variables:
            parts.append("__var__" + str(variables.index(token.value)))
        elif token.type == TokenKind.STRING:
            parts.append(token.value[1:-1])  # strip the surrounding quotes
        else:
            parts.append(token.value)

    return " ".join(parts)


@dataclass()
class NameNode(DefaultParserNode):
    """
    Name of a concept, kept as the list of tokens it is made of.

    Two NameNode are equal (and hash the same) when their names, as
    computed by get_name(), are the same.
    """

    def get_name(self):
        """
        :return: the name as a string, see get_concept_key() for the rules
        """
        return get_concept_key(self.tokens)

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        if not isinstance(other, NameNode):
            return False

        return self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())


@dataclass()
class DefConceptNode(DefaultParserNode):
    """
    Result of the parsing of a 'def concept' statement.

    Every part starts as a NotInitializedNode. A factory is used so that each
    node gets its own placeholder instead of sharing one instance declared at
    class level (dataclasses refuse unhashable class level defaults since
    Python 3.11).
    """
    name: NameNode = field(default_factory=NotInitializedNode)
    where: ReturnValueConcept = field(default_factory=NotInitializedNode)
    pre: ReturnValueConcept = field(default_factory=NotInitializedNode)
    post: ReturnValueConcept = field(default_factory=NotInitializedNode)
    body: ReturnValueConcept = field(default_factory=NotInitializedNode)

    def get_codes(self):
        """
        Collect the ``ast_`` attribute of the parts that have one.

        :return: dict keyed by ConceptParts member; parts whose value has no
            ``ast_`` attribute are left out
        """
        codes = {}
        for part_key in ConceptParts:
            # assumes every ConceptParts value is the name of an attribute of this node
            prop_value = getattr(self, part_key.value)
            if hasattr(prop_value, "ast_"):
                codes[part_key] = prop_value.ast_
        return codes


class DefaultParser(BaseParser):
    """
    Parse sheerka specific grammar (like def concept)
    """

    def __init__(self):
        super().__init__("DefaultParser")
        self.lexer_iter = None  # iterator over the tokens of the current text
        self._current = None  # token currently under the cursor
        self.context = None
        self.text = None
        self.sheerka = None

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example
        def concept add one to a as:
            def func(x):
                return x+1
            func(a)

        indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed
        to avoid a python syntax error

        The whitespace tokens that follow a newline are modified in place.

        :param tokens: tokens of a declaration, without its leading keyword
        :return: ``tokens`` untouched when it is empty or does not start with a
            colon; otherwise the tokens after the first indentation, or an
            error node when the layout is not the expected one
        """
        if not tokens or tokens[0].type != TokenKind.COLON:
            return tokens

        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])

        if tokens[1].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])

        if tokens[2].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[2]], "Indentation not found.")

        # the indentation of the first line is the reference for all the others
        indent_size = len(tokens[2].value)

        # now fix the other indentations
        for i in range(3, len(tokens) - 1):
            if tokens[i].type != TokenKind.NEWLINE:
                continue

            indent = tokens[i + 1]
            if indent.type != TokenKind.WHITESPACE:
                return UnexpectedTokenErrorNode([indent], "Unexpected token", [TokenKind.WHITESPACE])

            if len(indent.value) < indent_size:
                return SyntaxErrorNode([indent], "Invalid indentation.")

            indent.value = " " * (len(indent.value) - indent_size)

        return tokens[3:]

    def reset_parser(self, context, text):
        """
        Prepare the parser for a new text and position it on the first
        token that is neither a whitespace nor a newline.
        """
        # NOTE(review): has_error / error_sink are not reset here - confirm that
        # BaseParser or the caller takes care of it when a parser is reused.
        self.context = context
        self.sheerka = context.sheerka
        self.text = text

        self.lexer_iter = iter(Tokenizer(text))
        self._current = None
        self.next_token()

    def add_error(self, error, next_token=True):
        """
        Flag the parser as failed and record the error in error_sink.

        :param error: the error to record
        :param next_token: when True, move to the next token afterwards
        :return: the error that was given
        """
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        """
        :return: the current token, None once the tokenizer is exhausted
        """
        return self._current

    def next_token(self, skip_whitespace=True):
        """
        Move to the next token.

        :param skip_whitespace: when True, whitespace and newline tokens are skipped
        """
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None

    def parse(self, context, text):
        """
        Parse ``text``.

        :return: the value built by sheerka.ret(). A NOT_FOR_ME concept, with a
            False status, when ``text`` is not a string; otherwise a
            PARSER_RESULT concept whose body is the error sink when an error
            was found, the parsed tree if not
        """
        # default parser can only manage string text
        if not isinstance(text, str):
            log.debug(f"Failed to recognize '{text}'")
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))

        self.reset_parser(context, text)
        tree = self.parse_statement()

        # If a error is found it must be sent to error_sink
        # tree must contain what was recognized

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree))

        self.log_result(log, text, ret)
        return ret

    def parse_statement(self):
        """
        Entry point of the grammar: only 'def ...' statements are handled.

        :return: the node found by parse_def_concept(), or a
            CannotHandleErrorNode (also recorded as an error)
        """
        token = self.get_token()
        # the current token is None when the tokenizer produced nothing
        if token is not None and token.value == Keywords.DEF:
            self.next_token()
            return self.parse_def_concept(token)

        return self.add_error(CannotHandleErrorNode([], self.text))

    def parse_def_concept(self, def_token):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]

        :param def_token: the 'def' token, already consumed
        :return: a DefConceptNode; problems met on the way are recorded with add_error()
        """

        # init
        log.debug("It may be a definition of a concept")
        concept_special_tokens = [def_token]
        concept_found = DefConceptNode(concept_special_tokens)

        # the definition of a concept consists of several parts
        # Keywords.CONCEPT to get the name of the concept
        # Keywords.AS to get the body
        # Keywords.WHERE to get the conditions to recognize for the variables
        # Keywords.PRE to know if the conditions to evaluate the concept
        # Keywords.POST to apply or verify once the concept is executed
        def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]

        # tokens found, when trying to recognize the parts
        tokens_found_by_parts = {
            Keywords.CONCEPT: [],
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = Keywords.CONCEPT
        token = self.get_token()
        first_token = token

        # loop thru the tokens, and put them in the correct tokens_found_by_parts entry
        while token is not None and token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                concept_special_tokens.append(token)  # keep track of the keywords
                keyword = token.value
                if tokens_found_by_parts[keyword]:
                    # a part is defined more than once
                    # (the cursor is moved below, only once, hence next_token=False)
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."), False)
                    tokens_found_by_parts[current_part].append(token)  # adds the token again
                else:
                    tokens_found_by_parts[keyword] = [token]
                    current_part = keyword
                # whitespaces and newlines right after a keyword are not significant
                self.next_token()
            else:
                tokens_found_by_parts[current_part].append(token)
                # inside a part, whitespaces and newlines must be kept
                self.next_token(False)

            token = self.get_token()

        # semantic checks
        name_first_token_index = 1
        if first_token is None or first_token.value != Keywords.CONCEPT:
            # report the token that stands where 'concept' was expected
            self.add_error(UnexpectedTokenErrorNode([first_token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0

        # Manage the name
        name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
        if len(name_tokens) == name_first_token_index:
            self.add_error(SyntaxErrorNode([], "Name is mandatory"))

        # name_tokens is empty when nothing follows 'def'
        if name_tokens and name_tokens[-1].type == TokenKind.NEWLINE:
            name_tokens = name_tokens[:-1]  # strip trailing newlines

        if any(t.type == TokenKind.NEWLINE for t in name_tokens):
            self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))

        concept_found.name = NameNode(name_tokens[name_first_token_index:])  # skip the first token

        asts_found_by_parts = {
            Keywords.AS: NotInitializedNode(),
            Keywords.WHERE: NotInitializedNode(),
            Keywords.PRE: NotInitializedNode(),
            Keywords.POST: NotInitializedNode(),
        }

        for keyword in tokens_found_by_parts:
            if keyword == Keywords.CONCEPT:
                continue  # already done

            log.debug("Processing part '" + keyword.name + "'")
            tokens = tokens_found_by_parts[keyword]
            if tokens is None:
                continue  # nothing to do

            if len(tokens) == 1:  # check for empty declarations
                self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
                continue

            tokens = self.fix_indentation(tokens[1:])  # manage multi-lines declarations
            if isinstance(tokens, ErrorNode):
                self.add_error(tokens)
                continue

            # ask the other parsers if they recognize the tokens
            new_context = self.context.push(self.name)
            parsing_result = core.builtin_helpers.expect_one(new_context, self.sheerka.parse(new_context, tokens))
            if not parsing_result.status:
                self.add_error(parsing_result.value)
                continue

            asts_found_by_parts[keyword] = parsing_result

        concept_found.where = asts_found_by_parts[Keywords.WHERE]
        concept_found.pre = asts_found_by_parts[Keywords.PRE]
        concept_found.post = asts_found_by_parts[Keywords.POST]
        concept_found.body = asts_found_by_parts[Keywords.AS]

        log.debug(f"Found DefConcept node '{concept_found}'")
        return concept_found

    # def parse_expression(self):
    #     return self.parse_addition()
    #
    # def parse_addition(self):
    #     left = self.parse_multiply()
    #     token = self.get_token()
    #     if token is None or token.type == TokenKind.EOF:
    #         return left
    #
    #     if token.type == TokenKind.NUMBER:  # example 15 +5 or 15 -5
    #         right = self.parse_addition()
    #         return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
    #
    #     if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
    #         return left
    #
    #     self.next_token()
    #     right = self.parse_addition()
    #     return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
    #
    # def parse_multiply(self):
    #     left = self.parse_atom()
    #     token = self.get_token()
    #     if token is None or token.type == TokenKind.EOF:
    #         return left
    #
    #     if token.type not in (TokenKind.STAR, TokenKind.SLASH):
    #         return left
    #
    #     self.next_token()
    #     right = self.parse_multiply()
    #     return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
    #
    # def parse_atom(self):
    #     token = self.get_token()
    #     if token.type == TokenKind.NUMBER:
    #         self.next_token()
    #         return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
    #     elif token.type == TokenKind.STRING:
    #         self.next_token()
    #         return StringNode([token], token.value[1:-1], token.value[0])
    #     elif token.type == TokenKind.IDENTIFIER:
    #         if token.value == "true":
    #             self.next_token()
    #             return TrueNode([token])
    #         elif token.value == "false":
    #             self.next_token()
    #             return FalseNode([token])
    #         elif token.value == "null":
    #             self.next_token()
    #             return NullNode([token])
    #         else:
    #             self.next_token()
    #             return VariableNode([token], token.value)
    #     elif token.type == TokenKind.LPAR:
    #         self.next_token()
    #         exp = self.parse_expression()
    #         token = self.get_token()
    #         self.next_token()
    #
    #         if token.type != TokenKind.RPAR:
    #             error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
    #             self.add_error(error)
    #             return error
    #
    #         return exp
    #     else:
    #         error = UnexpectedTokenErrorNode([token], "Unexpected token",
    #                                          [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
    #                                           "null", TokenKind.LPAR])
    #         return self.add_error(error)