from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import ConceptParts
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
import logging

log = logging.getLogger(__name__)


@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    tokens: list = field(compare=False, repr=False)


@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    pass


@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    message: str
    expected_tokens: list

    # def __post_init__(self):
    #     log.debug("-> UnexpectedTokenErrorNode: " + self.message)


@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but it contains a syntax error
    """
    message: str

    # def __post_init__(self):
    #     log.debug("-> SyntaxErrorNode: " + self.message)


@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    text: str

    # def __post_init__(self):
    #     log.debug("-> CannotHandleErrorNode: " + self.text)


# @dataclass()
# class NumberNode(DefaultParserNode):
#     value: object
#
#     def __repr__(self):
#         return str(self.value)
#
#
# @dataclass()
# class StringNode(DefaultParserNode):
#     value: str
#     quote: str
#
#     def is_same(self, other):
#         if not super(StringNode, self).is_same(other):
#             return False
#         return self.quote == other.quote
#
#     def __repr__(self):
#         return self.quote + self.value + self.quote
#
#
# @dataclass()
# class VariableNode(DefaultParserNode):
#     value: str
#
#     def __repr__(self):
#         return self.value
#
#
# @dataclass()
# class TrueNode(DefaultParserNode):
#     def __repr__(self):
#         return "true"
#
#
# @dataclass()
# class FalseNode(DefaultParserNode):
#     def __repr__(self):
#         return "false"
#
#
# @dataclass()
# class NullNode(DefaultParserNode):
#     def __repr__(self):
#         return "null"
#
#
# @dataclass()
# class BinaryNode(DefaultParserNode):
#     operator: TokenKind
#     left: Node
#     right: Node
#
#     def is_same(self, other):
#         if not super(BinaryNode, self).is_same(other):
#             return False
#         if self.operator != other.operator:
#             return False
#         if not self.left.is_same(other.left):
#             return False
#         return self.right.is_same(other.right)
#
#     def __repr__(self):
#         return f"({self.left} {self.operator} {self.right})"


def get_concept_key(tokens, variables=None):
    """
    Build a normalized key from a token stream: whitespace tokens are dropped,
    string tokens lose their quotes, and tokens listed in `variables` are
    replaced by positional placeholders such as '__var__0'.
    """
    key = ""
    first = True
    for token in tokens:
        if token.type == TokenKind.EOF:
            break
        if token.type == TokenKind.WHITESPACE:
            continue
        if not first:
            key += " "
        if variables is not None and token.value in variables:
            key += "__var__" + str(variables.index(token.value))
        else:
            key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
        first = False
    return key
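
# A minimal sketch of what get_concept_key produces, assuming the Tokenizer
# splits "add one to a" into one identifier token per word (plus WHITESPACE
# and EOF tokens, which the function skips):
#
#     tokens = list(Tokenizer("add one to a"))
#     get_concept_key(tokens)         # -> "add one to a"
#     get_concept_key(tokens, ["a"])  # -> "add one to __var__0"
#
# Since variables are replaced by their position rather than their spelling,
# textually different declarations of the same concept can map to the same
# lookup key.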
@dataclass()
class NameNode(DefaultParserNode):
    def get_name(self):
        name = ""
        first = True
        for token in self.tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if not first:
                name += " "
            name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
            first = False
        return name

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        if not isinstance(other, NameNode):
            return False
        return self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())


@dataclass()
class DefConceptNode(DefaultParserNode):
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()

    def get_codes(self):
        codes = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if hasattr(prop_value, "ast_"):
                codes[part_key] = prop_value.ast_
        return codes


class DefaultParser(BaseParser):
    """
    Parse the sheerka-specific grammar (such as 'def concept')
    """

    def __init__(self):
        BaseParser.__init__(self, "DefaultParser")
        self.lexer_iter = None
        self._current = None
        self.context = None
        self.text = None
        self.sheerka = None

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example

            def concept add one to a as:
                def func(x):
                    return x+1
                func(a)

        the indentation in front of 'def func(x)', 'return x+1' and 'func(a)'
        must be reduced by one level to avoid a Python syntax error.
        :param tokens: the tokens of the declaration, starting at the colon
        :return: the re-indented tokens, or an ErrorNode
        """
        if tokens[0].type != TokenKind.COLON:
            return tokens
        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
        if tokens[1].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])
        if tokens[2].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[2]], "Indentation not found.")
        indent_size = len(tokens[2].value)

        # now fix the other indentations
        i = 3
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1
        return tokens[3:]

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka
        self.text = text
        self.lexer_iter = iter(Tokenizer(text))
        self._current = None
        self.next_token()

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self._current

    def next_token(self, skip_whitespace=True):
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None
        return

    def parse(self, context, text):
        # the default parser can only manage string input
        if not isinstance(text, str):
            log.debug(f"Failed to recognize '{text}'")
            return context.sheerka.ret(
                self.name, False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))

        self.reset_parser(context, text)
        tree = self.parse_statement()

        # if an error is found, it must be sent to error_sink;
        # tree must contain what was recognized
        ret = self.sheerka.ret(
            self.name, not self.has_error,
            self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree))
        self.log_result(log, text, ret)
        return ret

    def parse_statement(self):
        token = self.get_token()
        if token.value == Keywords.DEF:
            self.next_token()
            return self.parse_def_concept(token)
        else:
            return self.add_error(CannotHandleErrorNode([], self.text))
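
    # A worked sketch of fix_indentation on the docstring's example, assuming
    # the Tokenizer emits one WHITESPACE token per run of spaces and keeps the
    # raw text in .value (which the code above relies on). For the 'as' part of
    #
    #     def concept add one to a as:
    #         def func(x):
    #             return x+1
    #         func(a)
    #
    # the tokens start with COLON, NEWLINE and a 4-space WHITESPACE token, so
    # indent_size is 4. Every WHITESPACE token following a NEWLINE is then
    # shortened by 4 spaces, and the leading COLON/NEWLINE/WHITESPACE triple
    # is dropped, so 'def func(x):' lands at column 0 and the body is valid
    # Python again:
    #
    #     fixed = DefaultParser.fix_indentation(tokens)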
    def parse_def_concept(self, def_token):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        """
        # init
        log.debug("It may be a definition of a concept")
        concept_special_tokens = [def_token]
        concept_found = DefConceptNode(concept_special_tokens)

        # the definition of a concept consists of several parts:
        # Keywords.CONCEPT to get the name of the concept
        # Keywords.AS to get the body
        # Keywords.WHERE to get the conditions that recognize the variables
        # Keywords.PRE to get the conditions to check before the concept is evaluated
        # Keywords.POST to get the conditions to apply or verify once the concept is executed
        def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]

        # tokens found while trying to recognize the parts
        tokens_found_by_parts = {
            Keywords.CONCEPT: [],
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = Keywords.CONCEPT
        token = self.get_token()
        first_token = token

        # loop through the tokens and put them in the matching tokens_found_by_parts entry
        while token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                concept_special_tokens.append(token)  # keep track of the keywords
                keyword = token.value
                if tokens_found_by_parts[keyword]:
                    # the part is declared more than once
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    tokens_found_by_parts[current_part].append(token)  # add the token again
                else:
                    tokens_found_by_parts[keyword] = [token]
                    current_part = keyword
                self.next_token()
            else:
                tokens_found_by_parts[current_part].append(token)
                self.next_token(False)
            token = self.get_token()

        # semantic checks
        name_first_token_index = 1
        if first_token.value != Keywords.CONCEPT:
            self.add_error(UnexpectedTokenErrorNode([first_token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0

        # manage the name
        name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
        if len(name_tokens) == name_first_token_index:
            self.add_error(SyntaxErrorNode([], "Name is mandatory"))
        else:
            if name_tokens[-1].type == TokenKind.NEWLINE:
                name_tokens = name_tokens[:-1]  # strip the trailing newline
            if TokenKind.NEWLINE in [t.type for t in name_tokens]:
                self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT],
                                               "Newlines are not allowed in the name."))
            concept_found.name = NameNode(name_tokens[name_first_token_index:])  # skip the keyword token

        asts_found_by_parts = {
            Keywords.AS: NotInitializedNode(),
            Keywords.WHERE: NotInitializedNode(),
            Keywords.PRE: NotInitializedNode(),
            Keywords.POST: NotInitializedNode(),
        }
        for keyword in tokens_found_by_parts:
            if keyword == Keywords.CONCEPT:
                continue  # already done
            log.debug("Processing part '" + keyword.name + "'")
            tokens = tokens_found_by_parts[keyword]
            if tokens is None:
                continue  # nothing to do
            if len(tokens) == 1:
                # check for empty declarations
                self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
                continue

            tokens = self.fix_indentation(tokens[1:])  # manage multi-line declarations
            if isinstance(tokens, ErrorNode):
                self.add_error(tokens)
                continue

            # ask the other parsers whether they recognize the tokens
            new_context = self.context.push(self)
            parsing_result = self.sheerka.expect_one(new_context, self.sheerka.parse(new_context, tokens))
            if not parsing_result.status:
                self.add_error(parsing_result.value)
                continue
            asts_found_by_parts[keyword] = parsing_result

        concept_found.where = asts_found_by_parts[Keywords.WHERE]
        concept_found.pre = asts_found_by_parts[Keywords.PRE]
        concept_found.post = asts_found_by_parts[Keywords.POST]
        concept_found.body = asts_found_by_parts[Keywords.AS]

        log.debug(f"Found DefConcept node '{concept_found}'")
        return concept_found
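
    # A hypothetical end-to-end input for parse_def_concept (the exact surface
    # syntax depends on the Tokenizer's Keywords, so treat this as a sketch
    # rather than a reference):
    #
    #     def concept add one to a as:
    #         a + 1
    #
    # The token loop files 'add one to a' under Keywords.CONCEPT (it becomes
    # the NameNode) and the tokens after 'as' under Keywords.AS; that part is
    # re-indented by fix_indentation and handed to the other parsers through
    # sheerka.parse. Optional 'where', 'pre' and 'post' sections are collected
    # the same way, each allowed at most once.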
    # def parse_expression(self):
    #     return self.parse_addition()
    #
    # def parse_addition(self):
    #     left = self.parse_multiply()
    #     token = self.get_token()
    #     if token is None or token.type == TokenKind.EOF:
    #         return left
    #
    #     if token.type == TokenKind.NUMBER:  # example 15 +5 or 15 -5
    #         right = self.parse_addition()
    #         return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
    #
    #     if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
    #         return left
    #
    #     self.next_token()
    #     right = self.parse_addition()
    #     return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
    #
    # def parse_multiply(self):
    #     left = self.parse_atom()
    #     token = self.get_token()
    #     if token is None or token.type == TokenKind.EOF:
    #         return left
    #
    #     if token.type not in (TokenKind.STAR, TokenKind.SLASH):
    #         return left
    #
    #     self.next_token()
    #     right = self.parse_multiply()
    #     return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
    #
    # def parse_atom(self):
    #     token = self.get_token()
    #     if token.type == TokenKind.NUMBER:
    #         self.next_token()
    #         return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
    #     elif token.type == TokenKind.STRING:
    #         self.next_token()
    #         return StringNode([token], token.value[1:-1], token.value[0])
    #     elif token.type == TokenKind.IDENTIFIER:
    #         if token.value == "true":
    #             self.next_token()
    #             return TrueNode([token])
    #         elif token.value == "false":
    #             self.next_token()
    #             return FalseNode([token])
    #         elif token.value == "null":
    #             self.next_token()
    #             return NullNode([token])
    #         else:
    #             self.next_token()
    #             return VariableNode([token], token.value)
    #     elif token.type == TokenKind.LPAR:
    #         self.next_token()
    #         exp = self.parse_expression()
    #         token = self.get_token()
    #         self.next_token()
    #
    #         if token.type != TokenKind.RPAR:
    #             error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
    #             self.add_error(error)
    #             return error
    #
    #         return exp
    #     else:
    #         error = UnexpectedTokenErrorNode([token], "Unexpected token",
    #                                          [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER,
    #                                           "true", "false", "null", TokenKind.LPAR])
    #         return self.add_error(error)
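
    # A note on the disabled expression parser above: parse_addition delegates
    # to parse_multiply for its left operand, so '*' and '/' bind tighter than
    # '+' and '-' (1 + 2 * 3 parses as (1 + (2 * 3))). Because each level
    # recurses on itself for the right operand, operators at the same level
    # associate to the right: 1 - 2 - 3 would parse as (1 - (2 - 3)), not
    # ((1 - 2) - 3). The NUMBER special case in parse_addition apparently
    # handles signed literals such as '15 -5' (per its inline comment) by
    # treating the second number as an implicit addition.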