import core.builtin_helpers
import core.tokenizer  # imported as a module so core.tokenizer.LexerError is reachable in parse()
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts
from core.sheerka import ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from parsers.BnfParser import BnfParser


@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    tokens: list = field(compare=False, repr=False)


@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    pass


@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    message: str
    expected_tokens: list


@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but there is a syntax error
    """
    message: str


@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    text: str


@dataclass()
class NameNode(DefaultParserNode):

    def get_name(self):
        name = ""
        first = True
        for token in self.tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if not first:
                name += " "
            # strip the surrounding quotes of STRING tokens
            name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
            first = False
        return name

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        if not isinstance(other, NameNode):
            return False
        return self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())


@dataclass()
class DefConceptNode(DefaultParserNode):
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()

    def get_asts(self):
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if (isinstance(prop_value, ReturnValueConcept)
                    and isinstance(prop_value.body, ParserResultConcept)
                    and hasattr(prop_value.body.body, "ast_")):
                asts[part_key] = prop_value.body.body.ast_
        return asts


@dataclass()
class IsaConceptNode(DefaultParserNode):
    concept: NameNode = NotInitializedNode()
    set: NameNode = NotInitializedNode()


class DefaultParser(BaseParser):
    """
    Parse sheerka-specific grammar (like 'def concept')
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Default", 50)
        self.lexer_iter = None
        self._current = None
        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example

            def concept add one to a as:
                def func(x):
                    return x+1
                func(a)

        the indentation in front of 'def func(x)', 'return x+1' and 'func(a)'
        must be fixed to avoid a Python syntax error.
        :param tokens: tokens of the declaration, starting at the colon
        :return: the fixed tokens (without the leading colon, newline and
                 indent), or an ErrorNode
        """
        if tokens[0].type != TokenKind.COLON:
            return tokens
        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
        if tokens[1].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])
        if tokens[2].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[2]], "Indentation not found.")
        indent_size = len(tokens[2].value)
        # now fix the other indentations
        i = 3
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token",
                                                    [TokenKind.WHITESPACE])
                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1
        return tokens[3:]
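
    # Illustrative walk-through of fix_indentation (token values are
    # hypothetical, shown as text for readability). With indent_size == 4,
    # the token stream of
    #     :\n    def func(x):\n        return x+1\n    func(a)
    # becomes
    #     def func(x):\n    return x+1\nfunc(a)
    # i.e. one level of block indentation is stripped from every line while
    # the relative indentation of nested code is preserved.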

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka
        self.text = text
        self.lexer_iter = iter(Tokenizer(text))
        self._current = None
        self.next_token()

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self._current

    def next_token(self, skip_whitespace=True):
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None

    def parse(self, context, text):
        # the default parser can only handle string input
        if not isinstance(text, str):
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
            self.log_result(context, text, ret)
            return ret
        tree = None
        try:
            self.reset_parser(context, text)
            tree = self.parse_statement()
        except core.tokenizer.LexerError as e:
            self.add_error(e, False)
        # If an error is found it must be sent to error_sink;
        # tree must contain what was recognized
        if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
            body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
        else:
            body = self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree)
        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            body)
        self.log_result(context, text, ret)
        return ret

    def parse_statement(self):
        token = self.get_token()
        if token.value == Keywords.DEF:
            self.next_token()
            self.context.log(self.verbose_log, "Keyword DEF found.", self.name)
            return self.parse_def_concept(token)
        else:
            return self.parse_isa_concept()

    def parse_def_concept(self, def_token):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        """
        # init
        keywords_tokens = [def_token]
        concept_found = DefConceptNode(keywords_tokens)
        # The definition of a concept consists of several parts:
        # Keywords.CONCEPT to get the name of the concept
        # Keywords.FROM [Keywords.BNF] to get the definition of the concept
        # Keywords.AS to get the body
        # Keywords.WHERE to get the conditions to recognize the variables
        # Keywords.PRE to get the conditions to check before the concept is evaluated
        # Keywords.POST to get the conditions to apply or verify once the concept is executed
        #
        # Regroup the tokens by parts
        first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)
        if first_token.type == TokenKind.EOF:
            return self.add_error(
                UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))
        # get the name
        concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)
        # get the definition
        concept_found.definition = self.get_concept_definition(tokens_found_by_parts)
        # get the ASTs for the remaining parts
        asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
        concept_found.where = asts_found_by_parts[Keywords.WHERE]
        concept_found.pre = asts_found_by_parts[Keywords.PRE]
        concept_found.post = asts_found_by_parts[Keywords.POST]
        concept_found.body = asts_found_by_parts[Keywords.AS]
        return concept_found
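
    # Grounded example, taken from the fix_indentation docstring above:
    #     def concept add one to a as:
    #         def func(x):
    #             return x+1
    #         func(a)
    # parse_def_concept sees the name part 'add one to a' and an 'as' part
    # carrying the Python body; where/pre/post/from are optional and omitted.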

    def parse_isa_concept(self):
        concept_name = self.parse_concept_name()
        if isinstance(concept_name, DefaultParserErrorNode):
            return concept_name
        keyword = []
        token = self.get_token()
        if token.value != Keywords.ISA:
            return self.add_error(CannotHandleErrorNode([token], ""))
        keyword.append(token)
        self.next_token()
        set_name = self.parse_concept_name()
        return IsaConceptNode(keyword, concept_name, set_name)

    def parse_concept_name(self):
        tokens = []
        token = self.get_token()
        while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
            tokens.append(token)
            self.next_token()
            token = self.get_token()
        if len(tokens) == 0:
            return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
        else:
            return NameNode(tokens)

    def regroup_tokens_by_parts(self, keywords_tokens):
        def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS,
                             Keywords.WHERE, Keywords.PRE, Keywords.POST]
        # tokens found when trying to recognize the parts
        tokens_found_by_parts = {
            Keywords.CONCEPT: [],
            Keywords.FROM: None,
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = Keywords.CONCEPT
        token = self.get_token()
        first_token = token
        # loop through the tokens and put them in the correct tokens_found_by_parts entry
        while token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                keywords_tokens.append(token)  # keep track of the keywords
                keyword = token.value
                if tokens_found_by_parts[keyword]:
                    # a part is defined more than once; add_error advances past the token
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    tokens_found_by_parts[current_part].append(token)  # add the token again
                else:
                    tokens_found_by_parts[keyword] = [token]
                    current_part = keyword
                    self.next_token()
            else:
                tokens_found_by_parts[current_part].append(token)
                self.next_token(False)
            token = self.get_token()
        return first_token, tokens_found_by_parts

    def get_concept_name(self, first_token, tokens_found_by_parts):
        name_first_token_index = 1
        token = self.get_token()
        if first_token.value != Keywords.CONCEPT:
            self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0
        name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
        if len(name_tokens) == name_first_token_index:
            self.add_error(SyntaxErrorNode([], "Name is mandatory"))
        # guard against an empty name list before peeking at the last token
        if name_tokens and name_tokens[-1].type == TokenKind.NEWLINE:
            name_tokens = name_tokens[:-1]  # strip the trailing newline
        if TokenKind.NEWLINE in [t.type for t in name_tokens]:
            self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT],
                                           "Newlines are not allowed in name."))
        name_node = NameNode(name_tokens[name_first_token_index:])  # skip the keyword token
        return name_node

    def get_concept_definition(self, tokens_found_by_parts):
        if tokens_found_by_parts[Keywords.FROM] is None:
            return NotInitializedNode()
        definition_tokens = tokens_found_by_parts[Keywords.FROM]
        # guard against a lone 'from' token before looking at the next one
        if len(definition_tokens) < 2 or definition_tokens[1].value != Keywords.BNF:
            return NotInitializedNode()
        tokens = core.utils.strip_tokens(definition_tokens[2:])
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
            return NotInitializedNode()
        bnf_parser = BnfParser()
        with self.context.push(self.name) as sub_context:
            parsing_result = bnf_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)
        if not parsing_result.status:
            self.add_error(parsing_result.value)
            return NotInitializedNode()
        return parsing_result
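
    # Hypothetical sketch of a 'from' declaration get_concept_definition
    # accepts (the exact grammar syntax is defined by BnfParser, not here):
    #     def concept greeting from bnf <rule> ::= "hello" <name>
    # Everything after the 'bnf' keyword is stripped and handed to BnfParser
    # unchanged.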

    def get_concept_parts(self, tokens_found_by_parts):
        asts_found_by_parts = {
            Keywords.AS: NotInitializedNode(),
            Keywords.WHERE: NotInitializedNode(),
            Keywords.PRE: NotInitializedNode(),
            Keywords.POST: NotInitializedNode(),
        }
        for keyword in tokens_found_by_parts:
            if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
                continue  # already done
            tokens = tokens_found_by_parts[keyword]
            if tokens is None:
                continue  # nothing to do
            if len(tokens) == 1:
                # check for empty declarations
                self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
                continue
            tokens = self.fix_indentation(tokens[1:])  # handle multi-line declarations
            if isinstance(tokens, ErrorNode):
                self.add_error(tokens)
                continue
            # ask the other parsers whether they recognize the tokens
            with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context:
                sub_context.log_new(self.verbose_log)
                to_parse = self.sheerka.ret(
                    sub_context.who,
                    True,
                    self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
                steps = [BuiltinConcepts.PARSING]
                parsed = self.sheerka.execute(sub_context, to_parse, steps, self.verbose_log)
                parsing_result = core.builtin_helpers.expect_one(sub_context, parsed, self.verbose_log)
                sub_context.add_values(return_values=parsing_result)
            if not parsing_result.status:
                self.add_error(parsing_result.value)
                continue
            asts_found_by_parts[keyword] = parsing_result
        return asts_found_by_parts
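

if __name__ == "__main__":
    # Minimal smoke-test sketch. Driving DefaultParser.parse needs a live
    # ExecutionContext/sheerka instance, whose construction is not shown in
    # this module, so this only dumps the raw token stream the parser
    # consumes, assuming Tokenizer accepts a plain string (as in
    # reset_parser) and yields Token objects with .type and .value.
    for tok in Tokenizer("def concept add one to a as: a + 1"):
        print(tok.type, repr(tok.value))
        if tok.type == TokenKind.EOF:  # stop at the end-of-file token
            break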