from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, Node
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser
from parsers.expressions import NameExprNode

# Names of the sub-parsers used when trying to recognize concepts inside a parameter.
PARSERS = [RuleParser.NAME, SequenceNodeParser.NAME, BnfNodeParser.NAME, SyaNodeParser.NAME]


@dataclass
class FunctionParserNode(Node):
    """Marker base for every node produced by FunctionParser."""
    pass


@dataclass
class FunctionParameter:
    """
    Result of parsing a single function parameter: the parsed value plus,
    when present, the separator token(s) that followed it (e.g. ", ").
    """
    value: NameExprNode  # the parsed value (a NameExprNode, or a nested FunctionNode)
    separator: NameExprNode = None  # value and position of the separator, None if last parameter

    def add_sep(self, start, end, tokens):
        """Record the separator that followed this parameter's value."""
        self.separator = NameExprNode(start, end, tokens)

    def value_to_unrecognized(self):
        """Wrap the value's token span in an UnrecognizedTokensNode."""
        return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source()

    def separator_to_unrecognized(self):
        """Wrap the separator's token span in an UnrecognizedTokensNode, or None if no separator."""
        if self.separator is None:
            return None
        return UnrecognizedTokensNode(self.separator.start, self.separator.end, self.separator.tokens).fix_source()


@dataclass
class FunctionNode(FunctionParserNode):
    """A parsed function call: name + opening parenthesis, closing parenthesis, and parameters."""
    first: NameExprNode  # beginning of the function (it should represent the name of the function)
    last: NameExprNode  # last part of the function (it should be the trailing parenthesis)
    parameters: list  # list of FunctionParameter


class FN(FunctionNode):
    """
    Test class only.

    It matches with FunctionNode but with less constraints. Thereby,
    FN("first", "last", ["param1," ...]) can be compared to
    FunctionNode(NameExprNode("first"), NameExprNode("second"),
    [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")]).

    Note that FunctionParameter can easily be defined with a single string:
    * "param"   -> FunctionParameter(NameExprNode("param"), None)
    * "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", "))

    For more complicated situations, you can use a tuple (value, sep) to
    define the value part and the separator part.
    """

    def __init__(self, first, last, parameters):
        self.first = first
        self.last = last
        # Normalize every parameter to a (value, separator) tuple.
        self.parameters = []
        for param in parameters:
            if isinstance(param, tuple):
                self.parameters.append(param)
            elif isinstance(param, str) and (pos := param.find(",")) != -1:
                # "param, " -> ("param", ", ")
                self.parameters.append((param[:pos], param[pos:]))
            else:
                self.parameters.append((param, None))

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, FN):
            return self.first == other.first and self.last == other.last and self.parameters == other.parameters
        if isinstance(other, FunctionNode):
            if self.first != other.first.value or self.last != other.last.value:
                return False
            if len(self.parameters) != len(other.parameters):
                return False
            for self_parameter, other_parameter in zip(self.parameters, other.parameters):
                # Compare string expectations against node values, node expectations against nodes.
                value = other_parameter.value.value if isinstance(self_parameter[0], str) else other_parameter.value
                sep = other_parameter.separator.value if other_parameter.separator else None
                if self_parameter[0] != value or self_parameter[1] != sep:
                    return False
            return True
        return False

    def __hash__(self):
        # BUGFIX: self.parameters is a list (unhashable); convert to a tuple so
        # FN instances can live in sets/dict keys without raising TypeError.
        return hash((self.first, self.last, tuple(self.parameters)))


class FunctionParser(BaseParser):
    """
    The parser will be used to parse func(x, y, z) where x, y and z can be
    source code, concepts or other functions.
    It will return a SourceCodeNode or SourceCodeNodeWithConcept.
    """

    def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
        """
        :param sep: separator token value between parameters (default ",")
        :param longest_concepts_only: When multiples concepts are found, only keep the longest
            one so 'twenty one' will resolve to [[c:twenty one:]],
            not [[c:twenty one:], [c:twenty:, c:one:]]
        :param kwargs: unused, accepted for interface compatibility
        """
        super().__init__("Function", 55)  # NOTE(review): 55 looks like a parser priority — confirm in BaseParser
        self.sep = sep
        self.longest_concepts_only = longest_concepts_only
        # When False, add_error() is a no-op; used to probe speculatively (see parse_parameter_value).
        self.record_errors = True

    def add_error(self, error, next_token=True):
        """Record a parsing error unless error recording is temporarily disabled."""
        if not self.record_errors:
            return
        return super().add_error(error, next_token)

    def parse(self, context, parser_input: ParserInput):
        """
        Parse a single function call from parser_input.

        :param context: execution context (provides sheerka and logging)
        :param parser_input: the tokenized input to parse
        :return: None if input is not a ParserInput; otherwise a sheerka return
            value (or a list of them when several source-code candidates exist)
        """
        if not isinstance(parser_input, ParserInput):
            return None
        context.log(f"Parsing '{parser_input}' with FunctionParser", self.name)
        sheerka = context.sheerka
        if parser_input.is_empty():
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.IS_EMPTY))
        if not self.reset_parser(context, parser_input):
            # NOTE(review): self.sheerka is presumably set by reset_parser/BaseParser — confirm
            return self.sheerka.ret(
                self.name, False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        self.parser_input.next_token()
        node = self.parse_function()
        if self.parser_input.next_token():
            # Tokens remain after the function: only one function per input is supported.
            self.add_error(UnexpectedTokenParsingError("Only one function supported",
                                                       self.parser_input.token, [TokenKind.EOF]))
        if self.has_error:
            if node is None:
                # Nothing was recognized at all: this input is simply not for this parser.
                body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                                           body=parser_input.as_text(), reason=self.error_sink)
            else:
                body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
            return self.sheerka.ret(self.name, False, body)
        # Convert the FunctionNode into one or more source-code candidates.
        source_code_nodes = self.to_source_code_node(node)
        res = []
        for source_code_node in source_code_nodes:
            value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(),
                                               source_code_node, source_code_node)
            res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, value))
        return res[0] if len(res) == 1 else res

    def parse_function(self):
        """
        Parse 'name(...)' starting at the current token.

        :return: a FunctionNode (possibly partial when an error occurred after
            the opening parenthesis), or None when not even 'name(' was found.
        """
        start = self.parser_input.pos
        token = self.parser_input.token
        if token.type != TokenKind.IDENTIFIER:
            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
                                                       token, [TokenKind.IDENTIFIER]))
            return None
        if not self.parser_input.next_token():
            self.add_error(UnexpectedEofParsingError("Unexpected EOF while parsing left parenthesis"))
            return None
        token = self.parser_input.token
        if token.type != TokenKind.LPAR:
            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
                                                       token, [TokenKind.LPAR]))
            return None
        # start_node covers the function name and the opening parenthesis (two tokens).
        start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2])
        if not self.parser_input.next_token():
            self.add_error(UnexpectedEofParsingError("Unexpected EOF after left parenthesis"))
            return FunctionNode(start_node, None, None)
        params = self.parse_parameters()
        if self.has_error:
            return FunctionNode(start_node, None, params)
        token = self.parser_input.token
        if not token or token.type != TokenKind.RPAR:
            self.add_error(UnexpectedTokenParsingError("Right parenthesis not found",
                                                       token, [TokenKind.RPAR]))
            return FunctionNode(start_node, None, params)
        return FunctionNode(start_node,
                            NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]),
                            params)

    def parse_parameters(self):
        """
        Parse the comma-separated (self.sep) parameter list up to the closing
        parenthesis.

        :return: list of FunctionParameter, or None on unexpected EOF.
        """
        nodes = []
        while True:
            param_value = self.parse_parameter_value()
            if not param_value:
                break
            function_parameter = FunctionParameter(param_value)
            nodes.append(function_parameter)
            token = self.parser_input.token
            if token.type == TokenKind.EOF:
                self.add_error(UnexpectedEofParsingError("Unexpected EOF while parsing parameters"))
                return None
            if token.type == TokenKind.RPAR:
                break
            if token.value == self.sep:
                sep_pos = self.parser_input.pos
                has_next = self.parser_input.next_token()  # it's before add_sep() to capture trailing whitespace
                function_parameter.add_sep(sep_pos, self.parser_input.pos - 1,
                                           self.parser_input.tokens[sep_pos: self.parser_input.pos])
                if not has_next:
                    break
        return nodes

    def parse_parameter_value(self):
        """
        Parse one parameter value: either a nested function call, or a raw run
        of tokens up to the next separator / closing parenthesis.

        :return: a FunctionNode, a NameExprNode, or None when no tokens were consumed.
        """
        # Check if the parameter is a function; probe silently (errors suppressed).
        start_pos = self.parser_input.pos
        self.record_errors = False
        func = self.parse_function()
        self.record_errors = True
        if func:
            self.parser_input.next_token()
            return func
        # Otherwise, rewind and eat tokens until LPAR or separator.
        self.parser_input.seek(start_pos)
        tokens = []
        while True:
            token = self.parser_input.token
            if token is None:
                break
            if token.value == self.sep or token.type == TokenKind.RPAR:
                break
            tokens.append(token)
            # Keep whitespace tokens: they are part of the raw parameter text.
            if not self.parser_input.next_token(skip_whitespace=False):
                break
        return NameExprNode(start_pos, self.parser_input.pos - 1, tokens) if tokens else None

    def to_source_code_node(self, function_node: FunctionNode):
        """
        Convert a parsed FunctionNode into source-code candidates, trying to
        recognize concepts inside each parameter. Several candidates are
        produced (cartesian product) when a parameter resolves ambiguously.

        :param function_node: the function to convert
        :return: list of SourceCodeNode / SourceCodeWithConceptNode
        """
        python_parser = PythonWithConceptsParser()
        if len(function_node.parameters) == 0:
            # No parameters: just validate the source 'name()' as-is.
            nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
            python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse)
            python_node = python_parsing_res.body.body if python_parsing_res.status else None
            return [SourceCodeNode(start=function_node.first.start, end=function_node.last.end,
                                   tokens=function_node.first.tokens + function_node.last.tokens,
                                   python_node=python_node, return_value=python_parsing_res)]

        def update_source_code_node(scn, nodes, sep):
            # Append one node or a sequence of nodes, then the separator (if any).
            if hasattr(nodes, "__iter__"):
                for n in nodes:
                    scn.add_node(n)
            else:
                scn.add_node(nodes)
            if sep:
                scn.add_node(sep.to_unrecognized())

        res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(),
                                         function_node.last.to_unrecognized())]
        # Try to recognize every parameter, one by one.
        for param in function_node.parameters:
            if isinstance(param.value, NameExprNode):
                # Try to recognize concepts.
                unrecognized = param.value.to_unrecognized()
                nodes_sequences = get_lexer_nodes_from_unrecognized(self.context, unrecognized, PARSERS)
            else:
                # The parameter is also a function.
                nodes_sequences = self.to_source_code_node(param.value)
            if self.longest_concepts_only:
                nodes_sequences = self.get_longest_concepts(nodes_sequences)
            if nodes_sequences is None:
                # No concept found: keep the raw tokens.
                # NOTE(review): 'unrecognized' is only bound in the NameExprNode branch;
                # this branch is unreachable for function parameters today
                # (to_source_code_node never returns None) — worth a guard if that changes.
                for source_code_node in res:
                    update_source_code_node(source_code_node, unrecognized, param.separator)
            elif len(nodes_sequences) == 1:
                # Only one result. Same logic as the multi-result case below,
                # but saves creating the tmp_res object (not sure it's worth it).
                for source_code_node in res:
                    update_source_code_node(source_code_node, nodes_sequences[0], param.separator)
            else:
                # Multiple results: make the cartesian product with existing candidates.
                tmp_res = []
                for source_code_node in res:
                    instances = get_n_clones(source_code_node, len(nodes_sequences))
                    tmp_res.extend(instances)
                    for instance, node_sequence in zip(instances, nodes_sequences):
                        update_source_code_node(instance, node_sequence, param.separator)
                res = tmp_res
        # Check if each candidate is valid source code.
        for source_code_node in res:
            source_code_node.fix_all_pos()
            source_code_node.pseudo_fix_source()
            python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes())
            if python_parsing_res.status:
                source_code_node.python_node = python_parsing_res.body.body
                source_code_node.return_value = python_parsing_res
                # Make sure that concepts found can be evaluated.
                errors = []
                for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
                    update_compiled(self.context, c, errors)
        return res

    @staticmethod
    def get_longest_concepts(nodes_sequences):
        """
        The longest sequences are the ones that have the less number of concepts.

        For example 'twenty one' resolves to
            [c:twenty one:]
            [c:twenty:, c:one:]
        [c:twenty one:] has only one concept, so it's the longest one
        (two tokens against one token twice).

        :param nodes_sequences: candidate sequences, or None
        :return: the sequences with the minimal length, or None when input is None
        """
        if nodes_sequences is None:
            return None
        res = []
        min_len = -1
        for current_sequence in nodes_sequences:
            # Awful hack to remove when NodeSequence and ConceptSequence will be implemented.
            current_len = len(current_sequence) if hasattr(current_sequence, "__len__") else 1
            if len(res) == 0:
                res.append(current_sequence)
                min_len = current_len
            elif current_len == min_len:
                res.append(current_sequence)
            elif current_len < min_len:
                res.clear()
                res.append(current_sequence)
                min_len = current_len
        return res