From a61a1c0d2b4bbe3cd8914e1486e65aed1085600a Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sun, 12 Sep 2021 11:26:14 +0200 Subject: [PATCH] Fixed #127 : SyaNodeParser : Allow tokens parsing Fixed #128 : parser_utils.get_node() : Refactor --- src/core/builtin_helpers.py | 93 +---- .../sheerka/services/SheerkaConceptManager.py | 2 +- src/core/sheerka/services/SheerkaExecute.py | 5 +- src/core/tokenizer.py | 3 +- src/core/utils.py | 1 + src/parsers/BaseNodeParser.py | 20 + src/parsers/FunctionParser.py | 3 +- src/parsers/SyaNodeParser.py | 2 +- tests/parsers/parsers_utils.py | 390 +++++++++--------- tests/parsers/test_BnfNodeParser.py | 4 +- tests/parsers/test_SequenceNodeParser.py | 4 +- tests/parsers/test_SyaNodeParser.py | 90 +++- 12 files changed, 327 insertions(+), 290 deletions(-) diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index f814040..20b12ab 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -224,7 +224,8 @@ def resolve_ambiguity(context, concepts): for c in by_complexity[complexity]: from core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints evaluated = context.sheerka.evaluate_concept(context, c, - hints=EvaluationHints(eval_body=False, expression_only=True), + hints=EvaluationHints(eval_body=False, + expression_only=True), metadata=[ConceptParts.PRE, ConceptParts.WHERE]) if context.sheerka.is_success(evaluated) or evaluated.key == c.key: remaining_concepts.append(c) @@ -481,7 +482,7 @@ def get_lexer_nodes(return_values, start, tokens): :return: list of list (list of concept node sequence) """ from evaluators.BaseEvaluator import BaseEvaluator - from parsers.BaseNodeParser import ConceptNode, LexerNode, RuleNode, SourceCodeNode + from parsers.BaseNodeParser import ConceptNode, RuleNode, SourceCodeNode lexer_nodes = [] for ret_val in return_values: @@ -512,15 +513,7 @@ def get_lexer_nodes(return_values, start, tokens): elif who in ("parsers.Bnf", "parsers.Sya", "parsers.Sequence"): nodes = [node.clone() for node in ret_val.body.body] for node in nodes: - node.start += start - node.end += start - if isinstance(node, ConceptNode): - for k, v in node.concept.get_compiled().items(): - if isinstance(v, LexerNode): - v = v.clone() - v.start += start - v.end += start - node.concept.get_compiled()[k] = v + node.shift_pos(start) # but append the whole sequence if when it's a sequence lexer_nodes.append(nodes) @@ -531,85 +524,17 @@ def get_lexer_nodes(return_values, start, tokens): for rule in rules: lexer_nodes.append([RuleNode(rule, start, end, tokens, ret_val.body.source)]) + elif who == "parsers.Function": + node = ret_val.body.body.clone() + node.shift_pos(start) + lexer_nodes.append([node]) + else: raise NotImplementedError(f"get_lexer_nodes who={who}") return lexer_nodes -def get_lexer_nodes_using_positions(return_values, positions): - """ - Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...) - Use positions to compute the exact new positions - On the contrary of the other method (get_lexer_nodes), - one return value is mapped with one position. it's not a offset, but an absolute position - :param return_values: - :param positions: is a list of triplets (start, end, tokens) - :return: - """ - - from evaluators.BaseEvaluator import BaseEvaluator - from parsers.BaseNodeParser import ConceptNode, LexerNode, RuleNode, SourceCodeNode - - lexer_nodes = [] - for ret_val, position in zip(return_values, positions): - # To manage AFTER_PARSING evaluators - who = ret_val.parents[0].who if ret_val.who.startswith(BaseEvaluator.PREFIX) else ret_val.who - - if who in ("parsers.Python", 'parsers.PythonWithConcepts'): - - lexer_nodes.append(SourceCodeNode(position.start, - position.end, - position.tokens, - ret_val.body.source, - python_node=ret_val.body.body, - return_value=ret_val)) - - elif who == "parsers.ExactConcept": - concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body] - for concept in concepts: - lexer_nodes.append(ConceptNode(concept, - position.start, - position.end, - position.tokens, - ret_val.body.source)) - - elif who in ("parsers.Bnf", "parsers.Sya", "parsers.Sequence"): - nodes = [node.clone() for node in ret_val.body.body] - for node in nodes: - node.start = position.start - node.end = position.end - if isinstance(node, ConceptNode): - for k, v in node.concept.get_compiled().items(): - if isinstance(v, LexerNode): - v = v.clone() - v.start += position.start - v.end += position.start - node.concept.get_compiled()[k] = v - - # but append the whole sequence if when it's a sequence - lexer_nodes.extend(nodes) - - elif who == "parsers.Rule": - rules = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body] - for rule in rules: - lexer_nodes.append(RuleNode(rule, - position.start, - position.end, - position.tokens, ret_val.body.source)) - - elif who == "parsers.Function": - node = ret_val.body.body - node.start = position.start - node.end = position.end - lexer_nodes.append(node) - - else: - raise NotImplementedError(f"get_lexer_nodes_using_positions {who=}") - - return lexer_nodes - - def ensure_evaluated(context, concept, eval_body=True, metadata=None): """ Evaluate a concept is not already evaluated diff --git a/src/core/sheerka/services/SheerkaConceptManager.py b/src/core/sheerka/services/SheerkaConceptManager.py index aceca0e..7171bb0 100644 --- a/src/core/sheerka/services/SheerkaConceptManager.py +++ b/src/core/sheerka/services/SheerkaConceptManager.py @@ -1152,7 +1152,7 @@ class SheerkaConceptManager(BaseService): :return: """ - if token.type == TokenKind.WHITESPACE: + if token.type in (TokenKind.WHITESPACE, TokenKind.EXPR): return None if token.type == TokenKind.STRING: diff --git a/src/core/sheerka/services/SheerkaExecute.py b/src/core/sheerka/services/SheerkaExecute.py index d6c593f..e5d65bf 100644 --- a/src/core/sheerka/services/SheerkaExecute.py +++ b/src/core/sheerka/services/SheerkaExecute.py @@ -169,7 +169,10 @@ class ParserInput: return True def is_empty(self): - if self.text.strip() == "": + if self.text is not None and self.text.strip() == "": + return True + + if self.from_tokens and len(self.tokens) == 0: return True if self.end == self.start: diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index 6094405..3c74492 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -11,10 +11,9 @@ class TokenKind(Enum): IDENTIFIER = "identifier" CONCEPT = "concept" RULE = "rule" + EXPR = "expression" STRING = "string" NUMBER = "number" - TRUE = "true" - FALSE = "false" LPAR = "lpar" RPAR = "rpar" LBRACKET = "lbracket" diff --git a/src/core/utils.py b/src/core/utils.py index 38d5f11..7dc7935 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -729,6 +729,7 @@ def get_text_from_tokens(tokens, custom_switcher=None, tracker=None): switcher = { # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), + TokenKind.EXPR: lambda t: t.value.get_source() } if custom_switcher: diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index beedbcf..7ec6551 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -45,6 +45,10 @@ class LexerNode(Node): def get_source_to_parse(self): return self.source + def shift_pos(self, offset): + self.start += offset + self.end += offset + class UnrecognizedTokensNode(LexerNode): def __init__(self, start, end, tokens): @@ -234,6 +238,14 @@ class ConceptNode(LexerNode): """ return self.concept + def shift_pos(self, offset): + super().shift_pos(offset) + for k, v in self.concept.get_compiled().items(): + if isinstance(v, LexerNode): + v = v.clone() + v.shift_pos(offset) + self.concept.get_compiled()[k] = v + class SourceCodeNode(LexerNode): """ @@ -403,6 +415,9 @@ class SourceCodeWithConceptNode(LexerNode): clone = SourceCodeWithConceptNode(self.first.clone(), self.last.clone(), nodes, self.has_unrecognized) clone.python_node = self.python_node clone.return_value = self.return_value + clone.source = self.source + if self.tokens: + clone.tokens = self.tokens.copy() return clone def to_short_str(self): @@ -421,6 +436,11 @@ class SourceCodeWithConceptNode(LexerNode): errors.append(n.error) return errors + def shift_pos(self, offset): + super().shift_pos(offset) + for n in [self.first, self.last] + self.nodes: + n.shift_pos(offset) + class VariableNode(LexerNode): """ diff --git a/src/parsers/FunctionParser.py b/src/parsers/FunctionParser.py index 9276347..19350a1 100644 --- a/src/parsers/FunctionParser.py +++ b/src/parsers/FunctionParser.py @@ -254,7 +254,8 @@ class FunctionParser(BaseExpressionParser): # check if it is a valid source code for source_code_node in res: source_code_node.fix_all_pos() - source_code_node.pseudo_fix_source() + source_code_node.source = function_node.get_source() + source_code_node.tokens = function_node.tokens python_parsing_res = python_parser.parse_nodes(context, source_code_node.get_all_nodes()) if python_parsing_res.status: diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index a7e46a5..948e53e 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -12,7 +12,7 @@ from core.utils import flatten, get_text_from_tokens, strip_tokens from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensCache, UnrecognizedTokensNode from parsers.BaseParser import ParsingError -PARSERS = ["Sequence", "Bnf", "Python"] +PARSERS = ["Function", "Sequence", "Bnf", "Python"] class SyaNodeException(Exception): diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index 6c59443..e2e5196 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -2,7 +2,7 @@ import ast from dataclasses import dataclass from typing import List, Union -from core.builtin_concepts import ReturnValueConcept +from core.builtin_concepts import ParserResultConcept, ReturnValueConcept from core.builtin_helpers import CreateObjectIdentifiers from core.concept import AllConceptParts, Concept, ConceptParts, DoNotResolve from core.rule import Rule @@ -20,14 +20,6 @@ from sheerkarete.common import V from sheerkarete.conditions import AndConditions, Condition, NegatedCondition, NegatedConjunctiveConditions -@dataclass -class Obj: - prop_a: object - prop_b: object = None - prop_c: object = None - parent: object = None - - class ExprTestObj: @staticmethod def get_pos(nodes): @@ -276,6 +268,130 @@ class LC(ExprTestObj): # for List Comprehension node return ListComprehensionNode(start, end, full_text_as_tokens[start: end + 1], element, comprehensions) +class FN(ExprTestObj): + """ + Test class only + It matches with FunctionNode but with less constraints + + Thereby, + FN("first", "last", ["param1," ...]) can be compared to + FunctionNode(NameExprNode("first"), NameExprNode("second"), [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")]) + + Note that FunctionParameter can easily be defined with a single string + * "param" -> FunctionParameter(NameExprNode("param"), None) + * "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", ")) + For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part + """ + + def __init__(self, first, last, parameters): + self.first = first + self.last = last + self.parameters = [] + for param in parameters: + if isinstance(param, tuple): + self.parameters.append(param) + elif isinstance(param, str) and (pos := param.find(",")) != -1: + self.parameters.append((param[:pos], param[pos:])) + else: + self.parameters.append((param, None)) + + def __repr__(self): + res = self.first + for param in self.parameters: + if param[1]: + res += f"{param[0]}{param[1]} " + else: + res += f"{param[0]}" + return res + self.last + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, FN): + return self.first == other.first and self.last == other.last and self.parameters == other.parameters + + return False + + def __hash__(self): + return hash((self.first, self.last, self.parameters)) + + def transform_real_obj(self, other, get_test_obj_delegate): + if isinstance(other, FN): + return other + + if isinstance(other, FunctionNode): + params = [] + for self_parameter, other_parameter in zip(self.parameters, other.parameters): + if isinstance(self_parameter[0], str): + value = other_parameter.value.value + else: + value = get_test_obj_delegate(other_parameter.value, self_parameter[0]) + sep = other_parameter.separator.value if other_parameter.separator else None + params.append((value, sep)) + + return FN(other.first.value, other.last.value, params) + + raise Exception(f"Expecting FunctionNode but received {other=}") + + def get_expr_node(self, full_text_as_tokens=None): + start, end = self.get_pos_from_source(self.first, full_text_as_tokens) + first = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + start, end = self.get_pos_from_source(self.last, full_text_as_tokens) + last = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + parameters = [] + for param_value, sep in self.parameters: + if isinstance(param_value, str): + start, end = self.get_pos_from_source(param_value, full_text_as_tokens) + param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + else: + param_as_expr_node = param_value.get_expr_node(full_text_as_tokens) + + if sep: + sep_tokens = Tokenizer(sep, yield_eof=False) + start = param_as_expr_node.end + 1 + end = start + len(list(sep_tokens)) - 1 + sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + else: + sep_as_expr_node = None + + parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node)) + + start, end = first.start, last.end + return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters) + + +class HelperWithPos: + def __init__(self, start=None, end=None): + self.start = start + self.end = end + + self.start_is_fixed = start is not None + self.end_is_fixed = end is not None + + def fix_pos(self, node): + """ + + :param node: an object or a tuple + :return: + """ + if hasattr(node, "start"): + target_start, target_end = node.start, node.end + elif isinstance(node, tuple): + target_start, target_end = node + else: + target_start, target_end = None, None + + if not self.start_is_fixed: + if target_start is not None and (self.start is None or target_start < self.start): + self.start = target_start + + if not self.end_is_fixed: + if target_end is not None and (self.end is None or target_end > self.end): + self.end = target_end + return self + + class CC: """ Concept class for test purpose @@ -578,29 +694,60 @@ class CIO: raise Exception(f"Expecting Concept but received {other=}") -class HelperWithPos: - def __init__(self, start=None, end=None): - self.start = start - self.end = end +class RETVAL: + """ + Class helper for return value for parser result + """ - self.start_is_fixed = start is not None - self.end_is_fixed = end is not None + def __init__(self, source, who=None, parser=None): + self.source = source + self.who = who + self.parser = parser - def fix_pos(self, node): - if not self.start_is_fixed: - start = node.start if hasattr(node, "start") else \ - node[0] if isinstance(node, tuple) else None + def __eq__(self, other): + if id(self) == id(other): + return True - if start is not None and (self.start is None or start < self.start): - self.start = start + if not isinstance(other, RETVAL): + return False - if not self.end_is_fixed: - end = node.end if hasattr(node, "end") else \ - node[1] if isinstance(node, tuple) else None + return (self.source == other.source and + self.who == other.who and + self.parser == other.parser) - if end is not None and (self.end is None or end > self.end): - self.end = end - return self + def __hash__(self): + return hash((self.source, self.who)) + + def __repr__(self): + txt = f"RV(source='{self.source}'" + if self.who is not None: + txt += f", who={self.who}" + if self.parser is not None: + txt += f", parser={self.parser}" + return txt + ")" + + def transform_real_obj(self, other, get_test_obj_delegate): + """ + Transform other into CNC, to ease the comparison + :param other: + :param get_test_obj_delegate: + :return: + """ + + if isinstance(other, RETVAL): + return other + + if isinstance(other, ReturnValueConcept): + if not isinstance(other.body, ParserResultConcept): + raise Exception(f"ParserResultConcept not found body={other.body}") + + parser_result = other.body + + return RETVAL(parser_result.source, + other.who if self.who is not None else None, + parser_result.parser if self.parser is not None else None) + + raise Exception(f"Expecting ReturnValueConcept but received {other=}") class SCN(HelperWithPos): @@ -992,10 +1139,10 @@ class RN(HelperWithPos): if not isinstance(other, RN): return False - return self.rule_id == other.rule_id and \ - self.start == other.start and \ - self.end == other.end and \ - self.source == other.source + return (self.rule_id == other.rule_id and + self.start == other.start and + self.end == other.end and + self.source == other.source) def __hash__(self): return hash((self.rule_id, self.start, self.end, self.source)) @@ -1032,99 +1179,6 @@ class RN(HelperWithPos): raise Exception(f"Expecting RuleNode but received {other=}") -class FN(ExprTestObj): - """ - Test class only - It matches with FunctionNode but with less constraints - - Thereby, - FN("first", "last", ["param1," ...]) can be compared to - FunctionNode(NameExprNode("first"), NameExprNode("second"), [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")]) - - Note that FunctionParameter can easily be defined with a single string - * "param" -> FunctionParameter(NameExprNode("param"), None) - * "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", ")) - For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part - """ - - def __init__(self, first, last, parameters): - self.first = first - self.last = last - self.parameters = [] - for param in parameters: - if isinstance(param, tuple): - self.parameters.append(param) - elif isinstance(param, str) and (pos := param.find(",")) != -1: - self.parameters.append((param[:pos], param[pos:])) - else: - self.parameters.append((param, None)) - - def __repr__(self): - res = self.first - for param in self.parameters: - if param[1]: - res += f"{param[0]}{param[1]} " - else: - res += f"{param[0]}" - return res + self.last - - def __eq__(self, other): - if id(self) == id(other): - return True - - if isinstance(other, FN): - return self.first == other.first and self.last == other.last and self.parameters == other.parameters - - return False - - def __hash__(self): - return hash((self.first, self.last, self.parameters)) - - def transform_real_obj(self, other, get_test_obj_delegate): - if isinstance(other, FN): - return other - - if isinstance(other, FunctionNode): - params = [] - for self_parameter, other_parameter in zip(self.parameters, other.parameters): - if isinstance(self_parameter[0], str): - value = other_parameter.value.value - else: - value = get_test_obj_delegate(other_parameter.value, self_parameter[0]) - sep = other_parameter.separator.value if other_parameter.separator else None - params.append((value, sep)) - - return FN(other.first.value, other.last.value, params) - - raise Exception(f"Expecting FunctionNode but received {other=}") - - def get_expr_node(self, full_text_as_tokens=None): - start, end = self.get_pos_from_source(self.first, full_text_as_tokens) - first = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - start, end = self.get_pos_from_source(self.last, full_text_as_tokens) - last = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - parameters = [] - for param_value, sep in self.parameters: - if isinstance(param_value, str): - start, end = self.get_pos_from_source(param_value, full_text_as_tokens) - param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - else: - param_as_expr_node = param_value.get_expr_node(full_text_as_tokens) - - if sep: - sep_tokens = Tokenizer(sep, yield_eof=False) - start = param_as_expr_node.end + 1 - end = start + len(list(sep_tokens)) - 1 - sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) - else: - sep_as_expr_node = None - - parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node)) - - start, end = first.start, last.end - return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters) - - @dataclass() class NEGCOND: """ @@ -1207,8 +1261,6 @@ def get_node( sub_expr, concept_key=None, skip=0, - is_bnf=False, - sya=False, init_empty_body=False, exclude_body=False): """ @@ -1219,41 +1271,41 @@ def get_node( :param concepts_map: hash of the known concepts :param concept_key: key of the concept if different from sub_expr :param skip: number of occurrences of sub_expr to skip - :param is_bnf: True if the concept to search is a bnf definition - :param sya: Return SyaConceptParserHelper instead of a ConceptNode when needed :param init_empty_body: if True adds the source in the body (actually in compiled.BODY) :param exclude_body: Ask to not compare body :return: """ - if sub_expr == "')'": - return ")" + if isinstance(sub_expr, list): + return [get_node(concepts_map, + expression_as_tokens, + s, + concept_key, + skip, + init_empty_body, + exclude_body) for s in sub_expr] - if isinstance(sub_expr, ReturnValueConcept): - return sub_expr + if isinstance(sub_expr, tuple): + return get_node(concepts_map, + expression_as_tokens, + sub_expr[0], + concept_key, + sub_expr[1], + init_empty_body, + exclude_body) - if isinstance(sub_expr, DoNotResolve): - return sub_expr - - if isinstance(sub_expr, CIO): - sub_expr.set_concept(concepts_map[sub_expr.concept_name]) - source = sub_expr.source or sub_expr.concept_name - if source: - node = get_node(concepts_map, expression_as_tokens, source, sya=sya) - sub_expr.start = node.start - sub_expr.end = node.end + if isinstance(sub_expr, (DoNotResolve, ReturnValueConcept, RETVAL)): return sub_expr if isinstance(sub_expr, SCWC): - sub_expr.first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya) - sub_expr.last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya) - sub_expr.content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content] + sub_expr.first = get_node(concepts_map, expression_as_tokens, sub_expr.first, skip=skip) + sub_expr.last = get_node(concepts_map, expression_as_tokens, sub_expr.last, skip=skip) + sub_expr.content = [get_node(concepts_map, expression_as_tokens, c, skip=skip) for c in sub_expr.content] sub_expr.fix_pos(sub_expr.first) sub_expr.fix_pos(sub_expr.last) return sub_expr - # return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source() if isinstance(sub_expr, SCN): - node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya) + node = get_node(concepts_map, expression_as_tokens, sub_expr.source, skip=skip) sub_expr.fix_pos(node) return sub_expr @@ -1263,13 +1315,14 @@ def get_node( sub_expr.end = start + length - 1 return sub_expr - if isinstance(sub_expr, (CNC, CC, CN, CMV)): + if isinstance(sub_expr, (CNC, CC, CN, CMV, CIO)): if sub_expr.concept is None or sub_expr.start is None or sub_expr.end is None: concept_node = get_node( concepts_map, expression_as_tokens, sub_expr.source or sub_expr.concept_key, - sub_expr.concept_key, sya=sya) + sub_expr.concept_key, + skip) if not hasattr(concept_node, "concept"): raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.") concept_found = concept_node.concept @@ -1279,7 +1332,7 @@ def get_node( (concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start)) if hasattr(sub_expr, "compiled"): for k, v in sub_expr.compiled.items(): - node = get_node(concepts_map, expression_as_tokens, v, sya=sya, + node = get_node(concepts_map, expression_as_tokens, v, exclude_body=exclude_body) # need to get start and end positions if isinstance(v, str) and v in concepts_map: new_value_concept = concepts_map[v] @@ -1299,27 +1352,17 @@ def get_node( return sub_expr if isinstance(sub_expr, UTN): - node = get_node(concepts_map, expression_as_tokens, sub_expr.source) + node = get_node(concepts_map, expression_as_tokens, sub_expr.source, skip=skip) sub_expr.fix_pos(node) return sub_expr - if isinstance(sub_expr, tuple): - return get_node(concepts_map, expression_as_tokens, sub_expr[0], - concept_key=concept_key, skip=sub_expr[1], is_bnf=is_bnf, sya=sya) - start, length = _index(expression_as_tokens, sub_expr, skip) - # special case of python source code - if "+" in sub_expr and sub_expr.strip() != "+": - return SCN(sub_expr, start, start + length - 1) - # try to match one of the concept from the map concept_key = concept_key or sub_expr concept_found = concepts_map.get(concept_key, None) if concept_found: concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests - # if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf: - # return SyaConceptParserHelper(concept_found, start, start + length - 1) if init_empty_body: node = CNC(concept_found, sub_expr, start, start + length - 1, exclude_body=exclude_body) init_body(node, concept_found, sub_expr) @@ -1343,13 +1386,12 @@ def init_body(item, concept, value): item.compiled[ConceptParts.BODY] = DoNotResolve(value) -def compute_expected_array(concepts_map, expression, expected, sya=False, init_empty_body=False, exclude_body=False): +def compute_expected_array(concepts_map, expression, expected, init_empty_body=False, exclude_body=False): """ Computes a simple but sufficient version of the result of infix_to_postfix() :param concepts_map: :param expression: :param expected: - :param sya: if true, generate an SyaConceptParserHelper instead of a cnode :param init_empty_body: if True adds the source in the body (actually in compiled.BODY) :param exclude_body: do not include ConceptParts.BODY in comparison :return: @@ -1359,7 +1401,6 @@ def compute_expected_array(concepts_map, expression, expected, sya=False, init_e concepts_map, expression_as_tokens, sub_expr, - sya=sya, init_empty_body=init_empty_body, exclude_body=exclude_body) for sub_expr in expected] @@ -1398,27 +1439,6 @@ def get_source_code_node(start, text, concepts_map, id_manager=None): return SourceCodeNode(start, start + len(tokens) - 1, tokens, text, python_node) -def resolve_test_concept(concept_map, hint): - if isinstance(hint, str): - return concept_map[hint] - - if isinstance(hint, CC): - concept = concept_map[hint.concept_key] - compiled = {k: resolve_test_concept(concept_map, v) for k, v in hint.compiled.items()} - return CC(concept, source=hint.source, exclude_body=hint.exclude_body, **compiled) - - if isinstance(hint, CMV): - concept = concept_map[hint.concept_key] - return CMV(concept, **hint.variables) - - # CV - # - # CMV - # - # CIO - raise NotImplementedError() - - def get_rete_conditions(*conditions): """ Transform a list of string into a list of Condition (Rete conditions) diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index 0996a33..85116de 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -997,9 +997,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): text = "one 'one' one plus two shoe" - unwanted_res = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")] + unwanted_res = [CN("one"), SCN(" 'one' "), (CN("one"), 1), UTN(" plus "), CN("two")] expected_res = [CNC("foo", - "one 'one' one plus two shoe", + source="one 'one' one plus two shoe", x=CC("one"), y=python_ret_val(" 'one' "), z=CC("plus", source="one plus two", x="one", y="two"))] diff --git a/tests/parsers/test_SequenceNodeParser.py b/tests/parsers/test_SequenceNodeParser.py index b21f7ef..347f5d3 100644 --- a/tests/parsers/test_SequenceNodeParser.py +++ b/tests/parsers/test_SequenceNodeParser.py @@ -90,14 +90,14 @@ class TestSequenceNodeParser(TestUsingMemoryBasedSheerka): ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]), ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]), ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]), - ("foo bar 1 + 1", False, ["foo bar", " 1 + 1"]), + ("foo bar 1 + 1", False, ["foo bar", SCN(" 1 + 1")]), ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]), ("foo bar x$!#", False, ["foo bar", " x$!#"]), ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]), ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]), ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]), - ("1 + 1 foo bar", False, ["1 + 1 ", "foo bar"]), + ("1 + 1 foo bar", False, [SCN("1 + 1 "), "foo bar"]), ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]), ("x$!# foo bar", False, ["x$!# ", "foo bar"]), ("func(one)", False, ["func(", "one", ")"]), diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index 52874e4..064b76e 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -7,17 +7,16 @@ from core.concept import Concept from core.global_symbols import CONCEPT_COMPARISON_CONTEXT from core.sheerka.Sheerka import RECOGNIZED_BY_KEY from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer, comparable_tokens +from core.tokenizer import Token, TokenKind, Tokenizer, comparable_tokens from core.utils import get_text_from_tokens +from parsers.BaseExpressionParser import FunctionNode, FunctionParameter, NameExprNode from parsers.BaseNodeParser import ConceptNode, SourceCodeNode, UnrecognizedTokensNode from parsers.PythonParser import PythonNode from parsers.SyaNodeParser import FunctionDetected, NoSyaConceptFound, NotEnoughParameters, SyaConceptParser, \ - SyaNodeParser, \ - SyaTokensParser, \ - TokensNotFound, TooManyParameters + SyaNodeParser, SyaTokensParser, TokensNotFound, TooManyParameters from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -from tests.parsers.parsers_utils import CC, CN, CNC, SCN, UTN, compute_expected_array, get_test_obj, \ +from tests.parsers.parsers_utils import CC, CN, CNC, RETVAL, SCN, SCWC, UTN, compute_expected_array, get_test_obj, \ prepare_nodes_comparison cmap = { @@ -34,7 +33,7 @@ cmap = { } -class TestSyaNodeParser2(TestUsingMemoryBasedSheerka): +class TestSyaNodeParser(TestUsingMemoryBasedSheerka): shared_ontology = None @classmethod @@ -46,11 +45,11 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka): cmap["plus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") cmap["mult"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") - TestSyaNodeParser2.sheerka.set_is_greater_than(context, - BuiltinConcepts.PRECEDENCE, - cmap["mult"], - cmap["plus"], - CONCEPT_COMPARISON_CONTEXT) + TestSyaNodeParser.sheerka.set_is_greater_than(context, + BuiltinConcepts.PRECEDENCE, + cmap["mult"], + cmap["plus"], + CONCEPT_COMPARISON_CONTEXT) cls.shared_ontology = sheerka.get_ontology(context) sheerka.pop_ontology(context) @@ -386,6 +385,30 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka): assert concept_node_as_test_obj == resolved_expected assert concept_node.concept.get_metadata().variables == [("a", "1 + 1 "), ("b", "2 + 2")] + def test_i_can_concept_parse_function(self): + sheerka, context = self.initialize_test() + + expression = "one plus func(twenty two)" + param1 = self.get_real_node(cmap, expression, "one") + parser_input = ParserInput(expression).reset() + parser_input.seek(2) + sya_node_parser = SyaNodeParser() + tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input) + tokens_parser.stack = [param1] + + concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack) + concept_parser.parse() + concept_node = concept_parser.concept_node + + assert not concept_parser.has_error() + assert len(concept_parser.expected) == 0 + + expected = CNC("plus", a=CNC("one"), b=SCWC("func(", ")", CN("twenties", source="twenty two"))) + resolved_expected = compute_expected_array(cmap, expression, [expected])[0] + concept_node_as_test_obj = get_test_obj(concept_node, expected) + assert concept_node_as_test_obj == resolved_expected + assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "func(twenty two)")] + def test_i_can_concept_parse_concepts_composition(self): sheerka, context = self.initialize_test() @@ -1385,6 +1408,25 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka): # check metadata assert expected_concept.get_metadata().variables == [("a", "twenty one")] + def test_i_can_parse_when_function(self): + sheerka, context, parser = self.init_parser() + + text = "one plus func(twenty two)" + res = parser.parse(context, ParserInput(text)) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + expected = [CNC("plus", a=CC("one"), b=[RETVAL("func(twenty two)")], source=text)] + _stack, _expected = prepare_nodes_comparison(cmap, text, lexer_nodes, expected) + assert _stack == _expected + + # check the metadata + expected_concept = lexer_nodes[0].concept + assert expected_concept.get_metadata().variables == [("a", "one"), ("b", "func(twenty two)")] + def test_i_can_parse_sequences(self): sheerka, context, parser = self.init_parser() @@ -1501,6 +1543,32 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka): _stack, _expected = prepare_nodes_comparison(concepts_map, text, lexer_nodes, expected) assert _stack == _expected + def test_i_can_parse_when_expr_tokens(self): + sheerka, context, parser = self.init_parser() + + text = "one plus func(twenty two)" + tokens = list(Tokenizer(text, yield_eof=False)) + fun_token = tokens[4] + expr = FunctionNode(4, 9, tokens[4:10], + NameExprNode(4, 4, tokens[4:5]), + NameExprNode(9, 9, tokens[9:10]), + [FunctionParameter(NameExprNode(6, 8, tokens[6:9]), None)]) + tokens[4:] = [Token(TokenKind.EXPR, expr, fun_token.index, fun_token.line, fun_token.column)] + res = parser.parse(context, ParserInput(None, tokens=tokens)) + wrapper = res.body + lexer_nodes = res.body.body + + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + + expected = [CNC("plus", a=CC("one"), b=[RETVAL("func(twenty two)")], source=text)] + _stack, _expected = prepare_nodes_comparison(cmap, text, lexer_nodes, expected) + assert _stack == _expected + + # check the metadata + expected_concept = lexer_nodes[0].concept + assert expected_concept.get_metadata().variables == [("a", "one"), ("b", "func(twenty two)")] + @pytest.mark.parametrize("text, expected_result", [ ("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]), ("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]),