Fixed #127 : SyaNodeParser : Allow tokens parsing

Fixed #128 : parser_utils.get_node() : Refactor
This commit is contained in:
2021-09-12 11:26:14 +02:00
parent 945807b375
commit a61a1c0d2b
12 changed files with 327 additions and 290 deletions
+9 -84
View File
@@ -224,7 +224,8 @@ def resolve_ambiguity(context, concepts):
for c in by_complexity[complexity]: for c in by_complexity[complexity]:
from core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints from core.sheerka.services.SheerkaEvaluateConcept import EvaluationHints
evaluated = context.sheerka.evaluate_concept(context, c, evaluated = context.sheerka.evaluate_concept(context, c,
hints=EvaluationHints(eval_body=False, expression_only=True), hints=EvaluationHints(eval_body=False,
expression_only=True),
metadata=[ConceptParts.PRE, ConceptParts.WHERE]) metadata=[ConceptParts.PRE, ConceptParts.WHERE])
if context.sheerka.is_success(evaluated) or evaluated.key == c.key: if context.sheerka.is_success(evaluated) or evaluated.key == c.key:
remaining_concepts.append(c) remaining_concepts.append(c)
@@ -481,7 +482,7 @@ def get_lexer_nodes(return_values, start, tokens):
:return: list of list (list of concept node sequence) :return: list of list (list of concept node sequence)
""" """
from evaluators.BaseEvaluator import BaseEvaluator from evaluators.BaseEvaluator import BaseEvaluator
from parsers.BaseNodeParser import ConceptNode, LexerNode, RuleNode, SourceCodeNode from parsers.BaseNodeParser import ConceptNode, RuleNode, SourceCodeNode
lexer_nodes = [] lexer_nodes = []
for ret_val in return_values: for ret_val in return_values:
@@ -512,15 +513,7 @@ def get_lexer_nodes(return_values, start, tokens):
elif who in ("parsers.Bnf", "parsers.Sya", "parsers.Sequence"): elif who in ("parsers.Bnf", "parsers.Sya", "parsers.Sequence"):
nodes = [node.clone() for node in ret_val.body.body] nodes = [node.clone() for node in ret_val.body.body]
for node in nodes: for node in nodes:
node.start += start node.shift_pos(start)
node.end += start
if isinstance(node, ConceptNode):
for k, v in node.concept.get_compiled().items():
if isinstance(v, LexerNode):
v = v.clone()
v.start += start
v.end += start
node.concept.get_compiled()[k] = v
# but append the whole sequence if when it's a sequence # but append the whole sequence if when it's a sequence
lexer_nodes.append(nodes) lexer_nodes.append(nodes)
@@ -531,85 +524,17 @@ def get_lexer_nodes(return_values, start, tokens):
for rule in rules: for rule in rules:
lexer_nodes.append([RuleNode(rule, start, end, tokens, ret_val.body.source)]) lexer_nodes.append([RuleNode(rule, start, end, tokens, ret_val.body.source)])
elif who == "parsers.Function":
node = ret_val.body.body.clone()
node.shift_pos(start)
lexer_nodes.append([node])
else: else:
raise NotImplementedError(f"get_lexer_nodes who={who}") raise NotImplementedError(f"get_lexer_nodes who={who}")
return lexer_nodes return lexer_nodes
def get_lexer_nodes_using_positions(return_values, positions):
    """
    Transform every element of ``return_values`` into lexer nodes
    (ConceptNode, UnrecognizedTokensNode, SourceCodeNode, ...).

    Unlike the sibling ``get_lexer_nodes`` — which shifts node positions by a
    single start *offset* — here each return value is paired one-to-one with a
    position, and that position carries the *absolute* start/end to apply.

    :param return_values: parser return values to convert; each exposes ``who``
        (id of the producing parser) and ``body`` (the parsed payload)
    :param positions: list of triplets (start, end, tokens), one per return value
    :return: flat list of lexer nodes
    """
    from evaluators.BaseEvaluator import BaseEvaluator
    from parsers.BaseNodeParser import ConceptNode, LexerNode, RuleNode, SourceCodeNode
    lexer_nodes = []
    for ret_val, position in zip(return_values, positions):
        # To manage AFTER_PARSING evaluators: the evaluator wraps the real
        # parser result, so read the producing parser from the parent instead.
        who = ret_val.parents[0].who if ret_val.who.startswith(BaseEvaluator.PREFIX) else ret_val.who
        if who in ("parsers.Python", 'parsers.PythonWithConcepts'):
            lexer_nodes.append(SourceCodeNode(position.start,
                                              position.end,
                                              position.tokens,
                                              ret_val.body.source,
                                              python_node=ret_val.body.body,
                                              return_value=ret_val))
        elif who == "parsers.ExactConcept":
            # body may hold either a single concept or an iterable of concepts
            concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
            for concept in concepts:
                lexer_nodes.append(ConceptNode(concept,
                                               position.start,
                                               position.end,
                                               position.tokens,
                                               ret_val.body.source))
        elif who in ("parsers.Bnf", "parsers.Sya", "parsers.Sequence"):
            # Clone before mutating: the original nodes may be shared or cached.
            nodes = [node.clone() for node in ret_val.body.body]
            for node in nodes:
                # NOTE(review): the node's span is *assigned* the absolute
                # position here, while the compiled LexerNodes below are
                # *shifted* by position.start — confirm this mixed
                # absolute/relative handling is intentional.
                node.start = position.start
                node.end = position.end
                if isinstance(node, ConceptNode):
                    for k, v in node.concept.get_compiled().items():
                        if isinstance(v, LexerNode):
                            # clone so the concept's cached compiled node is not mutated
                            v = v.clone()
                            v.start += position.start
                            v.end += position.start
                            node.concept.get_compiled()[k] = v
            # sequences are flattened: each node is appended individually
            lexer_nodes.extend(nodes)
        elif who == "parsers.Rule":
            # body may hold either a single rule or an iterable of rules
            rules = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
            for rule in rules:
                lexer_nodes.append(RuleNode(rule,
                                            position.start,
                                            position.end,
                                            position.tokens, ret_val.body.source))
        elif who == "parsers.Function":
            # NOTE(review): unlike the other branches, the body node is mutated
            # in place (no clone) before being appended — confirm it is not shared.
            node = ret_val.body.body
            node.start = position.start
            node.end = position.end
            lexer_nodes.append(node)
        else:
            raise NotImplementedError(f"get_lexer_nodes_using_positions {who=}")
    return lexer_nodes
def ensure_evaluated(context, concept, eval_body=True, metadata=None): def ensure_evaluated(context, concept, eval_body=True, metadata=None):
""" """
Evaluate a concept is not already evaluated Evaluate a concept is not already evaluated
@@ -1152,7 +1152,7 @@ class SheerkaConceptManager(BaseService):
:return: :return:
""" """
if token.type == TokenKind.WHITESPACE: if token.type in (TokenKind.WHITESPACE, TokenKind.EXPR):
return None return None
if token.type == TokenKind.STRING: if token.type == TokenKind.STRING:
+4 -1
View File
@@ -169,7 +169,10 @@ class ParserInput:
return True return True
def is_empty(self): def is_empty(self):
if self.text.strip() == "": if self.text is not None and self.text.strip() == "":
return True
if self.from_tokens and len(self.tokens) == 0:
return True return True
if self.end == self.start: if self.end == self.start:
+1 -2
View File
@@ -11,10 +11,9 @@ class TokenKind(Enum):
IDENTIFIER = "identifier" IDENTIFIER = "identifier"
CONCEPT = "concept" CONCEPT = "concept"
RULE = "rule" RULE = "rule"
EXPR = "expression"
STRING = "string" STRING = "string"
NUMBER = "number" NUMBER = "number"
TRUE = "true"
FALSE = "false"
LPAR = "lpar" LPAR = "lpar"
RPAR = "rpar" RPAR = "rpar"
LBRACKET = "lbracket" LBRACKET = "lbracket"
+1
View File
@@ -729,6 +729,7 @@ def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
switcher = { switcher = {
# TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
TokenKind.EXPR: lambda t: t.value.get_source()
} }
if custom_switcher: if custom_switcher:
+20
View File
@@ -45,6 +45,10 @@ class LexerNode(Node):
def get_source_to_parse(self): def get_source_to_parse(self):
return self.source return self.source
def shift_pos(self, offset):
    """Translate this node's token span ([start, end]) by *offset* positions."""
    new_start = self.start + offset
    new_end = self.end + offset
    self.start, self.end = new_start, new_end
class UnrecognizedTokensNode(LexerNode): class UnrecognizedTokensNode(LexerNode):
def __init__(self, start, end, tokens): def __init__(self, start, end, tokens):
@@ -234,6 +238,14 @@ class ConceptNode(LexerNode):
""" """
return self.concept return self.concept
def shift_pos(self, offset):
    """
    Shift this node's span by *offset*, including the concept's compiled parts.

    Every LexerNode stored in the concept's compiled map is cloned before being
    shifted, so the (possibly shared/cached) original compiled node is left
    untouched; the shifted clone replaces it in the map.
    """
    super().shift_pos(offset)
    # NOTE(review): get_compiled() is called twice; this assumes it returns the
    # same underlying dict each time (so the re-assignment sticks) — confirm.
    for k, v in self.concept.get_compiled().items():
        if isinstance(v, LexerNode):
            v = v.clone()  # avoid mutating the shared compiled node
            v.shift_pos(offset)
            self.concept.get_compiled()[k] = v
class SourceCodeNode(LexerNode): class SourceCodeNode(LexerNode):
""" """
@@ -403,6 +415,9 @@ class SourceCodeWithConceptNode(LexerNode):
clone = SourceCodeWithConceptNode(self.first.clone(), self.last.clone(), nodes, self.has_unrecognized) clone = SourceCodeWithConceptNode(self.first.clone(), self.last.clone(), nodes, self.has_unrecognized)
clone.python_node = self.python_node clone.python_node = self.python_node
clone.return_value = self.return_value clone.return_value = self.return_value
clone.source = self.source
if self.tokens:
clone.tokens = self.tokens.copy()
return clone return clone
def to_short_str(self): def to_short_str(self):
@@ -421,6 +436,11 @@ class SourceCodeWithConceptNode(LexerNode):
errors.append(n.error) errors.append(n.error)
return errors return errors
def shift_pos(self, offset):
    """Shift this composite node and all of its child nodes by *offset*."""
    super().shift_pos(offset)
    for child in (self.first, self.last, *self.nodes):
        child.shift_pos(offset)
class VariableNode(LexerNode): class VariableNode(LexerNode):
""" """
+2 -1
View File
@@ -254,7 +254,8 @@ class FunctionParser(BaseExpressionParser):
# check if it is a valid source code # check if it is a valid source code
for source_code_node in res: for source_code_node in res:
source_code_node.fix_all_pos() source_code_node.fix_all_pos()
source_code_node.pseudo_fix_source() source_code_node.source = function_node.get_source()
source_code_node.tokens = function_node.tokens
python_parsing_res = python_parser.parse_nodes(context, source_code_node.get_all_nodes()) python_parsing_res = python_parser.parse_nodes(context, source_code_node.get_all_nodes())
if python_parsing_res.status: if python_parsing_res.status:
+1 -1
View File
@@ -12,7 +12,7 @@ from core.utils import flatten, get_text_from_tokens, strip_tokens
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensCache, UnrecognizedTokensNode from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensCache, UnrecognizedTokensNode
from parsers.BaseParser import ParsingError from parsers.BaseParser import ParsingError
PARSERS = ["Sequence", "Bnf", "Python"] PARSERS = ["Function", "Sequence", "Bnf", "Python"]
class SyaNodeException(Exception): class SyaNodeException(Exception):
+205 -185
View File
@@ -2,7 +2,7 @@ import ast
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Union from typing import List, Union
from core.builtin_concepts import ReturnValueConcept from core.builtin_concepts import ParserResultConcept, ReturnValueConcept
from core.builtin_helpers import CreateObjectIdentifiers from core.builtin_helpers import CreateObjectIdentifiers
from core.concept import AllConceptParts, Concept, ConceptParts, DoNotResolve from core.concept import AllConceptParts, Concept, ConceptParts, DoNotResolve
from core.rule import Rule from core.rule import Rule
@@ -20,14 +20,6 @@ from sheerkarete.common import V
from sheerkarete.conditions import AndConditions, Condition, NegatedCondition, NegatedConjunctiveConditions from sheerkarete.conditions import AndConditions, Condition, NegatedCondition, NegatedConjunctiveConditions
@dataclass
class Obj:
prop_a: object
prop_b: object = None
prop_c: object = None
parent: object = None
class ExprTestObj: class ExprTestObj:
@staticmethod @staticmethod
def get_pos(nodes): def get_pos(nodes):
@@ -276,6 +268,130 @@ class LC(ExprTestObj): # for List Comprehension node
return ListComprehensionNode(start, end, full_text_as_tokens[start: end + 1], element, comprehensions) return ListComprehensionNode(start, end, full_text_as_tokens[start: end + 1], element, comprehensions)
class FN(ExprTestObj):
    """
    Test class only.

    It matches with FunctionNode but with less constraints.
    Thereby,
        FN("first", "last", ["param1," ...]) can be compared to
        FunctionNode(NameExprNode("first"), NameExprNode("second"),
                     [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")])
    Note that FunctionParameter can easily be defined with a single string
      * "param"   -> FunctionParameter(NameExprNode("param"), None)
      * "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", "))
    For more complicated situations, you can use a tuple (value, sep) to define
    the value part and the separator part.
    """

    def __init__(self, first, last, parameters):
        self.first = first
        self.last = last
        # Normalize every parameter into a (value, separator) pair.
        self.parameters = []
        for param in parameters:
            if isinstance(param, tuple):
                self.parameters.append(param)
            elif isinstance(param, str) and (pos := param.find(",")) != -1:
                # split "param, " into ("param", ", ")
                self.parameters.append((param[:pos], param[pos:]))
            else:
                self.parameters.append((param, None))

    def __repr__(self):
        res = self.first
        for param in self.parameters:
            if param[1]:
                res += f"{param[0]}{param[1]} "
            else:
                res += f"{param[0]}"
        return res + self.last

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, FN):
            return self.first == other.first and self.last == other.last and self.parameters == other.parameters
        return False

    def __hash__(self):
        # BUG FIX: self.parameters is a list, which is unhashable — hashing it
        # directly raised TypeError. Convert to a tuple of (value, sep) pairs
        # so FN instances can be used in sets/dict keys.
        return hash((self.first, self.last, tuple(self.parameters)))

    def transform_real_obj(self, other, get_test_obj_delegate):
        """
        Transform *other* (a real FunctionNode) into an FN to ease comparison.

        :param other: an FN (returned as-is) or a FunctionNode
        :param get_test_obj_delegate: callback converting nested real nodes into
            their test-object counterparts
        :return: an FN mirroring *other*
        :raises Exception: when *other* is neither FN nor FunctionNode
        """
        if isinstance(other, FN):
            return other
        if isinstance(other, FunctionNode):
            params = []
            for self_parameter, other_parameter in zip(self.parameters, other.parameters):
                if isinstance(self_parameter[0], str):
                    value = other_parameter.value.value
                else:
                    # nested test object: delegate the conversion
                    value = get_test_obj_delegate(other_parameter.value, self_parameter[0])
                sep = other_parameter.separator.value if other_parameter.separator else None
                params.append((value, sep))
            return FN(other.first.value, other.last.value, params)
        raise Exception(f"Expecting FunctionNode but received {other=}")

    def get_expr_node(self, full_text_as_tokens=None):
        """
        Build the real FunctionNode this FN stands for, positioned inside
        *full_text_as_tokens*.
        """
        start, end = self.get_pos_from_source(self.first, full_text_as_tokens)
        first = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
        start, end = self.get_pos_from_source(self.last, full_text_as_tokens)
        last = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
        parameters = []
        for param_value, sep in self.parameters:
            if isinstance(param_value, str):
                start, end = self.get_pos_from_source(param_value, full_text_as_tokens)
                param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
            else:
                # nested test object: recurse to build its expression node
                param_as_expr_node = param_value.get_expr_node(full_text_as_tokens)
            if sep:
                # the separator occupies the tokens right after the parameter
                sep_tokens = Tokenizer(sep, yield_eof=False)
                start = param_as_expr_node.end + 1
                end = start + len(list(sep_tokens)) - 1
                sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
            else:
                sep_as_expr_node = None
            parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node))
        start, end = first.start, last.end
        return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters)
class HelperWithPos:
    """
    Base helper carrying a (start, end) token span for test objects.

    A position supplied to the constructor is "fixed" and is never adjusted by
    later fix_pos() calls; a missing one is grown to cover every node seen.
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end
        # Explicitly-given positions are pinned for the lifetime of the helper.
        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    def fix_pos(self, node):
        """
        Widen the span so it covers *node*'s position.

        :param node: an object exposing ``start``/``end`` attributes, or a
            ``(start, end)`` tuple; anything else is ignored
        :return: self (for chaining)
        """
        if hasattr(node, "start"):
            candidate_start, candidate_end = node.start, node.end
        elif isinstance(node, tuple):
            candidate_start, candidate_end = node
        else:
            candidate_start = candidate_end = None
        # start only ever shrinks, end only ever grows — unless pinned
        if not self.start_is_fixed and candidate_start is not None:
            if self.start is None or candidate_start < self.start:
                self.start = candidate_start
        if not self.end_is_fixed and candidate_end is not None:
            if self.end is None or candidate_end > self.end:
                self.end = candidate_end
        return self
class CC: class CC:
""" """
Concept class for test purpose Concept class for test purpose
@@ -578,29 +694,60 @@ class CIO:
raise Exception(f"Expecting Concept but received {other=}") raise Exception(f"Expecting Concept but received {other=}")
class HelperWithPos: class RETVAL:
def __init__(self, start=None, end=None): """
self.start = start Class helper for return value for parser result
self.end = end """
self.start_is_fixed = start is not None def __init__(self, source, who=None, parser=None):
self.end_is_fixed = end is not None self.source = source
self.who = who
self.parser = parser
def fix_pos(self, node): def __eq__(self, other):
if not self.start_is_fixed: if id(self) == id(other):
start = node.start if hasattr(node, "start") else \ return True
node[0] if isinstance(node, tuple) else None
if start is not None and (self.start is None or start < self.start): if not isinstance(other, RETVAL):
self.start = start return False
if not self.end_is_fixed: return (self.source == other.source and
end = node.end if hasattr(node, "end") else \ self.who == other.who and
node[1] if isinstance(node, tuple) else None self.parser == other.parser)
if end is not None and (self.end is None or end > self.end): def __hash__(self):
self.end = end return hash((self.source, self.who))
return self
def __repr__(self):
txt = f"RV(source='{self.source}'"
if self.who is not None:
txt += f", who={self.who}"
if self.parser is not None:
txt += f", parser={self.parser}"
return txt + ")"
def transform_real_obj(self, other, get_test_obj_delegate):
"""
Transform other into CNC, to ease the comparison
:param other:
:param get_test_obj_delegate:
:return:
"""
if isinstance(other, RETVAL):
return other
if isinstance(other, ReturnValueConcept):
if not isinstance(other.body, ParserResultConcept):
raise Exception(f"ParserResultConcept not found body={other.body}")
parser_result = other.body
return RETVAL(parser_result.source,
other.who if self.who is not None else None,
parser_result.parser if self.parser is not None else None)
raise Exception(f"Expecting ReturnValueConcept but received {other=}")
class SCN(HelperWithPos): class SCN(HelperWithPos):
@@ -992,10 +1139,10 @@ class RN(HelperWithPos):
if not isinstance(other, RN): if not isinstance(other, RN):
return False return False
return self.rule_id == other.rule_id and \ return (self.rule_id == other.rule_id and
self.start == other.start and \ self.start == other.start and
self.end == other.end and \ self.end == other.end and
self.source == other.source self.source == other.source)
def __hash__(self): def __hash__(self):
return hash((self.rule_id, self.start, self.end, self.source)) return hash((self.rule_id, self.start, self.end, self.source))
@@ -1032,99 +1179,6 @@ class RN(HelperWithPos):
raise Exception(f"Expecting RuleNode but received {other=}") raise Exception(f"Expecting RuleNode but received {other=}")
class FN(ExprTestObj):
"""
Test class only
It matches with FunctionNode but with less constraints
Thereby,
FN("first", "last", ["param1," ...]) can be compared to
FunctionNode(NameExprNode("first"), NameExprNode("second"), [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")])
Note that FunctionParameter can easily be defined with a single string
* "param" -> FunctionParameter(NameExprNode("param"), None)
* "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", "))
For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part
"""
def __init__(self, first, last, parameters):
self.first = first
self.last = last
self.parameters = []
for param in parameters:
if isinstance(param, tuple):
self.parameters.append(param)
elif isinstance(param, str) and (pos := param.find(",")) != -1:
self.parameters.append((param[:pos], param[pos:]))
else:
self.parameters.append((param, None))
def __repr__(self):
res = self.first
for param in self.parameters:
if param[1]:
res += f"{param[0]}{param[1]} "
else:
res += f"{param[0]}"
return res + self.last
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, FN):
return self.first == other.first and self.last == other.last and self.parameters == other.parameters
return False
def __hash__(self):
return hash((self.first, self.last, self.parameters))
def transform_real_obj(self, other, get_test_obj_delegate):
if isinstance(other, FN):
return other
if isinstance(other, FunctionNode):
params = []
for self_parameter, other_parameter in zip(self.parameters, other.parameters):
if isinstance(self_parameter[0], str):
value = other_parameter.value.value
else:
value = get_test_obj_delegate(other_parameter.value, self_parameter[0])
sep = other_parameter.separator.value if other_parameter.separator else None
params.append((value, sep))
return FN(other.first.value, other.last.value, params)
raise Exception(f"Expecting FunctionNode but received {other=}")
def get_expr_node(self, full_text_as_tokens=None):
start, end = self.get_pos_from_source(self.first, full_text_as_tokens)
first = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
start, end = self.get_pos_from_source(self.last, full_text_as_tokens)
last = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
parameters = []
for param_value, sep in self.parameters:
if isinstance(param_value, str):
start, end = self.get_pos_from_source(param_value, full_text_as_tokens)
param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
param_as_expr_node = param_value.get_expr_node(full_text_as_tokens)
if sep:
sep_tokens = Tokenizer(sep, yield_eof=False)
start = param_as_expr_node.end + 1
end = start + len(list(sep_tokens)) - 1
sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
sep_as_expr_node = None
parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node))
start, end = first.start, last.end
return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters)
@dataclass() @dataclass()
class NEGCOND: class NEGCOND:
""" """
@@ -1207,8 +1261,6 @@ def get_node(
sub_expr, sub_expr,
concept_key=None, concept_key=None,
skip=0, skip=0,
is_bnf=False,
sya=False,
init_empty_body=False, init_empty_body=False,
exclude_body=False): exclude_body=False):
""" """
@@ -1219,41 +1271,41 @@ def get_node(
:param concepts_map: hash of the known concepts :param concepts_map: hash of the known concepts
:param concept_key: key of the concept if different from sub_expr :param concept_key: key of the concept if different from sub_expr
:param skip: number of occurrences of sub_expr to skip :param skip: number of occurrences of sub_expr to skip
:param is_bnf: True if the concept to search is a bnf definition
:param sya: Return SyaConceptParserHelper instead of a ConceptNode when needed
:param init_empty_body: if True adds the source in the body (actually in compiled.BODY) :param init_empty_body: if True adds the source in the body (actually in compiled.BODY)
:param exclude_body: Ask to not compare body :param exclude_body: Ask to not compare body
:return: :return:
""" """
if sub_expr == "')'": if isinstance(sub_expr, list):
return ")" return [get_node(concepts_map,
expression_as_tokens,
s,
concept_key,
skip,
init_empty_body,
exclude_body) for s in sub_expr]
if isinstance(sub_expr, ReturnValueConcept): if isinstance(sub_expr, tuple):
return sub_expr return get_node(concepts_map,
expression_as_tokens,
sub_expr[0],
concept_key,
sub_expr[1],
init_empty_body,
exclude_body)
if isinstance(sub_expr, DoNotResolve): if isinstance(sub_expr, (DoNotResolve, ReturnValueConcept, RETVAL)):
return sub_expr
if isinstance(sub_expr, CIO):
sub_expr.set_concept(concepts_map[sub_expr.concept_name])
source = sub_expr.source or sub_expr.concept_name
if source:
node = get_node(concepts_map, expression_as_tokens, source, sya=sya)
sub_expr.start = node.start
sub_expr.end = node.end
return sub_expr return sub_expr
if isinstance(sub_expr, SCWC): if isinstance(sub_expr, SCWC):
sub_expr.first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya) sub_expr.first = get_node(concepts_map, expression_as_tokens, sub_expr.first, skip=skip)
sub_expr.last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya) sub_expr.last = get_node(concepts_map, expression_as_tokens, sub_expr.last, skip=skip)
sub_expr.content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content] sub_expr.content = [get_node(concepts_map, expression_as_tokens, c, skip=skip) for c in sub_expr.content]
sub_expr.fix_pos(sub_expr.first) sub_expr.fix_pos(sub_expr.first)
sub_expr.fix_pos(sub_expr.last) sub_expr.fix_pos(sub_expr.last)
return sub_expr return sub_expr
# return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()
if isinstance(sub_expr, SCN): if isinstance(sub_expr, SCN):
node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya) node = get_node(concepts_map, expression_as_tokens, sub_expr.source, skip=skip)
sub_expr.fix_pos(node) sub_expr.fix_pos(node)
return sub_expr return sub_expr
@@ -1263,13 +1315,14 @@ def get_node(
sub_expr.end = start + length - 1 sub_expr.end = start + length - 1
return sub_expr return sub_expr
if isinstance(sub_expr, (CNC, CC, CN, CMV)): if isinstance(sub_expr, (CNC, CC, CN, CMV, CIO)):
if sub_expr.concept is None or sub_expr.start is None or sub_expr.end is None: if sub_expr.concept is None or sub_expr.start is None or sub_expr.end is None:
concept_node = get_node( concept_node = get_node(
concepts_map, concepts_map,
expression_as_tokens, expression_as_tokens,
sub_expr.source or sub_expr.concept_key, sub_expr.source or sub_expr.concept_key,
sub_expr.concept_key, sya=sya) sub_expr.concept_key,
skip)
if not hasattr(concept_node, "concept"): if not hasattr(concept_node, "concept"):
raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.") raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.")
concept_found = concept_node.concept concept_found = concept_node.concept
@@ -1279,7 +1332,7 @@ def get_node(
(concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start)) (concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
if hasattr(sub_expr, "compiled"): if hasattr(sub_expr, "compiled"):
for k, v in sub_expr.compiled.items(): for k, v in sub_expr.compiled.items():
node = get_node(concepts_map, expression_as_tokens, v, sya=sya, node = get_node(concepts_map, expression_as_tokens, v,
exclude_body=exclude_body) # need to get start and end positions exclude_body=exclude_body) # need to get start and end positions
if isinstance(v, str) and v in concepts_map: if isinstance(v, str) and v in concepts_map:
new_value_concept = concepts_map[v] new_value_concept = concepts_map[v]
@@ -1299,27 +1352,17 @@ def get_node(
return sub_expr return sub_expr
if isinstance(sub_expr, UTN): if isinstance(sub_expr, UTN):
node = get_node(concepts_map, expression_as_tokens, sub_expr.source) node = get_node(concepts_map, expression_as_tokens, sub_expr.source, skip=skip)
sub_expr.fix_pos(node) sub_expr.fix_pos(node)
return sub_expr return sub_expr
if isinstance(sub_expr, tuple):
return get_node(concepts_map, expression_as_tokens, sub_expr[0],
concept_key=concept_key, skip=sub_expr[1], is_bnf=is_bnf, sya=sya)
start, length = _index(expression_as_tokens, sub_expr, skip) start, length = _index(expression_as_tokens, sub_expr, skip)
# special case of python source code
if "+" in sub_expr and sub_expr.strip() != "+":
return SCN(sub_expr, start, start + length - 1)
# try to match one of the concept from the map # try to match one of the concept from the map
concept_key = concept_key or sub_expr concept_key = concept_key or sub_expr
concept_found = concepts_map.get(concept_key, None) concept_found = concepts_map.get(concept_key, None)
if concept_found: if concept_found:
concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests
# if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf:
# return SyaConceptParserHelper(concept_found, start, start + length - 1)
if init_empty_body: if init_empty_body:
node = CNC(concept_found, sub_expr, start, start + length - 1, exclude_body=exclude_body) node = CNC(concept_found, sub_expr, start, start + length - 1, exclude_body=exclude_body)
init_body(node, concept_found, sub_expr) init_body(node, concept_found, sub_expr)
@@ -1343,13 +1386,12 @@ def init_body(item, concept, value):
item.compiled[ConceptParts.BODY] = DoNotResolve(value) item.compiled[ConceptParts.BODY] = DoNotResolve(value)
def compute_expected_array(concepts_map, expression, expected, sya=False, init_empty_body=False, exclude_body=False): def compute_expected_array(concepts_map, expression, expected, init_empty_body=False, exclude_body=False):
""" """
Computes a simple but sufficient version of the result of infix_to_postfix() Computes a simple but sufficient version of the result of infix_to_postfix()
:param concepts_map: :param concepts_map:
:param expression: :param expression:
:param expected: :param expected:
:param sya: if true, generate an SyaConceptParserHelper instead of a cnode
:param init_empty_body: if True adds the source in the body (actually in compiled.BODY) :param init_empty_body: if True adds the source in the body (actually in compiled.BODY)
:param exclude_body: do not include ConceptParts.BODY in comparison :param exclude_body: do not include ConceptParts.BODY in comparison
:return: :return:
@@ -1359,7 +1401,6 @@ def compute_expected_array(concepts_map, expression, expected, sya=False, init_e
concepts_map, concepts_map,
expression_as_tokens, expression_as_tokens,
sub_expr, sub_expr,
sya=sya,
init_empty_body=init_empty_body, init_empty_body=init_empty_body,
exclude_body=exclude_body) for sub_expr in expected] exclude_body=exclude_body) for sub_expr in expected]
@@ -1398,27 +1439,6 @@ def get_source_code_node(start, text, concepts_map, id_manager=None):
return SourceCodeNode(start, start + len(tokens) - 1, tokens, text, python_node) return SourceCodeNode(start, start + len(tokens) - 1, tokens, text, python_node)
def resolve_test_concept(concept_map, hint):
if isinstance(hint, str):
return concept_map[hint]
if isinstance(hint, CC):
concept = concept_map[hint.concept_key]
compiled = {k: resolve_test_concept(concept_map, v) for k, v in hint.compiled.items()}
return CC(concept, source=hint.source, exclude_body=hint.exclude_body, **compiled)
if isinstance(hint, CMV):
concept = concept_map[hint.concept_key]
return CMV(concept, **hint.variables)
# CV
#
# CMV
#
# CIO
raise NotImplementedError()
def get_rete_conditions(*conditions): def get_rete_conditions(*conditions):
""" """
Transform a list of string into a list of Condition (Rete conditions) Transform a list of string into a list of Condition (Rete conditions)
+2 -2
View File
@@ -997,9 +997,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
text = "one 'one' one plus two shoe" text = "one 'one' one plus two shoe"
unwanted_res = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")] unwanted_res = [CN("one"), SCN(" 'one' "), (CN("one"), 1), UTN(" plus "), CN("two")]
expected_res = [CNC("foo", expected_res = [CNC("foo",
"one 'one' one plus two shoe", source="one 'one' one plus two shoe",
x=CC("one"), x=CC("one"),
y=python_ret_val(" 'one' "), y=python_ret_val(" 'one' "),
z=CC("plus", source="one plus two", x="one", y="two"))] z=CC("plus", source="one plus two", x="one", y="two"))]
+2 -2
View File
@@ -90,14 +90,14 @@ class TestSequenceNodeParser(TestUsingMemoryBasedSheerka):
("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]), ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]), ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]), ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]),
("foo bar 1 + 1", False, ["foo bar", " 1 + 1"]), ("foo bar 1 + 1", False, ["foo bar", SCN(" 1 + 1")]),
("foo bar twenty one", False, ["foo bar", " twenty ", "one"]), ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
("foo bar x$!#", False, ["foo bar", " x$!#"]), ("foo bar x$!#", False, ["foo bar", " x$!#"]),
("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]), ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]), ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]), ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]),
("1 + 1 foo bar", False, ["1 + 1 ", "foo bar"]), ("1 + 1 foo bar", False, [SCN("1 + 1 "), "foo bar"]),
("twenty one foo bar", False, ["twenty ", "one", "foo bar"]), ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
("x$!# foo bar", False, ["x$!# ", "foo bar"]), ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
("func(one)", False, ["func(", "one", ")"]), ("func(one)", False, ["func(", "one", ")"]),
+75 -7
View File
@@ -7,17 +7,16 @@ from core.concept import Concept
from core.global_symbols import CONCEPT_COMPARISON_CONTEXT from core.global_symbols import CONCEPT_COMPARISON_CONTEXT
from core.sheerka.Sheerka import RECOGNIZED_BY_KEY from core.sheerka.Sheerka import RECOGNIZED_BY_KEY
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, comparable_tokens from core.tokenizer import Token, TokenKind, Tokenizer, comparable_tokens
from core.utils import get_text_from_tokens from core.utils import get_text_from_tokens
from parsers.BaseExpressionParser import FunctionNode, FunctionParameter, NameExprNode
from parsers.BaseNodeParser import ConceptNode, SourceCodeNode, UnrecognizedTokensNode from parsers.BaseNodeParser import ConceptNode, SourceCodeNode, UnrecognizedTokensNode
from parsers.PythonParser import PythonNode from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import FunctionDetected, NoSyaConceptFound, NotEnoughParameters, SyaConceptParser, \ from parsers.SyaNodeParser import FunctionDetected, NoSyaConceptFound, NotEnoughParameters, SyaConceptParser, \
SyaNodeParser, \ SyaNodeParser, SyaTokensParser, TokensNotFound, TooManyParameters
SyaTokensParser, \
TokensNotFound, TooManyParameters
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import CC, CN, CNC, SCN, UTN, compute_expected_array, get_test_obj, \ from tests.parsers.parsers_utils import CC, CN, CNC, RETVAL, SCN, SCWC, UTN, compute_expected_array, get_test_obj, \
prepare_nodes_comparison prepare_nodes_comparison
cmap = { cmap = {
@@ -34,7 +33,7 @@ cmap = {
} }
class TestSyaNodeParser2(TestUsingMemoryBasedSheerka): class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
shared_ontology = None shared_ontology = None
@classmethod @classmethod
@@ -46,7 +45,7 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka):
cmap["plus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") cmap["plus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right")
cmap["mult"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") cmap["mult"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right")
TestSyaNodeParser2.sheerka.set_is_greater_than(context, TestSyaNodeParser.sheerka.set_is_greater_than(context,
BuiltinConcepts.PRECEDENCE, BuiltinConcepts.PRECEDENCE,
cmap["mult"], cmap["mult"],
cmap["plus"], cmap["plus"],
@@ -386,6 +385,30 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka):
assert concept_node_as_test_obj == resolved_expected assert concept_node_as_test_obj == resolved_expected
assert concept_node.concept.get_metadata().variables == [("a", "1 + 1 "), ("b", "2 + 2")] assert concept_node.concept.get_metadata().variables == [("a", "1 + 1 "), ("b", "2 + 2")]
def test_i_can_concept_parse_function(self):
sheerka, context = self.initialize_test()
expression = "one plus func(twenty two)"
param1 = self.get_real_node(cmap, expression, "one")
parser_input = ParserInput(expression).reset()
parser_input.seek(2)
sya_node_parser = SyaNodeParser()
tokens_parser = SyaTokensParser(context, sya_node_parser, parser_input)
tokens_parser.stack = [param1]
concept_parser = SyaConceptParser(tokens_parser, cmap["plus"], tokens_parser.stack)
concept_parser.parse()
concept_node = concept_parser.concept_node
assert not concept_parser.has_error()
assert len(concept_parser.expected) == 0
expected = CNC("plus", a=CNC("one"), b=SCWC("func(", ")", CN("twenties", source="twenty two")))
resolved_expected = compute_expected_array(cmap, expression, [expected])[0]
concept_node_as_test_obj = get_test_obj(concept_node, expected)
assert concept_node_as_test_obj == resolved_expected
assert concept_node.concept.get_metadata().variables == [("a", "one"), ("b", "func(twenty two)")]
def test_i_can_concept_parse_concepts_composition(self): def test_i_can_concept_parse_concepts_composition(self):
sheerka, context = self.initialize_test() sheerka, context = self.initialize_test()
@@ -1385,6 +1408,25 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka):
# check metadata # check metadata
assert expected_concept.get_metadata().variables == [("a", "twenty one")] assert expected_concept.get_metadata().variables == [("a", "twenty one")]
def test_i_can_parse_when_function(self):
sheerka, context, parser = self.init_parser()
text = "one plus func(twenty two)"
res = parser.parse(context, ParserInput(text))
wrapper = res.body
lexer_nodes = res.body.body
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
expected = [CNC("plus", a=CC("one"), b=[RETVAL("func(twenty two)")], source=text)]
_stack, _expected = prepare_nodes_comparison(cmap, text, lexer_nodes, expected)
assert _stack == _expected
# check the metadata
expected_concept = lexer_nodes[0].concept
assert expected_concept.get_metadata().variables == [("a", "one"), ("b", "func(twenty two)")]
def test_i_can_parse_sequences(self): def test_i_can_parse_sequences(self):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
@@ -1501,6 +1543,32 @@ class TestSyaNodeParser2(TestUsingMemoryBasedSheerka):
_stack, _expected = prepare_nodes_comparison(concepts_map, text, lexer_nodes, expected) _stack, _expected = prepare_nodes_comparison(concepts_map, text, lexer_nodes, expected)
assert _stack == _expected assert _stack == _expected
def test_i_can_parse_when_expr_tokens(self):
sheerka, context, parser = self.init_parser()
text = "one plus func(twenty two)"
tokens = list(Tokenizer(text, yield_eof=False))
fun_token = tokens[4]
expr = FunctionNode(4, 9, tokens[4:10],
NameExprNode(4, 4, tokens[4:5]),
NameExprNode(9, 9, tokens[9:10]),
[FunctionParameter(NameExprNode(6, 8, tokens[6:9]), None)])
tokens[4:] = [Token(TokenKind.EXPR, expr, fun_token.index, fun_token.line, fun_token.column)]
res = parser.parse(context, ParserInput(None, tokens=tokens))
wrapper = res.body
lexer_nodes = res.body.body
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
expected = [CNC("plus", a=CC("one"), b=[RETVAL("func(twenty two)")], source=text)]
_stack, _expected = prepare_nodes_comparison(cmap, text, lexer_nodes, expected)
assert _stack == _expected
# check the metadata
expected_concept = lexer_nodes[0].concept
assert expected_concept.get_metadata().variables == [("a", "one"), ("b", "func(twenty two)")]
@pytest.mark.parametrize("text, expected_result", [ @pytest.mark.parametrize("text, expected_result", [
("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]), ("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]),
("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]), ("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]),