Files
Sheerka-Old/tests/parsers/parsers_utils.py
T
kodjo 646c428edb Fixed #30 : Add variable support in BNF concept definition
Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
2021-02-24 17:23:03 +01:00

516 lines
17 KiB
Python

import ast
from dataclasses import dataclass
from core.builtin_concepts import ReturnValueConcept
from core.builtin_helpers import CreateObjectIdentifiers
from core.concept import CC, Concept, ConceptParts, DoNotResolve, CIO, CMV
from core.tokenizer import Tokenizer, TokenKind, Token
from core.utils import get_text_from_tokens, tokens_index
from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, CN, UTN, \
SCN, RN, UnrecognizedTokensNode, SourceCodeNode
from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import SyaConceptParserHelper
from parsers.expressions import NameExprNode, AndNode, OrNode, NotNode, VariableNode, ComparisonNode, ComparisonType
from sheerkarete.common import V
from sheerkarete.conditions import Condition, AndConditions
@dataclass
class Obj:
    """Generic test fixture: an object with three free-form properties and an optional parent."""
    prop_a: object
    prop_b: object = None
    prop_c: object = None
    parent: object = None
class AND:
    """Test stand-in for AndNode: carries its sub-parts and an optional source text."""

    def __init__(self, *parts, source=None):
        self.parts, self.source = parts, source
class OR:
    """Test stand-in for OrNode: carries its sub-parts and an optional source text."""

    def __init__(self, *parts, source=None):
        self.parts, self.source = parts, source
@dataclass
class NOT:
    """ Test class for NotNode"""
    expr: object       # the negated sub-expression (another test helper node)
    source: str = None  # optional literal source text used to locate the node
@dataclass
class EXPR:
    """Test class for NameNode. E stands for Expression"""
    source: str  # literal source text of the expression
@dataclass
class VAR:
    """Test class for VarNode"""
    full_name: str      # possibly dotted name, e.g. "obj.attr"
    source: str = None  # optional literal source text; defaults to full_name when locating tokens
@dataclass
class EQ:
    """Test class for ComparisonNode with ComparisonType.EQUALS."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
@dataclass
class NEQ:
    """Test class for ComparisonNode with the not-equals comparison type."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
@dataclass
class GT:
    """Test class for ComparisonNode with ComparisonType.GREATER_THAN."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
@dataclass
class GTE:
    """Test class for ComparisonNode with ComparisonType.GREATER_THAN_OR_EQUALS."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
@dataclass
class LT:
    """Test class for ComparisonNode with ComparisonType.LESS_THAN."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
@dataclass
class LTE:
    """Test class for ComparisonNode with ComparisonType.LESS_THAN_OR_EQUALS."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
@dataclass
class IN:
    """Test class for ComparisonNode with ComparisonType.IN."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
@dataclass
class NIN:  # for NOT IN
    """Test class for ComparisonNode with ComparisonType.NOT_IN."""
    left: object
    right: object
    # Annotated so it is a real dataclass field (settable via the constructor),
    # consistent with NOT/VAR; previously an unannotated class attribute.
    source: str = None
# Maps the test helper class names above to the real ComparisonType members.
# NOTE(review): 'NOT_EQUAlS' (lowercase 'l') presumably mirrors the member
# name exactly as spelled in the ComparisonType enum — confirm there before
# "fixing" the casing here.
comparison_type_mapping = {
    "EQ": ComparisonType.EQUALS,
    "NEQ": ComparisonType.NOT_EQUAlS,
    "LT": ComparisonType.LESS_THAN,
    "LTE": ComparisonType.LESS_THAN_OR_EQUALS,
    "GT": ComparisonType.GREATER_THAN,
    "GTE": ComparisonType.GREATER_THAN_OR_EQUALS,
    "IN": ComparisonType.IN,
    "NIN": ComparisonType.NOT_IN,
}
def get_expr_node_from_test_node(full_text, test_node):
    """
    Returns EXPR, OR, NOT, AND object to ease the comparison with the real ExprNode

    :param full_text: complete expression source, tokenized once to compute positions
    :param test_node: tree of test helpers (EXPR/AND/OR/NOT/VAR/EQ/NEQ/GT/GTE/LT/LTE/IN/NIN)
    :return: the equivalent real node (NameExprNode, AndNode, OrNode, NotNode,
        VariableNode or ComparisonNode) with start/end token positions resolved
    """
    full_text_as_tokens = list(Tokenizer(full_text, yield_eof=False))

    def get_pos(nodes):
        # Bounding token range (min start, max end) covering all given nodes.
        start, end = None, None
        for n in nodes:
            if start is None or start > n.start:
                start = n.start
            if end is None or end < n.end:
                end = n.end
        return start, end

    def get_pos_from_source(source):
        # Locate the tokenized source text inside the full token list.
        source_as_node = list(Tokenizer(source, yield_eof=False))
        start = tokens_index(full_text_as_tokens, source_as_node)
        end = start + len(source_as_node) - 1
        return start, end

    def get_expr_node(node):
        # Recursively convert a test helper node into its real counterpart.
        if isinstance(node, EXPR):
            value_as_tokens = list(Tokenizer(node.source, yield_eof=False))
            start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
            end = start + len(value_as_tokens) - 1
            return NameExprNode(start, end, full_text_as_tokens[start: end + 1])
        if isinstance(node, AND):
            parts = [get_expr_node(part) for part in node.parts]
            start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
            return AndNode(start, end, full_text_as_tokens[start: end + 1], *parts)
        if isinstance(node, OR):
            parts = [get_expr_node(part) for part in node.parts]
            start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
            return OrNode(start, end, full_text_as_tokens[start: end + 1], *parts)
        if isinstance(node, NOT):
            part = get_expr_node(node.expr)
            # start - 2: include the 'not' keyword token and the whitespace token
            # preceding the operand — assumes a single space; TODO confirm.
            start, end = get_pos_from_source(node.source) if node.source else (part.start - 2, part.end)
            return NotNode(start, end, full_text_as_tokens[start: end + 1], part)
        if isinstance(node, VAR):
            value_as_tokens = list(Tokenizer(node.source or node.full_name, yield_eof=False))
            start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
            end = start + len(value_as_tokens) - 1
            # full_name may be dotted: first part is the variable, the rest its attribute path.
            parts = node.full_name.split(".")
            if len(parts) == 1:
                return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0])
            else:
                return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:])
        if isinstance(node, (EQ, NEQ, GT, GTE, LT, LTE, IN, NIN)):
            # The helper class name selects the real ComparisonType member.
            node_type = comparison_type_mapping[type(node).__name__]
            left_node, right_node = get_expr_node(node.left), get_expr_node(node.right)
            start, end = get_pos_from_source(node.source) if node.source else get_pos([left_node, right_node])
            return ComparisonNode(start, end, full_text_as_tokens[start: end + 1],
                                  node_type, left_node, right_node)

    return get_expr_node(test_node)
def _index(tokens, expr, index):
    """
    Locate the nth occurrence of a sub list inside a bigger list.

    :param tokens: list of token string values to scan
    :param expr: expression whose tokenized form (EOF excluded) is searched for
    :param index: number of matches to skip before returning
    :return: tuple (start position, match length in tokens)
    :raises ValueError: when no (remaining) occurrence exists
    """
    needle = [token.str_value for token in Tokenizer(expr) if token.type != TokenKind.EOF]
    remaining = index
    for pos in range(len(tokens) - len(needle) + 1):
        # slice comparison replaces the manual element-by-element check
        if tokens[pos:pos + len(needle)] == needle:
            if remaining == 0:
                return pos, len(needle)
            remaining -= 1
    raise ValueError(f"substring '{expr}' not found")
def compute_debug_array(res):
    """Build a simplified, comparable view of each result's debug list.

    Token entries become "T(<value>)" (whitespace tokens are dropped);
    non-token entries become "C(<concept name>)".
    """
    to_compare = []
    for entry in res:
        simplified = []
        for item in entry.debug:
            if not isinstance(item, Token):
                # concept-bearing entry
                simplified.append("C(" + item.concept.name + ")")
            elif item.type != TokenKind.WHITESPACE:
                simplified.append("T(" + item.value + ")")
        to_compare.append(simplified)
    return to_compare
def get_node(
        concepts_map,
        expression_as_tokens,
        sub_expr,
        concept_key=None,
        skip=0,
        is_bnf=False,
        sya=False,
        init_empty_body=False,
        exclude_body=False):
    """
    Tries to find sub in expression
    When found, transform it to its correct type
    :param expression_as_tokens: full expression
    :param sub_expr: sub expression to search in the full expression
    :param concepts_map: hash of the known concepts
    :param concept_key: key of the concept if different from sub_expr
    :param skip: number of occurrences of sub_expr to skip
    :param is_bnf: True if the concept to search is a bnf definition
    :param sya: Return SyaConceptParserHelper instead of a ConceptNode when needed
    :param init_empty_body: if True adds the source in the body (actually in compiled.BODY)
    :param exclude_body: Ask to not compare body
    :return: the resolved node, with start/end positions fixed; exact type
        depends on the kind of sub_expr (see the isinstance dispatch below)
    """
    # Literal marker for a closing parenthesis.
    if sub_expr == "')'":
        return ")"
    # Already-built objects pass through untouched.
    if isinstance(sub_expr, ReturnValueConcept):
        return sub_expr
    if isinstance(sub_expr, (scnode, utnode, DoNotResolve)):
        return sub_expr
    if isinstance(sub_expr, CIO):
        # Bind the real concept, then resolve positions from the source text.
        sub_expr.set_concept(concepts_map[sub_expr.concept_name])
        source = sub_expr.source or sub_expr.concept_name
        if source:
            node = get_node(concepts_map, expression_as_tokens, source, sya=sya)
            sub_expr.start = node.start
            sub_expr.end = node.end
        return sub_expr
    if isinstance(sub_expr, cnode):
        # for cnode, map the concept key to the one from concepts_maps if needed
        if sub_expr.concept_key.startswith("#"):
            return cnode(
                concepts_map[sub_expr.concept_key[1:]].key,
                sub_expr.start,
                sub_expr.end,
                sub_expr.source
            )
        else:
            return sub_expr
    if isinstance(sub_expr, SCWC):
        # Resolve first / last / inner content recursively, then fix positions.
        sub_expr.first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya)
        sub_expr.last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya)
        sub_expr.content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content]
        sub_expr.fix_pos(sub_expr.first)
        sub_expr.fix_pos(sub_expr.last)
        return sub_expr
        # return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()
    if isinstance(sub_expr, SCN):
        node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya)
        sub_expr.fix_pos(node)
        return sub_expr
    if isinstance(sub_expr, RN):
        # Regex node: only positions need resolving.
        start, length = _index(expression_as_tokens, sub_expr.source, skip)
        sub_expr.start = start
        sub_expr.end = start + length - 1
        return sub_expr
    if isinstance(sub_expr, (CNC, CC, CN)):
        # Resolve the underlying concept through a recursive lookup.
        concept_node = get_node(
            concepts_map,
            expression_as_tokens,
            sub_expr.source or sub_expr.concept_key,
            sub_expr.concept_key, sya=sya)
        if not hasattr(concept_node, "concept"):
            raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.")
        concept_found = concept_node.concept
        sub_expr.concept_key = concept_found.key
        sub_expr.concept = concept_found
        sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
        if hasattr(sub_expr, "compiled"):
            # Resolve each compiled part; strings naming a known concept are
            # replaced by a CC wrapping a copy of that concept.
            for k, v in sub_expr.compiled.items():
                node = get_node(concepts_map, expression_as_tokens, v, sya=sya,
                                exclude_body=exclude_body)  # need to get start and end positions
                if isinstance(v, str) and v in concepts_map:
                    new_value_concept = concepts_map[v]
                    new_value = CC(Concept().update_from(new_value_concept), exclude_body=exclude_body)
                    if init_empty_body:
                        init_body(new_value, concept_found, v)
                else:
                    new_value = node
                sub_expr.compiled[k] = new_value
                sub_expr.fix_pos(node)
        if init_empty_body:
            init_body(sub_expr, concept_found, sub_expr.source)
        if hasattr(sub_expr, "fix_source"):
            sub_expr.fix_source(expression_as_tokens[sub_expr.start: sub_expr.end + 1])
        return sub_expr
    if isinstance(sub_expr, UTN):
        node = get_node(concepts_map, expression_as_tokens, sub_expr.source)
        sub_expr.fix_pos(node)
        return sub_expr
    if isinstance(sub_expr, short_cnode):
        # Shorthand concept node: re-enter with is_bnf forced on.
        return get_node(concepts_map, expression_as_tokens, sub_expr.source,
                        concept_key=sub_expr.concept_key, skip=skip, is_bnf=True, sya=sya)
    if isinstance(sub_expr, tuple):
        # (sub_expr, skip) tuple form: search the nth occurrence.
        return get_node(concepts_map, expression_as_tokens, sub_expr[0],
                        concept_key=concept_key, skip=sub_expr[1], is_bnf=is_bnf, sya=sya)
    # sub_expr is a plain string: locate it in the token list.
    start, length = _index(expression_as_tokens, sub_expr, skip)
    # special case of python source code
    if "+" in sub_expr and sub_expr.strip() != "+":
        return SCN(sub_expr, start, start + length - 1)
    # try to match one of the concept from the map
    concept_key = concept_key or sub_expr
    concept_found = concepts_map.get(concept_key, None)
    if concept_found:
        concept_found = Concept().update_from(concept_found)  # make a copy when massively used in tests
        if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf:
            return SyaConceptParserHelper(concept_found, start, start + length - 1)
        elif init_empty_body:
            node = CNC(concept_found, start, start + length - 1, source=sub_expr, exclude_body=exclude_body)
            init_body(node, concept_found, sub_expr)
            return node
        else:
            return CN(concept_found, start, start + length - 1, source=sub_expr)
    else:
        # else an UnrecognizedTokensNode
        return UTN(sub_expr, start, start + length - 1)
def init_body(item, concept, value):
    """Normalize the BODY entry of ``item.compiled``.

    A literal "body" key is promoted to ConceptParts.BODY. Otherwise, when the
    concept is present, declares no body of its own and none was compiled yet,
    a DoNotResolve placeholder carrying ``value`` is stored.
    """
    compiled = item.compiled
    if "body" in compiled:
        compiled[ConceptParts.BODY] = compiled.pop("body")
        return
    if not concept or concept.get_metadata().body or ConceptParts.BODY in compiled:
        return
    compiled[ConceptParts.BODY] = DoNotResolve(value)
def compute_expected_array(concepts_map, expression, expected, sya=False, init_empty_body=False, exclude_body=False):
    """
    Computes a simple but sufficient version of the result of infix_to_postfix()
    :param concepts_map: hash of the known concepts
    :param expression: full expression source to tokenize
    :param expected: iterable of sub-expressions / test helpers to resolve
    :param sya: if true, generate an SyaConceptParserHelper instead of a cnode
    :param init_empty_body: if True adds the source in the body (actually in compiled.BODY)
    :param exclude_body: do not include ConceptParts.BODY in comparison
    :return: list of resolved nodes, one per entry of ``expected``
    """
    # Token string values of the expression, EOF excluded.
    tokens = [token.str_value for token in Tokenizer(expression) if token.type != TokenKind.EOF]
    resolved = []
    for sub_expr in expected:
        resolved.append(
            get_node(
                concepts_map,
                tokens,
                sub_expr,
                sya=sya,
                init_empty_body=init_empty_body,
                exclude_body=exclude_body))
    return resolved
def get_unrecognized_node(start, text):
    """Tokenize ``text`` and wrap it in an UnrecognizedTokensNode starting at ``start``."""
    node_tokens = list(Tokenizer(text, yield_eof=False))
    end = start + len(node_tokens) - 1
    return UnrecognizedTokensNode(start, end, node_tokens)
def get_source_code_node(start, text, concepts_map, id_manager=None):
    """Build a SourceCodeNode whose python source references concepts by generated ids.

    Every concept name appearing in ``text`` is replaced by a synthetic
    identifier; the rewritten source is parsed (eval mode) and attached to a
    PythonNode that can map those ids back to the original concepts.
    """
    if id_manager is None:
        id_manager = CreateObjectIdentifiers()
    # Assign a synthetic identifier to every known concept.
    id_mapping = {}
    concept_mapping_by_id = {}
    for concept_name, concept in concepts_map.items():
        identifier = id_manager.get_identifier(concept, "__C__")
        id_mapping[concept_name] = identifier
        concept_mapping_by_id[identifier] = concept
    # Rewrite the source tokens, substituting concept names with their ids.
    tokens = list(Tokenizer(text, yield_eof=False))
    rewritten = [
        Token(TokenKind.IDENTIFIER, id_mapping[t.value], -1, -1, -1)
        if t.type == TokenKind.IDENTIFIER and t.value in id_mapping
        else t
        for t in tokens
    ]
    text_to_compile = get_text_from_tokens(rewritten)
    # Parse the rewritten source and wrap it in a python node.
    tree = ast.parse(text_to_compile, "<source>", 'eval')
    python_node = PythonNode(text_to_compile, tree, text)
    python_node.objects = concept_mapping_by_id
    return SourceCodeNode(start, start + len(tokens) - 1, tokens, text, python_node)
def resolve_test_concept(concept_map, hint):
    """Resolve a test hint (str, CC or CMV) against ``concept_map``.

    Strings are looked up directly; CC hints are rebuilt with their compiled
    parts recursively resolved; CMV hints are rebuilt with their variables.
    Other hint kinds (CV, CIO, ...) are not supported yet.
    """
    if isinstance(hint, str):
        return concept_map[hint]
    if isinstance(hint, CC):
        resolved_parts = {
            key: resolve_test_concept(concept_map, value)
            for key, value in hint.compiled.items()
        }
        return CC(concept_map[hint.concept_key], source=hint.source,
                  exclude_body=hint.exclude_body, **resolved_parts)
    if isinstance(hint, CMV):
        return CMV(concept_map[hint.concept_key], **hint.variables)
    # TODO: CV / CIO hints are not handled yet.
    raise NotImplementedError()
def get_rete_conditions(*conditions_as_string):
    """
    Transform a list of strings into an AndConditions of Condition (Rete conditions).

    :param conditions_as_string: conditions in the form 'identifier|attribute|value'.
        An identifier starting with "#" denotes a variable (the "#" is stripped):
        "#__x_00__|__name__|'__ret'" -> Condition(V('__x_00__'), '__name__', '__ret')
        Caution, the value part is interpreted: quoted -> string, True/False -> bool,
        anything else -> int.
        "identifier|__name__|'True'" -> Condition(identifier, '__name__', 'True')  # the string 'True'
        "identifier|__name__|True" -> Condition(identifier, '__name__', True)      # the bool True
    :return: an AndConditions wrapping one Condition per input string
    """
    conditions = []
    for spec in conditions_as_string:
        identifier, attribute, raw_value = spec.split("|")
        if identifier.startswith("#"):
            identifier = V(identifier[1:])
        if raw_value.startswith("'"):
            value = raw_value[1:-1]
        elif raw_value in ("True", "False"):
            value = raw_value == "True"
        else:
            value = int(raw_value)
        conditions.append(Condition(identifier, attribute, value))
    return AndConditions(conditions)
def get_test_obj(test_obj, real_obj, to_compare_delegate=None):
    """
    From a production object (Concept, ConceptNode, ....)
    Create a test object (CNC, CC ...) that can be used to validate the unit tests.

    Lists and dicts are walked recursively (sizes must match); leaves without a
    ``to_compare`` method are replaced by the corresponding real object.

    :param test_obj: expected test-side structure
    :param real_obj: production structure mirrored against it
    :param to_compare_delegate: NOTE(review): currently unused — the recursion
        always passes ``get_test_obj`` itself as the delegate; confirm intent.
    :return: a structure directly comparable to ``test_obj``
    :raises Exception: when a list/dict level differs in size
    """
    if isinstance(test_obj, list):
        if len(test_obj) != len(real_obj):
            raise Exception(f"Not the same size ! {test_obj=}, {real_obj=}")
        return [get_test_obj(expected, actual) for expected, actual in zip(test_obj, real_obj)]
    if isinstance(test_obj, dict):
        if len(test_obj) != len(real_obj):
            raise Exception(f"Not the same size ! {test_obj=}, {real_obj=}")
        return {key: get_test_obj(value, real_obj[key]) for key, value in test_obj.items()}
    if not hasattr(test_obj, "to_compare"):
        return real_obj
    return test_obj.to_compare(real_obj, get_test_obj)