Fixed #109 : Mix python and concept. List comprehension

Fixed #110 : SheerkaDebugManager: add list_debug_settings
Fixed #111 : SheerkaDebugManager: Implement ListDebugLogger
Fixed #112 : SyaNodeParser: rewrite this parser
Fixed #113 : Sheerka: Add enable_parser_caching to disable parser caching
Fixed #114 : SyaNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #115 : BnfNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #116 : SequenceNodeParser : Implement fast cache to resolve unrecognized tokens requests
Fixed #117 : ResolveMultiplePluralAmbiguityEvaluator: Resolve Multiple plural ambiguity
This commit is contained in:
2021-09-06 11:51:50 +02:00
parent 71d1b1d1ca
commit 54e5681c5a
57 changed files with 5179 additions and 3125 deletions
+292 -172
View File
@@ -1,24 +1,23 @@
import ast
from dataclasses import dataclass
from typing import Union, List
from typing import List, Union
from core.builtin_concepts import ReturnValueConcept
from core.builtin_helpers import CreateObjectIdentifiers
from core.concept import Concept, ConceptParts, DoNotResolve, AllConceptParts
from core.concept import AllConceptParts, Concept, ConceptParts, DoNotResolve
from core.rule import Rule
from core.tokenizer import Tokenizer, TokenKind, Token
from core.utils import get_text_from_tokens, tokens_index, str_concept
from parsers.BaseExpressionParser import NameExprNode, AndNode, OrNode, NotNode, VariableNode, ComparisonNode, \
ComparisonType, \
FunctionParameter
from parsers.BaseNodeParser import UnrecognizedTokensNode, SourceCodeNode, RuleNode, ConceptNode, \
SourceCodeWithConceptNode
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_text_from_tokens, str_concept, tokens_index
from parsers.BaseExpressionParser import AndNode, ComparisonNode, ComparisonType, Comprehension, FunctionParameter, \
ListComprehensionNode, ListNode, NameExprNode, \
NotNode, OrNode, VariableNode, comma
from parsers.BaseNodeParser import ConceptNode, RuleNode, SourceCodeNode, SourceCodeWithConceptNode, \
UnrecognizedTokensNode
from parsers.FunctionParser import FunctionNode
from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import SyaConceptParserHelper
from sheerkapython.python_wrapper import sheerka_globals
from sheerkarete.common import V
from sheerkarete.conditions import Condition, AndConditions, NegatedCondition, NegatedConjunctiveConditions
from sheerkarete.conditions import AndConditions, Condition, NegatedCondition, NegatedConjunctiveConditions
@dataclass
@@ -29,104 +28,254 @@ class Obj:
parent: object = None
class AND:
class ExprTestObj:
    """Base class for expression test objects.

    Subclasses (AND, OR, NOT, EXPR, VAR, ...) build the corresponding real
    expression node (AndNode, OrNode, NameExprNode, ...) via get_expr_node(),
    locating themselves inside the full token stream of the text under test.
    """

    @staticmethod
    def get_pos(nodes):
        # Smallest .start / largest .end over all nodes; (None, None) if empty.
        start, end = None, None
        for n in nodes:
            if start is None or start > n.start:
                start = n.start
            if end is None or end < n.end:
                end = n.end
        return start, end

    @staticmethod
    def get_pos_from_source(source, full_text_as_tokens):
        """Locate *source* inside the full token stream and return (start, end).

        *source* is either a snippet string or a (snippet, to_skip) tuple;
        to_skip is forwarded to tokens_index (presumably the number of earlier
        matches to skip — TODO confirm against tokens_index).
        """
        if isinstance(source, tuple):
            source, to_skip = source[0], source[1]
        else:
            to_skip = 0
        source_as_node = list(Tokenizer(source, yield_eof=False))
        start = tokens_index(full_text_as_tokens, source_as_node, skip=to_skip)
        end = start + len(source_as_node) - 1
        return start, end

    @staticmethod
    def as_tokens(source):
        # Same (snippet, to_skip) convention as get_pos_from_source, but
        # returns the tokenized snippet itself instead of its position.
        if isinstance(source, tuple):
            source, to_skip = source
        else:
            source, to_skip = source, 0
        return list(Tokenizer(source, yield_eof=False)), to_skip

    def get_expr_node(self, full_text_as_tokens=None):
        # Subclasses must produce their concrete expression node here.
        raise NotImplementedError

    @staticmethod
    def safe_get_expr_node(obj, full_text_as_tokens):
        # None passes through; bare str/tuple sources are promoted to EXPR.
        if obj is None:
            return None
        obj = EXPR(obj) if isinstance(obj, (str, tuple)) else obj
        return obj.get_expr_node(full_text_as_tokens)
class AND(ExprTestObj):
    """Test helper mirroring AndNode: a conjunction of child test objects."""

    def __init__(self, *parts, source=None):
        self.parts = parts
        self.source = source

    def get_expr_node(self, full_text_as_tokens=None):
        # Resolve every child first; their spans drive the node's span when no
        # explicit source snippet was supplied.
        resolved = [child.get_expr_node(full_text_as_tokens) for child in self.parts]
        if self.source:
            start, end = self.get_pos_from_source(self.source, full_text_as_tokens)
        else:
            start, end = self.get_pos(resolved)
        return AndNode(start, end, full_text_as_tokens[start: end + 1], *resolved)
class OR:
class OR(ExprTestObj):
    """Test helper mirroring OrNode: a disjunction of child test objects."""

    def __init__(self, *parts, source=None):
        self.parts = parts
        self.source = source

    def get_expr_node(self, full_text_as_tokens=None):
        # Resolve every child first; their spans drive the node's span when no
        # explicit source snippet was supplied.
        resolved = [child.get_expr_node(full_text_as_tokens) for child in self.parts]
        if self.source:
            start, end = self.get_pos_from_source(self.source, full_text_as_tokens)
        else:
            start, end = self.get_pos(resolved)
        return OrNode(start, end, full_text_as_tokens[start: end + 1], *resolved)
@dataclass
# NOTE(review): next line is diff residue — the pre-change header without base.
class NOT:
class NOT(ExprTestObj):
    """ Test class for NotNode"""
    # NOTE(review): next line is diff residue — the pre-change untyped field.
    expr: object
    expr: ExprTestObj
    source: str = None
    def get_expr_node(self, full_text_as_tokens=None):
        part = self.expr.get_expr_node(full_text_as_tokens)
        # Without an explicit source the span is widened two tokens to the
        # left — presumably to cover the leading negation tokens; TODO confirm.
        start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else (
            part.start - 2, part.end)
        return NotNode(start, end, full_text_as_tokens[start: end + 1], part)
@dataclass
# NOTE(review): next two lines are diff residue — the pre-change definition.
class EXPR:
    """Test class for NameNode. E stands for Expression"""
class EXPR(ExprTestObj):
    """Test class for NameNode"""
    source: str
    def get_expr_node(self, full_text_as_tokens=None):
        # Tokenize the snippet, find it in the full token stream, and wrap the
        # matched span in a NameExprNode.
        value_as_tokens, to_skip = self.as_tokens(self.source)
        start = tokens_index(full_text_as_tokens, value_as_tokens, to_skip)
        end = start + len(value_as_tokens) - 1
        return NameExprNode(start, end, full_text_as_tokens[start: end + 1])
@dataclass
# NOTE(review): next line is diff residue — the pre-change header without base.
class VAR:
class VAR(ExprTestObj):
    """Test class for VarNode"""
    full_name: str  # dotted variable path, e.g. "a.b.c"
    source: str = None  # overrides full_name when locating the tokens
    def get_expr_node(self, full_text_as_tokens=None):
        # Locate the variable text (source wins over full_name) in the stream.
        value_as_tokens = list(Tokenizer(self.source or self.full_name, yield_eof=False))
        start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
        end = start + len(value_as_tokens) - 1
        # A dotted name becomes a VariableNode of the head plus its attributes.
        parts = self.full_name.split(".")
        if len(parts) == 1:
            return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0])
        else:
            return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:])
@dataclass
# NOTE(review): next three lines are diff residue — pre-change standalone EQ.
class EQ:
left: object
right: object
class CompExprTestObj(ExprTestObj):
    """
    Test object for comparison ==, <=, ...
    """
    left: ExprTestObj
    right: ExprTestObj
    source: str = None
# NOTE(review): next five lines are diff residue — pre-change standalone NEQ.
@dataclass
class NEQ:
left: object
right: object
source: str = None
    def get_expr_node(self, full_text_as_tokens=None):
        # The concrete comparison kind is derived from the subclass name
        # (EQ, NEQ, GT, ...) via comparison_type_mapping.
        node_type = comparison_type_mapping[type(self).__name__]
        left_node = self.left.get_expr_node(full_text_as_tokens)
        right_node = self.right.get_expr_node(full_text_as_tokens)
        start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else \
            self.get_pos([left_node, right_node])
        return ComparisonNode(start, end, full_text_as_tokens[start: end + 1], node_type, left_node, right_node)
@dataclass
# NOTE(review): next four lines are diff residue — pre-change standalone GT.
class GT:
left: object
right: object
source: str = None
class EQ(CompExprTestObj):
    pass
@dataclass
# NOTE(review): next four lines are diff residue — pre-change standalone GTE.
class GTE:
left: object
right: object
source: str = None
class NEQ(CompExprTestObj):
    pass
@dataclass
# NOTE(review): next four lines are diff residue — pre-change standalone LT.
class LT:
left: object
right: object
source: str = None
class GT(CompExprTestObj):
    pass
@dataclass
# NOTE(review): next four lines are diff residue — pre-change standalone LTE.
class LTE:
left: object
right: object
source: str = None
class GTE(CompExprTestObj):
    pass
@dataclass
# NOTE(review): next four lines are diff residue — pre-change standalone IN.
class IN:
left: object
right: object
source: str = None
class LT(CompExprTestObj):
    pass
@dataclass
# NOTE(review): next four lines are diff residue — pre-change standalone NIN.
class NIN: # for NOT INT
left: object
right: object
source: str = None
class LTE(CompExprTestObj):
    pass
@dataclass
# NOTE(review): next line is diff residue — pre-change PAREN header.
class PAREN: # for parenthesis node
class IN(CompExprTestObj):
    pass
@dataclass
class NIN(CompExprTestObj):  # for NOT IN ("not in" membership test)
    pass
@dataclass
class PAREN(ExprTestObj):  # for parenthesis node
    node: object
    source: str = None
class L_EXPR(ExprTestObj):
    """Test class for ListNode: ``first item sep item ... last``.

    ``first``/``last`` are the opening/closing snippets (may be falsy/None);
    ``sep`` defaults to the shared ``comma`` token.
    """

    def __init__(self, first, last, *items, sep=None, source=None):
        self.first = first
        self.last = last
        self.items = items
        self.sep = sep or comma
        self.source = source

    def get_expr_node(self, full_text_as_tokens=None):
        first = self.safe_get_expr_node(self.first, full_text_as_tokens)
        last = self.safe_get_expr_node(self.last, full_text_as_tokens)
        items = [self.safe_get_expr_node(item, full_text_as_tokens) for item in self.items]
        if self.source is None:
            # Reconstruct the source text from the parts.
            # NOTE(review): assumes first/last are plain strings here — a
            # (snippet, skip) tuple would break the concatenation; confirm.
            source = self.first if self.first else ""
            source += f"{self.sep.value} ".join(item.get_source() for item in items)
            if self.last:
                source += self.last
        else:
            source = self.source
        start, end = self.get_pos_from_source(source, full_text_as_tokens)
        return ListNode(start, end, full_text_as_tokens[start: end + 1], first, last, items, self.sep)
@dataclass
class LCC:
    """
    List comprehension comprehension

    One ``for target in iterable [if if_expr]`` clause of a list
    comprehension, as consumed by LC.
    """
    target: object    # loop target test object
    iterable: object  # iterated expression test object
    if_expr: object   # optional filter test object; may be falsy/None
@dataclass
class LC(ExprTestObj):  # for List Comprehension node
    """Test class for ListComprehensionNode.

    ``element`` is the produced expression and ``generators`` a list of
    ``(target, iterable, if_expr)`` triples (or LCC instances); plain
    str/tuple entries are promoted to EXPR test objects on first resolution.
    """
    element: object
    generators: list
    source: str = None

    def get_expr_node(self, full_text_as_tokens=None):
        # first transform str into NameExprTestObj (ie EXPR)
        if isinstance(self.element, str):
            self.element = EXPR(self.element)
        comprehensions = []
        nodes = []
        for comp in self.generators:
            target = EXPR(comp[0]) if isinstance(comp[0], (str, tuple)) else comp[0]
            iterable = EXPR(comp[1]) if isinstance(comp[1], (str, tuple)) else comp[1]
            if_expr = EXPR(comp[2]) if isinstance(comp[2], (str, tuple)) else comp[2]
            comprehensions.append(LCC(target, iterable, if_expr))
        self.generators = comprehensions
        # then transform into ListComprehensionNode
        element = self.element.get_expr_node(full_text_as_tokens)
        nodes.append(element)
        comprehensions = []
        for comp in self.generators:
            target = comp.target.get_expr_node(full_text_as_tokens)
            iterable = comp.iterable.get_expr_node(full_text_as_tokens)
            if_expr = comp.if_expr.get_expr_node(full_text_as_tokens) if comp.if_expr else None
            comprehensions.append(Comprehension(target, iterable, if_expr))
            # Bug fix: a clause without an ``if`` leaves if_expr=None, and
            # None must not reach get_pos(), which reads .start/.end on every
            # node (it raised AttributeError when self.source was not given).
            nodes.extend(n for n in (target, iterable, if_expr) if n is not None)
        start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else self.get_pos(nodes)
        return ListComprehensionNode(start, end, full_text_as_tokens[start: end + 1], element, comprehensions)
class CC:
"""
Concept class for test purpose
@@ -320,10 +469,13 @@ class CMV:
Test class that only compare the key and the metadata variables
"""
# NOTE(review): next line is diff residue — the pre-change signature; the
# current signature is the one that follows it.
def __init__(self, concept, **kwargs):
def __init__(self, concept, source=None, **kwargs):
    # Accept either a Concept instance or a bare key string.
    self.concept_key = concept.key if isinstance(concept, Concept) else concept
    self.concept = concept if isinstance(concept, Concept) else None
    self.variables = kwargs  # expected metadata variables, compared by __eq__
    self.source = source # to use when the key is different from the sub str to search when filling start and stop
    self.start = None # for debug purpose, indicate where the concept starts
    self.end = None # for debug purpose, indicate where the concept ends
def __eq__(self, other):
if id(self) == id(other):
@@ -352,6 +504,21 @@ class CMV:
txt += f", {k}='{v}'"
return txt + ")"
def fix_pos(self, node):
    """Widen the recorded (self.start, self.end) span to cover *node*.

    *node* may expose .start/.end attributes or be a (start, end) tuple;
    anything else contributes nothing. Returns self for chaining.
    """
    if hasattr(node, "start"):
        candidate_start = node.start
    elif isinstance(node, tuple):
        candidate_start = node[0]
    else:
        candidate_start = None
    if hasattr(node, "end"):
        candidate_end = node.end
    elif isinstance(node, tuple):
        candidate_end = node[1]
    else:
        candidate_end = None
    # Only ever widen the span, never shrink it.
    if candidate_start is not None and (self.start is None or candidate_start < self.start):
        self.start = candidate_start
    if candidate_end is not None and (self.end is None or candidate_end > self.end):
        self.end = candidate_end
    return self
def transform_real_obj(self, other, get_test_obj_delegate):
if isinstance(other, CMV):
return other
@@ -730,7 +897,7 @@ class CNC(CN):
self_compile_to_use = self.compiled or compiled
compiled = get_test_obj_delegate(self_compile_to_use, compiled, get_test_obj_delegate)
compiled = get_test_obj_delegate(compiled, self_compile_to_use, get_test_obj_delegate)
return CNC(other.concept,
other.source if self.source is not None else None,
other.start if self.start is not None else None,
@@ -865,7 +1032,7 @@ class RN(HelperWithPos):
raise Exception(f"Expecting RuleNode but received {other=}")
class FN:
class FN(ExprTestObj):
"""
Test class only
It matches with FunctionNode but with less constraints
@@ -931,6 +1098,32 @@ class FN:
raise Exception(f"Expecting FunctionNode but received {other=}")
def get_expr_node(self, full_text_as_tokens=None):
    """Build a FunctionNode from this FN test object.

    self.first/self.last locate the call's opening and closing snippets;
    self.parameters iterates (value, separator) pairs — NOTE(review):
    assumed shape, confirm against FN's field declarations.
    """
    start, end = self.get_pos_from_source(self.first, full_text_as_tokens)
    first = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
    start, end = self.get_pos_from_source(self.last, full_text_as_tokens)
    last = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
    parameters = []
    for param_value, sep in self.parameters:
        if isinstance(param_value, str):
            # Plain strings are located directly in the token stream.
            start, end = self.get_pos_from_source(param_value, full_text_as_tokens)
            param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
        else:
            # Otherwise it is a test object that can resolve itself.
            param_as_expr_node = param_value.get_expr_node(full_text_as_tokens)
        if sep:
            # The separator is assumed to start right after the parameter —
            # TODO confirm this holds when whitespace tokens intervene.
            sep_tokens = Tokenizer(sep, yield_eof=False)
            start = param_as_expr_node.end + 1
            end = start + len(list(sep_tokens)) - 1
            sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
        else:
            sep_as_expr_node = None
        parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node))
    # The function node spans from the start of first to the end of last.
    start, end = first.start, last.end
    return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters)
@dataclass()
class NEGCOND:
@@ -966,94 +1159,7 @@ def get_expr_node_from_test_node(full_text, test_node):
Returns EXPR, OR, NOT, AND object to ease the comparison with the real ExprNode
"""
full_text_as_tokens = list(Tokenizer(full_text, yield_eof=False))
def get_pos(nodes):
start, end = None, None
for n in nodes:
if start is None or start > n.start:
start = n.start
if end is None or end < n.end:
end = n.end
return start, end
def get_pos_from_source(source):
if isinstance(source, tuple):
source, to_skip = source[0], source[1]
else:
to_skip = 0
source_as_node = list(Tokenizer(source, yield_eof=False))
start = tokens_index(full_text_as_tokens, source_as_node, skip=to_skip)
end = start + len(source_as_node) - 1
return start, end
def get_expr_node(node):
if isinstance(node, EXPR):
value_as_tokens = list(Tokenizer(node.source, yield_eof=False))
start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
end = start + len(value_as_tokens) - 1
return NameExprNode(start, end, full_text_as_tokens[start: end + 1])
if isinstance(node, AND):
parts = [get_expr_node(part) for part in node.parts]
start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
return AndNode(start, end, full_text_as_tokens[start: end + 1], *parts)
if isinstance(node, OR):
parts = [get_expr_node(part) for part in node.parts]
start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
return OrNode(start, end, full_text_as_tokens[start: end + 1], *parts)
if isinstance(node, NOT):
part = get_expr_node(node.expr)
start, end = get_pos_from_source(node.source) if node.source else (part.start - 2, part.end)
return NotNode(start, end, full_text_as_tokens[start: end + 1], part)
if isinstance(node, VAR):
value_as_tokens = list(Tokenizer(node.source or node.full_name, yield_eof=False))
start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
end = start + len(value_as_tokens) - 1
parts = node.full_name.split(".")
if len(parts) == 1:
return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0])
else:
return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:])
if isinstance(node, (EQ, NEQ, GT, GTE, LT, LTE, IN, NIN)):
node_type = comparison_type_mapping[type(node).__name__]
left_node, right_node = get_expr_node(node.left), get_expr_node(node.right)
start, end = get_pos_from_source(node.source) if node.source else get_pos([left_node, right_node])
return ComparisonNode(start, end, full_text_as_tokens[start: end + 1],
node_type, left_node, right_node)
if isinstance(node, FN):
start, end = get_pos_from_source(node.first)
first = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
start, end = get_pos_from_source(node.last)
last = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
parameters = []
for param_value, sep in node.parameters:
if isinstance(param_value, str):
start, end = get_pos_from_source(param_value)
param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
param_as_expr_node = get_expr_node(param_value)
if sep:
sep_tokens = Tokenizer(sep, yield_eof=False)
start = param_as_expr_node.end + 1
end = start + len(list(sep_tokens)) - 1
sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
sep_as_expr_node = None
parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node))
start, end = first.start, last.end
return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters)
return get_expr_node(test_node)
return test_node.get_expr_node(full_text_as_tokens)
def _index(tokens, expr, index):
@@ -1096,15 +1202,15 @@ def compute_debug_array(res):
def get_node(
concepts_map,
expression_as_tokens,
sub_expr,
concept_key=None,
skip=0,
is_bnf=False,
sya=False,
init_empty_body=False,
exclude_body=False):
concepts_map,
expression_as_tokens,
sub_expr,
concept_key=None,
skip=0,
is_bnf=False,
sya=False,
init_empty_body=False,
exclude_body=False):
"""
Tries to find sub in expression
When found, transform it to its correct type
@@ -1157,18 +1263,20 @@ def get_node(
sub_expr.end = start + length - 1
return sub_expr
if isinstance(sub_expr, (CNC, CC, CN)):
concept_node = get_node(
concepts_map,
expression_as_tokens,
sub_expr.source or sub_expr.concept_key,
sub_expr.concept_key, sya=sya)
if not hasattr(concept_node, "concept"):
raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.")
concept_found = concept_node.concept
sub_expr.concept_key = concept_found.key
sub_expr.concept = concept_found
sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
if isinstance(sub_expr, (CNC, CC, CN, CMV)):
if sub_expr.concept is None or sub_expr.start is None or sub_expr.end is None:
concept_node = get_node(
concepts_map,
expression_as_tokens,
sub_expr.source or sub_expr.concept_key,
sub_expr.concept_key, sya=sya)
if not hasattr(concept_node, "concept"):
raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.")
concept_found = concept_node.concept
sub_expr.concept_key = concept_found.key
sub_expr.concept = concept_found
sub_expr.fix_pos(
(concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
if hasattr(sub_expr, "compiled"):
for k, v in sub_expr.compiled.items():
node = get_node(concepts_map, expression_as_tokens, v, sya=sya,
@@ -1210,9 +1318,9 @@ def get_node(
concept_found = concepts_map.get(concept_key, None)
if concept_found:
concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests
if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf:
return SyaConceptParserHelper(concept_found, start, start + length - 1)
elif init_empty_body:
# if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf:
# return SyaConceptParserHelper(concept_found, start, start + length - 1)
if init_empty_body:
node = CNC(concept_found, sub_expr, start, start + length - 1, exclude_body=exclude_body)
init_body(node, concept_found, sub_expr)
return node
@@ -1354,8 +1462,8 @@ def get_test_obj(real_obj, test_obj, get_test_obj_delegate=None):
"""
From a production object (Concept, ConceptNode, ....)
Create a test object (CNC, CC ...) that can be used to validate the unit tests
:param test_obj:
:param real_obj:
:param test_obj: test object used as a template
:param get_test_obj_delegate:
:return:
"""
@@ -1367,13 +1475,25 @@ def get_test_obj(real_obj, test_obj, get_test_obj_delegate=None):
if isinstance(test_obj, dict):
if len(test_obj) != len(real_obj):
raise Exception(f"Not the same size ! {real_obj=}, {test_obj=}")
return {k: get_test_obj(real_obj[k], v) for k, v in test_obj.items()}
if not hasattr(test_obj, "transform_real_obj"):
return real_obj
if hasattr(test_obj, "transform_real_obj"):
return test_obj.transform_real_obj(real_obj, get_test_obj)
return test_obj.transform_real_obj(real_obj, get_test_obj)
return real_obj
def prepare_nodes_comparison(concepts_map, expression, real_obj, test_obj):
    """Resolve test objects against *expression* and pair them with the real
    objects, returning (real_obj_as_test, resolved_test_obj) ready to compare.

    Handles both a single object and a list of objects (element-wise).
    """
    # Single-object case: wrap, resolve, unwrap.
    if not isinstance(real_obj, list):
        resolved = compute_expected_array(concepts_map, expression, [test_obj])[0]
        return get_test_obj(real_obj, resolved), resolved
    assert len(real_obj) == len(
        test_obj), f"The two lists do not have the same size {len(real_obj)} != {len(test_obj)}"
    resolved = compute_expected_array(concepts_map, expression, test_obj)
    paired = [get_test_obj(actual, expected) for actual, expected in zip(real_obj, resolved)]
    return paired, resolved
def compare_with_test_object(actual, expected):