Fixed #109 : Mix Python and concepts: list comprehensions

Fixed #110 : SheerkaDebugManager: Add list_debug_settings
Fixed #111 : SheerkaDebugManager: Implement ListDebugLogger
Fixed #112 : SyaNodeParser: rewrite this parser
Fixed #113 : Sheerka: Add enable_parser_caching to disable parser caching
Fixed #114 : SyaNodeParser: Implement fast cache to resolve unrecognized-token requests
Fixed #115 : BnfNodeParser: Implement fast cache to resolve unrecognized-token requests
Fixed #116 : SequenceNodeParser: Implement fast cache to resolve unrecognized-token requests
Fixed #117 : ResolveMultiplePluralAmbiguityEvaluator: Resolve multiple-plural ambiguity
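A minimal sketch of the fast-cache idea behind #114-#116, assuming a hypothetical cache keyed on token values (illustrative names, not the actual parser API): memoize spans that already failed to parse so repeated unrecognized-token requests short-circuit instead of re-parsing.

class UnrecognizedTokenCache:
    # Hypothetical sketch, not the shipped implementation: remember token
    # spans that already failed to parse so the parser can skip them fast.
    def __init__(self):
        self._misses = set()

    def _key(self, tokens):
        # Tokens expose a .value attribute (see Token in core.tokenizer).
        return tuple(t.value for t in tokens)

    def is_known_miss(self, tokens):
        return self._key(tokens) in self._misses

    def record_miss(self, tokens):
        self._misses.add(self._key(tokens))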
2021-09-06 11:51:50 +02:00
parent 71d1b1d1ca
commit 54e5681c5a
57 changed files with 5179 additions and 3125 deletions
+292 -172
View File
@@ -1,24 +1,23 @@
import ast
from dataclasses import dataclass
from typing import Union, List
from typing import List, Union
from core.builtin_concepts import ReturnValueConcept
from core.builtin_helpers import CreateObjectIdentifiers
from core.concept import Concept, ConceptParts, DoNotResolve, AllConceptParts
from core.concept import AllConceptParts, Concept, ConceptParts, DoNotResolve
from core.rule import Rule
from core.tokenizer import Tokenizer, TokenKind, Token
from core.utils import get_text_from_tokens, tokens_index, str_concept
from parsers.BaseExpressionParser import NameExprNode, AndNode, OrNode, NotNode, VariableNode, ComparisonNode, \
ComparisonType, \
FunctionParameter
from parsers.BaseNodeParser import UnrecognizedTokensNode, SourceCodeNode, RuleNode, ConceptNode, \
SourceCodeWithConceptNode
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_text_from_tokens, str_concept, tokens_index
from parsers.BaseExpressionParser import AndNode, ComparisonNode, ComparisonType, Comprehension, FunctionParameter, \
ListComprehensionNode, ListNode, NameExprNode, \
NotNode, OrNode, VariableNode, comma
from parsers.BaseNodeParser import ConceptNode, RuleNode, SourceCodeNode, SourceCodeWithConceptNode, \
UnrecognizedTokensNode
from parsers.FunctionParser import FunctionNode
from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import SyaConceptParserHelper
from sheerkapython.python_wrapper import sheerka_globals
from sheerkarete.common import V
from sheerkarete.conditions import Condition, AndConditions, NegatedCondition, NegatedConjunctiveConditions
from sheerkarete.conditions import AndConditions, Condition, NegatedCondition, NegatedConjunctiveConditions
@dataclass
@@ -29,104 +28,254 @@ class Obj:
parent: object = None
class AND:
class ExprTestObj:
@staticmethod
def get_pos(nodes):
start, end = None, None
for n in nodes:
if start is None or start > n.start:
start = n.start
if end is None or end < n.end:
end = n.end
return start, end
@staticmethod
def get_pos_from_source(source, full_text_as_tokens):
if isinstance(source, tuple):
source, to_skip = source[0], source[1]
else:
to_skip = 0
source_as_node = list(Tokenizer(source, yield_eof=False))
start = tokens_index(full_text_as_tokens, source_as_node, skip=to_skip)
end = start + len(source_as_node) - 1
return start, end
@staticmethod
def as_tokens(source):
if isinstance(source, tuple):
source, to_skip = source
else:
source, to_skip = source, 0
return list(Tokenizer(source, yield_eof=False)), to_skip
def get_expr_node(self, full_text_as_tokens=None):
raise NotImplementedError
@staticmethod
def safe_get_expr_node(obj, full_text_as_tokens):
if obj is None:
return None
obj = EXPR(obj) if isinstance(obj, (str, tuple)) else obj
return obj.get_expr_node(full_text_as_tokens)
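Taken together, these base helpers let a test declare an expected expression tree and materialize it against the tokenized text. A hedged usage sketch, using the AND, NOT and EXPR helpers defined below:

# Hypothetical usage: declare the expected tree for "a and not b",
# then materialize it into real nodes with positions filled in.
tokens = list(Tokenizer("a and not b", yield_eof=False))
expected = AND(EXPR("a"), NOT(EXPR("b")))
node = expected.get_expr_node(tokens)  # a real AndNode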
class AND(ExprTestObj):
""" Test class for AndNode"""
def __init__(self, *parts, source=None):
self.parts = parts
self.source = source
def get_expr_node(self, full_text_as_tokens=None):
parts = [part.get_expr_node(full_text_as_tokens) for part in self.parts]
start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else self.get_pos(parts)
return AndNode(start, end, full_text_as_tokens[start: end + 1], *parts)
class OR:
class OR(ExprTestObj):
""" Test class for OrNode"""
def __init__(self, *parts, source=None):
self.parts = parts
self.source = source
def get_expr_node(self, full_text_as_tokens=None):
parts = [part.get_expr_node(full_text_as_tokens) for part in self.parts]
start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else self.get_pos(parts)
return OrNode(start, end, full_text_as_tokens[start: end + 1], *parts)
@dataclass
class NOT:
class NOT(ExprTestObj):
""" Test class for NotNode"""
expr: object
expr: ExprTestObj
source: str = None
def get_expr_node(self, full_text_as_tokens=None):
part = self.expr.get_expr_node(full_text_as_tokens)
start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else (
part.start - 2, part.end)
return NotNode(start, end, full_text_as_tokens[start: end + 1], part)
@dataclass
class EXPR:
"""Test class for NameNode. E stands for Expression"""
class EXPR(ExprTestObj):
"""Test class for NameNode"""
source: str
def get_expr_node(self, full_text_as_tokens=None):
value_as_tokens, to_skip = self.as_tokens(self.source)
start = tokens_index(full_text_as_tokens, value_as_tokens, to_skip)
end = start + len(value_as_tokens) - 1
return NameExprNode(start, end, full_text_as_tokens[start: end + 1])
@dataclass
class VAR:
class VAR(ExprTestObj):
"""Test class for VarNode"""
full_name: str
source: str = None
def get_expr_node(self, full_text_as_tokens=None):
value_as_tokens = list(Tokenizer(self.source or self.full_name, yield_eof=False))
start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
end = start + len(value_as_tokens) - 1
parts = self.full_name.split(".")
if len(parts) == 1:
return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0])
else:
return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:])
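A hedged example of VAR: the dotted full_name is split into the variable name and its attribute path (the text here is illustrative):

# Hypothetical usage: "user.name" becomes VariableNode("user", "name").
tokens = list(Tokenizer("user.name is empty", yield_eof=False))
node = VAR("user.name").get_expr_node(tokens)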
@dataclass
class EQ:
left: object
right: object
class CompExprTestObj(ExprTestObj):
"""
Base test object for comparison operators (==, <=, ...)
"""
left: ExprTestObj
right: ExprTestObj
source: str = None
@dataclass
class NEQ:
left: object
right: object
source: str = None
def get_expr_node(self, full_text_as_tokens=None):
node_type = comparison_type_mapping[type(self).__name__]
left_node = self.left.get_expr_node(full_text_as_tokens)
right_node = self.right.get_expr_node(full_text_as_tokens)
start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else \
self.get_pos([left_node, right_node])
return ComparisonNode(start, end, full_text_as_tokens[start: end + 1], node_type, left_node, right_node)
@dataclass
class GT:
left: object
right: object
source: str = None
class EQ(CompExprTestObj):
pass
@dataclass
class GTE:
left: object
right: object
source: str = None
class NEQ(CompExprTestObj):
pass
@dataclass
class LT:
left: object
right: object
source: str = None
class GT(CompExprTestObj):
pass
@dataclass
class LTE:
left: object
right: object
source: str = None
class GTE(CompExprTestObj):
pass
@dataclass
class IN:
left: object
right: object
source: str = None
class LT(CompExprTestObj):
pass
@dataclass
class NIN: # for NOT IN
left: object
right: object
source: str = None
class LTE(CompExprTestObj):
pass
@dataclass
class PAREN: # for parenthesis node
class IN(CompExprTestObj):
pass
@dataclass
class NIN(CompExprTestObj): # for NOT IN
pass
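get_expr_node above resolves the node type through comparison_type_mapping, which is defined elsewhere in this module. Presumably it maps these class names to ComparisonType members, along these lines (a sketch under that assumption, not the verbatim definition):

comparison_type_mapping = {
    # The member names of ComparisonType are assumed here.
    "EQ": ComparisonType.EQ,
    "NEQ": ComparisonType.NEQ,
    "GT": ComparisonType.GT,
    "GTE": ComparisonType.GTE,
    "LT": ComparisonType.LT,
    "LTE": ComparisonType.LTE,
    "IN": ComparisonType.IN,
    "NIN": ComparisonType.NIN,
}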
@dataclass
class PAREN(ExprTestObj): # for parenthesis node
node: object
source: str = None
class L_EXPR(ExprTestObj):
def __init__(self, first, last, *items, sep=None, source=None):
self.first = first
self.last = last
self.items = items
self.sep = sep or comma
self.source = source
def get_expr_node(self, full_text_as_tokens=None):
first = self.safe_get_expr_node(self.first, full_text_as_tokens)
last = self.safe_get_expr_node(self.last, full_text_as_tokens)
items = [self.safe_get_expr_node(item, full_text_as_tokens) for item in self.items]
if self.source is None:
source = self.first if self.first else ""
source += f"{self.sep.value} ".join(item.get_source() for item in items)
if self.last:
source += self.last
else:
source = self.source
start, end = self.get_pos_from_source(source, full_text_as_tokens)
return ListNode(start, end, full_text_as_tokens[start: end + 1], first, last, items, self.sep)
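A hedged example of L_EXPR describing the parenthesized list "(x, y)", mirroring its use in the ListParser tests further down:

expression = "(x, y)"
expected = L_EXPR("(", ")", EXPR("x"), EXPR("y"), source=expression)
node = get_expr_node_from_test_node(expression, expected)  # a ListNode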
@dataclass
class LCC:
"""
A single comprehension clause (target, iterable, optional if_expr) of a list comprehension
"""
target: object
iterable: object
if_expr: object
@dataclass
class LC(ExprTestObj): # for List Comprehension node
element: object
generators: list
source: str = None
def get_expr_node(self, full_text_as_tokens=None):
# first transform plain strings into EXPR test objects
if isinstance(self.element, str):
self.element = EXPR(self.element)
comprehensions = []
nodes = []
for comp in self.generators:
target = EXPR(comp[0]) if isinstance(comp[0], (str, tuple)) else comp[0]
iterable = EXPR(comp[1]) if isinstance(comp[1], (str, tuple)) else comp[1]
if_expr = EXPR(comp[2]) if isinstance(comp[2], (str, tuple)) else comp[2]
comprehensions.append(LCC(target, iterable, if_expr))
self.generators = comprehensions
# then transform into ListComprehensionNode
element = self.element.get_expr_node(full_text_as_tokens)
nodes.append(element)
comprehensions = []
for comp in self.generators:
target = comp.target.get_expr_node(full_text_as_tokens)
iterable = comp.iterable.get_expr_node(full_text_as_tokens)
if_expr = comp.if_expr.get_expr_node(full_text_as_tokens) if comp.if_expr else None
comprehensions.append(Comprehension(target, iterable, if_expr))
nodes.extend([target, iterable, if_expr])
start, end = self.get_pos_from_source(self.source, full_text_as_tokens) if self.source else self.get_pos(nodes)
return ListComprehensionNode(start, end, full_text_as_tokens[start: end + 1], element, comprehensions)
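A hedged example of LC describing [x for x in lst if x == 'a'], in the shape used by the ListComprehensionParser tests below: each generator tuple is (target, iterable, if_expr), and a (source, skip) tuple such as ("x", 1) targets the second occurrence of a repeated substring.

expression = "[x for x in lst if x == 'a']"
expected = LC("x", [(("x", 1), "lst", "x == 'a'")], source=expression)
node = get_expr_node_from_test_node(expression, expected)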
class CC:
"""
Concept class for test purposes
@@ -320,10 +469,13 @@ class CMV:
Test class that only compares the key and the metadata variables
"""
def __init__(self, concept, **kwargs):
def __init__(self, concept, source=None, **kwargs):
self.concept_key = concept.key if isinstance(concept, Concept) else concept
self.concept = concept if isinstance(concept, Concept) else None
self.variables = kwargs
self.source = source # used when the key differs from the substring to search for when filling start and end
self.start = None # for debugging: indicates where the concept starts
self.end = None # for debugging: indicates where the concept ends
def __eq__(self, other):
if id(self) == id(other):
@@ -352,6 +504,21 @@ class CMV:
txt += f", {k}='{v}'"
return txt + ")"
def fix_pos(self, node):
start = node.start if hasattr(node, "start") else \
node[0] if isinstance(node, tuple) else None
end = node.end if hasattr(node, "end") else \
node[1] if isinstance(node, tuple) else None
if start is not None:
if self.start is None or start < self.start:
self.start = start
if end is not None:
if self.end is None or end > self.end:
self.end = end
return self
def transform_real_obj(self, other, get_test_obj_delegate):
if isinstance(other, CMV):
return other
@@ -730,7 +897,7 @@ class CNC(CN):
self_compile_to_use = self.compiled or compiled
compiled = get_test_obj_delegate(self_compile_to_use, compiled, get_test_obj_delegate)
compiled = get_test_obj_delegate(compiled, self_compile_to_use, get_test_obj_delegate)
return CNC(other.concept,
other.source if self.source is not None else None,
other.start if self.start is not None else None,
@@ -865,7 +1032,7 @@ class RN(HelperWithPos):
raise Exception(f"Expecting RuleNode but received {other=}")
class FN:
class FN(ExprTestObj):
"""
Test-only class.
It matches FunctionNode but with fewer constraints
@@ -931,6 +1098,32 @@ class FN:
raise Exception(f"Expecting FunctionNode but received {other=}")
def get_expr_node(self, full_text_as_tokens=None):
start, end = self.get_pos_from_source(self.first, full_text_as_tokens)
first = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
start, end = self.get_pos_from_source(self.last, full_text_as_tokens)
last = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
parameters = []
for param_value, sep in self.parameters:
if isinstance(param_value, str):
start, end = self.get_pos_from_source(param_value, full_text_as_tokens)
param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
param_as_expr_node = param_value.get_expr_node(full_text_as_tokens)
if sep:
sep_tokens = Tokenizer(sep, yield_eof=False)
start = param_as_expr_node.end + 1
end = start + len(list(sep_tokens)) - 1
sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
sep_as_expr_node = None
parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node))
start, end = first.start, last.end
return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters)
@dataclass()
class NEGCOND:
@@ -966,94 +1159,7 @@ def get_expr_node_from_test_node(full_text, test_node):
Materializes a test node (EXPR, OR, NOT, AND, ...) into the real expression node it describes, to ease comparisons
"""
full_text_as_tokens = list(Tokenizer(full_text, yield_eof=False))
def get_pos(nodes):
start, end = None, None
for n in nodes:
if start is None or start > n.start:
start = n.start
if end is None or end < n.end:
end = n.end
return start, end
def get_pos_from_source(source):
if isinstance(source, tuple):
source, to_skip = source[0], source[1]
else:
to_skip = 0
source_as_node = list(Tokenizer(source, yield_eof=False))
start = tokens_index(full_text_as_tokens, source_as_node, skip=to_skip)
end = start + len(source_as_node) - 1
return start, end
def get_expr_node(node):
if isinstance(node, EXPR):
value_as_tokens = list(Tokenizer(node.source, yield_eof=False))
start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
end = start + len(value_as_tokens) - 1
return NameExprNode(start, end, full_text_as_tokens[start: end + 1])
if isinstance(node, AND):
parts = [get_expr_node(part) for part in node.parts]
start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
return AndNode(start, end, full_text_as_tokens[start: end + 1], *parts)
if isinstance(node, OR):
parts = [get_expr_node(part) for part in node.parts]
start, end = get_pos_from_source(node.source) if node.source else get_pos(parts)
return OrNode(start, end, full_text_as_tokens[start: end + 1], *parts)
if isinstance(node, NOT):
part = get_expr_node(node.expr)
start, end = get_pos_from_source(node.source) if node.source else (part.start - 2, part.end)
return NotNode(start, end, full_text_as_tokens[start: end + 1], part)
if isinstance(node, VAR):
value_as_tokens = list(Tokenizer(node.source or node.full_name, yield_eof=False))
start = tokens_index(full_text_as_tokens, value_as_tokens, 0)
end = start + len(value_as_tokens) - 1
parts = node.full_name.split(".")
if len(parts) == 1:
return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0])
else:
return VariableNode(start, end, full_text_as_tokens[start: end + 1], parts[0], *parts[1:])
if isinstance(node, (EQ, NEQ, GT, GTE, LT, LTE, IN, NIN)):
node_type = comparison_type_mapping[type(node).__name__]
left_node, right_node = get_expr_node(node.left), get_expr_node(node.right)
start, end = get_pos_from_source(node.source) if node.source else get_pos([left_node, right_node])
return ComparisonNode(start, end, full_text_as_tokens[start: end + 1],
node_type, left_node, right_node)
if isinstance(node, FN):
start, end = get_pos_from_source(node.first)
first = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
start, end = get_pos_from_source(node.last)
last = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
parameters = []
for param_value, sep in node.parameters:
if isinstance(param_value, str):
start, end = get_pos_from_source(param_value)
param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
param_as_expr_node = get_expr_node(param_value)
if sep:
sep_tokens = Tokenizer(sep, yield_eof=False)
start = param_as_expr_node.end + 1
end = start + len(list(sep_tokens)) - 1
sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1])
else:
sep_as_expr_node = None
parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node))
start, end = first.start, last.end
return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters)
return get_expr_node(test_node)
return test_node.get_expr_node(full_text_as_tokens)
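A hedged end-to-end example of this helper with a comparison test node:

expression = "a == b"
expected = EQ(EXPR("a"), EXPR("b"))
node = get_expr_node_from_test_node(expression, expected)  # a ComparisonNode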
def _index(tokens, expr, index):
@@ -1096,15 +1202,15 @@ def compute_debug_array(res):
def get_node(
concepts_map,
expression_as_tokens,
sub_expr,
concept_key=None,
skip=0,
is_bnf=False,
sya=False,
init_empty_body=False,
exclude_body=False):
concepts_map,
expression_as_tokens,
sub_expr,
concept_key=None,
skip=0,
is_bnf=False,
sya=False,
init_empty_body=False,
exclude_body=False):
"""
Tries to find sub_expr in the expression.
When found, transforms it into its correct type
@@ -1157,18 +1263,20 @@ def get_node(
sub_expr.end = start + length - 1
return sub_expr
if isinstance(sub_expr, (CNC, CC, CN)):
concept_node = get_node(
concepts_map,
expression_as_tokens,
sub_expr.source or sub_expr.concept_key,
sub_expr.concept_key, sya=sya)
if not hasattr(concept_node, "concept"):
raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.")
concept_found = concept_node.concept
sub_expr.concept_key = concept_found.key
sub_expr.concept = concept_found
sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
if isinstance(sub_expr, (CNC, CC, CN, CMV)):
if sub_expr.concept is None or sub_expr.start is None or sub_expr.end is None:
concept_node = get_node(
concepts_map,
expression_as_tokens,
sub_expr.source or sub_expr.concept_key,
sub_expr.concept_key, sya=sya)
if not hasattr(concept_node, "concept"):
raise Exception(f"'{sub_expr.concept_key}' is not a concept. Check your map.")
concept_found = concept_node.concept
sub_expr.concept_key = concept_found.key
sub_expr.concept = concept_found
sub_expr.fix_pos(
(concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
if hasattr(sub_expr, "compiled"):
for k, v in sub_expr.compiled.items():
node = get_node(concepts_map, expression_as_tokens, v, sya=sya,
@@ -1210,9 +1318,9 @@ def get_node(
concept_found = concepts_map.get(concept_key, None)
if concept_found:
concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests
if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf:
return SyaConceptParserHelper(concept_found, start, start + length - 1)
elif init_empty_body:
# if sya and len(concept_found.get_metadata().variables) > 0 and not is_bnf:
# return SyaConceptParserHelper(concept_found, start, start + length - 1)
if init_empty_body:
node = CNC(concept_found, sub_expr, start, start + length - 1, exclude_body=exclude_body)
init_body(node, concept_found, sub_expr)
return node
@@ -1354,8 +1462,8 @@ def get_test_obj(real_obj, test_obj, get_test_obj_delegate=None):
"""
From a production object (Concept, ConceptNode, ...),
create a test object (CNC, CC, ...) that can be used to validate the unit tests
:param test_obj:
:param real_obj:
:param test_obj: test object used as a template
:param get_test_obj_delegate:
:return:
"""
@@ -1367,13 +1475,25 @@ def get_test_obj(real_obj, test_obj, get_test_obj_delegate=None):
if isinstance(test_obj, dict):
if len(test_obj) != len(real_obj):
raise Exception(f"Not the same size ! {real_obj=}, {test_obj=}")
return {k: get_test_obj(real_obj[k], v) for k, v in test_obj.items()}
if not hasattr(test_obj, "transform_real_obj"):
return real_obj
if hasattr(test_obj, "transform_real_obj"):
return test_obj.transform_real_obj(real_obj, get_test_obj)
return test_obj.transform_real_obj(real_obj, get_test_obj)
return real_obj
def prepare_nodes_comparison(concepts_map, expression, real_obj, test_obj):
if isinstance(real_obj, list):
assert len(real_obj) == len(
test_obj), f"The two lists do not have the same size {len(real_obj)} != {len(test_obj)}"
resolved_test_obj = compute_expected_array(concepts_map, expression, test_obj)
real_obj_as_test = [get_test_obj(r, t) for r, t in zip(real_obj, resolved_test_obj)]
return real_obj_as_test, resolved_test_obj
else:
resolved_test_obj = compute_expected_array(concepts_map, expression, [test_obj])[0]
real_obj_as_test = get_test_obj(real_obj, resolved_test_obj)
return real_obj_as_test, resolved_test_obj
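A hedged sketch of how a test might drive prepare_nodes_comparison (cmap, the parsed result res, and the CN("one") shorthand are assumptions borrowed from the surrounding tests):

# Hypothetical usage inside a test body:
real_as_test, expected = prepare_nodes_comparison(
    cmap, "one two", res.body.body, [CN("one"), CN("two")])
assert real_as_test == expected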
def compare_with_test_object(actual, expected):
@@ -124,7 +124,7 @@ func(a)
assert parser.get_parts(["print", "when"]) is not None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], UnexpectedEofParsingError)
assert parser.error_sink[0].message == "While parsing keyword 'print'."
assert parser.error_sink[0].message == "while parsing keyword 'print'"
def test_i_can_double_quoted_strings_are_expanded(self):
"""
+13 -9
View File
@@ -4,19 +4,20 @@ import pytest
import tests.parsers.parsers_utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_BNF
from core.concept import Concept, ConceptParts, DEFINITION_TYPE_BNF, DoNotResolve
from core.global_symbols import NotInit
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.SheerkaIsAManager import SheerkaIsAManager
from parsers.BaseNodeParser import NoMatchingTokenError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \
BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor, BnfNodeParser, ConceptExpression, Match, NonTerminalNode, \
OneOrMore, Optional, OrderedChoice, RegExDef, RegExMatch, Sequence, StrMatch, TerminalNode, UnOrderedChoice, \
VariableExpression, ZeroOrMore
from tests.BaseTest import BaseTest
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.evaluators.EvaluatorTestsUtils import python_ret_val
from tests.parsers.parsers_utils import CNC, CN, UTN, CC, SCN, get_test_obj, compare_with_test_object
from tests.parsers.parsers_utils import CC, CMV, CN, CNC, SCN, UTN, compare_with_test_object, get_test_obj
cmap = {
"one": Concept("one"),
@@ -1027,7 +1028,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one tiny but beautiful shoe",
[CNC("foo",
"one tiny but beautiful shoe",
x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]),
x=CMV("but", source="tiny but beautiful", x="tiny ", y="beautiful "))]),
])
def test_i_can_match_variable_in_between(self, expr, expected):
my_map = {
@@ -1896,7 +1897,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
def test_i_can_simplify_unordered_choices_that_refer_to_the_same_isa(self):
my_map = {
"light_red": Concept("light red"),
"light_red": Concept("light red", key="light_red"),
"dark_red": Concept("dark red"),
"red colors": Concept("red colors"),
"color": Concept("color"),
@@ -1916,6 +1917,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(global_truth_context, my_map["red colors"], my_map["color"])
sheerka.set_isa(global_truth_context, my_map["red colors"], my_map["adjective"])
# hack to ease the tests
sheerka.get_by_id(my_map["light_red"].id).get_metadata().key = "light_red"
sheerka.om.clear(SheerkaIsAManager.CONCEPTS_IN_GROUPS_ENTRY)
text = "light red table"
expected = CNC("qualified table",
@@ -1940,7 +1945,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"isafoo": Concept("x is an foo", body="False", pre="is_question()").def_var("x"),
"q": Concept("q ?", body="question(a)").def_var("q")
}
# I need the concept isafoo to fool SyaNodeParser when parsing the sub-text 'is an hex ?'
# The parser will try to recognize 'is an foo', will fail, and will revert the result to UTN()
# It's this UTN that needs to be properly handled
@@ -1953,7 +1958,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
res = parser.parse(context, ParserInput(text))
assert not res.status
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
+2 -2
View File
@@ -387,8 +387,8 @@ def concept add one to a as:
("def concept name from def", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from def ", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from as True", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from", UnexpectedEofParsingError("While parsing keyword 'from'.")),
("def concept name from ", UnexpectedEofParsingError("While parsing keyword 'from'.")),
("def concept name from", UnexpectedEofParsingError("while parsing keyword 'from'")),
("def concept name from ", UnexpectedEofParsingError("while parsing keyword 'from'")),
])
def test_i_can_detect_empty_def_declaration(self, text, error):
sheerka, context, parser, *concepts = self.init_parser()
+2 -2
View File
@@ -190,8 +190,8 @@ class TestDefRuleParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, expected_error)
@pytest.mark.parametrize("text, error_message", [
("def rule rule_name as", "While parsing 'when'."),
("def rule rule_name as ", "While parsing 'when'."),
("def rule rule_name as", "while parsing 'when'"),
("def rule rule_name as ", "while parsing 'when'"),
])
def test_i_cannot_parse_when_unexpected_eof(self, text, error_message):
sheerka, context, parser = self.init_parser()
@@ -0,0 +1,180 @@
import pytest
from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind
from parsers.BaseExpressionParser import ParenthesisMismatchError
from parsers.BaseParser import UnexpectedEofParsingError, UnexpectedTokenParsingError
from parsers.ListComprehensionParser import ElementNotFound, FailedToParse, ForNotFound, LeadingParenthesisNotFound, \
ListComprehensionParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import LC, L_EXPR, get_expr_node_from_test_node
class TestListComprehensionParser(TestUsingMemoryBasedSheerka):
def init_parser(self):
sheerka, context = self.init_concepts()
parser = ListComprehensionParser(auto_compile=False)
return sheerka, context, parser
@pytest.mark.parametrize("text, reason", [
("foo", LeadingParenthesisNotFound()),
("[]", ForNotFound()),
("[ x ]", ForNotFound()),
("[ x for]", FailedToParse("target", 5)),
("[ x for x]", UnexpectedEofParsingError("while parsing comprehension")),
("[ x for x in ]", UnexpectedEofParsingError("while parsing comprehension")),
("[ x for x in lst for]", FailedToParse("target", 13)),
("[", UnexpectedEofParsingError("when start parsing")),
("[]", ForNotFound()),
("[ for x in z ]", ElementNotFound()),
("[ x for in z ]", FailedToParse("target", 6)),
("[ x for x in ]", UnexpectedEofParsingError("while parsing comprehension")),
("[ x for x in z if ]", UnexpectedEofParsingError("while parsing comprehension")),
("[ x for x in z", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))),
("[ x for x in z if t", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))),
("zzz [ x for x in z if t ]", LeadingParenthesisNotFound()),
("[ x for x in z )", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))),
("[ x for x in z if t )", ParenthesisMismatchError(Token(TokenKind.RBRACKET, "]", -1, -1, -1))),
])
def test_i_cannot_parse_when_not_for_me(self, text, reason):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert res.body.reason == [reason]
def test_i_cannot_parse_when_trailing_elements(self):
sheerka, context, parser = self.init_parser()
text = "[ x for x in z if t ] zzz"
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert len(res.body.body) == 1
error = res.body.body[0]
assert isinstance(error, UnexpectedTokenParsingError)
def test_i_can_parse_a_simple_expression(self):
sheerka, context, parser = self.init_parser()
expression = "[x for x in ['a', 'b'] if x == 'a']"
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
lc_node = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
expected = LC(L_EXPR(None, None, "x", source="x "), [(("x", 1), "['a', 'b']", "x == 'a'")], source=expression)
to_compare_to = get_expr_node_from_test_node(expression, expected)
assert lc_node == to_compare_to
def test_i_can_parse_when_no_if(self):
sheerka, context, parser = self.init_parser()
expression = "[x for x in ['a', 'b']]"
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
lc_node = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
expected = LC(L_EXPR(None, None, "x", source="x "), [(("x", 1), "['a', 'b']", None)], source=expression)
to_compare_to = get_expr_node_from_test_node(expression, expected)
assert lc_node == to_compare_to
def test_i_can_parse_when_element_is_a_tuple(self):
sheerka, context, parser = self.init_parser()
expression = "[(x + 1, x + 2) for x in [1, 2]]"
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
lc_node = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
elt = L_EXPR("(", ")", "x + 1", "x + 2")
expected = LC(elt, [(("x", 2), "[1, 2]", None)], source=expression)
to_compare_to = get_expr_node_from_test_node(expression, expected)
assert lc_node == to_compare_to
def test_i_can_parse_when_element_is_a_tuple_with_missing_parenthesis(self):
sheerka, context, parser = self.init_parser()
expression = "[x + 1, x + 2 for x in [1, 2]]"
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
lc_node = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
elt = L_EXPR(None, None, "x + 1", "x + 2", source="x + 1, x + 2 ")
expected = LC(elt, [(("x", 2), "[1, 2]", None)], source=expression)
to_compare_to = get_expr_node_from_test_node(expression, expected)
assert lc_node == to_compare_to
def test_i_can_parse_when_element_is_a_context_that_contains_for(self):
sheerka, context, parser = self.init_parser()
expression = "[handle x for me and for you for x in [1, 2]]"
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
lc_node = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
elt = L_EXPR(None, None, "handle x for me and for you", source="handle x for me and for you ")
expected = LC(elt, [(("x", 1), "[1, 2]", None)], source=expression)
to_compare_to = get_expr_node_from_test_node(expression, expected)
assert lc_node == to_compare_to
def test_i_can_parse_when_multiple_generators(self):
sheerka, context, parser = self.init_parser()
expression = "[(x, y) for x in ['a', 'b'] if x == 'a' for y in ['c', 'd'] if y == 'c']"
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
lc_node = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
elt = L_EXPR("(", ")", "x", "y")
expected = LC(elt,
[(("x", 1), "['a', 'b']", "x == 'a'"),
(("y", 1), "['c', 'd']", "y == 'c'")],
source=expression)
to_compare_to = get_expr_node_from_test_node(expression, expected)
assert lc_node == to_compare_to
def test_i_can_parse_when_multiple_generators_when_no_if(self):
sheerka, context, parser = self.init_parser()
expression = "[x, y for x in ['a', 'b'] for y in ['c', 'd'] if y == 'c']"
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
lc_node = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
elt = L_EXPR(None, None, "x", "y", source="x, y ")
expected = LC(elt,
[(("x", 1), "['a', 'b']", None),
(("y", 1), "['c', 'd']", "y == 'c'")],
source=expression)
to_compare_to = get_expr_node_from_test_node(expression, expected)
assert lc_node == to_compare_to
+69
View File
@@ -0,0 +1,69 @@
import pytest
from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind
from parsers.BaseExpressionParser import ParenthesisMismatchError, end_parenthesis_mapping
from parsers.BaseParser import ErrorSink
from parsers.ListParser import ListParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import EXPR, L_EXPR, get_expr_node_from_test_node
semi_colon = Token(TokenKind.SEMICOLON, ";", -1, -1, -1)
or_token = Token(TokenKind.IDENTIFIER, "or", -1, -1, -1)
class TestListParser(TestUsingMemoryBasedSheerka):
def init_parser(self, sep=None):
sheerka, context = self.init_concepts()
parser = ListParser(sep)
return sheerka, context, parser
@pytest.mark.parametrize("expression, sep, expected", [
("()", None, L_EXPR("(", ")")),
("(x , foo y,z)", None, L_EXPR("(", ")", EXPR("x"), EXPR("foo y"), EXPR("z"), source="(x , foo y,z)")),
("x , foo y,z", None, L_EXPR(None, None, EXPR("x"), EXPR("foo y"), EXPR("z"), source="x , foo y,z")),
("x", None, L_EXPR(None, None, EXPR("x"))),
("[x, foo y, z]", None, L_EXPR("[", "]", EXPR("x"), EXPR("foo y"), EXPR("z"))),
("{x, foo y, z}", None, L_EXPR("{", "}", EXPR("x"), EXPR("foo y"), EXPR("z"))),
("(x; y; z)", semi_colon, L_EXPR("(", ")", EXPR("x"), EXPR("y"), EXPR("z"), sep=semi_colon, source="(x; y; z)")),
("x; y; z", semi_colon, L_EXPR(None, None, EXPR("x"), EXPR("y"), EXPR("z"), sep=semi_colon, source="x; y; z")),
("x or y or z", or_token, L_EXPR(None, None, EXPR("x"), EXPR("y"), EXPR("z"), sep=or_token, source="x or y or z")),
])
def test_i_can_parse_expression(self, expression, sep, expected):
sheerka, context, parser = self.init_parser(sep)
expected = get_expr_node_from_test_node(expression, expected)
res = parser.parse(context, ParserInput(expression))
wrapper = res.body
expressions = res.body.body
assert res.status
assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert expressions == expected
@pytest.mark.parametrize("expression, starting", [
("(", TokenKind.LPAR),
("(x, y", TokenKind.LPAR),
("{x, y", TokenKind.LBRACE),
("[x, y", TokenKind.LBRACKET),
])
def test_i_cannot_parse_when_missing_trailing_parenthesis(self, expression, starting):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(expression))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == [ParenthesisMismatchError(end_parenthesis_mapping[starting])]
def test_none_is_return_when_empty_parser_input(self):
sheerka, context, parser = self.init_parser()
parser_input = ParserInput(" ").reset()
parser_input.next_token()
error_sink = ErrorSink()
parsed = parser.parse_input(context, parser_input, error_sink)
assert parsed is None
+6 -6
View File
@@ -100,12 +100,12 @@ class TestLogicalOperatorParser(TestUsingMemoryBasedSheerka):
assert expressions == expected
@pytest.mark.parametrize("expression, expected_errors", [
("one or", [UnexpectedEofParsingError("When parsing 'or'")]),
("one and", [UnexpectedEofParsingError("When parsing 'and'")]),
("and one", [LeftPartNotFoundError()]),
("or one", [LeftPartNotFoundError()]),
("or", [LeftPartNotFoundError(), UnexpectedEofParsingError("When parsing 'or'")]),
("and", [LeftPartNotFoundError(), UnexpectedEofParsingError("When parsing 'and'")]),
("one or", [UnexpectedEofParsingError("while parsing 'or'")]),
("one and", [UnexpectedEofParsingError("while parsing 'and'")]),
("and one", [LeftPartNotFoundError("and", 0)]),
("or one", [LeftPartNotFoundError("or", 0)]),
("or", [LeftPartNotFoundError("or", 0), UnexpectedEofParsingError("while parsing 'or'")]),
("and", [LeftPartNotFoundError("and", 0), UnexpectedEofParsingError("while parsing 'and'")]),
])
def test_i_can_detect_error(self, expression, expected_errors):
sheerka, context, parser = self.init_parser()
+22 -2
View File
@@ -5,8 +5,7 @@ from core.concept import Concept, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.SequenceNodeParser import SequenceNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array, CN, SCN, get_test_obj, compare_with_test_object, \
UTN
from tests.parsers.parsers_utils import CN, SCN, UTN, compare_with_test_object, compute_expected_array, get_test_obj
class TestSequenceNodeParser(TestUsingMemoryBasedSheerka):
@@ -463,3 +462,24 @@ class TestSequenceNodeParser(TestUsingMemoryBasedSheerka):
assert concept_found.name == "boys"
assert concept_found.key == "boys"
assert concept_found.get_prop(BuiltinConcepts.PLURAL) == boy
def test_i_can_set_body_for_plurals_that_are_a_set(self):
concepts_map = {
"boy": Concept("boy"),
"girl": Concept("girl"),
"human": Concept("human"),
}
sheerka, context, parser = self.init_parser(concepts_map)
global_truth_concept = self.get_context(sheerka, global_truth=True)
sheerka.set_isa(global_truth_concept, concepts_map["boy"], concepts_map["human"])
sheerka.set_isa(global_truth_concept, concepts_map["girl"], concepts_map["human"])
res = parser.parse(context, ParserInput("humans"))
assert res.status
lexer_nodes = res.body.body
assert len(lexer_nodes) == 1
concept_found = lexer_nodes[0].concept
assert concept_found.get_metadata().body == "get_set_elements(c:|1003:)"
File diff suppressed because it is too large