Fixed infinite recursion when parsing complex BNF node

This commit is contained in:
2020-06-23 15:22:27 +02:00
parent 912455c343
commit 7310bc5522
28 changed files with 1082 additions and 276 deletions
+1 -1
View File
@@ -71,7 +71,7 @@ def get_node(
if sub_expr == "')'":
return ")"
if isinstance(sub_expr, (scnode, utnode)):
if isinstance(sub_expr, (scnode, utnode, DoNotResolve)):
return sub_expr
if isinstance(sub_expr, cnode):
+11 -11
View File
@@ -24,7 +24,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
sheerka, context, *updated = self.init_concepts(concept)
res = BaseNodeParser.get_concepts_by_first_keyword(context, updated)
res = BaseNodeParser.get_concepts_by_first_token(context, updated)
assert res.status
assert res.body == expected
@@ -54,7 +54,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
concept.bnf = bnf
sheerka.set_id_if_needed(concept, False)
res = BaseNodeParser.get_concepts_by_first_keyword(context, [concept])
res = BaseNodeParser.get_concepts_by_first_token(context, [concept])
assert res.status
assert res.body == expected
@@ -75,7 +75,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
foo.bnf = OrderedChoice(ConceptExpression("bar"), ConceptExpression("baz"), StrMatch("qux"))
sheerka.set_id_if_needed(foo, False)
res = BaseNodeParser.get_concepts_by_first_keyword(context, [bar, baz, foo])
res = BaseNodeParser.get_concepts_by_first_token(context, [bar, baz, foo])
assert res.status
assert res.body == {
@@ -102,7 +102,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
foo.bnf = OrderedChoice(ConceptExpression("one"), ConceptExpression("bar"), StrMatch("qux"))
sheerka.set_id_if_needed(foo, False)
res = BaseNodeParser.get_concepts_by_first_keyword(context, [bar, foo], use_sheerka=True)
res = BaseNodeParser.get_concepts_by_first_token(context, [bar, foo], use_sheerka=True)
assert res.status
assert res.body == {
@@ -149,7 +149,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(context, sheerka.new("one"), number)
sheerka.set_isa(context, sheerka.new("two"), number)
cbfk = BaseNodeParser.get_concepts_by_first_keyword(context, [one, two, three, number, foo]).body
cbfk = BaseNodeParser.get_concepts_by_first_token(context, [one, two, three, number, foo]).body
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbfk)
@@ -171,7 +171,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
ConceptExpression("foo"),
ConceptExpression("bar")))
concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(
concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(
context, [good, foo, bar, baz]).body
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
@@ -187,7 +187,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
a = self.create_and_add_in_cache_concept(sheerka, "a", bnf=Sequence("one", "two"))
b = self.create_and_add_in_cache_concept(sheerka, "b", bnf=Sequence(ConceptExpression("a"), "two"))
concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(
concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(
context, [a, b]).body
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
@@ -202,7 +202,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
# foo = self.get_concept(sheerka, "foo", ConceptExpression("bar"))
# bar = self.get_concept(sheerka, "bar", ConceptExpression("foo"))
#
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, foo, bar]).body
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, foo, bar]).body
#
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
# assert resolved_ret_val.status
@@ -218,7 +218,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
# two = self.get_concept(sheerka, "two", ConceptExpression("three"))
# three = self.get_concept(sheerka, "three", ConceptExpression("two"))
#
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, one, two, three]).body
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two, three]).body
#
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
# assert resolved_ret_val.status
@@ -233,7 +233,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
# one = self.get_concept(sheerka, "one", ConceptExpression("two"))
# two = self.get_concept(sheerka, "two", OrderedChoice(ConceptExpression("one"), ConceptExpression("two")))
#
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, one, two]).body
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two]).body
#
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
# assert resolved_ret_val.status
@@ -248,7 +248,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
# one = self.get_concept(sheerka, "one", ConceptExpression("two"))
# two = self.get_concept(sheerka, "two", Sequence(StrMatch("yes"), ConceptExpression("one")))
#
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_keyword(sheerka, [good, one, two]).body
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two]).body
#
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
# assert resolved_ret_val.status
+503 -36
View File
@@ -1,25 +1,40 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression
from parsers.BnfParser import BnfParser
import tests.parsers.parsers_utils
from tests.BaseTest import BaseTest
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
cmap = {
"one": Concept("one"),
"two": Concept("two"),
"three": Concept("three"),
"plus": Concept(name="a plus b").def_var("a").def_var("b"),
"bnf one": Concept("bnf_one", definition="'one'"),
'one and two': Concept("one and two", definition="one two"),
'one or more three': Concept("one or more three", definition="three+"),
'two or four': Concept("two or four", definition="two | 'four'"),
"twenties": Concept("twenties", definition="'twenty' c:two or four:=unit"),
"one or more plus": Concept("one or more plus", definition="c:a plus b:+"), # TODO
"four": Concept("four"),
"thirty": Concept("thirty", body=30),
"forty": Concept("forty", body=40),
"fifty": Concept("fifty", body=50),
"number": Concept("number"),
"foo": Concept("foo"),
"bar": Concept("bar"),
"baz": Concept("baz"),
"bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen
"plus": Concept("plus", definition="one 'plus' two").def_var("a").def_var("b"),
'foo then bar': Concept("foo then bar", definition="foo bar").def_var("foo").def_var("bar"),
'foo or bar': Concept("foo or bar", definition="foo | bar").def_var("foo").def_var("bar"),
'one or more foo': Concept("one or more foo", definition="foo+").def_var("foo"),
"t1": Concept("t1", definition="'twenty' (one|two)=unit").def_var("unit").def_var("one").def_var("two"),
"three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
"t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),
# testing keywords
"def_only": Concept("def"),
@@ -65,15 +80,57 @@ def compute_expected_array(my_concepts_map, expression, expected, exclude_body=F
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka = None
@staticmethod
def update_bnf(context, concept):
bnf_parser = BnfParser()
res = bnf_parser.parse(context, concept.metadata.definition)
if res.status:
concept.bnf = res.value.value
concept.metadata.definition_type = DEFINITION_TYPE_BNF
else:
raise Exception(res)
return concept
@classmethod
def setup_class(cls):
t = TestBnfNodeParser()
t = cls()
TestBnfNodeParser.sheerka, context, _ = t.init_parser(
cmap,
singleton=False,
create_new=True,
init_from_sheerka=True)
# end of initialisation
sheerka = TestBnfNodeParser.sheerka
sheerka.set_isa(context, sheerka.new("one"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("two"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("three"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("four"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
thirties = cls.update_bnf(context, Concept("thirties",
definition="thirty number",
where="number < 10",
body="thirty + number").def_var("thirty").def_var("number"))
cmap["thirties"] = sheerka.create_new_concept(context, thirties).body.body
sheerka.set_isa(context, sheerka.new("thirties"), sheerka.new("number"))
forties = cls.update_bnf(context, Concept("forties",
definition="forty number",
where="number < 10",
body="forty + number").def_var("forty").def_var("number"))
cmap["forties"] = sheerka.create_new_concept(context, forties).body.body
sheerka.set_isa(context, sheerka.new("forties"), sheerka.new("number"))
fifties = cls.update_bnf(context, Concept("fifties",
definition="fifty number",
where="number < 10",
body="fifty + number").def_var("fifty").def_var("number"))
cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body
sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number"))
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
if my_concepts_map is not None:
sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs)
@@ -174,6 +231,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self):
# to match '--filter' in one word
my_map = {
"filter": self.bnf_concept("filter",
Sequence(StrMatch("-", skip_whitespace=False),
@@ -236,20 +294,50 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("concept_three, expected", [
(Concept("three"), []),
(BaseTest.bnf_concept("three", StrMatch("three")), [UTN('twenty '), "three"])
])
def test_i_can_manage_sequence_with_wrong_order_choice(self, concept_three, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
StrMatch("twenty"),
OrderedChoice(StrMatch("one"), StrMatch("two")))),
"three": concept_three}
text = "twenty three"
self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
("thirty one ok", [CNC("foo", source="thirty one ok")]),
("twenty one ok", [CNC("foo", source="twenty one ok")]),
("ok thirty one", [CNC("foo", source="ok thirty one")]),
("ok twenty one", [CNC("foo", source="ok twenty one")]),
("ok one", []),
])
def test_i_can_mix_sequence_and_ordered(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
StrMatch("ok"),
OrderedChoice(StrMatch("twenty"), StrMatch("thirty")),
StrMatch("one"),
StrMatch("ok"))
StrMatch("one"))
)}
self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
# ("twenty one", [CNC("foo", source="twenty one")]),
# ("twenty three", []), # three does not exist
("twenty four", []), # four exists but should not be seen
])
def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
StrMatch("twenty"),
OrderedChoice(StrMatch("one"), StrMatch("two")))),
"four": Concept("four")}
self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
("twenty thirty", [CNC("foo", source="twenty thirty")]),
("one", [CNC("foo", source="one")]),
@@ -531,6 +619,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"bar": self.bnf_concept("bar", Sequence(
ConceptExpression("foo"),
OrderedChoice(StrMatch("one"), StrMatch("two")))),
"three": Concept("three")
}
text = "twenty two"
@@ -553,6 +642,33 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
}
assert concept_bar.compiled["foo"].compiled == {ConceptParts.BODY: DoNotResolve("thirty")}
text = "thirty three"
expected = [[CN("foo", source="thirty"), CN("three")], []]
self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
def test_i_can_mix_reference_to_other_concepts_2(self):
# this time, we use concept expression
my_map = {
"twenty": self.bnf_concept("twenty", StrMatch("twenty")),
"number": self.bnf_concept("number", OrderedChoice(StrMatch("one"), StrMatch("two"))),
"twenties": self.bnf_concept("twenties",
Sequence(ConceptExpression("twenty"), ConceptExpression("number"))),
"three": Concept("three")
}
text = "twenty two"
expected = [CNC("twenties",
source="twenty two",
twenty=CC("twenty", body=DoNotResolve("twenty")),
number=CC("number", source="two", body=DoNotResolve("two"))
)]
self.validate_get_concepts_sequences(my_map, text, expected)
text = "twenty three"
expected = [[CN("twenty"), CN("three")], []]
self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
def test_i_can_mix_reference_to_other_concepts_when_body(self):
my_map = {
"foo": self.bnf_concept(Concept("foo", body="'foo'"),
@@ -654,12 +770,12 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
'one': my_map["one"],
ConceptParts.BODY: DoNotResolve(value='twenty one')}
@pytest.mark.parametrize("bar_expr", [
ConceptExpression("foo"),
OrderedChoice(ConceptExpression("foo"), StrMatch("one")),
Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two"))
@pytest.mark.parametrize("bar_expr, expected", [
(ConceptExpression("foo"), {}),
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
(Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']})
])
def test_i_can_detect_infinite_recursion(self, bar_expr):
def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
my_map = {
"foo": self.bnf_concept("foo", ConceptExpression("bar")),
"bar": self.bnf_concept("bar", bar_expr),
@@ -669,14 +785,64 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.context = context
parser.sheerka = sheerka
parsing_expression = parser.get_parsing_expression(my_map["foo"])
# every obvious cyclic recursion is removed from the concepts_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert parser.concepts_by_first_keyword == expected
# get_parsing_expression() also returns CHICKEN_AND_EGG
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
parsing_expression = parser.get_parsing_expression(my_map["bar"])
parsing_expression = parser.get_parsing_expression(context, my_map["bar"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
def test_i_can_detect_longer_infinite_recursion(self):
my_map = {
"foo": self.bnf_concept("foo", ConceptExpression("bar")),
"bar": self.bnf_concept("bar", ConceptExpression("baz")),
"baz": self.bnf_concept("baz", ConceptExpression("qux")),
"qux": self.bnf_concept("qux", ConceptExpression("foo")),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.context = context
parser.sheerka = sheerka
# every obvious cyclic recursion is removed from the concepts_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert parser.concepts_by_first_keyword == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert parser.concepts_grammars.get(my_map["foo"].id).body == {"1001", "1002", "1003", "1004"}
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id), BuiltinConcepts.CHICKEN_AND_EGG)
@pytest.mark.parametrize("expr, expected", [
(OrderedChoice(StrMatch("bar"), ConceptExpression("foo")), False),
(OrderedChoice(ConceptExpression("foo"), StrMatch("bar")), True),
(OrderedChoice(Sequence(StrMatch("bar"), ConceptExpression("foo")), StrMatch("baz")), False),
(OrderedChoice(Sequence(ConceptExpression("foo"), StrMatch("bar")), StrMatch("baz")), True)
])
def test_i_can_detect_ordered_choice_infinite_recursion(self, expr, expected):
my_map = {
"foo": self.bnf_concept("foo", expr),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
parser.context = context
parser.sheerka = sheerka
res = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected
def test_i_can_get_parsing_expression_when_concept_isa(self):
my_map = {
"one": Concept("one"),
@@ -690,15 +856,87 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
parsing_expression = parser.get_parsing_expression(my_map["twenties"])
parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
assert parsing_expression == Sequence(
ConceptExpression(my_map["twenty"], rule_name="twenty"),
ConceptExpression(my_map["number"], rule_name="number"))
assert parsing_expression.nodes[0].nodes == [StrMatch("twenty")]
assert isinstance(parsing_expression.nodes[1].nodes[0], OrderedChoice)
assert ConceptExpression(my_map["one"], rule_name="one") in parsing_expression.nodes[1].nodes[0].elements
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in parsing_expression.nodes[1].nodes[0].elements
assert len(parsing_expression.nodes) == len(parsing_expression.elements)
twenty_nodes = parsing_expression.nodes[0].nodes
assert twenty_nodes == [StrMatch("twenty")]
number_nodes = parsing_expression.nodes[1].nodes
assert len(number_nodes) == 1
assert isinstance(number_nodes[0], OrderedChoice)
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
assert my_map["number"].id not in parser.concepts_grammars
#
# def test_i_cannot_get_parsing_expression_when_concept_is_part_of_a_group(self):
# """
# In this test, twenties isa number.
# So 'number' in Sequence(twenty, number) will spawn 'twenties' which, because there is no other indication,
# will create an infinite loop
# :return:
# """
# my_map = {
# "one": Concept("one"),
# "twenty": Concept("twenty"),
# "number": Concept("number"),
# "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
# }
# sheerka, context, parser = self.init_parser(my_map, singleton=True)
# parser.context = context
# parser.sheerka = sheerka
# sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
# sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
# sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"]) # <- twenties is also a number
#
# parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
#
# parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
# assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
# assert parsing_expression.body == {my_map["twenties"].id, my_map["number"].id}
#
# assert isinstance(parser.concepts_grammars.get(my_map["one"].id), ParsingExpression)
# assert isinstance(parser.concepts_grammars.get(my_map["twenty"].id), ParsingExpression)
def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
my_map = {
"one": Concept("one"),
"twenty": Concept("twenty"),
"number": Concept("number"),
"twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.context = context
parser.sheerka = sheerka
sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"]) # <- twenties is also a number
parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
assert parsing_expression == Sequence(
ConceptExpression(my_map["twenty"], rule_name="twenty"),
ConceptExpression(my_map["number"], rule_name="number"))
assert len(parsing_expression.nodes) == len(parsing_expression.elements)
twenty_nodes = parsing_expression.nodes[0].nodes
assert twenty_nodes == [StrMatch("twenty")]
number_nodes = parsing_expression.nodes[1].nodes
assert len(number_nodes) == 1
assert isinstance(number_nodes[0], OrderedChoice)
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
def test_i_can_get_parsing_expression_when_sequence_of_concept(self):
my_map = {
@@ -709,7 +947,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.context = context
parser.sheerka = sheerka
parsing_expression = parser.get_parsing_expression(my_map["two_ones"])
parsing_expression = parser.get_parsing_expression(context, my_map["two_ones"])
assert parsing_expression == Sequence(
ConceptExpression(my_map["one"], rule_name="one"),
ConceptExpression(my_map["one"], rule_name="one"))
@@ -726,7 +964,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_can_recognize_unknown_then_they_look_like_known(self):
def test_i_can_recognize_unknown_when_they_look_like_known(self):
my_map = {
"one two": self.bnf_concept("one two", Sequence("one", "two")),
"three": self.bnf_concept("three")
@@ -752,15 +990,13 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert len(sequence) == 1
@pytest.mark.parametrize("parser_input, expected_status, expected", [
("one", True, [CNC("bnf one", source="one")]), # the bnf one is chosen
("one two", True, [CN("one and two", source="one two")]),
("three three three", True, [CN("one or more three", source="three three three")]),
("twenty two", True, [CN("twenties", source="twenty two")]),
("twenty four", True, [CN("twenties", source="twenty four")]),
("twenty one", False, [UTN("twenty "), CN("bnf one", source="one")]),
("twenty two + 1", True, [CN("twenties", source="twenty two"), " + 1"]),
("baz", True, [CNC("bnf baz", source="baz")]), # the bnf one is chosen
("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]),
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
("twenty one", True, [CNC("t1", source="twenty one", unit="one", one="one")]),
])
def test_i_can_parse(self, parser_input, expected_status, expected):
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
res = parser.parse(context, ParserInput(parser_input))
@@ -772,6 +1008,167 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_when_multiple_times_the_same_variable(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
text = "foo foo foo"
expected_array = compute_expected_array(cmap, text, [CNC("one or more foo", source=text)])
expected_array[0].compiled["foo"] = [cmap["foo"], cmap["foo"], cmap["foo"]]
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_test_when_expression_references_other_expressions(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
text = "twenty four"
expected = CNC("t2",
source=text,
unit=CC("three_four",
source="four",
four=CC("four", body=DoNotResolve("four")),
body=CC("four", body=DoNotResolve("four"))),
four="four")
expected_array = compute_expected_array(cmap, text, [expected])
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
# def test_i_cannot_parse_bnf_concept_mixed_with_isa_concepts(self):
# sheerka, context, parser = self.init_parser(init_from_sheerka=True)
#
# # thirties = cls.update_bnf(context, Concept("thirties",
# # definition="thirty number",
# # where="number < 10",
# # body="thirty + number").def_var("thirty").def_var("number"))
# # with thirties isa number
# # So number in 'thirty number' will spawn 'thirties' which, because there is no other indication, will
# # create an infinite loop
#
# text = "thirty one"
# expected = CNC("thirties",
# source=text,
# number=CC("number",
# source="one",
# one=CC("one", body=DoNotResolve("one")),
# body=CC("one", body=DoNotResolve("one"))),
# one=CC("one", body=DoNotResolve("one")),
# thirty="thirty")
# expected_array = compute_expected_array(cmap, text, [expected])
#
# res = parser.parse(context, ParserInput(text))
# not_for_me = res.value
# reason = res.value.body
#
# assert not res.status
# assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME)
# assert sheerka.isinstance(reason, BuiltinConcepts.CHICKEN_AND_EGG)
# assert reason.body == {cmap["thirties"].id, cmap["number"].id}
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
# thirties = cls.update_bnf(context, Concept("thirties",
# definition="thirty number",
# where="number < 10",
# body="thirty + number").def_var("thirty").def_var("number"))
text = "thirty one"
expected = CNC("thirties",
source=text,
number=CC("number",
source="one",
one=CC("one", body=DoNotResolve("one")),
body=CC("one", body=DoNotResolve("one"))),
one=CC("one", body=DoNotResolve("one")),
thirty="thirty")
expected_array = compute_expected_array(cmap, text, [expected])
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_2(self):
# this time, three is a number, and also part of three_four, even if it is not relevant in t3
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
text = "thirty three"
expected = CNC("thirties",
source=text,
number=CC("number",
source="three",
three=CC("three", body=DoNotResolve("three")),
body=CC("three", body=DoNotResolve("three"))),
three=CC("three", body=DoNotResolve("three")),
thirty="thirty")
expected_array = compute_expected_array(cmap, text, [expected])
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
sheerka.concepts_grammars.clear() # simulate restart
for c in cmap.values():
sheerka.get_by_id(c.id).bnf = None
text = "thirty three"
expected = CNC("thirties",
source=text,
number=CC("number",
source="three",
three=CC("three", body=DoNotResolve("three")),
body=CC("three", body=DoNotResolve("three"))),
three=CC("three", body=DoNotResolve("three")),
thirty="thirty")
expected_array = compute_expected_array(cmap, text, [expected])
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
text = "forty one"
expected = CNC("forties",
source=text,
number=CC("number",
source="one",
one=CC("one", body=DoNotResolve("one")),
body=CC("one", body=DoNotResolve("one"))),
one=CC("one", body=DoNotResolve("one")),
forty="forty")
expected_array = compute_expected_array(cmap, text, [expected])
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_when_keyword(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
@@ -800,10 +1197,80 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser_result = res.value
concepts_nodes = res.value.value
assert res.status == True
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_descent_grammar(self):
my_map = {
"factor": Concept("factor", definition="1 | 2 | 3"),
"term": Concept("term", definition="factor ('*' factor)*"),
"expr": Concept("expr", definition="term ('+' term)*"),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
text = "1 + 2 * 3"
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
factor = my_map["factor"]
term = my_map["term"]
expr = my_map["expr"]
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == [CNC(expr,
term=[CC(term,
body=CC(factor, body=DoNotResolve("1")),
factor=CC(factor, body=DoNotResolve("1"))),
CC(term,
body=DoNotResolve("2 * 3"),
factor=[
CC(factor, body=DoNotResolve("2")),
CC(factor, body=DoNotResolve("3")),
])],
factor=[
CC(factor, body=DoNotResolve("1")),
CC(factor, body=DoNotResolve("2")),
CC(factor, body=DoNotResolve("3"))],
body=DoNotResolve("1 + 2 * 3"))]
def test_i_can_parse_recursive_descent_grammar(self):
my_map = {
"factor": Concept("factor", definition="1 | 2 | 3"),
"term": self.bnf_concept("term", OrderedChoice(
Sequence(ConceptExpression("factor"), StrMatch("*"), ConceptExpression("term")),
ConceptExpression("factor"))),
"expr": self.bnf_concept("expr", OrderedChoice(
Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
ConceptExpression("term"))),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
text = "1 + 2 * 3"
res = parser.parse(context, ParserInput(text))
# concepts_nodes = res.value.value is too complicated to be validated
assert res.status
def test_i_can_parse_simple_recursive_grammar(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(StrMatch("foo"),
OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
assert parser.parse(context, ParserInput("foo bar")).status
assert parser.parse(context, ParserInput("foo foo foo bar")).status
assert not parser.parse(context, ParserInput("foo baz")).status
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
+6 -6
View File
@@ -54,12 +54,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
cmap["plus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right")
cmap["mult"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right")
cmap["minus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right")
TestSyaNodeParser.sheerka.services[SheerkaComparisonManager.NAME].is_greater_than(context,
BuiltinConcepts.PRECEDENCE,
cmap["mult"], cmap["plus"])
TestSyaNodeParser.sheerka.services[SheerkaComparisonManager.NAME].is_greater_than(context,
BuiltinConcepts.PRECEDENCE,
cmap["mult"], cmap["minus"])
TestSyaNodeParser.sheerka.services[SheerkaComparisonManager.NAME].set_is_greater_than(context,
BuiltinConcepts.PRECEDENCE,
cmap["mult"], cmap["plus"])
TestSyaNodeParser.sheerka.services[SheerkaComparisonManager.NAME].set_is_greater_than(context,
BuiltinConcepts.PRECEDENCE,
cmap["mult"], cmap["minus"])
# TestSyaNodeParser.sheerka.force_sya_def(context, [
# (cmap["plus"].id, 5, SyaAssociativity.Right),