|
|
|
@@ -1,25 +1,40 @@
|
|
|
|
|
import pytest
|
|
|
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
|
|
|
from core.concept import Concept, ConceptParts, DoNotResolve
|
|
|
|
|
from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF
|
|
|
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
|
|
|
from parsers.BaseNodeParser import CNC, UTN, CN
|
|
|
|
|
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
|
|
|
|
|
Optional, ZeroOrMore, OneOrMore, ConceptExpression
|
|
|
|
|
from parsers.BnfParser import BnfParser
|
|
|
|
|
|
|
|
|
|
import tests.parsers.parsers_utils
|
|
|
|
|
from tests.BaseTest import BaseTest
|
|
|
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
|
|
|
|
|
|
|
|
cmap = {
|
|
|
|
|
"one": Concept("one"),
|
|
|
|
|
"two": Concept("two"),
|
|
|
|
|
"three": Concept("three"),
|
|
|
|
|
"plus": Concept(name="a plus b").def_var("a").def_var("b"),
|
|
|
|
|
"bnf one": Concept("bnf_one", definition="'one'"),
|
|
|
|
|
'one and two': Concept("one and two", definition="one two"),
|
|
|
|
|
'one or more three': Concept("one or more three", definition="three+"),
|
|
|
|
|
'two or four': Concept("two or four", definition="two | 'four'"),
|
|
|
|
|
"twenties": Concept("twenties", definition="'twenty' c:two or four:=unit"),
|
|
|
|
|
"one or more plus": Concept("one or more plus", definition="c:a plus b:+"), # TODO
|
|
|
|
|
"four": Concept("four"),
|
|
|
|
|
"thirty": Concept("thirty", body=30),
|
|
|
|
|
"forty": Concept("forty", body=40),
|
|
|
|
|
"fifty": Concept("fifty", body=50),
|
|
|
|
|
"number": Concept("number"),
|
|
|
|
|
"foo": Concept("foo"),
|
|
|
|
|
"bar": Concept("bar"),
|
|
|
|
|
"baz": Concept("baz"),
|
|
|
|
|
|
|
|
|
|
"bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen
|
|
|
|
|
|
|
|
|
|
"plus": Concept("plus", definition="one 'plus' two").def_var("a").def_var("b"),
|
|
|
|
|
|
|
|
|
|
'foo then bar': Concept("foo then bar", definition="foo bar").def_var("foo").def_var("bar"),
|
|
|
|
|
'foo or bar': Concept("foo or bar", definition="foo | bar").def_var("foo").def_var("bar"),
|
|
|
|
|
'one or more foo': Concept("one or more foo", definition="foo+").def_var("foo"),
|
|
|
|
|
|
|
|
|
|
"t1": Concept("t1", definition="'twenty' (one|two)=unit").def_var("unit").def_var("one").def_var("two"),
|
|
|
|
|
"three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
|
|
|
|
|
"t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),
|
|
|
|
|
|
|
|
|
|
# testing keywords
|
|
|
|
|
"def_only": Concept("def"),
|
|
|
|
@@ -65,15 +80,57 @@ def compute_expected_array(my_concepts_map, expression, expected, exclude_body=F
|
|
|
|
|
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
sheerka = None
|
|
|
|
|
|
|
|
|
|
@staticmethod
def update_bnf(context, concept):
    """
    Parse the textual definition of a concept and attach the resulting BNF to it.

    :param context: execution context handed to ``BnfParser.parse``
    :param concept: concept whose ``metadata.definition`` is parsed
    :return: the same concept, with ``bnf`` set and its definition type set to ``DEFINITION_TYPE_BNF``
    :raises Exception: carrying the parser result when its status is falsy
    """
    outcome = BnfParser().parse(context, concept.metadata.definition)
    if not outcome.status:
        # hand the whole parser result to the exception so the failure is visible in the report
        raise Exception(outcome)

    concept.bnf = outcome.value.value
    concept.metadata.definition_type = DEFINITION_TYPE_BNF
    return concept
|
|
|
|
|
|
|
|
|
|
@classmethod
def setup_class(cls):
    """
    Build the Sheerka instance shared by the tests of this class.

    The concepts of ``cmap`` are created through ``init_parser``, the plain numbers are
    declared as members of "number", then the "thirties", "forties" and "fifties" BNF
    concepts are created, stored in ``cmap`` and declared as numbers as well.
    """
    t = cls()
    TestBnfNodeParser.sheerka, context, _ = t.init_parser(
        cmap,
        singleton=False,
        create_new=True,
        init_from_sheerka=True)

    # end of initialisation
    sheerka = TestBnfNodeParser.sheerka

    # the declaration order is kept as is: one .. four first, then the tens
    for name in ("one", "two", "three", "four", "thirty", "forty", "fifty"):
        sheerka.set_isa(context, sheerka.new(name), sheerka.new("number"))

    # each group is defined as '<tens> number' and is itself a number
    for tens, group in (("thirty", "thirties"), ("forty", "forties"), ("fifty", "fifties")):
        concept = cls.update_bnf(context, Concept(group,
                                                  definition=tens + " number",
                                                  where="number < 10",
                                                  body=tens + " + number").def_var(tens).def_var("number"))
        cmap[group] = sheerka.create_new_concept(context, concept).body.body
        sheerka.set_isa(context, sheerka.new(group), sheerka.new("number"))
|
|
|
|
|
|
|
|
|
|
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
|
|
|
|
|
if my_concepts_map is not None:
|
|
|
|
|
sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs)
|
|
|
|
@@ -174,6 +231,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
|
|
|
|
|
|
def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self):
|
|
|
|
|
# to match '--filter' in one word
|
|
|
|
|
my_map = {
|
|
|
|
|
"filter": self.bnf_concept("filter",
|
|
|
|
|
Sequence(StrMatch("-", skip_whitespace=False),
|
|
|
|
@@ -236,20 +294,50 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
|
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("concept_three, expected", [
    (Concept("three"), []),
    (BaseTest.bnf_concept("three", StrMatch("three")), [UTN('twenty '), "three"])
])
def test_i_can_manage_sequence_with_wrong_order_choice(self, concept_three, expected):
    """
    Check the sequences found when the token following 'twenty' is not one of the choices.

    "foo" only accepts 'twenty' followed by 'one' or 'two' while the input is
    "twenty three"; the expected result depends on how "three" is declared.
    """
    grammar = Sequence(StrMatch("twenty"), OrderedChoice(StrMatch("one"), StrMatch("two")))
    known_concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "three": concept_three,
    }

    self.validate_get_concepts_sequences(known_concepts, "twenty three", expected)
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("text, expected", [
    ("ok thirty one", [CNC("foo", source="ok thirty one")]),
    ("ok twenty one", [CNC("foo", source="ok twenty one")]),
    ("ok one", []),
])
def test_i_can_mix_sequence_and_ordered(self, text, expected):
    """
    Check a sequence whose middle element is an ordered choice.

    "foo" is 'ok', then 'twenty' or 'thirty', then 'one'; an input that skips the
    choice ("ok one") must not produce any concept.
    """
    my_map = {
        "foo": self.bnf_concept("foo",
                                Sequence(
                                    StrMatch("ok"),
                                    OrderedChoice(StrMatch("twenty"), StrMatch("thirty")),
                                    StrMatch("one"))
                                )}
    self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("text, expected", [
    # ("twenty one", [CNC("foo", source="twenty one")]),
    # ("twenty three", []),  # three does not exist
    ("twenty four", []),  # four exists but should not be seen
])
def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
    """
    Check that a known concept which is not part of the choice is not reported.

    "foo" is 'twenty' followed by 'one' or 'two'; "four" is declared as a plain concept.
    """
    grammar = Sequence(StrMatch("twenty"), OrderedChoice(StrMatch("one"), StrMatch("two")))
    known_concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "four": Concept("four"),
    }

    self.validate_get_concepts_sequences(known_concepts, text, expected)
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
|
|
|
("twenty thirty", [CNC("foo", source="twenty thirty")]),
|
|
|
|
|
("one", [CNC("foo", source="one")]),
|
|
|
|
@@ -531,6 +619,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
"bar": self.bnf_concept("bar", Sequence(
|
|
|
|
|
ConceptExpression("foo"),
|
|
|
|
|
OrderedChoice(StrMatch("one"), StrMatch("two")))),
|
|
|
|
|
"three": Concept("three")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
text = "twenty two"
|
|
|
|
@@ -553,6 +642,33 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
}
|
|
|
|
|
assert concept_bar.compiled["foo"].compiled == {ConceptParts.BODY: DoNotResolve("thirty")}
|
|
|
|
|
|
|
|
|
|
text = "thirty three"
|
|
|
|
|
expected = [[CN("foo", source="thirty"), CN("three")], []]
|
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
|
|
|
|
|
|
|
|
|
|
def test_i_can_mix_reference_to_other_concepts_2(self):
    """
    Check a sequence made only of references to other BNF concepts.

    "twenties" is the concept "twenty" followed by the concept "number" ('one' or 'two').
    """
    # this time, we use concept expression
    known_concepts = {
        "twenty": self.bnf_concept("twenty", StrMatch("twenty")),
        "number": self.bnf_concept("number", OrderedChoice(StrMatch("one"), StrMatch("two"))),
        "twenties": self.bnf_concept("twenties",
                                     Sequence(ConceptExpression("twenty"), ConceptExpression("number"))),
        "three": Concept("three")
    }

    # 'two' is a valid number: the whole input is a "twenties"
    matching = CNC("twenties",
                   source="twenty two",
                   twenty=CC("twenty", body=DoNotResolve("twenty")),
                   number=CC("number", source="two", body=DoNotResolve("two")))
    self.validate_get_concepts_sequences(known_concepts, "twenty two", [matching])

    # 'three' is not part of "number"
    not_matching = [[CN("twenty"), CN("three")], []]
    self.validate_get_concepts_sequences(known_concepts, "twenty three", not_matching, multiple_result=True)
|
|
|
|
|
|
|
|
|
|
def test_i_can_mix_reference_to_other_concepts_when_body(self):
|
|
|
|
|
my_map = {
|
|
|
|
|
"foo": self.bnf_concept(Concept("foo", body="'foo'"),
|
|
|
|
@@ -654,12 +770,12 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
'one': my_map["one"],
|
|
|
|
|
ConceptParts.BODY: DoNotResolve(value='twenty one')}
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("bar_expr", [
|
|
|
|
|
ConceptExpression("foo"),
|
|
|
|
|
OrderedChoice(ConceptExpression("foo"), StrMatch("one")),
|
|
|
|
|
Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two"))
|
|
|
|
|
@pytest.mark.parametrize("bar_expr, expected", [
|
|
|
|
|
(ConceptExpression("foo"), {}),
|
|
|
|
|
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
|
|
|
|
|
(Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']})
|
|
|
|
|
])
|
|
|
|
|
def test_i_can_detect_infinite_recursion(self, bar_expr):
|
|
|
|
|
def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
|
|
|
|
|
my_map = {
|
|
|
|
|
"foo": self.bnf_concept("foo", ConceptExpression("bar")),
|
|
|
|
|
"bar": self.bnf_concept("bar", bar_expr),
|
|
|
|
@@ -669,14 +785,64 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
parser.context = context
|
|
|
|
|
parser.sheerka = sheerka
|
|
|
|
|
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(my_map["foo"])
|
|
|
|
|
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
|
|
|
|
|
parser.init_from_concepts(context, my_map.values())
|
|
|
|
|
assert parser.concepts_by_first_keyword == expected
|
|
|
|
|
|
|
|
|
|
# get_parsing_expression() also returns CHICKEN_AND_EGG
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
|
|
|
|
|
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
|
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
|
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(my_map["bar"])
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(context, my_map["bar"])
|
|
|
|
|
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
|
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
|
|
|
|
|
|
def test_i_can_detect_longer_infinite_recursion(self):
    """
    Check that a cycle going through four concepts (foo -> bar -> baz -> qux -> foo) is detected.
    """
    chain = (("foo", "bar"), ("bar", "baz"), ("baz", "qux"), ("qux", "foo"))
    my_map = {name: self.bnf_concept(name, ConceptExpression(target)) for name, target in chain}

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.context = context
    parser.sheerka = sheerka

    # every obvious cyclic recursion is removed from the concepts_by_first_keyword dict
    parser.init_from_concepts(context, my_map.values())
    assert parser.concepts_by_first_keyword == {}

    # asking for the grammar of the first concept reports the cycle ...
    foo_expression = parser.get_parsing_expression(context, my_map["foo"])
    assert sheerka.isinstance(foo_expression, BuiltinConcepts.CHICKEN_AND_EGG)

    grammars = parser.concepts_grammars
    assert sheerka.isinstance(grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
    assert grammars.get(my_map["foo"].id).body == {"1001", "1002", "1003", "1004"}

    # ... and the other concepts of the cycle are flagged as well
    for name in ("bar", "baz", "qux"):
        assert sheerka.isinstance(grammars.get(my_map[name].id), BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("expr, expected", [
    (OrderedChoice(StrMatch("bar"), ConceptExpression("foo")), False),
    (OrderedChoice(ConceptExpression("foo"), StrMatch("bar")), True),
    (OrderedChoice(Sequence(StrMatch("bar"), ConceptExpression("foo")), StrMatch("baz")), False),
    (OrderedChoice(Sequence(ConceptExpression("foo"), StrMatch("bar")), StrMatch("baz")), True)
])
def test_i_can_detect_ordered_choice_infinite_recursion(self, expr, expected):
    """
    Check when a self-reference inside an ordered choice is reported as CHICKEN_AND_EGG.

    :param expr: grammar of "foo", which references "foo" itself
    :param expected: whether ``get_parsing_expression`` must return a CHICKEN_AND_EGG concept
    """
    foo = self.bnf_concept("foo", expr)
    my_map = {"foo": foo}

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.init_from_concepts(context, my_map.values())
    parser.context = context
    parser.sheerka = sheerka

    expression = parser.get_parsing_expression(context, my_map["foo"])
    is_cyclic = sheerka.isinstance(expression, BuiltinConcepts.CHICKEN_AND_EGG)
    assert is_cyclic == expected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_i_can_get_parsing_expression_when_concept_isa(self):
|
|
|
|
|
my_map = {
|
|
|
|
|
"one": Concept("one"),
|
|
|
|
@@ -690,15 +856,87 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
|
|
|
|
|
sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
|
|
|
|
|
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(my_map["twenties"])
|
|
|
|
|
parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
|
|
|
|
|
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
|
|
|
|
|
assert parsing_expression == Sequence(
|
|
|
|
|
ConceptExpression(my_map["twenty"], rule_name="twenty"),
|
|
|
|
|
ConceptExpression(my_map["number"], rule_name="number"))
|
|
|
|
|
|
|
|
|
|
assert parsing_expression.nodes[0].nodes == [StrMatch("twenty")]
|
|
|
|
|
assert isinstance(parsing_expression.nodes[1].nodes[0], OrderedChoice)
|
|
|
|
|
assert ConceptExpression(my_map["one"], rule_name="one") in parsing_expression.nodes[1].nodes[0].elements
|
|
|
|
|
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in parsing_expression.nodes[1].nodes[0].elements
|
|
|
|
|
assert len(parsing_expression.nodes) == len(parsing_expression.elements)
|
|
|
|
|
twenty_nodes = parsing_expression.nodes[0].nodes
|
|
|
|
|
assert twenty_nodes == [StrMatch("twenty")]
|
|
|
|
|
|
|
|
|
|
number_nodes = parsing_expression.nodes[1].nodes
|
|
|
|
|
assert len(number_nodes) == 1
|
|
|
|
|
assert isinstance(number_nodes[0], OrderedChoice)
|
|
|
|
|
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
|
|
|
|
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
|
|
|
|
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
|
|
|
|
|
|
|
|
|
assert my_map["number"].id not in parser.concepts_grammars
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# def test_i_cannot_get_parsing_expression_when_concept_is_part_of_a_group(self):
|
|
|
|
|
# """
|
|
|
|
|
# In this test, twenties isa number
|
|
|
|
|
# # So 'number' in Sequence(thirty, number) will spawn 'twenties' which, because there is no other indication,
|
|
|
|
|
# # will create an infinite loop
|
|
|
|
|
# :return:
|
|
|
|
|
# """
|
|
|
|
|
# my_map = {
|
|
|
|
|
# "one": Concept("one"),
|
|
|
|
|
# "twenty": Concept("twenty"),
|
|
|
|
|
# "number": Concept("number"),
|
|
|
|
|
# "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
|
|
|
|
|
# }
|
|
|
|
|
# sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
|
|
|
|
# parser.context = context
|
|
|
|
|
# parser.sheerka = sheerka
|
|
|
|
|
# sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
|
|
|
|
|
# sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
|
|
|
|
|
# sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"]) # <- twenties is also a number
|
|
|
|
|
#
|
|
|
|
|
# parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
|
|
|
|
|
#
|
|
|
|
|
# parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
|
|
|
|
|
# assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
|
# assert parsing_expression.body == {my_map["twenties"].id, my_map["number"].id}
|
|
|
|
|
#
|
|
|
|
|
# assert isinstance(parser.concepts_grammars.get(my_map["one"].id), ParsingExpression)
|
|
|
|
|
# assert isinstance(parser.concepts_grammars.get(my_map["twenty"].id), ParsingExpression)
|
|
|
|
|
|
|
|
|
|
def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
    """
    Check the parsing expression of "twenties" when "twenties" itself is declared as a "number".

    "twenties" is the concept "twenty" followed by the concept "number".
    """
    twenties_grammar = Sequence(ConceptExpression("twenty"), ConceptExpression("number"))
    my_map = {
        "one": Concept("one"),
        "twenty": Concept("twenty"),
        "number": Concept("number"),
        "twenties": self.bnf_concept("twenties", twenties_grammar)
    }
    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.context = context
    parser.sheerka = sheerka

    # "twenties" is also a number
    for member in ("one", "twenty", "twenties"):
        sheerka.set_isa(context, sheerka.new(member), my_map["number"])

    # make sure parsing expression is created from scratch
    parser.concepts_grammars.clear()

    parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
    assert parsing_expression == Sequence(
        ConceptExpression(my_map["twenty"], rule_name="twenty"),
        ConceptExpression(my_map["number"], rule_name="number"))
    assert len(parsing_expression.nodes) == len(parsing_expression.elements)

    # first element: the "twenty" concept
    assert parsing_expression.nodes[0].nodes == [StrMatch("twenty")]

    # second element: a single ordered choice listing "one" and "twenty"
    number_nodes = parsing_expression.nodes[1].nodes
    assert len(number_nodes) == 1
    choice = number_nodes[0]
    assert isinstance(choice, OrderedChoice)
    assert len(choice.nodes) == len(choice.elements)
    assert ConceptExpression(my_map["one"], rule_name="one") in choice.nodes
    assert ConceptExpression(my_map["twenty"], rule_name="twenty") in choice.nodes
|
|
|
|
|
|
|
|
|
|
def test_i_can_get_parsing_expression_when_sequence_of_concept(self):
|
|
|
|
|
my_map = {
|
|
|
|
@@ -709,7 +947,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
parser.context = context
|
|
|
|
|
parser.sheerka = sheerka
|
|
|
|
|
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(my_map["two_ones"])
|
|
|
|
|
parsing_expression = parser.get_parsing_expression(context, my_map["two_ones"])
|
|
|
|
|
assert parsing_expression == Sequence(
|
|
|
|
|
ConceptExpression(my_map["one"], rule_name="one"),
|
|
|
|
|
ConceptExpression(my_map["one"], rule_name="one"))
|
|
|
|
@@ -726,7 +964,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
|
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
|
|
|
|
|
|
def test_i_can_recognize_unknown_then_they_look_like_known(self):
|
|
|
|
|
def test_i_can_recognize_unknown_when_they_look_like_known(self):
|
|
|
|
|
my_map = {
|
|
|
|
|
"one two": self.bnf_concept("one two", Sequence("one", "two")),
|
|
|
|
|
"three": self.bnf_concept("three")
|
|
|
|
@@ -752,15 +990,13 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
assert len(sequence) == 1
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("parser_input, expected_status, expected", [
|
|
|
|
|
("one", True, [CNC("bnf one", source="one")]), # the bnf one is chosen
|
|
|
|
|
("one two", True, [CN("one and two", source="one two")]),
|
|
|
|
|
("three three three", True, [CN("one or more three", source="three three three")]),
|
|
|
|
|
("twenty two", True, [CN("twenties", source="twenty two")]),
|
|
|
|
|
("twenty four", True, [CN("twenties", source="twenty four")]),
|
|
|
|
|
("twenty one", False, [UTN("twenty "), CN("bnf one", source="one")]),
|
|
|
|
|
("twenty two + 1", True, [CN("twenties", source="twenty two"), " + 1"]),
|
|
|
|
|
("baz", True, [CNC("bnf baz", source="baz")]), # the bnf one is chosen
|
|
|
|
|
("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]),
|
|
|
|
|
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
|
|
|
|
|
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
|
|
|
|
|
("twenty one", True, [CNC("t1", source="twenty one", unit="one", one="one")]),
|
|
|
|
|
])
|
|
|
|
|
def test_i_can_parse(self, parser_input, expected_status, expected):
|
|
|
|
|
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
|
|
|
|
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
|
|
|
|
|
|
|
|
|
res = parser.parse(context, ParserInput(parser_input))
|
|
|
|
@@ -772,6 +1008,167 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
|
|
|
|
assert concepts_nodes == expected_array
|
|
|
|
|
|
|
|
|
|
def test_i_can_when_multiple_times_the_same_variable(self):
    """
    Check that a variable matched several times ("foo foo foo") is compiled as a list.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    source = "foo foo foo"
    expected_nodes = compute_expected_array(cmap, source, [CNC("one or more foo", source=source)])
    # one entry per occurrence of "foo" in the input
    expected_nodes[0].compiled["foo"] = [cmap["foo"]] * 3

    outcome = parser.parse(context, ParserInput(source))
    parser_result = outcome.value
    concepts_nodes = parser_result.value

    assert outcome.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_nodes
|
|
|
|
|
|
|
|
|
|
def test_i_can_test_when_expression_references_other_expressions(self):
    """
    Check the parsing of "twenty four" as a "t2", whose unit is the "three_four" concept.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    source = "twenty four"
    unit = CC("three_four",
              source="four",
              four=CC("four", body=DoNotResolve("four")),
              body=CC("four", body=DoNotResolve("four")))
    t2 = CNC("t2", source=source, unit=unit, four="four")
    expected_nodes = compute_expected_array(cmap, source, [t2])

    outcome = parser.parse(context, ParserInput(source))
    parser_result = outcome.value
    concepts_nodes = parser_result.value

    assert outcome.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_nodes
|
|
|
|
|
|
|
|
|
|
# def test_i_cannot_parse_bnf_concept_mixed_with_isa_concepts(self):
|
|
|
|
|
# sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
|
|
|
|
#
|
|
|
|
|
# # thirties = cls.update_bnf(context, Concept("thirties",
|
|
|
|
|
# # definition="thirty number",
|
|
|
|
|
# # where="number < 10",
|
|
|
|
|
# # body="thirty + number").def_var("thirty").def_var("number"))
|
|
|
|
|
# # with thirties isa number
|
|
|
|
|
# # So number in 'thirty number' will spawn 'thirties' which, because there is no other indication, will
|
|
|
|
|
# # create an infinite loop
|
|
|
|
|
#
|
|
|
|
|
# text = "thirty one"
|
|
|
|
|
# expected = CNC("thirties",
|
|
|
|
|
# source=text,
|
|
|
|
|
# number=CC("number",
|
|
|
|
|
# source="one",
|
|
|
|
|
# one=CC("one", body=DoNotResolve("one")),
|
|
|
|
|
# body=CC("one", body=DoNotResolve("one"))),
|
|
|
|
|
# one=CC("one", body=DoNotResolve("one")),
|
|
|
|
|
# thirty="thirty")
|
|
|
|
|
# expected_array = compute_expected_array(cmap, text, [expected])
|
|
|
|
|
#
|
|
|
|
|
# res = parser.parse(context, ParserInput(text))
|
|
|
|
|
# not_for_me = res.value
|
|
|
|
|
# reason = res.value.body
|
|
|
|
|
#
|
|
|
|
|
# assert not res.status
|
|
|
|
|
# assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME)
|
|
|
|
|
# assert sheerka.isinstance(reason, BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
|
# assert reason.body == {cmap["thirties"].id, cmap["number"].id}
|
|
|
|
|
|
|
|
|
|
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
    """
    Check the parsing of "thirty one" as a "thirties", whose number part is resolved to "one".
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    # reminder of the definition used (see setup_class):
    # thirties = cls.update_bnf(context, Concept("thirties",
    #                                            definition="thirty number",
    #                                            where="number < 10",
    #                                            body="thirty + number").def_var("thirty").def_var("number"))

    source = "thirty one"
    number = CC("number",
                source="one",
                one=CC("one", body=DoNotResolve("one")),
                body=CC("one", body=DoNotResolve("one")))
    thirties = CNC("thirties",
                   source=source,
                   number=number,
                   one=CC("one", body=DoNotResolve("one")),
                   thirty="thirty")
    expected_nodes = compute_expected_array(cmap, source, [thirties])

    outcome = parser.parse(context, ParserInput(source))
    parser_result = outcome.value
    concepts_nodes = parser_result.value

    assert outcome.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_nodes
|
|
|
|
|
|
|
|
|
|
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_2(self):
    """
    Check the parsing of "thirty three" as a "thirties", whose number part is resolved to "three".
    """
    # this time, three is a number, and also part of three_four, even if it is not relevant in t3
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    source = "thirty three"
    number = CC("number",
                source="three",
                three=CC("three", body=DoNotResolve("three")),
                body=CC("three", body=DoNotResolve("three")))
    thirties = CNC("thirties",
                   source=source,
                   number=number,
                   three=CC("three", body=DoNotResolve("three")),
                   thirty="thirty")
    expected_nodes = compute_expected_array(cmap, source, [thirties])

    outcome = parser.parse(context, ParserInput(source))
    parser_result = outcome.value
    concepts_nodes = parser_result.value

    assert outcome.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_nodes
|
|
|
|
|
|
|
|
|
|
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
    """
    Check that "thirties" and "forties" are still parsed once the cached grammars are dropped.

    The grammars kept by sheerka are cleared and the ``bnf`` of every concept of ``cmap``
    is reset before parsing, to simulate a restart.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    # simulate restart
    sheerka.concepts_grammars.clear()
    for known in cmap.values():
        sheerka.get_by_id(known.id).bnf = None

    def parse_and_check(source, expected_concept):
        # parse 'source' and compare the nodes found with the single expected concept
        expected_nodes = compute_expected_array(cmap, source, [expected_concept])

        outcome = parser.parse(context, ParserInput(source))
        parser_result = outcome.value
        concepts_nodes = parser_result.value

        assert outcome.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert concepts_nodes == expected_nodes

    text = "thirty three"
    parse_and_check(text, CNC("thirties",
                              source=text,
                              number=CC("number",
                                        source="three",
                                        three=CC("three", body=DoNotResolve("three")),
                                        body=CC("three", body=DoNotResolve("three"))),
                              three=CC("three", body=DoNotResolve("three")),
                              thirty="thirty"))

    text = "forty one"
    parse_and_check(text, CNC("forties",
                              source=text,
                              number=CC("number",
                                        source="one",
                                        one=CC("one", body=DoNotResolve("one")),
                                        body=CC("one", body=DoNotResolve("one"))),
                              one=CC("one", body=DoNotResolve("one")),
                              forty="forty"))
|
|
|
|
|
|
|
|
|
|
def test_i_can_parse_when_keyword(self):
|
|
|
|
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
|
|
|
|
|
|
|
|
@@ -800,10 +1197,80 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
|
parser_result = res.value
|
|
|
|
|
concepts_nodes = res.value.value
|
|
|
|
|
|
|
|
|
|
assert res.status == True
|
|
|
|
|
assert res.status
|
|
|
|
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
|
|
|
|
assert concepts_nodes == expected_array
|
|
|
|
|
|
|
|
|
|
def test_i_can_parse_descent_grammar(self):
    """
    Check the nodes produced for "1 + 2 * 3" with an expr / term / factor grammar
    written with repetitions.
    """
    my_map = {
        "factor": Concept("factor", definition="1 | 2 | 3"),
        "term": Concept("term", definition="factor ('*' factor)*"),
        "expr": Concept("expr", definition="term ('+' term)*"),
    }

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.init_from_concepts(context, my_map.values())

    outcome = parser.parse(context, ParserInput("1 + 2 * 3"))
    parser_result = outcome.value
    concepts_nodes = parser_result.value

    factor, term, expr = my_map["factor"], my_map["term"], my_map["expr"]

    def factor_of(digit):
        # a "factor" concept whose body is the raw digit
        return CC(factor, body=DoNotResolve(digit))

    assert outcome.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    # left operand of '+': the single factor '1'
    first_term = CC(term, body=factor_of("1"), factor=factor_of("1"))
    # right operand of '+': the product '2 * 3'
    second_term = CC(term, body=DoNotResolve("2 * 3"), factor=[factor_of("2"), factor_of("3")])

    assert concepts_nodes == [CNC(expr,
                                  term=[first_term, second_term],
                                  factor=[factor_of("1"), factor_of("2"), factor_of("3")],
                                  body=DoNotResolve("1 + 2 * 3"))]
|
|
|
|
|
|
|
|
|
|
def test_i_can_parse_recursive_descent_grammar(self):
    """
    Check that "1 + 2 * 3" is parsed when "term" and "expr" reference themselves
    in the last position of a sequence.
    """
    term_grammar = OrderedChoice(
        Sequence(ConceptExpression("factor"), StrMatch("*"), ConceptExpression("term")),
        ConceptExpression("factor"))
    expr_grammar = OrderedChoice(
        Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
        ConceptExpression("term"))

    my_map = {"factor": Concept("factor", definition="1 | 2 | 3")}
    my_map["term"] = self.bnf_concept("term", term_grammar)
    my_map["expr"] = self.bnf_concept("expr", expr_grammar)

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.init_from_concepts(context, my_map.values())

    outcome = parser.parse(context, ParserInput("1 + 2 * 3"))

    # the resulting nodes (outcome.value.value) are too complicated to be validated
    assert outcome.status
|
|
|
|
|
|
|
|
|
|
def test_i_can_parse_simple_recursive_grammar(self):
    """
    Check a concept that references itself after a literal: 'foo' followed by 'bar' or by "foo" again.
    """
    tail = OrderedChoice(StrMatch("bar"), ConceptExpression("foo"))
    my_map = {"foo": self.bnf_concept("foo", Sequence(StrMatch("foo"), tail))}

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.init_from_concepts(context, my_map.values())

    # inputs ending with 'bar' are accepted, whatever the number of leading 'foo'
    for accepted in ("foo bar", "foo foo foo bar"):
        assert parser.parse(context, ParserInput(accepted)).status

    assert not parser.parse(context, ParserInput("foo baz")).status
|
|
|
|
|
|
|
|
|
|
# @pytest.mark.parametrize("parser_input, expected", [
|
|
|
|
|
# ("one", [
|
|
|
|
|
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
|
|
|
|
|