298 lines
12 KiB
Python
298 lines
12 KiB
Python
import pytest
|
|
from core.concept import Concept
|
|
from parsers.BaseNodeParser import BaseNodeParser, NoFirstTokenError
|
|
from parsers.BnfNodeParser import StrMatch, Sequence, OrderedChoice, Optional, ZeroOrMore, OneOrMore, ConceptExpression
|
|
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
|
|
|
|
class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
    """Tests for the first-token index built by ``BaseNodeParser``.

    Two static entry points are exercised:

    * ``BaseNodeParser.get_concepts_by_first_token`` builds a mapping
      ``first token -> [concept ids]``. A token is either a plain keyword
      (``"foo"``) or a reference to another concept (``"c:|<id>:"``).
    * ``BaseNodeParser.resolve_concepts_by_first_keyword`` replaces the
      ``"c:|<id>:"`` references by the keywords they finally lead to.

    The expected ids ("1001", "1002", ...) follow the order in which each test
    registers its concepts.
    NOTE(review): presumably the in-memory fixture hands out ids sequentially
    starting at 1001 -- confirm against TestUsingMemoryBasedSheerka.
    """

    @pytest.mark.parametrize("concept, expected", [
        (Concept("foo"), {"foo": ["1001"]}),
        (Concept("foo a").def_var("a"), {"foo": ["1001"]}),
        (Concept("a b foo").def_var("a").def_var("b"), {"foo": ["1001"]}),
    ])
    def test_i_can_get_concepts_by_first_keyword(self, concept, expected):
        """
        Given a concept, I can find its first known token.

        example:
            Concept("a foo b").def_var("a").def_var("b")
            'a' and 'b' are properties
            the first 'real' token is foo
        """
        sheerka, context, *updated = self.init_concepts(concept)

        res = BaseNodeParser.get_concepts_by_first_token(context, updated)

        assert res.status
        assert res.body == expected

    @pytest.mark.parametrize("bnf, expected", [
        (StrMatch("foo"), {"foo": ["1002"]}),
        (StrMatch("bar"), {"bar": ["1002"]}),
        (ConceptExpression("bar"), {"c:|1001:": ["1002"]}),
        (Sequence(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"]}),
        (Sequence(StrMatch("foo"), ConceptExpression("bar")), {"foo": ["1002"]}),
        (Sequence(ConceptExpression("bar"), StrMatch("foo")), {"c:|1001:": ["1002"]}),
        (OrderedChoice(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"], "bar": ["1002"]}),
        (Optional(StrMatch("foo")), {"foo": ["1002"]}),
        (ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}),
        (OneOrMore(StrMatch("foo")), {"foo": ["1002"]}),
        # NOTE(review): this case used to be annotated "add both entries", yet
        # only the "--filter" key is expected -- confirm which one is intended.
        (StrMatch("--filter"), {"--filter": ["1002"]}),
    ])
    def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected):
        """The first token of a concept defined by a BNF comes from that BNF.

        "bar" is registered (and cached) before "foo", so a
        ``ConceptExpression("bar")`` shows up under the key ``"c:|1001:"``
        while "foo" itself is listed as "1002".
        """
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)

        bar = Concept("bar").init_key()
        sheerka.set_id_if_needed(bar, False)
        sheerka.add_in_cache(bar)

        # Only "foo" is handed to the parser; it is given an id but, unlike
        # "bar", it is not added to the cache.
        concept = Concept("foo").init_key()
        concept.set_bnf(bnf)
        sheerka.set_id_if_needed(concept, False)

        res = BaseNodeParser.get_concepts_by_first_token(context, [concept])

        assert res.status
        assert res.body == expected

    def test_i_can_get_concepts_by_first_keyword_when_multiple_concepts(self):
        """Several concepts are indexed together, one entry per first token.

        "foo" is an ordered choice, so it is listed under each alternative:
        the two concept references and the plain keyword "qux".
        """
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)

        bar = Concept("bar").init_key()
        sheerka.set_id_if_needed(bar, False)
        sheerka.add_in_cache(bar)

        baz = Concept("baz").init_key()
        sheerka.set_id_if_needed(baz, False)
        sheerka.add_in_cache(baz)

        foo = Concept("foo").init_key()
        foo.set_bnf(OrderedChoice(ConceptExpression("bar"), ConceptExpression("baz"), StrMatch("qux")))
        sheerka.set_id_if_needed(foo, False)

        res = BaseNodeParser.get_concepts_by_first_token(context, [bar, baz, foo])

        assert res.status
        assert res.body == {
            "bar": ["1001"],
            "baz": ["1002"],
            "c:|1001:": ["1003"],
            "c:|1002:": ["1003"],
            "qux": ["1003"],
        }

    def test_i_can_get_concepts_by_first_keyword_using_sheerka(self):
        """With ``use_sheerka=True`` the result also covers pre-existing concepts.

        Only "bar" and "foo" are passed to the parser, but the expected index
        also contains "one", "two" and "twenty", which were created through
        ``init_concepts(..., create_new=True)`` beforehand.
        """
        sheerka, context, *_ = self.init_concepts(
            "one",
            "two",
            Concept("twenty", definition="'twenty' (one|two)"),
            create_new=True
        )

        bar = Concept("bar").init_key()
        sheerka.set_id_if_needed(bar, False)
        sheerka.add_in_cache(bar)

        foo = Concept("foo").init_key()
        foo.set_bnf(OrderedChoice(ConceptExpression("one"), ConceptExpression("bar"), StrMatch("qux")))
        sheerka.set_id_if_needed(foo, False)

        res = BaseNodeParser.get_concepts_by_first_token(context, [bar, foo], use_sheerka=True)

        assert res.status
        assert res.body == {
            "one": ["1001"],
            "two": ["1002"],
            "twenty": ["1003"],
            "bar": ["1004"],
            "c:|1001:": ["1005"],
            "c:|1004:": ["1005"],
            "qux": ["1005"],
        }

    def test_i_cannot_get_concept_by_first_keyword_when_no_first_keyword(self):
        """A concept whose name is made only of variables has no first token.

        Both "x" and "y" are declared as variables, so the call is expected to
        fail with a ``NoFirstTokenError`` carrying the concept and its key.
        """
        sheerka, context, foo = self.init_concepts(Concept("x y", body="x y").def_var("x").def_var("y"))
        res = BaseNodeParser.get_concepts_by_first_token(context, [foo])

        assert not res.status
        assert res.body == NoFirstTokenError(foo, foo.key)

    def test_i_can_resolve_concepts_by_first_keyword(self):
        """References to concepts are followed until a plain keyword is reached.

        "three" is defined by "two", itself defined by "one": all three ids are
        expected to end up under the single keyword "one".
        """
        sheerka, context, *_ = self.init_concepts(
            "one",
            Concept("two", definition="one"),
            Concept("three", definition="two"),
            create_new=False
        )

        # Hand-written index, as if produced by get_concepts_by_first_token().
        concepts_by_first_keywords = {
            "one": ["1001"],
            "c:|1001:": ["1002"],
            "c:|1002:": ["1003"],
        }

        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)

        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "one": ["1001", "1002", "1003"],
        }

    def test_i_can_resolve_when_concepts_are_sets(self):
        """A reference to a set concept ("number") resolves to its members."""
        sheerka, context, number, *concepts = self.init_concepts(
            "number",
            "one",
            "two",
            "twenty",
            "hundred",
            Concept("twenties", definition="twenty number"),
            Concept("hundreds", definition="number hundred"),
            create_new=True  # mandatory because set_isa() needs it
        )

        sheerka.set_isa(context, sheerka.new("one"), number)
        sheerka.set_isa(context, sheerka.new("two"), number)
        sheerka.set_isa(context, sheerka.new("twenty"), number)
        # NOTE(review): "thirty" is never declared in init_concepts() above and
        # does not appear in the expected result -- confirm this is intended.
        sheerka.set_isa(context, sheerka.new("thirty"), number)
        sheerka.set_isa(context, sheerka.new("hundred"), number)
        sheerka.set_isa(context, sheerka.new("twenties"), number)
        sheerka.set_isa(context, sheerka.new("hundreds"), number)

        sheerka.concepts_grammars.clear()  # reset all the grammar to simulate Sheerka restart

        # cbft : concept_by_first_token (I usually don't use abbreviation)
        cbft = BaseNodeParser.get_concepts_by_first_token(context, [number] + concepts).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbft)

        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            'number': ['1001'],
            'one': ['1002', '1007'],
            'two': ['1003', '1007'],
            'twenty': ['1004', '1006', '1007'],
            'hundred': ['1005', '1007'],
        }

    def test_concepts_are_defined_once(self):
        """A concept reachable through several paths is listed only once.

        "baz" reaches "good" through both "foo" and "bar", yet its id (1004)
        must appear a single time under "good".
        """
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)
        good = self.create_and_add_in_cache_concept(sheerka, "good")
        foo = self.create_and_add_in_cache_concept(sheerka, "foo", bnf=ConceptExpression("good"))
        bar = self.create_and_add_in_cache_concept(sheerka, "bar", bnf=ConceptExpression("good"))
        baz = self.create_and_add_in_cache_concept(sheerka, "baz", bnf=OrderedChoice(
            ConceptExpression("foo"),
            ConceptExpression("bar")))

        concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(
            context, [good, foo, bar, baz]).body

        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001", "1002", "1003", "1004"],
        }

    def test_i_can_resolve_more_complex(self):
        """A sequence starting with a concept reference resolves to that concept's first keyword.

        "b" starts with a reference to "a", and "a" starts with "one": both ids
        are expected under "one".
        """
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)

        a = self.create_and_add_in_cache_concept(sheerka, "a", bnf=Sequence("one", "two"))
        b = self.create_and_add_in_cache_concept(sheerka, "b", bnf=Sequence(ConceptExpression("a"), "two"))

        concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(
            context, [a, b]).body

        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "one": ["1001", "1002"],
        }

    # The "tests_" prefix is kept on purpose: renaming would change the test id.
    def tests_i_can_detect_direct_recursion(self):
        """Two concepts that reference each other are reported as a cycle.

        "foo" and "bar" never reach a plain keyword, so only "good" remains in
        the resolved index and both ids are recorded in
        ``sheerka.chicken_and_eggs``.
        """
        sheerka, context, good, foo, bar = self.init_concepts(
            "good",
            self.bnf_concept("foo", ConceptExpression("bar")),
            self.bnf_concept("bar", ConceptExpression("foo")),
        )

        concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, foo, bar]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001"],
        }
        assert sheerka.chicken_and_eggs.get(foo.id) == {foo.id, bar.id}
        assert sheerka.chicken_and_eggs.get(bar.id) == {foo.id, bar.id}

    def test_i_can_detect_indirect_infinite_recursion(self):
        """A concept that leads into a cycle is reported together with that cycle.

        "two" and "three" reference each other; "one" references "two". All
        three ids are expected in every ``chicken_and_eggs`` entry.
        """
        sheerka, context, good, one, two, three = self.init_concepts(
            "good",
            self.bnf_concept("one", ConceptExpression("two")),
            self.bnf_concept("two", ConceptExpression("three")),
            self.bnf_concept("three", ConceptExpression("two")),
        )

        concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, one, two, three]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001"],
        }
        assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}

    def test_i_can_detect_the_longest_infinite_recursion_chain(self):
        """The cycle is declared before the concept that leads into it.

        Same graph as the indirect recursion test, but "one" is created last,
        after the "two"/"three" loop. The longest chain (all three ids) is
        still expected for every concept.
        """
        # Locals are unpacked in creation order (good, two, three, one) so that
        # each name is bound to the concept of the same name.
        sheerka, context, good, two, three, one = self.init_concepts(
            "good",
            self.bnf_concept("two", ConceptExpression("three")),
            self.bnf_concept("three", ConceptExpression("two")),
            self.bnf_concept("one", ConceptExpression("three")),
        )

        # Concepts are handed to the parser in their creation order.
        concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, two, three, one]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001"],
        }
        assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}
|
|
|
|
#
|
|
# def test_i_can_detect_infinite_recursion_from_ordered_choice(self):
|
|
# sheerka = self.get_sheerka()
|
|
# good = self.get_concept(sheerka, "good")
|
|
# one = self.get_concept(sheerka, "one", ConceptExpression("two"))
|
|
# two = self.get_concept(sheerka, "two", OrderedChoice(ConceptExpression("one"), ConceptExpression("two")))
|
|
#
|
|
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two]).body
|
|
#
|
|
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
|
|
# assert resolved_ret_val.status
|
|
# assert resolved_ret_val.body == {
|
|
# "good": ["1001"],
|
|
# BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"]
|
|
# }
|
|
#
|
|
# def test_i_can_detect_infinite_recursion_with_sequence(self):
|
|
# sheerka = self.get_sheerka()
|
|
# good = self.get_concept(sheerka, "good")
|
|
# one = self.get_concept(sheerka, "one", ConceptExpression("two"))
|
|
# two = self.get_concept(sheerka, "two", Sequence(StrMatch("yes"), ConceptExpression("one")))
|
|
#
|
|
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two]).body
|
|
#
|
|
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
|
|
# assert resolved_ret_val.status
|
|
# assert resolved_ret_val.body == {
|
|
# "good": ["1001"],
|
|
# BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"]
|
|
# }
|