Fixed #3: Added sheerka.resolve_rule()

Fixed #5: Refactored SheerkaComparisonManager
Fixed #6: Sya parser no longer works after restart
This commit is contained in:
2021-01-15 07:11:04 +01:00
parent e26c83a825
commit 821dbed189
44 changed files with 1617 additions and 1068 deletions
+274 -20
View File
@@ -1,18 +1,20 @@
import pytest
from cache.CacheManager import ConceptNotFound
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import ensure_bnf
from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF, get_concept_attrs, \
DEFINITION_TYPE_BNF
from core.global_symbols import NotInit, NotFound
from core.sheerka.Sheerka import Sheerka
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager, NoModificationFound, ForbiddenAttribute, \
UnknownAttribute, CannotRemoveMeta, ValueNotFound, ConceptIsReferenced
from parsers.BaseNodeParser import BaseNodeParser
from parsers.BnfNodeParser import Sequence, StrMatch, ConceptExpression
UnknownAttribute, CannotRemoveMeta, ValueNotFound, ConceptIsReferenced, NoFirstTokenError
from parsers.BnfNodeParser import Sequence, StrMatch, ConceptExpression, OrderedChoice, Optional, ZeroOrMore, OneOrMore
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
compute_concepts_by_first_token = SheerkaConceptManager.compute_concepts_by_first_token
resolve_concepts_by_first_keyword = SheerkaConceptManager.resolve_concepts_by_first_keyword
class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
def test_i_can_create_a_concept(self):
@@ -48,15 +50,15 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.get_by_hash(concept.get_definition_hash()) == concept
# I can get by the first entry
assert sheerka.om.get(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") == [concept.id]
assert sheerka.om.get(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") == [concept.id]
assert sheerka.om.get(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") == [concept.id]
assert sheerka.om.get(service.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") == [concept.id]
# saved in sdp
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_ID_ENTRY, concept.id)
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_KEY_ENTRY, concept.key)
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_NAME_ENTRY, concept.name)
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash())
assert sheerka.om.current_sdp().exists(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+")
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+")
def test_i_cannot_create_a_bnf_concept_that_references_a_concept_that_cannot_be_resolved(self):
sheerka, context, one_1, one_1_0 = self.init_concepts(Concept("one", body="1"), Concept("one", body="1.0"))
@@ -104,7 +106,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_KEY_ENTRY, concept.key)
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_NAME_ENTRY, concept.name)
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash())
assert sheerka.om.current_sdp().exists(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "hello")
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "hello")
def test_i_cannot_add_the_same_concept_twice(self):
"""
@@ -185,8 +187,8 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert res.status
# I can get by the first entry
assert sheerka.om.get(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id]
assert sheerka.om.get(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id]
assert sheerka.om.get(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id]
assert sheerka.om.get(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id]
@pytest.mark.parametrize("expression", [
"--'filter' ('one' | 'two') ",
@@ -198,8 +200,8 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
create_new=True).unpack()
# I can get by the first entry
assert sheerka.om.get(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id]
assert sheerka.om.get(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id]
assert sheerka.om.get(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id]
assert sheerka.om.get(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id]
def test_concept_references_are_updated_1(self):
sheerka, context, one, two, number, twenty, twenties = self.init_test().with_concepts(
@@ -494,11 +496,11 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
Concept("baz", definition="foo"),
create_new=True).unpack()
assert sheerka.om.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"foo": ["1001"],
"bar": ["1002"],
'c:|1001:': ['1003']}
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
'foo': ['1001', '1003'],
'bar': ['1002']}
@@ -506,10 +508,10 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
res = sheerka.modify_concept(context, foo, to_add)
assert res.status
assert sheerka.om.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"bar": ["1002", "1001"],
'c:|1001:': ['1003']}
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
'bar': ['1002', '1001', '1003']}
def test_references_are_updated_after_concept_modification(self):
@@ -531,7 +533,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert twenty_one.get_metadata().definition == "'twenty' one"
assert twenty_one.get_bnf() is None
BaseNodeParser.ensure_bnf(context, twenty_one)
ensure_bnf(context, twenty_one)
assert twenty_one.get_bnf() == Sequence(StrMatch('twenty'), ConceptExpression(modified, rule_name='one'))
def test_i_can_modify_on_top_of_a_new_ontology_layer(self):
@@ -730,6 +732,258 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NEW_CONCEPT)
@pytest.mark.parametrize("concept, expected", [
(Concept("foo"), {"foo": ["1001"]}),
(Concept("foo a").def_var("a"), {"foo": ["1001"]}),
(Concept("a b foo").def_var("a").def_var("b"), {"foo": ["1001"]}),
])
def test_i_can_get_concepts_by_first_keyword(self, concept, expected):
"""
Given a concept, i can find the first know token
example:
Concept("a foo b").def_var("a").def_var("b")
'a' and 'b' are properties
the first 'real' token is foo
:return:
"""
sheerka, context, *updated = self.init_concepts(concept)
res = SheerkaConceptManager.compute_concepts_by_first_token(context, updated)
assert res.status
assert res.body == expected
@pytest.mark.parametrize("bnf, expected", [
(StrMatch("foo"), {"foo": ["1002"]}),
(StrMatch("bar"), {"bar": ["1002"]}),
(ConceptExpression("bar"), {"c:|1001:": ["1002"]}),
(Sequence(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"]}),
(Sequence(StrMatch("foo"), ConceptExpression("bar")), {"foo": ["1002"]}),
(Sequence(ConceptExpression("bar"), StrMatch("foo")), {"c:|1001:": ["1002"]}),
(OrderedChoice(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"], "bar": ["1002"]}),
(Optional(StrMatch("foo")), {"foo": ["1002"]}),
(ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}),
(OneOrMore(StrMatch("foo")), {"foo": ["1002"]}),
(StrMatch("--filter"), {"--filter": ["1002"]}), # add both entries
])
def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected):
sheerka, context = self.init_test().unpack()
bar = Concept("bar").init_key()
sheerka.set_id_if_needed(bar, False)
sheerka.test_only_add_in_cache(bar)
concept = Concept("foo").init_key()
concept.set_bnf(bnf)
sheerka.set_id_if_needed(concept, False)
res = compute_concepts_by_first_token(context, [concept])
assert res.status
assert res.body == expected
def test_i_can_get_concepts_by_first_keyword_when_multiple_concepts(self):
sheerka = self.get_sheerka()
context = self.get_context(sheerka)
bar = Concept("bar").init_key()
sheerka.set_id_if_needed(bar, False)
sheerka.test_only_add_in_cache(bar)
baz = Concept("baz").init_key()
sheerka.set_id_if_needed(baz, False)
sheerka.test_only_add_in_cache(baz)
foo = Concept("foo").init_key()
foo.set_bnf(OrderedChoice(ConceptExpression("bar"), ConceptExpression("baz"), StrMatch("qux")))
sheerka.set_id_if_needed(foo, False)
res = compute_concepts_by_first_token(context, [bar, baz, foo])
assert res.status
assert res.body == {
"bar": ["1001"],
"baz": ["1002"],
"c:|1001:": ["1003"],
"c:|1002:": ["1003"],
"qux": ["1003"],
}
def test_i_can_get_concepts_by_first_keyword_using_sheerka(self):
sheerka, context, *updated = self.init_test().with_concepts(
"one",
"two",
Concept("twenty", definition="'twenty' (one|two)"),
create_new=True
).unpack()
bar = Concept("bar").init_key()
sheerka.set_id_if_needed(bar, False)
sheerka.test_only_add_in_cache(bar)
foo = Concept("foo").init_key()
foo.set_bnf(OrderedChoice(ConceptExpression("one"), ConceptExpression("bar"), StrMatch("qux")))
sheerka.set_id_if_needed(foo, False)
res = compute_concepts_by_first_token(context, [bar, foo], use_sheerka=True)
assert res.status
assert res.body == {
"one": ["1001"],
"two": ["1002"],
"twenty": ["1003"],
"bar": ["1004"],
"c:|1001:": ["1005"],
"c:|1004:": ["1005"],
"qux": ["1005"],
}
def test_i_cannot_get_concept_by_first_keyword_when_no_first_keyword(self):
sheerka, context, foo = self.init_concepts(Concept("x y", body="x y").def_var("x").def_var("y"))
res = compute_concepts_by_first_token(context, [foo])
assert not res.status
assert res.body == NoFirstTokenError(foo, foo.key)
def test_i_can_resolve_concepts_by_first_keyword(self):
sheerka, context, *updated = self.init_concepts(
"one",
Concept("two", definition="one"),
Concept("three", definition="two"))
concepts_by_first_keywords = {
"one": ["1001"],
"c:|1001:": ["1002"],
"c:|1002:": ["1003"],
}
resolved_ret_val = resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
assert resolved_ret_val.status
assert resolved_ret_val.body == {
"one": ["1001", "1002", "1003"],
}
def test_i_can_resolve_when_concepts_are_sets(self):
sheerka, context, number, *concepts = self.init_concepts(
"number",
"one",
"two",
"twenty",
"hundred",
Concept("twenties", definition="twenty number"),
Concept("hundreds", definition="number hundred"),
)
sheerka.set_isa(context, sheerka.new("one"), number)
sheerka.set_isa(context, sheerka.new("two"), number)
sheerka.set_isa(context, sheerka.new("twenty"), number)
sheerka.set_isa(context, sheerka.new("thirty"), number)
sheerka.set_isa(context, sheerka.new("hundred"), number)
sheerka.set_isa(context, sheerka.new("twenties"), number)
sheerka.set_isa(context, sheerka.new("hundreds"), number)
sheerka.concepts_grammars.clear() # reset all the grammar to simulate Sheerka restart
# cbft : concept_by_first_token (I usually don't use abbreviation)
cbft = compute_concepts_by_first_token(context, [number] + concepts).body
resolved_ret_val = resolve_concepts_by_first_keyword(context, cbft)
assert resolved_ret_val.status
assert resolved_ret_val.body == {
'number': ['1001'],
'one': ['1002', '1007'],
'two': ['1003', '1007'],
'twenty': ['1004', '1006', '1007'],
'hundred': ['1005', '1007'],
}
def test_concepts_are_defined_once(self):
sheerka = self.get_sheerka()
context = self.get_context(sheerka)
good = self.create_and_add_in_cache_concept(sheerka, "good")
foo = self.create_and_add_in_cache_concept(sheerka, "foo", bnf=ConceptExpression("good"))
bar = self.create_and_add_in_cache_concept(sheerka, "bar", bnf=ConceptExpression("good"))
baz = self.create_and_add_in_cache_concept(sheerka, "baz", bnf=OrderedChoice(
ConceptExpression("foo"),
ConceptExpression("bar")))
concepts_by_first_keywords = compute_concepts_by_first_token(context, [good, foo, bar, baz]).body
resolved_ret_val = resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
assert resolved_ret_val.status
assert resolved_ret_val.body == {
"good": ["1001", "1002", "1003", "1004"],
}
def test_i_can_resolve_more_complex(self):
sheerka = self.get_sheerka()
context = self.get_context(sheerka)
a = self.create_and_add_in_cache_concept(sheerka, "a", bnf=Sequence("one", "two"))
b = self.create_and_add_in_cache_concept(sheerka, "b", bnf=Sequence(ConceptExpression("a"), "two"))
concepts_by_first_keywords = compute_concepts_by_first_token(context, [a, b]).body
resolved_ret_val = resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
assert resolved_ret_val.status
assert resolved_ret_val.body == {
"one": ["1001", "1002"],
}
def tests_i_can_detect_direct_recursion(self):
sheerka, context, good, foo, bar = self.init_concepts(
"good",
self.bnf_concept("foo", ConceptExpression("bar")),
self.bnf_concept("bar", ConceptExpression("foo")),
)
concepts_by_first_keywords = compute_concepts_by_first_token(context, [good, foo, bar]).body
resolved_ret_val = resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
assert resolved_ret_val.status
assert resolved_ret_val.body == {
"good": ["1001"],
}
assert sheerka.chicken_and_eggs.get(foo.id) == {foo.id, bar.id}
assert sheerka.chicken_and_eggs.get(bar.id) == {foo.id, bar.id}
def test_i_can_detect_indirect_infinite_recursion(self):
sheerka, context, good, one, two, three = self.init_concepts(
"good",
self.bnf_concept("one", ConceptExpression("two")),
self.bnf_concept("two", ConceptExpression("three")),
self.bnf_concept("three", ConceptExpression("two")),
)
concepts_by_first_keywords = compute_concepts_by_first_token(context, [good, one, two, three]).body
resolved_ret_val = resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
assert resolved_ret_val.status
assert resolved_ret_val.body == {
"good": ["1001"],
}
assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}
def test_i_can_detect_the_longest_infinite_recursion_chain(self):
sheerka, context, good, one, two, three = self.init_concepts(
"good",
self.bnf_concept("two", ConceptExpression("three")),
self.bnf_concept("three", ConceptExpression("two")),
self.bnf_concept("one", ConceptExpression("three")),
)
concepts_by_first_keywords = compute_concepts_by_first_token(context, [good, one, two, three]).body
resolved_ret_val = resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
assert resolved_ret_val.status
assert resolved_ret_val.body == {
"good": ["1001"],
}
assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}
class TestSheerkaConceptManagerUsingFileBasedSheerka(TestUsingFileBasedSheerka):
def test_i_can_add_several_concepts(self):
@@ -759,8 +1013,8 @@ class TestSheerkaConceptManagerUsingFileBasedSheerka(TestUsingFileBasedSheerka):
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_HASH_ENTRY, hello.get_definition_hash())
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_HASH_ENTRY, greeting.get_definition_hash())
assert sheerka.om.current_sdp().exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Hello")
assert sheerka.om.current_sdp().exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Greeting")
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Hello")
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Greeting")
def test_i_cannot_add_the_same_concept_twice_using_sdp(self):
"""