Fixed #3: Added sheerka.resolve_rule()

Fixed #5: Refactored SheerkaComparisonManager
Fixed #6: Sya parser not working after restart
This commit is contained in:
2021-01-15 07:11:04 +01:00
parent e26c83a825
commit 821dbed189
44 changed files with 1617 additions and 1068 deletions
-294
View File
@@ -1,294 +0,0 @@
import pytest
from core.concept import Concept
from parsers.BaseNodeParser import BaseNodeParser, NoFirstTokenError
from parsers.BnfNodeParser import StrMatch, Sequence, OrderedChoice, Optional, ZeroOrMore, OneOrMore, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
    """
    Tests for BaseNodeParser: building the concepts-by-first-token index
    and resolving it down to plain keywords, including detection of
    chicken-and-egg (cyclic) concept definitions.
    """

    @pytest.mark.parametrize("concept, expected", [
        (Concept("foo"), {"foo": ["1001"]}),
        (Concept("foo a").def_var("a"), {"foo": ["1001"]}),
        (Concept("a b foo").def_var("a").def_var("b"), {"foo": ["1001"]}),
    ])
    def test_i_can_get_concepts_by_first_keyword(self, concept, expected):
        """
        Given a concept, I can find the first known token.

        Example:
            Concept("a foo b").def_var("a").def_var("b")
            'a' and 'b' are properties; the first 'real' token is foo.
        """
        sheerka, context, *updated = self.init_concepts(concept)
        res = BaseNodeParser.compute_concepts_by_first_token(context, updated)
        assert res.status
        assert res.body == expected

    @pytest.mark.parametrize("bnf, expected", [
        (StrMatch("foo"), {"foo": ["1002"]}),
        (StrMatch("bar"), {"bar": ["1002"]}),
        (ConceptExpression("bar"), {"c:|1001:": ["1002"]}),
        (Sequence(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"]}),
        (Sequence(StrMatch("foo"), ConceptExpression("bar")), {"foo": ["1002"]}),
        (Sequence(ConceptExpression("bar"), StrMatch("foo")), {"c:|1001:": ["1002"]}),
        (OrderedChoice(StrMatch("foo"), StrMatch("bar")), {"foo": ["1002"], "bar": ["1002"]}),
        (Optional(StrMatch("foo")), {"foo": ["1002"]}),
        (ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}),
        (OneOrMore(StrMatch("foo")), {"foo": ["1002"]}),
        (StrMatch("--filter"), {"--filter": ["1002"]}),  # add both entries
    ])
    def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected):
        """
        The first token of a concept defined by a BNF expression is derived
        from the expression; references to other concepts appear as
        'c:|<id>:' keys instead of plain keywords.
        """
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)
        # "bar" gets id 1001, the concept under test gets id 1002
        bar = Concept("bar").init_key()
        sheerka.set_id_if_needed(bar, False)
        sheerka.test_only_add_in_cache(bar)
        concept = Concept("foo").init_key()
        concept.set_bnf(bnf)
        sheerka.set_id_if_needed(concept, False)
        res = BaseNodeParser.compute_concepts_by_first_token(context, [concept])
        assert res.status
        assert res.body == expected

    def test_i_can_get_concepts_by_first_keyword_when_multiple_concepts(self):
        """An OrderedChoice contributes one index entry per alternative."""
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)
        bar = Concept("bar").init_key()
        sheerka.set_id_if_needed(bar, False)
        sheerka.test_only_add_in_cache(bar)
        baz = Concept("baz").init_key()
        sheerka.set_id_if_needed(baz, False)
        sheerka.test_only_add_in_cache(baz)
        foo = Concept("foo").init_key()
        foo.set_bnf(OrderedChoice(ConceptExpression("bar"), ConceptExpression("baz"), StrMatch("qux")))
        sheerka.set_id_if_needed(foo, False)
        res = BaseNodeParser.compute_concepts_by_first_token(context, [bar, baz, foo])
        assert res.status
        assert res.body == {
            "bar": ["1001"],
            "baz": ["1002"],
            "c:|1001:": ["1003"],
            "c:|1002:": ["1003"],
            "qux": ["1003"],
        }

    def test_i_can_get_concepts_by_first_keyword_using_sheerka(self):
        """With use_sheerka=True, concepts already known to Sheerka are
        included in the index alongside the explicitly passed ones."""
        sheerka, context, *updated = self.init_test().with_concepts(
            "one",
            "two",
            Concept("twenty", definition="'twenty' (one|two)"),
            create_new=True
        ).unpack()
        bar = Concept("bar").init_key()
        sheerka.set_id_if_needed(bar, False)
        sheerka.test_only_add_in_cache(bar)
        foo = Concept("foo").init_key()
        foo.set_bnf(OrderedChoice(ConceptExpression("one"), ConceptExpression("bar"), StrMatch("qux")))
        sheerka.set_id_if_needed(foo, False)
        res = BaseNodeParser.compute_concepts_by_first_token(context, [bar, foo], use_sheerka=True)
        assert res.status
        assert res.body == {
            "one": ["1001"],
            "two": ["1002"],
            "twenty": ["1003"],
            "bar": ["1004"],
            "c:|1001:": ["1005"],
            "c:|1004:": ["1005"],
            "qux": ["1005"],
        }

    def test_i_cannot_get_concept_by_first_keyword_when_no_first_keyword(self):
        """A concept made only of properties has no first token: the call
        fails and reports a NoFirstTokenError."""
        sheerka, context, foo = self.init_concepts(Concept("x y", body="x y").def_var("x").def_var("y"))
        res = BaseNodeParser.compute_concepts_by_first_token(context, [foo])
        assert not res.status
        assert res.body == NoFirstTokenError(foo, foo.key)

    def test_i_can_resolve_concepts_by_first_keyword(self):
        """Resolution follows 'c:|<id>:' references transitively until only
        plain keywords remain, accumulating the concept ids."""
        sheerka, context, *updated = self.init_concepts(
            "one",
            Concept("two", definition="one"),
            Concept("three", definition="two"))
        concepts_by_first_keywords = {
            "one": ["1001"],
            "c:|1001:": ["1002"],
            "c:|1002:": ["1003"],
        }
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "one": ["1001", "1002", "1003"],
        }

    def test_i_can_resolve_when_concepts_are_sets(self):
        """Resolving through an is-a hierarchy: a reference to 'number'
        expands to every concept declared as a number."""
        sheerka, context, number, *concepts = self.init_concepts(
            "number",
            "one",
            "two",
            "twenty",
            "hundred",
            Concept("twenties", definition="twenty number"),
            Concept("hundreds", definition="number hundred"),
        )
        sheerka.set_isa(context, sheerka.new("one"), number)
        sheerka.set_isa(context, sheerka.new("two"), number)
        sheerka.set_isa(context, sheerka.new("twenty"), number)
        sheerka.set_isa(context, sheerka.new("thirty"), number)
        sheerka.set_isa(context, sheerka.new("hundred"), number)
        sheerka.set_isa(context, sheerka.new("twenties"), number)
        sheerka.set_isa(context, sheerka.new("hundreds"), number)
        sheerka.concepts_grammars.clear()  # reset all the grammar to simulate Sheerka restart
        # cbft : concept_by_first_token (I usually don't use abbreviation)
        cbft = BaseNodeParser.compute_concepts_by_first_token(context, [number] + concepts).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbft)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            'number': ['1001'],
            'one': ['1002', '1007'],
            'two': ['1003', '1007'],
            'twenty': ['1004', '1006', '1007'],
            'hundred': ['1005', '1007'],
        }

    def test_concepts_are_defined_once(self):
        """Even when two concepts ('foo' and 'bar') both start with the same
        reference ('good'), each concept id appears only once per keyword."""
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)
        good = self.create_and_add_in_cache_concept(sheerka, "good")
        foo = self.create_and_add_in_cache_concept(sheerka, "foo", bnf=ConceptExpression("good"))
        bar = self.create_and_add_in_cache_concept(sheerka, "bar", bnf=ConceptExpression("good"))
        baz = self.create_and_add_in_cache_concept(sheerka, "baz", bnf=OrderedChoice(
            ConceptExpression("foo"),
            ConceptExpression("bar")))
        concepts_by_first_keywords = BaseNodeParser.compute_concepts_by_first_token(
            context, [good, foo, bar, baz]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001", "1002", "1003", "1004"],
        }

    def test_i_can_resolve_more_complex(self):
        """A sequence starting with a concept reference resolves to the first
        token of the referenced concept."""
        sheerka = self.get_sheerka()
        context = self.get_context(sheerka)
        a = self.create_and_add_in_cache_concept(sheerka, "a", bnf=Sequence("one", "two"))
        b = self.create_and_add_in_cache_concept(sheerka, "b", bnf=Sequence(ConceptExpression("a"), "two"))
        concepts_by_first_keywords = BaseNodeParser.compute_concepts_by_first_token(
            context, [a, b]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "one": ["1001", "1002"],
        }

    # Renamed from "tests_i_can_detect_direct_recursion": pytest only
    # collects functions matching "test_*", so the old name was never run.
    def test_i_can_detect_direct_recursion(self):
        """'foo' and 'bar' reference each other: both are dropped from the
        resolved index and recorded as a chicken-and-egg pair."""
        sheerka, context, good, foo, bar = self.init_concepts(
            "good",
            self.bnf_concept("foo", ConceptExpression("bar")),
            self.bnf_concept("bar", ConceptExpression("foo")),
        )
        concepts_by_first_keywords = BaseNodeParser.compute_concepts_by_first_token(context, [good, foo, bar]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001"],
        }
        assert sheerka.chicken_and_eggs.get(foo.id) == {foo.id, bar.id}
        assert sheerka.chicken_and_eggs.get(bar.id) == {foo.id, bar.id}

    def test_i_can_detect_indirect_infinite_recursion(self):
        """one -> two -> three -> two: the indirect cycle pulls all three
        concepts into the same chicken-and-egg set."""
        sheerka, context, good, one, two, three = self.init_concepts(
            "good",
            self.bnf_concept("one", ConceptExpression("two")),
            self.bnf_concept("two", ConceptExpression("three")),
            self.bnf_concept("three", ConceptExpression("two")),
        )
        concepts_by_first_keywords = BaseNodeParser.compute_concepts_by_first_token(context, [good, one, two, three]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001"],
        }
        assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}

    def test_i_can_detect_the_longest_infinite_recursion_chain(self):
        """'two' and 'three' form a cycle; 'one' only points into it, yet all
        three end up in the same chicken-and-egg set."""
        # Concepts are created in the order two, three, one, so unpack in
        # that same order. (The previous unpack mislabelled the variables;
        # the set assertions below are symmetric, so the outcome is the same,
        # but the names now match the concepts they hold.)
        sheerka, context, good, two, three, one = self.init_concepts(
            "good",
            self.bnf_concept("two", ConceptExpression("three")),
            self.bnf_concept("three", ConceptExpression("two")),
            self.bnf_concept("one", ConceptExpression("three")),
        )
        concepts_by_first_keywords = BaseNodeParser.compute_concepts_by_first_token(context, [good, one, two, three]).body
        resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
        assert resolved_ret_val.status
        assert resolved_ret_val.body == {
            "good": ["1001"],
        }
        assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
        assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}

    #
    # def test_i_can_detect_infinite_recursion_from_ordered_choice(self):
    #     sheerka = self.get_sheerka()
    #     good = self.get_concept(sheerka, "good")
    #     one = self.get_concept(sheerka, "one", ConceptExpression("two"))
    #     two = self.get_concept(sheerka, "two", OrderedChoice(ConceptExpression("one"), ConceptExpression("two")))
    #
    #     concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two]).body
    #
    #     resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
    #     assert resolved_ret_val.status
    #     assert resolved_ret_val.body == {
    #         "good": ["1001"],
    #         BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"]
    #     }
    #
    # def test_i_can_detect_infinite_recursion_with_sequence(self):
    #     sheerka = self.get_sheerka()
    #     good = self.get_concept(sheerka, "good")
    #     one = self.get_concept(sheerka, "one", ConceptExpression("two"))
    #     two = self.get_concept(sheerka, "two", Sequence(StrMatch("yes"), ConceptExpression("one")))
    #
    #     concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two]).body
    #
    #     resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
    #     assert resolved_ret_val.status
    #     assert resolved_ret_val.body == {
    #         "good": ["1001"],
    #         BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"]
    #     }
+4 -3
View File
@@ -4,6 +4,7 @@ import tests.parsers.parsers_utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF
from core.global_symbols import NotInit
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfDefinitionParser import BnfDefinitionParser
@@ -833,7 +834,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
# get_parsing_expression() also returns CHICKEN_AND_EGG
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -858,7 +859,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
@@ -884,7 +885,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
+1 -1
View File
@@ -87,7 +87,7 @@ class TestRuleParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert len(rules) == 1
assert rules[0].id == "xxx"
assert rules[0].metadata.action_type == "deferred"
assert rules[0].metadata.id_is_unresolved
@pytest.mark.parametrize("text", [
"r:|1:xxx",
+36 -3
View File
@@ -1,6 +1,8 @@
import pytest
import tests.parsers.parsers_utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, CIO, ALL_ATTRIBUTES, CMV
from core.concept import Concept, CIO, CMV
from core.global_symbols import CONCEPT_COMPARISON_CONTEXT
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
@@ -10,8 +12,7 @@ from parsers.BaseNodeParser import utnode, cnode, short_cnode, UnrecognizedToken
from parsers.PythonParser import PythonNode
from parsers.SyaNodeParser import SyaNodeParser, SyaConceptParserHelper, SyaAssociativity, \
NoneAssociativeSequenceError, TooManyParametersFoundError, InFixToPostFix, ParenthesisMismatchError
import tests.parsers.parsers_utils
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -1356,3 +1357,35 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert lexer_nodes == [CN(cmap["suffixed"], 0, 6, source=text)]
class TestFileBaseSyaNodeParser(TestUsingFileBasedSheerka):
    """
    Sya parser tests that need a file-backed Sheerka, so that a restart
    (a fresh Sheerka instance over the same files) can be simulated.
    """

    def test_i_can_parse_after_restart(self):
        """
        Parse "one plus two" with a SyaNodeParser created BEFORE a Sheerka
        restart, against a context obtained AFTER the restart — both with
        the base ontology and after pushing an extra ontology layer.
        Regression test for issue #6 (parser broken after restart).
        """
        sheerka, context, one, two, plus = self.init_test().with_concepts("one",
                                                                          "two",
                                                                          Concept("a plus b").def_var("a").def_var("b"),
                                                                          create_new=True).unpack()
        # persist the concepts so the new instance below can reload them
        sheerka.om.commit(context)
        parser = SyaNodeParser()
        # sanity check
        # Remove this commented section to make sure that the nominal case still works
        # res = parser.parse(context, ParserInput("one plus two"))
        # assert res.status
        # assert sheerka.isinstance(res.body.body[0].concept, plus.key)

        # simulate a restart: fresh Sheerka instance, fresh context,
        # but the SAME parser object created before the restart
        sheerka = self.new_sheerka_instance(False)
        context = self.get_context(sheerka)
        res = parser.parse(context, ParserInput("one plus two"))
        assert res.status
        assert sheerka.isinstance(res.body.body[0].concept, plus.key)
        # adds an ontology layer and make the test again
        sheerka.push_ontology(context, "new ontology")
        sheerka = self.new_sheerka_instance(False)
        context = self.get_context(sheerka)
        res = parser.parse(context, ParserInput("one plus two"))
        assert res.status
        assert sheerka.isinstance(res.body.body[0].concept, plus.key)