Implemented SheerkaOntology

This commit is contained in:
2021-01-11 15:36:03 +01:00
parent e3c2adb533
commit e26c83a825
119 changed files with 6876 additions and 2002 deletions
+74 -42
View File
@@ -1,13 +1,14 @@
import pytest
import tests.parsers.parsers_utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF, NotInit
from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF
from core.global_symbols import NotInit
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser
import tests.parsers.parsers_utils
from tests.BaseTest import BaseTest
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -82,28 +83,26 @@ def compute_expected_array(my_concepts_map, expression, expected, exclude_body=F
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka = None
shared_ontology = None
@classmethod
def setup_class(cls):
t = cls()
TestBnfNodeParser.sheerka, context, _ = t.init_parser(
cmap,
singleton=False,
create_new=True,
init_from_sheerka=True)
init_test_helper = cls().init_test(cache_only=False, ontology="#TestBnfNodeParser#")
sheerka, context, *updated = init_test_helper.with_concepts(*cmap.values(), create_new=True).unpack()
for i, concept_name in enumerate(cmap):
cmap[concept_name] = updated[i]
# end of initialisation
sheerka = TestBnfNodeParser.sheerka
sheerka.set_isa(context, sheerka.new("one"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("two"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("three"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("four"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("hundreds"), sheerka.new("number"))
sheerka.set_isa(context, cmap["one"], cmap["number"])
sheerka.set_isa(context, cmap["two"], cmap["number"])
sheerka.set_isa(context, cmap["three"], cmap["number"])
sheerka.set_isa(context, cmap["four"], cmap["number"])
sheerka.set_isa(context, cmap["thirty"], cmap["number"])
sheerka.set_isa(context, cmap["forty"], cmap["number"])
sheerka.set_isa(context, cmap["fifty"], cmap["number"])
sheerka.set_isa(context, cmap["one hundred"], cmap["number"])
sheerka.set_isa(context, cmap["hundreds"], cmap["number"])
# Pay attention: 'twenties' (t1 and t2) are not set as number
@@ -135,6 +134,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
cmap["thousands"] = sheerka.create_new_concept(context, thousands).body.body
sheerka.set_isa(context, sheerka.new("thousands"), sheerka.new("number"))
cls.shared_ontology = sheerka.get_ontology(context)
sheerka.pop_ontology()
@staticmethod
def update_bnf(context, concept):
bnf_parser = BnfDefinitionParser()
@@ -147,19 +149,38 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
return concept
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
# Returns the tuple (sheerka, context, parser) that the tests unpack.
# NOTE(review): this text looks like a rendered diff whose indentation and +/- markers were lost;
# two variants of the branching seem interleaved below -- confirm which lines are current.
# Variant A: when a map is given, its values go through self.init_concepts(...).
if my_concepts_map is not None:
sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs)
# Variant B: without a map, a fresh init_test() environment receives the class-level shared_ontology.
if my_concepts_map is None:
sheerka, context = self.init_test().unpack()
sheerka.add_ontology(context, self.shared_ontology)
else:
# With a map, its values go through the init_test() helper and kwargs are forwarded to with_concepts().
sheerka, context, *updated = self.init_test().with_concepts(*my_concepts_map.values(), **kwargs).unpack()
# Write the i-th returned item back under the i-th key of the caller's map (the map is mutated in place).
for i, pair in enumerate(my_concepts_map):
my_concepts_map[pair] = updated[i]
# NOTE(review): this second else presumably belongs to variant A (reuse of the class-level sheerka) -- confirm.
else:
sheerka = TestBnfNodeParser.sheerka
context = self.get_context(sheerka)
# The parser is constructed with sheerka only when init_from_sheerka is truthy.
parser = BnfNodeParser(sheerka=sheerka) if init_from_sheerka else BnfNodeParser()
return sheerka, context, parser
def exec_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
    """Set up the values of ``my_map`` and return the sequences obtained for ``text``.

    The values of ``my_map`` are handed to the ``init_test()`` helper with
    ``create_new=False``. The resulting context and items are then forwarded,
    together with the remaining arguments, to ``exec_get_concepts_sequences``,
    whose return value is passed back unchanged.
    """
    test_helper = self.init_test().with_concepts(*my_map.values(), create_new=False)
    # The first unpacked element (the sheerka instance) is not needed here.
    _sheerka, context, *concepts = test_helper.unpack()
    return self.exec_get_concepts_sequences(
        context, my_map, text, expected, multiple_result, post_init_concepts, *concepts
    )
@staticmethod
def exec_get_concepts_sequences(context,
my_map,
text,
expected,
multiple_result=False,
post_init_concepts=None,
*concepts):
sheerka = context.sheerka
if not multiple_result:
expected_array = [compute_expected_array(my_map, text, expected)]
else:
@@ -169,7 +190,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
post_init_concepts(sheerka, context)
parser = BnfNodeParser()
parser.init_from_concepts(context, updated)
parser.init_from_concepts(context, concepts)
parser.reset_parser(context, ParserInput(text))
bnf_parsers_helpers = parser.get_concepts_sequences(context)
@@ -179,15 +200,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert parser_helper.sequence == expected_sequence
if len(bnf_parsers_helpers) == 1:
return sheerka, context, bnf_parsers_helpers[0].sequence
return bnf_parsers_helpers[0].sequence
else:
return sheerka, context, [pe.sequence for pe in bnf_parsers_helpers]
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
sheerka, context, sequences = self.exec_get_concepts_sequences(
my_map, text, expected, multiple_result, post_init_concepts
)
return sequences
return [pe.sequence for pe in bnf_parsers_helpers]
def test_i_cannot_parse_empty_strings(self):
sheerka, context, parser = self.init_parser({}, singleton=True)
@@ -706,10 +721,11 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
ConceptExpression("foo"),
OrderedChoice(StrMatch("one"), StrMatch("two")))),
}
sheerka, context, *concepts = self.init_test().with_concepts(*my_map.values(), create_new=False).unpack()
text = "twenty two"
expected = [CN("bar", source="twenty two")]
sheerka, context, sequences = self.exec_get_concepts_sequences(my_map, text, expected)
sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts)
concept_bar = sequences[0].concept
assert concept_bar.get_compiled() == {
@@ -720,7 +736,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
text = "thirty one"
expected = [CN("bar", source="thirty one")]
sequences = self.validate_get_concepts_sequences(my_map, text, expected)
sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts)
concept_bar = sequences[0].concept
assert concept_bar.get_compiled() == {
ConceptParts.BODY: DoNotResolve("thirty one"),
@@ -817,7 +833,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# every obvious cyclic recursion is removed from the concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert parser.concepts_by_first_keyword == expected
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
# get_parsing_expression() also returns CHICKEN_AND_EGG
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -842,7 +858,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# every obvious cyclic recursion is removed from the concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert parser.concepts_by_first_keyword == {}
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
@@ -868,7 +884,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# every obvious cyclic recursion is removed from the concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert parser.concepts_by_first_keyword == {}
assert sheerka.om.copy(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
@@ -1008,8 +1024,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
ConceptExpression(my_map["one"], rule_name="one"))
@pytest.mark.parametrize("expr, text, expected", [
(ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
(StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
# (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
# (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
(StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
])
def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
@@ -1442,6 +1458,22 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
def test_i_can_get_expression_from_concept_name(self, name, expected):
    """The expression computed from ``name`` must equal ``expected``."""
    actual = BnfNodeParser.get_expression_from_concept_name(name)
    assert actual == expected
def test_i_can_parse_when_multiple_layers(self):
    """The same text is parsed successfully before and after pushing an extra ontology layer."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    text = "thirty one"

    def assert_parsed(bnf_parser):
        # Identical check for both phases: the parse succeeds and yields
        # the expected array built from a 'thirties' node covering the text.
        res = bnf_parser.parse(context, ParserInput(text))
        assert res.status
        assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)])

    # sanity
    assert_parsed(parser)

    # add a layer, I still can parse the text (with a parser created after the push)
    sheerka.push_ontology(context, "new layer")
    assert_parsed(BnfNodeParser(sheerka=sheerka))
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),