Fixed parsing of BNF concepts mixed with isaset concepts

This commit is contained in:
2020-07-02 16:32:02 +02:00
parent 2c5840752a
commit f26c391d3f
12 changed files with 413 additions and 123 deletions
+89 -22
View File
@@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression, LongestChoice
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice
from parsers.BnfParser import BnfParser
import tests.parsers.parsers_utils
@@ -25,6 +25,8 @@ cmap = {
"baz": Concept("baz"),
"one hundred": Concept("one hundred", body="100"),
"one_hundred": Concept("'one hundred'", body="100"),
"hundreds": Concept("hundreds", definition="number=n1 'hundred' 'and' number=n2",
where="n1 < 10 and n2 < 100", body="n1 * 100 + n2"),
"bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen
@@ -102,6 +104,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number"))
# Pay attention. 'twenties (t1 and t2) are not set as number
thirties = cls.update_bnf(context, Concept("thirties",
definition="thirty number",
where="number < 10",
@@ -367,9 +371,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("three", []),
])
def test_i_can_parse_longest_choice(self, text, expected):
def test_i_can_parse_unordered_choice(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", LongestChoice(
"foo": self.bnf_concept("foo", UnOrderedChoice(
StrMatch("one"),
Sequence(StrMatch("one"), StrMatch("two")))),
}
@@ -799,7 +803,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("bar_expr, expected", [
(ConceptExpression("foo"), {}),
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
(Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']})
(Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}),
# (UnOrderedChoice(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']})
])
def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
my_map = {
@@ -894,7 +899,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
number_nodes = parsing_expression.nodes[1].nodes
assert len(number_nodes) == 1
assert isinstance(number_nodes[0], LongestChoice)
assert isinstance(number_nodes[0], UnOrderedChoice)
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
@@ -927,7 +932,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
number_nodes = parsing_expression.nodes[0].nodes
assert len(number_nodes) == 1
assert isinstance(number_nodes[0], LongestChoice)
assert isinstance(number_nodes[0], UnOrderedChoice)
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes
@@ -959,7 +964,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
number_nodes = parsing_expression.nodes[1].nodes
assert len(number_nodes) == 1
assert isinstance(number_nodes[0], LongestChoice)
assert isinstance(number_nodes[0], UnOrderedChoice)
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
@@ -1117,11 +1122,16 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_when_concept_starts_with_isa(self):
def test_i_can_parse_one_thousand(self):
"""
Test of simple number + 'thousand'
:return:
"""
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
one = CC("one", body=DoNotResolve("one"))
sheerka.concepts_grammars.clear() # to simulate restart
text = "one thousand"
one = CC("one", body=DoNotResolve("one"))
expected = CNC("thousands",
source=text,
number=CC("number",
@@ -1138,27 +1148,51 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_fifty_one_thousand(self):
"""
Test of complex number + 'thousand' (complex because the number is a BNF concept)
:return:
"""
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
sheerka.concepts_grammars.clear() # to simulate restart
text = "fifty one thousand"
one = CC("one", body=DoNotResolve("one"))
fifty_one = CC("fifties",
source="fifty one",
fifty="fifty",
number=CC("number", source="one", body=one, one=one))
expected = CNC("thousands",
source=text,
number=CC("number",
source="fifty one",
fifties=fifty_one,
body=fifty_one))
expected_array = compute_expected_array(cmap, text, [expected])
one_thousand = CC("thousands",
source="one thousand",
number=CC("number", source="one", body=one, one=one))
expected_thousand = CNC("thousands",
source=text,
number=CC("number",
source="fifty one",
fifties=fifty_one,
body=fifty_one))
expected_fifties = CNC("fifties",
source=text,
fifty="fifty",
number=CC("number",
source="one thousand",
thousands=one_thousand,
body=one_thousand))
expected_thousands = compute_expected_array(cmap, text, [expected_thousand])
expected_fifties = compute_expected_array(cmap, text, [expected_fifties])
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
assert res[0].status
assert res[0].value.value == expected_thousands
assert res[1].status
assert res[1].value.value == expected_fifties
def test_i_can_parse_one_hundred_thousand(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
sheerka.concepts_grammars.clear() # to simulate restart
text = "one hundred thousand"
res = parser.parse(context, ParserInput(text))
@@ -1167,6 +1201,39 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
def test_i_can_parse_hundreds_like_expression(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
text = "three hundred and thirty two"
three = CC("three", body=DoNotResolve("three"))
two = CC("two", body=DoNotResolve("two"))
thirty_two = CC("thirties",
source="thirty two",
thirty="thirty",
number=CC("number",
source="two",
body=two,
two=two))
expected = CNC("hundreds",
source=text,
n1=CC("number",
source="three",
body=three,
three=three),
n2=CC("number",
source="thirty two",
body=thirty_two,
thirties=thirty_two))
expected_array = compute_expected_array(cmap, text, [expected])
res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
sheerka.concepts_grammars.clear() # simulate restart
+13 -1
View File
@@ -22,6 +22,16 @@ def c(name, rule_name=None):
return ConceptExpression(concept, rule_name=rule_name or name)
def update_concepts_ids(sheerka, parsing_expression):
if isinstance(parsing_expression, ConceptExpression):
if not parsing_expression.concept.id:
concept = sheerka.get_by_key(parsing_expression.concept.key)
parsing_expression.concept.metadata.id = concept.id
for pe in parsing_expression.elements:
update_concepts_ids(sheerka, pe)
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
@@ -109,7 +119,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
])
def test_i_can_parse_regex_with_concept(self, expression, expected):
sheerka, context, parser, foo, bar, var, _def = self.init_parser("foo", "bar", "var", "def")
sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def")
update_concepts_ids(sheerka, expected)
res = parser.parse(context, Tokenizer(expression))