Fixed parsing of BNF concepts mixed with isa concepts
This commit is contained in:
@@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from parsers.BaseNodeParser import CNC, UTN, CN
|
||||
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
|
||||
Optional, ZeroOrMore, OneOrMore, ConceptExpression, LongestChoice
|
||||
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice
|
||||
from parsers.BnfParser import BnfParser
|
||||
|
||||
import tests.parsers.parsers_utils
|
||||
@@ -25,6 +25,8 @@ cmap = {
|
||||
"baz": Concept("baz"),
|
||||
"one hundred": Concept("one hundred", body="100"),
|
||||
"one_hundred": Concept("'one hundred'", body="100"),
|
||||
"hundreds": Concept("hundreds", definition="number=n1 'hundred' 'and' number=n2",
|
||||
where="n1 < 10 and n2 < 100", body="n1 * 100 + n2"),
|
||||
|
||||
"bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen
|
||||
|
||||
@@ -102,6 +104,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
|
||||
sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number"))
|
||||
|
||||
# Pay attention. 'twenties (t1 and t2) are not set as number
|
||||
|
||||
thirties = cls.update_bnf(context, Concept("thirties",
|
||||
definition="thirty number",
|
||||
where="number < 10",
|
||||
@@ -367,9 +371,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("three", []),
|
||||
|
||||
])
|
||||
def test_i_can_parse_longest_choice(self, text, expected):
|
||||
def test_i_can_parse_unordered_choice(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", LongestChoice(
|
||||
"foo": self.bnf_concept("foo", UnOrderedChoice(
|
||||
StrMatch("one"),
|
||||
Sequence(StrMatch("one"), StrMatch("two")))),
|
||||
}
|
||||
@@ -799,7 +803,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
@pytest.mark.parametrize("bar_expr, expected", [
|
||||
(ConceptExpression("foo"), {}),
|
||||
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
|
||||
(Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']})
|
||||
(Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}),
|
||||
# (UnOrderedChoice(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']})
|
||||
])
|
||||
def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
|
||||
my_map = {
|
||||
@@ -894,7 +899,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
number_nodes = parsing_expression.nodes[1].nodes
|
||||
assert len(number_nodes) == 1
|
||||
assert isinstance(number_nodes[0], LongestChoice)
|
||||
assert isinstance(number_nodes[0], UnOrderedChoice)
|
||||
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
||||
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
||||
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
||||
@@ -927,7 +932,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
number_nodes = parsing_expression.nodes[0].nodes
|
||||
assert len(number_nodes) == 1
|
||||
assert isinstance(number_nodes[0], LongestChoice)
|
||||
assert isinstance(number_nodes[0], UnOrderedChoice)
|
||||
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
||||
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
||||
assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes
|
||||
@@ -959,7 +964,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
number_nodes = parsing_expression.nodes[1].nodes
|
||||
assert len(number_nodes) == 1
|
||||
assert isinstance(number_nodes[0], LongestChoice)
|
||||
assert isinstance(number_nodes[0], UnOrderedChoice)
|
||||
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
||||
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
||||
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
||||
@@ -1117,11 +1122,16 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||
assert concepts_nodes == expected_array
|
||||
|
||||
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_when_concept_starts_with_isa(self):
|
||||
def test_i_can_parse_one_thousand(self):
|
||||
"""
|
||||
Test of simple number + 'thousand'
|
||||
:return:
|
||||
"""
|
||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||
one = CC("one", body=DoNotResolve("one"))
|
||||
|
||||
sheerka.concepts_grammars.clear() # to simulate restart
|
||||
text = "one thousand"
|
||||
|
||||
one = CC("one", body=DoNotResolve("one"))
|
||||
expected = CNC("thousands",
|
||||
source=text,
|
||||
number=CC("number",
|
||||
@@ -1138,27 +1148,51 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||
assert concepts_nodes == expected_array
|
||||
|
||||
def test_i_can_parse_fifty_one_thousand(self):
|
||||
"""
|
||||
Test of complex number + 'thousand' (complex because the number is a BNF concept)
|
||||
:return:
|
||||
"""
|
||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||
sheerka.concepts_grammars.clear() # to simulate restart
|
||||
text = "fifty one thousand"
|
||||
|
||||
one = CC("one", body=DoNotResolve("one"))
|
||||
fifty_one = CC("fifties",
|
||||
source="fifty one",
|
||||
fifty="fifty",
|
||||
number=CC("number", source="one", body=one, one=one))
|
||||
expected = CNC("thousands",
|
||||
source=text,
|
||||
number=CC("number",
|
||||
source="fifty one",
|
||||
fifties=fifty_one,
|
||||
body=fifty_one))
|
||||
expected_array = compute_expected_array(cmap, text, [expected])
|
||||
one_thousand = CC("thousands",
|
||||
source="one thousand",
|
||||
number=CC("number", source="one", body=one, one=one))
|
||||
|
||||
expected_thousand = CNC("thousands",
|
||||
source=text,
|
||||
number=CC("number",
|
||||
source="fifty one",
|
||||
fifties=fifty_one,
|
||||
body=fifty_one))
|
||||
expected_fifties = CNC("fifties",
|
||||
source=text,
|
||||
fifty="fifty",
|
||||
number=CC("number",
|
||||
source="one thousand",
|
||||
thousands=one_thousand,
|
||||
body=one_thousand))
|
||||
expected_thousands = compute_expected_array(cmap, text, [expected_thousand])
|
||||
expected_fifties = compute_expected_array(cmap, text, [expected_fifties])
|
||||
|
||||
res = parser.parse(context, ParserInput(text))
|
||||
parser_result = res.value
|
||||
concepts_nodes = res.value.value
|
||||
|
||||
assert res.status
|
||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||
assert concepts_nodes == expected_array
|
||||
assert res[0].status
|
||||
assert res[0].value.value == expected_thousands
|
||||
|
||||
assert res[1].status
|
||||
assert res[1].value.value == expected_fifties
|
||||
|
||||
def test_i_can_parse_one_hundred_thousand(self):
|
||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||
sheerka.concepts_grammars.clear() # to simulate restart
|
||||
|
||||
text = "one hundred thousand"
|
||||
res = parser.parse(context, ParserInput(text))
|
||||
@@ -1167,6 +1201,39 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert res.status
|
||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||
|
||||
def test_i_can_parse_hundreds_like_expression(self):
|
||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||
|
||||
text = "three hundred and thirty two"
|
||||
three = CC("three", body=DoNotResolve("three"))
|
||||
two = CC("two", body=DoNotResolve("two"))
|
||||
thirty_two = CC("thirties",
|
||||
source="thirty two",
|
||||
thirty="thirty",
|
||||
number=CC("number",
|
||||
source="two",
|
||||
body=two,
|
||||
two=two))
|
||||
expected = CNC("hundreds",
|
||||
source=text,
|
||||
n1=CC("number",
|
||||
source="three",
|
||||
body=three,
|
||||
three=three),
|
||||
n2=CC("number",
|
||||
source="thirty two",
|
||||
body=thirty_two,
|
||||
thirties=thirty_two))
|
||||
|
||||
expected_array = compute_expected_array(cmap, text, [expected])
|
||||
res = parser.parse(context, ParserInput(text))
|
||||
parser_result = res.value
|
||||
concepts_nodes = res.value.value
|
||||
|
||||
assert res.status
|
||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||
assert concepts_nodes == expected_array
|
||||
|
||||
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
|
||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||
sheerka.concepts_grammars.clear() # simulate restart
|
||||
|
||||
@@ -22,6 +22,16 @@ def c(name, rule_name=None):
|
||||
return ConceptExpression(concept, rule_name=rule_name or name)
|
||||
|
||||
|
||||
def update_concepts_ids(sheerka, parsing_expression):
|
||||
if isinstance(parsing_expression, ConceptExpression):
|
||||
if not parsing_expression.concept.id:
|
||||
concept = sheerka.get_by_key(parsing_expression.concept.key)
|
||||
parsing_expression.concept.metadata.id = concept.id
|
||||
|
||||
for pe in parsing_expression.elements:
|
||||
update_concepts_ids(sheerka, pe)
|
||||
|
||||
|
||||
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
|
||||
|
||||
|
||||
@@ -109,7 +119,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
|
||||
("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
|
||||
])
|
||||
def test_i_can_parse_regex_with_concept(self, expression, expected):
|
||||
sheerka, context, parser, foo, bar, var, _def = self.init_parser("foo", "bar", "var", "def")
|
||||
sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def")
|
||||
|
||||
update_concepts_ids(sheerka, expected)
|
||||
|
||||
res = parser.parse(context, Tokenizer(expression))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user