# Tests for parsers.BnfNodeParser (BNF-based concept parsing).
import pytest
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
from parsers.BaseNodeParser import CNC, UTN, CN
|
|
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
|
|
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice
|
|
from parsers.BnfParser import BnfParser
|
|
|
|
import tests.parsers.parsers_utils
|
|
from tests.BaseTest import BaseTest
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
|
|
# Shared fixture: concept name -> Concept definitions loaded once per test class
# (see TestBnfNodeParser.setup_class). Keys are lookup names used by the tests;
# they do not always equal the Concept's own name.
cmap = {
    "one": Concept("one"),
    "two": Concept("two"),
    "three": Concept("three"),
    "four": Concept("four"),
    "thirty": Concept("thirty", body="30"),
    "forty": Concept("forty", body="40"),
    "fifty": Concept("fifty", body="50"),
    "number": Concept("number"),
    "foo": Concept("foo"),
    "bar": Concept("bar"),
    "baz": Concept("baz"),
    "one hundred": Concept("one hundred", body="100"),
    # NOTE(review): the concept name below keeps its quotes — presumably testing
    # quoted concept names; confirm this is intended.
    "one_hundred": Concept("'one hundred'", body="100"),
    # compound number with a where-clause and a computed body
    "hundreds": Concept("hundreds", definition="number=n1 'hundred' 'and' number=n2",
                        where="n1 < 10 and n2 < 100", body="n1 * 100 + n2").def_var("n1").def_var("n2"),

    "bnf baz": Concept("bnf baz", definition="'baz'"),  # this one should be chosen

    "plus": Concept("plus", definition="one 'plus' two").def_var("a").def_var("b"),

    'foo then bar': Concept("foo then bar", definition="foo bar").def_var("foo").def_var("bar"),
    'foo or bar': Concept("foo or bar", definition="foo | bar").def_var("foo").def_var("bar"),
    'one or more foo': Concept("one or more foo", definition="foo+").def_var("foo"),

    # inline choice bound to a rule name vs. choice via an intermediate concept
    "t1": Concept("t1", definition="'twenty' (one|two)=unit").def_var("unit").def_var("one").def_var("two"),
    "three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
    "t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),

    # testing keywords
    "def_only": Concept("def"),
    "def number": Concept("def number", definition="def (one|two)=number"),
    # sequence of keywords using bnf definition
    # "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
    # "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),
    # sequence of keywords using def definition
    # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF),
    # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"),

    "filter": Concept("filter", definition="'--filter' (one | two)")
}
|
|
|
|
|
|
def u(parsing_expression, start, end, children=None):
    """Build the *underlying* parse-tree node for ``parsing_expression``.

    A plain string is first wrapped in a ``StrMatch``.  A ``StrMatch`` yields
    a ``TerminalNode``; any other parsing expression yields a
    ``NonTerminalNode`` wrapping ``children``.

    :param parsing_expression: str, StrMatch, or any parsing expression
    :param start: start position of the node
    :param end: end position of the node
    :param children: child nodes for the non-terminal case
    :return: a TerminalNode or NonTerminalNode
    """
    expr = StrMatch(parsing_expression) if isinstance(parsing_expression, str) else parsing_expression
    if isinstance(expr, StrMatch):
        return TerminalNode(expr, start, end, expr.to_match)
    return NonTerminalNode(expr, start, end, [], children)
|
|
|
|
|
|
def compute_expected_array(my_concepts_map, expression, expected, exclude_body=False):
    """Delegate to the shared test helper, always initialising empty bodies."""
    return tests.parsers.parsers_utils.compute_expected_array(
        my_concepts_map,
        expression,
        expected,
        init_empty_body=True,
        exclude_body=exclude_body)
|
|
|
|
|
|
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
sheerka = None
|
|
|
|
@classmethod
|
|
def setup_class(cls):
|
|
t = cls()
|
|
TestBnfNodeParser.sheerka, context, _ = t.init_parser(
|
|
cmap,
|
|
singleton=False,
|
|
create_new=True,
|
|
init_from_sheerka=True)
|
|
|
|
# end of initialisation
|
|
sheerka = TestBnfNodeParser.sheerka
|
|
sheerka.set_isa(context, sheerka.new("one"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("two"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("three"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("four"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number"))
|
|
sheerka.set_isa(context, sheerka.new("hundreds"), sheerka.new("number"))
|
|
|
|
# Pay attention. 'twenties (t1 and t2) are not set as number
|
|
|
|
thirties = cls.update_bnf(context, Concept("thirties",
|
|
definition="thirty number",
|
|
where="number < 10",
|
|
body="thirty + number").def_var("thirty").def_var("number"))
|
|
cmap["thirties"] = sheerka.create_new_concept(context, thirties).body.body
|
|
sheerka.set_isa(context, sheerka.new("thirties"), sheerka.new("number"))
|
|
|
|
forties = cls.update_bnf(context, Concept("forties",
|
|
definition="forty number",
|
|
where="number < 10",
|
|
body="forty + number").def_var("forty").def_var("number"))
|
|
cmap["forties"] = sheerka.create_new_concept(context, forties).body.body
|
|
sheerka.set_isa(context, sheerka.new("forties"), sheerka.new("number"))
|
|
|
|
fifties = cls.update_bnf(context, Concept("fifties",
|
|
definition="fifty number",
|
|
where="number < 10",
|
|
body="fifty + number").def_var("fifty").def_var("number"))
|
|
cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body
|
|
sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number"))
|
|
|
|
thousands = cls.update_bnf(context, Concept("thousands",
|
|
definition="number 'thousand'",
|
|
where="number < 999",
|
|
body="number * 1000").def_var("number"))
|
|
cmap["thousands"] = sheerka.create_new_concept(context, thousands).body.body
|
|
sheerka.set_isa(context, sheerka.new("thousands"), sheerka.new("number"))
|
|
|
|
@staticmethod
|
|
def update_bnf(context, concept):
|
|
bnf_parser = BnfParser()
|
|
res = bnf_parser.parse(context, concept.metadata.definition)
|
|
if res.status:
|
|
concept.bnf = res.value.value
|
|
concept.metadata.definition_type = DEFINITION_TYPE_BNF
|
|
else:
|
|
raise Exception(res)
|
|
return concept
|
|
|
|
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
|
|
if my_concepts_map is not None:
|
|
sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs)
|
|
for i, pair in enumerate(my_concepts_map):
|
|
my_concepts_map[pair] = updated[i]
|
|
else:
|
|
sheerka = TestBnfNodeParser.sheerka
|
|
context = self.get_context(sheerka)
|
|
|
|
parser = BnfNodeParser(sheerka=sheerka) if init_from_sheerka else BnfNodeParser()
|
|
return sheerka, context, parser
|
|
|
|
def exec_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
|
|
sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)
|
|
if not multiple_result:
|
|
expected_array = [compute_expected_array(my_map, text, expected)]
|
|
else:
|
|
expected_array = [compute_expected_array(my_map, text, e) for e in expected]
|
|
|
|
if post_init_concepts:
|
|
post_init_concepts(sheerka, context)
|
|
|
|
parser = BnfNodeParser()
|
|
parser.init_from_concepts(context, updated)
|
|
parser.reset_parser(context, ParserInput(text))
|
|
|
|
bnf_parsers_helpers = parser.get_concepts_sequences()
|
|
|
|
assert len(bnf_parsers_helpers) == len(expected_array)
|
|
for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array):
|
|
assert parser_helper.sequence == expected_sequence
|
|
|
|
if len(bnf_parsers_helpers) == 1:
|
|
return sheerka, context, bnf_parsers_helpers[0].sequence
|
|
else:
|
|
return sheerka, context, [pe.sequence for pe in bnf_parsers_helpers]
|
|
|
|
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
|
|
sheerka, context, sequences = self.exec_get_concepts_sequences(
|
|
my_map, text, expected, multiple_result, post_init_concepts
|
|
)
|
|
return sequences
|
|
|
|
def test_i_cannot_parse_empty_strings(self):
|
|
sheerka, context, parser = self.init_parser({}, singleton=True)
|
|
|
|
res = parser.parse(context, ParserInput(""))
|
|
|
|
assert not res.status
|
|
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
|
|
assert res.body.reason == BuiltinConcepts.IS_EMPTY
|
|
|
|
@pytest.mark.parametrize("expr, text", [
|
|
(StrMatch("foo"), "foo"),
|
|
(StrMatch("'foo'"), "'foo'"),
|
|
(StrMatch("1"), "1"),
|
|
(StrMatch("3.14"), "3.14"),
|
|
(StrMatch("+"), "+"),
|
|
])
|
|
def test_i_can_match_simple_bnf(self, expr, text):
|
|
my_map = {
|
|
text: self.bnf_concept("foo", expr)
|
|
}
|
|
|
|
sequence = self.validate_get_concepts_sequences(my_map, text, [text])
|
|
assert sequence[0].underlying == u(expr, 0, 0)
|
|
|
|
def test_i_can_match_multiple_concepts_in_one_input(self):
|
|
my_map = {
|
|
"one": self.bnf_concept("one"),
|
|
"two": self.bnf_concept("two"),
|
|
}
|
|
|
|
text = "one two one"
|
|
expected = ["one", "two", ("one", 1)]
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one two three", [CNC("foo", source="one two three")]),
|
|
("one two", []),
|
|
("one two four", []),
|
|
])
|
|
def test_i_can_match_sequence(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", Sequence("one", "two", "three")),
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
def test_i_always_choose_the_longest_match(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", Sequence("one", "two", "three")),
|
|
"bar": self.bnf_concept("bar", Sequence("one", "two")),
|
|
}
|
|
|
|
text = "one two three"
|
|
expected = [CNC("foo", source=text)]
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self):
|
|
# to match '--filter' in one word
|
|
my_map = {
|
|
"filter": self.bnf_concept("filter",
|
|
Sequence(StrMatch("-", skip_whitespace=False),
|
|
StrMatch("-", skip_whitespace=False),
|
|
"filter")),
|
|
}
|
|
|
|
sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)
|
|
parser = BnfNodeParser()
|
|
parser.init_from_concepts(context, updated)
|
|
|
|
text = "--filter"
|
|
expected = [CN("filter", source="--filter")]
|
|
expected_array = compute_expected_array(my_map, text, expected)
|
|
|
|
parser.reset_parser(context, ParserInput(text))
|
|
bnf_parsers_helpers = parser.get_concepts_sequences()
|
|
assert bnf_parsers_helpers[0].sequence == expected_array
|
|
assert not bnf_parsers_helpers[0].has_unrecognized
|
|
|
|
# but I cannot parse
|
|
text = "- - filter"
|
|
parser.reset_parser(context, ParserInput(text))
|
|
bnf_parsers_helpers = parser.get_concepts_sequences()
|
|
assert bnf_parsers_helpers[0].has_unrecognized
|
|
|
|
def test_i_can_match_multiple_sequences(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))),
|
|
"bar": self.bnf_concept("bar", Sequence(StrMatch("one"), StrMatch("two"))),
|
|
}
|
|
|
|
text = "one two three one two"
|
|
expected = [
|
|
CNC("foo", source="one two three"),
|
|
CNC("bar", source="one two", start=6, end=8)]
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one", [CNC("foo", source="one")]),
|
|
("two", [CNC("foo", source="two")]),
|
|
("three", []),
|
|
|
|
])
|
|
def test_i_can_match_ordered_choice(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two")))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
def test_i_do_not_match_ordered_choice_with_empty_alternative(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("")))
|
|
}
|
|
|
|
text = ""
|
|
expected = []
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("concept_three, expected", [
|
|
(Concept("three"), []),
|
|
(BaseTest.bnf_concept("three", StrMatch("three")), [UTN('twenty '), "three"])
|
|
])
|
|
def test_i_can_manage_sequence_with_wrong_order_choice(self, concept_three, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
StrMatch("twenty"),
|
|
OrderedChoice(StrMatch("one"), StrMatch("two")))),
|
|
"three": concept_three}
|
|
|
|
text = "twenty three"
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("ok thirty one", [CNC("foo", source="ok thirty one")]),
|
|
("ok twenty one", [CNC("foo", source="ok twenty one")]),
|
|
("ok one", []),
|
|
])
|
|
def test_i_can_mix_sequence_and_ordered(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
StrMatch("ok"),
|
|
OrderedChoice(StrMatch("twenty"), StrMatch("thirty")),
|
|
StrMatch("one"))
|
|
)}
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
# ("twenty one", [CNC("foo", source="twenty one")]),
|
|
# ("twenty three", []), # three does not exist
|
|
("twenty four", []), # four exists but should not be seen
|
|
])
|
|
def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
StrMatch("twenty"),
|
|
OrderedChoice(StrMatch("one"), StrMatch("two")))),
|
|
"four": Concept("four")}
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("twenty thirty", [CNC("foo", source="twenty thirty")]),
|
|
("one", [CNC("foo", source="one")]),
|
|
])
|
|
def test_i_can_mix_ordered_choices_and_sequences(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
OrderedChoice(
|
|
Sequence(StrMatch("twenty"), StrMatch("thirty")),
|
|
StrMatch("one")))}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one", [CNC("foo", source="one")]),
|
|
("one two", [CNC("foo", source="one two")]),
|
|
("three", []),
|
|
|
|
])
|
|
def test_i_can_parse_unordered_choice(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", UnOrderedChoice(
|
|
StrMatch("one"),
|
|
Sequence(StrMatch("one"), StrMatch("two")))),
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one", [CNC("foo", source="one")]),
|
|
("", []),
|
|
("two", []),
|
|
])
|
|
def test_i_can_parse_optional(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", Optional(StrMatch("one")))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("twenty one", [CNC("foo", source="twenty one")]),
|
|
("one", [CNC("foo", source="one")]),
|
|
])
|
|
def test_i_can_parse_sequence_starting_with_optional(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
Optional(StrMatch("twenty")),
|
|
StrMatch("one")))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one two three", [CNC("foo", source="one two three")]),
|
|
("one two", [CNC("foo", source="one two")]),
|
|
])
|
|
def test_i_can_parse_sequence_ending_with_optional(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
StrMatch("one"),
|
|
StrMatch("two"),
|
|
Optional(StrMatch("three"))))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one two three", [CNC("foo", source="one two three")]),
|
|
("one three", [CNC("foo", source="one three")]),
|
|
])
|
|
def test_i_can_parse_sequence_with_optional_in_between(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
StrMatch("one"),
|
|
Optional(StrMatch("two")),
|
|
StrMatch("three")))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("", []),
|
|
("two", []),
|
|
("one", [CNC("foo", source="one")]),
|
|
("one one", [CNC("foo", source="one one")]),
|
|
])
|
|
def test_i_can_parse_zero_or_more(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one")))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("two", [CNC("foo", source="two")]),
|
|
("one two", [CNC("foo", source="one two")]),
|
|
("one one two", [CNC("foo", source="one one two")]),
|
|
])
|
|
def test_i_can_parse_sequence_and_zero_or_more(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
ZeroOrMore(StrMatch("one")),
|
|
StrMatch("two")
|
|
))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one, one , one", [CNC("foo", source="one, one , one")]),
|
|
])
|
|
def test_i_can_parse_zero_or_more_with_separator(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=","))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
def test_that_zero_or_more_is_greedy(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))),
|
|
"bar": self.bnf_concept("foo", StrMatch("one"))
|
|
}
|
|
|
|
text = "one one one"
|
|
expected = [CNC("foo", source=text)]
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("", []),
|
|
("two", []),
|
|
("one", [CNC("foo", source="one")]),
|
|
("one one one", [CNC("foo", source="one one one")]),
|
|
])
|
|
def test_i_can_parse_one_or_more(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("two", []),
|
|
("one two", [CNC("foo", source="one two")]),
|
|
("one one two", [CNC("foo", source="one one two")]),
|
|
])
|
|
def test_i_can_parse_sequence_one_and_or_more(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
OneOrMore(StrMatch("one")),
|
|
StrMatch("two")
|
|
))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one, one , one", [CNC("foo", source="one, one , one")]),
|
|
])
|
|
def test_i_can_parse_one_or_more_with_separator(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=","))
|
|
}
|
|
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
def test_that_one_or_more_is_greedy(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
|
|
"bar": self.bnf_concept("foo", StrMatch("one"))
|
|
}
|
|
|
|
text = "one one one"
|
|
expected = [CNC("foo", source=text)]
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
@pytest.mark.parametrize("text, expected", [
|
|
("one two", [
|
|
[CNC("foo", source="one two")],
|
|
[CNC("bar", source="one two")]]),
|
|
("one two one two", [
|
|
[CNC("bar", source="one two"), CNC("bar", source="one two")],
|
|
[CNC("foo", source="one two"), CNC("bar", source="one two")],
|
|
[CNC("bar", source="one two"), CNC("foo", source="one two")],
|
|
[CNC("foo", source="one two"), CNC("foo", source="one two")]]),
|
|
])
|
|
def test_i_can_have_multiple_results(self, text, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))),
|
|
"bar": self.bnf_concept("bar", Sequence(
|
|
StrMatch("one"),
|
|
OrderedChoice(StrMatch("two"), StrMatch("three")))),
|
|
}
|
|
|
|
text = "one two"
|
|
expected = [[CNC("foo", source=text)], [CNC("bar", source=text)]]
|
|
self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
|
|
|
|
def test_i_can_refer_to_other_concepts(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))),
|
|
"bar": self.bnf_concept("bar", ConceptExpression("foo"))
|
|
}
|
|
|
|
text = "one two"
|
|
expected = [
|
|
[CNC("foo", source=text)],
|
|
[CN("bar", source=text)] # Do not check the compiled part
|
|
]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
|
|
|
|
# explicit validations of the compiled
|
|
concept_foo = sequences[0][0].concept
|
|
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_foo.compiled == {ConceptParts.BODY: DoNotResolve("one two")}
|
|
|
|
concept_bar = sequences[1][0].concept
|
|
assert concept_bar.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: concept_foo,
|
|
"foo": concept_foo
|
|
}
|
|
assert id(concept_bar.compiled[ConceptParts.BODY]) == id(concept_bar.compiled["foo"])
|
|
|
|
def test_i_can_refer_to_other_concepts_with_body(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept(Concept("foo", body="'foo'"), Sequence(StrMatch("one"), StrMatch("two"))),
|
|
"bar": self.bnf_concept("bar", ConceptExpression("foo"))
|
|
}
|
|
|
|
text = "one two"
|
|
expected = [
|
|
[CNC("foo", source=text)],
|
|
[CN("bar", source=text)] # Do not check the compiled part
|
|
]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
|
|
|
|
# explicit validations of the compiled
|
|
concept_foo = sequences[0][0].concept
|
|
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert len(concept_foo.compiled) == 0 # because there is a body defined in the metadata
|
|
|
|
concept_bar = sequences[1][0].concept
|
|
assert concept_bar.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: concept_foo,
|
|
"foo": concept_foo
|
|
}
|
|
|
|
def test_i_can_manage_concepts_reference_when_multiple_levels(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))),
|
|
"bar": self.bnf_concept("bar", ConceptExpression("foo")),
|
|
"baz": self.bnf_concept("baz", ConceptExpression("bar")),
|
|
}
|
|
|
|
text = "one two"
|
|
expected = [
|
|
[CNC("foo", source=text)],
|
|
[CN("bar", source=text)], # Do not check the compiled part
|
|
[CN("baz", source=text)], # Do not check the compiled part
|
|
]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
|
|
|
|
# explicit validations of the compiled
|
|
concept_foo = sequences[0][0].concept
|
|
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_foo.compiled == {ConceptParts.BODY: DoNotResolve("one two")}
|
|
|
|
concept_bar = sequences[1][0].concept
|
|
assert concept_bar.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: concept_foo,
|
|
"foo": concept_foo
|
|
}
|
|
|
|
concept_baz = sequences[2][0].concept
|
|
assert concept_baz.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_baz.compiled == {
|
|
ConceptParts.BODY: concept_bar,
|
|
"bar": concept_bar
|
|
}
|
|
|
|
def test_i_can_mix_reference_to_other_concepts(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))),
|
|
"bar": self.bnf_concept("bar", Sequence(
|
|
ConceptExpression("foo"),
|
|
OrderedChoice(StrMatch("one"), StrMatch("two")))),
|
|
"three": Concept("three")
|
|
}
|
|
|
|
text = "twenty two"
|
|
expected = [CN("bar", source="twenty two")]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected)
|
|
concept_bar = sequences[0].concept
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: DoNotResolve("twenty two"),
|
|
"foo": my_map["foo"],
|
|
}
|
|
assert concept_bar.compiled["foo"].compiled == {ConceptParts.BODY: DoNotResolve("twenty")}
|
|
|
|
text = "thirty one"
|
|
expected = [CN("bar", source="thirty one")]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected)
|
|
concept_bar = sequences[0].concept
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: DoNotResolve("thirty one"),
|
|
"foo": my_map["foo"],
|
|
}
|
|
assert concept_bar.compiled["foo"].compiled == {ConceptParts.BODY: DoNotResolve("thirty")}
|
|
|
|
text = "thirty three"
|
|
expected = [[CN("foo", source="thirty"), CN("three")], []]
|
|
self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
|
|
|
|
def test_i_can_mix_reference_to_other_concepts_2(self):
|
|
# this time, we use concept expression
|
|
my_map = {
|
|
"twenty": self.bnf_concept("twenty", StrMatch("twenty")),
|
|
"number": self.bnf_concept("number", OrderedChoice(StrMatch("one"), StrMatch("two"))),
|
|
"twenties": self.bnf_concept("twenties",
|
|
Sequence(ConceptExpression("twenty"), ConceptExpression("number"))),
|
|
"three": Concept("three")
|
|
}
|
|
|
|
text = "twenty two"
|
|
|
|
expected = [CNC("twenties",
|
|
source="twenty two",
|
|
twenty=CC("twenty", body=DoNotResolve("twenty")),
|
|
number=CC("number", source="two", body=DoNotResolve("two"))
|
|
)]
|
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
|
|
|
text = "twenty three"
|
|
expected = [[CN("twenty"), CN("three")], []]
|
|
self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
|
|
|
|
def test_i_can_mix_reference_to_other_concepts_when_body(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept(Concept("foo", body="'foo'"),
|
|
OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))),
|
|
"bar": self.bnf_concept("bar", Sequence(
|
|
ConceptExpression("foo"),
|
|
OrderedChoice(StrMatch("one"), StrMatch("two")))),
|
|
}
|
|
|
|
text = "twenty two"
|
|
expected = [CN("bar", source="twenty two")]
|
|
sheerka, context, sequences = self.exec_get_concepts_sequences(my_map, text, expected)
|
|
|
|
concept_bar = sequences[0].concept
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: DoNotResolve("twenty two"),
|
|
"foo": sheerka.new("foo"),
|
|
}
|
|
assert concept_bar.compiled["foo"].compiled == {} # as foo as a body
|
|
|
|
text = "thirty one"
|
|
expected = [CN("bar", source="thirty one")]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected)
|
|
concept_bar = sequences[0].concept
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: DoNotResolve("thirty one"),
|
|
"foo": sheerka.new("foo"),
|
|
}
|
|
assert concept_bar.compiled["foo"].compiled == {}
|
|
|
|
def test_i_can_mix_zero_and_more_and_reference_to_other_concepts(self):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"), StrMatch("three"))),
|
|
"bar": self.bnf_concept("bar", ZeroOrMore(ConceptExpression("foo"))),
|
|
}
|
|
|
|
text = "one two three"
|
|
expected = [CN("bar", source="one two three")]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected)
|
|
concept_bar = sequences[0].concept
|
|
assert concept_bar.compiled == {
|
|
ConceptParts.BODY: DoNotResolve("one two three"),
|
|
"foo": [my_map["foo"], my_map["foo"], my_map["foo"]]
|
|
}
|
|
assert concept_bar.compiled["foo"][0].compiled == {ConceptParts.BODY: DoNotResolve("one")}
|
|
assert concept_bar.compiled["foo"][1].compiled == {ConceptParts.BODY: DoNotResolve("two")}
|
|
assert concept_bar.compiled["foo"][2].compiled == {ConceptParts.BODY: DoNotResolve("three")}
|
|
|
|
def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
|
|
my_map = {
|
|
"one": Concept("one"),
|
|
"two": Concept("two"),
|
|
"foo": self.bnf_concept("foo",
|
|
Sequence(
|
|
StrMatch("twenty"),
|
|
OrderedChoice(
|
|
ConceptExpression("one"),
|
|
ConceptExpression("two"),
|
|
rule_name="unit"))),
|
|
}
|
|
|
|
text = "twenty one"
|
|
expected = [CN("foo", source="twenty one")]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected)
|
|
concept_foo = sequences[0].concept
|
|
assert concept_foo.compiled == {
|
|
ConceptParts.BODY: DoNotResolve("twenty one"),
|
|
"unit": my_map["one"],
|
|
}
|
|
|
|
def test_i_can_refer_to_group_concepts(self):
|
|
my_map = {
|
|
"one": Concept("one"),
|
|
"two": Concept("two"),
|
|
"number": Concept("number"),
|
|
"foo": self.bnf_concept("foo", Sequence("twenty", ConceptExpression("number")))
|
|
}
|
|
|
|
def pic(s, c):
|
|
s.add_concept_to_set(c, my_map["one"], my_map["number"])
|
|
s.add_concept_to_set(c, my_map["two"], my_map["number"])
|
|
|
|
text = "twenty two"
|
|
expected = [CN("foo", source="twenty two")]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic)
|
|
|
|
# explicit validations of the compiled
|
|
concept_foo = sequences[0].concept
|
|
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["two"], two=my_map["two"]),
|
|
ConceptParts.BODY: DoNotResolve(value='twenty two')}
|
|
|
|
text = "twenty one"
|
|
expected = [CN("foo", source="twenty one")]
|
|
sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic)
|
|
|
|
# explicit validations of the compiled
|
|
concept_foo = sequences[0].concept
|
|
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
|
assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
|
|
ConceptParts.BODY: DoNotResolve(value='twenty one')}
|
|
|
|
@pytest.mark.parametrize("bar_expr, expected", [
|
|
(ConceptExpression("foo"), {}),
|
|
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
|
|
(Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}),
|
|
# (UnOrderedChoice(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']})
|
|
])
|
|
def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
|
|
my_map = {
|
|
"foo": self.bnf_concept("foo", ConceptExpression("bar")),
|
|
"bar": self.bnf_concept("bar", bar_expr),
|
|
}
|
|
|
|
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
|
parser.context = context
|
|
parser.sheerka = sheerka
|
|
|
|
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
|
|
parser.init_from_concepts(context, my_map.values())
|
|
assert parser.concepts_by_first_keyword == expected
|
|
|
|
# get_parsing_expression() also returns CHICKEN_AND_EGG
|
|
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
|
|
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
|
|
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
parsing_expression = parser.get_parsing_expression(context, my_map["bar"])
|
|
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
|
|
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
|
|
|
|
def test_i_can_detect_longer_infinite_recursion(self):
    """A four-concept cycle (foo -> bar -> baz -> qux -> foo) is also detected.

    All four concepts are removed from ``concepts_by_first_keyword`` and each
    one's cached grammar becomes a CHICKEN_AND_EGG whose body records the
    concept-id path of the detected cycle.
    """
    my_map = {
        "foo": self.bnf_concept("foo", ConceptExpression("bar")),
        "bar": self.bnf_concept("bar", ConceptExpression("baz")),
        "baz": self.bnf_concept("baz", ConceptExpression("qux")),
        "qux": self.bnf_concept("qux", ConceptExpression("foo")),
    }

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.context = context
    parser.sheerka = sheerka

    # every obvious cyclic recursion are removed from concept_by_first_keyword dict
    parser.init_from_concepts(context, my_map.values())
    assert parser.concepts_by_first_keyword == {}

    parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
    assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
    assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
    # the body is the id path of the cycle, closing back on the first id
    assert parser.concepts_grammars.get(my_map["foo"].id).body == ["1001", "1002", "1003", "1004", "1001"]

    assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
    assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id), BuiltinConcepts.CHICKEN_AND_EGG)
    assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id), BuiltinConcepts.CHICKEN_AND_EGG)
|
|
@pytest.mark.parametrize("expr, expected", [
    (OrderedChoice(StrMatch("bar"), ConceptExpression("foo")), False),
    (OrderedChoice(ConceptExpression("foo"), StrMatch("bar")), True),
    (OrderedChoice(Sequence(StrMatch("bar"), ConceptExpression("foo")), StrMatch("baz")), False),
    (OrderedChoice(Sequence(ConceptExpression("foo"), StrMatch("bar")), StrMatch("baz")), True)
])
def test_i_can_detect_ordered_choice_infinite_recursion(self, expr, expected):
    """Self-reference inside an OrderedChoice is only reported as a cycle
    (CHICKEN_AND_EGG) when it occurs before any consuming prefix —
    ``expected`` tells whether the given ``expr`` is cyclic."""
    concepts = {
        "foo": self.bnf_concept("foo", expr),
    }

    sheerka, context, parser = self.init_parser(concepts, singleton=True)
    parser.init_from_concepts(context, concepts.values())
    parser.context = context
    parser.sheerka = sheerka

    grammar = parser.get_parsing_expression(context, concepts["foo"])
    assert sheerka.isinstance(grammar, BuiltinConcepts.CHICKEN_AND_EGG) == expected
|
|
def test_i_can_get_parsing_expression_when_ending_by_concept_isa(self):
    """A definition ending with an isa-group concept expands that group.

    'twenties' is ``twenty number`` where one/twenty are declared isa number;
    the resulting expression keeps the two ConceptExpression rules, and the
    'number' rule expands to an UnOrderedChoice over its isa members. The
    expanded group grammar must NOT be cached under the 'number' concept id.
    """
    my_map = {
        "one": Concept("one"),
        "twenty": Concept("twenty"),
        "number": Concept("number"),
        "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
    }
    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.context = context
    parser.sheerka = sheerka
    sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
    sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])

    parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch

    parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
    assert parsing_expression == Sequence(
        ConceptExpression(my_map["twenty"], rule_name="twenty"),
        ConceptExpression(my_map["number"], rule_name="number"))

    assert len(parsing_expression.nodes) == len(parsing_expression.elements)
    # 'twenty' has no isa members: it expands to its plain keyword
    twenty_nodes = parsing_expression.nodes[0].nodes
    assert twenty_nodes == [StrMatch("twenty")]

    # 'number' expands to an unordered choice over its isa members (one, twenty)
    number_nodes = parsing_expression.nodes[1].nodes
    assert len(number_nodes) == 1
    assert isinstance(number_nodes[0], UnOrderedChoice)
    assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
    assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
    assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes

    assert my_map["number"].id not in parser.concepts_grammars
|
def test_i_can_get_parsing_expression_when_starting_by_isa_concept(self):
    """A definition starting with an isa-group concept expands that group.

    'hundreds' is ``number 'hundred'``; the 'number' rule expands to an
    UnOrderedChoice over its isa members (one, two). Note that 'hundreds'
    itself is declared isa number, so expansion must not loop on itself.
    """
    my_map = {
        "one": Concept("one"),
        "two": Concept("two"),
        "number": Concept("number"),
        "hundreds": self.bnf_concept("hundreds", Sequence(ConceptExpression("number"), StrMatch("hundred")))
    }

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.context = context
    parser.sheerka = sheerka
    sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
    sheerka.set_isa(context, sheerka.new("two"), my_map["number"])
    sheerka.set_isa(context, sheerka.new("hundreds"), my_map["number"])

    parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
    parsing_expression = parser.get_parsing_expression(context, my_map["hundreds"])

    assert parsing_expression == Sequence(
        ConceptExpression(my_map["number"], rule_name="number"),
        StrMatch("hundred"))

    assert len(parsing_expression.nodes) == len(parsing_expression.elements)

    # 'number' expands to its isa members; 'hundreds' (also isa number) is excluded here
    number_nodes = parsing_expression.nodes[0].nodes
    assert len(number_nodes) == 1
    assert isinstance(number_nodes[0], UnOrderedChoice)
    assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
    assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
    assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes
|
|
def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
    """Same as the ending-by-isa case, but the defined concept ('twenties')
    is itself a member of the 'number' group; its expression must still be
    built without recursing into itself through the group expansion."""
    my_map = {
        "one": Concept("one"),
        "twenty": Concept("twenty"),
        "number": Concept("number"),
        "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
    }
    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.context = context
    parser.sheerka = sheerka
    sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
    sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
    sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"])  # <- twenties is also a number

    parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch

    parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
    assert parsing_expression == Sequence(
        ConceptExpression(my_map["twenty"], rule_name="twenty"),
        ConceptExpression(my_map["number"], rule_name="number"))

    assert len(parsing_expression.nodes) == len(parsing_expression.elements)
    twenty_nodes = parsing_expression.nodes[0].nodes
    assert twenty_nodes == [StrMatch("twenty")]

    # the group expansion lists one/twenty but not 'twenties' itself
    number_nodes = parsing_expression.nodes[1].nodes
    assert len(number_nodes) == 1
    assert isinstance(number_nodes[0], UnOrderedChoice)
    assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
    assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
    assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
|
def test_i_can_get_parsing_expression_when_sequence_of_concept(self):
    """A BNF definition that repeats the same concept twice compiles to a
    Sequence of two identical ConceptExpression rules."""
    concepts = {
        "one": Concept("one"),
        "two_ones": self.bnf_concept("two_ones", Sequence(ConceptExpression("one"), ConceptExpression("one")))
    }
    sheerka, context, parser = self.init_parser(concepts, singleton=True)
    parser.context = context
    parser.sheerka = sheerka

    expected = Sequence(
        ConceptExpression(concepts["one"], rule_name="one"),
        ConceptExpression(concepts["one"], rule_name="one"))
    assert parser.get_parsing_expression(context, concepts["two_ones"]) == expected
|
|
@pytest.mark.parametrize("expr, text, expected", [
    (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
    (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
    (StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
])
def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
    """Text surrounding (or trailing) a matched concept is reported as
    unknown-text nodes (UTN) in the concepts sequence."""
    concepts = {"foo": self.bnf_concept("foo", expr)}

    self.validate_get_concepts_sequences(concepts, text, expected)
|
|
def test_i_can_recognize_unknown_when_they_look_like_known(self):
    """'one' alone is only a prefix of 'one two', so it stays unknown text
    while 'three' is still recognized."""
    concepts = {
        "one two": self.bnf_concept("one two", Sequence("one", "two")),
        "three": self.bnf_concept("three"),
    }

    self.validate_get_concepts_sequences(
        concepts,
        "one three",
        [UTN("one "), CNC("three", source="three")])
|
|
def test_i_can_remove_duplicates(self):
    """Two concepts sharing the first keyword 'one' produce equivalent
    candidate sequences for 'one three'; get_valid() keeps only one."""
    concepts = {
        "one two": self.bnf_concept("one two", Sequence("one", "two")),
        "one four": self.bnf_concept("one four", Sequence("one", "four")),
        "three": self.bnf_concept("three"),
    }
    sheerka, context, parser = self.init_parser(concepts, singleton=True)
    parser.init_from_concepts(context, concepts.values())

    parser.reset_parser(context, ParserInput("one three"))
    valid = parser.get_valid(parser.get_concepts_sequences())

    assert len(valid) == 1
|
|
@pytest.mark.parametrize("parser_input, expected_status, expected", [
    ("baz", True, [CNC("bnf baz", source="baz")]),  # the bnf one is chosen
    ("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]),
    ("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
    ("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
    ("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
])
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
    """End-to-end parse of small expressions against the concepts declared
    in the module-level ``cmap``; result is a PARSER_RESULT whose value is
    the expected concept-node array."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    result = parser.parse(context, ParserInput(parser_input))
    wanted_nodes = compute_expected_array(cmap, parser_input, expected)

    assert result.status == expected_status
    assert sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
    assert result.value.value == wanted_nodes
|
|
def test_i_can_parse_when_multiple_times_the_same_variable(self):
    """When a variable is bound several times ('foo+' matching three foos),
    the compiled variable holds the list of all matched concepts."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    text = "foo foo foo"
    expected_array = compute_expected_array(cmap, text, [CNC("one or more foo", source=text)])
    # each repetition of 'foo' is accumulated in the compiled 'foo' variable
    expected_array[0].compiled["foo"] = [cmap["foo"], cmap["foo"], cmap["foo"]]

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_test_when_expression_references_other_expressions(self):
    """'t2' references 'three_four', itself a choice of three | four:
    parsing 'twenty four' nests the compiled 'three_four' under 'unit'."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    text = "twenty four"
    expected = CNC("t2",
                   source=text,
                   unit=CC("three_four",
                           source="four",
                           four=CC("four", body=DoNotResolve("four")),
                           body=CC("four", body=DoNotResolve("four"))))
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
    """Parse 'thirty one' with the BNF concept 'thirties' whose 'number'
    part is an isa group: the matched member ('one') appears both as the
    group's body and under its own name in the compiled 'number'."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    # thirties is defined in the global variable cmap as
    # thirties = cls.update_bnf(context, Concept("thirties",
    #                                            definition="thirty number",
    #                                            where="number < 10",
    #                                            body="thirty + number").def_var("thirty").def_var("number"))

    text = "thirty one"
    expected = CNC("thirties",
                   source=text,
                   number=CC("number",
                             source="one",
                             one=CC("one", body=DoNotResolve("one")),
                             body=CC("one", body=DoNotResolve("one"))),
                   thirty="thirty")
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_2(self):
    """Same as above but with 'three', which belongs both to the 'number'
    isa group and to the 'three_four' concept; only the 'number' role is
    relevant when parsing 'thirty three'."""
    # this time, three is a number, and also part of three_four, even if it is not relevant in t3
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    text = "thirty three"
    expected = CNC("thirties",
                   source=text,
                   number=CC("number",
                             source="three",
                             three=CC("three", body=DoNotResolve("three")),
                             body=CC("three", body=DoNotResolve("three"))),
                   thirty="thirty")
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_parse_one_thousand(self):
    """Parse a simple number followed by 'thousand' ('one thousand').

    Grammars are cleared first to simulate a restart, so the parsing
    expression is rebuilt from scratch.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    sheerka.concepts_grammars.clear()  # to simulate restart
    text = "one thousand"

    one = CC("one", body=DoNotResolve("one"))
    expected = CNC("thousands",
                   source=text,
                   number=CC("number",
                             source="one",
                             one=one,
                             body=one))
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_parse_fifty_one_thousand(self):
    """Parse a complex number followed by 'thousand' ('fifty one thousand').

    Complex because the number part is itself a BNF concept, making the
    input ambiguous: the parser returns two results,
    ``(fifty one) thousand`` first and ``fifty (one thousand)`` second.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    sheerka.concepts_grammars.clear()  # to simulate restart
    text = "fifty one thousand"

    one = CC("one", body=DoNotResolve("one"))
    fifty_one = CC("fifties",
                   source="fifty one",
                   fifty="fifty",
                   number=CC("number", source="one", body=one, one=one))
    one_thousand = CC("thousands",
                      source="one thousand",
                      number=CC("number", source="one", body=one, one=one))

    # reading 1: 'fifty one' is the number of a 'thousands'
    expected_thousand = CNC("thousands",
                            source=text,
                            number=CC("number",
                                      source="fifty one",
                                      fifties=fifty_one,
                                      body=fifty_one))
    # reading 2: 'one thousand' is the number of a 'fifties'
    expected_fifties = CNC("fifties",
                           source=text,
                           fifty="fifty",
                           number=CC("number",
                                     source="one thousand",
                                     thousands=one_thousand,
                                     body=one_thousand))
    expected_thousands = compute_expected_array(cmap, text, [expected_thousand])
    # NOTE: rebinds expected_fifties to the wrapped expected array
    expected_fifties = compute_expected_array(cmap, text, [expected_fifties])

    res = parser.parse(context, ParserInput(text))

    assert res[0].status
    assert res[0].value.value == expected_thousands

    assert res[1].status
    assert res[1].value.value == expected_fifties
|
|
def test_i_can_parse_one_hundred_thousand(self):
    """Smoke test: 'one hundred thousand' parses into a PARSER_RESULT
    after a simulated restart (cleared grammars)."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    sheerka.concepts_grammars.clear()  # to simulate restart

    outcome = parser.parse(context, ParserInput("one hundred thousand"))

    assert outcome.status
    assert sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
|
|
def test_i_can_parse_hundreds_like_expression(self):
    """Parse ``number 'hundred' 'and' number`` ('three hundred and thirty
    two'); n1/n2 variables receive the two compiled number groups, where
    n2 is itself the composite 'thirties' concept."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    text = "three hundred and thirty two"
    three = CC("three", body=DoNotResolve("three"))
    two = CC("two", body=DoNotResolve("two"))
    thirty_two = CC("thirties",
                    source="thirty two",
                    thirty="thirty",
                    number=CC("number",
                              source="two",
                              body=two,
                              two=two))
    expected = CNC("hundreds",
                   source=text,
                   n1=CC("number",
                         source="three",
                         body=three,
                         three=three),
                   n2=CC("number",
                         source="thirty two",
                         body=thirty_two,
                         thirties=thirty_two))

    expected_array = compute_expected_array(cmap, text, [expected])
    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
    """After a simulated restart (cleared grammars AND cleared bnf on every
    concept), BNF concepts mixed with isa groups still parse correctly —
    exercised twice ('thirty three' then 'forty one') so the second parse
    also runs against rebuilt state."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    sheerka.concepts_grammars.clear()  # simulate restart
    for c in cmap.values():
        # also drop the compiled bnf so it must be rebuilt from the definition
        sheerka.get_by_id(c.id).bnf = None

    text = "thirty three"
    expected = CNC("thirties",
                   source=text,
                   number=CC("number",
                             source="three",
                             three=CC("three", body=DoNotResolve("three")),
                             body=CC("three", body=DoNotResolve("three"))),
                   thirty="thirty")
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array

    # second parse: grammars are now rebuilt/cached
    text = "forty one"
    expected = CNC("forties",
                   source=text,
                   number=CC("number",
                             source="one",
                             one=CC("one", body=DoNotResolve("one")),
                             body=CC("one", body=DoNotResolve("one"))),
                   forty="forty")
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_parse_when_keyword(self):
    """A definition containing a bare keyword ('def number'): the keyword
    compiles to the cmap['def_only'] concept under the 'def' variable."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    parser_input = "def one"
    expected = [CNC("def number", source="def one", number="one")]

    res = parser.parse(context, ParserInput(parser_input))
    expected_array = compute_expected_array(cmap, parser_input, expected)
    # the matched keyword itself is compiled as the 'def_only' concept
    expected_array[0].compiled["def"] = cmap["def_only"]

    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
|
def test_i_can_parse_filter(self):
    """'--filter one' is recognized as a single 'filter' concept node."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    expression = "--filter one"
    outcome = parser.parse(context, ParserInput(expression))
    wanted_nodes = compute_expected_array(cmap, expression, [CN("filter", source="--filter one")])

    assert outcome.status
    assert sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
    assert outcome.value.value == wanted_nodes
|
|
def test_i_can_parse_descent_grammar(self):
    """Parse '1 + 2 * 3' with the classic non-recursive expr/term/factor
    grammar (repetition via '*'); repeated sub-matches accumulate as lists
    in the compiled variables (two terms, two factors in the second term)."""
    my_map = {
        "factor": Concept("factor", definition="1 | 2 | 3"),
        "term": Concept("term", definition="factor ('*' factor)*"),
        "expr": Concept("expr", definition="term ('+' term)*"),
    }

    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.init_from_concepts(context, my_map.values())

    text = "1 + 2 * 3"

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value

    factor = my_map["factor"]
    term = my_map["term"]
    expr = my_map["expr"]

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == [CNC(expr,
                                  term=[CC(term,
                                           body=CC(factor, body=DoNotResolve("1")),
                                           factor=CC(factor, body=DoNotResolve("1"))),
                                        CC(term,
                                           body=DoNotResolve("2 * 3"),
                                           factor=[
                                               CC(factor, body=DoNotResolve("2")),
                                               CC(factor, body=DoNotResolve("3")),
                                           ])],
                                  body=DoNotResolve("1 + 2 * 3"))]
|
|
def test_i_can_parse_recursive_descent_grammar(self):
    """Parse '1 + 2 * 3' with a right-recursive expr/term grammar
    (``term := factor '*' term | factor``; ``expr := term '+' expr | term``);
    the expected tree nests the recursive matches under 'expr'/'term'."""
    my_map = {
        "factor": Concept("factor", definition="1 | 2 | 3"),
        "term": self.bnf_concept("term", OrderedChoice(
            Sequence(ConceptExpression("factor"), StrMatch("*"), ConceptExpression("term")),
            ConceptExpression("factor"))),
        "expr": self.bnf_concept("expr", OrderedChoice(
            Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
            ConceptExpression("term"))),
    }
    sheerka, context, parser = self.init_parser(my_map, singleton=True)
    parser.init_from_concepts(context, my_map.values())

    text = "1 + 2 * 3"

    res = parser.parse(context, ParserInput(text))
    parser_result = res.value
    concepts_nodes = res.value.value
    factor = my_map["factor"]
    term = my_map["term"]
    expr = my_map["expr"]

    # concepts_nodes = res.value.value is too complicated to be validated
    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == [CNC(expr,
                                  term=CC(term,
                                          body=CC(factor, body=DoNotResolve("1")),
                                          factor=CC(factor, body=DoNotResolve("1"))),
                                  expr=CC(expr,
                                          body=CC(term,
                                                  body=DoNotResolve("2 * 3"),
                                                  factor=CC(factor, body=DoNotResolve("2")),
                                                  term=CC(term,
                                                          body=CC(factor, body=DoNotResolve("3")),
                                                          factor=CC(factor, body=DoNotResolve("3")))),
                                          term=CC(term,
                                                  body=DoNotResolve("2 * 3"),
                                                  factor=CC(factor, body=DoNotResolve("2")),
                                                  term=CC(term,
                                                          body=CC(factor, body=DoNotResolve("3")),
                                                          factor=CC(factor, body=DoNotResolve("3"))))),

                                  body=DoNotResolve("1 + 2 * 3"))]
|
|
def test_i_can_parse_simple_recursive_grammar(self):
    """``foo := 'foo' ('bar' | foo)`` accepts any run of 'foo' tokens that
    ends in 'bar', and rejects anything else."""
    grammar = {
        "foo": self.bnf_concept("foo", Sequence(StrMatch("foo"),
                                                OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
    }

    sheerka, context, parser = self.init_parser(grammar, singleton=True)
    parser.init_from_concepts(context, grammar.values())

    for text, accepted in (("foo bar", True), ("foo foo foo bar", True), ("foo baz", False)):
        assert bool(parser.parse(context, ParserInput(text)).status) == accepted
|
|
@pytest.mark.parametrize("name, expected", [
    (None, []),
    ("", []),
    ("foo", StrMatch("foo")),
    ("foo bar", Sequence(StrMatch("foo"), StrMatch("bar"))),
    ("'foo bar baz' qux", Sequence(StrMatch("foo", skip_whitespace=False),
                                   StrMatch(" ", skip_whitespace=False),
                                   StrMatch("bar", skip_whitespace=False),
                                   StrMatch(" ", skip_whitespace=False),
                                   StrMatch("baz"),
                                   StrMatch("qux"))),
])
def test_i_can_get_expression_from_concept_name(self, name, expected):
    """A concept NAME compiles to string matches: None/empty -> [],
    one word -> StrMatch, several words -> Sequence, and quoted spans
    match their inner whitespace literally (skip_whitespace=False)."""
    assert BnfNodeParser.get_expression_from_concept_name(name) == expected
|
|
# @pytest.mark.parametrize("parser_input, expected", [
|
|
# ("one", [
|
|
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
|
|
# (True, [CNC("one_or_two", source="one", one="one", body="one")]),
|
|
# ]),
|
|
# ("two plus two", [
|
|
# (False, [CN("bnf_one"), UTN(" plus "), CN("one_or_two")]),
|
|
# (False, [CN("one_or_two"), UTN(" plus "), CN("one_or_two")]),
|
|
# ])
|
|
# ])
|
|
# def test_i_can_parse_when_multiple_results(self, parser_input, expected):
|
|
# sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
|
#
|
|
# res = parser.parse(context, parser_input)
|
|
# assert len(res) == len(expected)
|
|
#
|
|
# for res_i, expected_i in zip(res, expected):
|
|
# assert res_i.status == expected_i[0]
|
|
# expected_array = compute_expected_array(cmap, parser_input, expected_i[1])
|
|
# assert res_i.value.value == expected_array
|