# Source: Sheerka-Old/tests/parsers/test_BnfNodeParser.py (806 lines, 33 KiB, Python)

import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression
import tests.parsers.parsers_utils
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
# Concepts shared by the tests that run against the class-level Sheerka.
# Entry order is significant: init_parser writes the initialised concepts
# back into this mapping by position.
cmap = {
    "one": Concept("one"),
    "two": Concept("two"),
    "three": Concept("three"),
    "plus": Concept(name="a plus b").def_var("a").def_var("b"),
    "bnf one": Concept("bnf_one", definition="'one'"),
    "one and two": Concept("one and two", definition="one two"),
    "one or more three": Concept("one or more three", definition="three+"),
    "two or four": Concept("two or four", definition="two | 'four'"),
    "twenties": Concept("twenties", definition="'twenty' c:two or four:=unit"),
    "one or more plus": Concept("one or more plus", definition="c:a plus b:+"),  # TODO

    # Concepts whose names or definitions contain the keyword "def".
    "def_only": Concept("def"),
    "def number": Concept("def number", definition="def (one|two)=number"),

    # Disabled: sequence of keywords using a bnf definition.
    # "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
    # "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),

    # Disabled: sequence of keywords using a def definition.
    # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF),
    # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"),
}
def u(parsing_expression, start, end, children=None):
    """
    Build the expected "underlying" node for a parsing expression (u stands for underlying).

    A plain string is first wrapped in a StrMatch. A StrMatch yields a TerminalNode
    whose value is the expression's ``to_match``; anything else yields a
    NonTerminalNode created with an empty list and the given children.

    :param parsing_expression: a string, a StrMatch or any other parsing expression
    :param start: start position forwarded to the node
    :param end: end position forwarded to the node
    :param children: children forwarded to the NonTerminalNode (unused for terminals)
    :return: a TerminalNode or a NonTerminalNode
    """
    expr = StrMatch(parsing_expression) if isinstance(parsing_expression, str) else parsing_expression

    if not isinstance(expr, StrMatch):
        return NonTerminalNode(expr, start, end, [], children)

    return TerminalNode(expr, start, end, expr.to_match)
def compute_expected_array(my_concepts_map, expression, expected, exclude_body=False):
    """
    Delegate to ``tests.parsers.parsers_utils.compute_expected_array`` with
    ``init_empty_body`` always set to True.

    :param my_concepts_map: mapping handed to the shared helper
    :param expression: text handed to the shared helper
    :param expected: expected description handed to the shared helper
    :param exclude_body: forwarded unchanged
    :return: whatever the shared helper returns
    """
    shared_helper = tests.parsers.parsers_utils.compute_expected_array
    options = {"init_empty_body": True, "exclude_body": exclude_body}
    return shared_helper(my_concepts_map, expression, expected, **options)
# Test suite for BnfNodeParser, built on the TestUsingMemoryBasedSheerka base class.
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# Class-level Sheerka instance: assigned by setup_class and read by init_parser
# when it is called without a concept map.
sheerka = None
@classmethod
def setup_class(cls):
    """
    Initialise the ``cmap`` concepts once and publish the resulting Sheerka
    as ``TestBnfNodeParser.sheerka``.

    The context and the parser returned by ``init_parser`` are discarded.
    """
    bootstrap = TestBnfNodeParser()
    shared_sheerka, _context, _parser = bootstrap.init_parser(
        cmap,
        singleton=False,
        create_new=True,
        init_from_sheerka=True,
    )
    TestBnfNodeParser.sheerka = shared_sheerka
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
    """
    Return a ``(sheerka, context, parser)`` triple for a test.

    :param my_concepts_map: when given, its values are passed to ``init_concepts``
        and every entry is overwritten, by position, with the concept that call
        returned; when None, the class-level Sheerka is reused with a fresh context
    :param init_from_sheerka: build the parser with ``BnfNodeParser(sheerka=sheerka)``
        instead of ``BnfNodeParser()``
    :param kwargs: forwarded to ``init_concepts`` (only used when a map is given)
    :return: sheerka, context, parser
    """
    if my_concepts_map is None:
        sheerka = TestBnfNodeParser.sheerka
        context = self.get_context(sheerka)
    else:
        sheerka, context, *refreshed = self.init_concepts(*my_concepts_map.values(), **kwargs)
        # Only values are replaced, so iterating the keys while assigning is safe.
        for position, key in enumerate(my_concepts_map):
            my_concepts_map[key] = refreshed[position]

    if init_from_sheerka:
        parser = BnfNodeParser(sheerka=sheerka)
    else:
        parser = BnfNodeParser()

    return sheerka, context, parser
def exec_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
    """
    Run ``get_concepts_sequences`` on ``text`` and check every produced sequence.

    :param my_map: label -> concept mapping; its values are passed to ``init_concepts``
    :param text: input handed to ``reset_parser``
    :param expected: one expected sequence, or several of them when ``multiple_result`` is set
    :param multiple_result: treat ``expected`` as a collection of expected sequences
    :param post_init_concepts: optional callable invoked as ``post_init_concepts(sheerka, context)``
        after the expectations are computed and before the parser is created
    :return: ``(sheerka, context, sequence)`` when exactly one result was produced,
        otherwise ``(sheerka, context, [sequence, ...])``
    """
    sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)

    candidates = expected if multiple_result else [expected]
    expected_array = [compute_expected_array(my_map, text, candidate) for candidate in candidates]

    if post_init_concepts:
        post_init_concepts(sheerka, context)

    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)
    parser.reset_parser(context, text)
    helpers = parser.get_concepts_sequences()

    assert len(helpers) == len(expected_array)
    for helper, wanted in zip(helpers, expected_array):
        assert helper.sequence == wanted

    found = [helper.sequence for helper in helpers]
    return sheerka, context, (found[0] if len(helpers) == 1 else found)
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
    """
    Perform the same checks as ``exec_get_concepts_sequences`` but return only
    the sequence(s), dropping the sheerka and context it also returns.

    :param my_map: label -> concept mapping
    :param text: input to parse
    :param expected: expected sequence, or collection of sequences when ``multiple_result`` is set
    :param multiple_result: forwarded unchanged
    :param post_init_concepts: forwarded unchanged
    :return: a single sequence when exactly one result was produced, otherwise a list of sequences
    """
    _, _, sequences = self.exec_get_concepts_sequences(
        my_map, text, expected, multiple_result, post_init_concepts
    )
    return sequences
def test_i_cannot_parse_empty_strings(self):
    """Parsing "" fails with a NOT_FOR_ME body whose reason is IS_EMPTY."""
    sheerka, context, parser = self.init_parser({}, singleton=True)

    outcome = parser.parse(context, "")

    assert not outcome.status
    rejection = outcome.body
    assert sheerka.isinstance(rejection, BuiltinConcepts.NOT_FOR_ME)
    assert rejection.reason == BuiltinConcepts.IS_EMPTY
@pytest.mark.parametrize("expr, text", [
    # (StrMatch("foo"), "foo"),
    (StrMatch("'foo'"), "'foo'"),
    (StrMatch("1"), "1"),
    (StrMatch("3.14"), "3.14"),
    (StrMatch("+"), "+"),
])
def test_i_can_match_simple_bnf(self, expr, text):
    """A concept defined by one StrMatch is found and its underlying node is the terminal built by u(expr, 0, 0)."""
    grammar = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(grammar, text, [text])

    first_node = matched[0]
    assert first_node.underlying == u(expr, 0, 0)
def test_i_can_match_multiple_concepts_in_one_input(self):
    """The concepts "one" and "two" are both recognised in the input "one two one"."""
    grammar = {name: self.bnf_concept(name) for name in ("one", "two")}

    # The ("one", 1) tuple is interpreted by compute_expected_array;
    # presumably it designates the second occurrence of "one" - TODO confirm.
    self.validate_get_concepts_sequences(grammar, "one two one", ["one", "two", ("one", 1)])
@pytest.mark.parametrize("text, expected", [
    ("one two three", [CNC("foo", source="one two three")]),
    ("one two", []),
    ("one two four", []),
])
def test_i_can_match_sequence(self, text, expected):
    """A Sequence of three tokens yields "foo" for the full input; the two other inputs expect nothing."""
    rule = Sequence("one", "two", "three")
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
def test_i_always_choose_the_longest_match(self):
    """With "foo" = one two three and "bar" = one two, only "foo" is expected for "one two three"."""
    foo_def = self.bnf_concept("foo", Sequence("one", "two", "three"))
    bar_def = self.bnf_concept("bar", Sequence("one", "two"))
    grammar = {"foo": foo_def, "bar": bar_def}

    source = "one two three"
    self.validate_get_concepts_sequences(grammar, source, [CNC("foo", source=source)])
def test_i_can_match_multiple_sequences(self):
    """ "one two three one two" is expected to yield "foo" followed by "bar" (tokens 6 to 8)."""
    foo_def = self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three")))
    bar_def = self.bnf_concept("bar", Sequence(StrMatch("one"), StrMatch("two")))
    grammar = {"foo": foo_def, "bar": bar_def}

    wanted = [
        CNC("foo", source="one two three"),
        CNC("bar", source="one two", start=6, end=8),
    ]
    self.validate_get_concepts_sequences(grammar, "one two three one two", wanted)
@pytest.mark.parametrize("text, expected", [
    ("one", [CNC("foo", source="one")]),
    ("two", [CNC("foo", source="two")]),
    ("three", []),
])
def test_i_can_match_ordered_choice(self, text, expected):
    """An OrderedChoice between "one" and "two" yields "foo" for either alternative and nothing for "three"."""
    alternatives = OrderedChoice(StrMatch("one"), StrMatch("two"))
    grammar = {"foo": self.bnf_concept("foo", alternatives)}

    self.validate_get_concepts_sequences(grammar, text, expected)
def test_i_do_not_match_ordered_choice_with_empty_alternative(self):
    """An OrderedChoice containing an empty StrMatch is expected to yield nothing for the empty input."""
    alternatives = OrderedChoice(StrMatch("one"), StrMatch(""))
    grammar = {"foo": self.bnf_concept("foo", alternatives)}

    self.validate_get_concepts_sequences(grammar, "", [])
@pytest.mark.parametrize("text, expected", [
    ("thirty one ok", [CNC("foo", source="thirty one ok")]),
    ("twenty one ok", [CNC("foo", source="twenty one ok")]),
])
def test_i_can_mix_sequence_and_ordered(self, text, expected):
    """A Sequence whose first element is an OrderedChoice matches with either alternative."""
    tens = OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))
    rule = Sequence(tens, StrMatch("one"), StrMatch("ok"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("twenty thirty", [CNC("foo", source="twenty thirty")]),
    ("one", [CNC("foo", source="one")]),
])
def test_i_can_mix_ordered_choices_and_sequences(self, text, expected):
    """An OrderedChoice whose first alternative is a Sequence matches through either alternative."""
    pair = Sequence(StrMatch("twenty"), StrMatch("thirty"))
    rule = OrderedChoice(pair, StrMatch("one"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("one", [CNC("foo", source="one")]),
    ("", []),
    ("two", []),
])
def test_i_can_parse_optional(self, text, expected):
    """Optional("one") yields "foo" for "one"; the empty input and "two" expect nothing."""
    rule = Optional(StrMatch("one"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("twenty one", [CNC("foo", source="twenty one")]),
    ("one", [CNC("foo", source="one")]),
])
def test_i_can_parse_sequence_starting_with_optional(self, text, expected):
    """A Sequence starting with an Optional matches with and without the optional token."""
    rule = Sequence(Optional(StrMatch("twenty")), StrMatch("one"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("one two three", [CNC("foo", source="one two three")]),
    ("one two", [CNC("foo", source="one two")]),
])
def test_i_can_parse_sequence_ending_with_optional(self, text, expected):
    """A Sequence ending with an Optional matches with and without the optional token."""
    rule = Sequence(StrMatch("one"), StrMatch("two"), Optional(StrMatch("three")))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("one two three", [CNC("foo", source="one two three")]),
    ("one three", [CNC("foo", source="one three")]),
])
def test_i_can_parse_sequence_with_optional_in_between(self, text, expected):
    """A Sequence with an Optional in the middle matches with and without the optional token."""
    rule = Sequence(StrMatch("one"), Optional(StrMatch("two")), StrMatch("three"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("", []),
    ("two", []),
    ("one", [CNC("foo", source="one")]),
    ("one one", [CNC("foo", source="one one")]),
])
def test_i_can_parse_zero_or_more(self, text, expected):
    """ZeroOrMore("one") yields "foo" for one or two repetitions; "" and "two" expect nothing."""
    rule = ZeroOrMore(StrMatch("one"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("two", [CNC("foo", source="two")]),
    ("one two", [CNC("foo", source="one two")]),
    ("one one two", [CNC("foo", source="one one two")]),
])
def test_i_can_parse_sequence_and_zero_or_more(self, text, expected):
    """A Sequence of ZeroOrMore("one") followed by "two" matches zero, one or two leading "one"."""
    rule = Sequence(ZeroOrMore(StrMatch("one")), StrMatch("two"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("one, one , one", [CNC("foo", source="one, one , one")]),
])
def test_i_can_parse_zero_or_more_with_separator(self, text, expected):
    """ZeroOrMore("one", sep=",") matches a comma-separated list with uneven spacing."""
    rule = ZeroOrMore(StrMatch("one"), sep=",")
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
def test_that_zero_or_more_is_greedy(self):
    """
    ZeroOrMore("one") must consume all three "one" tokens even though a second
    concept, "bar", matches a single "one".

    The competing concept is registered under its own name "bar"; it used to be
    created with the name "foo", which looks like a copy-paste slip and meant no
    distinct competitor existed.
    """
    my_map = {
        "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))),
        "bar": self.bnf_concept("bar", StrMatch("one")),
    }

    text = "one one one"
    expected = [CNC("foo", source=text)]
    self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("", []),
    ("two", []),
    ("one", [CNC("foo", source="one")]),
    ("one one one", [CNC("foo", source="one one one")]),
])
def test_i_can_parse_one_or_more(self, text, expected):
    """OneOrMore("one") yields "foo" for one or three repetitions; "" and "two" expect nothing."""
    rule = OneOrMore(StrMatch("one"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("two", []),
    ("one two", [CNC("foo", source="one two")]),
    ("one one two", [CNC("foo", source="one one two")]),
])
def test_i_can_parse_sequence_one_and_or_more(self, text, expected):
    """A Sequence of OneOrMore("one") followed by "two" needs at least one leading "one"."""
    rule = Sequence(OneOrMore(StrMatch("one")), StrMatch("two"))
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("one, one , one", [CNC("foo", source="one, one , one")]),
])
def test_i_can_parse_one_or_more_with_separator(self, text, expected):
    """OneOrMore("one", sep=",") matches a comma-separated list with uneven spacing."""
    rule = OneOrMore(StrMatch("one"), sep=",")
    grammar = {"foo": self.bnf_concept("foo", rule)}

    self.validate_get_concepts_sequences(grammar, text, expected)
def test_that_one_or_more_is_greedy(self):
    """
    OneOrMore("one") must consume all three "one" tokens even though a second
    concept, "bar", matches a single "one".

    The competing concept is registered under its own name "bar"; it used to be
    created with the name "foo", which looks like a copy-paste slip and meant no
    distinct competitor existed.
    """
    my_map = {
        "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
        "bar": self.bnf_concept("bar", StrMatch("one")),
    }

    text = "one one one"
    expected = [CNC("foo", source=text)]
    self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("one two", [
        [CNC("foo", source="one two")],
        [CNC("bar", source="one two")]]),
    # This case was never exercised: the test body used to overwrite `text` and
    # `expected` with the values of the first case. It is kept as a non-strict
    # xfail so it runs and is reported without breaking the suite until its
    # expectations have been confirmed against the parser.
    pytest.param(
        "one two one two", [
            [CNC("bar", source="one two"), CNC("bar", source="one two")],
            [CNC("foo", source="one two"), CNC("bar", source="one two")],
            [CNC("bar", source="one two"), CNC("foo", source="one two")],
            [CNC("foo", source="one two"), CNC("foo", source="one two")]],
        marks=pytest.mark.xfail(
            reason="case was shadowed by hard-coded values and has never been verified",
            strict=False)),
])
def test_i_can_have_multiple_results(self, text, expected):
    """
    When two concepts match the same input, one sequence per concept is returned.

    "foo" is the sequence one two; "bar" is one followed by (two | three).
    The parametrized ``text`` and ``expected`` are used as given.
    """
    my_map = {
        "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))),
        "bar": self.bnf_concept("bar", Sequence(
            StrMatch("one"),
            OrderedChoice(StrMatch("two"), StrMatch("three")))),
    }

    self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
def test_i_can_refer_to_other_concepts(self):
    """
    "bar" is defined as a reference to "foo": both concepts are returned, and the
    compiled part of "bar" holds the very same "foo" object as body and under "foo".
    """
    foo_def = self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two")))
    bar_def = self.bnf_concept("bar", ConceptExpression("foo"))
    grammar = {"foo": foo_def, "bar": bar_def}

    source = "one two"
    wanted = [
        [CNC("foo", source=source)],
        [CN("bar", source=source)],  # Do not check the compiled part
    ]
    sequences = self.validate_get_concepts_sequences(grammar, source, wanted, multiple_result=True)

    # explicit validations of the compiled
    parsed_foo = sequences[0][0].concept
    assert parsed_foo.body == BuiltinConcepts.NOT_INITIALIZED
    assert parsed_foo.compiled == {ConceptParts.BODY: DoNotResolve("one two")}

    parsed_bar = sequences[1][0].concept
    assert parsed_bar.body == BuiltinConcepts.NOT_INITIALIZED
    assert parsed_bar.compiled == {
        ConceptParts.BODY: parsed_foo,
        "foo": parsed_foo,
    }
    # Same object, not merely an equal one.
    assert parsed_bar.compiled[ConceptParts.BODY] is parsed_bar.compiled["foo"]
def test_i_can_refer_to_other_concepts_with_body(self):
    """
    Same as the plain reference case, but "foo" declares a body: its compiled
    part is then expected to be empty while "bar" still references it.
    """
    foo_def = self.bnf_concept(Concept("foo", body="'foo'"), Sequence(StrMatch("one"), StrMatch("two")))
    bar_def = self.bnf_concept("bar", ConceptExpression("foo"))
    grammar = {"foo": foo_def, "bar": bar_def}

    source = "one two"
    wanted = [
        [CNC("foo", source=source)],
        [CN("bar", source=source)],  # Do not check the compiled part
    ]
    sequences = self.validate_get_concepts_sequences(grammar, source, wanted, multiple_result=True)

    # explicit validations of the compiled
    parsed_foo = sequences[0][0].concept
    assert parsed_foo.body == BuiltinConcepts.NOT_INITIALIZED
    assert len(parsed_foo.compiled) == 0  # because there is a body defined in the metadata

    parsed_bar = sequences[1][0].concept
    assert parsed_bar.body == BuiltinConcepts.NOT_INITIALIZED
    assert parsed_bar.compiled == {
        ConceptParts.BODY: parsed_foo,
        "foo": parsed_foo,
    }
def test_i_can_use_context_reference_with_multiple_levels(self):
    """
    "baz" references "bar" which references "foo": three sequences are returned
    and each compiled part exposes the concepts referenced below it.
    """
    foo_def = self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two")))
    bar_def = self.bnf_concept("bar", ConceptExpression("foo"))
    baz_def = self.bnf_concept("baz", ConceptExpression("bar"))
    grammar = {"foo": foo_def, "bar": bar_def, "baz": baz_def}

    source = "one two"
    wanted = [
        [CNC("foo", source=source)],
        [CN("bar", source=source)],  # Do not check the compiled part
        [CN("baz", source=source)],  # Do not check the compiled part
    ]
    sequences = self.validate_get_concepts_sequences(grammar, source, wanted, multiple_result=True)

    # explicit validations of the compiled
    parsed_foo = sequences[0][0].concept
    assert parsed_foo.body == BuiltinConcepts.NOT_INITIALIZED
    assert parsed_foo.compiled == {ConceptParts.BODY: DoNotResolve("one two")}

    parsed_bar = sequences[1][0].concept
    assert parsed_bar.body == BuiltinConcepts.NOT_INITIALIZED
    assert parsed_bar.compiled == {
        ConceptParts.BODY: parsed_foo,
        "foo": parsed_foo,
    }

    parsed_baz = sequences[2][0].concept
    assert parsed_baz.body == BuiltinConcepts.NOT_INITIALIZED
    assert parsed_baz.compiled == {
        ConceptParts.BODY: parsed_bar,
        "bar": parsed_bar,
        "foo": parsed_foo,
    }
def test_i_can_mix_reference_to_other_concepts(self):
    """
    "bar" is a Sequence of a reference to "foo" and a literal choice: for each
    input the compiled "foo" carries the token that "foo" matched.
    """
    foo_def = self.bnf_concept("foo", OrderedChoice(StrMatch("twenty"), StrMatch("thirty")))
    bar_def = self.bnf_concept("bar", Sequence(
        ConceptExpression("foo"),
        OrderedChoice(StrMatch("one"), StrMatch("two"))))
    grammar = {"foo": foo_def, "bar": bar_def}

    # (full input, part of it matched by "foo")
    scenarios = (
        ("twenty two", "twenty"),
        ("thirty one", "thirty"),
    )
    for source, foo_part in scenarios:
        sequences = self.validate_get_concepts_sequences(grammar, source, [CN("bar", source=source)])
        parsed_bar = sequences[0].concept
        assert parsed_bar.compiled == {
            ConceptParts.BODY: DoNotResolve(source),
            "foo": grammar["foo"],
        }
        assert parsed_bar.compiled["foo"].compiled == {ConceptParts.BODY: DoNotResolve(foo_part)}
def test_i_can_mix_reference_to_other_concepts_when_body(self):
    """
    Same as the mixed reference case, but "foo" declares a body: the compiled
    "foo" inside "bar" equals a new "foo" and has an empty compiled part.
    """
    foo_def = self.bnf_concept(
        Concept("foo", body="'foo'"),
        OrderedChoice(StrMatch("twenty"), StrMatch("thirty")))
    bar_def = self.bnf_concept("bar", Sequence(
        ConceptExpression("foo"),
        OrderedChoice(StrMatch("one"), StrMatch("two"))))
    grammar = {"foo": foo_def, "bar": bar_def}

    # First input: go through exec_get_concepts_sequences to get hold of sheerka.
    source = "twenty two"
    sheerka, _context, sequences = self.exec_get_concepts_sequences(
        grammar, source, [CN("bar", source=source)])
    parsed_bar = sequences[0].concept
    assert parsed_bar.compiled == {
        ConceptParts.BODY: DoNotResolve("twenty two"),
        "foo": sheerka.new("foo"),
    }
    assert parsed_bar.compiled["foo"].compiled == {}  # as foo has a body

    # Second input: the same sheerka is reused for the comparison.
    source = "thirty one"
    sequences = self.validate_get_concepts_sequences(grammar, source, [CN("bar", source=source)])
    parsed_bar = sequences[0].concept
    assert parsed_bar.compiled == {
        ConceptParts.BODY: DoNotResolve("thirty one"),
        "foo": sheerka.new("foo"),
    }
    assert parsed_bar.compiled["foo"].compiled == {}
def test_i_can_mix_zero_and_more_and_reference_to_other_concepts(self):
    """
    "bar" is ZeroOrMore of a reference to "foo": the compiled "foo" is a list
    with one entry per matched token, in input order.
    """
    foo_def = self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"), StrMatch("three")))
    bar_def = self.bnf_concept("bar", ZeroOrMore(ConceptExpression("foo")))
    grammar = {"foo": foo_def, "bar": bar_def}

    source = "one two three"
    sequences = self.validate_get_concepts_sequences(grammar, source, [CN("bar", source=source)])

    parsed_bar = sequences[0].concept
    assert parsed_bar.compiled == {
        ConceptParts.BODY: DoNotResolve("one two three"),
        "foo": [grammar["foo"]] * 3,
    }
    for position, token in enumerate(("one", "two", "three")):
        assert parsed_bar.compiled["foo"][position].compiled == {ConceptParts.BODY: DoNotResolve(token)}
def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
    """
    "foo" references the plain concepts "one" and "two", which are created
    without a bnf definition: "twenty one" is parsed and "one" appears in the compiled part.
    """
    grammar = {
        "one": Concept("one"),
        "two": Concept("two"),
    }
    units = OrderedChoice(ConceptExpression("one"), ConceptExpression("two"))
    grammar["foo"] = self.bnf_concept("foo", Sequence(StrMatch("twenty"), units))

    source = "twenty one"
    sequences = self.validate_get_concepts_sequences(grammar, source, [CN("foo", source=source)])

    parsed_foo = sequences[0].concept
    assert parsed_foo.compiled == {
        ConceptParts.BODY: DoNotResolve("twenty one"),
        "one": grammar["one"],
    }
def test_i_can_refer_to_group_concepts(self):
    """
    "foo" references "number", to whose set "one" and "two" are added after
    initialisation: the compiled part exposes both "number" and the matched member.
    """
    grammar = {
        "one": Concept("one"),
        "two": Concept("two"),
        "number": Concept("number"),
        "foo": self.bnf_concept("foo", Sequence("twenty", ConceptExpression("number"))),
    }

    def register_numbers(sheerka, context):
        # Invoked by the helper as post_init_concepts(sheerka, context).
        for member in ("one", "two"):
            sheerka.add_concept_to_set(context, grammar[member], grammar["number"])

    for unit in ("two", "one"):
        source = "twenty " + unit
        sequences = self.validate_get_concepts_sequences(
            grammar, source, [CN("foo", source=source)], post_init_concepts=register_numbers)

        # explicit validations of the compiled
        parsed_foo = sequences[0].concept
        assert parsed_foo.body == BuiltinConcepts.NOT_INITIALIZED
        assert parsed_foo.compiled == {
            "number": grammar["number"],
            unit: grammar[unit],
            ConceptParts.BODY: DoNotResolve(value=source),
        }
@pytest.mark.parametrize("bar_expr", [
    ConceptExpression("foo"),
    OrderedChoice(ConceptExpression("foo"), StrMatch("one")),
    Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two"))
])
def test_i_can_detect_infinite_recursion(self, bar_expr):
    """
    "foo" references "bar" and "bar" references "foo": for both concepts the parsing
    expression and the stored grammar entry are CHICKEN_AND_EGG.
    """
    grammar = {
        "foo": self.bnf_concept("foo", ConceptExpression("bar")),
        "bar": self.bnf_concept("bar", bar_expr),
    }
    sheerka, context, parser = self.init_parser(grammar, singleton=True)
    parser.context = context
    parser.sheerka = sheerka

    for name in ("foo", "bar"):
        concept = grammar[name]
        expression = parser.get_parsing_expression(concept)
        assert sheerka.isinstance(expression, BuiltinConcepts.CHICKEN_AND_EGG)
        stored = parser.concepts_grammars.get(concept.id)
        assert sheerka.isinstance(stored, BuiltinConcepts.CHICKEN_AND_EGG)
def test_i_can_get_parsing_expression_when_concept_isa(self):
    """
    After "one" and "twenty" are declared as "number" through set_isa, the
    expression for "number" holds an OrderedChoice containing both of them.
    """
    grammar = {
        "one": Concept("one"),
        "twenty": Concept("twenty"),
        "number": Concept("number"),
    }
    grammar["twenties"] = self.bnf_concept(
        "twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))

    sheerka, context, parser = self.init_parser(grammar, singleton=True)
    parser.context = context
    parser.sheerka = sheerka
    for member in ("one", "twenty"):
        sheerka.set_isa(context, sheerka.new(member), grammar["number"])

    expression = parser.get_parsing_expression(grammar["twenties"])

    assert expression == Sequence(
        ConceptExpression(grammar["twenty"], rule_name="twenty"),
        ConceptExpression(grammar["number"], rule_name="number"))
    assert expression.nodes[0].nodes == [StrMatch("twenty")]

    number_choice = expression.nodes[1].nodes[0]
    assert isinstance(number_choice, OrderedChoice)
    for member in ("one", "twenty"):
        assert ConceptExpression(grammar[member], rule_name=member) in number_choice.elements
def test_i_can_get_parsing_expression_when_sequence_of_concept(self):
    """A Sequence referencing "one" twice resolves to two ConceptExpression nodes bound to "one"."""
    grammar = {"one": Concept("one")}
    grammar["two_ones"] = self.bnf_concept(
        "two_ones", Sequence(ConceptExpression("one"), ConceptExpression("one")))

    sheerka, context, parser = self.init_parser(grammar, singleton=True)
    parser.context = context
    parser.sheerka = sheerka

    expression = parser.get_parsing_expression(grammar["two_ones"])

    wanted = Sequence(
        ConceptExpression(grammar["one"], rule_name="one"),
        ConceptExpression(grammar["one"], rule_name="one"))
    assert expression == wanted
@pytest.mark.parametrize("expr, text, expected", [
    (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
    (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
    (StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
])
def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
    """Input that "foo" does not cover is expected as UTN nodes before or after the recognised concept."""
    grammar = {"foo": self.bnf_concept("foo", expr)}

    self.validate_get_concepts_sequences(grammar, text, expected)
def test_i_can_recognize_unknown_then_they_look_like_known(self):
    """ "one three" starts like "one two" but does not complete it: "one " is expected as UTN, then "three"."""
    one_two = self.bnf_concept("one two", Sequence("one", "two"))
    three = self.bnf_concept("three")
    grammar = {"one two": one_two, "three": three}

    wanted = [UTN("one "), CNC("three", source="three")]
    self.validate_get_concepts_sequences(grammar, "one three", wanted)
def test_i_can_remove_duplicates(self):
    """With two concepts starting with "one", get_valid keeps a single sequence for "one three"."""
    one_two = self.bnf_concept("one two", Sequence("one", "two"))
    one_four = self.bnf_concept("one four", Sequence("one", "four"))
    three = self.bnf_concept("three")
    grammar = {"one two": one_two, "one four": one_four, "three": three}

    sheerka, context, parser = self.init_parser(grammar, singleton=True)
    parser.init_from_concepts(context, grammar.values())
    parser.reset_parser(context, "one three")

    candidates = parser.get_concepts_sequences()
    survivors = parser.get_valid(candidates)

    assert len(survivors) == 1
@pytest.mark.parametrize("parser_input, expected_status, expected", [
    ("one", True, [CNC("bnf one", source="one")]),  # the bnf one is chosen
    ("one two", True, [CN("one and two", source="one two")]),
    ("three three three", True, [CN("one or more three", source="three three three")]),
    ("twenty two", True, [CN("twenties", source="twenty two")]),
    ("twenty four", True, [CN("twenties", source="twenty four")]),
    ("twenty one", False, [UTN("twenty "), CN("bnf one", source="one")]),
    ("twenty two + 1", True, [CN("twenties", source="twenty two"), " + 1"]),
])
def test_i_can_parse(self, parser_input, expected_status, expected):
    """
    Parse with the class-level Sheerka (concepts from ``cmap``) and check the
    status, the PARSER_RESULT wrapper and the concept nodes it carries.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    outcome = parser.parse(context, parser_input)
    wanted_nodes = compute_expected_array(cmap, parser_input, expected)
    wrapper = outcome.value
    nodes = outcome.value.value

    assert outcome.status == expected_status
    assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert nodes == wanted_nodes
def test_i_can_parse_when_keyword(self):
    """
    "def one" is parsed as the "def number" concept; the expected compiled part
    additionally maps "def" to the ``def_only`` concept of ``cmap``.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    source = "def one"

    outcome = parser.parse(context, source)

    wanted_nodes = compute_expected_array(
        cmap, source, [CNC("def number", source="def one", number="one", one="one")])
    # Add the "def" entry to the first expected node's compiled part before comparing.
    wanted_nodes[0].compiled["def"] = cmap["def_only"]
    wrapper = outcome.value
    nodes = outcome.value.value

    assert outcome.status
    assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert nodes == wanted_nodes
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
# (True, [CNC("one_or_two", source="one", one="one", body="one")]),
# ]),
# ("two plus two", [
# (False, [CN("bnf_one"), UTN(" plus "), CN("one_or_two")]),
# (False, [CN("one_or_two"), UTN(" plus "), CN("one_or_two")]),
# ])
# ])
# def test_i_can_parse_when_multiple_results(self, parser_input, expected):
# sheerka, context, parser = self.init_parser(init_from_sheerka=True)
#
# res = parser.parse(context, parser_input)
# assert len(res) == len(expected)
#
# for res_i, expected_i in zip(res, expected):
# assert res_i.status == expected_i[0]
# expected_array = compute_expected_array(cmap, parser_input, expected_i[1])
# assert res_i.value.value == expected_array