Files
Sheerka-Old/tests/parsers/test_BnfNodeParser.py
kodjo 87cab44fb8 Fixed #125: SheerkaErrorManager
Fixed #135: Change services service priorities
Fixed #136: ErrorManager: Implement recognize_error
Fixed #137: BNFNodeParser : Error when parsing regex with sub parsers
Fixed #138: get_last_errors(): real errors sources are lost
Fixed #139: OneError return value removes the origin of the error
Fixed #140: Concept variables are not correctly handled when parsing sub expression
Fixed #143: Implement has_unknown_concepts()
2021-10-28 14:04:41 +02:00

2020 lines
90 KiB
Python

import re
import pytest
import tests.parsers.parsers_utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DEFINITION_TYPE_BNF, DoNotResolve
from core.global_symbols import NotInit
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.SheerkaIsAManager import SheerkaIsAManager
from core.tokenizer import Tokenizer
from parsers.BaseNodeParser import NoMatchingTokenError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor, BnfNodeParser, ConceptExpression, Match, NonTerminalNode, \
OneOrMore, Optional, OrderedChoice, RegExDef, RegExMatch, Sequence, StrMatch, TerminalNode, UnOrderedChoice, \
VariableExpression, ZeroOrMore
from tests.BaseTest import BaseTest
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.evaluators.EvaluatorTestsUtils import python_ret_val
from tests.parsers.parsers_utils import CC, CMV, CN, CNC, SCN, UTN, compare_with_test_object, get_test_obj
# Module-wide concept fixtures, keyed by a short test-local alias.
# The entries are replaced in place by TestBnfNodeParser.setup_class with the
# concept instances actually created in the shared ontology.
cmap = {
    "one": Concept("one"),
    "two": Concept("two"),
    "three": Concept("three"),
    "four": Concept("four"),
    "thirty": Concept("thirty", body="30"),
    "forty": Concept("forty", body="40"),
    "fifty": Concept("fifty", body="50"),
    "number": Concept("number"),
    "foo": Concept("foo"),
    "bar": Concept("bar"),
    "baz": Concept("baz"),
    "one hundred": Concept("one hundred", body="100"),
    # NOTE(review): the concept name below is the quoted string "'one hundred'" —
    # presumably deliberate (a quoted multi-word concept); confirm.
    "one_hundred": Concept("'one hundred'", body="100"),
    "hundreds": Concept("hundreds", definition="number=n1 'hundred' 'and' number=n2",
                        where="n1 < 10 and n2 < 100", body="n1 * 100 + n2").def_var("n1").def_var("n2"),
    "bnf baz": Concept("bnf baz", definition="'baz'"),  # this one should be chosen
    "plus": Concept("plus", definition="one 'plus' two").def_var("a").def_var("b"),
    'foo then bar': Concept("foo then bar", definition="foo bar").def_var("foo").def_var("bar"),
    'foo or bar': Concept("foo or bar", definition="foo | bar").def_var("foo").def_var("bar"),
    'one or more foo': Concept("one or more foo", definition="foo+").def_var("foo"),
    "t1": Concept("t1", definition="'twenty' (one|two)=unit").def_var("unit").def_var("one").def_var("two"),
    "three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
    "t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),
    # bnf with variable
    # NOTE(review): key "one thing" vs concept name "one x" — looks intentional
    # (the definition carries the variable x); confirm against the callers.
    "one thing": Concept("one x", definition="one x").def_var("x"),
    "x shoe": Concept("x shoe", definition="x 'shoe'").def_var("x"),
    # testing keywords
    "def_only": Concept("def"),
    "def number": Concept("def number", definition="def (one|two)=number"),
    # sequence of keywords using bnf definition
    # "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
    # "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),
    # sequence of keywords using def definition
    # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF),
    # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"),
    "filter": Concept("filter", definition="'--filter' (one | two)")
}
def u(parsing_expression, start, end, children=None):
    """Build the expected *underlying* parse node for test assertions.

    ("u" stands for underlying.)  A plain string is first promoted to a
    StrMatch.  Any Match expression yields a TerminalNode whose matched text
    is the expression's own to_match; anything else yields a NonTerminalNode
    wrapping *children*.
    """
    expr = StrMatch(parsing_expression) if isinstance(parsing_expression, str) else parsing_expression
    if isinstance(expr, Match):
        return TerminalNode(expr, start, end, expr.to_match, expr.to_match)
    return NonTerminalNode(expr, start, end, [], children)
def compute_expected_array(my_concepts_map, expression, expected, exclude_body=False):
    """Module-local shortcut for parsers_utils.compute_expected_array.

    Always passes init_empty_body=True; *exclude_body* is forwarded as-is.
    """
    return tests.parsers.parsers_utils.compute_expected_array(
        my_concepts_map, expression, expected,
        init_empty_body=True,
        exclude_body=exclude_body,
    )
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
shared_ontology = None
    @classmethod
    def setup_class(cls):
        """Build the shared ontology once for the whole test class.

        Creates every concept from the module-level ``cmap`` (replacing the
        map entries with the created instances), registers is-a relations
        towards "number", adds the bnf-defined thirties/forties/fifties/
        thousands concepts, then stores the resulting ontology on
        ``cls.shared_ontology`` so individual tests can re-attach it cheaply
        (see init_parser).
        """
        test_instance = cls()
        init_test_helper = test_instance.init_test(cache_only=False, ontology="#TestBnfNodeParser#")
        sheerka, context, *updated = init_test_helper.with_concepts(*cmap.values(), create_new=True).unpack()
        # refresh the module map with the concepts actually created
        for i, concept_name in enumerate(cmap):
            cmap[concept_name] = updated[i]
        # end of initialisation
        global_truth_context = test_instance.get_context(sheerka, global_truth=True)
        # NOTE(review): rebinds to the class-level sheerka instead of the one
        # unpacked above — presumably the same object; confirm.
        sheerka = TestBnfNodeParser.sheerka
        sheerka.set_isa(global_truth_context, cmap["one"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["two"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["three"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["four"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["thirty"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["forty"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["fifty"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["one hundred"], cmap["number"])
        sheerka.set_isa(global_truth_context, cmap["hundreds"], cmap["number"])
        # Pay attention. 'twenties (t1 and t2) are not set as 'number'
        thirties = cls.update_bnf(context, Concept("thirties",
                                                   definition="thirty number",
                                                   where="number < 10",
                                                   body="thirty + number").def_var("thirty").def_var("number"))
        cmap["thirties"] = sheerka.create_new_concept(context, thirties).body.body
        sheerka.set_isa(global_truth_context, sheerka.new("thirties"), sheerka.new("number"))
        forties = cls.update_bnf(context, Concept("forties",
                                                  definition="forty number",
                                                  where="number < 10",
                                                  body="forty + number").def_var("forty").def_var("number"))
        cmap["forties"] = sheerka.create_new_concept(context, forties).body.body
        sheerka.set_isa(global_truth_context, sheerka.new("forties"), sheerka.new("number"))
        fifties = cls.update_bnf(context, Concept("fifties",
                                                  definition="fifty number",
                                                  where="number < 10",
                                                  body="fifty + number").def_var("fifty").def_var("number"))
        cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body
        sheerka.set_isa(global_truth_context, sheerka.new("fifties"), sheerka.new("number"))
        thousands = cls.update_bnf(context, Concept("thousands",
                                                    definition="number 'thousand'",
                                                    where="number < 999",
                                                    body="number * 1000").def_var("number"))
        cmap["thousands"] = sheerka.create_new_concept(context, thousands).body.body
        sheerka.set_isa(global_truth_context, sheerka.new("thousands"), sheerka.new("number"))
        cls.shared_ontology = sheerka.get_ontology(context)
        sheerka.pop_ontology(context)
@staticmethod
def update_bnf(context, concept):
bnf_parser = BnfDefinitionParser()
res = bnf_parser.parse(context, concept.get_metadata().definition)
if res.status:
concept.set_bnf(res.value.value)
concept.get_metadata().definition_type = DEFINITION_TYPE_BNF
else:
raise Exception(res)
return concept
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
if my_concepts_map is None:
sheerka, context = self.init_test().unpack()
sheerka.add_ontology(context, self.shared_ontology)
else:
sheerka, context, *updated = self.init_test().with_concepts(*my_concepts_map.values(), **kwargs).unpack()
for i, pair in enumerate(my_concepts_map):
my_concepts_map[pair] = updated[i]
if init_from_sheerka:
parser = BnfNodeParser(sheerka=sheerka)
else:
parser = BnfNodeParser().init_from_concepts(context, my_concepts_map.values())
return sheerka, context, parser
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
sheerka, context, *updated = self.init_test().with_concepts(*my_map.values(), create_new=False).unpack()
sequences = self.exec_get_concepts_sequences(context,
my_map,
text,
expected,
multiple_result,
post_init_concepts,
*updated)
return sequences
    @staticmethod
    def exec_get_concepts_sequences(context,
                                    my_map,
                                    text,
                                    expected,
                                    multiple_result=False,
                                    post_init_concepts=None,
                                    *concepts):
        """Parse *text* with a parser built from *concepts* and compare the
        produced concept sequences with *expected*.

        :param expected: one sequence spec, or a list of specs when
            multiple_result is True (one per expected parse result)
        :param post_init_concepts: optional callback(sheerka, context) run
            before the parser is built (e.g. to add is-a relations)
        :return: the single matched sequence, or the list of all sequences
        """
        sheerka = context.sheerka
        if not multiple_result:
            expected_array = [compute_expected_array(my_map, text, expected)]
        else:
            expected_array = [compute_expected_array(my_map, text, e) for e in expected]
        if post_init_concepts:
            post_init_concepts(sheerka, context)
        parser = BnfNodeParser()
        parser.init_from_concepts(context, concepts)
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        assert len(bnf_parsers_helpers) == len(expected_array)
        for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array):
            # get_test_obj reduces the actual sequence to the comparable parts
            to_compare = tests.parsers.parsers_utils.get_test_obj(parser_helper.sequence, expected_sequence)
            # assert parser_helper.sequence == expected_sequence
            assert to_compare == expected_sequence
        if len(bnf_parsers_helpers) == 1:
            return bnf_parsers_helpers[0].sequence
        else:
            return [pe.sequence for pe in bnf_parsers_helpers]
def test_i_cannot_parse_empty_strings(self):
sheerka, context, parser = self.init_parser({}, singleton=True)
res = parser.parse(context, ParserInput(""))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert res.body.reason == BuiltinConcepts.IS_EMPTY
    @pytest.mark.parametrize("expr, text", [
        (StrMatch("foo"), "foo"),
        (StrMatch("'foo'"), "'foo'"),
        (StrMatch("1"), "1"),
        (StrMatch("3.14"), "3.14"),
        (StrMatch("+"), "+"),
    ])
    def test_i_can_match_str_bnf(self, expr, text):
        """A single StrMatch expression matches its literal text exactly."""
        my_map = {
            text: self.bnf_concept("foo", expr)
        }
        sequence = self.validate_get_concepts_sequences(my_map, text, [text])
        # the terminal node spans the single token 0..0
        assert sequence[0].underlying == u(expr, 0, 0)
    @pytest.mark.parametrize("expr, text, end", [
        (RegExMatch("bar"), "bar", 0),
        (RegExMatch("[a-z]+"), "xyz", 0),
        (RegExMatch("[a-z=]+"), "uvt=xyz=abc", 4),
    ])
    def test_i_can_match_regex_bnf(self, expr, text, end):
        """A single RegExMatch matches and ends at the expected token index."""
        my_map = {
            text: self.bnf_concept("foo", expr)
        }
        sequence = self.validate_get_concepts_sequences(my_map, text, [text])
        assert sequence[0].underlying.start == 0
        assert sequence[0].underlying.end == end
        assert sequence[0].underlying.parsing_expression == expr
    @pytest.mark.parametrize("expr, text, end", [
        (Sequence(StrMatch("foo"), RegExMatch("bar")), "foo bar", 2),
        (Sequence(StrMatch("foo"), RegExMatch("[a-z]+")), "foo xyz", 2),
        (Sequence(StrMatch("foo"), RegExMatch("[a-z=]+")), "foo uvt=xyz=abc", 6),
    ])
    def test_i_can_match_sequence_str_regex(self, expr, text, end):
        """A Sequence(StrMatch, RegExMatch) matches and spans tokens 0..end."""
        my_map = {
            text: self.bnf_concept("foo", expr)
        }
        sequence = self.validate_get_concepts_sequences(my_map, text, [text])
        assert sequence[0].underlying == u(expr, 0, end, sequence[0].underlying.children)
    @pytest.mark.parametrize("expr, text, end", [
        (Sequence(RegExMatch("bar"), StrMatch("foo")), "bar foo", 2),
        (Sequence(RegExMatch("[a-z]+"), StrMatch("foo")), "xyz foo", 2),
        (Sequence(RegExMatch("[a-z=]+"), StrMatch("foo")), "uvt=xyz=abc foo", 6),
    ])
    def test_i_can_match_sequence_regex_str(self, expr, text, end):
        """A Sequence(RegExMatch, StrMatch) matches and spans tokens 0..end."""
        my_map = {
            text: self.bnf_concept("foo", expr)
        }
        sequence = self.validate_get_concepts_sequences(my_map, text, [text])
        assert sequence[0].underlying == u(expr, 0, end, sequence[0].underlying.children)
def test_i_can_match_sequence_str_regex_str(self):
text = "foo uvt=xyz=abc baz"
expr = Sequence(StrMatch("foo"), RegExMatch("[a-z=]+"), StrMatch("baz"))
my_map = {
text: self.bnf_concept("foo", expr)
}
sequence = self.validate_get_concepts_sequences(my_map, text, [text])
assert sequence[0].underlying == u(expr, 0, 8, sequence[0].underlying.children)
def test_i_can_match_multiple_concepts_in_one_input(self):
my_map = {
"one": self.bnf_concept("one"),
"two": self.bnf_concept("two"),
}
text = "one two one"
expected = ["one", "two", ("one", 1)]
self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one two three", [CNC("foo", "one two three")]),
        ("one two", []),
        ("one two four", []),
    ])
    def test_i_can_match_sequence(self, text, expected):
        """A three-element sequence matches fully or not at all."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence("one", "two", "three")),
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_always_choose_the_longest_match(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence("one", "two", "three")),
"bar": self.bnf_concept("bar", Sequence("one", "two")),
}
text = "one two three"
expected = [CNC("foo", source=text)]
self.validate_get_concepts_sequences(my_map, text, expected)
    def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self):
        """skip_whitespace=False lets '--filter' match as one word but not
        when the dashes are separated by spaces."""
        # to match '--filter' in one word
        my_map = {
            "filter": self.bnf_concept("filter",
                                       Sequence(StrMatch("-", skip_whitespace=False),
                                                StrMatch("-", skip_whitespace=False),
                                                "filter")),
        }
        sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)
        parser = BnfNodeParser()
        parser.init_from_concepts(context, updated)
        text = "--filter"
        expected = [CN("filter", source="--filter")]
        expected_array = compute_expected_array(my_map, text, expected)
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        transformed = get_test_obj(bnf_parsers_helpers[0].sequence, expected_array)
        assert transformed == expected_array
        assert not bnf_parsers_helpers[0].has_unrecognized
        # but I cannot parse
        text = "- - filter"
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        assert bnf_parsers_helpers[0].has_unrecognized
def test_i_can_match_multiple_sequences(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))),
"bar": self.bnf_concept("bar", Sequence(StrMatch("one"), StrMatch("two"))),
}
text = "one two three one two"
expected = [
CNC("foo", "one two three"),
CNC("bar", "one two", 6, 8)]
self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one", [CNC("foo", source="one")]),
        ("two", [CNC("foo", source="two")]),
        ("three", []),
    ])
    def test_i_can_match_ordered_choice(self, text, expected):
        """An OrderedChoice matches either alternative, nothing else."""
        my_map = {
            "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two")))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_do_not_match_ordered_choice_with_empty_alternative(self):
my_map = {
"foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("")))
}
text = ""
expected = []
self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("concept_three, expected", [
        (Concept("three"), []),
        (BaseTest.bnf_concept("three", StrMatch("three")), [UTN('twenty '), "three"])
    ])
    def test_i_can_manage_sequence_with_wrong_order_choice(self, concept_three, expected):
        """When the choice fails, "three" is only found if it has its own BNF;
        "twenty" then stays unrecognized (UTN)."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        StrMatch("twenty"),
                                        OrderedChoice(StrMatch("one"), StrMatch("two")))),
            "three": concept_three}
        text = "twenty three"
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("ok thirty one", [CNC("foo", "ok thirty one")]),
        ("ok twenty one", [CNC("foo", "ok twenty one")]),
        ("ok one", []),
    ])
    def test_i_can_mix_sequence_and_ordered(self, text, expected):
        """A Sequence containing an OrderedChoice in the middle matches both
        alternatives, and fails when the choice token is missing."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        StrMatch("ok"),
                                        OrderedChoice(StrMatch("twenty"), StrMatch("thirty")),
                                        StrMatch("one"))
                                    )}
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("twenty one", [CNC("foo", "twenty one")]),
        ("twenty three", []),  # three does not exist
        ("twenty four", []),  # four exists but should not be seen
    ])
    def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
        """The OrderedChoice only accepts its own alternatives, even when an
        unrelated concept ("four") exists in the grammar."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        StrMatch("twenty"),
                                        OrderedChoice(StrMatch("one"), StrMatch("two")))),
            "four": Concept("four")}
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("twenty thirty", [CNC("foo", "twenty thirty")]),
        ("one", [CNC("foo", "one")]),
    ])
    def test_i_can_mix_ordered_choices_and_sequences(self, text, expected):
        """An OrderedChoice whose first alternative is itself a Sequence
        matches either the sequence or the single-token alternative."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    OrderedChoice(
                                        Sequence(StrMatch("twenty"), StrMatch("thirty")),
                                        StrMatch("one")))}
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one", [CNC("foo", "one")]),
        ("one two", [CNC("foo", "one two")]),
        ("three", []),
    ])
    def test_i_can_match_unordered_choice(self, text, expected):
        """An UnOrderedChoice picks whichever alternative fits, including the
        longer sequence alternative when more input is available."""
        my_map = {
            "foo": self.bnf_concept("foo", UnOrderedChoice(
                StrMatch("one"),
                Sequence(StrMatch("one"), StrMatch("two")))),
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one", [CNC("foo", "one")]),
        ("", []),
        ("two", []),
    ])
    def test_i_can_match_optional(self, text, expected):
        """A bare Optional matches its content but never an empty/other input."""
        my_map = {
            "foo": self.bnf_concept("foo", Optional(StrMatch("one")))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("twenty one", [CNC("foo", "twenty one")]),
        ("one", [CNC("foo", "one")]),
    ])
    def test_i_can_match_sequence_starting_with_optional(self, text, expected):
        """A leading Optional may be present or absent."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        Optional(StrMatch("twenty")),
                                        StrMatch("one")))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one two three", [CNC("foo", "one two three")]),
        ("one two", [CNC("foo", "one two")]),
    ])
    def test_i_can_match_sequence_ending_with_optional(self, text, expected):
        """A trailing Optional may be present or absent."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        StrMatch("one"),
                                        StrMatch("two"),
                                        Optional(StrMatch("three"))))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one two three", [CNC("foo", "one two three")]),
        ("one three", [CNC("foo", "one three")]),
    ])
    def test_i_can_match_sequence_with_optional_in_between(self, text, expected):
        """An Optional in the middle of a sequence may be present or absent."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        StrMatch("one"),
                                        Optional(StrMatch("two")),
                                        StrMatch("three")))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("", []),
        ("two", []),
        ("one", [CNC("foo", "one")]),
        ("one one", [CNC("foo", "one one")]),
    ])
    def test_i_can_match_zero_or_more(self, text, expected):
        """A bare ZeroOrMore matches one or more repetitions of its content
        (an empty input produces no sequence)."""
        my_map = {
            "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one")))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("two", [CNC("foo", "two")]),
        ("one two", [CNC("foo", "one two")]),
        ("one one two", [CNC("foo", "one one two")]),
    ])
    def test_i_can_match_sequence_and_zero_or_more(self, text, expected):
        """A leading ZeroOrMore accepts zero, one, or many repetitions."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        ZeroOrMore(StrMatch("one")),
                                        StrMatch("two")
                                    ))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one, one , one", [CNC("foo", "one, one , one")]),
    ])
    def test_i_can_match_zero_or_more_with_separator(self, text, expected):
        """ZeroOrMore with sep="," accepts comma-separated repetitions,
        tolerating surrounding whitespace."""
        my_map = {
            "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=","))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    def test_that_zero_or_more_is_greedy(self):
        """ZeroOrMore consumes all repetitions rather than yielding to a
        single-token concept."""
        my_map = {
            "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))),
            # NOTE(review): concept under key "bar" is *named* "foo" —
            # looks like a copy-paste slip; confirm whether "bar" was intended.
            "bar": self.bnf_concept("foo", StrMatch("one"))
        }
        text = "one one one"
        expected = [CNC("foo", text)]
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("", []),
        ("two", []),
        ("one", [CNC("foo", "one")]),
        ("one one one", [CNC("foo", "one one one")]),
    ])
    def test_i_can_match_one_or_more(self, text, expected):
        """A bare OneOrMore requires at least one repetition of its content."""
        my_map = {
            "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("two", []),
        ("one two", [CNC("foo", "one two")]),
        ("one one two", [CNC("foo", "one one two")]),
    ])
    def test_i_can_match_sequence_one_and_or_more(self, text, expected):
        """A leading OneOrMore requires at least one repetition before the
        rest of the sequence."""
        my_map = {
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        OneOrMore(StrMatch("one")),
                                        StrMatch("two")
                                    ))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one, one , one", [CNC("foo", "one, one , one")]),
    ])
    def test_i_can_match_one_or_more_with_separator(self, text, expected):
        """OneOrMore with sep="," accepts comma-separated repetitions,
        tolerating surrounding whitespace."""
        my_map = {
            "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=","))
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
    def test_that_one_or_more_is_greedy(self):
        """OneOrMore consumes all repetitions rather than yielding to a
        single-token concept."""
        my_map = {
            "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
            # NOTE(review): concept under key "bar" is *named* "foo" —
            # looks like a copy-paste slip; confirm whether "bar" was intended.
            "bar": self.bnf_concept("foo", StrMatch("one"))
        }
        text = "one one one"
        expected = [CNC("foo", text)]
        self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("text, expected", [
        ("one two", [
            [CNC("foo", "one two")],
            [CNC("bar", "one two")]]),
        ("one two one two", [
            [CNC("bar", "one two"), CNC("bar", "one two")],
            [CNC("foo", "one two"), CNC("bar", "one two")],
            [CNC("bar", "one two"), CNC("foo", "one two")],
            [CNC("foo", "one two"), CNC("foo", "one two")]]),
    ])
    def test_i_can_have_multiple_results(self, text, expected):
        """Two overlapping concepts produce multiple parse results."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))),
            "bar": self.bnf_concept("bar", Sequence(
                StrMatch("one"),
                OrderedChoice(StrMatch("two"), StrMatch("three")))),
        }
        # NOTE(review): these two assignments shadow the parametrized
        # `text`/`expected` arguments, so the second parametrize case
        # ("one two one two") is never exercised — confirm whether the
        # override is leftover debugging or a deliberate disable.
        text = "one two"
        expected = [[CNC("foo", text)], [CNC("bar", text)]]
        self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
    def test_i_can_refer_to_other_concepts(self):
        """A ConceptExpression reference compiles the referenced concept into
        both the BODY slot and a named slot, sharing the same instance."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))),
            "bar": self.bnf_concept("bar", ConceptExpression("foo"))
        }
        text = "one two"
        expected = [
            [CNC("foo", text)],
            [CN("bar", text)]  # Do not check the compiled part
        ]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
        # explicit validations of the compiled
        concept_foo = sequences[0][0].concept
        assert concept_foo.body == NotInit
        assert concept_foo.get_compiled() == {ConceptParts.BODY: DoNotResolve("one two")}
        concept_bar = sequences[1][0].concept
        assert concept_bar.body == NotInit
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: concept_foo,
            "foo": concept_foo
        }
        # BODY and the named slot must be the very same object, not copies
        assert id(concept_bar.get_compiled()[ConceptParts.BODY]) == id(concept_bar.get_compiled()["foo"])
    def test_i_can_refer_to_other_concepts_with_body(self):
        """When the referenced concept already has a metadata body, nothing is
        compiled into its BODY slot, but the referring concept still links it."""
        my_map = {
            "foo": self.bnf_concept(Concept("foo", body="'foo'"), Sequence(StrMatch("one"), StrMatch("two"))),
            "bar": self.bnf_concept("bar", ConceptExpression("foo"))
        }
        text = "one two"
        expected = [
            [CNC("foo", text)],
            [CN("bar", text)]  # Do not check the compiled part
        ]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
        # explicit validations of the compiled
        concept_foo = sequences[0][0].concept
        assert concept_foo.body == NotInit
        assert len(concept_foo.get_compiled()) == 0  # because there is a body defined in the metadata
        concept_bar = sequences[1][0].concept
        assert concept_bar.body == NotInit
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: concept_foo,
            "foo": concept_foo
        }
    def test_i_can_manage_concepts_reference_when_multiple_levels(self):
        """Concept references chain through multiple levels:
        baz -> bar -> foo, each compiling the next level into BODY + name."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"))),
            "bar": self.bnf_concept("bar", ConceptExpression("foo")),
            "baz": self.bnf_concept("baz", ConceptExpression("bar")),
        }
        text = "one two"
        expected = [
            [CNC("foo", text)],
            [CN("bar", text)],  # Do not check the compiled part
            [CN("baz", text)],  # Do not check the compiled part
        ]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
        # explicit validations of the compiled
        concept_foo = sequences[0][0].concept
        assert concept_foo.body == NotInit
        assert concept_foo.get_compiled() == {ConceptParts.BODY: DoNotResolve("one two")}
        concept_bar = sequences[1][0].concept
        assert concept_bar.body == NotInit
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: concept_foo,
            "foo": concept_foo
        }
        concept_baz = sequences[2][0].concept
        assert concept_baz.body == NotInit
        assert concept_baz.get_compiled() == {
            ConceptParts.BODY: concept_bar,
            "bar": concept_bar
        }
    def test_i_can_mix_reference_to_other_concepts(self):
        """A ConceptExpression inside a Sequence resolves through the
        referenced concept's own OrderedChoice for both alternatives; when
        the tail does not match, the reference is still recognised alone."""
        my_map = {
            "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))),
            "bar": self.bnf_concept("bar", Sequence(
                ConceptExpression("foo"),
                OrderedChoice(StrMatch("one"), StrMatch("two")))),
            "three": Concept("three")
        }
        text = "twenty two"
        expected = [CN("bar", source="twenty two")]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected)
        concept_bar = sequences[0].concept
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: DoNotResolve("twenty two"),
            "foo": my_map["foo"],
        }
        assert concept_bar.get_compiled()["foo"].get_compiled() == {ConceptParts.BODY: DoNotResolve("twenty")}
        text = "thirty one"
        expected = [CN("bar", source="thirty one")]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected)
        concept_bar = sequences[0].concept
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: DoNotResolve("thirty one"),
            "foo": my_map["foo"],
        }
        assert concept_bar.get_compiled()["foo"].get_compiled() == {ConceptParts.BODY: DoNotResolve("thirty")}
        # "thirty three": bar cannot complete, so foo + three is one result
        text = "thirty three"
        expected = [[CN("foo", source="thirty"), CN("three")], []]
        self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
    def test_i_can_mix_reference_to_other_concepts_2(self):
        """Same as above but composing two ConceptExpressions in a Sequence;
        each referenced concept is compiled with its own DoNotResolve body."""
        # this time, we use concept expression
        my_map = {
            "twenty": self.bnf_concept("twenty", StrMatch("twenty")),
            "number": self.bnf_concept("number", OrderedChoice(StrMatch("one"), StrMatch("two"))),
            "twenties": self.bnf_concept("twenties",
                                         Sequence(ConceptExpression("twenty"), ConceptExpression("number"))),
            "three": Concept("three")
        }
        text = "twenty two"
        expected = [CNC("twenties",
                        "twenty two",
                        twenty=CC("twenty", body=DoNotResolve("twenty")),
                        number=CC("number", source="two", body=DoNotResolve("two"))
                        )]
        self.validate_get_concepts_sequences(my_map, text, expected)
        # when the second reference fails, the pieces come back separately
        text = "twenty three"
        expected = [[CN("twenty"), CN("three")], []]
        self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
    def test_i_can_mix_reference_to_other_concepts_when_body(self):
        """When the referenced concept has a metadata body, its compiled map
        stays empty while the referring concept still links it by name."""
        my_map = {
            "foo": self.bnf_concept(Concept("foo", body="'foo'"),
                                    OrderedChoice(StrMatch("twenty"), StrMatch("thirty"))),
            "bar": self.bnf_concept("bar", Sequence(
                ConceptExpression("foo"),
                OrderedChoice(StrMatch("one"), StrMatch("two")))),
        }
        sheerka, context, *concepts = self.init_test().with_concepts(*my_map.values(), create_new=False).unpack()
        text = "twenty two"
        expected = [CN("bar", source="twenty two")]
        sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts)
        concept_bar = sequences[0].concept
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: DoNotResolve("twenty two"),
            "foo": sheerka.new("foo"),
        }
        assert concept_bar.get_compiled()["foo"].get_compiled() == {}  # as foo as a body
        text = "thirty one"
        expected = [CN("bar", source="thirty one")]
        sequences = self.exec_get_concepts_sequences(context, my_map, text, expected, False, None, *concepts)
        concept_bar = sequences[0].concept
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: DoNotResolve("thirty one"),
            "foo": sheerka.new("foo"),
        }
        assert concept_bar.get_compiled()["foo"].get_compiled() == {}
    def test_i_can_mix_zero_and_more_and_reference_to_other_concepts(self):
        """ZeroOrMore over a ConceptExpression compiles the repeated
        reference as a *list* under the concept's name."""
        my_map = {
            "foo": self.bnf_concept("foo", OrderedChoice(StrMatch("one"), StrMatch("two"), StrMatch("three"))),
            "bar": self.bnf_concept("bar", ZeroOrMore(ConceptExpression("foo"))),
        }
        text = "one two three"
        expected = [CN("bar", source="one two three")]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected)
        concept_bar = sequences[0].concept
        assert concept_bar.get_compiled() == {
            ConceptParts.BODY: DoNotResolve("one two three"),
            "foo": [my_map["foo"], my_map["foo"], my_map["foo"]]
        }
        # each list element carries the body for its own matched token
        assert concept_bar.get_compiled()["foo"][0].get_compiled() == {ConceptParts.BODY: DoNotResolve("one")}
        assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")}
        assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")}
    def test_i_can_match_concept_reference_that_is_not_in_grammar(self):
        """References to plain (non-BNF) concepts resolve via the named
        rule (rule_name="unit") of the OrderedChoice."""
        my_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "foo": self.bnf_concept("foo",
                                    Sequence(
                                        StrMatch("twenty"),
                                        OrderedChoice(
                                            ConceptExpression("one"),
                                            ConceptExpression("two"),
                                            rule_name="unit"))),
        }
        text = "twenty one"
        expected = [CN("foo", "twenty one")]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected)
        concept_foo = sequences[0].concept
        assert concept_foo.get_compiled() == {
            ConceptParts.BODY: DoNotResolve("twenty one"),
            "unit": my_map["one"],
        }
    def test_i_can_refer_to_group_concepts(self):
        """A reference to a group concept ("number") resolves to whichever
        member ("one"/"two") actually appears, compiled into the group slot."""
        my_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "number": Concept("number"),
            "foo": self.bnf_concept("foo", Sequence("twenty", ConceptExpression("number")))
        }

        def pic(s, c):
            # register one/two as members of the "number" set before parsing
            s.add_concept_to_set(c, my_map["one"], my_map["number"])
            s.add_concept_to_set(c, my_map["two"], my_map["number"])

        text = "twenty two"
        expected = [CN("foo", source="twenty two")]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic)
        # explicit validations of the compiled
        concept_foo = sequences[0].concept
        assert concept_foo.body == NotInit
        compare_with_test_object(concept_foo.get_compiled(), {
            'number': CC(my_map["number"], body=my_map["two"], two=my_map["two"]),
            ConceptParts.BODY: DoNotResolve(value='twenty two')})
        text = "twenty one"
        expected = [CN("foo", source="twenty one")]
        sequences = self.validate_get_concepts_sequences(my_map, text, expected, post_init_concepts=pic)
        # explicit validations of the compiled
        concept_foo = sequences[0].concept
        assert concept_foo.body == NotInit
        compare_with_test_object(concept_foo.get_compiled(), {
            'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
            ConceptParts.BODY: DoNotResolve(value='twenty one')})
    @pytest.mark.parametrize("expr, expected", [
        ("one 'car'", [CNC("foo", "one 'car'", x=python_ret_val("'car'"))]),  # python
        ("one bar", [CNC("foo", "one bar", x=CC("bar"))]),  # simple concept
        ("one super car", [CNC("foo", "one super car", x=CC("super car"))]),  # long concept
        ("one shoe", [CNC("foo", "one shoe", x=CC("thing", source="shoe", body=DoNotResolve("shoe")))]),  # bnf
    ])
    def test_i_can_match_variable_when_ending_with_one_variable(self, expr, expected):
        """A trailing VariableExpression binds x to a python literal, a plain
        concept, a multi-word concept, or a bnf-defined concept."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "bar": Concept("bar"),
            "baz": Concept("baz"),
            "thing": Concept("thing", definition="'shoe'|'skirt'"),
            "super car": Concept("super car"),
            "plus": Concept("x plus y").def_var("x").def_var("y"),
        }
        self.validate_get_concepts_sequences(my_map, expr, expected)
    def test_i_can_match_variable_when_ending_with_one_variable_and_sya(self):
        """With a shunting-yard-style concept ("x plus y"), the trailing
        variable can bind either just "bar" or the whole "bar plus baz"."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "bar": Concept("bar"),
            "baz": Concept("baz"),
            "plus": Concept("x plus y").def_var("x").def_var("y"),
        }
        expr = "one bar plus baz"
        expected = [
            [CNC("foo", "one bar", x=CC("bar")), UTN(" plus "), CN("baz")],
            [CNC("foo", "one bar plus baz", x=CC("plus", source="bar plus baz", x="bar", y="baz"))],
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)
    def test_i_can_match_variable_when_ending_with_one_variable_and_multiple_results(self):
        """Two concepts sharing the name "pretty big" each produce a binding
        for the trailing variable, yielding two results."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
            "pretty big": Concept("pretty big", body="'pretty big'"),
            "pbig": Concept("pretty big"),
        }
        expr = "one pretty big"
        expected = [
            [CNC("foo", "one pretty big", x=CC("pretty big"))],
            [CNC("foo", "one pretty big", x=CC("pbig", source="pretty big"))]
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)
    def test_i_can_match_variable_when_ending_with_multiple_variables_and_multiple_results(self):
        """With two trailing variables, each ambiguous candidate for x yields its own sequence."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"), VariableExpression("y"))),
            "pretty": Concept("pretty", body="pretty"),
            "pretty2": Concept("pretty"),
            "big": Concept("big", body="big"),
        }
        expr = "one pretty big"
        expected = [
            [CNC("foo", "one pretty big", x=CC("pretty"), y=CC("big"))],
            [CNC("foo", "one pretty big", x=CC("pretty2", source="pretty"), y=CC("big"))]
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)
    @pytest.mark.parametrize("expr, expected", [
        ("'my' shoe", [CNC("foo", "'my' shoe", x=python_ret_val("'my' "))]),  # python
        ("one shoe", [CNC("foo", "one shoe", x=CC("one"))]),  # concept
        ("my little shoe", [CNC("foo", "my little shoe", x=CC("my little"))]),  # long concept
        ("black shoe", [CNC("foo", "black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
    ])
    def test_i_can_match_variable_when_starting_with_one_variable(self, expr, expected):
        """A leading variable binds to python code, a concept, a multi-word concept or a BNF concept."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
            "one": Concept("one"),
            "my little": Concept("my little"),
            "color": Concept("color", definition="'blue'|'black'"),
            "and": Concept("x and y").def_var("x").def_var("y"),
        }
        self.validate_get_concepts_sequences(my_map, expr, expected)
    def test_i_can_match_variable_when_starting_with_one_variable_and_sya(self):
        """The leading variable absorbs the whole 'x but y' expression; the split parse is also listed."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
            "tiny": Concept("tiny"),
            "beautiful": Concept("beautiful"),
            "but": Concept("x but y").def_var("x").def_var("y"),
        }
        expr = "tiny but beautiful shoe"
        expected_res = [
            CNC("foo",
                "tiny but beautiful shoe",
                x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful"))]
        # the split parse (x stops at "beautiful") is produced first but is not the preferred one
        unwanted_res = [CN("tiny"), UTN(" but "), CNC("foo", "beautiful shoe", x=CC("beautiful"))]
        self.validate_get_concepts_sequences(my_map, expr, [unwanted_res, expected_res], multiple_result=True)
    def test_i_can_match_variable_when_starting_with_multiple_variables(self):
        """Three consecutive leading variables bind to a concept, python code and a 'x plus y' expression."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"),
                                                    VariableExpression("y"),
                                                    VariableExpression("z"),
                                                    StrMatch("shoe"))),
            "one": Concept("one"),
            "two": Concept("two"),
            "plus": Concept("x plus y").def_var("x").def_var("y"),
        }
        text = "one 'one' one plus two shoe"
        # split parse; (CN("one"), 1) presumably tags the second occurrence of "one" — verify helper semantics
        unwanted_res = [CN("one"), SCN(" 'one' "), (CN("one"), 1), UTN(" plus "), CN("two")]
        expected_res = [CNC("foo",
                            source="one 'one' one plus two shoe",
                            x=CC("one"),
                            y=python_ret_val(" 'one' "),
                            z=CC("plus", source="one plus two", x="one", y="two"))]
        expected = [unwanted_res, expected_res]
        self.validate_get_concepts_sequences(my_map, text, expected, multiple_result=True)
def test_i_can_match_variable_when_starting_with_one_variable_and_longer_str(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(VariableExpression("x"),
StrMatch("foo"),
StrMatch("bar"),
StrMatch("baz"))),
"one": Concept("one")
}
text = "one foo bar baz"
expected = [CNC("foo", "one foo bar baz", x=CC("one"))]
self.validate_get_concepts_sequences(my_map, text, expected)
    @pytest.mark.parametrize("expr, expected", [
        ("one 'pretty' shoe", [CNC("foo", "one 'pretty' shoe", x=python_ret_val("'pretty' "))]),  # python
        ("one little shoe", [CNC("foo", "one little shoe", x=CC("little"))]),  # concept
        ("one very big shoe", [CNC("foo", "one very big shoe", x=CC("very big"))]),  # long concept
        ("one black shoe",
         [CNC("foo", "one black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
        ("one tiny but beautiful shoe",
         [CNC("foo",
              "one tiny but beautiful shoe",
              x=CMV("but", source="tiny but beautiful", x="tiny ", y="beautiful "))]),
    ])
    def test_i_can_match_variable_in_between(self, expr, expected):
        """A variable surrounded by literal keywords binds to all supported value kinds."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))),
            "little": Concept("little"),
            "very big": Concept("very big"),
            "color": Concept("color", definition="'blue'|'black'"),
            "but": Concept("x but y").def_var("x").def_var("y"),
        }
        self.validate_get_concepts_sequences(my_map, expr, expected)
    def test_i_can_match_variable_when_multiple_results_in_between(self):
        """An in-between variable with two same-named candidate concepts yields one sequence per candidate."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))),
            "pretty big": Concept("pretty big", body="'pretty big'"),
            "pbig": Concept("pretty big"),
        }
        expr = "one pretty big shoe"
        expected = [
            [CNC("foo", "one pretty big shoe", x=CC("pretty big"))],
            [CNC("foo", "one pretty big shoe", x=CC("pbig", source="pretty big"))]
        ]
        self.validate_get_concepts_sequences(my_map, expr, expected, multiple_result=True)
def test_i_can_match_regex_and_variable(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(RegExMatch("[a-z]+"),
VariableExpression("x"))),
"shoe": Concept("shoe")
}
text = "onyx shoe"
expected = [CNC("foo", "onyx shoe", x=CC("shoe"))]
self.validate_get_concepts_sequences(my_map, text, expected)
    def test_i_can_match_variable_and_regex(self):
        """A variable element may directly precede a regex element in a sequence."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"),
                                                    RegExMatch("[a-z]+"))),
            "one": Concept("one")
        }
        text = "one onyx"
        expected = [CNC("foo", "one onyx", x=CC("one"))]
        self.validate_get_concepts_sequences(my_map, text, expected)
    def test_i_can_match_regex_when_sub_parser_input(self):
        """A regex concept matches when parsing a sub-part of a larger ParserInput."""
        my_map = {
            "foo": self.bnf_concept("foo", RegExMatch("[a-f0-9]{4}")),
        }
        text = "begin 0af0 end"
        parser_input = ParserInput(text).reset()
        # sub_part(2, 3) presumably selects the token span holding "0af0" — confirm against ParserInput
        sub_parser = parser_input.sub_part(2, 3)
        sheerka, context, parser = self.init_parser(my_map)
        res = parser.parse(context, sub_parser)
        assert res.status
        concept_nodes = res.body.body
        expected = [CN("foo", "0af0")]
        actual, expected = tests.parsers.parsers_utils.prepare_nodes_comparison(my_map, text, concept_nodes, expected)
        assert actual == expected
    def test_i_can_match_regex_when_from_tokens(self):
        """A regex concept matches when the ParserInput is built from a pre-tokenized slice (no raw text)."""
        my_map = {
            "foo": self.bnf_concept("foo", RegExMatch("[a-f0-9]{4}")),
        }
        text = "begin 0af0 end"
        tokens = list(Tokenizer(text))
        # text is None: the parser works purely on the token slice around "0af0"
        parser_input = ParserInput(None, tokens[2:4]).reset()
        sheerka, context, parser = self.init_parser(my_map)
        res = parser.parse(context, parser_input)
        assert res.status
        concept_nodes = res.body.body
        expected = [CN("foo", "0af0")]
        actual, expected = tests.parsers.parsers_utils.prepare_nodes_comparison(my_map, "0af0", concept_nodes, expected)
        assert actual == expected
def test_i_can_reuse_the_same_variable(self):
# in this test, the variable appears several times, but only once in concept.compiled
my_map = {
"foo": self.bnf_concept("foo", Sequence(VariableExpression("x"),
StrMatch("equals"),
VariableExpression("x"))),
"one": Concept("one"),
"two": Concept("two"),
}
sheerka, context, *updated = self.init_concepts(*my_map.values())
parser = BnfNodeParser()
parser.init_from_concepts(context, updated)
# same variable appears only once in the compiled variables
text = "one equals one"
expected = [CNC("foo", "one equals one", x=CC("one"))]
expected_sequence = compute_expected_array(my_map, text, expected)
parser.reset_parser(context, ParserInput(text))
bnf_parsers_helpers = parser.get_concepts_sequences(context)
to_compare = tests.parsers.parsers_utils.get_test_obj(bnf_parsers_helpers[0].sequence, expected_sequence)
assert to_compare == expected
    def test_i_cannot_match_variable_when_variables_discrepancy(self):
        """The repeated variable x must bind identically on both sides of 'equals'; otherwise no sequence."""
        my_map = {
            "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"),
                                                    StrMatch("equals"),
                                                    VariableExpression("x"))),
            "one": Concept("one"),
            "one_1": Concept("one", body="1"),
            "two": Concept("two"),
            "two_2": Concept("two", body="2"),
        }
        sheerka, context, *updated = self.init_concepts(*my_map.values())
        parser = BnfNodeParser()
        parser.init_from_concepts(context, updated)
        text = "one equals two"
        parser.reset_parser(context, ParserInput(text))
        bnf_parsers_helpers = parser.get_concepts_sequences(context)
        assert bnf_parsers_helpers[0].sequence == []
    @pytest.mark.parametrize("bar_expr, expected", [
        (ConceptExpression("foo"), {}),
        (OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
        (Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}),
    ])
    def test_i_can_detect_infinite_recursion(self, bar_expr, expected):
        """A foo<->bar reference cycle is flagged CHICKEN_AND_EGG and pruned from the first-keyword index."""
        my_map = {
            "foo": self.bnf_concept("foo", ConceptExpression("bar")),
            "bar": self.bnf_concept("bar", bar_expr),
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        # every obvious cyclic recursion are removed from concept_by_first_keyword dict
        assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
        # get_parsing_expression() also returns CHICKEN_AND_EGG
        parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
        parsing_expression = parser.get_parsing_expression(context, my_map["bar"])
        assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
        assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
def test_i_can_detect_longer_infinite_recursion(self):
my_map = {
"foo": self.bnf_concept("foo", ConceptExpression("bar")),
"bar": self.bnf_concept("bar", ConceptExpression("baz")),
"baz": self.bnf_concept("baz", ConceptExpression("qux")),
"qux": self.bnf_concept("qux", ConceptExpression("foo")),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.context = context
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert parser.concepts_grammars.get(my_map["foo"].id).body == ["1001", "1002", "1003", "1004", "1001"]
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id), BuiltinConcepts.CHICKEN_AND_EGG)
def test_i_can_detect_partial_infinite_recursion(self):
my_map = {
"foo": self.bnf_concept("foo", ConceptExpression("bar")),
"bar": self.bnf_concept("bar", ConceptExpression("baz")),
"baz": self.bnf_concept("baz", ConceptExpression("qux")),
"qux": self.bnf_concept("qux", ConceptExpression("baz")),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.context = context
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert parser.concepts_grammars.get(my_map["foo"].id).body == ["1001", "1002", "1003", "1004", "1003"]
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["foo"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["bar"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["baz"].id), BuiltinConcepts.CHICKEN_AND_EGG)
assert sheerka.isinstance(parser.concepts_grammars.get(my_map["qux"].id), BuiltinConcepts.CHICKEN_AND_EGG)
    @pytest.mark.parametrize("expr, expected", [
        (OrderedChoice(StrMatch("bar"), ConceptExpression("foo")), False),
        (OrderedChoice(ConceptExpression("foo"), StrMatch("bar")), True),
        (OrderedChoice(Sequence(StrMatch("bar"), ConceptExpression("foo")), StrMatch("baz")), False),
        (OrderedChoice(Sequence(ConceptExpression("foo"), StrMatch("bar")), StrMatch("baz")), True)
    ])
    def test_i_can_detect_ordered_choice_infinite_recursion(self, expr, expected):
        """Self-reference is infinite only when it can be reached first in an OrderedChoice branch."""
        my_map = {
            "foo": self.bnf_concept("foo", expr),
        }
        sheerka, context, parser = self.init_parser(my_map)
        parser.context = context
        parser.sheerka = sheerka
        res = parser.get_parsing_expression(context, my_map["foo"])
        assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected
    def test_i_can_get_parsing_expression_when_ending_by_concept_isa(self):
        """A trailing isa-group concept ('number') expands to an UnOrderedChoice over its members."""
        my_map = {
            "one": Concept("one"),
            "twenty": Concept("twenty"),
            "number": Concept("number"),
            "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
        }
        sheerka, context, parser = self.init_parser(my_map)
        global_truth_context = self.get_context(sheerka, global_truth=True)
        parser.context = context
        parser.sheerka = sheerka
        sheerka.set_isa(global_truth_context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(global_truth_context, sheerka.new("twenty"), my_map["number"])
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["twenty"], rule_name="twenty"),
            ConceptExpression(my_map["number"], rule_name="number"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        # "twenty" is a plain concept: its node is just the literal keyword
        twenty_nodes = parsing_expression.nodes[0].nodes
        assert twenty_nodes == [StrMatch("twenty")]
        # "number" is an isa group: its node is an unordered choice of its members
        number_nodes = parsing_expression.nodes[1].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
    def test_i_can_get_parsing_expression_when_starting_by_isa_concept(self):
        """A leading isa-group concept ('number') expands to an UnOrderedChoice over its members."""
        my_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "number": Concept("number"),
            "hundreds": self.bnf_concept("hundreds", Sequence(ConceptExpression("number"), StrMatch("hundred")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        global_truth_context = self.get_context(sheerka, global_truth=True)
        sheerka.set_isa(global_truth_context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(global_truth_context, sheerka.new("two"), my_map["number"])
        sheerka.set_isa(global_truth_context, sheerka.new("hundreds"), my_map["number"])
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["hundreds"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["number"], rule_name="number"),
            StrMatch("hundred"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        number_nodes = parsing_expression.nodes[0].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes
    def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
        """A concept that belongs to the group it references still gets a well-formed expression (no self-cycle)."""
        my_map = {
            "one": Concept("one"),
            "twenty": Concept("twenty"),
            "number": Concept("number"),
            "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.context = context
        parser.sheerka = sheerka
        global_truth_context = self.get_context(sheerka, global_truth=True)
        sheerka.set_isa(global_truth_context, sheerka.new("one"), my_map["number"])
        sheerka.set_isa(global_truth_context, sheerka.new("twenty"), my_map["number"])
        sheerka.set_isa(global_truth_context, sheerka.new("twenties"), my_map["number"])  # <- twenties is also a number
        parser.concepts_grammars.clear()  # make sure parsing expression is created from scratch
        parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
        assert parsing_expression == Sequence(
            ConceptExpression(my_map["twenty"], rule_name="twenty"),
            ConceptExpression(my_map["number"], rule_name="number"))
        assert len(parsing_expression.nodes) == len(parsing_expression.elements)
        twenty_nodes = parsing_expression.nodes[0].nodes
        assert twenty_nodes == [StrMatch("twenty")]
        number_nodes = parsing_expression.nodes[1].nodes
        assert len(number_nodes) == 1
        assert isinstance(number_nodes[0], UnOrderedChoice)
        assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
        assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
        assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
def test_i_can_get_parsing_expression_when_sequence_of_concepts(self):
my_map = {
"one": Concept("one"),
"two_ones": self.bnf_concept("two_ones", Sequence(ConceptExpression("one"), ConceptExpression("one")))
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.context = context
parser.sheerka = sheerka
parsing_expression = parser.get_parsing_expression(context, my_map["two_ones"])
assert parsing_expression == Sequence(
ConceptExpression(my_map["one"], rule_name="one"),
ConceptExpression(my_map["one"], rule_name="one"))
    @pytest.mark.parametrize("expr, text, expected", [
        (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", "one"), UTN(",")]),
        (StrMatch("one"), "one two", [CNC("foo", "one"), UTN(" two")]),
        (StrMatch("one"), "two one", [UTN("two "), CNC("foo", "one")]),
    ])
    def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
        """Unmatched text around a recognized concept is reported as unknown-token nodes (UTN)."""
        my_map = {
            "foo": self.bnf_concept("foo", expr)
        }
        self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_can_recognize_unknown_when_they_look_like_known(self):
my_map = {
"one two": self.bnf_concept("one two", Sequence("one", "two")),
"three": self.bnf_concept("three")
}
text = "one three"
expected = [UTN("one "), CNC("three", "three")]
self.validate_get_concepts_sequences(my_map, text, expected)
    def test_i_can_remove_duplicates(self):
        """Two concepts sharing the first keyword 'one' do not produce duplicate valid sequences."""
        my_map = {
            "one two": self.bnf_concept("one two", Sequence("one", "two")),
            "one four": self.bnf_concept("one four", Sequence("one", "four")),
            "three": self.bnf_concept("three")
        }
        sheerka, context, parser = self.init_parser(my_map, singleton=True)
        parser.reset_parser(context, ParserInput("one three"))
        sequences = parser.get_concepts_sequences(context)
        sequence = parser.get_valid(sequences)
        assert len(sequence) == 1
    @pytest.mark.parametrize("parser_input, expected_status, expected", [
        ("baz", True, [CNC("bnf baz", "baz")]),  # the bnf one is chosen
        ("foo bar", True, [CNC("foo then bar", "foo bar", foo="foo", bar="bar")]),
        ("bar", True, [CNC("foo or bar", "bar", bar="bar", body="bar")]),
        ("one plus two", True, [CNC("plus", "one plus two", one="one", two="two")]),
        ("twenty one", True, [CNC("t1", "twenty one", unit="one")]),
        ("one 'car'", True, [CNC("one thing", "one 'car'", x=python_ret_val("'car'"), one="one")])
    ])
    def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
        """End-to-end parse of simple expressions against the sheerka-wide concept map (cmap)."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        res = parser.parse(context, ParserInput(parser_input))
        expected_array = compute_expected_array(cmap, parser_input, expected)
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status == expected_status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_when_multiple_times_the_same_variable(self):
        """A OneOrMore variable matched three times is compiled as a list of three bindings."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "foo foo foo"
        expected_array = compute_expected_array(cmap, text, [CNC("one or more foo", source=text)])
        # the repeated variable accumulates one entry per match
        expected_array[0].compiled["foo"] = [cmap["foo"], cmap["foo"], cmap["foo"]]
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_test_when_expression_references_other_expressions(self):
        """A BNF concept ('t2') whose definition references another BNF concept ('three_four') resolves transitively."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "twenty four"
        expected = CNC("t2",
                       source=text,
                       unit=CC("three_four",
                               source="four",
                               four=CC("four", body=DoNotResolve("four")),
                               body=CC("four", body=DoNotResolve("four"))))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
        """'thirty one' parses via 'thirties', whose 'number' variable is filled through isa membership."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        # thirties is defined in the global variable cmap as
        # thirties = cls.update_bnf(context, Concept("thirties",
        #                                            definition="thirty number",
        #                                            where="number < 10",
        #                                            body="thirty + number").def_var("thirty").def_var("number"))
        text = "thirty one"
        expected = CNC("thirties",
                       source=text,
                       number=CC("number",
                                 source="one",
                                 one=CC("one", body=DoNotResolve("one")),
                                 body=CC("one", body=DoNotResolve("one"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_2(self):
        """Same as above with 'three', which is both a number and part of 'three_four' (irrelevant here)."""
        # this time, three is a number, and also part of three_four, even if it is not relevant in t3
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        text = "thirty three"
        expected = CNC("thirties",
                       source=text,
                       number=CC("number",
                                 source="three",
                                 three=CC("three", body=DoNotResolve("three")),
                                 body=CC("three", body=DoNotResolve("three"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_when_starting_by_isa_concept(self):
        """Parse 'number + thousand' where the leading token resolves through isa membership.

        clear_bnf_definition() simulates a restart so the grammar is rebuilt from scratch.
        """
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "one thousand"
        one = CC("one", body=DoNotResolve("one"))
        expected = CNC("thousands",
                       source=text,
                       number=CC("number",
                                 source="one",
                                 one=one,
                                 body=one))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_fifty_one_thousand(self):
        """Parse 'fifty one thousand', ambiguous between (fifty one) thousand and fifty (one thousand).

        The number is itself a BNF concept, so parse() returns two results, one per grouping.
        """
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "fifty one thousand"
        one = CC("one", body=DoNotResolve("one"))
        fifty_one = CC("fifties",
                       source="fifty one",
                       fifty="fifty",
                       number=CC("number", source="one", body=one, one=one))
        one_thousand = CC("thousands",
                          source="one thousand",
                          number=CC("number", source="one", body=one, one=one))
        # grouping 1: (fifty one) thousand
        expected_thousand = CNC("thousands",
                                source=text,
                                number=CC("number",
                                          source="fifty one",
                                          fifties=fifty_one,
                                          body=fifty_one))
        # grouping 2: fifty (one thousand)
        expected_fifties = CNC("fifties",
                               source=text,
                               fifty="fifty",
                               number=CC("number",
                                         source="one thousand",
                                         thousands=one_thousand,
                                         body=one_thousand))
        expected_thousands = compute_expected_array(cmap, text, [expected_thousand])
        expected_fifties = compute_expected_array(cmap, text, [expected_fifties])
        res = parser.parse(context, ParserInput(text))
        assert res[0].status
        compare_with_test_object(res[0].value.value, expected_thousands)
        assert res[1].status
        compare_with_test_object(res[1].value.value, expected_fifties)
def test_i_can_parse_one_hundred_thousand(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
sheerka.clear_bnf_definition() # to simulate restart
text = "one hundred thousand"
res = parser.parse(context, ParserInput(text))
parser_result = res.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    def test_i_can_parse_hundreds_like_expression(self):
        """'three hundred and thirty two' parses as hundreds(n1=three, n2=thirties(thirty two))."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        text = "three hundred and thirty two"
        three = CC("three", body=DoNotResolve("three"))
        two = CC("two", body=DoNotResolve("two"))
        thirty_two = CC("thirties",
                        source="thirty two",
                        thirty="thirty",
                        number=CC("number",
                                  source="two",
                                  body=two,
                                  two=two))
        expected = CNC("hundreds",
                       source=text,
                       n1=CC("number",
                             source="three",
                             body=three,
                             three=three),
                       n2=CC("number",
                             source="thirty two",
                             body=thirty_two,
                             thirties=thirty_two))
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
        """After clearing every compiled BNF (restart simulation), two successive parses still succeed."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        sheerka.clear_bnf_definition()  # to simulate restart
        # also drop each concept's cached compiled grammar
        for c in cmap.values():
            sheerka.get_by_id(c.id).set_bnf(None)
        text = "thirty three"
        expected = CNC("thirties",
                       source=text,
                       number=CC("number",
                                 source="three",
                                 three=CC("three", body=DoNotResolve("three")),
                                 body=CC("three", body=DoNotResolve("three"))),
                       thirty="thirty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
        # second parse after the restart must work too (grammars are rebuilt lazily)
        text = "forty one"
        expected = CNC("forties",
                       source=text,
                       number=CC("number",
                                 source="one",
                                 one=CC("one", body=DoNotResolve("one")),
                                 body=CC("one", body=DoNotResolve("one"))),
                       forty="forty")
        expected_array = compute_expected_array(cmap, text, [expected])
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_when_keyword(self):
        """A keyword-led concept ('def number') parses; 'def' compiles to the def_only concept."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        parser_input = "def one"
        expected = [CNC("def number", "def one", number="one")]
        res = parser.parse(context, ParserInput(parser_input))
        expected_array = compute_expected_array(cmap, parser_input, expected)
        expected_array[0].compiled["def"] = cmap["def_only"]
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_filter(self):
        """A '--filter ...' expression parses to a single 'filter' concept node."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        expression = "--filter one"
        expected = [CN("filter", source="--filter one")]
        res = parser.parse(context, ParserInput(expression))
        expected_array = compute_expected_array(cmap, expression, expected)
        parser_result = res.value
        concepts_nodes = res.value.value
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, expected_array)
    def test_i_can_parse_descent_grammar(self):
        """A classic expr/term/factor grammar (with * and + repetition) parses '1 + 2 * 3' with precedence."""
        my_map = {
            "factor": Concept("factor", definition="1 | 2 | 3"),
            "term": Concept("term", definition="factor ('*' factor)*"),
            "expr": Concept("expr", definition="term ('+' term)*"),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "1 + 2 * 3"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        factor = my_map["factor"]
        term = my_map["term"]
        expr = my_map["expr"]
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        # "2 * 3" binds tighter: the second term holds both factors
        compare_with_test_object(concepts_nodes, [CNC(expr,
                                                      term=[CC(term,
                                                               body=CC(factor, body=DoNotResolve("1")),
                                                               factor=CC(factor, body=DoNotResolve("1"))),
                                                            CC(term,
                                                               body=DoNotResolve("2 * 3"),
                                                               factor=[
                                                                   CC(factor, body=DoNotResolve("2")),
                                                                   CC(factor, body=DoNotResolve("3")),
                                                               ])],
                                                      body=DoNotResolve("1 + 2 * 3"))])
    def test_i_can_parse_recursive_descent_grammar(self):
        """The right-recursive form of expr/term/factor also parses '1 + 2 * 3' correctly."""
        my_map = {
            "factor": Concept("factor", definition="1 | 2 | 3"),
            "term": self.bnf_concept("term", OrderedChoice(
                Sequence(ConceptExpression("factor"), StrMatch("*"), ConceptExpression("term")),
                ConceptExpression("factor"))),
            "expr": self.bnf_concept("expr", OrderedChoice(
                Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
                ConceptExpression("term"))),
        }
        sheerka, context, parser = self.init_parser(my_map)
        text = "1 + 2 * 3"
        res = parser.parse(context, ParserInput(text))
        parser_result = res.value
        concepts_nodes = res.value.value
        factor = my_map["factor"]
        term = my_map["term"]
        expr = my_map["expr"]
        # concepts_nodes = res.value.value is too complicated to be validated
        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        compare_with_test_object(concepts_nodes, [CNC(expr,
                                                      term=CC(term,
                                                              body=CC(factor, body=DoNotResolve("1")),
                                                              factor=CC(factor, body=DoNotResolve("1"))),
                                                      expr=CC(expr,
                                                              body=CC(term,
                                                                      body=DoNotResolve("2 * 3"),
                                                                      factor=CC(factor, body=DoNotResolve("2")),
                                                                      term=CC(term,
                                                                              body=CC(factor, body=DoNotResolve("3")),
                                                                              factor=CC(factor,
                                                                                        body=DoNotResolve("3")))),
                                                              term=CC(term,
                                                                      body=DoNotResolve("2 * 3"),
                                                                      factor=CC(factor, body=DoNotResolve("2")),
                                                                      term=CC(term,
                                                                              body=CC(factor, body=DoNotResolve("3")),
                                                                              factor=CC(factor,
                                                                                        body=DoNotResolve("3"))))),
                                                      body=DoNotResolve("1 + 2 * 3"))])
def test_i_can_parse_simple_recursive_grammar(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(StrMatch("foo"),
OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
}
sheerka, context, parser = self.init_parser(my_map)
assert parser.parse(context, ParserInput("foo bar")).status
assert parser.parse(context, ParserInput("foo foo foo bar")).status
assert not parser.parse(context, ParserInput("foo baz")).status
    @pytest.mark.parametrize("name, expected", [
        (None, []),
        ("", []),
        ("foo", StrMatch("foo")),
        ("foo bar", Sequence(StrMatch("foo"), StrMatch("bar"))),
        ("'foo bar baz' qux", Sequence(StrMatch("foo", skip_whitespace=False),
                                       StrMatch(" ", skip_whitespace=False),
                                       StrMatch("bar", skip_whitespace=False),
                                       StrMatch(" ", skip_whitespace=False),
                                       StrMatch("baz"),
                                       StrMatch("qux"))),
    ])
    def test_i_can_get_expression_from_concept_name(self, name, expected):
        """A concept name compiles to StrMatch/Sequence; quoted parts keep exact whitespace."""
        assert BnfNodeParser.get_expression_from_concept_name(name) == expected
    def test_i_can_parse_when_multiple_layers(self):
        """Pushing a new ontology layer does not break parsing of concepts defined in lower layers."""
        sheerka, context, parser = self.init_parser(init_from_sheerka=True)
        # sanity
        text = "thirty one"
        res = parser.parse(context, ParserInput(text))
        assert res.status
        compare_with_test_object(res.value.value, compute_expected_array(cmap, text, [CN("thirties", text)]))
        # add a layer, I still can parse the text
        sheerka.push_ontology(context, "new layer")
        parser = BnfNodeParser(sheerka=sheerka)
        res = parser.parse(context, ParserInput(text))
        assert res.status
        compare_with_test_object(res.value.value, compute_expected_array(cmap, text, [CN("thirties", text)]))
def test_i_do_not_eat_unwanted_tokens_at_the_beginning_when_concept_with_variable(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
"one": Concept("one"),
"two": Concept("two"),
}
sheerka, context, parser = self.init_parser(my_map)
text = "two one shoe"
res = parser.parse(context, ParserInput(text))
assert res.status
compare_with_test_object(res.value.value, compute_expected_array(my_map, text, [
CN("two"),
CNC("foo", "one shoe", x=CC("one"))]))
def test_i_do_not_eat_unwanted_tokens_at_the_end_when_concept_with_variable(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
"bar": Concept("bar"),
"baz": Concept("baz"),
}
sheerka, context, parser = self.init_parser(my_map)
text = "one bar baz"
res = parser.parse(context, ParserInput(text))
assert res.status
compare_with_test_object(res.value.value, compute_expected_array(my_map, text, [
CNC("foo", "one bar", x=CC("bar")),
CN("baz")]))
@pytest.mark.parametrize("parsing_expression, expected", [
(RegExMatch("a"), [RegExDef("a")]),
(OrderedChoice(StrMatch("first"), RegExMatch("a|b")), ["first", RegExDef("a|b")]),
(OrderedChoice(RegExMatch("a|b"), StrMatch("first")), [RegExDef("a|b"), "first"]),
(Sequence(StrMatch("a"), RegExMatch("a|b")), ["a"]),
(Sequence(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
(OneOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
(OneOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
(ZeroOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
(ZeroOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
])
def test_i_can_get_first_item(self, parsing_expression, expected):
sheerka = self.get_sheerka()
visitor = BnfNodeFirstTokenVisitor(sheerka)
visitor.visit(parsing_expression)
assert visitor.first_tokens == expected
def test_i_cannot_parse_regex_when_no_next_matching_token_cannot_be_found(self):
sheerka, context, foo = self.init_test().with_concepts(Concept("foo", definition="r'abcd'"),
create_new=True).unpack()
parser = BnfNodeParser(sheerka=sheerka)
res = parser.parse(context, ParserInput("abcdef"))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert res.body.reason == [NoMatchingTokenError(4, concept=foo)]
@pytest.mark.parametrize("text", [
"one",
" one",
"one ",
" one "
])
def test_i_cannot_parse_empty_variable(self, text):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
res = parser.parse(context, ParserInput("one"))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("bnf, text", [
(Sequence(VariableExpression("x"), StrMatch("foo")), "one foo"),
(Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"),
(Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"),
])
def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text):
sheerka, context, foo = self.init_test().with_concepts(
self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe")))
).unpack()
parser = BnfNodeParser()
parser.init_from_concepts(context, [foo])
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("to_match, ignore_case, multiline, explicit_flags", [
("xxy", None, None, re.MULTILINE),
("xxy", True, True, re.MULTILINE),
("xxy", False, False, re.MULTILINE),
])
def test_i_can_serialize_reg_ex_def(self, to_match, ignore_case, multiline, explicit_flags):
r = RegExDef(to_match, ignore_case, multiline, explicit_flags)
serialized = r.serialize()
r2 = RegExDef().deserialize(serialized)
assert r == r2
def test_i_can_resolve_parsing_expression_for_variable_concept(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
expression = Sequence(VariableExpression("x"), StrMatch("x"))
resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set())
assert isinstance(resolved.nodes[0], VariableExpression)
assert resolved.nodes[0].nodes[0] == resolved.nodes[1]
def test_i_can_resolve_parsing_expression_when_ending_with_variable_concept(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
expression = Sequence(StrMatch("x"), VariableExpression("x"))
resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set())
assert isinstance(resolved.nodes[1], VariableExpression)
assert resolved.nodes[0].nodes == []
def test_i_can_simplify_unordered_choices_that_refer_to_the_same_isa(self):
my_map = {
"light_red": Concept("light red", key="light_red"),
"dark_red": Concept("dark red"),
"red colors": Concept("red colors"),
"color": Concept("color"),
"adjective": Concept("adjective"),
"qualified table": Concept("qualified table", definition="adjective 'table'"),
}
sheerka, context, parser = self.init_parser(my_map, init_from_sheerka=True, create_new=True)
global_truth_context = self.get_context(sheerka, global_truth=True)
sheerka.set_isa(global_truth_context, my_map["light_red"], my_map["adjective"])
sheerka.set_isa(global_truth_context, my_map["dark_red"], my_map["adjective"])
sheerka.set_isa(global_truth_context, my_map["light_red"], my_map["color"])
sheerka.set_isa(global_truth_context, my_map["dark_red"], my_map["color"])
sheerka.set_isa(global_truth_context, my_map["light_red"], my_map["red colors"])
sheerka.set_isa(global_truth_context, my_map["dark_red"], my_map["red colors"])
sheerka.set_isa(global_truth_context, my_map["color"], my_map["adjective"])
sheerka.set_isa(global_truth_context, my_map["red colors"], my_map["color"])
sheerka.set_isa(global_truth_context, my_map["red colors"], my_map["adjective"])
# hack to ease the tests
sheerka.get_by_id(my_map["light_red"].id).get_metadata().key = "light_red"
sheerka.om.clear(SheerkaIsAManager.CONCEPTS_IN_GROUPS_ENTRY)
text = "light red table"
expected = CNC("qualified table",
source=text,
body=DoNotResolve(text),
adjective=CC("adjective",
source="light red",
body=CC("light_red", source="light red"),
light_red=CC("light_red", source="light red")))
expected_array = compute_expected_array(my_map, text, [expected])
res = parser.parse(context, ParserInput(text))
# there should be only one result !!
assert not isinstance(res, list)
assert res.status
compare_with_test_object(res.value.value, expected_array)
def test_i_cannot_parse_regex_concept_mixed_with_unrecognized_sya(self):
my_map = {
"hex": self.bnf_concept("hex", RegExMatch("[a-f0-9]{8}")),
"isa": Concept("x is an y", body="isinstance(x, y)", pre="is_question()").def_var("x").def_var("y"),
"isafoo": Concept("x is an foo", body="False", pre="is_question()").def_var("x"),
"q": Concept("q ?", body="question(a)").def_var("q")
}
# I need the concept isafoo to fool SyaNodeParser when parsing the sub text 'is an hex ?'"
# The parser will try to recognize 'is an foo', will fail and will revert the result to UTN()
# It's this UTN that need to be properly handled
sheerka, context, parser = self.init_parser(my_map, init_from_sheerka=True, create_new=True)
sheerka.set_precedence(context, my_map["isa"], my_map["q"])
sheerka.set_precedence(context, my_map["isafoo"], my_map["q"])
text = "01234567 is an hexadecimal ?"
res = parser.parse(context, ParserInput(text))
assert not res.status
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
# (True, [CNC("one_or_two", source="one", one="one", body="one")]),
# ]),
# ("two plus two", [
# (False, [CN("bnf_one"), UTN(" plus "), CN("one_or_two")]),
# (False, [CN("one_or_two"), UTN(" plus "), CN("one_or_two")]),
# ])
# ])
# def test_i_can_parse_when_multiple_results(self, parser_input, expected):
# sheerka, context, parser = self.init_parser(init_from_sheerka=True)
#
# res = parser.parse(context, parser_input)
# assert len(res) == len(expected)
#
# for res_i, expected_i in zip(res, expected):
# assert res_i.status == expected_i[0]
# expected_array = compute_expected_array(cmap, parser_input, expected_i[1])
# assert res_i.value.value == expected_array