Fixed #30 : Add variable support in BNF concept definition

Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
This commit is contained in:
2021-02-24 17:23:03 +01:00
parent cac2dad17f
commit 646c428edb
32 changed files with 2107 additions and 360 deletions
+445 -37
View File
@@ -1,3 +1,5 @@
import re
import pytest
import tests.parsers.parsers_utils
@@ -6,12 +8,14 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
from core.global_symbols import NotInit
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BaseNodeParser import CNC, UTN, CN, NoMatchingTokenError, SCN
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \
BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression
from tests.BaseTest import BaseTest
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.evaluators.EvaluatorTestsUtils import python_ret_val
cmap = {
"one": Concept("one"),
@@ -42,9 +46,14 @@ cmap = {
"three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
"t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),
# bnf with variable
"one thing": Concept("one x", definition="one x").def_var("x"),
"x shoe": Concept("x shoe", definition="x 'shoe'").def_var("x"),
# testing keywords
"def_only": Concept("def"),
"def number": Concept("def number", definition="def (one|two)=number"),
# sequence of keywords using bnf definition
# "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
# "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),
@@ -68,8 +77,8 @@ def u(parsing_expression, start, end, children=None):
if isinstance(parsing_expression, str):
parsing_expression = StrMatch(parsing_expression)
if isinstance(parsing_expression, StrMatch):
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)
if isinstance(parsing_expression, Match):
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match, parsing_expression.to_match)
return NonTerminalNode(parsing_expression, start, end, [], children)
@@ -105,7 +114,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(context, cmap["one hundred"], cmap["number"])
sheerka.set_isa(context, cmap["hundreds"], cmap["number"])
# Pay attention. 'twenties (t1 and t2) are not set as number
# Pay attention: the 'twenties' (t1 and t2) are not set as 'number'
thirties = cls.update_bnf(context, Concept("thirties",
definition="thirty number",
@@ -158,7 +167,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
for i, pair in enumerate(my_concepts_map):
my_concepts_map[pair] = updated[i]
parser = BnfNodeParser(sheerka=sheerka) if init_from_sheerka else BnfNodeParser()
if init_from_sheerka:
parser = BnfNodeParser(sheerka=sheerka)
else:
parser = BnfNodeParser().init_from_concepts(context, my_concepts_map.values())
return sheerka, context, parser
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
@@ -198,7 +210,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert len(bnf_parsers_helpers) == len(expected_array)
for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array):
assert parser_helper.sequence == expected_sequence
to_compare = tests.parsers.parsers_utils.get_test_obj(expected_sequence, parser_helper.sequence)
# assert parser_helper.sequence == expected_sequence
assert to_compare == expected_sequence
if len(bnf_parsers_helpers) == 1:
return bnf_parsers_helpers[0].sequence
@@ -221,7 +235,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
(StrMatch("3.14"), "3.14"),
(StrMatch("+"), "+"),
])
def test_i_can_match_simple_bnf(self, expr, text):
def test_i_can_match_str_bnf(self, expr, text):
my_map = {
text: self.bnf_concept("foo", expr)
}
@@ -229,6 +243,57 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sequence = self.validate_get_concepts_sequences(my_map, text, [text])
assert sequence[0].underlying == u(expr, 0, 0)
@pytest.mark.parametrize("expr, text, end", [
    (RegExMatch("bar"), "bar", 0),
    (RegExMatch("[a-z]+"), "xyz", 0),
    (RegExMatch("[a-z=]+"), "uvt=xyz=abc", 4),
])
def test_i_can_match_regex_bnf(self, expr, text, end):
    """Check the span and expression of the underlying node for a concept made of one RegExMatch."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    node = matched[0].underlying
    assert (node.start, node.end) == (0, end)
    assert node.parsing_expression == expr
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(StrMatch("foo"), RegExMatch("bar")), "foo bar", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z]+")), "foo xyz", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z=]+")), "foo uvt=xyz=abc", 6),
])
def test_i_can_match_sequence_str_regex(self, expr, text, end):
    """Check the underlying node when a StrMatch is followed by a RegExMatch."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    node = matched[0].underlying
    # the expected node is built with the actual node's own children
    assert node == u(expr, 0, end, node.children)
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(RegExMatch("bar"), StrMatch("foo")), "bar foo", 2),
    (Sequence(RegExMatch("[a-z]+"), StrMatch("foo")), "xyz foo", 2),
    (Sequence(RegExMatch("[a-z=]+"), StrMatch("foo")), "uvt=xyz=abc foo", 6),
])
def test_i_can_match_sequence_regex_str(self, expr, text, end):
    """Check the underlying node when a RegExMatch is followed by a StrMatch."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    node = matched[0].underlying
    # the expected node is built with the actual node's own children
    assert node == u(expr, 0, end, node.children)
def test_i_can_match_sequence_str_regex_str(self):
    """Check the underlying node when a RegExMatch sits between two StrMatch."""
    text = "foo uvt=xyz=abc baz"
    expr = Sequence(StrMatch("foo"), RegExMatch("[a-z=]+"), StrMatch("baz"))
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    node = matched[0].underlying
    # the expected node is built with the actual node's own children
    assert node == u(expr, 0, 8, node.children)
def test_i_can_match_multiple_concepts_in_one_input(self):
my_map = {
"one": self.bnf_concept("one"),
@@ -356,8 +421,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
# ("twenty one", [CNC("foo", source="twenty one")]),
# ("twenty three", []), # three does not exist
("twenty one", [CNC("foo", source="twenty one")]),
("twenty three", []), # three does not exist
("twenty four", []), # four exists but should not be seen
])
def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
@@ -388,7 +453,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("three", []),
])
def test_i_can_parse_unordered_choice(self, text, expected):
def test_i_can_match_unordered_choice(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", UnOrderedChoice(
StrMatch("one"),
@@ -402,7 +467,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("", []),
("two", []),
])
def test_i_can_parse_optional(self, text, expected):
def test_i_can_match_optional(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", Optional(StrMatch("one")))
}
@@ -413,7 +478,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("twenty one", [CNC("foo", source="twenty one")]),
("one", [CNC("foo", source="one")]),
])
def test_i_can_parse_sequence_starting_with_optional(self, text, expected):
def test_i_can_match_sequence_starting_with_optional(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -427,7 +492,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two three", [CNC("foo", source="one two three")]),
("one two", [CNC("foo", source="one two")]),
])
def test_i_can_parse_sequence_ending_with_optional(self, text, expected):
def test_i_can_match_sequence_ending_with_optional(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -442,7 +507,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two three", [CNC("foo", source="one two three")]),
("one three", [CNC("foo", source="one three")]),
])
def test_i_can_parse_sequence_with_optional_in_between(self, text, expected):
def test_i_can_match_sequence_with_optional_in_between(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -459,7 +524,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one", [CNC("foo", source="one")]),
("one one", [CNC("foo", source="one one")]),
])
def test_i_can_parse_zero_or_more(self, text, expected):
def test_i_can_match_zero_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one")))
}
@@ -471,7 +536,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two", [CNC("foo", source="one two")]),
("one one two", [CNC("foo", source="one one two")]),
])
def test_i_can_parse_sequence_and_zero_or_more(self, text, expected):
def test_i_can_match_sequence_and_zero_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -485,7 +550,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [
("one, one , one", [CNC("foo", source="one, one , one")]),
])
def test_i_can_parse_zero_or_more_with_separator(self, text, expected):
def test_i_can_match_zero_or_more_with_separator(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=","))
}
@@ -508,7 +573,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one", [CNC("foo", source="one")]),
("one one one", [CNC("foo", source="one one one")]),
])
def test_i_can_parse_one_or_more(self, text, expected):
def test_i_can_match_one_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
}
@@ -520,7 +585,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two", [CNC("foo", source="one two")]),
("one one two", [CNC("foo", source="one one two")]),
])
def test_i_can_parse_sequence_one_and_or_more(self, text, expected):
def test_i_can_match_sequence_one_and_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -534,7 +599,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [
("one, one , one", [CNC("foo", source="one, one , one")]),
])
def test_i_can_parse_one_or_more_with_separator(self, text, expected):
def test_i_can_match_one_or_more_with_separator(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=","))
}
@@ -763,7 +828,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")}
assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")}
def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
def test_i_can_match_concept_reference_that_is_not_in_grammar(self):
my_map = {
"one": Concept("one"),
"two": Concept("two"),
@@ -817,6 +882,234 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert concept_foo.get_compiled() == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
ConceptParts.BODY: DoNotResolve(value='twenty one')}
@pytest.mark.parametrize("expr, expected", [
    ("one 'car'", [CNC("foo", source="one 'car'", x=python_ret_val("'car'"))]),  # python
    ("one bar", [CNC("foo", source="one bar", x=CC("bar"))]),  # simple concept
    ("one super car", [CNC("foo", source="one super car", x=CC("super car"))]),  # long concept
    ("one shoe",
     [CNC("foo", source="one shoe", x=CC("thing", source="shoe", body=DoNotResolve("shoe")))]),  # bnf
])
def test_i_can_match_variable_when_ending_with_one_variable(self, expr, expected):
    """Match 'one <x>' where the trailing variable x is filled by each parametrized kind of value."""
    foo_bnf = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "thing": Concept("thing", definition="'shoe'|'skirt'"),
        "super car": Concept("super car"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_ending_with_one_variable_and_sya(self):
    """Match 'one <x>' when the tail can also be read through the 'x plus y' concept: two results expected."""
    foo_bnf = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    # first result: x only takes 'bar'; second result: x takes the whole 'bar plus baz'
    short_match = [CNC("foo", source="one bar", x=CC("bar")), UTN(" plus "), CN("baz")]
    long_match = [
        CNC("foo", source="one bar plus baz", x=CC("plus", source="bar plus baz", x="bar", y="baz")),
    ]

    self.validate_get_concepts_sequences(concepts,
                                         "one bar plus baz",
                                         [short_match, long_match],
                                         multiple_result=True)
def test_i_can_match_variable_when_ending_with_one_variable_and_multiple_results(self):
    """Two concepts share the name 'pretty big': the trailing variable yields one result per concept."""
    foo_bnf = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }

    with_body = [CNC("foo", source="one pretty big", x=CC("pretty big"))]
    without_body = [CNC("foo", source="one pretty big", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts,
                                         "one pretty big",
                                         [with_body, without_body],
                                         multiple_result=True)
def test_i_can_match_variable_when_ending_with_multiple_variables_and_multiple_results(self):
    """'one <x> <y>' with two concepts named 'pretty': one result is expected per candidate for x."""
    foo_bnf = Sequence(StrMatch("one"), VariableExpression("x"), VariableExpression("y"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "pretty": Concept("pretty", body="pretty"),
        "pretty2": Concept("pretty"),
        "big": Concept("big", body="big"),
    }

    first = [CNC("foo", source="one pretty big", x=CC("pretty"), y=CC("big"))]
    second = [CNC("foo", source="one pretty big", x=CC("pretty2", source="pretty"), y=CC("big"))]

    self.validate_get_concepts_sequences(concepts,
                                         "one pretty big",
                                         [first, second],
                                         multiple_result=True)
@pytest.mark.parametrize("expr, expected", [
    ("'my' shoe", [CNC("foo", source="'my' shoe", x=python_ret_val("'my' "))]),  # python
    ("one shoe", [CNC("foo", source="one shoe", x=CC("one"))]),  # concept
    ("my little shoe", [CNC("foo", source="my little shoe", x=CC("my little"))]),  # long concept
    ("black shoe",
     [CNC("foo", source="black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
])
def test_i_can_match_variable_when_starting_with_one_variable(self, expr, expected):
    """Match '<x> shoe' where the leading variable x is filled by each parametrized kind of value."""
    foo_bnf = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "one": Concept("one"),
        "my little": Concept("my little"),
        "color": Concept("color", definition="'blue'|'black'"),
        "and": Concept("x and y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_starting_with_one_variable_and_sya(self):
    """Match '<x> shoe' when the head can also be read through the 'x but y' concept: two results expected."""
    foo_bnf = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "tiny": Concept("tiny"),
        "beautiful": Concept("beautiful"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }

    # the result we are after: x swallows the whole 'tiny but beautiful'
    wanted = [
        CNC("foo",
            source="tiny but beautiful shoe",
            x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful")),
    ]
    # the parser also reports the reading where x is only 'beautiful'
    unwanted = [CN("tiny"), UTN(" but "), CNC("foo", source="beautiful shoe", x=CC("beautiful"))]

    self.validate_get_concepts_sequences(concepts,
                                         "tiny but beautiful shoe",
                                         [unwanted, wanted],
                                         multiple_result=True)
def test_i_can_match_variable_when_starting_with_multiple_variables(self):
    """Match '<x> <y> <z> shoe' with a concept, a quoted string and an 'x plus y' expression as values."""
    foo_bnf = Sequence(VariableExpression("x"),
                       VariableExpression("y"),
                       VariableExpression("z"),
                       StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "one": Concept("one"),
        "two": Concept("two"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }
    text = "one 'one' one plus two shoe"

    # reading where 'foo' is not recognized at all
    unwanted = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")]
    # reading where the three variables are bound
    wanted = [
        CNC("foo",
            source="one 'one' one plus two shoe",
            x=CC("one"),
            y=python_ret_val(" 'one' "),
            z=CC("plus", source="one plus two", x="one", y="two")),
    ]

    self.validate_get_concepts_sequences(concepts, text, [unwanted, wanted], multiple_result=True)
def test_i_can_match_variable_when_starting_with_one_variable_and_longer_str(self):
    """Match '<x> foo bar baz': a leading variable followed by three StrMatch."""
    foo_bnf = Sequence(VariableExpression("x"),
                       StrMatch("foo"),
                       StrMatch("bar"),
                       StrMatch("baz"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "one": Concept("one"),
    }

    wanted = [CNC("foo", source="one foo bar baz", x=CC("one"))]

    self.validate_get_concepts_sequences(concepts, "one foo bar baz", wanted)
@pytest.mark.parametrize("expr, expected", [
    ("one 'pretty' shoe",
     [CNC("foo", source="one 'pretty' shoe", x=python_ret_val("'pretty' "))]),  # python
    ("one little shoe",
     [CNC("foo", source="one little shoe", x=CC("little"))]),  # concept
    ("one very big shoe",
     [CNC("foo", source="one very big shoe", x=CC("very big"))]),  # long concept
    ("one black shoe",
     [CNC("foo", source="one black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
    ("one tiny but beautiful shoe",
     [CNC("foo",
          source="one tiny but beautiful shoe",
          x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]),
])
def test_i_can_match_variable_in_between(self, expr, expected):
    """Match 'one <x> shoe' where the variable sits between two StrMatch."""
    foo_bnf = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "little": Concept("little"),
        "very big": Concept("very big"),
        "color": Concept("color", definition="'blue'|'black'"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_multiple_results_in_between(self):
    """'one <x> shoe' with two concepts named 'pretty big': one result is expected per concept."""
    foo_bnf = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }

    with_body = [CNC("foo", source="one pretty big shoe", x=CC("pretty big"))]
    without_body = [CNC("foo", source="one pretty big shoe", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts,
                                         "one pretty big shoe",
                                         [with_body, without_body],
                                         multiple_result=True)
def test_i_can_match_regex_and_variable(self):
    """Match a RegExMatch followed by a variable."""
    foo_bnf = Sequence(RegExMatch("[a-z]+"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "shoe": Concept("shoe"),
    }

    wanted = [CNC("foo", source="onyx shoe", x=CC("shoe"))]

    self.validate_get_concepts_sequences(concepts, "onyx shoe", wanted)
def test_i_can_match_variable_and_regex(self):
    """Match a variable followed by a RegExMatch."""
    foo_bnf = Sequence(VariableExpression("x"), RegExMatch("[a-z]+"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "one": Concept("one"),
    }

    wanted = [CNC("foo", source="one onyx", x=CC("one"))]

    self.validate_get_concepts_sequences(concepts, "one onyx", wanted)
def test_i_can_reuse_the_same_variable(self):
    """The variable x appears twice in the definition but only once in the expected compiled variables."""
    foo_bnf = Sequence(VariableExpression("x"),
                       StrMatch("equals"),
                       VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    _sheerka, context, *updated = self.init_concepts(*concepts.values())

    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)

    # both occurrences of x receive the same value
    text = "one equals one"
    expected = [CNC("foo", source="one equals one", x=CC("one"))]
    expected_sequence = compute_expected_array(concepts, text, expected)

    parser.reset_parser(context, ParserInput(text))
    helpers = parser.get_concepts_sequences(context)

    actual = tests.parsers.parsers_utils.get_test_obj(expected_sequence, helpers[0].sequence)
    # NOTE(review): compared with `expected`, whereas validate_get_concepts_sequences compares
    # with the computed expected sequence - confirm this is intended
    assert actual == expected
def test_i_cannot_match_variable_when_variables_discrepancy(self):
    """'one equals two' would give x two different values: the first sequence must be empty."""
    foo_bnf = Sequence(VariableExpression("x"),
                       StrMatch("equals"),
                       VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "one": Concept("one"),
        "one_1": Concept("one", body="1"),
        "two": Concept("two"),
        "two_2": Concept("two", body="2"),
    }
    _sheerka, context, *updated = self.init_concepts(*concepts.values())

    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)
    parser.reset_parser(context, ParserInput("one equals two"))

    helpers = parser.get_concepts_sequences(context)

    assert helpers[0].sequence == []
@pytest.mark.parametrize("bar_expr, expected", [
(ConceptExpression("foo"), {}),
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
@@ -833,7 +1126,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
# get_parsing_expression() also returns CHICKEN_AND_EGG
@@ -858,7 +1150,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -884,7 +1175,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -908,8 +1198,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"foo": self.bnf_concept("foo", expr),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
parser.context = context
parser.sheerka = sheerka
@@ -923,7 +1212,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"number": Concept("number"),
"twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
sheerka, context, parser = self.init_parser(my_map)
parser.context = context
parser.sheerka = sheerka
sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
@@ -1025,8 +1314,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
ConceptExpression(my_map["one"], rule_name="one"))
@pytest.mark.parametrize("expr, text, expected", [
# (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
# (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
(ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
(StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
(StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
])
def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
@@ -1053,7 +1342,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"three": self.bnf_concept("three")
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
parser.reset_parser(context, ParserInput("one three"))
sequences = parser.get_concepts_sequences(context)
@@ -1067,6 +1355,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
("one 'car'", True, [CNC("one thing", source="one 'car'", x=python_ret_val("'car'"), one="one")])
])
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
@@ -1359,8 +1648,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"expr": Concept("expr", definition="term ('+' term)*"),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
text = "1 + 2 * 3"
@@ -1396,8 +1684,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
ConceptExpression("term"))),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
text = "1 + 2 * 3"
@@ -1437,8 +1724,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
assert parser.parse(context, ParserInput("foo bar")).status
assert parser.parse(context, ParserInput("foo foo foo bar")).status
@@ -1475,6 +1761,128 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert res.status
assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)])
def test_i_do_not_eat_unwanted_tokens_at_the_beginning_when_concept_with_variable(self):
    """In 'two one shoe', the leading variable of '<x> shoe' must only take 'one', leaving 'two' aside."""
    foo_bnf = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    _sheerka, context, parser = self.init_parser(concepts)
    text = "two one shoe"

    outcome = parser.parse(context, ParserInput(text))

    assert outcome.status
    wanted = [CN("two"), CNC("foo", source="one shoe", x=CC("one"))]
    assert outcome.value.value == compute_expected_array(concepts, text, wanted)
def test_i_do_not_eat_unwanted_tokens_at_the_end_when_concept_with_variable(self):
    """In 'one bar baz', the trailing variable of 'one <x>' must only take 'bar', leaving 'baz' aside."""
    foo_bnf = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", foo_bnf),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
    }
    _sheerka, context, parser = self.init_parser(concepts)
    text = "one bar baz"

    outcome = parser.parse(context, ParserInput(text))

    assert outcome.status
    wanted = [CNC("foo", source="one bar", x=CC("bar")), CN("baz")]
    assert outcome.value.value == compute_expected_array(concepts, text, wanted)
@pytest.mark.parametrize("parsing_expression, expected", [
    (RegExMatch("a"), [RegExDef("a")]),
    (OrderedChoice(StrMatch("first"), RegExMatch("a|b")), ["first", RegExDef("a|b")]),
    (OrderedChoice(RegExMatch("a|b"), StrMatch("first")), [RegExDef("a|b"), "first"]),
    (Sequence(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (Sequence(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (OneOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (OneOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (ZeroOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (ZeroOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
])
def test_i_can_get_first_item(self, parsing_expression, expected):
    """Check the first tokens collected by BnfNodeFirstTokenVisitor for each parsing expression."""
    visitor = BnfNodeFirstTokenVisitor(self.get_sheerka())

    visitor.visit(parsing_expression)

    assert visitor.first_tokens == expected
def test_i_cannot_parse_regex_when_no_next_matching_token_cannot_be_found(self):
    """Parsing 'abcdef' against r'abcd' must fail with NOT_FOR_ME and a NoMatchingTokenError(4) reason."""
    test_env = self.init_test().with_concepts(Concept("foo", definition="r'abcd'"), create_new=True)
    sheerka, context, foo = test_env.unpack()
    parser = BnfNodeParser(sheerka=sheerka)

    outcome = parser.parse(context, ParserInput("abcdef"))

    assert not outcome.status
    assert sheerka.isinstance(outcome.body, BuiltinConcepts.NOT_FOR_ME)
    assert outcome.body.reason == [NoMatchingTokenError(4)]
@pytest.mark.parametrize("text", [
    "one",
    " one",
    "one ",
    " one "
])
def test_i_cannot_parse_empty_variable(self, text):
    """A lone 'one', with or without surrounding spaces, must be rejected with NOT_FOR_ME.

    The parser is initialised from sheerka; presumably the point is that a
    concept with a variable (such as 'one x') must not match when nothing is
    left to fill the variable.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    # Use the parametrized text: the input used to be hard-coded to "one",
    # so the whitespace variants were never actually parsed.
    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("bnf, text", [
    (Sequence(VariableExpression("x"), StrMatch("foo")), "one foo"),
    (Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"),
    (Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"),
])
def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text):
    """Parsing must fail with NOT_FOR_ME for each parametrized definition and text.

    Only the concept 'foo' is declared, so the word 'one' in the texts is not
    a declared concept.
    """
    # Build the concept from the parametrized bnf: the definition used to be
    # hard-coded to "x 'shoe'", so the three parametrized expressions were never used.
    sheerka, context, foo = self.init_test().with_concepts(
        self.bnf_concept("foo", bnf)
    ).unpack()

    parser = BnfNodeParser()
    parser.init_from_concepts(context, [foo])

    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("to_match, ignore_case, multiline, explicit_flags", [
    ("xxy", None, None, re.MULTILINE),
    ("xxy", True, True, re.MULTILINE),
    ("xxy", False, False, re.MULTILINE),
])
def test_i_can_serialize_reg_ex_def(self, to_match, ignore_case, multiline, explicit_flags):
    """A RegExDef must be equal to the one rebuilt from its serialized form."""
    original = RegExDef(to_match, ignore_case, multiline, explicit_flags)

    restored = RegExDef().deserialize(original.serialize())

    assert original == restored
def test_i_can_resolve_parsing_expression_for_variable_concept(self):
    """After resolution, the leading variable's first node must be the expression that follows it."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    bnf = Sequence(VariableExpression("x"), StrMatch("x"))

    resolved = parser.resolve_parsing_expression(context, bnf, {}, set(), set())

    variable, following = resolved.nodes[0], resolved.nodes[1]
    assert isinstance(variable, VariableExpression)
    assert variable.nodes[0] == following
def test_i_can_resolve_parsing_expression_when_ending_with_variable_concept(self):
    """Resolve a sequence that ends with a variable and check the resolved nodes."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    bnf = Sequence(StrMatch("x"), VariableExpression("x"))

    resolved = parser.resolve_parsing_expression(context, bnf, {}, set(), set())

    leading, trailing = resolved.nodes[0], resolved.nodes[1]
    assert isinstance(trailing, VariableExpression)
    # NOTE(review): this inspects the first node (built from the StrMatch), not the trailing
    # variable - confirm whether the variable's own nodes were meant to be checked
    assert leading.nodes == []
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),