Fixed #30 : Add variable support in BNF concept definition
Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
import tests.parsers.parsers_utils
|
||||
@@ -6,12 +8,14 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
|
||||
from core.global_symbols import NotInit
|
||||
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from parsers.BaseNodeParser import CNC, UTN, CN
|
||||
from parsers.BaseNodeParser import CNC, UTN, CN, NoMatchingTokenError, SCN
|
||||
from parsers.BnfDefinitionParser import BnfDefinitionParser
|
||||
from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
|
||||
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser
|
||||
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \
|
||||
BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression
|
||||
from tests.BaseTest import BaseTest
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
from tests.evaluators.EvaluatorTestsUtils import python_ret_val
|
||||
|
||||
cmap = {
|
||||
"one": Concept("one"),
|
||||
@@ -42,9 +46,14 @@ cmap = {
|
||||
"three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
|
||||
"t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),
|
||||
|
||||
# bnf with variable
|
||||
"one thing": Concept("one x", definition="one x").def_var("x"),
|
||||
"x shoe": Concept("x shoe", definition="x 'shoe'").def_var("x"),
|
||||
|
||||
# testing keywords
|
||||
"def_only": Concept("def"),
|
||||
"def number": Concept("def number", definition="def (one|two)=number"),
|
||||
|
||||
# sequence of keywords using bnf definition
|
||||
# "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
|
||||
# "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),
|
||||
@@ -68,8 +77,8 @@ def u(parsing_expression, start, end, children=None):
|
||||
if isinstance(parsing_expression, str):
|
||||
parsing_expression = StrMatch(parsing_expression)
|
||||
|
||||
if isinstance(parsing_expression, StrMatch):
|
||||
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)
|
||||
if isinstance(parsing_expression, Match):
|
||||
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match, parsing_expression.to_match)
|
||||
|
||||
return NonTerminalNode(parsing_expression, start, end, [], children)
|
||||
|
||||
@@ -105,7 +114,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
sheerka.set_isa(context, cmap["one hundred"], cmap["number"])
|
||||
sheerka.set_isa(context, cmap["hundreds"], cmap["number"])
|
||||
|
||||
# Pay attention. 'twenties (t1 and t2) are not set as number
|
||||
# Pay attention. 'twenties (t1 and t2) are not set as 'number'
|
||||
|
||||
thirties = cls.update_bnf(context, Concept("thirties",
|
||||
definition="thirty number",
|
||||
@@ -158,7 +167,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
for i, pair in enumerate(my_concepts_map):
|
||||
my_concepts_map[pair] = updated[i]
|
||||
|
||||
parser = BnfNodeParser(sheerka=sheerka) if init_from_sheerka else BnfNodeParser()
|
||||
if init_from_sheerka:
|
||||
parser = BnfNodeParser(sheerka=sheerka)
|
||||
else:
|
||||
parser = BnfNodeParser().init_from_concepts(context, my_concepts_map.values())
|
||||
return sheerka, context, parser
|
||||
|
||||
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
|
||||
@@ -198,7 +210,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
assert len(bnf_parsers_helpers) == len(expected_array)
|
||||
for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array):
|
||||
assert parser_helper.sequence == expected_sequence
|
||||
to_compare = tests.parsers.parsers_utils.get_test_obj(expected_sequence, parser_helper.sequence)
|
||||
# assert parser_helper.sequence == expected_sequence
|
||||
assert to_compare == expected_sequence
|
||||
|
||||
if len(bnf_parsers_helpers) == 1:
|
||||
return bnf_parsers_helpers[0].sequence
|
||||
@@ -221,7 +235,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
(StrMatch("3.14"), "3.14"),
|
||||
(StrMatch("+"), "+"),
|
||||
])
|
||||
def test_i_can_match_simple_bnf(self, expr, text):
|
||||
def test_i_can_match_str_bnf(self, expr, text):
|
||||
my_map = {
|
||||
text: self.bnf_concept("foo", expr)
|
||||
}
|
||||
@@ -229,6 +243,57 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
sequence = self.validate_get_concepts_sequences(my_map, text, [text])
|
||||
assert sequence[0].underlying == u(expr, 0, 0)
|
||||
|
||||
@pytest.mark.parametrize("expr, text, end", [
    (RegExMatch("bar"), "bar", 0),
    (RegExMatch("[a-z]+"), "xyz", 0),
    (RegExMatch("[a-z=]+"), "uvt=xyz=abc", 4),
])
def test_i_can_match_regex_bnf(self, expr, text, end):
    """Check that a concept defined by a single regex expression matches ``text``.

    The underlying node must start at 0, end at ``end`` and keep ``expr``
    as its parsing expression.
    """
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    underlying = matched[0].underlying
    assert underlying.start == 0
    assert underlying.end == end
    assert underlying.parsing_expression == expr
|
||||
|
||||
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(StrMatch("foo"), RegExMatch("bar")), "foo bar", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z]+")), "foo xyz", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z=]+")), "foo uvt=xyz=abc", 6),
])
def test_i_can_match_sequence_str_regex(self, expr, text, end):
    """Check a sequence made of a string match followed by a regex match."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    underlying = matched[0].underlying
    # The expected node reuses the actual children: only the sequence node
    # itself (expression, start, end) is really compared here.
    assert underlying == u(expr, 0, end, underlying.children)
|
||||
|
||||
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(RegExMatch("bar"), StrMatch("foo")), "bar foo", 2),
    (Sequence(RegExMatch("[a-z]+"), StrMatch("foo")), "xyz foo", 2),
    (Sequence(RegExMatch("[a-z=]+"), StrMatch("foo")), "uvt=xyz=abc foo", 6),
])
def test_i_can_match_sequence_regex_str(self, expr, text, end):
    """Check a sequence made of a regex match followed by a string match."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    underlying = matched[0].underlying
    # The expected node reuses the actual children: only the sequence node
    # itself (expression, start, end) is really compared here.
    assert underlying == u(expr, 0, end, underlying.children)
|
||||
|
||||
def test_i_can_match_sequence_str_regex_str(self):
    """Check a sequence where a regex match sits between two string matches."""
    text = "foo uvt=xyz=abc baz"
    expr = Sequence(StrMatch("foo"), RegExMatch("[a-z=]+"), StrMatch("baz"))
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    underlying = matched[0].underlying
    # Children come from the actual node; the sequence is expected to span 0..8.
    assert underlying == u(expr, 0, 8, underlying.children)
|
||||
|
||||
def test_i_can_match_multiple_concepts_in_one_input(self):
|
||||
my_map = {
|
||||
"one": self.bnf_concept("one"),
|
||||
@@ -356,8 +421,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
self.validate_get_concepts_sequences(my_map, text, expected)
|
||||
|
||||
@pytest.mark.parametrize("text, expected", [
|
||||
# ("twenty one", [CNC("foo", source="twenty one")]),
|
||||
# ("twenty three", []), # three does not exist
|
||||
("twenty one", [CNC("foo", source="twenty one")]),
|
||||
("twenty three", []), # three does not exist
|
||||
("twenty four", []), # four exists but should not be seen
|
||||
])
|
||||
def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
|
||||
@@ -388,7 +453,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("three", []),
|
||||
|
||||
])
|
||||
def test_i_can_parse_unordered_choice(self, text, expected):
|
||||
def test_i_can_match_unordered_choice(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", UnOrderedChoice(
|
||||
StrMatch("one"),
|
||||
@@ -402,7 +467,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("", []),
|
||||
("two", []),
|
||||
])
|
||||
def test_i_can_parse_optional(self, text, expected):
|
||||
def test_i_can_match_optional(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", Optional(StrMatch("one")))
|
||||
}
|
||||
@@ -413,7 +478,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("twenty one", [CNC("foo", source="twenty one")]),
|
||||
("one", [CNC("foo", source="one")]),
|
||||
])
|
||||
def test_i_can_parse_sequence_starting_with_optional(self, text, expected):
|
||||
def test_i_can_match_sequence_starting_with_optional(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo",
|
||||
Sequence(
|
||||
@@ -427,7 +492,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("one two three", [CNC("foo", source="one two three")]),
|
||||
("one two", [CNC("foo", source="one two")]),
|
||||
])
|
||||
def test_i_can_parse_sequence_ending_with_optional(self, text, expected):
|
||||
def test_i_can_match_sequence_ending_with_optional(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo",
|
||||
Sequence(
|
||||
@@ -442,7 +507,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("one two three", [CNC("foo", source="one two three")]),
|
||||
("one three", [CNC("foo", source="one three")]),
|
||||
])
|
||||
def test_i_can_parse_sequence_with_optional_in_between(self, text, expected):
|
||||
def test_i_can_match_sequence_with_optional_in_between(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo",
|
||||
Sequence(
|
||||
@@ -459,7 +524,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("one", [CNC("foo", source="one")]),
|
||||
("one one", [CNC("foo", source="one one")]),
|
||||
])
|
||||
def test_i_can_parse_zero_or_more(self, text, expected):
|
||||
def test_i_can_match_zero_or_more(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one")))
|
||||
}
|
||||
@@ -471,7 +536,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("one two", [CNC("foo", source="one two")]),
|
||||
("one one two", [CNC("foo", source="one one two")]),
|
||||
])
|
||||
def test_i_can_parse_sequence_and_zero_or_more(self, text, expected):
|
||||
def test_i_can_match_sequence_and_zero_or_more(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo",
|
||||
Sequence(
|
||||
@@ -485,7 +550,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
@pytest.mark.parametrize("text, expected", [
|
||||
("one, one , one", [CNC("foo", source="one, one , one")]),
|
||||
])
|
||||
def test_i_can_parse_zero_or_more_with_separator(self, text, expected):
|
||||
def test_i_can_match_zero_or_more_with_separator(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=","))
|
||||
}
|
||||
@@ -508,7 +573,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("one", [CNC("foo", source="one")]),
|
||||
("one one one", [CNC("foo", source="one one one")]),
|
||||
])
|
||||
def test_i_can_parse_one_or_more(self, text, expected):
|
||||
def test_i_can_match_one_or_more(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
|
||||
}
|
||||
@@ -520,7 +585,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("one two", [CNC("foo", source="one two")]),
|
||||
("one one two", [CNC("foo", source="one one two")]),
|
||||
])
|
||||
def test_i_can_parse_sequence_one_and_or_more(self, text, expected):
|
||||
def test_i_can_match_sequence_one_and_or_more(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo",
|
||||
Sequence(
|
||||
@@ -534,7 +599,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
@pytest.mark.parametrize("text, expected", [
|
||||
("one, one , one", [CNC("foo", source="one, one , one")]),
|
||||
])
|
||||
def test_i_can_parse_one_or_more_with_separator(self, text, expected):
|
||||
def test_i_can_match_one_or_more_with_separator(self, text, expected):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=","))
|
||||
}
|
||||
@@ -763,7 +828,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")}
|
||||
assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")}
|
||||
|
||||
def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
|
||||
def test_i_can_match_concept_reference_that_is_not_in_grammar(self):
|
||||
my_map = {
|
||||
"one": Concept("one"),
|
||||
"two": Concept("two"),
|
||||
@@ -817,6 +882,234 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert concept_foo.get_compiled() == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
|
||||
ConceptParts.BODY: DoNotResolve(value='twenty one')}
|
||||
|
||||
@pytest.mark.parametrize("expr, expected", [
    ("one 'car'", [CNC("foo", source="one 'car'", x=python_ret_val("'car'"))]),  # python
    ("one bar", [CNC("foo", source="one bar", x=CC("bar"))]),  # simple concept
    ("one super car", [CNC("foo", source="one super car", x=CC("super car"))]),  # long concept
    ("one shoe", [CNC("foo", source="one shoe", x=CC("thing", source="shoe", body=DoNotResolve("shoe")))]),  # bnf
])
def test_i_can_match_variable_when_ending_with_one_variable(self, expr, expected):
    """Match ``'one' x`` where the trailing variable is fed by different kinds of input."""
    one_then_x = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", one_then_x),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "thing": Concept("thing", definition="'shoe'|'skirt'"),
        "super car": Concept("super car"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
|
||||
|
||||
def test_i_can_match_variable_when_ending_with_one_variable_and_sya(self):
    """Match ``'one' x`` when the input also contains an ``x plus y`` concept.

    Two sequences are expected: one where ``x`` is only ``bar``, and one
    where ``x`` is the whole ``bar plus baz`` concept.
    """
    one_then_x = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", one_then_x),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    text = "one bar plus baz"
    short_match = [CNC("foo", source="one bar", x=CC("bar")), UTN(" plus "), CN("baz")]
    long_match = [CNC("foo", source="one bar plus baz", x=CC("plus", source="bar plus baz", x="bar", y="baz"))]

    self.validate_get_concepts_sequences(concepts, text, [short_match, long_match], multiple_result=True)
|
||||
|
||||
def test_i_can_match_variable_when_ending_with_one_variable_and_multiple_results(self):
    """Match ``'one' x`` when two concepts share the name ``pretty big``.

    One result is expected per candidate concept for the variable.
    """
    one_then_x = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", one_then_x),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }

    text = "one pretty big"
    with_pretty_big = [CNC("foo", source="one pretty big", x=CC("pretty big"))]
    with_pbig = [CNC("foo", source="one pretty big", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts, text, [with_pretty_big, with_pbig], multiple_result=True)
|
||||
|
||||
def test_i_can_match_variable_when_ending_with_multiple_variables_and_multiple_results(self):
    """Match ``'one' x y`` when two concepts share the name ``pretty``.

    One result is expected per candidate concept for ``x``; ``y`` is ``big`` in both.
    """
    one_then_x_y = Sequence(StrMatch("one"), VariableExpression("x"), VariableExpression("y"))
    concepts = {
        "foo": self.bnf_concept("foo", one_then_x_y),
        "pretty": Concept("pretty", body="pretty"),
        "pretty2": Concept("pretty"),
        "big": Concept("big", body="big"),
    }

    text = "one pretty big"
    with_pretty = [CNC("foo", source="one pretty big", x=CC("pretty"), y=CC("big"))]
    with_pretty2 = [CNC("foo", source="one pretty big", x=CC("pretty2", source="pretty"), y=CC("big"))]

    self.validate_get_concepts_sequences(concepts, text, [with_pretty, with_pretty2], multiple_result=True)
|
||||
|
||||
@pytest.mark.parametrize("expr, expected", [
    ("'my' shoe", [CNC("foo", source="'my' shoe", x=python_ret_val("'my' "))]),  # python
    ("one shoe", [CNC("foo", source="one shoe", x=CC("one"))]),  # concept
    ("my little shoe", [CNC("foo", source="my little shoe", x=CC("my little"))]),  # long concept
    ("black shoe", [CNC("foo", source="black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
])
def test_i_can_match_variable_when_starting_with_one_variable(self, expr, expected):
    """Match ``x 'shoe'`` where the leading variable is fed by different kinds of input."""
    x_then_shoe = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", x_then_shoe),
        "one": Concept("one"),
        "my little": Concept("my little"),
        "color": Concept("color", definition="'blue'|'black'"),
        "and": Concept("x and y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
|
||||
|
||||
def test_i_can_match_variable_when_starting_with_one_variable_and_sya(self):
    """Match ``x 'shoe'`` when the input starts with an ``x but y`` concept.

    Both the wanted result (``x`` is the whole ``tiny but beautiful``) and
    an unwanted one (``x`` is only ``beautiful``) are currently expected.
    """
    x_then_shoe = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", x_then_shoe),
        "tiny": Concept("tiny"),
        "beautiful": Concept("beautiful"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }

    text = "tiny but beautiful shoe"
    wanted = [
        CNC("foo",
            source="tiny but beautiful shoe",
            x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful")),
    ]
    unwanted = [CN("tiny"), UTN(" but "), CNC("foo", source="beautiful shoe", x=CC("beautiful"))]

    self.validate_get_concepts_sequences(concepts, text, [unwanted, wanted], multiple_result=True)
|
||||
|
||||
def test_i_can_match_variable_when_starting_with_multiple_variables(self):
    """Match ``x y z 'shoe'`` with a concept, a python string and an ``x plus y`` concept.

    Both the wanted result (a single ``foo``) and an unwanted one (the
    pieces recognized separately) are currently expected.
    """
    x_y_z_then_shoe = Sequence(VariableExpression("x"),
                               VariableExpression("y"),
                               VariableExpression("z"),
                               StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", x_y_z_then_shoe),
        "one": Concept("one"),
        "two": Concept("two"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    text = "one 'one' one plus two shoe"

    unwanted = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")]
    wanted = [
        CNC("foo",
            source="one 'one' one plus two shoe",
            x=CC("one"),
            y=python_ret_val(" 'one' "),
            z=CC("plus", source="one plus two", x="one", y="two")),
    ]

    self.validate_get_concepts_sequences(concepts, text, [unwanted, wanted], multiple_result=True)
|
||||
|
||||
def test_i_can_match_variable_when_starting_with_one_variable_and_longer_str(self):
    """Match a leading variable followed by three consecutive string matches."""
    x_then_three_words = Sequence(VariableExpression("x"),
                                  StrMatch("foo"),
                                  StrMatch("bar"),
                                  StrMatch("baz"))
    concepts = {
        "foo": self.bnf_concept("foo", x_then_three_words),
        "one": Concept("one"),
    }

    text = "one foo bar baz"
    wanted = [CNC("foo", source="one foo bar baz", x=CC("one"))]

    self.validate_get_concepts_sequences(concepts, text, wanted)
|
||||
|
||||
@pytest.mark.parametrize("expr, expected", [
    ("one 'pretty' shoe", [CNC("foo", source="one 'pretty' shoe", x=python_ret_val("'pretty' "))]),  # python
    ("one little shoe", [CNC("foo", source="one little shoe", x=CC("little"))]),  # concept
    ("one very big shoe", [CNC("foo", source="one very big shoe", x=CC("very big"))]),  # long concept
    ("one black shoe",
     [CNC("foo", source="one black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
    ("one tiny but beautiful shoe",
     [CNC("foo",
          source="one tiny but beautiful shoe",
          x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]),
])
def test_i_can_match_variable_in_between(self, expr, expected):
    """Match ``'one' x 'shoe'`` where the variable sits between two string matches."""
    one_x_shoe = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", one_x_shoe),
        "little": Concept("little"),
        "very big": Concept("very big"),
        "color": Concept("color", definition="'blue'|'black'"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
|
||||
|
||||
def test_i_can_match_variable_when_multiple_results_in_between(self):
    """Match ``'one' x 'shoe'`` when two concepts share the name ``pretty big``.

    One result is expected per candidate concept for the variable.
    """
    one_x_shoe = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", one_x_shoe),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }

    text = "one pretty big shoe"
    with_pretty_big = [CNC("foo", source="one pretty big shoe", x=CC("pretty big"))]
    with_pbig = [CNC("foo", source="one pretty big shoe", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts, text, [with_pretty_big, with_pbig], multiple_result=True)
|
||||
|
||||
def test_i_can_match_regex_and_variable(self):
    """Match a regex expression followed by a variable."""
    regex_then_x = Sequence(RegExMatch("[a-z]+"),
                            VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", regex_then_x),
        "shoe": Concept("shoe"),
    }

    text = "onyx shoe"
    wanted = [CNC("foo", source="onyx shoe", x=CC("shoe"))]

    self.validate_get_concepts_sequences(concepts, text, wanted)
|
||||
|
||||
def test_i_can_match_variable_and_regex(self):
    """Match a variable followed by a regex expression."""
    x_then_regex = Sequence(VariableExpression("x"),
                            RegExMatch("[a-z]+"))
    concepts = {
        "foo": self.bnf_concept("foo", x_then_regex),
        "one": Concept("one"),
    }

    text = "one onyx"
    wanted = [CNC("foo", source="one onyx", x=CC("one"))]

    self.validate_get_concepts_sequences(concepts, text, wanted)
|
||||
|
||||
def test_i_can_reuse_the_same_variable(self):
    """Match ``x 'equals' x``: the variable is used twice but expected once in the result."""
    x_equals_x = Sequence(VariableExpression("x"),
                          StrMatch("equals"),
                          VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", x_equals_x),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    _sheerka, context, *updated = self.init_concepts(*concepts.values())
    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)

    # Both occurrences of the variable hold the same concept, so a single
    # ``x`` entry is expected.
    text = "one equals one"
    expected = [CNC("foo", source="one equals one", x=CC("one"))]
    expected_sequence = compute_expected_array(concepts, text, expected)

    parser.reset_parser(context, ParserInput(text))
    helpers = parser.get_concepts_sequences(context)
    actual = tests.parsers.parsers_utils.get_test_obj(expected_sequence, helpers[0].sequence)

    # NOTE(review): the comparison is made against ``expected`` rather than
    # ``expected_sequence`` - confirm this is intentional.
    assert actual == expected
|
||||
|
||||
def test_i_cannot_match_variable_when_variables_discrepancy(self):
    """``x 'equals' x`` must yield an empty sequence when the two ``x`` inputs differ."""
    x_equals_x = Sequence(VariableExpression("x"),
                          StrMatch("equals"),
                          VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", x_equals_x),
        "one": Concept("one"),
        "one_1": Concept("one", body="1"),
        "two": Concept("two"),
        "two_2": Concept("two", body="2"),
    }
    _sheerka, context, *updated = self.init_concepts(*concepts.values())
    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)

    text = "one equals two"
    parser.reset_parser(context, ParserInput(text))
    helpers = parser.get_concepts_sequences(context)

    assert helpers[0].sequence == []
|
||||
|
||||
@pytest.mark.parametrize("bar_expr, expected", [
|
||||
(ConceptExpression("foo"), {}),
|
||||
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
|
||||
@@ -833,7 +1126,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
parser.sheerka = sheerka
|
||||
|
||||
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
|
||||
|
||||
# get_parsing_expression() also returns CHICKEN_AND_EGG
|
||||
@@ -858,7 +1150,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
parser.sheerka = sheerka
|
||||
|
||||
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
|
||||
|
||||
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
|
||||
@@ -884,7 +1175,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
parser.sheerka = sheerka
|
||||
|
||||
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
|
||||
|
||||
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
|
||||
@@ -908,8 +1198,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
"foo": self.bnf_concept("foo", expr),
|
||||
}
|
||||
|
||||
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
sheerka, context, parser = self.init_parser(my_map)
|
||||
parser.context = context
|
||||
parser.sheerka = sheerka
|
||||
|
||||
@@ -923,7 +1212,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
"number": Concept("number"),
|
||||
"twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
|
||||
}
|
||||
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
||||
sheerka, context, parser = self.init_parser(my_map)
|
||||
parser.context = context
|
||||
parser.sheerka = sheerka
|
||||
sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
|
||||
@@ -1025,8 +1314,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
ConceptExpression(my_map["one"], rule_name="one"))
|
||||
|
||||
@pytest.mark.parametrize("expr, text, expected", [
|
||||
# (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
|
||||
# (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
|
||||
(ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
|
||||
(StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
|
||||
(StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
|
||||
])
|
||||
def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
|
||||
@@ -1053,7 +1342,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
"three": self.bnf_concept("three")
|
||||
}
|
||||
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
|
||||
parser.reset_parser(context, ParserInput("one three"))
|
||||
sequences = parser.get_concepts_sequences(context)
|
||||
@@ -1067,6 +1355,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
|
||||
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
|
||||
("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
|
||||
("one 'car'", True, [CNC("one thing", source="one 'car'", x=python_ret_val("'car'"), one="one")])
|
||||
])
|
||||
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
|
||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||
@@ -1359,8 +1648,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
"expr": Concept("expr", definition="term ('+' term)*"),
|
||||
}
|
||||
|
||||
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
sheerka, context, parser = self.init_parser(my_map)
|
||||
|
||||
text = "1 + 2 * 3"
|
||||
|
||||
@@ -1396,8 +1684,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
|
||||
ConceptExpression("term"))),
|
||||
}
|
||||
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
sheerka, context, parser = self.init_parser(my_map)
|
||||
|
||||
text = "1 + 2 * 3"
|
||||
|
||||
@@ -1437,8 +1724,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
|
||||
}
|
||||
|
||||
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
||||
parser.init_from_concepts(context, my_map.values())
|
||||
sheerka, context, parser = self.init_parser(my_map)
|
||||
|
||||
assert parser.parse(context, ParserInput("foo bar")).status
|
||||
assert parser.parse(context, ParserInput("foo foo foo bar")).status
|
||||
@@ -1475,6 +1761,128 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert res.status
|
||||
assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)])
|
||||
|
||||
def test_i_do_not_eat_unwanted_tokens_at_the_beginning_when_concept_with_variable(self):
    """With ``x 'shoe'``, a leading extra concept must stay outside of ``foo``."""
    x_then_shoe = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", x_then_shoe),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    _sheerka, context, parser = self.init_parser(concepts)

    text = "two one shoe"
    result = parser.parse(context, ParserInput(text))

    assert result.status
    wanted = [
        CN("two"),
        CNC("foo", source="one shoe", x=CC("one")),
    ]
    assert result.value.value == compute_expected_array(concepts, text, wanted)
|
||||
|
||||
def test_i_do_not_eat_unwanted_tokens_at_the_end_when_concept_with_variable(self):
    """With ``'one' x``, a trailing extra concept must stay outside of ``foo``."""
    one_then_x = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", one_then_x),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
    }
    _sheerka, context, parser = self.init_parser(concepts)

    text = "one bar baz"
    result = parser.parse(context, ParserInput(text))

    assert result.status
    wanted = [
        CNC("foo", source="one bar", x=CC("bar")),
        CN("baz"),
    ]
    assert result.value.value == compute_expected_array(concepts, text, wanted)
|
||||
|
||||
@pytest.mark.parametrize("parsing_expression, expected", [
    (RegExMatch("a"), [RegExDef("a")]),
    (OrderedChoice(StrMatch("first"), RegExMatch("a|b")), ["first", RegExDef("a|b")]),
    (OrderedChoice(RegExMatch("a|b"), StrMatch("first")), [RegExDef("a|b"), "first"]),
    (Sequence(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (Sequence(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (OneOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (OneOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (ZeroOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (ZeroOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
])
def test_i_can_get_first_item(self, parsing_expression, expected):
    """Check the first tokens collected by ``BnfNodeFirstTokenVisitor`` for an expression."""
    first_token_visitor = BnfNodeFirstTokenVisitor(self.get_sheerka())

    first_token_visitor.visit(parsing_expression)

    assert first_token_visitor.first_tokens == expected
|
||||
|
||||
def test_i_cannot_parse_regex_when_no_next_matching_token_cannot_be_found(self):
    """Parsing ``abcdef`` against the regex ``abcd`` must fail with ``NoMatchingTokenError(4)``."""
    regex_concept = Concept("foo", definition="r'abcd'")
    builder = self.init_test().with_concepts(regex_concept, create_new=True)
    sheerka, context, _foo = builder.unpack()

    parser = BnfNodeParser(sheerka=sheerka)
    result = parser.parse(context, ParserInput("abcdef"))

    assert not result.status
    assert sheerka.isinstance(result.body, BuiltinConcepts.NOT_FOR_ME)
    assert result.body.reason == [NoMatchingTokenError(4)]
|
||||
|
||||
@pytest.mark.parametrize("text", [
    "one",
    " one",
    "one ",
    " one "
])
def test_i_cannot_parse_empty_variable(self, text):
    """Check that an input providing no value for the variable is rejected.

    ``text`` is ``one`` with and without surrounding whitespace; in every
    case the parser must fail and report ``NOT_FOR_ME``.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    # Parse the parametrized input: a hard-coded "one" would run the same
    # case four times and leave the whitespace variants untested.
    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
|
||||
|
||||
@pytest.mark.parametrize("bnf, text", [
    (Sequence(VariableExpression("x"), StrMatch("foo")), "one foo"),
    (Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"),
    (Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"),
])
def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text):
    """Check that parsing fails when the variable part of ``text`` is not recognized.

    Only the concept ``foo`` (built from ``bnf``) is known to the parser,
    so ``one`` cannot be resolved; the parser must fail with ``NOT_FOR_ME``.
    """
    # Build the concept from the parametrized expression: a hard-coded
    # definition would make the three ``bnf`` variants irrelevant.
    sheerka, context, foo = self.init_test().with_concepts(
        self.bnf_concept("foo", bnf)
    ).unpack()
    parser = BnfNodeParser()
    parser.init_from_concepts(context, [foo])

    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
|
||||
|
||||
@pytest.mark.parametrize("to_match, ignore_case, multiline, explicit_flags", [
    ("xxy", None, None, re.MULTILINE),
    ("xxy", True, True, re.MULTILINE),
    ("xxy", False, False, re.MULTILINE),
])
def test_i_can_serialize_reg_ex_def(self, to_match, ignore_case, multiline, explicit_flags):
    """A ``RegExDef`` must be equal to itself after a serialize / deserialize round trip."""
    original = RegExDef(to_match, ignore_case, multiline, explicit_flags)

    payload = original.serialize()
    restored = RegExDef().deserialize(payload)

    assert original == restored
|
||||
|
||||
def test_i_can_resolve_parsing_expression_for_variable_concept(self):
    """Resolving ``x 'x'`` keeps the variable node and links it to the following node."""
    _sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    to_resolve = Sequence(VariableExpression("x"), StrMatch("x"))

    resolved = parser.resolve_parsing_expression(context, to_resolve, {}, set(), set())

    nodes = resolved.nodes
    assert isinstance(nodes[0], VariableExpression)
    # The first child of the variable node is the node that follows it in the sequence.
    assert nodes[0].nodes[0] == nodes[1]
|
||||
|
||||
def test_i_can_resolve_parsing_expression_when_ending_with_variable_concept(self):
    """Resolving ``'x' x`` keeps the trailing node as a ``VariableExpression``."""
    _sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    to_resolve = Sequence(StrMatch("x"), VariableExpression("x"))

    resolved = parser.resolve_parsing_expression(context, to_resolve, {}, set(), set())

    nodes = resolved.nodes
    assert isinstance(nodes[1], VariableExpression)
    # NOTE(review): this inspects nodes[0] (the string match), not the
    # variable node at index 1 - confirm which node was meant to be checked.
    assert nodes[0].nodes == []
|
||||
|
||||
# @pytest.mark.parametrize("parser_input, expected", [
|
||||
# ("one", [
|
||||
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
|
||||
|
||||
Reference in New Issue
Block a user