Restarting the project.

Fixing unit tests. Continuing SyaParser
This commit is contained in:
2026-04-12 09:40:04 +02:00
parent 3be854d34c
commit 078d8e5df6
15 changed files with 2351 additions and 1290 deletions
+105
View File
@@ -0,0 +1,105 @@
from typing import Literal
from common.utils import str_concept, unstr_concept
from helpers import get_metadata
from parsers.state_machine import MetadataToken, UnrecognizedToken
class MetadataTokenForTest(MetadataToken):
def __repr__(self):
res = f"(MetadataTokenForTest metadata={str_concept(self.metadata, drop_name=True)}"
if self.start is not None:
res += f", start={self.start}"
if self.end is not None:
res += f", end={self.end}"
if self.resolution_method is not None:
res += f", method={self.resolution_method}"
if self.parser is not None:
res += f", origin={self.parser}"
res += ")"
return res
def __eq__(self, other):
if not isinstance(other, MetadataToken):
return False
if self.metadata.id != other.metadata.id:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False
if self.parser is not None and self.parser != other.parser:
return False
if self.resolution_method is not None and self.resolution_method != other.resolution_method:
return False
return True
def _ut(buffer, start=0, end=-1):
"""
helper to UnrecognizedToken
:param buffer:
:type buffer:
:param start:
:type start:
:param end:
:type end:
:return:
:rtype:
"""
return UnrecognizedToken(buffer, start, end)
def _mt(concept_id,
start=0,
end=-1,
resolution_method: Literal["name", "key", "id"] = "key",
parser="simple",
**kwargs):
"""
helper to MetadataToken
:param concept_id:
:type concept_id:
:param start:
:type start:
:param end:
:type end:
:return:
:rtype:
"""
name, _id = unstr_concept(concept_id)
variables = [(k, v) for k, v in kwargs.items()] if kwargs else None
metadata = get_metadata(id=concept_id, variables=variables) if _id is None \
else get_metadata(id=_id, name=name, variables=variables)
return MetadataTokenForTest(metadata, start, end, resolution_method, parser)
def _mtsya(concept_id,
start=0,
end=None,
resolution_method: Literal["name", "key", "id"] = "key",
parser="sya",
**kwargs):
"""
helper to MetadataToken
:param concept_id:
:type concept_id:
:param start:
:type start:
:param end:
:type end:
:return:
:rtype:
"""
name, _id = unstr_concept(concept_id)
variables = [(k, v) for k, v in kwargs.items()] if kwargs else None
metadata = get_metadata(id=concept_id, variables=variables) if _id is None \
else get_metadata(id=_id, name=name, variables=variables)
return MetadataTokenForTest(metadata, start, end, resolution_method, parser)
+142 -141
View File
@@ -3,148 +3,149 @@ import pytest
from base import BaseTest
from conftest import NewOntology
from evaluators.base_evaluator import MultipleChoices
from helpers import _mt, _ut, get_concepts, get_from, get_metadata, get_parser_input
from helpers import get_concepts, get_from, get_metadata, get_parser_input
from parsers.SimpleConceptsParser import SimpleConceptsParser
from tests.parsers.conftest import _mt, _ut
class TestSimpleConceptsParser(BaseTest):
@pytest.fixture()
def parser(self):
return SimpleConceptsParser()
@pytest.mark.parametrize("text, expected", [
("I am a new concept", [_mt("1003", 0, 8)]),
("xxx yyy I am a new concept", [_ut("xxx yyy ", 0, 3), _mt("1003", 4, 12)]),
("I am a new concept xxx yyy", [_mt("1003", 0, 8), _ut(" xxx yyy", 9, 12)]),
("xxx I am a new concept yyy", [_ut("xxx ", 0, 1), _mt("1003", 2, 10), _ut(" yyy", 11, 12)]),
("c:#1003:", [_mt("1003", 0, 0)]),
("xxx c:#1003: yyy", [_ut("xxx ", 0, 1), _mt("1003", 2, 2), _ut(" yyy", 3, 4)]),
("xxx c:I am: yyy", [_ut("xxx ", 0, 1), _mt("1002", 2, 2), _ut(" yyy", 3, 4)]),
(" I am a new concept", [_ut(" ", 0, 0), _mt("1003", 1, 9)])
])
def test_i_can_recognize_a_concept(self, context, parser, text, expected):
with NewOntology(context, "test_i_can_recognize_a_concept"):
get_concepts(context, "I", "I am", "I am a new concept", use_sheerka=True)
pi = get_parser_input(text)
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([expected])
assert not error_sink
@pytest.mark.parametrize("text, expected", [
("foo", [_mt("1001", 0, 0)]),
("I am a new concept", [_mt("1001", 0, 8)])
])
def test_i_can_recognize_a_concept_by_its_name_and_its_definition(self, context, parser, text, expected):
with NewOntology(context, "test_i_can_recognize_a_concept_by_its_name_and_its_definition"):
get_concepts(context, get_metadata(name="foo", definition="I am a new concept"), use_sheerka=True)
pi = get_parser_input(text)
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([expected])
assert not error_sink
@pytest.mark.parametrize("text, expected", [
("long concept name", [_mt("1001", 0, 4)]),
("I am a new concept", [_mt("1001", 0, 8)])
])
def test_i_can_recognize_a_concept_by_its_name_when_long_name(self, context, parser, text, expected):
with NewOntology(context, "test_i_can_recognize_a_concept_by_its_name_when_long_name"):
get_concepts(context, get_metadata(name="long concept name", definition="I am a new concept"),
use_sheerka=True)
pi = get_parser_input(text)
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_parse_a_sequence_of_concept(self, context, parser):
with NewOntology(context, "test_i_can_parse_a_sequence_of_concept"):
get_concepts(context, "foo bar", "baz", "qux", use_sheerka=True)
pi = get_parser_input("foo bar baz foo, qux")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mt("1001", 0, 2),
_ut(" ", 3, 3),
_mt("1002", 4, 4),
_ut(" foo, ", 5, 8),
_mt("1003", 9, 9)]
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_detect_multiple_choices(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices"):
get_concepts(context, "foo bar", "bar baz", use_sheerka=True)
pi = get_parser_input("foo bar baz")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected1 = [_mt("1001", 0, 2), _ut(" baz", 3, 4)]
expected2 = [_ut("foo ", 0, 1), _mt("1002", 2, 4)]
assert res == MultipleChoices([expected1, expected2])
assert not error_sink
def test_i_can_detect_multiple_choices_2(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices_2"):
get_concepts(context, "one two", "one", "two", use_sheerka=True)
pi = get_parser_input("one two")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected1 = [_mt("1001", 0, 2)]
expected2 = [_mt("1002", 0, 0), _ut(" ", 1, 1), _mt("1003", 2, 2)]
assert res == MultipleChoices([expected1, expected2])
assert not error_sink
def test_i_can_detect_multiple_choices_3(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices_2"):
get_concepts(context, "one two", "one", "two", use_sheerka=True)
pi = get_parser_input("one two xxx one two")
error_sink = []
res = parser.parse(context, pi, error_sink)
e1 = get_from(_mt("c:one two#1001:"), _ut(" xxx "), _mt("c:#1001:"))
e2 = get_from(_mt("c:one#1002:"), _ut(" "), _mt("c:two#1003:"), _ut(" xxx "), _mt("c:one two#1001:"))
e3 = get_from(_mt("c:one two#1001:"), _ut(" xxx "), _mt("c:one#1002:"), _ut(" "), _mt("c:two#1003:"))
e4 = get_from(_mt("c:one#1002:"), _ut(" "), _mt("c:two#1003:"), _ut(" xxx "), _mt("c:#1002:"), _ut(" "),
_mt("c:#1003:"))
assert res == MultipleChoices([e1, e2, e3, e4])
assert not error_sink
def test_nothing_is_return_is_no_concept_is_recognized(self, context, parser):
pi = get_parser_input("one two three")
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([])
def test_i_can_manage_attribute_reference(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices_2"):
get_concepts(context, "foo", "i am a concept", use_sheerka=True)
pi = get_parser_input("foo.attribute")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mt("1001", 0, 0), _ut(".attribute", 1, 2)]
assert res == MultipleChoices([expected])
pi = get_parser_input("i am a concept.attribute")
res = parser.parse(context, pi, error_sink)
expected = [_mt("1002", 0, 6), _ut(".attribute", 7, 8)]
assert res == MultipleChoices([expected])
@pytest.fixture()
def parser(self):
return SimpleConceptsParser()
@pytest.mark.parametrize("text, expected", [
("I am a new concept", [_mt("1003", 0, 8)]),
("xxx yyy I am a new concept", [_ut("xxx yyy ", 0, 3), _mt("1003", 4, 12)]),
("I am a new concept xxx yyy", [_mt("1003", 0, 8), _ut(" xxx yyy", 9, 12)]),
("xxx I am a new concept yyy", [_ut("xxx ", 0, 1), _mt("1003", 2, 10), _ut(" yyy", 11, 12)]),
("c:#1003:", [_mt("1003", 0, 0, resolution_method="id")]),
("xxx c:#1003: yyy", [_ut("xxx ", 0, 1), _mt("1003", 2, 2, resolution_method="id"), _ut(" yyy", 3, 4)]),
("xxx c:I am: yyy", [_ut("xxx ", 0, 1), _mt("1002", 2, 2, resolution_method="name"), _ut(" yyy", 3, 4)]),
(" I am a new concept", [_ut(" ", 0, 0), _mt("1003", 1, 9)])
])
def test_i_can_recognize_a_concept(self, context, parser, text, expected):
with NewOntology(context, "test_i_can_recognize_a_concept"):
get_concepts(context, "I", "I am", "I am a new concept", use_sheerka=True)
pi = get_parser_input(text)
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([expected])
assert not error_sink
@pytest.mark.parametrize("text, expected", [
("foo", [_mt("1001", 0, 0, resolution_method="name")]),
("I am a new concept", [_mt("1001", 0, 8)])
])
def test_i_can_recognize_a_concept_by_its_name_and_its_definition(self, context, parser, text, expected):
with NewOntology(context, "test_i_can_recognize_a_concept_by_its_name_and_its_definition"):
get_concepts(context, get_metadata(name="foo", definition="I am a new concept"), use_sheerka=True)
pi = get_parser_input(text)
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([expected])
assert not error_sink
@pytest.mark.parametrize("text, expected", [
("long concept name", [_mt("1001", 0, 4, resolution_method="name")]),
("I am a new concept", [_mt("1001", 0, 8)])
])
def test_i_can_recognize_a_concept_by_its_name_when_long_name(self, context, parser, text, expected):
with NewOntology(context, "test_i_can_recognize_a_concept_by_its_name_when_long_name"):
get_concepts(context, get_metadata(name="long concept name", definition="I am a new concept"),
use_sheerka=True)
pi = get_parser_input(text)
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_parse_a_sequence_of_concept(self, context, parser):
with NewOntology(context, "test_i_can_parse_a_sequence_of_concept"):
get_concepts(context, "foo bar", "baz", "qux", use_sheerka=True)
pi = get_parser_input("foo bar baz foo, qux")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mt("1001", 0, 2),
_ut(" ", 3, 3),
_mt("1002", 4, 4),
_ut(" foo, ", 5, 8),
_mt("1003", 9, 9)]
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_detect_multiple_choices(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices"):
get_concepts(context, "foo bar", "bar baz", use_sheerka=True)
pi = get_parser_input("foo bar baz")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected1 = [_mt("1001", 0, 2), _ut(" baz", 3, 4)]
expected2 = [_ut("foo ", 0, 1), _mt("1002", 2, 4)]
assert res == MultipleChoices([expected1, expected2])
assert not error_sink
def test_i_can_detect_multiple_choices_2(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices_2"):
get_concepts(context, "one two", "one", "two", use_sheerka=True)
pi = get_parser_input("one two")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected1 = [_mt("1001", 0, 2)]
expected2 = [_mt("1002", 0, 0), _ut(" ", 1, 1), _mt("1003", 2, 2)]
assert res == MultipleChoices([expected1, expected2])
assert not error_sink
def test_i_can_detect_multiple_choices_3(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices_2"):
get_concepts(context, "one two", "one", "two", use_sheerka=True)
pi = get_parser_input("one two xxx one two")
error_sink = []
res = parser.parse(context, pi, error_sink)
e1 = get_from(_mt("c:one two#1001:"), _ut(" xxx "), _mt("c:#1001:"))
e2 = get_from(_mt("c:one#1002:"), _ut(" "), _mt("c:two#1003:"), _ut(" xxx "), _mt("c:one two#1001:"))
e3 = get_from(_mt("c:one two#1001:"), _ut(" xxx "), _mt("c:one#1002:"), _ut(" "), _mt("c:two#1003:"))
e4 = get_from(_mt("c:one#1002:"), _ut(" "), _mt("c:two#1003:"), _ut(" xxx "), _mt("c:#1002:"), _ut(" "),
_mt("c:#1003:"))
assert res == MultipleChoices([e1, e2, e3, e4])
assert not error_sink
def test_nothing_is_return_is_no_concept_is_recognized(self, context, parser):
pi = get_parser_input("one two three")
error_sink = []
res = parser.parse(context, pi, error_sink)
assert res == MultipleChoices([])
def test_i_can_manage_attribute_reference(self, context, parser):
with NewOntology(context, "test_i_can_detect_multiple_choices_2"):
get_concepts(context, "foo", "i am a concept", use_sheerka=True)
pi = get_parser_input("foo.attribute")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mt("1001", 0, 0), _ut(".attribute", 1, 2)]
assert res == MultipleChoices([expected])
pi = get_parser_input("i am a concept.attribute")
res = parser.parse(context, pi, error_sink)
expected = [_mt("1002", 0, 6), _ut(".attribute", 7, 8)]
assert res == MultipleChoices([expected])
+84 -83
View File
@@ -3,91 +3,92 @@ import pytest
from base import BaseTest
from conftest import NewOntology, comparable_tokens
from evaluators.base_evaluator import MultipleChoices
from helpers import _mt, get_concept, get_concepts, get_parser_input
from helpers import get_concept, get_concepts, get_parser_input
from parsers.SyaConceptsParser import SyaConceptsParser
from parsers.tokenizer import Tokenizer
from tests.parsers.conftest import _mtsya
class TestSyaConceptsParser(BaseTest):
@pytest.fixture()
def parser(self):
return SyaConceptsParser()
@pytest.mark.parametrize("concept_key, expected_list", [
["a long token name", [("a long token name", 0)]],
["__var__0 __var__1 __var__2", [("", 3)]],
["__var__0 __var__1 prefixed", [(" prefixed", 2)]],
["suffixed __var__0 __var__1", [("suffixed ", 0), ["", 2]]],
["__var__0 __var__1 infixed __var__0 __var__1", [(" infixed ", 2), ["", 2]]],
["if __var__0 __var__1 then __var__2 end", [("if ", 0), (" then ", 2), (" end", 1)]]
])
def test_i_can_initialize_expected_parameters(self, parser, concept_key, expected_list):
resolved_expected_list = [(list(Tokenizer(source, yield_eof=False)), nb) for source, nb in expected_list]
actual = parser._get_expected_tokens(concept_key)
with comparable_tokens():
assert actual == resolved_expected_list
@pytest.mark.parametrize("concept", [
get_concept("a plus b", variables=["a", "b"]),
get_concept("add a b", variables=["a", "b"]),
get_concept("a b add", variables=["a", "b"]),
])
def test_i_can_parse_a_simple_case(self, context, parser, concept):
with NewOntology(context, "test_i_can_parse_a_simple_case"):
get_concepts(context, concept, use_sheerka=True)
pi = get_parser_input("1 plus 2")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mt("1001", a="1 ", b=" 2")]
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_parse_long_names_concept(self, context, parser):
with NewOntology(context, "test_i_can_parse_a_simple_case"):
get_concepts(context, get_concept("a long named concept b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 long named concept 2")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mt("1001", a="1 ", b=" 2")]
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_parse_sequence(self, context, parser):
with NewOntology(context, "test_i_can_parse_sequence"):
get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 plus 2 3 plus 7")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [[_mt("1001", a="1 ", b=" 2")], [_mt("1001", a=" 3 ", b=" 7")]]
assert res == MultipleChoices(expected)
assert not error_sink
def test_not_enough_parameters(self, context, parser):
with NewOntology(context, "test_not_enough_parameters"):
get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 plus 2 3 plus 7")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [[_mt("1001", a="1 ", b=" 2")], [_mt("1001", a=" 3 ", b=" 7")]]
assert res == MultipleChoices(expected)
assert not error_sink
def test_i_can_detect_when_name_does_not_match(self, context, parser):
with NewOntology(context, "test_i_can_detect_when_name_does_not_match"):
get_concepts(context, get_concept("a long named concept b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 long named mismatch 2")
error_sink = []
res = parser.parse(context, pi, error_sink)
assert error_sink
@pytest.fixture()
def parser(self):
return SyaConceptsParser()
@pytest.mark.parametrize("concept_key, expected_list", [
["a long token name", [("a long token name", 0)]],
["__var__0 __var__1 __var__2", [("", 3)]],
["__var__0 __var__1 prefixed", [(" prefixed", 2)]],
["suffixed __var__0 __var__1", [("suffixed ", 0), ["", 2]]],
["__var__0 __var__1 infixed __var__0 __var__1", [(" infixed ", 2), ["", 2]]],
["if __var__0 __var__1 then __var__2 end", [("if ", 0), (" then ", 2), (" end", 1)]]
])
def test_i_can_initialize_expected_parameters(self, parser, concept_key, expected_list):
resolved_expected_list = [(list(Tokenizer(source, yield_eof=False)), nb) for source, nb in expected_list]
actual = parser._get_expected_tokens(concept_key)
with comparable_tokens():
assert actual == resolved_expected_list
@pytest.mark.parametrize("concept, _input", [
(get_concept("a plus b", variables=["a", "b"]), "1 plus 2"),
(get_concept("add a b", variables=["a", "b"]), "add 1 2"),
(get_concept("a b add", variables=["a", "b"]), "1 2 add")
])
def test_i_can_parse_a_simple_case(self, context, parser, concept, _input):
with NewOntology(context, "test_i_can_parse_a_simple_case"):
get_concepts(context, concept, use_sheerka=True)
pi = get_parser_input(_input)
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mtsya("1001", a="1 ", b=" 2")]
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_parse_long_names_concept(self, context, parser):
with NewOntology(context, "test_i_can_parse_a_simple_case"):
get_concepts(context, get_concept("a long named concept b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 long named concept 2")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [_mtsya("1001", a="1 ", b=" 2")]
assert res == MultipleChoices([expected])
assert not error_sink
def test_i_can_parse_sequence(self, context, parser):
with NewOntology(context, "test_i_can_parse_sequence"):
get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 plus 2 3 plus 7")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [[_mtsya("1001", a="1 ", b=" 2")], [_mtsya("1001", a=" 3 ", b=" 7")]]
assert res == MultipleChoices(expected)
assert not error_sink
def test_not_enough_parameters(self, context, parser):
with NewOntology(context, "test_not_enough_parameters"):
get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 plus ")
error_sink = []
res = parser.parse(context, pi, error_sink)
expected = [[_mtsya("1001", a="1 ", b=" 2")], [_mtsya("1001", a=" 3 ", b=" 7")]]
assert res == MultipleChoices(expected)
assert not error_sink
def test_i_can_detect_when_name_does_not_match(self, context, parser):
with NewOntology(context, "test_i_can_detect_when_name_does_not_match"):
get_concepts(context, get_concept("a long named concept b", variables=["a", "b"]), use_sheerka=True)
pi = get_parser_input("1 long named mismatch 2")
error_sink = []
res = parser.parse(context, pi, error_sink)
assert error_sink