1291 lines
52 KiB
Python
1291 lines
52 KiB
Python
from ast import Str
|
|
|
|
import pytest
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.concept import Concept, ConceptParts, DoNotResolve
|
|
from core.tokenizer import Tokenizer, TokenKind, Token
|
|
from parsers.BaseNodeParser import cnode, short_cnode
|
|
from parsers.BnfParser import BnfParser
|
|
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
|
|
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \
|
|
UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression
|
|
|
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
|
|
|
|
class ConceptVisitor(ParsingExpressionVisitor):
    """Visitor that collects the concept of each ConceptExpression node it is given."""

    def __init__(self):
        # Collected concepts; a set, so a concept met several times is kept once.
        self.concepts = set()

    def visit_ConceptExpression(self, node):
        """Remember the concept carried by ``node``."""
        found = node.concept
        self.concepts.add(found)
|
|
|
|
|
|
def u(parsing_expression, start, end, children=None):
    """
    Build the expected "underlying" node (u stands for underlying).

    :param parsing_expression: a parsing expression, or a plain string that gets wrapped in a StrMatch
    :param start: start position passed to the node
    :param end: end position passed to the node
    :param children: child nodes; only used when a NonTerminalNode is built
    :return: a TerminalNode for a StrMatch, a NonTerminalNode for anything else
    """
    expression = StrMatch(parsing_expression) if isinstance(parsing_expression, str) else parsing_expression

    if not isinstance(expression, StrMatch):
        return NonTerminalNode(expression, start, end, [], children)

    # Terminal nodes carry the matched text itself as their value.
    return TerminalNode(expression, start, end, expression.to_match)
|
|
|
|
|
|
def evaluated(concept):
    """
    Build a new concept named like ``concept`` whose body is that same name.

    The concept used to be created and then dropped, so the helper always
    returned None; it is now returned to the caller.

    :param concept: concept whose name is reused for both name and body
    :return: the newly created Concept
    """
    return Concept(name=concept.name, body=concept.name)
|
|
|
|
|
|
def t(text):
    """
    Build a Token for ``text``, with its three trailing positional arguments set to 0.

    The kind is chosen from the first character: a quote gives STRING,
    a space gives WHITESPACE, anything else gives IDENTIFIER.

    :param text: token text
    :return: the Token
    """
    if text.startswith(("'", '"')):
        kind = TokenKind.STRING
    elif text.startswith(" "):
        kind = TokenKind.WHITESPACE
    else:
        kind = TokenKind.IDENTIFIER

    return Token(kind, text, 0, 0, 0)
|
|
|
|
|
|
def get_expected(concept, text=None):
    """
    Build the concept a test expects to find in a parser result.

    :param concept: source concept; its name and id are copied
    :param text: value wrapped in DoNotResolve as compiled body; falls back to the concept name when falsy
    :return: a new Concept with compiled body, key and id set
    """
    body = text or concept.name

    expected = Concept(name=concept.name)
    expected.compiled[ConceptParts.BODY] = DoNotResolve(body)
    expected.init_key()
    expected.metadata.id = concept.id
    return expected
|
|
|
|
|
|
def cbody(concept):
    """
    Return the compiled body of ``concept`` (cbody stands for compiled body).

    :param concept: concept whose ``compiled`` mapping is inspected
    :return: the entry stored under ConceptParts.BODY, or None when there is none
    """
    # 'x not in y' is the idiomatic membership test (PEP 8), same result as 'not x in y'
    if ConceptParts.BODY not in concept.compiled:
        return None
    return concept.compiled[ConceptParts.BODY]
|
|
|
|
|
|
def cprop(concept, prop_name):
    """
    Return a compiled property of ``concept`` (cprop stands for compiled property).

    :param concept: concept whose ``compiled`` mapping is read
    :param prop_name: key of the property
    :return: the value stored under ``prop_name``; a missing key raises whatever the mapping raises
    """
    compiled = concept.compiled
    return compiled[prop_name]
|
|
|
|
|
|
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
|
|
def init(self, concepts, grammar):
    """
    Prepare a context and a parser for a test.

    :param concepts: concepts added to the sheerka cache, each getting an id if needed
    :param grammar: grammar the BnfNodeParser is initialized with
    :return: tuple (context, parser)
    """
    context = self.get_context(self.get_sheerka(singleton=True))

    for concept in concepts:
        context.sheerka.add_in_cache(concept)
        context.sheerka.set_id_if_needed(concept, False)

    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(context, grammar)
    return context, bnf_parser
|
|
|
|
def execute(self, concepts, grammar, text):
    """
    Initialize a parser through ``self.init`` and parse ``text`` with it.

    :param concepts: concepts forwarded to ``init``
    :param grammar: grammar forwarded to ``init``
    :param text: input to parse
    :return: tuple (context, parser result, result.value, result.value.value)
    """
    context, parser = self.init(concepts, grammar)
    res = parser.parse(context, text)

    wrapper = res.value
    return context, res, wrapper, wrapper.value
|
|
|
|
@pytest.mark.parametrize("match, text", [
    ("foo", "foo"),
    ("'foo'", "'foo'"),
    ("1", "1"),
    ("3.14", "3.14"),
    ("+", "+"),
    (StrMatch("foo"), "foo"),
    (StrMatch("'foo'"), "'foo'"),
    (StrMatch("1"), "1"),
    (StrMatch("3.14"), "3.14"),
    (StrMatch("+"), "+"),
])
def test_i_can_match_simple_tokens(self, match, text):
    """A grammar made of a single token (plain string or StrMatch) matches that token."""
    foo = Concept(name="foo")

    context, res, wrapper, return_value = self.execute([foo], {foo: match}, text)

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    expected_node = ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))
    assert return_value == [expected_node]
|
|
|
|
def test_i_can_match_multiple_concepts_in_one_input(self):
    """Every recognized word of the input produces its own ConceptNode, in input order."""
    one = Concept(name="one")
    two = Concept(name="two")
    grammar = {one: "one", two: "two"}

    context, res, wrapper, return_value = self.execute([one, two], grammar, "one two one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    # (concept, matched word, token position) for each expected node
    expectations = ((one, "one", 0), (two, "two", 2), (one, "one", 4))
    assert return_value == [
        ConceptNode(get_expected(concept), pos, pos, source=word, underlying=u(word, pos, pos))
        for concept, word, pos in expectations
    ]
|
|
|
|
def test_i_can_match_sequence(self):
    """A Sequence matches when all its elements appear in order; the node spans tokens 0 to 4."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "one two three")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    expected_concept = get_expected(foo, "one two three")
    expected_underlying = u(grammar[foo], 0, 4, [u("one", 0, 0), u("two", 2, 2), u("three", 4, 4)])
    assert return_value == [
        ConceptNode(expected_concept, 0, 4, source="one two three", underlying=expected_underlying)
    ]
|
|
|
|
def test_i_always_choose_the_longest_match(self):
    """When 'bar' matches a prefix of what 'foo' matches, only the longer 'foo' match is returned."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", "two", "three"),
    }

    context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    longest = cnode("foo", 0, 4, "one two three")
    assert return_value == [longest]
|
|
|
|
def test_i_can_match_several_sequences(self):
    """Two sequences following each other in the input are both recognized."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", "two", "three"),
    }

    context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three one two")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    first = cnode("foo", 0, 4, "one two three")
    second = cnode("bar", 6, 8, "one two")
    assert return_value == [first, second]
|
|
|
|
def test_i_can_match_ordered_choice(self):
    """An OrderedChoice matches any of its alternatives and rejects other input."""
    foo = Concept(name="foo")
    grammar = {foo: OrderedChoice("one", "two")}
    context, parser = self.init([foo], grammar)

    # both alternatives are accepted, and the underlying node records which one matched
    for word in ("one", "two"):
        accepted = parser.parse(context, word)
        assert accepted.status
        assert context.sheerka.isinstance(accepted.value, BuiltinConcepts.PARSER_RESULT)
        assert accepted.value.body == [cnode("foo", 0, 0, word)]
        assert accepted.value.body[0].underlying == u(grammar[foo], 0, 0, [u(word, 0, 0)])

    # anything else is reported as unrecognized tokens
    rejected = parser.parse(context, "three")
    assert not rejected.status
    assert context.sheerka.isinstance(rejected.value, BuiltinConcepts.PARSER_RESULT)
    assert rejected.value.value == [UnrecognizedTokensNode(0, 0, [t("three")])]
|
|
|
|
def test_i_cannot_match_ordered_choice_with_empty_alternative(self):
    """Input 'ok' is not matched by a sequence starting with a choice that has an empty alternative."""
    foo = Concept(name="foo")
    choice = OrderedChoice("one", "")
    grammar = {foo: Sequence(choice, "two")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "ok")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    unrecognized = UnrecognizedTokensNode(0, 0, [t("ok")])
    assert return_value == [unrecognized]
|
|
|
|
def test_i_can_mix_sequences_and_ordered_choices(self):
    """
    A Sequence may start with an OrderedChoice.

    Both alternatives of the choice lead to a full match; an input missing
    the end of the sequence is entirely reported as unrecognized.
    """
    foo = Concept(name="foo")
    grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}

    context, parser = self.init([foo], grammar)

    for first_word in ("twenty", "thirty"):
        text = first_word + " one ok"
        res = parser.parse(context, text)
        assert res.status
        assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
        assert res.value.body == [ConceptNode(
            get_expected(foo, text), 0, 4,
            source=text,
            underlying=u(grammar[foo], 0, 4, [
                u(OrderedChoice("twenty", "thirty"), 0, 0, [u(first_word, 0, 0)]),
                u("one", 2, 2),
                u("ok", 4, 4)]))]

    res3 = parser.parse(context, "twenty one")
    assert not res3.status
    # fixed: this assertion used to re-check the previous result (res2) instead of res3
    assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT)
    assert res3.value.value == [
        UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")])
    ]
|
|
|
|
def test_i_can_mix_ordered_choices_and_sequences(self):
    """An OrderedChoice may hold a Sequence as one of its alternatives."""
    foo = Concept(name="foo")
    grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}

    context, parser = self.init([foo], grammar)

    # (input, expected end position of the 'foo' node)
    for text, end in (("twenty thirty", 2), ("one", 0)):
        res = parser.parse(context, text)
        assert res.status
        assert res.value.value == [cnode("foo", 0, end, text)]
|
|
|
|
def test_i_cannot_parse_empty_optional(self):
    """Parsing an empty input fails with an IS_EMPTY value, even when the grammar is an Optional."""
    foo = Concept(name="foo")
    context, parser = self.init([foo], {foo: Optional("one")})

    res = parser.parse(context, "")

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.IS_EMPTY)
|
|
|
|
def test_i_can_parse_optional(self):
    """An Optional matches its element when that element is present."""
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "one")

    assert res.status

    expected_underlying = u(grammar[foo], 0, 0, [u("one", 0, 0)])
    assert return_value == [
        ConceptNode(get_expected(foo, "one"), 0, 0, source="one", underlying=expected_underlying)
    ]
|
|
|
|
def test_i_can_parse_sequence_starting_with_optional(self):
    """A Sequence starting with an Optional matches with and without the optional part."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(Optional("twenty"), "one")}
    context, parser = self.init([foo], grammar)

    # optional part present: it shows up as an Optional node among the children
    with_optional = parser.parse(context, "twenty one")
    assert with_optional.status
    children = [
        u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
        u("one", 2, 2),
    ]
    assert with_optional.value.body == [
        ConceptNode(get_expected(foo, "twenty one"), 0, 2,
                    source="twenty one",
                    underlying=u(grammar[foo], 0, 2, children))
    ]

    # optional part absent: only the mandatory element is a child
    without_optional = parser.parse(context, "one")
    assert without_optional.status
    assert without_optional.value.body == [
        ConceptNode(get_expected(foo, "one"), 0, 0,
                    source="one",
                    underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))
    ]
|
|
|
|
def test_i_can_parse_sequence_ending_with_optional(self):
    """A Sequence ending with an Optional matches with and without the optional part."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", Optional("three"))}

    context, parser = self.init([foo], grammar)

    # (input, expected end position of the 'foo' node)
    for text, end in (("one two three", 4), ("one two", 2)):
        res = parser.parse(context, text)
        assert res.status
        assert res.value.body == [cnode("foo", 0, end, text)]
|
|
|
|
def test_i_can_parse_sequence_with_optional_in_between(self):
    """A Sequence with an Optional in the middle matches with and without the optional part."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", Optional("two"), "three")}

    context, parser = self.init([foo], grammar)

    # (input, expected end position of the 'foo' node)
    for text, end in (("one two three", 4), ("one three", 2)):
        res = parser.parse(context, text)
        assert res.status
        assert res.value.body == [cnode("foo", 0, end, text)]
|
|
|
|
def test_i_cannot_parse_wrong_input_with_optional(self):
    """An input that is not the optional element is reported as unrecognized."""
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "two")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    unrecognized = UnrecognizedTokensNode(0, 0, [t("two")])
    assert return_value == [unrecognized]
|
|
|
|
def test_i_can_use_reference(self):
    """
    A grammar can be a plain reference to another concept ('bar: foo').

    When several concepts match the same input, no 'choice' concept is created:
    one return value is produced for every possible graph.
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "one two")
    assert len(res) == 2
    foo_result, bar_result = res[0], res[1]

    assert foo_result.status
    assert context.sheerka.isinstance(foo_result.value, BuiltinConcepts.PARSER_RESULT)
    assert foo_result.value.body == [cnode("foo", 0, 2, "one two")]
    found_foo = foo_result.value.body[0].concept
    assert cbody(found_foo) == DoNotResolve("one two")

    assert bar_result.status
    assert context.sheerka.isinstance(bar_result.value, BuiltinConcepts.PARSER_RESULT)
    assert bar_result.value.body == [cnode("bar", 0, 2, "one two")]
    found_bar = bar_result.value.body[0].concept
    assert cbody(found_bar) == get_expected(foo, "one two")
    # the body and the 'foo' property are the very same concept object
    assert cprop(found_bar, "foo") is cbody(found_bar)
|
|
|
|
def test_i_can_use_a_reference_with_a_body(self):
    """
    Same scenario as test_i_can_use_reference(), except that the
    referenced concept 'foo' already has a body.

    :return:
    """
    foo = Concept(name="foo", body="'foo'")
    bar = Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "one two")
    assert len(res) == 2
    foo_result, bar_result = res[0], res[1]

    assert foo_result.status
    assert context.sheerka.isinstance(foo_result.value, BuiltinConcepts.PARSER_RESULT)
    assert foo_result.value.body == [cnode("foo", 0, 2, "one two")]
    found_foo = foo_result.value.body[0].concept
    # the declared body is kept and no compiled body is added
    assert found_foo.metadata.body == "'foo'"
    assert cbody(found_foo) is None

    assert bar_result.status
    assert context.sheerka.isinstance(bar_result.value, BuiltinConcepts.PARSER_RESULT)
    assert bar_result.value.body == [cnode("bar", 0, 2, "one two")]
    found_bar = bar_result.value.body[0].concept
    assert cbody(found_bar) == foo
    # the body and the 'foo' property are the very same concept object
    assert cprop(found_bar, "foo") is cbody(found_bar)
|
|
|
|
def test_i_can_use_context_reference_with_multiple_levels(self):
    """
    Same scenario as the simple reference test, with a reference to a
    reference: baz -> bar -> foo.

    :return:
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar}
    context, parser = self.init([foo, bar, baz], grammar)

    res = parser.parse(context, "one two")
    assert len(res) == 3
    foo_result, bar_result, baz_result = res[0], res[1], res[2]

    assert foo_result.status
    assert context.sheerka.isinstance(foo_result.value, BuiltinConcepts.PARSER_RESULT)
    assert foo_result.value.body == [cnode("foo", 0, 2, "one two")]
    found_foo = foo_result.value.body[0].concept
    assert cbody(found_foo) == DoNotResolve("one two")

    assert bar_result.status
    assert context.sheerka.isinstance(bar_result.value, BuiltinConcepts.PARSER_RESULT)
    assert bar_result.value.body == [cnode("bar", 0, 2, "one two")]
    found_bar = bar_result.value.body[0].concept
    assert cbody(found_bar) == get_expected(foo, "one two")
    assert cprop(found_bar, "foo") is cbody(found_bar)

    assert baz_result.status
    assert context.sheerka.isinstance(baz_result.value, BuiltinConcepts.PARSER_RESULT)
    assert baz_result.value.body == [cnode("baz", 0, 2, "one two")]
    found_baz = baz_result.value.body[0].concept
    expected_foo = get_expected(foo, "one two")
    # baz's body is the expected 'bar', itself wrapping the expected 'foo'
    assert cbody(found_baz) == get_expected(bar, expected_foo)
    assert cprop(found_baz, "foo") == expected_foo
    assert cprop(found_baz, "bar") is cbody(found_baz)
|
|
|
|
def test_order_is_not_important_when_using_references(self):
    """
    Same scenario as test_i_can_use_reference(), but 'bar' is declared
    before 'foo' in the grammar, so the results come in the other order.

    :return:
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {bar: foo, foo: Sequence("one", "two")}
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "one two")

    assert len(res) == 2
    # results follow the grammar declaration order
    for index, name in enumerate(("bar", "foo")):
        assert res[index].value.body == [cnode(name, 0, 2, "one two")]
|
|
|
|
def test_i_can_parse_when_reference(self):
    """A referenced concept used inside a Sequence is exposed as a compiled property of the match."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
    context, parser = self.init([foo, bar], grammar)

    # (input, part of the input matched by 'foo')
    for text, foo_part in (("twenty two", "twenty"), ("thirty one", "thirty")):
        res = parser.parse(context, text)
        assert res.status
        assert res.value.body == [cnode("bar", 0, 2, text)]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve(text)
        assert cprop(concept_found, "foo") == get_expected(foo, foo_part)

    # 'foo' alone is still recognized as 'foo'
    res = parser.parse(context, "twenty")
    assert res.status
    assert res.value.body == [cnode("foo", 0, 0, "twenty")]
    concept_found = res.value.body[0].concept
    assert cbody(concept_found) == DoNotResolve("twenty")
|
|
|
|
def test_i_can_parse_when_reference_has_a_body(self):
    """When the referenced concept already has a body, that body is kept as is."""
    foo = Concept(name="foo", body="'one'")
    bar = Concept(name="bar")
    grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
    context, parser = self.init([foo, bar], grammar)

    as_bar = parser.parse(context, "twenty two")
    assert as_bar.status
    assert as_bar.value.body == [cnode("bar", 0, 2, "twenty two")]
    found_bar = as_bar.value.body[0].concept
    assert cbody(found_bar) == DoNotResolve("twenty two")
    assert cprop(found_bar, "foo") == foo

    as_foo = parser.parse(context, "twenty")
    assert as_foo.status
    assert as_foo.value.body == [cnode("foo", 0, 0, "twenty")]
    found_foo = as_foo.value.body[0].concept
    assert found_foo.metadata.body == "'one'"
|
|
|
|
def test_i_can_parse_multiple_results(self):
    """Two concepts matching the same input give two results, in grammar order."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three"))
    }
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "one two")
    assert len(res) == 2

    for index, name in enumerate(("bar", "foo")):
        result = res[index]
        assert result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [cnode(name, 0, 2, "one two")]
        concept_found = result.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve("one two")
        assert len(concept_found.props) == 0
|
|
|
|
def test_i_can_parse_multiple_results_times_two(self):
    """Two ambiguous matches in a row give every combination, i.e. four results."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three"))
    }
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "one two one two")
    assert len(res) == 4

    # expected (first concept, second concept) of each result, in order
    combinations = (("bar", "bar"), ("foo", "bar"), ("bar", "foo"), ("foo", "foo"))
    for index, (first, second) in enumerate(combinations):
        result = res[index]
        assert result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [short_cnode(first, "one two"), short_cnode(second, "one two")]
|
|
|
|
def test_i_can_parse_multiple_results_when_reference(self):
    """
    TODO: there should not be two answers, as the one with 'bar' is totally useless.
    Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match.

    :return:
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
        foo: OrderedChoice("twenty", "thirty")
    }
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "twenty")
    assert len(res) == 2

    for index, name in enumerate(("bar", "foo")):
        result = res[index]
        assert result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [cnode(name, 0, 0, "twenty")]
|
|
|
|
def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
    """Concepts referenced by a grammar do not need a grammar entry of their own."""
    one = Concept(name="one")
    two = Concept(name="two")
    foo = Concept(name="foo")
    grammar = {foo: Sequence("twenty", OrderedChoice(one, two))}
    context, parser = self.init([one, two, foo], grammar)

    with_two = parser.parse(context, "twenty two")
    assert with_two.status
    assert with_two.value.body == [cnode("foo", 0, 2, "twenty two")]
    concept_found = with_two.value.body[0].concept
    assert cbody(concept_found) == DoNotResolve("twenty two")
    assert cprop(concept_found, "two") == get_expected(two, "two")

    with_one = parser.parse(context, "twenty one")
    assert with_one.status
    assert with_one.value.body == [cnode("foo", 0, 2, "twenty one")]
|
|
|
|
def test_i_can_initialize_when_cyclic_reference(self):
    """A grammar referencing its own concept by name is stored with the name resolved to the concept."""
    foo = Concept(name="foo")
    context, parser = self.init([foo], {foo: Optional("one", ConceptExpression("foo"))})

    resolved = Optional("one", ConceptExpression(foo, rule_name="foo"))
    assert parser.concepts_grammars[foo] == resolved
|
|
|
|
def test_i_cannot_initialize_when_cyclic_reference_when_concept_is_under_construction_and_not_known(self):
    """When the concept is not registered anywhere, the self reference stays a plain name."""
    foo = Concept(name="foo").init_key()
    grammar = {foo: Optional("one", ConceptExpression("foo"))}

    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(self.get_context(), grammar)

    unresolved = Optional("one", ConceptExpression("foo", rule_name="foo"))
    assert bnf_parser.concepts_grammars[foo] == unresolved
|
|
|
|
def test_i_can_initialize_when_cyclic_reference_when_concept_is_under_construction_and_known(self):
    """When the concept is registered in context.concepts, the self reference is resolved to it."""
    foo = Concept(name="foo").init_key()
    grammar = {foo: Optional("one", ConceptExpression("foo"))}

    context = self.get_context()
    context.concepts["foo"] = foo

    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(context, grammar)

    resolved = Optional("one", ConceptExpression(foo, rule_name="foo"))
    assert bnf_parser.concepts_grammars[foo] == resolved
|
|
|
|
def test_i_can_parse_concept_reference_that_is_group(self):
    """
    If 'one' is a 'number', then 'number' is a group.
    A group can be found under the sdp entry 'all_<group_name>'.
    """
    context = self.get_context()
    one = Concept(name="one")
    two = Concept(name="two")
    number = Concept(name="number")
    foo = Concept(name="foo")
    for concept in (one, two, number, foo):
        context.sheerka.set_id_if_needed(concept, False)
        context.sheerka.add_in_cache(concept)

    # 'one' and 'two' both belong to the 'number' group
    for member in (one, two):
        context.sheerka.add_concept_to_set(context, member, number)

    parser = BnfNodeParser()
    parser.initialize(context, {foo: Sequence("twenty", number)})

    # (input, group member matched, name of that member)
    for text, member, member_name in (("twenty two", two, "two"), ("twenty one", one, "one")):
        res = parser.parse(context, text)
        assert res.status
        assert res.value.body == [cnode("foo", 0, 2, text)]
        concept_found = res.value.body[0].concept
        assert cbody(concept_found) == DoNotResolve(text)
        assert cprop(concept_found, member_name) == get_expected(member, member_name)
        assert cprop(concept_found, "number") == get_expected(number, get_expected(member, member_name))
|
|
|
|
def test_i_can_parse_zero_or_more(self):
    """ZeroOrMore matches repeated occurrences; each occurrence is a child of the underlying node."""
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "one one")

    assert res.status
    assert return_value == [cnode("foo", 0, 2, "one one")]

    occurrences = [u("one", 0, 0), u("one", 2, 2)]
    assert return_value[0].underlying == u(grammar[foo], 0, 2, occurrences)

    assert cbody(return_value[0].concept) == DoNotResolve("one one")
|
|
|
|
def test_i_can_parse_sequence_and_zero_or_more(self):
    """A Sequence starting with ZeroOrMore matches with several occurrences and with none."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(ZeroOrMore("one"), "two")}
    context, parser = self.init([foo], grammar)

    # (input, expected end position of the 'foo' node)
    for text, end in (("one one two", 4), ("two", 0)):
        res = parser.parse(context, text)
        assert res.status
        assert res.value.value == [cnode("foo", 0, end, text)]
|
|
|
|
def test_i_cannot_parse_zero_and_more_when_wrong_entry(self):
    """
    ZeroOrMore does not make unknown input acceptable.

    Input following a valid occurrence, or input with no valid occurrence
    at all, is reported as unrecognized and the parsing fails.
    """
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}
    # self.init() already returns a parser initialized with this grammar;
    # the former second BnfNodeParser() + initialize() pair was a redundant duplicate.
    context, parser = self.init([foo], grammar)

    res = parser.parse(context, "one two")
    assert not res.status
    assert res.value.value == [
        cnode("foo", 0, 0, "one"),
        UnrecognizedTokensNode(1, 2, [t(" "), t("two")])
    ]

    res = parser.parse(context, "two")
    assert not res.status
    assert res.value.value == [
        UnrecognizedTokensNode(0, 0, [t("two")])
    ]
|
|
|
|
def test_i_can_parse_zero_and_more_with_separator(self):
    """ZeroOrMore accepts a separator, with or without whitespace around it."""
    foo = Concept(name="foo")
    text = "one, one , one"

    context, res, wrapper, return_value = self.execute([foo], {foo: ZeroOrMore("one", sep=",")}, text)

    assert res.status
    assert return_value == [cnode("foo", 0, 7, "one, one , one")]
|
|
|
|
def test_that_zero_and_more_is_greedy(self):
    """ZeroOrMore takes all occurrences rather than leaving some to the single-word 'bar' rule."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: ZeroOrMore("one"), bar: "one"}

    # only 'foo' is registered as a concept; 'bar' is present in the grammar only
    context, res, wrapper, return_value = self.execute([foo], grammar, "one one one")

    assert res.status
    whole_input = cnode("foo", 0, 4, "one one one")
    assert return_value == [whole_input]
|
|
|
|
def test_i_can_parse_one_and_more(self):
    """OneOrMore matches repeated occurrences; each occurrence is a child of the underlying node."""
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "one one")

    assert res.status
    assert return_value == [cnode("foo", 0, 2, "one one")]

    occurrences = [u("one", 0, 0), u("one", 2, 2)]
    assert return_value[0].underlying == u(grammar[foo], 0, 2, occurrences)
|
|
|
|
def test_i_can_parse_sequence_and_one_or_more(self):
    """A Sequence starting with OneOrMore needs at least one occurrence to match."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(OneOrMore("one"), "two")}
    context, parser = self.init([foo], grammar)

    matched = parser.parse(context, "one one two")
    assert matched.status
    assert matched.value.value == [cnode("foo", 0, 4, "one one two")]

    # no occurrence at all: the input is unrecognized
    rejected = parser.parse(context, "two")
    assert not rejected.status
    assert rejected.value.value == [UnrecognizedTokensNode(0, 0, [t("two")])]
|
|
|
|
def test_i_can_parse_one_and_more_with_separator(self):
    """OneOrMore accepts a separator; only the occurrences end up as children, not the separators."""
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one", sep=",")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one")

    assert res.status
    assert return_value == [cnode("foo", 0, 7, "one, one , one")]

    occurrences = [u("one", 0, 0), u("one", 3, 3), u("one", 7, 7)]
    assert return_value[0].underlying == u(grammar[foo], 0, 7, occurrences)
|
|
|
|
def test_that_one_and_more_is_greedy(self):
    """OneOrMore takes all occurrences rather than leaving some to the single-word 'bar' rule."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: OneOrMore("one"), bar: "one"}

    # only 'foo' is registered as a concept; 'bar' is present in the grammar only
    context, res, wrapper, return_value = self.execute([foo], grammar, "one one one")

    assert res.status
    whole_input = cnode("foo", 0, 4, "one one one")
    assert return_value == [whole_input]
|
|
|
|
def test_i_can_detect_infinite_recursion(self):
    """Two concepts referencing each other directly are both left out of the parser grammars."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {bar: foo, foo: bar}

    parser = BnfNodeParser()
    parser.initialize(self.get_context(), grammar)

    for concept in (bar, foo):
        assert concept not in parser.concepts_grammars
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(self):
    """
    A cycle going through the first alternative of an OrderedChoice is rejected,
    while the same cycle behind a terminal first alternative is accepted.
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")

    recursive_grammar = {
        bar: foo,
        foo: OrderedChoice(bar, "foo")
    }
    parser = BnfNodeParser()
    parser.initialize(self.get_context(), recursive_grammar)

    # both concepts are removed because of the infinite recursion
    assert foo not in parser.concepts_grammars
    assert bar not in parser.concepts_grammars

    # the other way around is possible
    valid_grammar = {
        bar: foo,
        foo: OrderedChoice("foo", bar)
    }
    context, parser = self.init([foo, bar], valid_grammar)

    assert foo in parser.concepts_grammars
    assert bar in parser.concepts_grammars

    res = parser.parse(context, "foo")
    assert len(res) == 2
    for index, name in enumerate(("bar", "foo")):
        assert res[index].status
        assert res[index].value.body == [cnode(name, 0, 0, "foo")]
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_sequence(self):
    """A cycle going through an element of a Sequence removes both concepts from the grammars."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: foo,
        foo: Sequence("one", bar, "two")
    }

    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(self.get_context(), grammar)

    # both are removed because of the infinite recursion
    for concept in (foo, bar):
        assert concept not in bnf_parser.concepts_grammars
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(self):
    """A cycle through an OrderedChoice nested in a Sequence removes both concepts from the grammars."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: foo,
        foo: Sequence("one", OrderedChoice(bar, "other"), "two")
    }

    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(self.get_context(), grammar)

    # both are removed because of the infinite recursion
    for concept in (foo, bar):
        assert concept not in bnf_parser.concepts_grammars
|
|
|
|
def test_infinite_recursion_does_not_fail_if_a_concept_is_missing(self):
    """Referencing a concept that has no grammar entry does not break initialization."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")

    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(self.get_context(), {foo: bar})

    assert foo in bnf_parser.concepts_grammars
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_optional(self):
    """TODO: infinite recursion with Optional. Placeholder, nothing is checked yet."""
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more(self):
    """TODO: infinite recursion with ZeroOrMore. Placeholder, nothing is checked yet."""
|
|
|
|
def test_i_can_detect_indirect_infinite_recursion_with_one_and_more(self):
    """TODO: infinite recursion with OneOrMore. Placeholder, nothing is checked yet."""
|
|
|
|
def test_i_can_visit_parsing_expression(self):
    """
    Walk Sequence(mult, Optional(Sequence("+", add))) with a ConceptVisitor and
    check that what it collected, once sorted, equals ["add", "mult"].
    """
    mult, add = Concept(name="mult"), Concept(name="add")

    visitor = ConceptVisitor()
    expression = Sequence(mult, Optional(Sequence("+", add)))
    visitor.visit(expression)

    collected = sorted(visitor.concepts)
    assert collected == ["add", "mult"]
|
|
|
|
def test_i_can_initialize_rule_names(self):
    """
    Check the rule_name carried by each entry of the body returned by
    initialize(): "" for foo (a Sequence) and "foo" for bar (defined as foo).
    """
    context = self.get_context()
    foo, bar = Concept(name="foo"), Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}

    parser = BnfNodeParser()
    initialized = parser.initialize(context, grammar).body

    for concept, expected_rule_name in ((foo, ""), (bar, "foo")):
        assert initialized[concept].rule_name == expected_rule_name
|
|
|
|
@pytest.mark.parametrize("text, end_position", [
    ("foo", 0),
    ("foo bar", 2),
    ("foo bar ", 3),
    (" foo bar ", 4),
])
def test_cannot_parser_unknown_concepts(self, text, end_position):
    """
    With no concept and an empty grammar, parsing fails and the whole input
    comes back as a single UnrecognizedTokensNode.

    :param text: input handed to the parser
    :param end_position: expected end index of the unrecognized node
    """
    context, res, wrapper, return_value = self.execute([], {}, text)

    # every token produced by the Tokenizer except the last one
    all_but_last_token = list(Tokenizer(text))[:-1]

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    expected_nodes = [UnrecognizedTokensNode(0, end_position, all_but_last_token)]
    assert return_value == expected_nodes
|
|
|
|
def test_i_cannot_parse_when_part_of_the_input_is_unrecognized(self):
    """
    "one" and "two" have a grammar, " three" has none: the parse fails and the
    result holds both concept nodes followed by the unrecognized tokens.
    """
    one, two = Concept(name="one"), Concept(name="two")
    grammar = {one: "one", two: "two"}

    context, res, wrapper, return_value = self.execute([one, two], grammar, "one two three")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    expected_nodes = [
        ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)),
        ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)),
        UnrecognizedTokensNode(3, 4, [t(" "), t("three")]),
    ]
    assert return_value == expected_nodes
|
|
|
|
def test_i_cannot_parse_when_wrong_sequence(self):
    """
    foo matches "one two three"; the extra " one" at the end cannot complete a
    second sequence, so the parse fails and the extra tokens are unrecognized.
    """
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "one two three one")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    expected_nodes = [
        short_cnode("foo", "one two three"),
        UnrecognizedTokensNode(5, 6, [t(" "), t("one")]),
    ]
    assert return_value == expected_nodes
|
|
|
|
def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file(self):
    """
    The input "one two" stops before the third element of foo's sequence:
    the parse fails and every token is reported as unrecognized.
    """
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}

    context, res, wrapper, return_value = self.execute([foo], grammar, "one two")

    assert not res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)

    expected_nodes = [UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")])]
    assert return_value == expected_nodes
|
|
|
|
def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(self):
    """
    Both bar and foo match "one two"; the trailing " four five" is unknown.
    Two failed results are expected, bar first then foo, each ending with the
    same unrecognized tokens.
    """
    foo, bar = Concept(name="foo"), Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "one two four five")

    assert len(res) == 2
    for index, concept_name in enumerate(("bar", "foo")):
        result = res[index]
        assert not result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [
            cnode(concept_name, 0, 2, "one two"),
            UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]),
        ]
|
|
|
|
def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(self):
    """
    The input starts with the unknown "four five " and ends with "one two",
    which both bar and foo match. Two failed results are expected, bar first
    then foo, each starting with the same unrecognized tokens.
    """
    foo, bar = Concept(name="foo"), Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "four five one two")

    assert len(res) == 2
    for index, concept_name in enumerate(("bar", "foo")):
        result = res[index]
        assert not result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            cnode(concept_name, 4, 6, "one two"),
        ]
|
|
|
|
def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(self):
    """
    "one two" (matched by both bar and foo) sits between the unknown
    "four five " and " six seven". Two failed results are expected, bar first
    then foo, each framed by the same unrecognized tokens.
    """
    foo, bar = Concept(name="foo"), Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = self.init([foo, bar], grammar)

    res = parser.parse(context, "four five one two six seven")

    assert len(res) == 2
    for index, concept_name in enumerate(("bar", "foo")):
        result = res[index]
        assert not result.status
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            cnode(concept_name, 4, 6, "one two"),
            UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
        ]
|
|
|
|
def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(self):
    """
    "one two" is matched by both bar and foo, "six" by baz, and the
    " four five " in between is unknown. Two failed results are expected,
    bar first then foo, each with the unrecognized tokens between the concept
    node and the baz node.
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
        baz: StrMatch("six"),
    }
    # self.init() provides the context used below; no separate get_context() call is needed
    context, parser = self.init([foo, bar, baz], grammar)

    res = parser.parse(context, "one two four five six")
    assert len(res) == 2
    assert not res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [
        cnode("bar", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
        cnode("baz", 8, 8, "six"),
    ]

    assert not res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [
        cnode("foo", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
        cnode("baz", 8, 8, "six"),
    ]
|
|
|
|
def test_i_can_get_the_inner_concept_when_possible(self):
    """
    foo wraps concept one inside Optional(ZeroOrMore(...)). After parsing "one",
    the body of the concept found must equal the expected one concept and the
    "one" property must be the very same object as that body.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))}

    context, res, wrapper, return_value = self.execute([foo, one], grammar, "one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [cnode("foo", 0, 0, "one")]
    concept_found = return_value[0].concept
    assert cbody(concept_found) == get_expected(one, "one")
    # `is` holds both operands alive during the comparison; comparing id() values
    # of temporaries can match by accident once the first object is released
    assert cprop(concept_found, "one") is cbody(concept_found)
|
|
|
|
def test_i_can_get_the_inner_concept_when_possible_with_rule_name(self):
    """
    Same grammar shape as the unnamed variant, with rule names "zero", "opt"
    and "seq" on the nested expressions. After parsing "one", the properties
    "one", "zero", "opt" and "seq" must each be the very same object as the
    body of the concept found.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: Sequence(
        Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"),
        ZeroOrMore("one"), rule_name="seq")}

    context, res, wrapper, return_value = self.execute([foo, one], grammar, "one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [cnode("foo", 0, 0, "one")]
    concept_found = return_value[0].concept
    assert cbody(concept_found) == get_expected(one, "one")
    # `is` holds both operands alive during the comparison; comparing id() values
    # of temporaries can match by accident once the first object is released
    for prop_name in ("one", "zero", "opt", "seq"):
        assert cprop(concept_found, prop_name) is cbody(concept_found)
|
|
|
|
def test_i_get_multiple_props_when_zero_or_more(self):
    """
    foo is ZeroOrMore(one). After parsing "one one one", the "one" property
    must hold three entries, each equal to the expected one concept and each
    a distinct object.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: ZeroOrMore(one)}

    context, res, wrapper, return_value = self.execute([foo, one], grammar, "one one one")
    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [cnode("foo", 0, 4, "one one one")]
    concept_found = return_value[0].concept
    assert cbody(concept_found) == DoNotResolve("one one one")
    assert len(concept_found.compiled["one"]) == 3

    # fetch the property once so the identity checks below compare entries of one list
    props = cprop(concept_found, "one")
    assert props[0] == get_expected(one)
    assert props[1] == get_expected(one)
    assert props[2] == get_expected(one)
    # `is not` compares live objects; id() values of temporaries may be reused
    assert props[0] is not props[1]
    assert props[1] is not props[2]
    assert props[2] is not props[0]
|
|
|
|
def test_i_get_multiple_props_when_zero_or_more_and_different_values(self):
    """
    foo repeats Sequence(one, "ok") named "seq", and one is a choice between
    "one", "un" and "uno". After parsing "one ok un ok uno ok", the "one" and
    "seq" properties must each list the three matches in input order.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {
        foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")),
        one: OrderedChoice("one", "un", "uno"),
    }

    context, res, wrapper, return_value = self.execute([foo, one], grammar, "one ok un ok uno ok")
    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [short_cnode("foo", "one ok un ok uno ok")]

    concept_found = return_value[0].concept
    for index, matched_text in enumerate(("one", "un", "uno")):
        assert cprop(concept_found, "one")[index] == get_expected(one, matched_text)
    for index, matched_text in enumerate(("one ok", "un ok", "uno ok")):
        assert cprop(concept_found, "seq")[index] == DoNotResolve(matched_text)
|
|
|
|
@pytest.mark.parametrize("rule, expected", [
    (StrMatch("string"), "'string'"),
    (StrMatch("string", rule_name="rule_name"), "'string'=rule_name"),
    (Sequence(StrMatch("foo"), StrMatch("bar")), "('foo' 'bar')"),
    (Sequence(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo' 'bar')=rule_name"),
    (OrderedChoice(StrMatch("foo"), StrMatch("bar")), "('foo'|'bar')"),
    (OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo'|'bar')=rule_name"),
    (Optional(StrMatch("foo")), "'foo'?"),
    (Optional(StrMatch("foo"), rule_name="rule_name"), "'foo'?=rule_name"),
    (ZeroOrMore(StrMatch("foo")), "'foo'*"),
    (ZeroOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'*=rule_name"),
    (OneOrMore(StrMatch("foo")), "'foo'+"),
    (OneOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'+=rule_name"),
    (
        Sequence(
            Optional(StrMatch("foo"), rule_name="a"),
            ZeroOrMore(StrMatch("bar"), rule_name="b"),
            OneOrMore(StrMatch("baz"), rule_name="c"),
            rule_name="d"),
        "('foo'?=a 'bar'*=b 'baz'+=c)=d",
    ),
    (
        OrderedChoice(
            Optional(StrMatch("foo"), rule_name="a"),
            ZeroOrMore(StrMatch("bar"), rule_name="b"),
            OneOrMore(StrMatch("baz"), rule_name="c"),
            rule_name="d"),
        "('foo'?=a|'bar'*=b|'baz'+=c)=d",
    ),
    (
        Sequence(
            OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="a"),
            OrderedChoice(StrMatch("x"), StrMatch("y"), rule_name="b"),
            rule_name="c"),
        "(('foo'|'bar')=a ('x'|'y')=b)=c",
    ),
])
def test_i_can_encode_grammar(self, rule, expected):
    """
    Encode the grammar of foo, compare it with the expected text, then feed
    that text to BnfParser and check that it yields the original rule.

    :param rule: parsing expression registered as the grammar of foo
    :param expected: text expected under the "c:foo|1001:" key
    """
    foo = Concept(name="foo")
    context, parser = self.init([foo], {foo: rule})

    encoded_foo = parser.encode_grammar(parser.concepts_grammars)["c:foo|1001:"]
    assert encoded_foo == expected

    # round trip: the encoded text must parse back to an equal expression
    parse_res = BnfParser().parse(context, encoded_foo)
    assert parse_res.status
    assert parse_res.value.value == rule
|
|
|
|
def test_i_can_encode_grammar_when_concept_simple(self):
    """
    foo is a bare ConceptExpression on bar: the encoded text must be
    "c:bar|1002:=bar" and must parse back to the grammar entry of foo.
    """
    foo, bar = Concept(name="foo"), Concept(name="bar")
    grammar = {foo: ConceptExpression(bar)}
    context, parser = self.init([foo, bar], grammar)

    encoded_foo = parser.encode_grammar(parser.concepts_grammars)["c:foo|1001:"]
    assert encoded_foo == "c:bar|1002:=bar"

    # round trip through the BNF parser
    parse_res = BnfParser().parse(context, encoded_foo)
    assert parse_res.status
    assert parse_res.value.value == grammar[foo]
|
|
|
|
def test_i_can_encode_grammar_when_concepts(self):
    """
    foo mixes a string match with concept expressions on bar and baz inside a
    named OrderedChoice and Sequence. The encoded text is compared with the
    expected string, then parsed back and compared with the grammar entry.
    """
    foo, bar, baz = Concept(name="foo"), Concept(name="bar"), Concept(name="baz")
    grammar = {
        foo: Sequence(
            StrMatch("a"),
            OrderedChoice(
                ConceptExpression(bar),
                OneOrMore(ConceptExpression(baz)),
                rule_name="oc"),
            rule_name="s"),
    }
    context, parser = self.init([foo, bar, baz], grammar)

    encoded_foo = parser.encode_grammar(parser.concepts_grammars)["c:foo|1001:"]
    assert encoded_foo == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s"

    # round trip through the BNF parser
    parse_res = BnfParser().parse(context, encoded_foo)
    assert parse_res.status
    assert parse_res.value.value == grammar[foo]
|
|
|
|
def test_i_can_encode_grammar_when_set_concepts(self):
    """
    Same grammar as the plain concepts case, except that bar is referenced
    directly and baz is added to the set bar before initialization. The
    encoded text is compared with the expected string; parsing it back must
    yield a ConceptGroupExpression for bar.
    """
    foo, bar, baz = Concept(name="foo"), Concept(name="bar"), Concept(name="baz")
    grammar = {
        foo: Sequence(
            StrMatch("a"),
            OrderedChoice(
                bar,
                OneOrMore(ConceptExpression(baz)),
                rule_name="oc"),
            rule_name="s"),
    }

    context = self.get_context()
    sheerka = context.sheerka
    for concept in (foo, bar, baz):
        sheerka.add_in_cache(concept)
        sheerka.set_id_if_needed(concept, False)
    sheerka.add_concept_to_set(context, baz, bar)

    parser = BnfNodeParser()
    parser.initialize(context, grammar)

    encoded_foo = parser.encode_grammar(parser.concepts_grammars)["c:foo|1001:"]
    assert encoded_foo == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s"

    parse_res = BnfParser().parse(context, encoded_foo)
    assert parse_res.status

    expected_expression = Sequence(
        StrMatch("a"),
        OrderedChoice(
            ConceptGroupExpression(bar, rule_name="bar"),
            OneOrMore(ConceptExpression(baz, rule_name="baz")),
            rule_name="oc"),
        rule_name="s")
    assert parse_res.value.value == expected_expression
|
|
|
|
def test_i_concept_validation_is_not_set_when_no_variables(self):
    """
    foo is a plain string match: the concept of the first returned node must
    not have metadata.need_validation set.
    """
    foo = Concept(name="foo")

    context, res, wrapper, return_value = self.execute([foo], {foo: "foo"}, "foo")

    parsed_concept = return_value[0].concept
    assert not parsed_concept.metadata.need_validation
|
|
|
|
def test_i_concept_validation_is_set_when_unnamed_variables_are_found(self):
    """
    foo contains an OrderedChoice without rule name: the concept of the first
    returned node must not have metadata.need_validation set.

    NOTE(review): the method name says "is_set" while the assertion expects
    the flag to be unset - confirm which one is intended.
    """
    foo = Concept(name="foo")
    grammar = {foo: Sequence("foo", OrderedChoice("a", "b"))}

    context, res, wrapper, return_value = self.execute([foo], grammar, "foo a")

    parsed_concept = return_value[0].concept
    assert not parsed_concept.metadata.need_validation
|
|
|
|
def test_i_concept_validation_is_set_when_named_variables_are_found(self):
    """
    foo contains an OrderedChoice named "var": the concept of the first
    returned node must have metadata.need_validation set.
    """
    foo = Concept(name="foo")
    grammar = {foo: Sequence("foo", OrderedChoice("a", "b", rule_name="var"))}

    context, res, wrapper, return_value = self.execute([foo], grammar, "foo a")

    parsed_concept = return_value[0].concept
    assert parsed_concept.metadata.need_validation
|
|
|
|
|
|
#
|
|
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(self):
|
|
# context = self.get_context()
|
|
# add = Concept(name="add")
|
|
# mult = Concept(name="mult")
|
|
# atom = Concept(name="atom")
|
|
#
|
|
# grammar = {
|
|
# add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))),
|
|
# mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))),
|
|
# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
|
|
# }
|
|
#
|
|
# parser = BnfNodeParser()
|
|
# parser.register(grammar)
|
|
#
|
|
# # res = parser.parse(context, "1")
|
|
# # assert len(res) == 3 # add, mult, atom
|
|
# #
|
|
# # res = parser.parse(context, "1 * 2")
|
|
# # assert len(res) == 2 # add and mult
|
|
# #
|
|
# # res = parser.parse(context, "1 + 2")
|
|
# # assert res.status
|
|
# # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")]
|
|
#
|
|
# res = parser.parse(context, "1 * 2 + 3")
|
|
# assert res.status
|
|
# assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")]
|
|
|
|
def test_i_can_register_concepts_with_the_same_name(self):
|
|
# TODO : concepts are registered by name,
|
|
# what when two concepts have the same name ?
|
|
pass
|
|
|
|
def test_i_can_parse_very_very_long_input(self):
|
|
# TODO: In the current implementation, all the tokens are loaded in memory
|
|
# It's clearly not the good approach
|
|
pass
|