# Source file: Sheerka-Old/tests/test_ConceptLexerParser.py (Python, 1171 lines, 40 KiB)

import pytest
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \
UnrecognizedTokensNode
from sdp.sheerkaDataProvider import Event
class ConceptVisitor(ParsingExpressionVisitor):
    """Parsing-expression visitor that collects the concepts of ConceptMatch nodes."""

    def __init__(self):
        # A set: a concept referenced several times is only recorded once.
        self.concepts = set()

    def visit_ConceptMatch(self, node):
        """Record the concept carried by ``node``."""
        referenced = node.concept
        self.concepts.add(referenced)
def u(parsing_expression, start, end, children=None):
    """
    Build the expected "underlying" node of a parse result (u stands for underlying).

    :param parsing_expression: a plain string (wrapped into a StrMatch) or a parsing expression
    :param start: start position given to the node
    :param end: end position given to the node
    :param children: child nodes, only forwarded to non-terminal nodes
    :return: a TerminalNode when the expression is a StrMatch, a NonTerminalNode otherwise
    """
    expression = StrMatch(parsing_expression) if isinstance(parsing_expression, str) else parsing_expression

    if not isinstance(expression, StrMatch):
        return NonTerminalNode(expression, start, end, [], children)

    # A terminal node also carries the literal text it is expected to match.
    return TerminalNode(expression, start, end, expression.to_match)
def evaluated(concept):
    """
    Build a fresh concept named after ``concept`` whose body is that same name.

    :param concept: concept whose name is reused for both the name and the body
    :return: the newly created Concept
    """
    # The helper used to build the concept and then drop it, so every caller
    # received None; the new concept is now returned.
    # NOTE(review): the helper's name suggests the concept should also be
    # flagged as evaluated; nothing visible here does so - confirm the intent.
    return Concept(name=concept.name, body=concept.name)
def t(text):
    """
    Build the Token expected for ``text``.

    The kind is derived from the first character: a quote gives a STRING,
    a space gives a WHITESPACE, anything else gives an IDENTIFIER.
    The three trailing Token arguments are always 0.

    :param text: raw text of the token
    :return: the Token
    """
    if text.startswith(("'", '"')):
        kind = TokenKind.STRING
    elif text.startswith(" "):
        kind = TokenKind.WHITESPACE
    else:
        kind = TokenKind.IDENTIFIER
    return Token(kind, text, 0, 0, 0)
def get_context():
    """
    Create an ExecutionContext around a new Sheerka initialized on "mem://".

    :return: the ExecutionContext
    """
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    event = Event()
    return ExecutionContext("sheerka", event, engine)
def get_expected(concept, text=None):
    """
    Build the concept a test expects to find in a parse result.

    :param concept: concept whose name is reused
    :param text: expected body; when falsy, the concept name is used instead
    :return: whatever ``init_key()`` returns for the new Concept
    """
    body = text if text else concept.name
    expected = Concept(name=concept.name, body=body)
    return expected.init_key()
def init(concepts, grammar):
    """
    Prepare a context and a parser for a test.

    :param concepts: concepts to add to the sheerka cache
    :param grammar: grammar handed to the parser initialization
    :return: tuple (context, parser)
    """
    context = get_context()
    sheerka = context.sheerka
    for concept in concepts:
        sheerka.add_in_cache(concept)

    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(context, grammar)
    return context, lexer_parser
def execute(concepts, grammar, text):
    """
    Initialize a parser (see init()) and parse ``text`` with it.

    :param concepts: concepts to add to the sheerka cache
    :param grammar: grammar handed to the parser initialization
    :param text: input to parse
    :return: tuple (context, parse result, result value, value of the result value)
    """
    context, parser = init(concepts, grammar)
    outcome = parser.parse(context, text)
    parser_result = outcome.value
    return context, outcome, parser_result, parser_result.value
@pytest.mark.parametrize("match, text", [
    # every sample is tried first as a plain string, then wrapped in a StrMatch
    (wrap(sample), sample)
    for wrap in (str, StrMatch)
    for sample in ("foo", "'foo'", "1", "3.14", "+")
])
def test_i_can_match_simple_tokens(match, text):
    """A grammar made of a single token (plain string or StrMatch) matches that token."""
    foo = Concept(name="foo")
    context, outcome, parser_result, nodes = execute([foo], {foo: match}, text)

    assert outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    expected_node = ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))
    assert nodes == [expected_node]
def test_i_can_match_multiple_concepts_in_one_input():
    """Each word of "one two one" is matched by its own single-token concept."""
    one = Concept(name="one")
    two = Concept(name="two")
    context, outcome, parser_result, nodes = execute([one, two], {one: "one", two: "two"}, "one two one")

    assert outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    # (concept, matched word, position) in the order the nodes are expected
    expected_nodes = [
        ConceptNode(get_expected(concept), position, position, source=word, underlying=u(word, position, position))
        for concept, word, position in ((one, "one", 0), (two, "two", 2), (one, "one", 4))
    ]
    assert nodes == expected_nodes
def test_i_can_match_sequence():
    """A Sequence of three words matches the three words in that order."""
    source = "one two three"
    foo = Concept(name="foo")
    grammar = {foo: Sequence("one", "two", "three")}
    context, outcome, parser_result, nodes = execute([foo], grammar, source)

    assert outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    words = [u("one", 0, 0), u("two", 2, 2), u("three", 4, 4)]
    expected_node = ConceptNode(
        get_expected(foo, source), 0, 4,
        source=source,
        underlying=u(grammar[foo], 0, 4, words))
    assert nodes == [expected_node]
def test_i_always_choose_the_longest_match():
    """When 'bar' (two words) and 'foo' (three words) can both start, only 'foo' is reported."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", "two", "three"),
    }
    context, outcome, parser_result, nodes = execute([foo, bar], grammar, "one two three")

    assert outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert nodes == [("foo", 0, 4, "one two three")]
def test_i_can_match_several_sequences():
    """Two different sequences can be matched one after the other in the same input."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", "two", "three"),
    }
    context, outcome, parser_result, nodes = execute([foo, bar], grammar, "one two three one two")

    assert outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    expected_nodes = [("foo", 0, 4, "one two three"), ("bar", 6, 8, "one two")]
    assert nodes == expected_nodes
def test_i_can_match_ordered_choice():
    """An OrderedChoice matches any of its alternatives and rejects everything else."""
    foo = Concept(name="foo")
    grammar = {foo: OrderedChoice("one", "two")}
    context, parser = init([foo], grammar)

    # both alternatives are accepted
    for word in ("one", "two"):
        accepted = parser.parse(context, word)
        assert accepted.status
        assert context.sheerka.isinstance(accepted.value, BuiltinConcepts.PARSER_RESULT)
        assert accepted.value.body == [("foo", 0, 0, word)]
        assert accepted.value.body[0].underlying == u(grammar[foo], 0, 0, [u(word, 0, 0)])

    # anything else is reported as unrecognized tokens
    rejected = parser.parse(context, "three")
    assert not rejected.status
    assert context.sheerka.isinstance(rejected.value, BuiltinConcepts.PARSER_RESULT)
    assert rejected.value.value == [UnrecognizedTokensNode(0, 0, [t("three")])]
def test_i_cannot_match_ordered_choice_with_empty_alternative():
    """An input matching nothing is unrecognized, even when the choice has an empty alternative."""
    foo = Concept(name="foo")
    choice_then_two = Sequence(OrderedChoice("one", ""), "two")
    context, outcome, parser_result, nodes = execute([foo], {foo: choice_then_two}, "ok")

    assert not outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert nodes == [UnrecognizedTokensNode(0, 0, [t("ok")])]
def test_i_can_mix_sequences_and_ordered_choices():
    """A Sequence may start with an OrderedChoice; an incomplete sequence is rejected."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
    context, parser = init([foo], grammar)

    res1 = parser.parse(context, "twenty one ok")
    assert res1.status
    assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
    assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4, source="twenty one ok",
                                           underlying=u(grammar[foo], 0, 4, [
                                               u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
                                               u("one", 2, 2),
                                               u("ok", 4, 4)]))]

    res2 = parser.parse(context, "thirty one ok")
    assert res2.status
    assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
    assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4, source="thirty one ok",
                                           underlying=u(grammar[foo], 0, 4, [
                                               u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
                                               u("one", 2, 2),
                                               u("ok", 4, 4)]))]

    # the trailing "ok" is missing: nothing is recognized
    res3 = parser.parse(context, "twenty one")
    assert not res3.status
    # This check used to look at res2.value (copy-paste slip), so the wrapper
    # of the failing parse was never verified.
    assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT)
    assert res3.value.value == [
        UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")])
    ]
def test_i_can_mix_ordered_choices_and_sequences():
    """An OrderedChoice may have a Sequence as one of its alternatives."""
    foo = Concept(name="foo")
    context, parser = init([foo], {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")})

    # (input, expected end position), one entry per alternative
    for text, end in (("twenty thirty", 2), ("one", 0)):
        outcome = parser.parse(context, text)
        assert outcome.status
        assert outcome.value.value == [("foo", 0, end, text)]
def test_i_cannot_parse_empty_optional():
    """Parsing an empty input fails with IS_EMPTY, even when the grammar is an Optional."""
    foo = Concept(name="foo")
    context, parser = init([foo], {foo: Optional("one")})

    outcome = parser.parse(context, "")
    failure = outcome.value

    assert not outcome.status
    assert context.sheerka.isinstance(failure, BuiltinConcepts.IS_EMPTY)
def test_i_can_parse_optional():
    """An Optional matches its inner expression when it is present."""
    foo = Concept(name="foo")
    grammar = {foo: Optional("one")}
    context, outcome, parser_result, nodes = execute([foo], grammar, "one")

    assert outcome.status
    expected_underlying = u(grammar[foo], 0, 0, [u("one", 0, 0)])
    expected_node = ConceptNode(get_expected(foo, "one"), 0, 0, source="one", underlying=expected_underlying)
    assert nodes == [expected_node]
def test_i_can_parse_sequence_starting_with_optional():
    """A Sequence whose first element is Optional matches with and without that element."""
    foo = Concept(name="foo")
    grammar = {foo: Sequence(Optional("twenty"), "one")}
    context, parser = init([foo], grammar)

    # the optional part is present
    with_optional = parser.parse(context, "twenty one")
    assert with_optional.status
    full_underlying = u(grammar[foo], 0, 2, [
        u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
        u("one", 2, 2),
    ])
    assert with_optional.value.body == [
        ConceptNode(get_expected(foo, "twenty one"), 0, 2, source="twenty one", underlying=full_underlying)
    ]

    # the optional part is absent: only the mandatory word is reported
    without_optional = parser.parse(context, "one")
    assert without_optional.status
    short_underlying = u(grammar[foo], 0, 0, [u("one", 0, 0)])
    assert without_optional.value.body == [
        ConceptNode(get_expected(foo, "one"), 0, 0, source="one", underlying=short_underlying)
    ]
def test_i_can_parse_sequence_ending_with_optional():
    """A Sequence whose last element is Optional matches with and without that element."""
    foo = Concept(name="foo")
    context, parser = init([foo], {foo: Sequence("one", "two", Optional("three"))})

    # (input, expected end position)
    for text, end in (("one two three", 4), ("one two", 2)):
        outcome = parser.parse(context, text)
        assert outcome.status
        assert outcome.value.body == [("foo", 0, end, text)]
def test_i_can_parse_sequence_with_optional_in_between():
    """A Sequence with an Optional in the middle matches with and without that element."""
    foo = Concept(name="foo")
    context, parser = init([foo], {foo: Sequence("one", Optional("two"), "three")})

    # (input, expected end position)
    for text, end in (("one two three", 4), ("one three", 2)):
        outcome = parser.parse(context, text)
        assert outcome.status
        assert outcome.value.body == [("foo", 0, end, text)]
def test_i_cannot_parse_wrong_input_with_optional():
    """An Optional grammar does not make an unrelated input valid."""
    foo = Concept(name="foo")
    context, outcome, parser_result, nodes = execute([foo], {foo: Optional("one")}, "two")

    assert not outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert nodes == [UnrecognizedTokensNode(0, 0, [t("two")])]
def test_i_can_use_reference():
    """
    A concept can be defined as a plain reference to another concept.

    When several concepts match the same input, no "choice" concept is created:
    a return value is produced for every possible graph.
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    context, parser = init([foo, bar], {foo: Sequence("one", "two"), bar: foo})

    results = parser.parse(context, "one two")
    assert len(results) == 2

    # first result: 'foo' itself
    foo_result = results[0]
    assert foo_result.status
    assert context.sheerka.isinstance(foo_result.value, BuiltinConcepts.PARSER_RESULT)
    assert foo_result.value.body == [("foo", 0, 2, "one two")]
    matched_foo = foo_result.value.body[0].concept
    assert matched_foo.metadata.is_evaluated
    assert matched_foo.body == "one two"

    # second result: 'bar', reached through its reference to 'foo'
    bar_result = results[1]
    assert bar_result.status
    assert context.sheerka.isinstance(bar_result.value, BuiltinConcepts.PARSER_RESULT)
    assert bar_result.value.body == [("bar", 0, 2, "one two")]
    matched_bar = bar_result.value.body[0].concept
    assert matched_bar.metadata.is_evaluated

    # the body and the prop['foo'] are the same concept 'foo'
    assert isinstance(matched_bar.body, Concept)
    assert matched_bar.body.key == "foo"
    assert matched_bar.body.metadata.is_evaluated
    assert matched_bar.body.body == "one two"
    assert id(matched_bar.props["foo"].value) == id(matched_bar.body)
def test_i_can_use_a_reference_with_a_body():
    """
    Same scenario as test_i_can_use_reference(), except that the concept 'foo'
    already has a body: that body is kept and 'foo' is not flagged as evaluated.
    :return:
    """
    foo = Concept(name="foo", body="'foo'")
    bar = Concept(name="bar")
    context, parser = init([foo, bar], {foo: Sequence("one", "two"), bar: foo})

    results = parser.parse(context, "one two")
    assert len(results) == 2

    # first result: 'foo' itself, with its own body left untouched
    foo_result = results[0]
    assert foo_result.status
    assert context.sheerka.isinstance(foo_result.value, BuiltinConcepts.PARSER_RESULT)
    assert foo_result.value.body == [("foo", 0, 2, "one two")]
    matched_foo = foo_result.value.body[0].concept
    assert not matched_foo.metadata.is_evaluated
    assert matched_foo.body == "'foo'"

    # second result: 'bar', reached through its reference to 'foo'
    bar_result = results[1]
    assert bar_result.status
    assert context.sheerka.isinstance(bar_result.value, BuiltinConcepts.PARSER_RESULT)
    assert bar_result.value.body == [("bar", 0, 2, "one two")]
    matched_bar = bar_result.value.body[0].concept
    assert matched_bar.metadata.is_evaluated

    # the body and the prop['foo'] are the same concept 'foo'
    assert isinstance(matched_bar.body, Concept)
    assert matched_bar.body.key == "foo"
    assert not matched_bar.body.metadata.is_evaluated
    assert matched_bar.body.body == "'foo'"
    assert id(matched_bar.props["foo"].value) == id(matched_bar.body)
def test_i_can_use_context_reference_with_multiple_levels():
    """
    Same scenario as the plain reference tests, with a reference to a reference:
    'baz' refers to 'bar', which refers to 'foo'.
    :return:
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    context, parser = init([foo, bar, baz], {foo: Sequence("one", "two"), bar: foo, baz: bar})

    results = parser.parse(context, "one two")
    assert len(results) == 3

    # level 0: 'foo' itself
    foo_result = results[0]
    assert foo_result.status
    assert context.sheerka.isinstance(foo_result.value, BuiltinConcepts.PARSER_RESULT)
    assert foo_result.value.body == [("foo", 0, 2, "one two")]
    matched_foo = foo_result.value.body[0].concept
    assert matched_foo.body == "one two"
    assert matched_foo.metadata.is_evaluated

    # level 1: 'bar' -> 'foo'
    bar_result = results[1]
    assert bar_result.status
    assert context.sheerka.isinstance(bar_result.value, BuiltinConcepts.PARSER_RESULT)
    assert bar_result.value.body == [("bar", 0, 2, "one two")]
    matched_bar = bar_result.value.body[0].concept
    assert matched_bar.body == get_expected(foo, "one two")
    assert id(matched_bar.props["foo"].value) == id(matched_bar.body)

    # level 2: 'baz' -> 'bar' -> 'foo'
    baz_result = results[2]
    assert baz_result.status
    assert context.sheerka.isinstance(baz_result.value, BuiltinConcepts.PARSER_RESULT)
    assert baz_result.value.body == [("baz", 0, 2, "one two")]
    matched_baz = baz_result.value.body[0].concept
    expected_foo = get_expected(foo, "one two")
    expected_bar = get_expected(bar, expected_foo).set_prop("foo", expected_foo)
    assert matched_baz.body == expected_bar
    assert id(matched_baz.props["bar"].value) == id(matched_baz.body)
def test_order_is_not_important_when_using_references():
    """
    Same scenario as test_i_can_use_reference(), but 'bar' is declared before
    'foo' in the grammar, so the results come back in a different order.
    :return:
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    context, parser = init([foo, bar], {bar: foo, foo: Sequence("one", "two")})

    results = parser.parse(context, "one two")

    assert len(results) == 2
    # results follow the declaration order of the grammar
    for index, name in enumerate(("bar", "foo")):
        assert results[index].value.body == [(name, 0, 2, "one two")]
def test_i_can_parse_when_reference():
    """A concept referenced inside a Sequence is exposed as an evaluated prop of the match."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence(foo, OrderedChoice("one", "two")),
        foo: OrderedChoice("twenty", "thirty"),
    }
    context, parser = init([foo, bar], grammar)

    # (input, word matched by the inner 'foo')
    for text, tens in (("twenty two", "twenty"), ("thirty one", "thirty")):
        outcome = parser.parse(context, text)
        assert outcome.status
        assert outcome.value.body == [("bar", 0, 2, text)]
        matched = outcome.value.body[0].concept
        assert matched.body == text
        assert matched.metadata.is_evaluated
        assert matched.get_prop("foo") == get_expected(foo, tens)
        assert matched.get_prop("foo").metadata.is_evaluated

    # 'foo' alone is still recognized on its own
    outcome = parser.parse(context, "twenty")
    assert outcome.status
    assert outcome.value.body == [("foo", 0, 0, "twenty")]
    matched = outcome.value.body[0].concept
    assert matched.body == "twenty"
    assert matched.metadata.is_evaluated
def test_i_can_parse_when_reference_has_a_body():
    """A referenced concept that already has a body keeps it and is not flagged as evaluated."""
    foo = Concept(name="foo", body="'one'")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence(foo, OrderedChoice("one", "two")),
        foo: OrderedChoice("twenty", "thirty"),
    }
    context, parser = init([foo, bar], grammar)

    # 'bar' is evaluated, its inner 'foo' keeps its declared body
    outcome = parser.parse(context, "twenty two")
    assert outcome.status
    assert outcome.value.body == [("bar", 0, 2, "twenty two")]
    matched_bar = outcome.value.body[0].concept
    assert matched_bar.body == "twenty two"
    assert matched_bar.metadata.is_evaluated
    assert matched_bar.get_prop("foo") == get_expected(foo, "'one'")
    assert not matched_bar.get_prop("foo").metadata.is_evaluated

    # 'foo' alone keeps its declared body as well
    outcome = parser.parse(context, "twenty")
    assert outcome.status
    assert outcome.value.body == [("foo", 0, 0, "twenty")]
    matched_foo = outcome.value.body[0].concept
    assert matched_foo.body == "'one'"
    assert not matched_foo.metadata.is_evaluated
def test_i_can_parse_multiple_results():
    """Two concepts matching the same input give two results, in grammar declaration order."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = init([foo, bar], grammar)

    results = parser.parse(context, "one two")
    assert len(results) == 2

    for index, name in enumerate(("bar", "foo")):
        outcome = results[index]
        assert outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [(name, 0, 2, "one two")]
        matched = outcome.value.body[0].concept
        assert matched.body == "one two"
        assert matched.metadata.is_evaluated
        assert len(matched.props) == 0
def test_i_can_parse_multiple_results_times_two():
    """Two ambiguous matches in a row give one result per combination (2 x 2 = 4)."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = init([foo, bar], grammar)

    results = parser.parse(context, "one two one two")
    assert len(results) == 4

    # expected concept names of each result, in the order the results are returned
    expected_names = [("bar", "bar"), ("foo", "bar"), ("bar", "foo"), ("foo", "foo")]
    for index, (first, second) in enumerate(expected_names):
        outcome = results[index]
        assert outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [(first, "one two"), (second, "one two")]
def test_i_can_parse_multiple_results_when_reference():
    """
    TODO: there should not be two answers, as the one with 'bar' is totally useless.
    Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match.
    :return:
    """
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
        foo: OrderedChoice("twenty", "thirty"),
    }
    context, parser = init([foo, bar], grammar)

    results = parser.parse(context, "twenty")
    assert len(results) == 2

    for index, name in enumerate(("bar", "foo")):
        outcome = results[index]
        assert outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [(name, 0, 0, "twenty")]
def test_i_can_parse_concept_reference_that_is_not_in_grammar():
    """Concepts known to sheerka but absent from the grammar can still be referenced by it."""
    one = Concept(name="one")
    two = Concept(name="two")
    foo = Concept(name="foo")
    grammar = {foo: Sequence("twenty", OrderedChoice(one, two))}

    context, _ = init([one, two, foo], grammar)
    # NOTE(review): init() already returns an initialized parser; this second
    # parser looks redundant - confirm before removing it.
    parser = ConceptLexerParser()
    parser.initialize(context, grammar)

    outcome = parser.parse(context, "twenty two")
    assert outcome.status
    assert outcome.value.body == [("foo", 0, 2, "twenty two")]
    matched = outcome.value.body[0].concept
    assert matched.body == "twenty two"
    assert matched.metadata.is_evaluated
    assert matched.get_prop("two") == get_expected(two, "two")
    assert matched.get_prop("two").metadata.is_evaluated

    outcome = parser.parse(context, "twenty one")
    assert outcome.status
    assert outcome.value.body == [("foo", 0, 2, "twenty one")]
def test_i_can_parse_zero_or_more():
    """ZeroOrMore matches consecutive repetitions of its expression."""
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}
    context, outcome, parser_result, nodes = execute([foo], grammar, "one one")

    assert outcome.status
    assert nodes == [("foo", 0, 2, "one one")]

    repetitions = [u("one", 0, 0), u("one", 2, 2)]
    assert nodes[0].underlying == u(grammar[foo], 0, 2, repetitions)

    matched = nodes[0].concept
    assert matched.body == "one one"
    assert matched.metadata.is_evaluated
def test_i_can_parse_sequence_and_zero_or_more():
    """A Sequence starting with ZeroOrMore matches with several or no repetitions."""
    foo = Concept(name="foo")
    context, parser = init([foo], {foo: Sequence(ZeroOrMore("one"), "two")})

    # (input, expected end position)
    for text, end in (("one one two", 4), ("two", 0)):
        outcome = parser.parse(context, text)
        assert outcome.status
        assert outcome.value.value == [("foo", 0, end, text)]
def test_i_cannot_parse_zero_and_more_when_wrong_entry():
    """Tokens that ZeroOrMore cannot consume are reported as unrecognized."""
    foo = Concept(name="foo")
    grammar = {foo: ZeroOrMore("one")}

    context, _ = init([foo], grammar)
    # NOTE(review): init() already returns an initialized parser; this second
    # parser looks redundant - confirm before removing it.
    parser = ConceptLexerParser()
    parser.initialize(context, grammar)

    # the leading "one" is matched, the rest is not
    partial = parser.parse(context, "one two")
    assert not partial.status
    assert partial.value.value == [
        ("foo", 0, 0, "one"),
        UnrecognizedTokensNode(1, 2, [t(" "), t("two")]),
    ]

    # nothing matches at all
    nothing = parser.parse(context, "two")
    assert not nothing.status
    assert nothing.value.value == [UnrecognizedTokensNode(0, 0, [t("two")])]
def test_i_can_parse_zero_and_more_with_separator():
    """ZeroOrMore accepts a separator between repetitions, with or without spaces around it."""
    text = "one, one , one"
    foo = Concept(name="foo")
    context, outcome, parser_result, nodes = execute([foo], {foo: ZeroOrMore("one", sep=",")}, text)

    assert outcome.status
    assert nodes == [("foo", 0, 7, "one, one , one")]
def test_that_zero_and_more_is_greedy():
    """ZeroOrMore consumes every repetition instead of stopping at the first one."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: ZeroOrMore("one"), bar: "one"}

    # NOTE(review): 'bar' is part of the grammar but is not added to the cache
    # (only [foo] is passed) - confirm this is intended.
    context, outcome, parser_result, nodes = execute([foo], grammar, "one one one")

    assert outcome.status
    assert nodes == [("foo", 0, 4, "one one one")]
def test_i_can_parse_one_and_more():
    """OneOrMore matches consecutive repetitions of its expression."""
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one")}
    context, outcome, parser_result, nodes = execute([foo], grammar, "one one")

    assert outcome.status
    assert nodes == [("foo", 0, 2, "one one")]

    repetitions = [u("one", 0, 0), u("one", 2, 2)]
    assert nodes[0].underlying == u(grammar[foo], 0, 2, repetitions)
def test_i_can_parse_sequence_and_one_or_more():
    """A Sequence starting with OneOrMore needs at least one repetition."""
    foo = Concept(name="foo")
    context, parser = init([foo], {foo: Sequence(OneOrMore("one"), "two")})

    # at least one repetition: accepted
    accepted = parser.parse(context, "one one two")
    assert accepted.status
    assert accepted.value.value == [("foo", 0, 4, "one one two")]

    # no repetition: rejected
    rejected = parser.parse(context, "two")
    assert not rejected.status
    assert rejected.value.value == [UnrecognizedTokensNode(0, 0, [t("two")])]
def test_i_can_parse_one_and_more_with_separator():
    """OneOrMore accepts a separator; the separator does not appear in the underlying children."""
    foo = Concept(name="foo")
    grammar = {foo: OneOrMore("one", sep=",")}
    context, outcome, parser_result, nodes = execute([foo], grammar, "one, one , one")

    assert outcome.status
    assert nodes == [("foo", 0, 7, "one, one , one")]

    repetitions = [u("one", position, position) for position in (0, 3, 7)]
    assert nodes[0].underlying == u(grammar[foo], 0, 7, repetitions)
def test_that_one_and_more_is_greedy():
    """OneOrMore consumes every repetition instead of stopping at the first one."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: OneOrMore("one"), bar: "one"}

    # NOTE(review): 'bar' is part of the grammar but is not added to the cache
    # (only [foo] is passed) - confirm this is intended.
    context, outcome, parser_result, nodes = execute([foo], grammar, "one one one")

    assert outcome.status
    assert nodes == [("foo", 0, 4, "one one one")]
def test_i_can_detect_infinite_recursion():
    """Two concepts that only reference each other are both left out of the parser grammars."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    circular_grammar = {bar: foo, foo: bar}

    parser = ConceptLexerParser()
    parser.initialize(get_context(), circular_grammar)

    for concept in (bar, foo):
        assert concept not in parser.concepts_grammars
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
    """A recursive reference as the first alternative of a choice is rejected; as the second it is accepted."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")

    # the recursive reference comes first in the choice
    recursive_first = {bar: foo, foo: OrderedChoice(bar, "foo")}
    parser = ConceptLexerParser()
    parser.initialize(get_context(), recursive_first)
    # both are removed because of the infinite recursion
    for concept in (foo, bar):
        assert concept not in parser.concepts_grammars

    # the other way around is possible
    recursive_last = {bar: foo, foo: OrderedChoice("foo", bar)}
    context, parser = init([foo, bar], recursive_last)
    for concept in (foo, bar):
        assert concept in parser.concepts_grammars

    results = parser.parse(context, "foo")
    assert len(results) == 2
    for index, name in enumerate(("bar", "foo")):
        assert results[index].status
        assert results[index].value.body == [(name, 0, 0, "foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
    """A recursive reference inside a Sequence removes both concepts from the parser grammars."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    recursive_grammar = {bar: foo, foo: Sequence("one", bar, "two")}

    parser = ConceptLexerParser()
    parser.initialize(get_context(), recursive_grammar)

    # both are removed because of the infinite recursion
    for concept in (foo, bar):
        assert concept not in parser.concepts_grammars
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
    """A recursive reference inside a choice nested in a Sequence removes both concepts."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    recursive_grammar = {
        bar: foo,
        foo: Sequence("one", OrderedChoice(bar, "other"), "two"),
    }

    parser = ConceptLexerParser()
    parser.initialize(get_context(), recursive_grammar)

    # both are removed because of the infinite recursion
    for concept in (foo, bar):
        assert concept not in parser.concepts_grammars
def test_infinite_recursion_does_not_fail_if_a_concept_is_missing():
    """Referencing a concept that has no grammar entry keeps the referencing concept."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")  # deliberately absent from the grammar keys

    parser = ConceptLexerParser()
    parser.initialize(get_context(), {foo: bar})

    assert foo in parser.concepts_grammars
def test_i_can_detect_indirect_infinite_recursion_with_optional():
    """Placeholder. TODO: infinite recursion with Optional."""
def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more():
    """Placeholder. TODO: infinite recursion with ZeroOrMore."""
def test_i_can_detect_indirect_infinite_recursion_with_one_and_more():
    """Placeholder. TODO: infinite recursion with OneOrMore."""
def test_i_can_visit_parsing_expression():
    """The visitor collects every concept referenced in a nested parsing expression."""
    mult = Concept(name="mult")
    add = Concept(name="add")
    expression = Sequence(mult, Optional(Sequence("+", add)))

    visitor = ConceptVisitor()
    visitor.visit(expression)

    collected = sorted(visitor.concepts)
    assert collected == ["add", "mult"]
def test_i_can_initialize_rule_names():
    """After initialization, a plain reference carries the referenced concept name as rule name."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}

    parser = ConceptLexerParser()
    initialized = parser.initialize(context, grammar).body

    assert initialized[foo].rule_name == ""
    assert initialized[bar].rule_name == "foo"
@pytest.mark.parametrize("text, end_position", [
    ("foo", 0),
    ("foo bar", 2),
    ("foo bar ", 3),
    (" foo bar ", 4),
])
def test_cannot_parser_unknown_concepts(text, end_position):
    """With an empty grammar, the whole input comes back as one unrecognized node."""
    context, outcome, parser_result, nodes = execute([], {}, text)
    # The last token produced by the tokenizer is not part of the expected
    # node (presumably an end-of-input marker - TODO confirm).
    all_tokens = list(Tokenizer(text))
    expected_tokens = all_tokens[:-1]

    assert not outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert nodes == [UnrecognizedTokensNode(0, end_position, expected_tokens)]
def test_i_cannot_parse_when_part_of_the_input_is_unrecognized():
    """Recognized concepts are kept, but a trailing unknown word makes the parse fail."""
    one = Concept(name="one")
    two = Concept(name="two")
    context, outcome, parser_result, nodes = execute([one, two], {one: "one", two: "two"}, "one two three")

    assert not outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    expected_nodes = [
        ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)),
        ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)),
        UnrecognizedTokensNode(3, 4, [t(" "), t("three")]),
    ]
    assert nodes == expected_nodes
def test_i_cannot_parse_when_wrong_sequence():
    """A complete sequence followed by the start of another one fails on the leftover tokens."""
    foo = Concept(name="foo")
    context, outcome, parser_result, nodes = execute(
        [foo], {foo: Sequence("one", "two", "three")}, "one two three one")

    assert not outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    leftover = UnrecognizedTokensNode(5, 6, [t(" "), t("one")])
    assert nodes == [("foo", "one two three"), leftover]
def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file():
    """A sequence cut short by the end of the input is entirely unrecognized."""
    foo = Concept(name="foo")
    context, outcome, parser_result, nodes = execute(
        [foo], {foo: Sequence("one", "two", "three")}, "one two")

    assert not outcome.status
    assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert nodes == [UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")])]
def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end():
    """Every candidate result fails when unknown tokens follow an ambiguous match."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = init([foo, bar], grammar)

    results = parser.parse(context, "one two four five")
    assert len(results) == 2

    for index, name in enumerate(("bar", "foo")):
        outcome = results[index]
        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [
            (name, 0, 2, "one two"),
            UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]),
        ]
def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens():
    """Every candidate result fails when unknown tokens precede an ambiguous match."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = init([foo, bar], grammar)

    results = parser.parse(context, "four five one two")
    assert len(results) == 2

    for index, name in enumerate(("bar", "foo")):
        outcome = results[index]
        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            (name, 4, 6, "one two"),
        ]
def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
    """Every candidate result fails when unknown tokens surround an ambiguous match."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
    }
    context, parser = init([foo, bar], grammar)

    results = parser.parse(context, "four five one two six seven")
    assert len(results) == 2

    for index, name in enumerate(("bar", "foo")):
        outcome = results[index]
        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.PARSER_RESULT)
        assert outcome.value.body == [
            UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
            (name, 4, 6, "one two"),
            UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
        ]
def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle():
    """Every candidate result fails when unknown tokens sit between two recognized concepts."""
    # The former leading `context = get_context()` was dropped: its value was
    # overwritten by init() below before ever being used, so it only built a
    # throwaway Sheerka instance.
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    baz = Concept(name="baz")
    grammar = {
        bar: Sequence("one", "two"),
        foo: Sequence("one", OrderedChoice("two", "three")),
        baz: StrMatch("six"),
    }
    context, parser = init([foo, bar, baz], grammar)

    res = parser.parse(context, "one two four five six")
    assert len(res) == 2

    assert not res[0].status
    assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
    assert res[0].value.body == [
        ("bar", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
        ("baz", 8, 8, "six"),
    ]

    assert not res[1].status
    assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
    assert res[1].value.body == [
        ("foo", 0, 2, "one two"),
        UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
        ("baz", 8, 8, "six"),
    ]
def test_i_can_get_the_inner_concept_when_possible():
    """
    A concept reference wrapped in Optional(ZeroOrMore(...)) is still exposed.

    Parsing "one" with 'foo' must succeed, and the resulting concept's body
    must equal the evaluated inner concept 'one', which is also reachable
    through the "one" property.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    inner_rule = Optional(ZeroOrMore(one))
    literal_rule = ZeroOrMore("one")
    grammar = {foo: Sequence(inner_rule, literal_rule)}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", 0, 0, "one")]

    parsed_concept = return_value[0].concept
    assert parsed_concept.body == get_expected(one, "one")
    assert parsed_concept.get_prop("one") == parsed_concept.body
def test_i_can_get_the_inner_concept_when_possible_with_rule_name():
    """
    Every named rule wrapping the inner concept exposes the very same object.

    The grammar nests the concept 'one' inside rules named "zero", "opt" and
    "seq". After parsing "one", the properties "one", "zero", "opt" and "seq"
    must all be the same object as the parsed concept's body.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: Sequence(
        Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"),
        ZeroOrMore("one"), rule_name="seq")}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", 0, 0, "one")]

    concept_found = return_value[0].concept
    assert concept_found.body == get_expected(one, "one")

    # Identity (not mere equality) is required. 'is' keeps both operands alive
    # during the comparison, unlike comparing id() values of temporaries.
    body = concept_found.body
    for prop_name in ("one", "zero", "opt", "seq"):
        assert concept_found.get_prop(prop_name) is body
def test_i_get_multiple_props_when_zero_or_more():
    """
    A repeated concept yields a list property with one entry per repetition.

    Parsing "one one one" with foo = ZeroOrMore(one) must produce a single
    property "one" holding three equal but distinct concept instances.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    grammar = {foo: ZeroOrMore(one)}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one one one")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", 0, 4, "one one one")]

    concept_found = return_value[0].concept
    assert concept_found.body == "one one one"
    assert len(concept_found.props) == 1

    ones = concept_found.get_prop("one")
    assert len(ones) == 3
    assert ones[0] == get_expected(one)
    assert ones[1] == get_expected(one)
    assert ones[2] == get_expected(one)

    # Each repetition must be its own instance; 'is not' is the idiomatic
    # identity check and does not depend on id() values.
    assert ones[0] is not ones[1]
    assert ones[1] is not ones[2]
    assert ones[2] is not ones[0]
def test_i_get_multiple_props_when_zero_or_more_and_different_values():
    """
    Repeated matches with different texts are collected in order.

    'one' accepts "one", "un" or "uno"; 'foo' repeats the named sequence
    "seq" made of 'one' followed by "ok". After parsing, the "one" property
    lists the three evaluated concepts and the "seq" property lists the three
    matched texts.
    """
    foo = Concept(name="foo")
    one = Concept(name="one")
    repeated = ZeroOrMore(Sequence(one, "ok", rule_name="seq"))
    alternatives = OrderedChoice("one", "un", "uno")
    grammar = {foo: repeated, one: alternatives}

    context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok")

    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert return_value == [("foo", "one ok un ok uno ok")]

    parsed_concept = return_value[0].concept

    # One evaluated 'one' concept per repetition, in input order.
    for position, word in enumerate(("one", "un", "uno")):
        assert parsed_concept.get_prop("one")[position] == get_expected(one, word)

    # One matched text per repetition of the "seq" rule, in input order.
    for position, chunk in enumerate(("one ok", "un ok", "uno ok")):
        assert parsed_concept.get_prop("seq")[position] == chunk
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
# context = get_context()
# add = Concept(name="add")
# mult = Concept(name="mult")
# atom = Concept(name="atom")
#
# grammar = {
# add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))),
# mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))),
# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
# }
#
# parser = ConceptLexerParser()
# parser.register(grammar)
#
# # res = parser.parse(context, "1")
# # assert len(res) == 3 # add, mult, atom
# #
# # res = parser.parse(context, "1 * 2")
# # assert len(res) == 2 # add and mult
# #
# # res = parser.parse(context, "1 + 2")
# # assert res.status
# # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")]
#
# res = parser.parse(context, "1 * 2 + 3")
# assert res.status
# assert return_value == [ConceptNode(add, 0, 8, source="1 * 2 + 3")]
def test_i_can_register_concepts_with_the_same_name():
    """
    Placeholder, not implemented yet.

    TODO: concepts are registered by name; decide what should happen
    when two concepts share the same name.
    """
def test_i_can_parse_very_very_long_input():
    """
    Placeholder, not implemented yet.

    TODO: the current implementation loads all the tokens in memory,
    which is clearly not the right approach for very long inputs.
    """