1306 lines
54 KiB
Python
1306 lines
54 KiB
Python
# from ast import Str
|
|
#
|
|
# import pytest
|
|
# from core.builtin_concepts import BuiltinConcepts
|
|
# from core.concept import Concept, ConceptParts, DoNotResolve
|
|
# from core.tokenizer import Tokenizer, TokenKind, Token
|
|
# from parsers.BaseNodeParser import cnode, short_cnode
|
|
# from parsers.BnfParser import BnfParser
|
|
# from parsers.BnfNodeParser_Old import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
|
|
# ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \
|
|
# UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression
|
|
#
|
|
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
|
#
|
|
#
|
|
# class ConceptVisitor(ParsingExpressionVisitor):
|
|
# def __init__(self):
|
|
# self.concepts = set()
|
|
#
|
|
# def visit_ConceptExpression(self, node):
|
|
# self.concepts.add(node.concept)
|
|
#
|
|
#
|
|
# def u(parsing_expression, start, end, children=None):
|
|
# """
|
|
# u stands for underlying
|
|
# :param parsing_expression:
|
|
# :param start:
|
|
# :param end:
|
|
# :param children:
|
|
# :return:
|
|
# """
|
|
# if isinstance(parsing_expression, str):
|
|
# parsing_expression = StrMatch(parsing_expression)
|
|
#
|
|
# if isinstance(parsing_expression, StrMatch):
|
|
# return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)
|
|
#
|
|
# return NonTerminalNode(parsing_expression, start, end, [], children)
|
|
#
|
|
#
|
|
# def evaluated(concept):
|
|
# c = Concept(name=concept.name, body=concept.name)
|
|
#
|
|
#
|
|
# def t(text):
|
|
# if text.startswith("'") or text.startswith('"'):
|
|
# return Token(TokenKind.STRING, text, 0, 0, 0)
|
|
#
|
|
# if text.startswith(" "):
|
|
# return Token(TokenKind.WHITESPACE, text, 0, 0, 0)
|
|
#
|
|
# return Token(TokenKind.IDENTIFIER, text, 0, 0, 0)
|
|
#
|
|
#
|
|
# def get_expected(concept, text=None):
|
|
# c = Concept(name=concept.name)
|
|
# c.compiled[ConceptParts.BODY] = DoNotResolve(text or concept.name)
|
|
# c.init_key()
|
|
# c.metadata.id = concept.id
|
|
# return c
|
|
#
|
|
#
|
|
# def cbody(concept):
|
|
# """cbody stands for compiled body"""
|
|
# if not ConceptParts.BODY in concept.compiled:
|
|
# return None
|
|
# return concept.compiled[ConceptParts.BODY]
|
|
#
|
|
#
|
|
# def cprop(concept, prop_name):
|
|
# """cprop stands for compiled property"""
|
|
# return concept.compiled[prop_name]
|
|
#
|
|
#
|
|
# class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|
#
|
|
# def init(self, concepts, grammar):
|
|
# sheerka = self.get_sheerka(singleton=True)
|
|
# context = self.get_context(sheerka)
|
|
# for c in concepts:
|
|
# context.sheerka.add_in_cache(c)
|
|
# context.sheerka.set_id_if_needed(c, False)
|
|
#
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(context, grammar)
|
|
#
|
|
# return context, parser
|
|
#
|
|
# def execute(self, concepts, grammar, text):
|
|
# context, parser = self.init(concepts, grammar)
|
|
#
|
|
# res = parser.parse(context, text)
|
|
# wrapper = res.value
|
|
# return_value = res.value.value
|
|
#
|
|
# return context, res, wrapper, return_value
|
|
#
|
|
#
|
|
# @pytest.mark.parametrize("match, text", [
|
|
# ("foo", "foo"),
|
|
# ("'foo'", "'foo'"),
|
|
# ("1", "1"),
|
|
# ("3.14", "3.14"),
|
|
# ("+", "+"),
|
|
# (StrMatch("foo"), "foo"),
|
|
# (StrMatch("'foo'"), "'foo'"),
|
|
# (StrMatch("1"), "1"),
|
|
# (StrMatch("3.14"), "3.14"),
|
|
# (StrMatch("+"), "+"),
|
|
# ])
|
|
# def test_i_can_match_simple_tokens(self, match, text):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: match}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, text)
|
|
#
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))]
|
|
#
|
|
#
|
|
# def test_i_can_match_multiple_concepts_in_one_input(self):
|
|
# one = Concept(name="one")
|
|
# two = Concept(name="two")
|
|
# grammar = {one: "one", two: "two"}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([one, two], grammar, "one two one")
|
|
#
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [
|
|
# ConceptNode(get_expected(one), 0, 0, source="one", underlying=u("one", 0, 0)),
|
|
# ConceptNode(get_expected(two), 2, 2, source="two", underlying=u("two", 2, 2)),
|
|
# ConceptNode(get_expected(one), 4, 4, source="one", underlying=u("one", 4, 4)),
|
|
# ]
|
|
#
|
|
#
|
|
# def test_i_can_match_sequence(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence("one", "two", "three")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one two three")
|
|
#
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [
|
|
# ConceptNode(
|
|
# get_expected(foo, "one two three"),
|
|
# 0,
|
|
# 4,
|
|
# source="one two three",
|
|
# underlying=u(grammar[foo], 0, 4, [
|
|
# u("one", 0, 0),
|
|
# u("two", 2, 2),
|
|
# u("three", 4, 4)]))]
|
|
#
|
|
#
|
|
# def test_i_always_choose_the_longest_match(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three")
|
|
#
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [cnode("foo", 0, 4, "one two three")]
|
|
#
|
|
# def test_i_can_match_several_sequences(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three one two")
|
|
#
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [
|
|
# cnode("foo", 0, 4, "one two three"),
|
|
# cnode("bar", 6, 8, "one two"),
|
|
# ]
|
|
#
|
|
# def test_i_can_match_ordered_choice(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: OrderedChoice("one", "two")}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res1 = parser.parse(context, "one")
|
|
# assert res1.status
|
|
# assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res1.value.body == [cnode("foo", 0, 0, "one")]
|
|
# assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)])
|
|
#
|
|
# res2 = parser.parse(context, "two")
|
|
# assert res2.status
|
|
# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res2.value.body == [cnode("foo", 0, 0, "two")]
|
|
# assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)])
|
|
#
|
|
# res3 = parser.parse(context, "three")
|
|
# assert not res3.status
|
|
# assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res3.value.value == [
|
|
# UnrecognizedTokensNode(0, 0, [t("three")])
|
|
# ]
|
|
#
|
|
# def test_i_cannot_match_ordered_choice_with_empty_alternative(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence(OrderedChoice("one", ""), "two")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "ok")
|
|
#
|
|
# assert not res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [
|
|
# UnrecognizedTokensNode(0, 0, [t("ok")])
|
|
# ]
|
|
#
|
|
# def test_i_can_mix_sequences_and_ordered_choices(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
|
|
#
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res1 = parser.parse(context, "twenty one ok")
|
|
# assert res1.status
|
|
# assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4, source="twenty one ok",
|
|
# underlying=u(grammar[foo], 0, 4, [
|
|
# u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
|
|
# u("one", 2, 2),
|
|
# u("ok", 4, 4)]))]
|
|
#
|
|
# res2 = parser.parse(context, "thirty one ok")
|
|
# assert res2.status
|
|
# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4, source="thirty one ok",
|
|
# underlying=u(grammar[foo], 0, 4, [
|
|
# u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
|
|
# u("one", 2, 2),
|
|
# u("ok", 4, 4)]))]
|
|
#
|
|
# res3 = parser.parse(context, "twenty one")
|
|
# assert not res3.status
|
|
# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res3.value.value == [
|
|
# UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")])
|
|
# ]
|
|
#
|
|
# def test_i_can_mix_ordered_choices_and_sequences(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
|
|
#
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "twenty thirty")
|
|
# assert res.status
|
|
# assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")]
|
|
#
|
|
# res = parser.parse(context, "one")
|
|
# assert res.status
|
|
# assert res.value.value == [cnode("foo", 0, 0, "one")]
|
|
#
|
|
# def test_i_cannot_parse_empty_optional(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Optional("one")}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "")
|
|
# return_value = res.value
|
|
#
|
|
# assert not res.status
|
|
# assert context.sheerka.isinstance(return_value, BuiltinConcepts.IS_EMPTY)
|
|
#
|
|
# def test_i_can_parse_optional(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Optional("one")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one")
|
|
#
|
|
# assert res.status
|
|
# assert return_value == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one",
|
|
# underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]
|
|
#
|
|
# def test_i_can_parse_sequence_starting_with_optional(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence(Optional("twenty"), "one")}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "twenty one")
|
|
# assert res.status
|
|
# assert res.value.body == [ConceptNode(
|
|
# get_expected(foo, "twenty one"), 0, 2,
|
|
# source="twenty one",
|
|
# underlying=u(grammar[foo], 0, 2,
|
|
# [
|
|
# u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
|
|
# u("one", 2, 2)]
|
|
# ))]
|
|
#
|
|
# res = parser.parse(context, "one")
|
|
# assert res.status
|
|
# assert res.value.body == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one",
|
|
# underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))]
|
|
#
|
|
# def test_i_can_parse_sequence_ending_with_optional(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence("one", "two", Optional("three"))}
|
|
#
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two three")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 4, "one two three")]
|
|
#
|
|
# res = parser.parse(context, "one two")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 2, "one two")]
|
|
#
|
|
# def test_i_can_parse_sequence_with_optional_in_between(self):
|
|
# foo = Concept(name="foo")
|
|
#
|
|
# grammar = {foo: Sequence("one", Optional("two"), "three")}
|
|
#
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two three")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 4, "one two three")]
|
|
#
|
|
# res = parser.parse(context, "one three")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 2, "one three")]
|
|
#
|
|
# def test_i_cannot_parse_wrong_input_with_optional(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Optional("one")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "two")
|
|
#
|
|
# assert not res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [
|
|
# UnrecognizedTokensNode(0, 0, [t("two")])
|
|
# ]
|
|
#
|
|
# def test_i_can_use_reference(self):
|
|
# # when there are multiple matches for the same input
|
|
# # Do I need to create a choice concept ?
|
|
# # No, create a return value for every possible graph
|
|
#
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {foo: Sequence("one", "two"), bar: foo}
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
# res = parser.parse(context, "one two")
|
|
#
|
|
# assert len(res) == 2
|
|
#
|
|
# assert res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
|
|
# concept_found_1 = res[0].value.body[0].concept
|
|
# assert cbody(concept_found_1) == DoNotResolve("one two")
|
|
#
|
|
# assert res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
|
|
# concept_found_2 = res[1].value.body[0].concept
|
|
# # the body and the prop['foo'] are the same concept 'foo'
|
|
# assert cbody(concept_found_2) == get_expected(foo, "one two")
|
|
# assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))
|
|
#
|
|
# def test_i_can_use_a_reference_with_a_body(self):
|
|
# """
|
|
# Same test as before (test_i_can_use_reference())
|
|
# but this time, the concept 'foo' already has a body.
|
|
# :return:
|
|
# """
|
|
#
|
|
# foo = Concept(name="foo", body="'foo'")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {foo: Sequence("one", "two"), bar: foo}
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
# res = parser.parse(context, "one two")
|
|
#
|
|
# assert len(res) == 2
|
|
#
|
|
# assert res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
|
|
# concept_found_1 = res[0].value.body[0].concept
|
|
# assert concept_found_1.metadata.body == "'foo'"
|
|
# assert cbody(concept_found_1) is None
|
|
#
|
|
# assert res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
|
|
# concept_found_2 = res[1].value.body[0].concept
|
|
# assert cbody(concept_found_2) == foo
|
|
# # the body and the prop['foo'] are the same concept 'foo'
|
|
# assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))
|
|
#
|
|
# def test_i_can_use_context_reference_with_multiple_levels(self):
|
|
# """
|
|
# Same as the previous one, but with a reference to a reference
|
|
# :return:
|
|
# """
|
|
#
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# baz = Concept(name="baz")
|
|
# grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar}
|
|
# context, parser = self.init([foo, bar, baz], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two")
|
|
# assert len(res) == 3
|
|
#
|
|
# assert res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
|
|
# concept_found_1 = res[0].value.body[0].concept
|
|
# assert cbody(concept_found_1) == DoNotResolve("one two")
|
|
#
|
|
# assert res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
|
|
# concept_found_2 = res[1].value.body[0].concept
|
|
# assert cbody(concept_found_2) == get_expected(foo, "one two")
|
|
# assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))
|
|
#
|
|
# assert res[2].status
|
|
# assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[2].value.body == [cnode("baz", 0, 2, "one two")]
|
|
# concept_found_3 = res[2].value.body[0].concept
|
|
# expected_foo = get_expected(foo, "one two")
|
|
# assert cbody(concept_found_3) == get_expected(bar, expected_foo)
|
|
# assert cprop(concept_found_3, "foo") == expected_foo
|
|
# assert id(cprop(concept_found_3, "bar")) == id(cbody(concept_found_3))
|
|
#
|
|
# def test_order_is_not_important_when_using_references(self):
|
|
# """
|
|
# Same test as test_i_can_use_reference(),
|
|
# but this time, 'bar' is declared before 'foo'
|
|
# So the order of the result is different
|
|
# :return:
|
|
# """
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {bar: foo, foo: Sequence("one", "two")}
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two")
|
|
# assert len(res) == 2
|
|
# assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
|
|
# assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
|
|
#
|
|
# def test_i_can_parse_when_reference(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "twenty two")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("twenty two")
|
|
# assert cprop(concept_found, "foo") == get_expected(foo, "twenty")
|
|
#
|
|
# res = parser.parse(context, "thirty one")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("thirty one")
|
|
# assert cprop(concept_found, "foo") == get_expected(foo, "thirty")
|
|
#
|
|
# res = parser.parse(context, "twenty")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 0, "twenty")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("twenty")
|
|
#
|
|
# def test_i_can_parse_when_reference_has_a_body(self):
|
|
# foo = Concept(name="foo", body="'one'")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "twenty two")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("twenty two")
|
|
# assert cprop(concept_found, "foo") == foo
|
|
#
|
|
# res = parser.parse(context, "twenty")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 0, "twenty")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert concept_found.metadata.body == "'one'"
|
|
#
|
|
# def test_i_can_parse_multiple_results(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {
|
|
# bar: Sequence("one", "two"),
|
|
# foo: Sequence("one", OrderedChoice("two", "three"))
|
|
# }
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two")
|
|
# assert len(res) == 2
|
|
# assert res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
|
|
# concept_found_0 = res[0].value.body[0].concept
|
|
# assert cbody(concept_found_0) == DoNotResolve("one two")
|
|
# assert len(concept_found_0.props) == 0
|
|
#
|
|
# assert res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
|
|
# concept_found_1 = res[1].value.body[0].concept
|
|
# assert cbody(concept_found_1) == DoNotResolve("one two")
|
|
# assert len(concept_found_1.props) == 0
|
|
#
|
|
# def test_i_can_parse_multiple_results_times_two(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {
|
|
# bar: Sequence("one", "two"),
|
|
# foo: Sequence("one", OrderedChoice("two", "three"))
|
|
# }
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two one two")
|
|
# assert len(res) == 4
|
|
# assert res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")]
|
|
#
|
|
# assert res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")]
|
|
#
|
|
# assert res[2].status
|
|
# assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")]
|
|
#
|
|
# assert res[3].status
|
|
# assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")]
|
|
#
|
|
# def test_i_can_parse_multiple_results_when_reference(self):
|
|
# """
|
|
# TODO: There should not be two answers, as the one with bar is totally useless
|
|
# Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match
|
|
#
|
|
# :return:
|
|
# """
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {
|
|
# bar: Sequence(foo, Optional(OrderedChoice("one", "two"))),
|
|
# foo: OrderedChoice("twenty", "thirty")
|
|
# }
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "twenty")
|
|
# assert len(res) == 2
|
|
# assert res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [cnode("bar", 0, 0, "twenty")]
|
|
#
|
|
# assert res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [cnode("foo", 0, 0, "twenty")]
|
|
#
|
|
# def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
|
|
# one = Concept(name="one")
|
|
# two = Concept(name="two")
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence("twenty", OrderedChoice(one, two))}
|
|
# context, parser = self.init([one, two, foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "twenty two")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 2, "twenty two")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("twenty two")
|
|
# assert cprop(concept_found, "two") == get_expected(two, "two")
|
|
#
|
|
# res = parser.parse(context, "twenty one")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 2, "twenty one")]
|
|
#
|
|
# def test_i_can_initialize_when_cyclic_reference(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Optional("one", ConceptExpression("foo"))}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo"))
|
|
#
|
|
# def test_i_cannot_initialize_when_cyclic_reference_when_concept_is_under_construction_and_not_known(self):
|
|
# foo = Concept(name="foo").init_key()
|
|
# grammar = {foo: Optional("one", ConceptExpression("foo"))}
|
|
#
|
|
# context = self.get_context()
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(context, grammar)
|
|
# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo"))
|
|
#
|
|
# def test_i_can_initialize_when_cyclic_reference_when_concept_is_under_construction_and_known(self):
|
|
# foo = Concept(name="foo").init_key()
|
|
# grammar = {foo: Optional("one", ConceptExpression("foo"))}
|
|
#
|
|
# context = self.get_context()
|
|
# context.concepts["foo"] = foo
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(context, grammar)
|
|
# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo"))
|
|
#
|
|
# def test_i_can_parse_concept_reference_that_is_group(self):
|
|
# """
|
|
# if one is number, then number is a 'group'
|
|
# a group can be found under the sdp entry 'all_<group_name>'
|
|
# """
|
|
#
|
|
# context = self.get_context()
|
|
# one = Concept(name="one")
|
|
# two = Concept(name="two")
|
|
# number = Concept(name="number")
|
|
# foo = Concept(name="foo")
|
|
# for c in [one, two, number, foo]:
|
|
# context.sheerka.set_id_if_needed(c, False)
|
|
# context.sheerka.add_in_cache(c)
|
|
#
|
|
# context.sheerka.add_concept_to_set(context, one, number)
|
|
# context.sheerka.add_concept_to_set(context, two, number)
|
|
#
|
|
# grammar = {foo: Sequence("twenty", number)}
|
|
#
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(context, grammar)
|
|
#
|
|
# res = parser.parse(context, "twenty two")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 2, "twenty two")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("twenty two")
|
|
# assert cprop(concept_found, "two") == get_expected(two, "two")
|
|
# assert cprop(concept_found, "number") == get_expected(number, get_expected(two, "two"))
|
|
#
|
|
# res = parser.parse(context, "twenty one")
|
|
# assert res.status
|
|
# assert res.value.body == [cnode("foo", 0, 2, "twenty one")]
|
|
# concept_found = res.value.body[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("twenty one")
|
|
# assert cprop(concept_found, "one") == get_expected(one, "one")
|
|
# assert cprop(concept_found, "number") == get_expected(number, get_expected(one, "one"))
|
|
#
|
|
# def test_i_can_parse_zero_or_more(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: ZeroOrMore("one")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one one")
|
|
#
|
|
# assert res.status
|
|
# assert return_value == [cnode("foo", 0, 2, "one one")]
|
|
# assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)])
|
|
#
|
|
# concept_found = return_value[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("one one")
|
|
#
|
|
# def test_i_can_parse_sequence_and_zero_or_more(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence(ZeroOrMore("one"), "two")}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "one one two")
|
|
# assert res.status
|
|
# assert res.value.value == [cnode("foo", 0, 4, "one one two")]
|
|
#
|
|
# res = parser.parse(context, "two")
|
|
# assert res.status
|
|
# assert res.value.value == [cnode("foo", 0, 0, "two")]
|
|
#
|
|
# def test_i_cannot_parse_zero_and_more_when_wrong_entry(self):
|
|
# # TEST WITH UNRECOGNIZED
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: ZeroOrMore("one")}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(context, grammar)
|
|
#
|
|
# res = parser.parse(context, "one two")
|
|
# assert not res.status
|
|
# assert res.value.value == [
|
|
# cnode("foo", 0, 0, "one"),
|
|
# UnrecognizedTokensNode(1, 2, [t(" "), t("two")])
|
|
# ]
|
|
#
|
|
# res = parser.parse(context, "two")
|
|
# assert not res.status
|
|
# assert res.value.value == [
|
|
# UnrecognizedTokensNode(0, 0, [t("two")])
|
|
# ]
|
|
#
|
|
# def test_i_can_parse_zero_and_more_with_separator(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: ZeroOrMore("one", sep=",")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one")
|
|
#
|
|
# assert res.status
|
|
# assert return_value == [cnode("foo", 0, 7, "one, one , one")]
|
|
#
|
|
# def test_that_zero_and_more_is_greedy(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {foo: ZeroOrMore("one"), bar: "one"}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one one one")
|
|
#
|
|
# assert res.status
|
|
# assert return_value == [cnode("foo", 0, 4, "one one one")]
|
|
#
|
|
# ##############
|
|
# ## YOU STOPPED HERE
|
|
#
|
|
# # next one to do is below
|
|
# #############
|
|
#
|
|
#
|
|
# def test_i_can_parse_one_and_more(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: OneOrMore("one")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one one")
|
|
#
|
|
# assert res.status
|
|
# assert return_value == [cnode("foo", 0, 2, "one one")]
|
|
# assert return_value[0].underlying == u(grammar[foo], 0, 2, [
|
|
# u("one", 0, 0),
|
|
# u("one", 2, 2)])
|
|
#
|
|
#
|
|
# def test_i_can_parse_sequence_and_one_or_more(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence(OneOrMore("one"), "two")}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# res = parser.parse(context, "one one two")
|
|
# assert res.status
|
|
# assert res.value.value == [cnode("foo", 0, 4, "one one two")]
|
|
#
|
|
# res = parser.parse(context, "two")
|
|
# assert not res.status
|
|
# assert res.value.value == [
|
|
# UnrecognizedTokensNode(0, 0, [t("two")])
|
|
# ]
|
|
#
|
|
# def test_i_can_parse_one_and_more_with_separator(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: OneOrMore("one", sep=",")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one")
|
|
#
|
|
# assert res.status
|
|
# assert return_value == [cnode("foo", 0, 7, "one, one , one")]
|
|
# assert return_value[0].underlying == u(grammar[foo], 0, 7, [
|
|
# u("one", 0, 0),
|
|
# u("one", 3, 3),
|
|
# u("one", 7, 7)])
|
|
#
|
|
# def test_that_one_and_more_is_greedy(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {foo: OneOrMore("one"), bar: "one"}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "one one one")
|
|
#
|
|
# assert res.status
|
|
# assert return_value == [cnode("foo", 0, 4, "one one one")]
|
|
#
|
|
# @pytest.mark.skip("Done in BaseNode")
|
|
# def test_i_can_detect_infinite_recursion(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
#
|
|
# grammar = {
|
|
# bar: foo,
|
|
# foo: bar
|
|
# }
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(self.get_context(), grammar)
|
|
#
|
|
# assert bar not in parser.concepts_grammars
|
|
# assert foo not in parser.concepts_grammars
|
|
#
|
|
# @pytest.mark.skip("Done in BaseNode")
|
|
# def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {
|
|
# bar: foo,
|
|
# foo: OrderedChoice(bar, "foo")
|
|
# }
|
|
#
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(self.get_context(), grammar)
|
|
#
|
|
# assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
|
# assert bar not in parser.concepts_grammars # removed because of the infinite recursion
|
|
#
|
|
# # the other way around is possible
|
|
# grammar = {
|
|
# bar: foo,
|
|
# foo: OrderedChoice("foo", bar)
|
|
# }
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# assert foo in parser.concepts_grammars
|
|
# assert bar in parser.concepts_grammars
|
|
#
|
|
# res = parser.parse(context, "foo")
|
|
# assert len(res) == 2
|
|
# assert res[0].status
|
|
# assert res[0].value.body == [cnode("bar", 0, 0, "foo")]
|
|
# assert res[1].status
|
|
# assert res[1].value.body == [cnode("foo", 0, 0, "foo")]
|
|
#
|
|
# def test_i_can_detect_indirect_infinite_recursion_with_sequence(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
#
|
|
# grammar = {
|
|
# bar: foo,
|
|
# foo: Sequence("one", bar, "two")
|
|
# }
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(self.get_context(), grammar)
|
|
#
|
|
# assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
|
# assert bar not in parser.concepts_grammars # removed because of the infinite recursion
|
|
#
|
|
# def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
#
|
|
# grammar = {
|
|
# bar: foo,
|
|
# foo: Sequence("one", OrderedChoice(bar, "other"), "two")
|
|
# }
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(self.get_context(), grammar)
|
|
#
|
|
# assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
|
# assert bar not in parser.concepts_grammars # removed because of the infinite recursion
|
|
#
|
|
# def test_infinite_recursion_does_not_fail_if_a_concept_is_missing(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
#
|
|
# grammar = {
|
|
# foo: bar
|
|
# }
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(self.get_context(), grammar)
|
|
#
|
|
# assert foo in parser.concepts_grammars
|
|
#
|
|
# def test_i_can_detect_indirect_infinite_recursion_with_optional(self):
|
|
# # TODO infinite recursion with optional
|
|
# pass
|
|
#
|
|
# def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more(self):
|
|
# # TODO infinite recursion with zero or more
|
|
# pass
|
|
#
|
|
# def test_i_can_detect_indirect_infinite_recursion_with_one_and_more(self):
|
|
# # TODO infinite recursion with one or more
|
|
# pass
|
|
#
|
|
# def test_i_can_visit_parsing_expression(self):
|
|
# mult = Concept(name="mult")
|
|
# add = Concept(name="add")
|
|
#
|
|
# visitor = ConceptVisitor()
|
|
# visitor.visit(Sequence(mult, Optional(Sequence("+", add))))
|
|
#
|
|
# assert sorted(list(visitor.concepts)) == ["add", "mult"]
|
|
#
|
|
# def test_i_can_initialize_rule_names(self):
|
|
# context = self.get_context()
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
#
|
|
# grammar = {foo: Sequence("one", "two"), bar: foo}
|
|
# parser = BnfNodeParser()
|
|
# ret = parser.initialize(context, grammar)
|
|
# return_value = ret.body
|
|
#
|
|
# assert return_value[foo].rule_name == ""
|
|
# assert return_value[bar].rule_name == "foo"
|
|
#
|
|
# @pytest.mark.parametrize("text, end_position", [
|
|
# ("foo", 0),
|
|
# ("foo bar", 2),
|
|
# ("foo bar ", 3),
|
|
# (" foo bar ", 4)
|
|
# ])
|
|
# def test_cannot_parser_unknown_concepts(self, text, end_position):
|
|
# context, res, wrapper, return_value = self.execute([], {}, text)
|
|
# tokens = list(Tokenizer(text))[:-1]
|
|
#
|
|
# assert not res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [UnrecognizedTokensNode(0, end_position, tokens)]
|
|
#
|
|
# def test_i_cannot_parse_when_part_of_the_input_is_unrecognized(self):
|
|
# one = Concept(name="one")
|
|
# two = Concept(name="two")
|
|
# grammar = {one: "one", two: "two"}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([one, two], grammar, "one two three")
|
|
#
|
|
# assert not res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [
|
|
# ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)),
|
|
# ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)),
|
|
# UnrecognizedTokensNode(3, 4, [t(" "), t("three")])
|
|
# ]
|
|
#
|
|
# # def test_i_cannot_parse_when_wrong_sequence(self):
|
|
# # foo = Concept(name="foo")
|
|
# # grammar = {foo: Sequence("one", "two", "three")}
|
|
# #
|
|
# # context, res, wrapper, return_value = self.execute([foo], grammar, "one two three one")
|
|
# #
|
|
# # assert not res.status
|
|
# # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# # assert return_value == [
|
|
# # short_cnode("foo", "one two three"),
|
|
# # UnrecognizedTokensNode(5, 6, [t(" "), t("one")])
|
|
# # ]
|
|
#
|
|
# # def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file(self):
|
|
# # foo = Concept(name="foo")
|
|
# # grammar = {foo: Sequence("one", "two", "three")}
|
|
# #
|
|
# # context, res, wrapper, return_value = self.execute([foo], grammar, "one two")
|
|
# #
|
|
# # assert not res.status
|
|
# # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# # assert return_value == [
|
|
# # UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")])
|
|
# # ]
|
|
#
|
|
# def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {
|
|
# bar: Sequence("one", "two"),
|
|
# foo: Sequence("one", OrderedChoice("two", "three"))
|
|
# }
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two four five")
|
|
#
|
|
# assert len(res) == 2
|
|
# assert not res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [
|
|
# cnode("bar", 0, 2, "one two"),
|
|
# UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
|
|
# ]
|
|
#
|
|
# assert not res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [
|
|
# cnode("foo", 0, 2, "one two"),
|
|
# UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
|
|
# ]
|
|
#
|
|
# def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {
|
|
# bar: Sequence("one", "two"),
|
|
# foo: Sequence("one", OrderedChoice("two", "three"))
|
|
# }
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "four five one two")
|
|
#
|
|
# assert len(res) == 2
|
|
# assert not res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [
|
|
# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
|
|
# cnode("bar", 4, 6, "one two"),
|
|
# ]
|
|
#
|
|
# assert not res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [
|
|
# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
|
|
# cnode("foo", 4, 6, "one two"),
|
|
# ]
|
|
#
|
|
# def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {
|
|
# bar: Sequence("one", "two"),
|
|
# foo: Sequence("one", OrderedChoice("two", "three"))
|
|
# }
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# res = parser.parse(context, "four five one two six seven")
|
|
# assert len(res) == 2
|
|
# assert not res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [
|
|
# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
|
|
# cnode("bar", 4, 6, "one two"),
|
|
# UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
|
|
# ]
|
|
#
|
|
# assert not res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [
|
|
# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
|
|
# cnode("foo", 4, 6, "one two"),
|
|
# UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
|
|
# ]
|
|
#
|
|
# def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(self):
|
|
# context = self.get_context()
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# baz = Concept(name="baz")
|
|
# grammar = {
|
|
# bar: Sequence("one", "two"),
|
|
# foo: Sequence("one", OrderedChoice("two", "three")),
|
|
# baz: StrMatch("six"),
|
|
# }
|
|
# context, parser = self.init([foo, bar, baz], grammar)
|
|
#
|
|
# res = parser.parse(context, "one two four five six")
|
|
# assert len(res) == 2
|
|
# assert not res[0].status
|
|
# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[0].value.body == [
|
|
# cnode("bar", 0, 2, "one two"),
|
|
# UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
|
|
# cnode("baz", 8, 8, "six"),
|
|
# ]
|
|
#
|
|
# assert not res[1].status
|
|
# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
|
# assert res[1].value.body == [
|
|
# cnode("foo", 0, 2, "one two"),
|
|
# UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
|
|
# cnode("baz", 8, 8, "six"),
|
|
# ]
|
|
#
|
|
# def test_i_can_get_the_inner_concept_when_possible(self):
|
|
# foo = Concept(name="foo")
|
|
# one = Concept(name="one")
|
|
# grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one")
|
|
#
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [cnode("foo", 0, 0, "one")]
|
|
# concept_found = return_value[0].concept
|
|
# assert cbody(concept_found) == get_expected(one, "one")
|
|
# assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
|
|
#
|
|
# def test_i_can_get_the_inner_concept_when_possible_with_rule_name(self):
|
|
# foo = Concept(name="foo")
|
|
# one = Concept(name="one")
|
|
# grammar = {foo: Sequence(
|
|
# Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"),
|
|
# ZeroOrMore("one"), rule_name="seq")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one")
|
|
#
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [cnode("foo", 0, 0, "one")]
|
|
# concept_found = return_value[0].concept
|
|
# assert cbody(concept_found) == get_expected(one, "one")
|
|
# assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
|
|
# assert id(cprop(concept_found, "zero")) == id(cbody(concept_found))
|
|
# assert id(cprop(concept_found, "opt")) == id(cbody(concept_found))
|
|
# assert id(cprop(concept_found, "seq")) == id(cbody(concept_found))
|
|
#
|
|
# def test_i_get_multiple_props_when_zero_or_more(self):
|
|
# foo = Concept(name="foo")
|
|
# one = Concept(name="one")
|
|
# grammar = {foo: ZeroOrMore(one)}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one one one")
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [cnode("foo", 0, 4, "one one one")]
|
|
# concept_found = return_value[0].concept
|
|
# assert cbody(concept_found) == DoNotResolve("one one one")
|
|
# assert len(concept_found.compiled["one"]) == 3
|
|
# assert cprop(concept_found, "one")[0] == get_expected(one)
|
|
# assert cprop(concept_found, "one")[1] == get_expected(one)
|
|
# assert cprop(concept_found, "one")[2] == get_expected(one)
|
|
# assert id(cprop(concept_found, "one")[0]) != id(cprop(concept_found, "one")[1])
|
|
# assert id(cprop(concept_found, "one")[1]) != id(cprop(concept_found, "one")[2])
|
|
# assert id(cprop(concept_found, "one")[2]) != id(cprop(concept_found, "one")[0])
|
|
#
|
|
# def test_i_get_multiple_props_when_zero_or_more_and_different_values(self):
|
|
# foo = Concept(name="foo")
|
|
# one = Concept(name="one")
|
|
# grammar = {foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")), one: OrderedChoice("one", "un", "uno")}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one ok un ok uno ok")
|
|
# assert res.status
|
|
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
|
# assert return_value == [short_cnode("foo", "one ok un ok uno ok")]
|
|
# concept_found = return_value[0].concept
|
|
# assert cprop(concept_found, "one")[0] == get_expected(one, "one")
|
|
# assert cprop(concept_found, "one")[1] == get_expected(one, "un")
|
|
# assert cprop(concept_found, "one")[2] == get_expected(one, "uno")
|
|
# assert cprop(concept_found, "seq")[0] == DoNotResolve("one ok")
|
|
# assert cprop(concept_found, "seq")[1] == DoNotResolve("un ok")
|
|
# assert cprop(concept_found, "seq")[2] == DoNotResolve("uno ok")
|
|
#
|
|
# @pytest.mark.parametrize("rule, expected", [
|
|
# (StrMatch("string"), "'string'"),
|
|
# (StrMatch("string", rule_name="rule_name"), "'string'=rule_name"),
|
|
# (Sequence(StrMatch("foo"), StrMatch("bar")), "('foo' 'bar')"),
|
|
# (Sequence(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo' 'bar')=rule_name"),
|
|
# (OrderedChoice(StrMatch("foo"), StrMatch("bar")), "('foo'|'bar')"),
|
|
# (OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo'|'bar')=rule_name"),
|
|
# (Optional(StrMatch("foo")), "'foo'?"),
|
|
# (Optional(StrMatch("foo"), rule_name="rule_name"), "'foo'?=rule_name"),
|
|
# (ZeroOrMore(StrMatch("foo")), "'foo'*"),
|
|
# (ZeroOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'*=rule_name"),
|
|
# (OneOrMore(StrMatch("foo")), "'foo'+"),
|
|
# (OneOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'+=rule_name"),
|
|
# (Sequence(
|
|
# Optional(StrMatch("foo"), rule_name="a"),
|
|
# ZeroOrMore(StrMatch("bar"), rule_name="b"),
|
|
# OneOrMore(StrMatch("baz"), rule_name="c"),
|
|
# rule_name="d"), "('foo'?=a 'bar'*=b 'baz'+=c)=d"),
|
|
# (OrderedChoice(
|
|
# Optional(StrMatch("foo"), rule_name="a"),
|
|
# ZeroOrMore(StrMatch("bar"), rule_name="b"),
|
|
# OneOrMore(StrMatch("baz"), rule_name="c"),
|
|
# rule_name="d"), "('foo'?=a|'bar'*=b|'baz'+=c)=d"),
|
|
# (Sequence(
|
|
# OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="a"),
|
|
# OrderedChoice(StrMatch("x"), StrMatch("y"), rule_name="b"),
|
|
# rule_name="c"), "(('foo'|'bar')=a ('x'|'y')=b)=c")
|
|
# ])
|
|
# def test_i_can_encode_grammar(self, rule, expected):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: rule}
|
|
# context, parser = self.init([foo], grammar)
|
|
#
|
|
# encoded = parser.encode_grammar(parser.concepts_grammars)
|
|
# assert encoded["c:foo|1001:"] == expected
|
|
#
|
|
# bnf_parser = BnfParser()
|
|
# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"])
|
|
# assert parse_res.status
|
|
# assert parse_res.value.value == rule
|
|
#
|
|
# def test_i_can_encode_grammar_when_concept_simple(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# grammar = {foo: ConceptExpression(bar)}
|
|
# context, parser = self.init([foo, bar], grammar)
|
|
#
|
|
# encoded = parser.encode_grammar(parser.concepts_grammars)
|
|
# assert encoded["c:foo|1001:"] == "c:bar|1002:=bar"
|
|
#
|
|
# bnf_parser = BnfParser()
|
|
# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"])
|
|
# assert parse_res.status
|
|
# assert parse_res.value.value == grammar[foo]
|
|
#
|
|
# def test_i_can_encode_grammar_when_concepts(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# baz = Concept(name="baz")
|
|
# grammar = {foo: Sequence(
|
|
# StrMatch("a"),
|
|
# OrderedChoice(ConceptExpression(bar),
|
|
# OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")}
|
|
# context, parser = self.init([foo, bar, baz], grammar)
|
|
#
|
|
# encoded = parser.encode_grammar(parser.concepts_grammars)
|
|
# assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s"
|
|
#
|
|
# bnf_parser = BnfParser()
|
|
# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"])
|
|
# assert parse_res.status
|
|
# assert parse_res.value.value == grammar[foo]
|
|
#
|
|
# def test_i_can_encode_grammar_when_set_concepts(self):
|
|
# foo = Concept(name="foo")
|
|
# bar = Concept(name="bar")
|
|
# baz = Concept(name="baz")
|
|
# grammar = {foo: Sequence(
|
|
# StrMatch("a"),
|
|
# OrderedChoice(bar,
|
|
# OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")}
|
|
# context = self.get_context()
|
|
# for c in [foo, bar, baz]:
|
|
# context.sheerka.add_in_cache(c)
|
|
# context.sheerka.set_id_if_needed(c, False)
|
|
# context.sheerka.add_concept_to_set(context, baz, bar)
|
|
#
|
|
# parser = BnfNodeParser()
|
|
# parser.initialize(context, grammar)
|
|
#
|
|
# encoded = parser.encode_grammar(parser.concepts_grammars)
|
|
# assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s"
|
|
#
|
|
# bnf_parser = BnfParser()
|
|
# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"])
|
|
# assert parse_res.status
|
|
#
|
|
# expected = Sequence(
|
|
# StrMatch("a"),
|
|
# OrderedChoice(ConceptGroupExpression(bar, rule_name="bar"),
|
|
# OneOrMore(ConceptExpression(baz, rule_name="baz")), rule_name="oc"), rule_name="s")
|
|
# assert parse_res.value.value == expected
|
|
#
|
|
# def test_i_concept_validation_is_not_set_when_no_variables(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: "foo"}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "foo")
|
|
# assert not return_value[0].concept.metadata.need_validation
|
|
#
|
|
# def test_i_concept_validation_is_set_when_unnamed_variables_are_found(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence("foo", OrderedChoice("a", "b"))}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "foo a")
|
|
# assert not return_value[0].concept.metadata.need_validation
|
|
#
|
|
# def test_i_concept_validation_is_set_when_named_variables_are_found(self):
|
|
# foo = Concept(name="foo")
|
|
# grammar = {foo: Sequence("foo", OrderedChoice("a", "b", rule_name="var"))}
|
|
#
|
|
# context, res, wrapper, return_value = self.execute([foo], grammar, "foo a")
|
|
# assert return_value[0].concept.metadata.need_validation
|
|
#
|
|
#
|
|
# #
|
|
# # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(self):
|
|
# # context = self.get_context()
|
|
# # add = Concept(name="add")
|
|
# # mult = Concept(name="mult")
|
|
# # atom = Concept(name="atom")
|
|
# #
|
|
# # grammar = {
|
|
# # add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))),
|
|
# # mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))),
|
|
# # atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
|
|
# # }
|
|
# #
|
|
# # parser = BnfNodeParser()
|
|
# # parser.register(grammar)
|
|
# #
|
|
# # # res = parser.parse(context, "1")
|
|
# # # assert len(res) == 3 # add, mult, atom
|
|
# # #
|
|
# # # res = parser.parse(context, "1 * 2")
|
|
# # # assert len(res) == 2 # add and mult
|
|
# # #
|
|
# # # res = parser.parse(context, "1 + 2")
|
|
# # # assert res.status
|
|
# # # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")]
|
|
# #
|
|
# # res = parser.parse(context, "1 * 2 + 3")
|
|
# # assert res.status
|
|
# # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")]
|
|
#
|
|
# def test_i_can_register_concepts_with_the_same_name(self):
|
|
# # TODO : concepts are registered by name,
|
|
# # what happens when two concepts have the same name?
|
|
# pass
|
|
#
|
|
# def test_i_can_parse_very_very_long_input(self):
|
|
# # TODO: In the current implementation, all the tokens are loaded in memory
|
|
# # It's clearly not a good approach
|
|
# pass
|