I can define and eval BNF definitions

This commit is contained in:
2019-12-17 21:19:44 +01:00
parent c668cc46d2
commit 88cd3162be
25 changed files with 1099 additions and 569 deletions
+3 -3
View File
@@ -8,9 +8,9 @@ from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer
from evaluators.AddConceptEvaluator import AddConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch
from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptMatch
from parsers.BnfParser import BnfParser
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.ExactConceptParser import ExactConceptParser
from parsers.PythonParser import PythonNode, PythonParser
@@ -67,7 +67,7 @@ def get_concept_definition(source, parsing_expression):
status=True,
value=ParserResultConcept(
source=source,
parser=RegexParser(),
parser=BnfParser(),
value=parsing_expression
)
)
+138
View File
@@ -0,0 +1,138 @@
import pytest
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptNode, ConceptMatch
def get_context():
    """Return an ExecutionContext wrapping a Sheerka initialized on "mem://"."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("sheerka", "xxxx", engine)
@pytest.mark.parametrize("expression, expected", [
    # single terminals
    ("'str'", StrMatch("str")),
    ("1", StrMatch("1")),
    (" 1", StrMatch("1")),
    (",", StrMatch(",")),
    # postfix modifiers
    ("'foo'?", Optional(StrMatch("foo"))),
    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
    ("'foo'+", OneOrMore(StrMatch("foo"))),
    # choices and sequences
    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
    (
        "1 2 | 3 4+",
        OrderedChoice(
            Sequence(StrMatch("1"), StrMatch("2")),
            Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))),
        ),
    ),
    (
        "1 (2 | 3) 4+",
        Sequence(
            StrMatch("1"),
            OrderedChoice(StrMatch("2"), StrMatch("3")),
            OneOrMore(StrMatch("4")),
        ),
    ),
    # groups with modifiers
    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
    # a modifier character separated by whitespace is expected as a plain terminal
    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
    ("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
    """BnfParser turns each expression into the expected parsing expression."""
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(expression))

    assert not bnf_parser.has_error
    assert outcome.status
    assert outcome.value.value == expected
    assert outcome.value.source == expression
@pytest.mark.parametrize("expression, error", [
    ("1 ", UnexpectedEndOfFileError()),
    ("1|", UnexpectedEndOfFileError()),
    (
        "(1|)",
        UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR]),
    ),
])
def test_i_can_detect_errors(expression, error):
    """A malformed expression fails and reports the expected first error."""
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(expression))
    reported = outcome.value.value

    assert bnf_parser.has_error
    assert not outcome.status
    # Only the first reported error is compared.
    assert reported[0] == error
def test_i_can_parse_regex_with_reference():
    """A bare identifier is parsed as a ConceptMatch on that name."""
    source = "foo"
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == ConceptMatch("foo")
    assert outcome.value.source == source
def test_i_can_parse_cross_ref_with_modifier():
    """A concept reference followed by ``*`` is wrapped in ZeroOrMore."""
    source = "foo*"
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == ZeroOrMore(ConceptMatch("foo"))
    assert outcome.value.source == source
def test_i_can_parse_sequence_with_cross_ref():
    """Concept references and quoted strings can be mixed in one sequence."""
    source = "foo 'and' bar+"
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(get_context(), Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == Sequence(
        ConceptMatch("foo"),
        StrMatch("and"),
        OneOrMore(ConceptMatch("bar")),
    )
    assert outcome.value.source == source
def test_i_can_parse_choice_with_cross_ref():
    """An ordered choice between cached concepts, one optional, is parsed."""
    foo = Concept("foo")
    bar = Concept("bar")
    context = get_context()
    for concept in (foo, bar):
        context.sheerka.add_in_cache(concept)

    source = "foo | bar?"
    bnf_parser = BnfParser()
    outcome = bnf_parser.parse(context, Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == OrderedChoice(
        ConceptMatch("foo"),
        Optional(ConceptMatch("bar")),
    )
    assert outcome.value.source == source
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
    """Definitions produced by BnfParser can drive ConceptLexerParser."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    context = get_context()
    for concept in (foo, bar):
        context.sheerka.add_in_cache(concept)

    bnf_parser = BnfParser()
    foo_definition = bnf_parser.parse(context, "'twenty' | 'thirty'").value.value
    bar_definition = bnf_parser.parse(context, "foo ('one' | 'two')").value.value

    lexer = ConceptLexerParser()
    lexer.initialize(context, {bar: bar_definition, foo: foo_definition})

    # (input text, expected body) pairs, checked in the original order
    scenarios = [
        ("twenty two", [(bar, 0, 2, "twenty two")]),
        ("thirty one", [(bar, 0, 2, "thirty one")]),
        ("twenty", [(foo, 0, 0, "twenty")]),
    ]
    for text, expected_body in scenarios:
        outcome = lexer.parse(context, text)
        assert outcome.status
        assert outcome.value.body == expected_body
+97 -172
View File
@@ -2,10 +2,8 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \
ParsingExpressionVisitor
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch
class ConceptVisitor(ParsingExpressionVisitor):
@@ -13,7 +11,17 @@ class ConceptVisitor(ParsingExpressionVisitor):
self.concepts = set()
def visit_ConceptMatch(self, node):
self.concepts.add(node.concept_name)
self.concepts.add(node.concept)
def u(parsing_expression, start, end, children=None):
    """Build the expected "underlying" lexer node for a parsing expression.

    A plain string is first wrapped in a StrMatch. A StrMatch yields a
    TerminalNode carrying its ``to_match`` text; any other expression yields
    a NonTerminalNode that receives ``children``.
    """
    expression = parsing_expression
    if isinstance(expression, str):
        expression = StrMatch(expression)

    if not isinstance(expression, StrMatch):
        return NonTerminalNode(expression, start, end, [], children)
    return TerminalNode(expression, start, end, expression.to_match)
@pytest.mark.parametrize("match, text", [
@@ -39,7 +47,7 @@ def test_i_can_match_simple_tokens(match, text):
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 0, source=text)]
assert res.value.value == [ConceptNode(foo, 0, 0, source=text, underlying=u(match, 0, 0))]
def test_i_can_match_multiple_concepts_in_one_input():
@@ -55,9 +63,9 @@ def test_i_can_match_multiple_concepts_in_one_input():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [
ConceptNode(one, 0, 0, source="one"),
ConceptNode(two, 2, 2, source="two"),
ConceptNode(one, 4, 4, source="one"),
ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)),
ConceptNode(one, 4, 4, source="one", underlying=u("one", 4, 4)),
]
@@ -85,8 +93,8 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.try_parsed == [
ConceptNode(one, 0, 0, source="one"),
ConceptNode(two, 2, 2, source="two")] # these two were recognized
ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2))] # these two were recognized
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body[0].body == "three"
@@ -102,7 +110,11 @@ def test_i_can_match_sequence():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.value == [
ConceptNode(foo, 0, 4, source="one two three", underlying=u(concepts[foo], 0, 4, [
u("one", 0, 0),
u("two", 2, 2),
u("three", 4, 4)]))]
def test_wrong_sequence_is_not_matched():
@@ -116,7 +128,7 @@ def test_wrong_sequence_is_not_matched():
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.try_parsed == [(foo, "one two three")]
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body[0].body == "one"
@@ -149,7 +161,7 @@ def test_i_always_choose_the_longest_match():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.value == [(foo, "one two three")]
def test_i_can_match_several_sequences():
@@ -166,8 +178,8 @@ def test_i_can_match_several_sequences():
assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [
ConceptNode(foo, 0, 4, source="one two three"),
ConceptNode(bar, 6, 8, source="one two"),
(foo, 0, 4, "one two three"),
(bar, 6, 8, "one two"),
]
@@ -181,12 +193,14 @@ def test_i_can_match_ordered_choice():
res1 = parser.parse(context, "one")
assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [ConceptNode(foo, 0, 0, source="one")]
assert res1.value.body == [
ConceptNode(foo, 0, 0, source="one", underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
res2 = parser.parse(context, "two")
assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [ConceptNode(foo, 0, 0, source="two")]
assert res2.value.body == [
ConceptNode(foo, 0, 0, source="two", underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))]
res3 = parser.parse(context, "three")
assert not res3.status
@@ -216,12 +230,20 @@ def test_i_can_mix_sequences_and_ordered_choices():
res1 = parser.parse(context, "twenty one ok")
assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok")]
assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok",
underlying=u(concepts[foo], 0, 4, [
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
u("one", 2, 2),
u("ok", 4, 4)]))]
res2 = parser.parse(context, "thirty one ok")
assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok")]
assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok",
underlying=u(concepts[foo], 0, 4, [
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
u("one", 2, 2),
u("ok", 4, 4)]))]
res3 = parser.parse(context, "twenty one")
assert not res3.status
@@ -267,7 +289,8 @@ def test_i_can_parse_optional():
res = parser.parse(context, "one")
assert res.status
assert res.value.value == [ConceptNode(foo, 0, 0, source="one")]
assert res.value.value == [ConceptNode(foo, 0, 0, source="one",
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
def test_i_can_parse_sequence_starting_with_optional():
@@ -280,11 +303,19 @@ def test_i_can_parse_sequence_starting_with_optional():
res = parser.parse(context, "twenty one")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="twenty one")]
assert res.value.body == [ConceptNode(
foo, 0, 2,
source="twenty one",
underlying=u(concepts[foo], 0, 2,
[
u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
u("one", 2, 2)]
))]
res = parser.parse(context, "one")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="one")]
assert res.value.body == [ConceptNode(foo, 0, 0, source="one",
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
def test_i_can_parse_sequence_ending_with_optional():
@@ -297,11 +328,11 @@ def test_i_can_parse_sequence_ending_with_optional():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.body == [(foo, 0, 4, "one two three")]
res = parser.parse(context, "one two")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res.value.body == [(foo, 0, 2, "one two")]
def test_i_can_parse_sequence_with_optional_in_between():
@@ -314,11 +345,11 @@ def test_i_can_parse_sequence_with_optional_in_between():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
assert res.value.body == [(foo, 0, 4, "one two three")]
res = parser.parse(context, "one three")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="one three")]
assert res.value.body == [(foo, 0, 2, "one three")]
def test_i_can_use_reference():
@@ -338,11 +369,14 @@ def test_i_can_use_reference():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two",
underlying=u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)]))]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two",
underlying=u(ConceptMatch(foo, rule_name="foo"), 0, 2,
[u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)])]))]
def test_i_can_use_context_reference_with_multiple_levels():
@@ -364,15 +398,15 @@ def test_i_can_use_context_reference_with_multiple_levels():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res[0].value.body == [(foo, 0, 2, "one two")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]
assert res[1].value.body == [(bar, 0, 2, "one two")]
assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [ConceptNode(baz, 0, 2, source="one two")]
assert res[2].value.body == [(baz, 0, 2, "one two")]
def test_order_is_not_important_when_using_references():
@@ -386,8 +420,8 @@ def test_order_is_not_important_when_using_references():
res = parser.parse(context, "one two")
assert len(res) == 2
assert res[0].value.body == [ConceptNode(bar, 0, 2, source="one two")]
assert res[1].value.body == [ConceptNode(foo, 0, 2, source="one two")]
assert res[0].value.body == [(bar, 0, 2, "one two")]
assert res[1].value.body == [(foo, 0, 2, "one two")]
def test_i_can_parse_when_reference():
@@ -401,15 +435,15 @@ def test_i_can_parse_when_reference():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
assert res.value.body == [(bar, 0, 2, "twenty two")]
res = parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
assert res.value.body == [(bar, 0, 2, "thirty one")]
res = parser.parse(context, "twenty")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
assert res.value.body == [(foo, 0, 0, "twenty")]
def test_i_can_detect_duplicates_when_reference():
@@ -428,11 +462,11 @@ def test_i_can_detect_duplicates_when_reference():
assert len(res) == 2
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="twenty")]
assert res[0].value.body == [(bar, 0, 0, "twenty")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="twenty")]
assert res[1].value.body == [(foo, 0, 0, "twenty")]
def test_i_can_detect_infinite_recursion():
@@ -446,8 +480,8 @@ def test_i_can_detect_infinite_recursion():
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert bar not in parser.concepts_dict
assert foo not in parser.concepts_dict
assert bar not in parser.concepts_grammars
assert foo not in parser.concepts_grammars
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
@@ -461,8 +495,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
# the other way around is possible
context = get_context()
@@ -472,15 +506,15 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
}
parser = ConceptLexerParser()
parser.initialize(context, concepts)
assert foo in parser.concepts_dict
assert bar in parser.concepts_dict
assert foo in parser.concepts_grammars
assert bar in parser.concepts_grammars
res = parser.parse(context, "foo")
assert len(res) == 2
assert res[0].status
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")]
assert res[0].value.body == [(bar, 0, 0, "foo")]
assert res[1].status
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")]
assert res[1].value.body == [(foo, 0, 0, "foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
@@ -494,8 +528,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
@@ -509,8 +543,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
parser = ConceptLexerParser()
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_optional():
@@ -518,128 +552,6 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
pass
@pytest.mark.parametrize("expression, expected", [
    # single terminals
    ("'str'", StrMatch("str")),
    ("1", StrMatch("1")),
    (" 1", StrMatch("1")),
    (",", StrMatch(",")),
    # postfix modifiers
    ("'foo'?", Optional(StrMatch("foo"))),
    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
    ("'foo'+", OneOrMore(StrMatch("foo"))),
    # choices and sequences
    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
    (
        "1 2 | 3 4+",
        OrderedChoice(
            Sequence(StrMatch("1"), StrMatch("2")),
            Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))),
        ),
    ),
    (
        "1 (2 | 3) 4+",
        Sequence(
            StrMatch("1"),
            OrderedChoice(StrMatch("2"), StrMatch("3")),
            OneOrMore(StrMatch("4")),
        ),
    ),
    # groups with modifiers
    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
    # a modifier character separated by whitespace is expected as a plain terminal
    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
    ("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
    """RegexParser turns each expression into the expected parsing expression."""
    regex_parser = RegexParser()
    outcome = regex_parser.parse(get_context(), Tokenizer(expression))

    assert not regex_parser.has_error
    assert outcome.status
    assert outcome.value.value == expected
    assert outcome.value.source == expression
@pytest.mark.parametrize("expression, error", [
    ("1 ", UnexpectedEndOfFileError()),
    ("1|", UnexpectedEndOfFileError()),
    (
        "(1|)",
        UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR]),
    ),
])
def test_i_can_detect_errors(expression, error):
    """A malformed expression fails and reports the expected first error."""
    regex_parser = RegexParser()
    outcome = regex_parser.parse(get_context(), Tokenizer(expression))
    reported = outcome.value.value

    assert regex_parser.has_error
    assert not outcome.status
    # Only the first reported error is compared.
    assert reported[0] == error
def test_i_can_parse_regex_with_reference():
    """A bare identifier is parsed as a ConceptMatch on that name."""
    source = "foo"
    regex_parser = RegexParser()
    outcome = regex_parser.parse(get_context(), Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == ConceptMatch("foo")
    assert outcome.value.source == source
def test_i_can_parse_cross_ref_with_modifier():
    """A concept reference followed by ``*`` is wrapped in ZeroOrMore."""
    source = "foo*"
    regex_parser = RegexParser()
    outcome = regex_parser.parse(get_context(), Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == ZeroOrMore(ConceptMatch("foo"))
    assert outcome.value.source == source
def test_i_can_parse_sequence_with_cross_ref():
    """Concept references and quoted strings can be mixed in one sequence."""
    source = "foo 'and' bar+"
    regex_parser = RegexParser()
    outcome = regex_parser.parse(get_context(), Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == Sequence(
        ConceptMatch("foo"),
        StrMatch("and"),
        OneOrMore(ConceptMatch("bar")),
    )
    assert outcome.value.source == source
def test_i_can_parse_choice_with_cross_ref():
    """An ordered choice between cached concepts, one optional, is parsed."""
    foo = Concept("foo")
    bar = Concept("bar")
    context = get_context()
    for concept in (foo, bar):
        context.sheerka.add_in_cache(concept)

    source = "foo | bar?"
    regex_parser = RegexParser()
    outcome = regex_parser.parse(context, Tokenizer(source))

    assert outcome.status
    assert outcome.value.value == OrderedChoice(
        ConceptMatch("foo"),
        Optional(ConceptMatch("bar")),
    )
    assert outcome.value.source == source
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
    """Definitions produced by RegexParser can drive ConceptLexerParser."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    context = get_context()
    for concept in (foo, bar):
        context.sheerka.add_in_cache(concept)

    definition_parser = RegexParser()
    foo_definition = definition_parser.parse(context, "'twenty' | 'thirty'").value.value
    bar_definition = definition_parser.parse(context, "foo ('one' | 'two')").value.value

    lexer = ConceptLexerParser()
    lexer.initialize(context, {bar: bar_definition, foo: foo_definition})

    # (input text, matched concept, end position), checked in the original order
    scenarios = [
        ("twenty two", bar, 2),
        ("thirty one", bar, 2),
        ("twenty", foo, 0),
    ]
    for text, concept, end in scenarios:
        outcome = lexer.parse(context, text)
        assert outcome.status
        assert outcome.value.body == [ConceptNode(concept, 0, end, source=text)]
def test_i_can_visit_parsing_expression():
mult = Concept(name="mult")
add = Concept(name="add")
@@ -650,6 +562,19 @@ def test_i_can_visit_parsing_expression():
assert sorted(list(visitor.concepts)) == ["add", "mult"]
def test_i_can_initialize_rule_names():
    """initialize() names a concept reference after the concept it points to."""
    context = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    grammar = {foo: Sequence("one", "two"), bar: foo}

    lexer = ConceptLexerParser()
    initialized = lexer.initialize(context, grammar).body

    # The Sequence was given no rule name; the reference to foo is named "foo".
    assert initialized[foo].rule_name == ""
    assert initialized[bar].rule_name == "foo"
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
+178
View File
@@ -0,0 +1,178 @@
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \
StrMatch, Optional, OrderedChoice
def get_context():
    """Return an ExecutionContext wrapping a Sheerka initialized on "mem://"."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", "xxx", engine)
def get_return_value(nodes, source):
    """Wrap ``nodes`` in a successful ReturnValueConcept from ConceptLexerParser.

    ``nodes`` is used both as the parser result value and as ``try_parsed``.
    """
    parser_result = ParserResultConcept(
        parser=ConceptLexerParser(),
        source=source,
        value=nodes,
        try_parsed=nodes,
    )
    return ReturnValueConcept("some_name", True, parser_result)
def get_concept_node(context, grammar, expression):
    """Parse ``expression`` with ``grammar`` and return the first resulting node.

    Asserts that the parse succeeded before returning.
    """
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)
    outcome = lexer.parse(context, expression)
    assert outcome.status
    return outcome.value.value[0]
@pytest.mark.parametrize("ret_val, expected", [
    # successful parser results holding concept nodes
    (
        ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
        True,
    ),
    (
        ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))),
        True,
    ),
    # failed return values
    (
        ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
        False,
    ),
    (
        ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))),
        False,
    ),
    # parser results that do not hold concept nodes
    (
        ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")),
        False,
    ),
    (
        ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])),
        False,
    ),
    # concept nodes not wrapped in a parser result
    (
        ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]),
        False,
    ),
    (
        ReturnValueConcept("some_name", True, ConceptNode(Concept(), 0, 0)),
        False,
    ),
])
def test_i_can_match(ret_val, expected):
    """ConceptNodeEvaluator.matches gives the expected answer for each input."""
    execution_context = get_context()
    matched = ConceptNodeEvaluator().matches(execution_context, ret_val)
    assert matched == expected
def test_concept_is_returned_when_list_of_one_concept_node():
    """Evaluating a one-node list returns that node's concept."""
    foo = Concept("foo")
    context = get_context()
    context.sheerka.add_in_cache(foo)

    evaluator = ConceptNodeEvaluator()
    terminal = TerminalNode(StrMatch("foo"), 0, 0, "foo")
    node = ConceptNode(foo, 0, 0, underlying=terminal)
    ret_val = get_return_value([node], "h")

    outcome = evaluator.eval(context, ret_val)

    assert outcome.who == evaluator.name
    assert outcome.status
    assert outcome.value == node.concept
    assert outcome.parents == [ret_val]
def test_concept_property_is_correctly_updated_for_str_match():
    """A rule name on a StrMatch becomes a property holding the matched text."""
    context = get_context()
    foo = Concept("foo")
    grammar = {foo: StrMatch("foo", rule_name="variable")}
    node = get_concept_node(context, grammar, "foo")

    evaluator = ConceptNodeEvaluator()
    updated = evaluator.update_concept(context.sheerka, node.concept, node.underlying)

    assert "variable" in updated.props
    assert updated.props["variable"].value == "foo"
def test_concept_property_is_correctly_updated_for_sequence():
    """A rule name on a Sequence becomes a property holding the whole match."""
    context = get_context()
    foo = Concept("foo")
    grammar = {foo: Sequence("one", "two", rule_name="variable")}
    node = get_concept_node(context, grammar, "one two")

    evaluator = ConceptNodeEvaluator()
    updated = evaluator.update_concept(context.sheerka, node.concept, node.underlying)

    assert "variable" in updated.props
    assert updated.props["variable"].value == "one two"
def test_concept_property_is_updated_for_str_in_sequence():
    """Named StrMatch items inside a named Sequence each get their own property."""
    context = get_context()
    foo = Concept("foo")
    first = StrMatch("one", rule_name="s1")
    second = StrMatch("two", rule_name="s2")
    grammar = {foo: Sequence(first, second, rule_name="variable")}
    node = get_concept_node(context, grammar, "one two")

    evaluator = ConceptNodeEvaluator()
    updated = evaluator.update_concept(context.sheerka, node.concept, node.underlying)

    assert updated.props["variable"].value == "one two"
    assert updated.props["s1"].value == "one"
    assert updated.props["s2"].value == "two"
def test_concept_property_is_correctly_updated_for_optional():
    """A named Optional that matched contributes its own property."""
    context = get_context()
    foo = Concept("foo")
    grammar = {foo: Sequence("one", Optional("two", rule_name="o"), rule_name="variable")}
    node = get_concept_node(context, grammar, "one two")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new concept created from the node's concept key.
    fresh_concept = context.sheerka.new(node.concept.key)
    updated = evaluator.update_concept(context.sheerka, fresh_concept, node.underlying)

    assert "variable" in updated.props
    assert updated.props["variable"].value == "one two"
    assert updated.props["o"].value == "two"
def test_concept_property_is_correctly_updated_when_list_of_properties():
    """Two matches sharing one rule name are collected into a list property."""
    context = get_context()
    foo = Concept("foo")
    first = StrMatch("one", rule_name="s")
    second = StrMatch("two", rule_name="s")
    grammar = {foo: Sequence(first, second, rule_name="variable")}
    node = get_concept_node(context, grammar, "one two")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new concept created from the node's concept key.
    fresh_concept = context.sheerka.new(node.concept.key)
    updated = evaluator.update_concept(context.sheerka, fresh_concept, node.underlying)

    assert updated.props["variable"].value == "one two"
    assert updated.props["s"].value == ["one", "two"]
def test_concept_property_is_correctly_updated_when_another_concept():
    """A referenced concept is stored as a property carrying its own properties."""
    context = get_context()
    foo = Concept("foo")
    bar = Concept("bar")
    context.sheerka.add_in_cache(foo)
    grammar = {
        foo: Sequence("one", "two", rule_name="variable"),
        bar: Sequence(foo, "three", rule_name="variable"),
    }
    node = get_concept_node(context, grammar, "one two three")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new concept created from the node's concept key.
    fresh_concept = context.sheerka.new(node.concept.key)
    updated = evaluator.update_concept(context.sheerka, fresh_concept, node.underlying)

    assert updated.props["variable"].value == "one two three"
    assert updated.props["foo"].value == (
        Concept("foo").set_prop("variable", "one two").init_key()
    )
def test_concept_property_is_correctly_updated_when_concept_recursion():
    """Properties are filled in for a grammar in which ``add`` refers to itself."""
    context = get_context()
    number = Concept("number")
    add = Concept("add")
    for concept in (number, add):
        context.sheerka.add_in_cache(concept)

    # number := 'one' | 'two'
    # add    := number (('plus' | 'minus') add)?
    number_rule = OrderedChoice("one", "two")
    operator = OrderedChoice("plus", "minus", rule_name="op")
    add_rule = Sequence(number, Optional(Sequence(operator, add)))
    grammar = {number: number_rule, add: add_rule}
    node = get_concept_node(context, grammar, "one plus two")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new concept created from the node's concept key.
    fresh_concept = context.sheerka.new(node.concept.key)
    updated = evaluator.update_concept(context.sheerka, fresh_concept, node.underlying)

    assert updated.props["number"].value == Concept("number").init_key()
    assert updated.props["op"].value == "plus"
    assert updated.props["add"].value == (
        Concept("add").set_prop("number", Concept("number").init_key()).init_key()
    )
+3 -3
View File
@@ -2,13 +2,13 @@ import pytest
import ast
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch
from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.BnfParser import BnfParser
# def nop():
@@ -341,7 +341,7 @@ def test_i_can_parse_def_concept_from_regex():
res = parser.parse(get_context(), text)
node = res.value.value
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition)
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition)
expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
assert res.status
+52 -9
View File
@@ -1,12 +1,10 @@
import ast
import pytest
import os
from os import path
import shutil
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept, PROPERTIES_TO_SERIALIZE
from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property
from core.sheerka import Sheerka, ExecutionContext
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
@@ -63,12 +61,12 @@ def test_builtin_concepts_are_initialized():
def test_builtin_concepts_can_be_updated():
    """Check that an edited builtin concept is read back by a new instance.

    The description of the ``BuiltinConcepts.SHEERKA`` concept is changed and
    written through ``sdp.modify``; a second instance built on the same store
    must then report the new description.
    """
    # ``get_sheerka`` takes a boolean ``use_dict`` flag, not a path: ``False``
    # selects the ``root_folder`` store. Passing ``root_folder`` itself would be
    # truthy and silently pick the in-memory store instead.
    sheerka = get_sheerka(False, skip_builtins_in_db=False)
    loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
    loaded_sheerka.metadata.desc = "I have a description"
    sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka)

    # Re-open the same store with a fresh instance and read the concept back.
    sheerka = get_sheerka(False)
    loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
    assert loaded_sheerka.metadata.desc == "I have a description"
@@ -593,9 +591,8 @@ def test_i_can_create_concept_with_bnf_definition():
saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
expected_bnf = Sequence(
ConceptMatch("a"),
Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))),
rule_name="plus")
ConceptMatch("a", rule_name="a"),
Optional(Sequence(StrMatch("plus"), ConceptMatch("plus", rule_name="plus"))))
assert saved_definitions[saved_concept] == expected_bnf
new_concept = res[0].value.body
@@ -606,7 +603,53 @@ def test_i_can_create_concept_with_bnf_definition():
assert "plus" in new_concept.props
def get_sheerka(root="mem://", skip_builtins_in_db=True):
def test_i_can_eval_bnf_definitions():
    """Define concept ``a`` from a BNF rule, then evaluate one of its alternatives.

    Evaluating ``"one"`` must yield exactly one successful result whose value
    is an instance of the concept that was just defined.
    """
    engine = get_sheerka()
    definition_results = engine.eval("def concept a from bnf 'one' | 'two'")
    concept_a = definition_results[0].body.body

    results = engine.eval("one")

    assert len(results) == 1
    outcome = results[0]
    assert outcome.status
    assert engine.isinstance(outcome.value, concept_a)
def test_i_can_eval_bnf_definitions_with_variables():
    """Evaluate a BNF concept whose rule refers to another BNF concept.

    Concept ``b`` is defined as concept ``a`` followed by ``'three'``.
    Evaluating ``"one three"`` must yield a single successful ``b`` whose
    ``"a"`` property equals ``Property("a", concept_a)``.
    """
    engine = get_sheerka()
    # Definition order matters: ``b`` refers to ``a``.
    concept_a = engine.eval("def concept a from bnf 'one' | 'two'")[0].body.body
    concept_b = engine.eval("def concept b from bnf a 'three'")[0].body.body

    results = engine.eval("one three")

    assert len(results) == 1
    outcome = results[0]
    assert outcome.status
    return_value = outcome.value
    assert engine.isinstance(return_value, concept_b)
    assert return_value.props["a"] == Property("a", concept_a)
def test_i_can_eval_bnf_definitions_from_separate_instances():
    """
    Same scenario as the plain BNF evaluation test, but each evaluation runs
    on a brand-new instance obtained from ``get_sheerka(False)``, to make sure
    the BNF definitions are correctly persisted and loaded.
    """
    defining_instance = get_sheerka(False)
    concept_a = defining_instance.eval("def concept a from bnf 'one' | 'two'")[0].body.body

    # One fresh instance per alternative of the rule.
    for text in ("one", "two"):
        results = get_sheerka(False).eval(text)
        assert len(results) == 1
        outcome = results[0]
        assert outcome.status
        assert defining_instance.isinstance(outcome.value, concept_a)
def get_sheerka(use_dict=True, skip_builtins_in_db=True):
root = "mem://" if use_dict else root_folder
sheerka = Sheerka(skip_builtins_in_db)
sheerka.initialize(root)
+73 -3
View File
@@ -754,7 +754,7 @@ def test_i_can_set_using_reference(root):
".sheerka",
"mem://"
])
def test_i_can_add_reference_of_an_object_with_a_key(root):
def test_i_can_add_an_object_with_a_key_as_a_reference(root):
sdp = SheerkaDataProvider(root)
obj = ObjDumpJson("my_key", "value1")
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
@@ -777,7 +777,7 @@ def test_i_can_add_reference_of_an_object_with_a_key(root):
".sheerka",
"mem://"
])
def test_i_can_add_reference_a_dictionary(root):
def test_i_can_add_a_dictionary_as_a_reference(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key": "value1"}
@@ -1403,7 +1403,7 @@ def test_i_can_get_an_entry_by_key(root):
".sheerka",
"mem://"
])
def test_i_can_get_object_save_by_reference(root):
def test_i_can_get_object_saved_by_reference(root):
sdp = SheerkaDataProvider(root)
obj = ObjDumpJson("my_key", "value1")
sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj)))
@@ -1687,3 +1687,73 @@ def test_i_can_add_obj_with_same_key_and_get_them_back(root):
assert len(loaded) == 2
assert loaded[0] == obj1
assert loaded[1] == obj2
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_get_safe_dictionary_without_origin(root):
    """Check the ``load_origin`` flag of ``get_safe`` on a dict saved by reference.

    By default the loaded dict is expected to carry an extra
    ``Serializer.ORIGIN`` entry; with ``load_origin=False`` only the stored
    key must be present.
    """
    provider = SheerkaDataProvider(root)
    payload = {"my_key": "value1"}
    provider.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(payload)))
    entry, key = provider.add(evt_digest, "entry", payload, use_ref=True)

    # Default load: stored key plus the origin entry.
    with_origin = provider.get_safe(entry, key)
    assert len(with_origin) == 2
    assert with_origin["my_key"] == payload["my_key"]
    assert Serializer.ORIGIN in with_origin

    # Origin loading disabled: only the stored key remains.
    without_origin = provider.get_safe(entry, key, load_origin=False)
    assert len(without_origin) == 1
    assert without_origin["my_key"] == payload["my_key"]
    assert Serializer.ORIGIN not in without_origin
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_get_dictionary_without_origin(root):
    """Check the ``load_origin`` flag of ``get`` on a dict saved by reference.

    By default the loaded dict is expected to carry an extra
    ``Serializer.ORIGIN`` entry; with ``load_origin=False`` only the stored
    key must be present.
    """
    provider = SheerkaDataProvider(root)
    payload = {"my_key": "value1"}
    provider.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(payload)))
    entry, key = provider.add(evt_digest, "entry", payload, use_ref=True)

    # Default load: stored key plus the origin entry.
    with_origin = provider.get(entry, key)
    assert len(with_origin) == 2
    assert with_origin["my_key"] == payload["my_key"]
    assert Serializer.ORIGIN in with_origin

    # Origin loading disabled: only the stored key remains.
    without_origin = provider.get(entry, key, load_origin=False)
    assert len(without_origin) == 1
    assert without_origin["my_key"] == payload["my_key"]
    assert Serializer.ORIGIN not in without_origin
@pytest.mark.parametrize("root", [
    ".sheerka",
    "mem://"
])
def test_i_get_safe_object_without_origin(root):
    """Check the ``load_origin`` flag of ``get_safe`` on an object saved by reference.

    By default the loaded object is expected to expose a
    ``Serializer.ORIGIN`` attribute; with ``load_origin=False`` it must not.
    Both loaded objects must compare equal to the one that was stored.
    """
    provider = SheerkaDataProvider(root)
    stored = ObjDumpJson("my_key", "value1")
    provider.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(stored)))
    entry, key = provider.add(evt_digest, "entry", stored, use_ref=True)

    # Default load: the origin attribute is set on the object.
    with_origin = provider.get_safe(entry, key)
    assert with_origin == stored
    assert hasattr(with_origin, Serializer.ORIGIN)

    # Origin loading disabled: the attribute is absent.
    without_origin = provider.get_safe(entry, key, load_origin=False)
    assert without_origin == stored
    assert not hasattr(without_origin, Serializer.ORIGIN)