Refactored sheerka class: splitted to use sub handlers. Refactored unit tests to use classes.

This commit is contained in:
2020-01-22 17:49:28 +01:00
parent 821614a6c4
commit c489a38ebc
120 changed files with 7947 additions and 8190 deletions
View File
+25
View File
@@ -0,0 +1,25 @@
import pytest
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import BaseParser
@pytest.mark.parametrize("text, expected_text", [
("hello world", "hello world"),
("'hello' 'world'", "'hello' 'world'"),
("def concept a from", "def concept a from"),
("()[]{}1=1.5+-/*><&é", "()[]{}1=1.5+-/*><&é"),
("execute(c:concept_name:)", "execute(c:concept_name:)")
])
def test_i_can_get_text_from_tokens(text, expected_text):
tokens = list(Tokenizer(text))
assert BaseParser.get_text_from_tokens(tokens) == expected_text
@pytest.mark.parametrize("text, custom, expected_text", [
("execute(c:concept_name:)", {TokenKind.CONCEPT: lambda t: f"__C__{t.value}"}, "execute(__C__concept_name)")
])
def test_i_can_get_text_from_tokens_with_custom_switcher(text, custom, expected_text):
tokens = list(Tokenizer(text))
assert BaseParser.get_text_from_tokens(tokens, custom) == expected_text
+176
View File
@@ -0,0 +1,176 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptExpression, cnode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class ClassWithName:
def __init__(self, name):
self.name = name
class TestBnfParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
(
"1 (2 | 3) 4+",
Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
])
def test_i_can_parse_regex(self, expression, expected):
parser = BnfParser()
res = parser.parse(self.get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", Concept("foo").init_key()),
("foo*", ZeroOrMore(Concept("foo").init_key())),
("foo 'and' bar+", Sequence(Concept("foo").init_key(), StrMatch("and"), OneOrMore(Concept("bar").init_key()))),
("foo | bar?", OrderedChoice(Concept("foo").init_key(), Optional(Concept("bar").init_key()))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), Concept("var").init_key())),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), Concept("var").init_key())),
])
def test_i_can_parse_regex_with_concept(self, expression, expected):
foo = Concept("foo")
bar = Concept("bar")
var = Concept("var")
context = self.get_context()
for c in (foo, bar, var):
context.sheerka.add_in_cache(c)
parser = BnfParser()
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
def test_i_can_parse_regex_with_concept_when_the_concept_is_still_under_definition(self):
expression = "foo"
expected = ConceptExpression("foo")
context = self.get_context()
context.obj = ClassWithName("foo")
parser = BnfParser()
res = parser.parse(context, Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.RPAR])),
("1=", UnexpectedTokenErrorNode("Unexpected token 'Token(<EOF>)'", [TokenKind.IDENTIFIER])),
("'name", LexerError("Missing Trailing quote", "'name", 5, 1, 6))
])
def test_i_can_detect_errors(self, expression, error):
parser = BnfParser()
res = parser.parse(self.get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(self):
foo = Concept(name="foo")
bar = Concept(name="bar")
context = self.get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = BnfParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
def test_i_cannot_parse_when_too_many_concepts(self):
foo1 = Concept(name="foo", body="1")
foo2 = Concept(name="foo", body="2")
context = self.get_context()
context.sheerka.cache_by_key["foo"] = [foo1, foo2]
regex_parser = BnfParser()
res = regex_parser.parse(context, "foo")
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
assert res.value.body == ('key', 'foo')
def test_i_cannot_parse_when_unknown_concept(self):
context = self.get_context()
regex_parser = BnfParser()
res = regex_parser.parse(self.get_context(), "foo")
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ('key', 'foo')
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,193 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
multiple_concepts_parser = MultipleConceptsParser()
def ret_val(*args):
result = []
index = 0
source = ""
for item in args:
if isinstance(item, Concept):
tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
result.append(ConceptNode(item, index, index, tokens, item.name))
index += 1
source += item.name
elif isinstance(item, PythonNode):
tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF
result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source))
index += len(tokens)
source += item.source
else:
tokens = list(Tokenizer(item))[:-1] # strip trailing EOF
result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
index += len(tokens)
source += item
return ReturnValueConcept(
"who",
False,
ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source))
class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka):
def init(self, concepts, inputs):
context = self.get_context()
for concept in concepts:
context.sheerka.create_new_concept(context, concept)
return context, ret_val(*inputs)
def execute(self, concepts, inputs):
context, input_return_values = self.init(concepts, inputs)
parser = ConceptsWithConceptsParser()
result = parser.parse(context, input_return_values.body)
wrapper = result.body
return_value = result.body.body
return context, parser, result, wrapper, return_value
@pytest.mark.parametrize("text, interested", [
("not parser result", False),
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
])
def test_not_interested(self, text, interested):
context = self.get_context()
res = ConceptsWithConceptsParser().parse(context, text)
if interested:
assert res is not None
else:
assert res is None
def test_i_can_parse_composition_of_concepts(self):
foo = Concept("foo")
bar = Concept("bar")
plus = Concept("a plus b").def_prop("a").def_prop("b")
context, parser, result, wrapper, return_value = self.execute([foo, bar, plus], [foo, " plus ", bar])
assert result.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert result.who == wrapper.parser.name
assert wrapper.source == "foo plus bar"
assert context.sheerka.isinstance(return_value, plus)
assert return_value.compiled["a"] == foo
assert return_value.compiled["b"] == bar
# sanity check, I can evaluate the result
evaluated = context.sheerka.evaluate_concept(context, return_value)
assert evaluated.key == return_value.key
assert evaluated.get_prop("a") == foo.init_key()
assert evaluated.get_prop("b") == bar.init_key()
def test_i_can_parse_when_composition_of_source_code(self):
plus = Concept("a plus b", body="a + b").def_prop("a").def_prop("b")
left = PythonNode("1+1", ast.parse("1+1", mode="eval"))
right = PythonNode("2+2", ast.parse("2+2", mode="eval"))
context, parser, result, wrapper, return_value = self.execute([plus], [left, " plus ", right])
assert result.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert result.who == wrapper.parser.name
assert wrapper.source == "1+1 plus 2+2"
assert context.sheerka.isinstance(return_value, plus)
left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left)
right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right)
assert return_value.compiled["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)]
assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)]
# sanity check, I can evaluate the result
evaluated = context.sheerka.evaluate_concept(context, return_value)
assert evaluated.key == return_value.key
assert evaluated.get_prop("a") == 2
assert evaluated.get_prop("b") == 4
assert evaluated.body == 6
def test_i_can_parse_when_mix_of_concept_and_code(self):
plus = Concept("a plus b").def_prop("a").def_prop("b")
code = PythonNode("1+1", ast.parse("1+1", mode="eval"))
foo = Concept("foo")
context, parser, result, wrapper, return_value = self.execute([plus, foo], [foo, " plus ", code])
assert result.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert result.who == wrapper.parser.name
assert wrapper.source == "foo plus 1+1"
assert context.sheerka.isinstance(return_value, plus)
code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code)
assert return_value.compiled["a"] == foo
assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)]
# sanity check, I can evaluate the result
evaluated = context.sheerka.evaluate_concept(context, return_value)
assert evaluated.key == return_value.key
assert evaluated.get_prop("a") == foo.init_key()
assert evaluated.get_prop("b") == 2
def test_i_can_parse_when_multiple_concepts_are_recognized(self):
foo = Concept("foo")
bar = Concept("bar")
plus_1 = Concept("a plus b", body="body1").def_prop("a").def_prop("b")
plus_2 = Concept("a plus b", body="body2").def_prop("a").def_prop("b")
context, input_return_values = self.init([foo, bar, plus_1, plus_2], [foo, " plus ", bar])
parser = ConceptsWithConceptsParser()
result = parser.parse(context, input_return_values.body)
assert len(result) == 2
res = result[0]
wrapper = res.value
return_value = res.value.value
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert res.who == wrapper.parser.name
assert wrapper.source == "foo plus bar"
assert context.sheerka.isinstance(return_value, plus_1)
assert return_value.compiled["a"] == foo
assert return_value.compiled["b"] == bar
res = result[1]
wrapper = res.value
return_value = res.value.value
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert res.who == wrapper.parser.name
assert wrapper.source == "foo plus bar"
assert context.sheerka.isinstance(return_value, plus_2)
assert return_value.compiled["a"] == foo
assert return_value.compiled["b"] == bar
def test_i_cannot_parse_when_unknown_concept(self):
foo = Concept("foo")
bar = Concept("bar")
context, input_return_values = self.init([foo, bar], [foo, " plus ", bar])
parser = ConceptsWithConceptsParser()
result = parser.parse(context, input_return_values.body)
wrapper = result.body
return_value = result.body.body
assert not result.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME)
assert result.who == parser.name
assert return_value == input_return_values.body.body
+345
View File
@@ -0,0 +1,345 @@
import pytest
import ast
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from parsers.ConceptLexerParser import OrderedChoice, StrMatch
from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.BnfParser import BnfParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def get_def_concept(name, where=None, pre=None, post=None, body=None, definition=None):
def_concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
if body:
def_concept.body = get_concept_part(body)
if where:
def_concept.where = get_concept_part(where)
if pre:
def_concept.pre = get_concept_part(pre)
if post:
def_concept.post = get_concept_part(post)
if definition:
def_concept.definition = ReturnValueConcept(
"parsers.Bnf",
True,
definition)
return def_concept
def get_concept_part(part):
if isinstance(part, str):
node = PythonNode(part, ast.parse(part, mode="eval"))
return ReturnValueConcept(
who="parsers.Default",
status=True,
value=ParserResultConcept(
source=part,
parser=PythonParser(),
value=node))
if isinstance(part, PythonNode):
return ReturnValueConcept(
who="parsers.Default",
status=True,
value=ParserResultConcept(
source=part.source,
parser=PythonParser(),
value=part))
if isinstance(part, ReturnValueConcept):
return part
class TestDefaultParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [
("def concept hello", get_def_concept(name="hello")),
("def concept hello ", get_def_concept(name="hello")),
("def concept a + b", get_def_concept(name="a + b")),
("def concept a+b", get_def_concept(name="a + b")),
("def concept 'a+b'+c", get_def_concept(name="'a+b' + c")),
("def concept 'as if'", get_def_concept(name="'as if'")),
("def concept 'as' if", get_def_concept(name="'as if'")),
("def concept hello as 'hello'", get_def_concept(name="hello", body="'hello'")),
("def concept hello as 1", get_def_concept(name="hello", body="1")),
("def concept hello as 1 + 1", get_def_concept(name="hello", body="1 + 1")),
])
def test_i_can_parse_def_concept(self, text, expected):
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
node = res.value.value
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_parse_complex_def_concept_statement(self):
text = """def concept a plus b
where a,b
pre isinstance(a, int) and isinstance(b, float)
post isinstance(res, int)
as res = a + b
"""
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
return_value = res.value
expected_concept = get_def_concept(
name="a plus b",
where="a,b",
pre="isinstance(a, int) and isinstance(b, float)",
post="isinstance(res, int)",
body=PythonNode("res = a + b", ast.parse("res = a + b", mode="exec"))
)
assert res.status
assert isinstance(return_value, ParserResultConcept)
assert return_value.value == expected_concept
def test_i_can_have_mutilines_declarations(self):
text = """
def concept add one to a as
def func(x):
return x+1
func(a)
"""
expected_concept = get_def_concept(
name="add one to a ",
body=PythonNode(
"def func(x):\n return x+1\nfunc(a)",
ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec"))
)
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
return_value = res.value
assert res.status
assert isinstance(return_value, ParserResultConcept)
assert return_value.value == expected_concept
def test_i_can_use_colon_to_use_indentation(self):
text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
"""
expected_concept = get_def_concept(
name="add one to a ",
body=PythonNode(
"def func(x):\n return x+1\nfunc(a)",
ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec"))
)
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
return_value = res.value
assert res.status
assert isinstance(return_value, ParserResultConcept)
assert return_value.value == expected_concept
def test_indentation_is_mandatory_after_a_colon(self):
text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
"""
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
return_value = res.value
assert not res.status
assert isinstance(return_value, ParserResultConcept)
assert isinstance(return_value.value[0], SyntaxErrorNode)
assert return_value.value[0].message == "Indentation not found."
def test_indentation_is_not_allowed_if_the_colon_is_missing(self):
text = """
def concept add one to a as
def func(x):
return x+1
func(a)
"""
context = self.get_context()
parser = DefaultParser()
res = parser.parse(context, text)
return_value = res.value
assert not res.status
assert context.sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
def test_name_is_mandatory(self):
text = "def concept as 'hello'"
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
return_value = res.value
assert not res.status
assert isinstance(return_value, ParserResultConcept)
assert isinstance(return_value.value[0], SyntaxErrorNode)
assert return_value.value[0].message == "Name is mandatory"
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
expected_concept = get_def_concept(name="hello", body="a", where="b", pre="c", post="d")
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
return_value = res.value
assert not res.status
assert isinstance(return_value, ParserResultConcept)
assert isinstance(return_value.value[0], UnexpectedTokenErrorNode)
assert return_value.value[0].message == "Syntax error."
assert return_value.value[0].expected_tokens == [Keywords.CONCEPT]
assert return_value.try_parsed == expected_concept
@pytest.mark.parametrize("text", [
"def concept hello where 1+",
"def concept hello pre 1+",
"def concept hello post 1+",
"def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(self, text):
context = self.get_context()
sheerka = context.sheerka
parser = DefaultParser()
res = parser.parse(context, text)
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
def test_new_line_is_not_allowed_in_the_name(self):
text = "def concept hello \n my friend as 'hello'"
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
return_value = res.value
assert not res.status
assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")]
def test_i_can_parse_def_concept_from_regex(self):
context = self.get_context()
a_concept = Concept("a_concept")
context.sheerka.add_in_cache(a_concept)
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
parser = DefaultParser()
res = parser.parse(context, text)
node = res.value.value
definition = OrderedChoice(a_concept, StrMatch("a_string"))
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition)
expected = get_def_concept(name="name", body="__definition[0]", definition=parser_result)
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_parse_def_concept_where_bnf_references_itself(self):
context = self.get_context()
a_concept = Concept("a_concept")
context.sheerka.add_in_cache(a_concept)
text = "def concept name from bnf 'a' + name?"
parser = DefaultParser()
parser.parse(context, text)
assert not parser.has_error
def test_i_can_detect_empty_bnf_declaration(self):
text = "def concept name from bnf as __definition[0]"
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
assert not res.status
assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration")
def test_i_can_detect_not_for_me(self):
text = "hello world"
context = self.get_context()
parser = DefaultParser()
res = parser.parse(context, text)
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.value.body[0], CannotHandleErrorNode)
def test_i_can_parse_is_a(self):
parser = DefaultParser()
text = "the name of my 'concept' isa the name of the set"
res = parser.parse(self.get_context(), text)
expected = IsaConceptNode([],
concept=NameNode(list(Tokenizer("the name of my 'concept'"))),
set=NameNode(list(Tokenizer("the name of the set"))))
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
assert res.value.value == expected
@pytest.mark.parametrize("text", [
"concept",
"isa number",
"name isa",
"def",
"def concept_name"
])
def test_i_cannot_parse_invalid_entries(self, text):
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
assert not res.status
assert isinstance(res.body, ParserResultConcept)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [
("'name", "Missing Trailing quote", "'name"),
("foo isa 'name", "Missing Trailing quote", "'name"),
("def concept 'name", "Missing Trailing quote", "'name"),
("def concept name as 'body", "Missing Trailing quote", "'body"),
("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"),
("def concept c::", "Concept name not found", ""),
])
def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text):
parser = DefaultParser()
res = parser.parse(self.get_context(), text)
assert not res.status
assert isinstance(res.body, ParserResultConcept)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
context = self.get_context()
text = "def concept name from bnf unknown"
parser = DefaultParser()
res = parser.parse(context, text)
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ("key", "unknown")
+138
View File
@@ -0,0 +1,138 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer
from parsers.ExactConceptParser import ExactConceptParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def metadata_prop(concept, prop_name):
for name, value in concept.metadata.props:
if name == prop_name:
return value
return None
def get_concept(name, variables):
c = Concept(name=name)
if variables:
for v in variables:
c.def_prop(v)
c.init_key()
return c
class TestExactConceptParser(TestUsingMemoryBasedSheerka):
def test_i_can_compute_combinations(self):
parser = ExactConceptParser()
res = parser.combinations(["foo", "bar", "baz"])
assert res == {('foo', 'bar', 'baz'),
('__var__0', 'bar', 'baz'),
('foo', '__var__0', 'baz'),
('foo', 'bar', '__var__0'),
('__var__0', '__var__1', 'baz'),
('__var__0', 'bar', '__var__1'),
('foo', '__var__0', '__var__1'),
('__var__0', '__var__1', '__var__2')}
def test_i_can_compute_combinations_with_duplicates(self):
parser = ExactConceptParser()
res = parser.combinations(["foo", "bar", "foo"])
assert res == {('foo', 'bar', 'foo'),
('__var__0', 'bar', '__var__0'),
('foo', '__var__0', 'foo'),
('__var__0', '__var__1', '__var__0'),
('__var__1', '__var__0', '__var__1')}
# TODO: the last tuple is not possible, so the algo can be improved
def test_i_can_recognize_a_simple_concept(self):
context = self.get_context()
concept = get_concept("hello world", [])
context.sheerka.add_in_cache(concept)
source = "hello world"
results = ExactConceptParser().parse(context, source)
assert len(results) == 1
assert results[0].status
assert results[0].value.value == concept
def test_i_can_recognize_concepts_defined_several_times(self):
context = self.get_context()
context.sheerka.add_in_cache(get_concept("hello world", []))
context.sheerka.add_in_cache(get_concept("hello a", ["a"]))
source = "hello world"
results = ExactConceptParser().parse(context, source)
assert len(results) == 2
results = sorted(results, key=lambda x: x.value.value.name) # because of the usage of sets
assert results[0].status
assert results[0].value.value.name == "hello a"
assert metadata_prop(results[0].value.value, "a") == "world"
assert results[1].status
assert results[1].value.value.name == "hello world"
def test_i_can_recognize_a_concept_with_variables(self):
context = self.get_context()
concept = get_concept("a + b", ["a", "b"])
context.sheerka.add_in_cache(concept)
source = "10 + 5"
results = ExactConceptParser().parse(context, source)
assert len(results) == 1
assert results[0].status
concept_found = results[0].value.value
assert concept_found.key == concept.key
assert metadata_prop(concept_found, "a") == "10"
assert metadata_prop(concept_found, "b") == "5"
def test_i_can_recognize_a_concept_with_duplicate_variables(self):
context = self.get_context()
concept = get_concept("a + b + a", ["a", "b"])
context.sheerka.cache_by_key[concept.key] = concept
source = "10 + 5 + 10"
results = ExactConceptParser().parse(context, source)
assert len(results) == 1
assert results[0].status
concept_found = results[0].value.value
assert concept_found.key == concept.key
assert metadata_prop(concept_found, "a") == "10"
assert metadata_prop(concept_found, "b") == "5"
def test_i_can_manage_unknown_concept(self):
context = self.get_context()
source = "def concept hello world" # this is not a concept by itself
res = ExactConceptParser().parse(context, source)
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == "def concept hello world"
def test_i_can_detect_concepts_too_long(self):
context = self.get_context()
source = "a very very long concept that cannot be an unique one"
res = ExactConceptParser().parse(context, source)
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.CONCEPT_TOO_LONG)
assert res.value.body == "a very very long concept that cannot be an unique one"
def test_i_can_detect_concept_from_tokens(self):
context = self.get_context()
concept = get_concept("hello world", [])
context.sheerka.add_in_cache(concept)
source = "hello world"
results = ExactConceptParser().parse(context, list(Tokenizer(source)))
assert len(results) == 1
assert results[0].status
assert results[0].value.value == concept
@@ -0,0 +1,215 @@
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, SourceCodeNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def get_return_value(context, grammar, expression):
parser = ConceptLexerParser()
parser.initialize(context, grammar)
ret_val = parser.parse(context, expression)
assert not ret_val.status
return ret_val
class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka):
def init(self, concepts, grammar, expression):
context = self.get_context()
for c in concepts:
context.sheerka.create_new_concept(context, c)
return_value = get_return_value(context, grammar, expression)
return context, return_value
def test_not_interested_if_not_parser_result(self):
context = self.get_context()
text = "not parser result"
res = MultipleConceptsParser().parse(context, text)
assert res is None
def test_not_interested_if_not_from_concept_lexer_parser(self):
context = self.get_context()
text = ParserResultConcept(parser="not concept lexer", value="some value")
res = MultipleConceptsParser().parse(context, text)
assert res is None
def test_i_can_parse_exact_concepts(self):
foo = Concept("foo", body="'foo'")
bar = Concept("bar", body="'bar'")
baz = Concept("baz", body="'baz'")
grammar = {}
context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
assert ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [
ConceptNode(bar, 0, 0, source="bar"),
ConceptNode(foo, 2, 2, source="foo"),
ConceptNode(baz, 4, 4, source="baz")]
assert ret_val.value.source == "bar foo baz"
def test_i_can_parse_when_ending_with_bnf(self):
foo = Concept("foo", body="'foo'")
bar = Concept("bar", body="'bar'")
grammar = {foo: Sequence("foo1", "foo2", "foo3")}
context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
assert ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")]
assert ret_val.value.source == "bar foo1 foo2 foo3"
def test_i_can_parse_when_starting_with_bnf(self):
foo = Concept("foo", body="'foo'")
bar = Concept("bar", body="'bar'")
grammar = {foo: Sequence("foo1", "foo2", "foo3")}
context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
assert ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")]
assert ret_val.value.source == "foo1 foo2 foo3 bar"
def test_i_can_parse_when_concept_are_mixed(self):
foo = Concept("foo")
bar = Concept("bar")
baz = Concept("baz")
grammar = {foo: Sequence("foo1", "foo2", "foo3")}
context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
assert ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [
cnode("baz", 0, 0, "baz"),
cnode("foo", 2, 6, "foo1 foo2 foo3"),
cnode("bar", 8, 8, "bar")]
assert ret_val.value.source == "baz foo1 foo2 foo3 bar"
def test_i_can_parse_when_multiple_concepts_are_matching(self):
foo = Concept("foo")
bar = Concept("bar", body="bar1")
baz = Concept("bar", body="bar2")
grammar = {foo: "foo"}
context, return_value = self.init([foo, bar, baz], grammar, "foo bar")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
assert len(ret_val) == 2
assert ret_val[0].status
assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
assert ret_val[0].value.source == "foo bar"
assert ret_val[0].value.value[1].concept.metadata.body == "bar1"
assert ret_val[1].status
assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
assert ret_val[1].value.source == "foo bar"
assert ret_val[1].value.value[1].concept.metadata.body == "bar2"
def test_i_can_parse_when_source_code(self):
foo = Concept("foo")
grammar = {foo: "foo"}
context, return_value = self.init([foo], grammar, "1 foo")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
wrapper = ret_val.value
value = ret_val.value.value
assert ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert wrapper.source == "1 foo"
assert value == [
scnode(0, 1, "1 "),
cnode("foo", 2, 2, "foo")]
def test_i_cannot_parse_when_unrecognized_token(self):
twenty_two = Concept("twenty two")
one = Concept("one")
grammar = {twenty_two: Sequence("twenty", "two")}
context, return_value = self.init([twenty_two, one], grammar, "twenty two + one")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
assert not ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [
cnode("twenty two", 0, 2, "twenty two"),
utnode(3, 5, " + "),
cnode("one", 6, 6, "one")
]
assert ret_val.value.source == "twenty two + one"
def test_i_cannot_parse_when_unknown_concepts(self):
twenty_two = Concept("twenty two")
one = Concept("one")
grammar = {twenty_two: Sequence("twenty", "two")}
context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one")
parser = MultipleConceptsParser()
ret_val = parser.parse(context, return_value.body)
assert not ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [
cnode("twenty two", 0, 2, "twenty two"),
utnode(3, 5, " plus "),
cnode("one", 6, 6, "one")
]
assert ret_val.value.source == "twenty two plus one"
@pytest.mark.parametrize("text, expected_source, expected_end", [
("True", "True", 0),
("1 == 1", "1 == 1", 5),
("1!xdf", "1", 0),
("1", "1", 0),
])
def test_i_can_get_source_code_node(self, text, expected_source, expected_end):
tokens = list(Tokenizer(text))[:-1] # strip trailing EOF
start_index = 5 # a random number different of zero
res = MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens)
assert isinstance(res, SourceCodeNode)
assert isinstance(res.node, PythonNode)
assert res.source == expected_source
assert res.start == start_index
assert res.end == start_index + expected_end
def test_i_cannot_parse_null_text(self):
res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [])
assert res is None
eof = Token(TokenKind.EOF, "", 0, 0, 0)
res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof])
assert res is None
+77
View File
@@ -0,0 +1,77 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept
from core.tokenizer import Tokenizer, LexerError
from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class TestPythonParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [
("1+1", PythonNode("1+1", ast.parse("1+1", mode="eval"))),
("a=10", PythonNode("a=10", ast.parse("a=10", mode="exec"))),
])
def test_i_can_parse_a_simple_expression(self, text, expected):
parser = PythonParser()
res = parser.parse(self.get_context(), text)
assert res.status
assert res.who == parser.name
assert isinstance(res.value, ParserResultConcept)
assert res.value.value == expected
@pytest.mark.parametrize("text, expected", [
("1+1", PythonNode("1+1", ast.parse("1+1", mode="eval"))),
("a=10", PythonNode("a=10", ast.parse("a=10", mode="exec"))),
])
def test_i_can_parse_from_tokens(self, text, expected):
parser = PythonParser()
tokens = list(Tokenizer(text))
res = parser.parse(self.get_context(), tokens)
assert res.status
assert res.who == parser.name
assert isinstance(res.value, ParserResultConcept)
assert res.value.value == expected
@pytest.mark.parametrize("text", [
"1+",
"'name",
"foo = 'name"
])
def test_i_can_detect_error(self, text):
parser = PythonParser()
res = parser.parse(self.get_context(), text)
assert not res.status
assert res.who == parser.name
assert isinstance(res.value, ParserResultConcept)
assert isinstance(res.value.value[0], PythonErrorNode)
assert isinstance(res.value.value[0].exception, SyntaxError)
@pytest.mark.parametrize("text, error_msg, error_text", [
("c::", "Concept name not found", ""),
("c:: + 1", "Concept name not found", ""),
])
def test_i_can_detect_lexer_errors(self, text, error_msg, error_text):
parser = PythonParser()
res = parser.parse(self.get_context(), text)
assert not res.status
assert isinstance(res.body, ParserResultConcept)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
def test_i_can_parse_a_concept(self):
text = "c:concept_name: + 1"
parser = PythonParser()
res = parser.parse(self.get_context(), text)
assert res
assert res.value.value == PythonNode(
"c:concept_name: + 1",
ast.parse("__C__USE_CONCEPT__concept_name__C__+1", mode="eval"))
@@ -0,0 +1,138 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode, PythonErrorNode
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
multiple_concepts_parser = MultipleConceptsParser()
def ret_val(*args):
result = []
index = 0
for item in args:
if isinstance(item, Concept):
tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
result.append(ConceptNode(item, index, index, tokens, item.name))
index += 1
else:
tokens = list(Tokenizer(item))
result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
index += len(tokens)
return ReturnValueConcept("who", False, ParserResultConcept(parser=multiple_concepts_parser, value=result))
def to_str_ast(expression):
return PythonNode.get_dump(ast.parse(expression, mode="eval"))
class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, interested", [
("not parser result", False),
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
])
def test_not_interested(self, text, interested):
context = self.get_context()
res = PythonWithConceptsParser().parse(context, text)
if interested:
assert res is not None
else:
assert res is None
def test_i_can_parse_concepts_and_python(self):
context = self.get_context()
foo = Concept("foo")
input_return_value = ret_val(foo, " + 1")
parser = PythonWithConceptsParser()
result = parser.parse(context, input_return_value.body)
wrapper = result.value
return_value = result.value.value
assert result.status
assert result.who == parser.name
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert wrapper.source == "foo + 1"
assert isinstance(return_value, PythonNode)
assert return_value.source == "foo + 1"
assert return_value.get_dump(return_value.ast_) == to_str_ast("__C__foo__C__ + 1")
assert return_value.concepts["__C__foo__C__"] == foo
def test_i_can_parse_concepts_and_python_when_concept_is_known(self):
context = self.get_context()
foo = Concept("foo")
foo = context.sheerka.create_new_concept(context, foo).body.body
input_return_value = ret_val(foo, " + 1")
parser = PythonWithConceptsParser()
result = parser.parse(context, input_return_value.body)
wrapper = result.value
return_value = result.value.value
assert result.status
assert result.who == parser.name
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert wrapper.source == "foo + 1"
assert isinstance(return_value, PythonNode)
assert return_value.source == "foo + 1"
assert return_value.get_dump(return_value.ast_) == to_str_ast("__C__foo__1001__C__ + 1")
assert return_value.concepts["__C__foo__1001__C__"] == foo
def test_i_can_parse_when_concept_name_has_invalid_characters(self):
context = self.get_context()
foo = Concept("foo et > (,")
foo = context.sheerka.create_new_concept(context, foo).body.body
input_return_value = ret_val(foo, " + 1")
parser = PythonWithConceptsParser()
result = parser.parse(context, input_return_value.body)
return_value = result.value.value
assert result.status
assert return_value.concepts["__C__foo0et000000__1001__C__"] == foo
def test_python_ids_mappings_are_correct_when_concepts_with_the_same_name(self):
context = self.get_context()
foo1 = Concept("foo")
foo2 = Concept("foo")
foo3 = context.sheerka.create_new_concept(context, Concept("foo", body="foo3")).body.body
foo4 = context.sheerka.create_new_concept(context, Concept("foo", body="foo4")).body.body
input_return_value = ret_val(foo1, "+", foo2, "+", foo3, "+", foo4)
parser = PythonWithConceptsParser()
result = parser.parse(context, input_return_value.body)
return_value = result.value.value
assert result.status
assert return_value.concepts["__C__foo__C__"] == foo1
assert return_value.concepts["__C__foo_1__C__"] == foo2
assert return_value.concepts["__C__foo__1001__C__"] == foo3
assert return_value.concepts["__C__foo__1002__C__"] == foo4
def test_i_cannot_parse_if_syntax_error(self):
context = self.get_context()
foo = Concept("foo")
foo = context.sheerka.create_new_concept(context, foo).body.body
input_return_value = ret_val(foo, " + ")
parser = PythonWithConceptsParser()
result = parser.parse(context, input_return_value.body)
wrapper = result.value
return_value = result.value.value
assert not result.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert isinstance(return_value[0], PythonErrorNode)