Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx

This commit is contained in:
2020-05-12 17:21:10 +02:00
parent 7d3a490bc5
commit 6e343ba996
110 changed files with 13865 additions and 7540 deletions
+216 -216
View File
@@ -1,216 +1,216 @@
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def get_return_value(context, grammar, expression):
    """Parse ``expression`` with a freshly initialized ``BnfNodeParser``.

    The parser is initialized with ``context`` and ``grammar`` before parsing.
    The helper asserts that the status of the result is falsy and then
    returns that result unchanged.
    """
    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(context, grammar)

    outcome = bnf_parser.parse(context, expression)
    assert not outcome.status
    return outcome
class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka):
    """Tests for ``MultipleConceptsParser`` fed with ``get_return_value`` results."""

    def init(self, concepts, grammar, expression):
        """Create a context, register ``concepts`` and pre-parse ``expression``.

        Returns a ``(context, return_value)`` pair where ``return_value`` is
        what ``get_return_value`` produced for ``grammar`` and ``expression``.
        """
        context = self.get_context()
        for concept in concepts:
            context.sheerka.create_new_concept(context, concept)

        return context, get_return_value(context, grammar, expression)

    def test_not_interested_if_not_parser_result(self):
        """``parse`` returns ``None`` when handed a plain string."""
        ctx = self.get_context()
        plain_text = "not parser result"

        assert MultipleConceptsParser().parse(ctx, plain_text) is None

    def test_not_interested_if_not_from_concept_lexer_parser(self):
        """``parse`` returns ``None`` for a parser result from another parser."""
        ctx = self.get_context()
        foreign_result = ParserResultConcept(parser="not concept lexer", value="some value")

        assert MultipleConceptsParser().parse(ctx, foreign_result) is None

    def test_i_can_parse_exact_concepts(self):
        """Three plain concepts and an empty grammar yield three concept nodes."""
        foo = Concept("foo", body="'foo'")
        bar = Concept("bar", body="'bar'")
        baz = Concept("baz", body="'baz'")
        context, bnf_result = self.init([foo, bar, baz], {}, "bar foo baz")

        parser = MultipleConceptsParser()
        result = parser.parse(context, bnf_result.body)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            ConceptNode(bar, 0, 0, source="bar"),
            ConceptNode(foo, 2, 2, source="foo"),
            ConceptNode(baz, 4, 4, source="baz"),
        ]
        assert result.value.source == "bar foo baz"

    def test_i_can_parse_when_ending_with_bnf(self):
        """The ``Sequence`` grammar of ``foo`` is matched at the end of the input."""
        foo = Concept("foo", body="'foo'")
        bar = Concept("bar", body="'bar'")
        rules = {foo: Sequence("foo1", "foo2", "foo3")}
        context, bnf_result = self.init([foo, bar], rules, "bar foo1 foo2 foo3")

        parser = MultipleConceptsParser()
        result = parser.parse(context, bnf_result.body)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("bar", 0, 0, "bar"),
            cnode("foo", 2, 6, "foo1 foo2 foo3"),
        ]
        assert result.value.source == "bar foo1 foo2 foo3"

    def test_i_can_parse_when_starting_with_bnf(self):
        """The ``Sequence`` grammar of ``foo`` is matched at the start of the input."""
        foo = Concept("foo", body="'foo'")
        bar = Concept("bar", body="'bar'")
        rules = {foo: Sequence("foo1", "foo2", "foo3")}
        context, bnf_result = self.init([foo, bar], rules, "foo1 foo2 foo3 bar")

        parser = MultipleConceptsParser()
        result = parser.parse(context, bnf_result.body)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("foo", 0, 4, "foo1 foo2 foo3"),
            cnode("bar", 6, 6, "bar"),
        ]
        assert result.value.source == "foo1 foo2 foo3 bar"

    def test_i_can_parse_when_concept_are_mixed(self):
        """The ``Sequence`` grammar of ``foo`` is matched between two plain concepts."""
        foo = Concept("foo")
        bar = Concept("bar")
        baz = Concept("baz")
        rules = {foo: Sequence("foo1", "foo2", "foo3")}
        context, bnf_result = self.init([foo, bar, baz], rules, "baz foo1 foo2 foo3 bar")

        parser = MultipleConceptsParser()
        result = parser.parse(context, bnf_result.body)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("baz", 0, 0, "baz"),
            cnode("foo", 2, 6, "foo1 foo2 foo3"),
            cnode("bar", 8, 8, "bar"),
        ]
        assert result.value.source == "baz foo1 foo2 foo3 bar"

    def test_i_can_parse_when_multiple_concepts_are_matching(self):
        """Two concepts sharing the name ``bar`` produce two successful results."""
        foo = Concept("foo")
        bar = Concept("bar", body="bar1")
        baz = Concept("bar", body="bar2")  # same name as ``bar``, different body
        rules = {foo: "foo"}
        context, bnf_result = self.init([foo, bar, baz], rules, "foo bar")

        parser = MultipleConceptsParser()
        results = parser.parse(context, bnf_result.body)

        assert len(results) == 2
        # Both results carry the same nodes and source; only the body of the
        # concept attached to the second node differs.
        for index, expected_body in enumerate(("bar1", "bar2")):
            result = results[index]
            assert result.status
            assert result.value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
            assert result.value.source == "foo bar"
            assert result.value.value[1].concept.metadata.body == expected_body

    def test_i_can_parse_when_source_code(self):
        """A leading ``1 `` is reported as a source code node before ``foo``."""
        foo = Concept("foo")
        rules = {foo: "foo"}
        context, bnf_result = self.init([foo], rules, "1 foo")

        parser = MultipleConceptsParser()
        result = parser.parse(context, bnf_result.body)
        wrapper = result.value
        nodes = wrapper.value

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert wrapper.source == "1 foo"
        assert nodes == [
            scnode(0, 1, "1 "),
            cnode("foo", 2, 2, "foo"),
        ]

    def test_i_cannot_parse_when_unrecognized_token(self):
        """A `` + `` between two concepts gives a falsy status and an ``utnode``."""
        twenty_two = Concept("twenty two")
        one = Concept("one")
        rules = {twenty_two: Sequence("twenty", "two")}
        context, bnf_result = self.init([twenty_two, one], rules, "twenty two + one")

        parser = MultipleConceptsParser()
        result = parser.parse(context, bnf_result.body)

        assert not result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("twenty two", 0, 2, "twenty two"),
            utnode(3, 5, " + "),
            cnode("one", 6, 6, "one"),
        ]
        assert result.value.source == "twenty two + one"

    def test_i_cannot_parse_when_unknown_concepts(self):
        """A `` plus `` between two concepts gives a falsy status and an ``utnode``."""
        twenty_two = Concept("twenty two")
        one = Concept("one")
        rules = {twenty_two: Sequence("twenty", "two")}
        context, bnf_result = self.init([twenty_two, one], rules, "twenty two plus one")

        parser = MultipleConceptsParser()
        result = parser.parse(context, bnf_result.body)

        assert not result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("twenty two", 0, 2, "twenty two"),
            utnode(3, 5, " plus "),
            cnode("one", 6, 6, "one"),
        ]
        assert result.value.source == "twenty two plus one"

    @pytest.mark.parametrize("text, expected_source, expected_end", [
        ("True", "True", 0),
        ("1 == 1", "1 == 1", 4),
        ("1!xdf", "1", 0),
        ("1", "1", 0),
    ])
    def test_i_can_get_source_code_node(self, text, expected_source, expected_end):
        """``get_source_code_node`` wraps a ``PythonNode`` with the expected span."""
        tokens = list(Tokenizer(text))[:-1]  # strip trailing EOF

        start_index = 5  # arbitrary non-zero offset
        parser = MultipleConceptsParser()
        node = parser.get_source_code_node(self.get_context(), start_index, tokens)

        assert isinstance(node, SourceCodeNode)
        assert isinstance(node.node, PythonNode)
        assert node.source == expected_source
        assert node.start == start_index
        assert node.end == start_index + expected_end

    def test_i_cannot_parse_null_text(self):
        """No node is returned for an empty token list or a lone EOF token."""
        assert MultipleConceptsParser().get_source_code_node(self.get_context(), 0, []) is None

        eof_only = [Token(TokenKind.EOF, "", 0, 0, 0)]
        assert MultipleConceptsParser().get_source_code_node(self.get_context(), 0, eof_only) is None
# import pytest
#
# from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
# from core.concept import Concept
# from core.tokenizer import Tokenizer, TokenKind, Token
# from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode, ConceptNode
# from parsers.BnfNodeParser import BnfNodeParser, Sequence
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from parsers.PythonParser import PythonNode
#
# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
#
#
# def get_return_value(context, grammar, expression):
# parser = BnfNodeParser()
# parser.initialize(context, grammar)
#
# ret_val = parser.parse(context, expression)
# assert not ret_val.status
# return ret_val
#
#
# class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka):
#
# def init(self, concepts, grammar, expression):
# context = self.get_context()
# for c in concepts:
# context.sheerka.create_new_concept(context, c)
# return_value = get_return_value(context, grammar, expression)
#
# return context, return_value
#
# def test_not_interested_if_not_parser_result(self):
# context = self.get_context()
# text = "not parser result"
#
# res = MultipleConceptsParser().parse(context, text)
# assert res is None
#
# def test_not_interested_if_not_from_concept_lexer_parser(self):
# context = self.get_context()
# text = ParserResultConcept(parser="not concept lexer", value="some value")
#
# res = MultipleConceptsParser().parse(context, text)
# assert res is None
#
# def test_i_can_parse_exact_concepts(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# baz = Concept("baz", body="'baz'")
# grammar = {}
# context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# ConceptNode(bar, 0, 0, source="bar"),
# ConceptNode(foo, 2, 2, source="foo"),
# ConceptNode(baz, 4, 4, source="baz")]
# assert ret_val.value.source == "bar foo baz"
#
# def test_i_can_parse_when_ending_with_bnf(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")]
# assert ret_val.value.source == "bar foo1 foo2 foo3"
#
# def test_i_can_parse_when_starting_with_bnf(self):
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")]
# assert ret_val.value.source == "foo1 foo2 foo3 bar"
#
# def test_i_can_parse_when_concept_are_mixed(self):
# foo = Concept("foo")
# bar = Concept("bar")
# baz = Concept("baz")
# grammar = {foo: Sequence("foo1", "foo2", "foo3")}
# context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("baz", 0, 0, "baz"),
# cnode("foo", 2, 6, "foo1 foo2 foo3"),
# cnode("bar", 8, 8, "bar")]
# assert ret_val.value.source == "baz foo1 foo2 foo3 bar"
#
# def test_i_can_parse_when_multiple_concepts_are_matching(self):
# foo = Concept("foo")
# bar = Concept("bar", body="bar1")
# baz = Concept("bar", body="bar2")
# grammar = {foo: "foo"}
# context, return_value = self.init([foo, bar, baz], grammar, "foo bar")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert len(ret_val) == 2
# assert ret_val[0].status
# assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
# assert ret_val[0].value.source == "foo bar"
# assert ret_val[0].value.value[1].concept.metadata.body == "bar1"
#
# assert ret_val[1].status
# assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
# assert ret_val[1].value.source == "foo bar"
# assert ret_val[1].value.value[1].concept.metadata.body == "bar2"
#
# def test_i_can_parse_when_source_code(self):
# foo = Concept("foo")
# grammar = {foo: "foo"}
# context, return_value = self.init([foo], grammar, "1 foo")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
# wrapper = ret_val.value
# value = ret_val.value.value
#
# assert ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
# assert wrapper.source == "1 foo"
# assert value == [
# scnode(0, 1, "1 "),
# cnode("foo", 2, 2, "foo")]
#
# def test_i_cannot_parse_when_unrecognized_token(self):
# twenty_two = Concept("twenty two")
# one = Concept("one")
# grammar = {twenty_two: Sequence("twenty", "two")}
# context, return_value = self.init([twenty_two, one], grammar, "twenty two + one")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert not ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("twenty two", 0, 2, "twenty two"),
# utnode(3, 5, " + "),
# cnode("one", 6, 6, "one")
# ]
# assert ret_val.value.source == "twenty two + one"
#
# def test_i_cannot_parse_when_unknown_concepts(self):
# twenty_two = Concept("twenty two")
# one = Concept("one")
# grammar = {twenty_two: Sequence("twenty", "two")}
# context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one")
#
# parser = MultipleConceptsParser()
# ret_val = parser.parse(context, return_value.body)
#
# assert not ret_val.status
# assert ret_val.who == parser.name
# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
# assert ret_val.value.value == [
# cnode("twenty two", 0, 2, "twenty two"),
# utnode(3, 5, " plus "),
# cnode("one", 6, 6, "one")
# ]
# assert ret_val.value.source == "twenty two plus one"
#
# @pytest.mark.parametrize("text, expected_source, expected_end", [
# ("True", "True", 0),
# ("1 == 1", "1 == 1", 4),
# ("1!xdf", "1", 0),
# ("1", "1", 0),
# ])
# def test_i_can_get_source_code_node(self, text, expected_source, expected_end):
# tokens = list(Tokenizer(text))[:-1] # strip trailing EOF
#
# start_index = 5 # an arbitrary non-zero number
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens)
#
# assert isinstance(res, SourceCodeNode)
# assert isinstance(res.node, PythonNode)
# assert res.source == expected_source
# assert res.start == start_index
# assert res.end == start_index + expected_end
#
# def test_i_cannot_parse_null_text(self):
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [])
# assert res is None
#
# eof = Token(TokenKind.EOF, "", 0, 0, 0)
# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof])
# assert res is None