import pytest

from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka


def get_return_value(context, grammar, expression):
    """Parse *expression* with a fresh ``BnfNodeParser`` and return its result.

    The parser is initialized with *grammar* first. The helper asserts that
    the BNF result has a falsy ``status``: every caller in this module feeds
    that unsuccessful result to ``MultipleConceptsParser``.
    """
    bnf_parser = BnfNodeParser()
    bnf_parser.initialize(context, grammar)
    outcome = bnf_parser.parse(context, expression)
    assert not outcome.status
    return outcome


class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka):
    """Tests for ``MultipleConceptsParser`` applied to ``BnfNodeParser`` results."""

    def init(self, concepts, grammar, expression):
        """Register *concepts* in a new context and run the BNF parser.

        Returns a ``(context, bnf_return_value)`` pair.
        """
        context = self.get_context()
        for concept in concepts:
            context.sheerka.create_new_concept(context, concept)
        return context, get_return_value(context, grammar, expression)

    def _parse(self, concepts, grammar, expression):
        """Run ``init`` and then ``MultipleConceptsParser`` on the BNF body.

        Returns ``(context, parser, result)`` so that tests can check the
        result against ``parser.name`` and the context's sheerka.
        """
        context, bnf_result = self.init(concepts, grammar, expression)
        parser = MultipleConceptsParser()
        return context, parser, parser.parse(context, bnf_result.body)

    def test_not_interested_if_not_parser_result(self):
        """A plain string input makes the parser return ``None``."""
        context = self.get_context()
        plain_text = "not parser result"

        assert MultipleConceptsParser().parse(context, plain_text) is None

    def test_not_interested_if_not_from_concept_lexer_parser(self):
        """A parser result produced by another parser is answered with ``None``."""
        context = self.get_context()
        foreign_result = ParserResultConcept(parser="not concept lexer", value="some value")

        assert MultipleConceptsParser().parse(context, foreign_result) is None

    def test_i_can_parse_exact_concepts(self):
        """Three plain concepts separated by spaces yield three concept nodes."""
        foo = Concept("foo", body="'foo'")
        bar = Concept("bar", body="'bar'")
        baz = Concept("baz", body="'baz'")
        expression = "bar foo baz"

        context, parser, result = self._parse([foo, bar, baz], {}, expression)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            ConceptNode(bar, 0, 0, source="bar"),
            ConceptNode(foo, 2, 2, source="foo"),
            ConceptNode(baz, 4, 4, source="baz"),
        ]
        assert result.value.source == expression

    def test_i_can_parse_when_ending_with_bnf(self):
        """A plain concept followed by a BNF-defined concept is parsed."""
        foo = Concept("foo", body="'foo'")
        bar = Concept("bar", body="'bar'")
        bnf_grammar = {foo: Sequence("foo1", "foo2", "foo3")}
        expression = "bar foo1 foo2 foo3"

        context, parser, result = self._parse([foo, bar], bnf_grammar, expression)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("bar", 0, 0, "bar"),
            cnode("foo", 2, 6, "foo1 foo2 foo3"),
        ]
        assert result.value.source == expression

    def test_i_can_parse_when_starting_with_bnf(self):
        """A BNF-defined concept followed by a plain concept is parsed."""
        foo = Concept("foo", body="'foo'")
        bar = Concept("bar", body="'bar'")
        bnf_grammar = {foo: Sequence("foo1", "foo2", "foo3")}
        expression = "foo1 foo2 foo3 bar"

        context, parser, result = self._parse([foo, bar], bnf_grammar, expression)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("foo", 0, 4, "foo1 foo2 foo3"),
            cnode("bar", 6, 6, "bar"),
        ]
        assert result.value.source == expression

    def test_i_can_parse_when_concept_are_mixed(self):
        """A BNF-defined concept surrounded by plain concepts is parsed."""
        foo = Concept("foo")
        bar = Concept("bar")
        baz = Concept("baz")
        bnf_grammar = {foo: Sequence("foo1", "foo2", "foo3")}
        expression = "baz foo1 foo2 foo3 bar"

        context, parser, result = self._parse([foo, bar, baz], bnf_grammar, expression)

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("baz", 0, 0, "baz"),
            cnode("foo", 2, 6, "foo1 foo2 foo3"),
            cnode("bar", 8, 8, "bar"),
        ]
        assert result.value.source == expression

    def test_i_can_parse_when_multiple_concepts_are_matching(self):
        """Two concepts sharing the name "bar" produce one result per concept."""
        foo = Concept("foo")
        bar = Concept("bar", body="bar1")
        baz = Concept("bar", body="bar2")
        expression = "foo bar"

        _, _, results = self._parse([foo, bar, baz], {foo: "foo"}, expression)

        assert len(results) == 2
        # Results are expected in the order the homonymous concepts were created.
        for index, expected_body in enumerate(("bar1", "bar2")):
            assert results[index].status
            assert results[index].value.value == [
                cnode("foo", 0, 0, "foo"),
                cnode("bar", 2, 2, "bar"),
            ]
            assert results[index].value.source == expression
            assert results[index].value.value[1].concept.metadata.body == expected_body

    def test_i_can_parse_when_source_code(self):
        """Leading source code comes back as a source-code node before the concept."""
        foo = Concept("foo")
        expression = "1 foo"

        context, parser, result = self._parse([foo], {foo: "foo"}, expression)
        wrapper = result.value
        nodes = result.value.value

        assert result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert wrapper.source == expression
        assert nodes == [
            scnode(0, 1, "1 "),
            cnode("foo", 2, 2, "foo"),
        ]

    def test_i_cannot_parse_when_unrecognized_token(self):
        """An unrecognized " + " fails the parse and is reported as an ``utnode``."""
        twenty_two = Concept("twenty two")
        one = Concept("one")
        bnf_grammar = {twenty_two: Sequence("twenty", "two")}
        expression = "twenty two + one"

        context, parser, result = self._parse([twenty_two, one], bnf_grammar, expression)

        assert not result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("twenty two", 0, 2, "twenty two"),
            utnode(3, 5, " + "),
            cnode("one", 6, 6, "one"),
        ]
        assert result.value.source == expression

    def test_i_cannot_parse_when_unknown_concepts(self):
        """An unknown word " plus " fails the parse and is reported as an ``utnode``."""
        twenty_two = Concept("twenty two")
        one = Concept("one")
        bnf_grammar = {twenty_two: Sequence("twenty", "two")}
        expression = "twenty two plus one"

        context, parser, result = self._parse([twenty_two, one], bnf_grammar, expression)

        assert not result.status
        assert result.who == parser.name
        assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)
        assert result.value.value == [
            cnode("twenty two", 0, 2, "twenty two"),
            utnode(3, 5, " plus "),
            cnode("one", 6, 6, "one"),
        ]
        assert result.value.source == expression

    @pytest.mark.parametrize("text, expected_source, expected_end", [
        ("True", "True", 0),
        ("1 == 1", "1 == 1", 4),
        ("1!xdf", "1", 0),
        ("1", "1", 0),
    ])
    def test_i_can_get_source_code_node(self, text, expected_source, expected_end):
        """``get_source_code_node`` wraps the leading Python code of the tokens.

        ``expected_end`` is the offset of the node's end relative to the start
        index passed in.
        """
        tokens = list(Tokenizer(text))[:-1]  # strip trailing EOF
        start_index = 5  # an arbitrary non-zero start index

        node = MultipleConceptsParser().get_source_code_node(
            self.get_context(), start_index, tokens)

        assert isinstance(node, SourceCodeNode)
        assert isinstance(node.node, PythonNode)
        assert node.source == expected_source
        assert node.start == start_index
        assert node.end == start_index + expected_end

    def test_i_cannot_parse_null_text(self):
        """No source-code node is built from an empty or EOF-only token list."""
        nothing = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [])
        assert nothing is None

        eof = Token(TokenKind.EOF, "", 0, 0, 0)
        eof_only = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof])
        assert eof_only is None