import ast

import pytest

from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.rule import Rule
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from core.var_ref import VariableRef
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, RuleNode, VariableNode
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonParser import PythonNode
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import get_source_code_node

# Shared parser instance recorded as the producer of every result built by ret_val().
unrecognized_nodes_parser = UnrecognizedNodeParser()


def ret_val(*args):
    """Build a ``ReturnValueConcept`` wrapping a list of parser nodes.

    Each positional argument becomes one node, in order:

    * ``Concept``     -> ``ConceptNode`` (one IDENTIFIER token, one position)
    * ``Rule``        -> ``RuleNode`` (one RULE token, one position)
    * ``VariableRef`` -> ``VariableNode`` (tokens come from tokenizing ``prop``)
    * anything else   -> ``UnrecognizedTokensNode`` (the item itself is tokenized)

    The running position advances by one for concepts and rules, and by the
    number of produced tokens for the two tokenized kinds.

    :return: ``ReturnValueConcept("who", False, ParserResultConcept(...))`` whose
             parser result carries ``unrecognized_nodes_parser`` and the node list.
    """
    nodes = []
    position = 0

    for arg in args:
        if isinstance(arg, Concept):
            concept_tokens = [Token(TokenKind.IDENTIFIER, arg.name, 0, 0, 0)]
            nodes.append(ConceptNode(arg, position, position, concept_tokens, arg.name))
            position += 1
            continue

        if isinstance(arg, Rule):
            rule_tokens = [Token(TokenKind.RULE, (None, arg.id), 0, 0, 0)]
            nodes.append(RuleNode(arg, position, position, rule_tokens, f"r:|{arg.id}:"))
            position += 1
            continue

        if isinstance(arg, VariableRef):
            prop_tokens = list(Tokenizer(arg.prop, yield_eof=False))
            last = position + len(prop_tokens) - 1
            nodes.append(VariableNode(arg.obj, arg.prop, position, last, prop_tokens, f"{arg.prop}"))
            position += len(prop_tokens)
            continue

        # Fallback: the argument is handed to the tokenizer as is.
        raw_tokens = list(Tokenizer(arg, yield_eof=False))
        last = position + len(raw_tokens) - 1
        nodes.append(UnrecognizedTokensNode(position, last, raw_tokens))
        position += len(raw_tokens)

    parser_result = ParserResultConcept(parser=unrecognized_nodes_parser, value=nodes)
    return ReturnValueConcept("who", False, parser_result)


def to_str_ast(expression):
    """Return ``PythonNode.get_dump`` of ``expression`` parsed in ``eval`` mode."""
    tree = ast.parse(expression, mode="eval")
    return PythonNode.get_dump(tree)


class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
    """Tests for ``PythonWithConceptsParser.parse`` and ``parse_nodes``."""

    @pytest.mark.parametrize("text, interested", [
        ("not parser result", False),
        (ParserInput("not parser result"), False),
        (ParserResultConcept(parser="not multiple_concepts_parser"), False),
        (ParserResultConcept(parser=unrecognized_nodes_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
    ])
    def test_not_interested(self, text, interested):
        """``parse`` yields ``None`` exactly for the inputs flagged as not interesting."""
        context = self.get_context()

        outcome = PythonWithConceptsParser().parse(context, text)

        assert (outcome is not None) == interested

    def test_i_can_parse_concepts_python_and_variable_ref(self):
        """A concept, python text and a variable reference are merged into one PythonNode."""
        context = self.get_context()
        foo = Concept("foo")
        var_ref = VariableRef(foo, "var_name")
        parser_input = ret_val(foo, " + 1 + ", var_ref)
        expected_source = "__C__foo__C__ + 1 + __V__foo__var_name__V__"

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)
        parser_result = outcome.value
        python_node = outcome.value.value

        assert outcome.status
        assert outcome.who == python_parser.name
        assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == "foo + 1 + var_name"
        assert isinstance(python_node, PythonNode)
        assert python_node.source == expected_source
        assert python_node.original_source == "foo + 1 + var_name"
        assert python_node.get_dump(python_node.ast_) == to_str_ast(expected_source)
        assert python_node.objects == {
            "__C__foo__C__": foo,
            "__V__foo__var_name__V__": VariableRef(foo, "var_name"),
        }

    def test_i_can_parse_concepts_and_python_when_concept_is_known(self):
        """A concept created through sheerka is expected under the identifier ``__C__foo__1001__C__``."""
        context = self.get_context()
        foo = context.sheerka.create_new_concept(context, Concept("foo")).body.body
        parser_input = ret_val(foo, " + 1")
        expected_source = "__C__foo__1001__C__ + 1"

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)
        parser_result = outcome.value
        python_node = outcome.value.value

        assert outcome.status
        assert outcome.who == python_parser.name
        assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == "foo + 1"
        assert isinstance(python_node, PythonNode)
        assert python_node.source == expected_source
        assert python_node.original_source == "foo + 1"
        assert python_node.get_dump(python_node.ast_) == to_str_ast(expected_source)
        assert python_node.objects["__C__foo__1001__C__"] == foo

    def test_i_can_parse_when_concept_name_has_invalid_characters(self):
        """The concept named ``foo et > (,`` is expected under ``__C__foo0et000000__1001__C__``."""
        context = self.get_context()
        foo = context.sheerka.create_new_concept(context, Concept("foo et > (,")).body.body
        parser_input = ret_val(foo, " + 1")

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)
        python_node = outcome.value.value

        assert outcome.status
        assert python_node.objects["__C__foo0et000000__1001__C__"] == foo

    def test_i_can_parse_when_multiple_concepts(self):
        """Two concepts used as call arguments each get their own identifier."""
        _, context, foo, bar = self.init_concepts("foo", "bar")
        parser_input = ret_val("func(", foo, ", ", bar, ")")
        expected_source = "func(__C__foo__1001__C__, __C__bar__1002__C__)"

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)
        parser_result = outcome.value
        python_node = outcome.value.value

        assert outcome.status
        assert outcome.who == python_parser.name
        assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == "func(foo, bar)"
        assert isinstance(python_node, PythonNode)
        assert python_node.source == expected_source
        assert python_node.original_source == "func(foo, bar)"
        assert python_node.get_dump(python_node.ast_) == to_str_ast(expected_source)
        assert python_node.objects["__C__foo__1001__C__"] == foo
        assert python_node.objects["__C__bar__1002__C__"] == bar

    def test_i_can_parse_when_python_and_rule(self):
        """A rule node is expected under the identifier ``__R____rule_id__R__``."""
        context = self.get_context()
        rule = Rule().set_id("rule_id")
        parser_input = ret_val(rule, " + 1")
        expected_source = "__R____rule_id__R__ + 1"

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)
        parser_result = outcome.value
        python_node = outcome.value.value

        assert outcome.status
        assert outcome.who == python_parser.name
        assert context.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert parser_result.source == "r:|rule_id: + 1"
        assert isinstance(python_node, PythonNode)
        assert python_node.source == expected_source
        assert python_node.original_source == "r:|rule_id: + 1"
        assert python_node.get_dump(python_node.ast_) == to_str_ast(expected_source)
        assert python_node.objects["__R____rule_id__R__"] == rule

    def test_python_ids_mappings_are_correct_when_rules_with_the_same_id(self):
        """Two rules sharing an id are expected under distinct keys (``..._1`` for the second)."""
        context = self.get_context()
        first_rule = Rule().set_id("rule_id")
        second_rule = Rule().set_id("rule_id")
        parser_input = ret_val(first_rule, "+", second_rule)

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)
        python_node = outcome.value.value

        assert outcome.status
        assert python_node.objects["__R____rule_id__R__"] == first_rule
        assert python_node.objects["__R____rule_id_1__R__"] == second_rule

    def test_python_ids_mappings_are_correct_when_concepts_with_the_same_name(self):
        """Same-named concepts, two plain and two created through sheerka, get distinct keys."""
        context = self.get_context()
        sheerka = context.sheerka
        plain_first = Concept("foo")
        plain_second = Concept("foo")
        created_first = sheerka.create_new_concept(context, Concept("foo", body="foo3")).body.body
        created_second = sheerka.create_new_concept(context, Concept("foo", body="foo4")).body.body
        parser_input = ret_val(plain_first, "+", plain_second, "+", created_first, "+", created_second)

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)
        python_node = outcome.value.value

        assert outcome.status
        assert python_node.objects["__C__foo__C__"] == plain_first
        assert python_node.objects["__C__foo_1__C__"] == plain_second
        assert python_node.objects["__C__foo__1001__C__"] == created_first
        assert python_node.objects["__C__foo__1002__C__"] == created_second

    def test_i_cannot_parse_if_syntax_error(self):
        """Input ending in a dangling ``+`` gives a failed result whose value is NOT_FOR_ME."""
        context = self.get_context()
        foo = context.sheerka.create_new_concept(context, Concept("foo")).body.body
        parser_input = ret_val(foo, " + ")

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, parser_input.body)

        assert not outcome.status
        assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.NOT_FOR_ME)

    def test_i_can_parse_nodes_when_source_code_node(self):
        """``parse_nodes`` combines unrecognized tokens and source code nodes into one PythonNode."""
        context = self.get_context()
        foo = Concept("foo")
        bar = Concept("bar")
        nodes = [
            UnrecognizedTokensNode(0, 1, list(Tokenizer("not ", yield_eof=False))),
            get_source_code_node(2, "foo == 1", {"foo": foo}),
            UnrecognizedTokensNode(7, 9, list(Tokenizer(" and ", yield_eof=False))),
            get_source_code_node(10, "bar < 1", {"bar": bar}),
        ]
        expected_source = 'not __C__foo__C__ == 1 and __C__bar__C__ < 1'
        expected_ast = ast.parse(expected_source, "", 'eval')

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse_nodes(context, nodes)
        python_node = outcome.value.value

        assert isinstance(python_node, PythonNode)
        assert python_node.source == expected_source
        assert python_node.ast_str == PythonNode.get_dump(expected_ast)
        assert python_node.original_source == "not foo == 1 and bar < 1"
        assert python_node.objects == {"__C__foo__C__": foo, "__C__bar__C__": bar}

    def test_can_parse_after_unrecognized_bnf(self):
        """Output of BnfNodeParser followed by UnrecognizedNodeParser can be parsed."""
        concepts = [
            Concept("one", body="1"),
            Concept("two", body="2"),
            Concept("twenties", definition="'twenty' (one|two)=n", body='20 + n').def_var("n"),
        ]
        _, context, _one, _two, _twenties = self.init_concepts(*concepts, create_new=True)

        bnf_ret_val = BnfNodeParser().parse(context, ParserInput("a + twenty one"))
        unrecognized_ret_val = UnrecognizedNodeParser().parse(context, bnf_ret_val.body)

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, unrecognized_ret_val.body)

        assert outcome.status
        python_node = outcome.body.body
        assert isinstance(python_node, PythonNode)
        assert python_node.source == 'a + __C__twenties__1003__C__'
        assert "__C__twenties__1003__C__" in python_node.objects

    def test_i_cannot_parse_unrecognized_sequence(self):
        """Output of SequenceNodeParser followed by UnrecognizedNodeParser is rejected."""
        concepts = [
            Concept("one", body="1"),
            Concept("two", body="2"),
            Concept("twenties", definition="'twenty' (one|two)=n", body='20 + n').def_var("n"),
        ]
        _, context, _one, _two, _twenties = self.init_concepts(*concepts, create_new=True)

        sequence_ret_val = SequenceNodeParser().parse(context, ParserInput("a + twenty one"))
        unrecognized_ret_val = UnrecognizedNodeParser().parse(context, sequence_ret_val.body)

        python_parser = PythonWithConceptsParser()
        outcome = python_parser.parse(context, unrecognized_ret_val.body)

        assert not outcome.status