"""Tests for MultipleConceptsParser applied to ConceptLexerParser output."""
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from sdp.sheerkaDataProvider import Event


def get_context():
    """Return an ExecutionContext wrapping a Sheerka initialized on "mem://"."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", Event(), engine)


def get_return_value(context, grammar, expression):
    """Run ConceptLexerParser over ``expression`` and return its return value.

    The lexer parser is initialized with ``grammar`` first. Its result is
    asserted to have a falsy ``status``: every test in this module feeds the
    second-stage parser with an input the lexer parser did not accept.
    """
    lexer_parser = ConceptLexerParser()
    lexer_parser.initialize(context, grammar)
    lexer_outcome = lexer_parser.parse(context, expression)
    assert not lexer_outcome.status
    return lexer_outcome


def init(concepts, grammar, expression):
    """Create a context, register ``concepts`` in it, then lex ``expression``.

    Returns a ``(context, return_value)`` pair where ``return_value`` comes
    from :func:`get_return_value`.
    """
    context = get_context()
    for concept in concepts:
        context.sheerka.create_new_concept(context, concept)
    return context, get_return_value(context, grammar, expression)


def _run_multiple_concepts_parser(concepts, grammar, expression):
    """Lex ``expression`` then hand the lexer result body to a fresh parser.

    Returns ``(context, parser, result)`` so callers can check the result
    against the parser that produced it.
    """
    context, lexer_outcome = init(concepts, grammar, expression)
    parser = MultipleConceptsParser()
    return context, parser, parser.parse(context, lexer_outcome.body)


def _check_envelope(context, parser, result, succeeded=True):
    """Assert the status, the origin and the PARSER_RESULT type of ``result``."""
    if succeeded:
        assert result.status
    else:
        assert not result.status
    assert result.who == parser.name
    assert context.sheerka.isinstance(result.value, BuiltinConcepts.PARSER_RESULT)


def test_not_interested_if_not_parser_result():
    """A plain string input makes the parser return None."""
    context = get_context()
    outcome = MultipleConceptsParser().parse(context, "not parser result")
    assert outcome is None


def test_not_interested_if_not_from_concept_lexer_parser():
    """A ParserResultConcept naming another parser makes the parser return None."""
    context = get_context()
    foreign_result = ParserResultConcept(parser="not concept lexer", value="some value")
    outcome = MultipleConceptsParser().parse(context, foreign_result)
    assert outcome is None


def test_i_can_parse_exact_concepts():
    """Three concepts and an empty grammar: each word yields one ConceptNode."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    baz = Concept("baz", body="'baz'")
    grammar = {}

    context, parser, result = _run_multiple_concepts_parser(
        [foo, bar, baz], grammar, "bar foo baz")

    _check_envelope(context, parser, result)
    expected_nodes = [
        ConceptNode(bar, 0, 0, source="bar"),
        ConceptNode(foo, 2, 2, source="foo"),
        ConceptNode(baz, 4, 4, source="baz"),
    ]
    assert result.value.value == expected_nodes
    assert result.value.source == "bar foo baz"


def test_i_can_parse_when_ending_with_bnf():
    """A plain concept followed by a grammar-defined sequence is parsed."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    grammar = {foo: Sequence("foo1", "foo2", "foo3")}

    context, parser, result = _run_multiple_concepts_parser(
        [foo, bar], grammar, "bar foo1 foo2 foo3")

    _check_envelope(context, parser, result)
    # NOTE(review): nodes are compared against plain tuples here, so the node
    # types presumably implement equality with tuples - confirm in
    # ConceptLexerParser.
    expected_nodes = [("bar", 0, 0, "bar"), ("foo", 2, 6, "foo1 foo2 foo3")]
    assert result.value.value == expected_nodes
    assert result.value.source == "bar foo1 foo2 foo3"


def test_i_can_parse_when_starting_with_bnf():
    """A grammar-defined sequence followed by a plain concept is parsed."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    grammar = {foo: Sequence("foo1", "foo2", "foo3")}

    context, parser, result = _run_multiple_concepts_parser(
        [foo, bar], grammar, "foo1 foo2 foo3 bar")

    _check_envelope(context, parser, result)
    expected_nodes = [("foo", 0, 4, "foo1 foo2 foo3"), ("bar", 6, 6, "bar")]
    assert result.value.value == expected_nodes
    assert result.value.source == "foo1 foo2 foo3 bar"


def test_i_can_parse_when_concept_are_mixed():
    """A grammar-defined sequence surrounded by plain concepts is parsed."""
    foo = Concept("foo")
    bar = Concept("bar")
    baz = Concept("baz")
    grammar = {foo: Sequence("foo1", "foo2", "foo3")}

    context, parser, result = _run_multiple_concepts_parser(
        [foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar")

    _check_envelope(context, parser, result)
    expected_nodes = [
        ("baz", 0, 0, "baz"),
        ("foo", 2, 6, "foo1 foo2 foo3"),
        ("bar", 8, 8, "bar"),
    ]
    assert result.value.value == expected_nodes
    assert result.value.source == "baz foo1 foo2 foo3 bar"


def test_i_can_parse_when_multiple_concept_are_matching():
    """Two concepts sharing the name "bar" produce two successful results."""
    foo = Concept("foo")
    bar = Concept("bar", body="bar1")
    baz = Concept("bar", body="bar2")  # same name as `bar`, different body
    grammar = {foo: "foo"}

    context, lexer_outcome = init([foo, bar, baz], grammar, "foo bar")
    parser = MultipleConceptsParser()
    results = parser.parse(context, lexer_outcome.body)

    assert len(results) == 2
    # Results are checked in order: first the "bar1" concept, then "bar2".
    for position, expected_body in enumerate(("bar1", "bar2")):
        assert results[position].status
        assert results[position].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")]
        assert results[position].value.source == "foo bar"
        assert results[position].value.value[1].concept.body == expected_body


def test_i_cannot_parse_when_unrecognized_token():
    """An unrecognized " + " span gives a failed result that still lists all nodes."""
    twenty_two = Concept("twenty two")
    one = Concept("one")
    grammar = {twenty_two: Sequence("twenty", "two")}

    context, parser, result = _run_multiple_concepts_parser(
        [twenty_two, one], grammar, "twenty two + one")

    _check_envelope(context, parser, result, succeeded=False)
    expected_nodes = [
        ("twenty two", 0, 2, "twenty two"),
        (3, 5, " + "),  # three-element entry: the span that matched no concept
        ("one", 6, 6, "one"),
    ]
    assert result.value.value == expected_nodes
    assert result.value.source == "twenty two + one"