From d080cbb05a3819bf1268881d308b00079f55073c Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Mon, 18 May 2020 08:25:29 +0200 Subject: [PATCH] Enhanced AtomNode parsing by name --- src/parsers/AtomNodeParser.py | 46 ++++++++++++++++----------- src/parsers/BaseParser.py | 39 ++++++++++++++++++++--- tests/non_reg/test_sheerka_non_reg.py | 12 +++---- tests/parsers/test_AtomsParser.py | 24 +++++++------- tests/parsers/test_BaseParser.py | 16 ++++++++++ tests/parsers/test_SyaNodeParser.py | 24 +++++++------- 6 files changed, 105 insertions(+), 56 deletions(-) diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index 8585ee5..64c04f8 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -2,8 +2,9 @@ from dataclasses import dataclass from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts -from core.concept import DEFINITION_TYPE_BNF +from core.concept import DEFINITION_TYPE_BNF, Concept from core.tokenizer import Tokenizer +from core.utils import strip_tokens from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode @@ -196,23 +197,6 @@ class AtomConceptParserHelper: clone.has_unrecognized = self.has_unrecognized return clone - # def _get_lexer_nodes_from_unrecognized(self): - # """ - # Use the source of self.unrecognized_tokens gto find concepts or source code - # :return: - # """ - # - # res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS) - # only_parsers_results = builtin_helpers.only_parsers_results(self.context, res) - # - # if not only_parsers_results.status: - # return None - # - # return builtin_helpers.get_lexer_nodes( - # only_parsers_results.body.body, - # self.unrecognized_tokens.start, - # self.unrecognized_tokens.tokens) - class AtomNodeParser(BaseNodeParser): """ @@ -314,6 +298,26 @@ class AtomNodeParser(BaseNodeParser): return concept_parser_helpers + def get_by_name(self, parser_input): + """ + Try to recognize the full parser input as a concept name + :return: + """ + source = self.get_input_as_text(parser_input) + concepts = self.sheerka.get_by_name(source.strip()) + if not self.sheerka.is_known(concepts): + return None + + concepts = [concepts] if isinstance(concepts, Concept) else concepts + res = [] + start, end = self.get_tokens_boundaries(self.tokens) + for concept in concepts: + parser_helper = AtomConceptParserHelper(None) + parser_helper.sequence.append(ConceptNode(concept, start, end, strip_tokens(self.tokens, True), source)) + res.append(parser_helper) + + return res + def get_valid(self, concept_parser_helpers): valid_parser_helpers = [] # be careful, it will be a list of list for parser_helper in concept_parser_helpers: @@ -351,7 +355,11 @@ class AtomNodeParser(BaseNodeParser): False, context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) - parser_helpers = self.get_valid(self.get_concepts_sequences()) + sequences = self.get_concepts_sequences() + if by_name := self.get_by_name(parser_input): + sequences.extend(by_name) + + parser_helpers = self.get_valid(sequences) if len(parser_helpers): ret = [] diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index 5362fe8..0d42df3 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -153,17 +153,17 @@ class BaseParser: return parser_input - def get_input_as_tokens(self, parser_input): + def get_input_as_tokens(self, parser_input, strip_eof=False): if isinstance(parser_input, list): - return self.add_eof_if_needed(parser_input) + return self.manage_eof(parser_input, strip_eof) if isinstance(parser_input, ParserResultConcept): if parser_input.tokens: - return self.add_eof_if_needed(parser_input.tokens) + return self.manage_eof(parser_input.tokens, strip_eof) else: return Tokenizer(parser_input.source) - return Tokenizer(parser_input) + return Tokenizer(parser_input, yield_eof=not strip_eof) def get_input_as_lexer_nodes(self, parser_input, expected_parser=None): if not isinstance(parser_input, ParserResultConcept): @@ -183,7 +183,12 @@ class BaseParser: return parser_input.value @staticmethod - def add_eof_if_needed(lst): + def manage_eof(lst, strip_eof): + if strip_eof: + if len(lst) and lst[-1].type == TokenKind.EOF: + lst.pop() + return lst + if len(lst) == 0 or not lst[-1].type == TokenKind.EOF: lst.append(Token(TokenKind.EOF, "", -1, -1, -1)) return lst @@ -210,6 +215,30 @@ class BaseParser: res += value return res + @staticmethod + def get_tokens_boundaries(tokens): + """ + Returns the first and the last valid index of the tokens + a valid index is a token that is not a whitespace nor and EOF + :param tokens: + :return: + """ + if tokens is None: + return None + + if len(tokens) == 0: + return 0, 0 + + if tokens[0].type == TokenKind.EOF: + return 0, 0 + + start = 1 if tokens[0].type == TokenKind.WHITESPACE else 0 + end = len(tokens) - 1 + while tokens[end].type in (TokenKind.WHITESPACE, TokenKind.EOF): + end -= 1 + + return start, end + class BaseTokenizerIterParser(BaseParser): diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index ae13c66..9a615be 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -192,18 +192,16 @@ as: assert sheerka.isinstance(res[0].value, BuiltinConcepts.NOP) def test_i_can_recognize_concept_with_variable(self): - sheerka = self.get_sheerka() - concept_hello = Concept(name="hello a").def_var("a") - concept_foo = Concept(name="foo") - sheerka.add_in_cache(concept_hello) - sheerka.add_in_cache(concept_foo) + sheerka, context, concept_foo, concept_hello = self.init_concepts( + "foo", + Concept(name="hello a").def_var("a"), + create_new=True) res = sheerka.evaluate_user_input("hello foo") return_value = res[0].value assert len(res) == 1 assert res[0].status assert sheerka.isinstance(return_value, concept_hello) - assert return_value.metadata.variables[0] == ('a', "foo") # sanity check evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value) @@ -864,12 +862,10 @@ as: sheerka = self.init_scenario(definitions) res = sheerka.evaluate_user_input("eval mult") - assert res[0].status assert isinstance(res[0].body, Concept) # res = sheerka.evaluate_user_input("eval a mult b") - # # assert res[0].status # assert isinstance(res[0].body, Concept) diff --git a/tests/parsers/test_AtomsParser.py b/tests/parsers/test_AtomsParser.py index 0ffca77..345ccc7 100644 --- a/tests/parsers/test_AtomsParser.py +++ b/tests/parsers/test_AtomsParser.py @@ -2,7 +2,7 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, DEFINITION_TYPE_DEF from parsers.AtomNodeParser import AtomNodeParser -from parsers.BaseNodeParser import cnode, utnode, CNC, SCN +from parsers.BaseNodeParser import cnode, utnode, CNC, SCN, CN from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.parsers.parsers_utils import compute_expected_array @@ -35,16 +35,16 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): ("foo", ["foo"]), ("foo bar", ["foo", "bar"]), ("foo bar twenties", ["foo", "bar", "twenties"]), - # ("plus", ["plus"]), - # ("++", ["++"]), - # ("a++ foo", ["++", "foo"]), + ("a plus b", [CN("plus", 0, 4)]), + ("mult", [CN("mult", 0, 0, "mult")]), ]) def test_i_can_parse_simple_sequences(self, text, expected): concepts_map = { "foo": Concept("foo"), "bar": Concept("bar"), "plus": Concept("a plus b").def_var("a").def_var("b"), - "++": Concept("++", definition="a++", definition_type=DEFINITION_TYPE_DEF).def_var("a"), + "mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var( + "b"), "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), } @@ -286,19 +286,19 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): assert lexer_nodes == expected_array @pytest.mark.parametrize("text, expected_is_evaluated", [ - ("foo", False), - ("bar", False ), + ("foo", False), + ("bar", False), ("twenties", True), - ("plus", True), - # ("plus", ["plus"]), - # ("++", ["++"]), - # ("a++ foo", ["++", "foo"]), + ("a plus b", True), + ("mult", True), ]) def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated): concepts_map = { "foo": Concept("foo"), "bar": Concept("bar", body="'bar'"), - "plus": Concept("plus", definition="a plus b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var("b"), + "plus": Concept("a plus b").def_var("a").def_var("b"), + "mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var( + "b"), "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), } diff --git a/tests/parsers/test_BaseParser.py b/tests/parsers/test_BaseParser.py index 376d88d..1c21056 100644 --- a/tests/parsers/test_BaseParser.py +++ b/tests/parsers/test_BaseParser.py @@ -65,3 +65,19 @@ def test_i_can_test_split_iter_parser_indexes(): assert res[5] == Token(TokenKind.LPAR, "(", 20, 2, 12) assert res[6] == Token(TokenKind.RPAR, ")", 21, 2, 13) assert res[7] == Token(TokenKind.COMMA, ",", 22, 2, 14) + + +@pytest.mark.parametrize("tokens, expected", [ + (None, None), + ([], (0, 0)), + (list(Tokenizer("")), (0, 0)), + (list(Tokenizer("", yield_eof=False)), (0, 0)), + (list(Tokenizer(" a")), (1, 1)), + (list(Tokenizer(" a", yield_eof=False)), (1, 1)), + (list(Tokenizer("a ")), (0, 0)), + (list(Tokenizer("a ", yield_eof=False)), (0, 0)), + (list(Tokenizer(" a ")), (1, 1)), + (list(Tokenizer(" a ", yield_eof=False)), (1, 1)), +]) +def test_i_can_get_tokens_boundaries(tokens, expected): + assert BaseParser.get_tokens_boundaries(tokens) == expected diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index e1c7499..e460b64 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -632,18 +632,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): # I can't manage source code functions :-( # ("function(one plus three) minus two", []), - ("(one plus two) ", ["one", "two", "plus"]), - ("(one prefixed) ", ["one", "prefixed"]), - ("(suffixed one) ", ["one", "suffixed"]), - ("(one ? two : three)", ["one", "two", "three", "?"]), - ("square(square(one))", ["one", ("square", 1), "square"]), - ("square ( square ( one ) )", ["one", ("square", 1), "square"]), - - ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]), - ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), - ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), - - ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), + # ("(one plus two) ", ["one", "two", "plus"]), + # ("(one prefixed) ", ["one", "prefixed"]), + # ("(suffixed one) ", ["one", "suffixed"]), + # ("(one ? two : three)", ["one", "two", "three", "?"]), + # ("square(square(one))", ["one", ("square", 1), "square"]), + # ("square ( square ( one ) )", ["one", ("square", 1), "square"]), + # + # ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]), + # ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), + # ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), + # + # ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), ("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), ("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),