From c498b394e3189122516cba6b3c9550022a1e38ba Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Fri, 29 May 2020 08:52:06 +0200 Subject: [PATCH] Fixed first token recognition when creating bnf concepts --- src/parsers/AtomNodeParser.py | 3 ++ src/parsers/BaseParser.py | 8 +++- src/parsers/BnfNodeParser.py | 10 ++++- src/parsers/BnfParser.py | 11 +++++ src/parsers/SyaNodeParser.py | 5 ++- src/sheerkapickle/SheerkaPickler.py | 1 - tests/core/test_SheerkaCreateNewConcept.py | 28 ++++++++++++- tests/parsers/test_BaseNodeParser.py | 1 + tests/parsers/test_BnfNodeParser.py | 47 +++++++++++++++++++++- tests/parsers/test_BnfParser.py | 5 ++- tests/parsers/test_DefaultParser.py | 30 +++++++------- 11 files changed, 125 insertions(+), 24 deletions(-) diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index a2f73ac..7a6a567 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -348,6 +348,9 @@ class AtomNodeParser(BaseNodeParser): return valid_parser_helpers def parse(self, context, parser_input: ParserInput): + if not isinstance(parser_input, ParserInput): + return None + if parser_input.is_empty(): return context.sheerka.ret( self.name, diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index dcf32c5..62d57f8 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -162,11 +162,17 @@ class BaseParser: if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept): return self.error_sink[0] + if self.has_error: + return sheerka.new( + BuiltinConcepts.ERROR, + body=self.error_sink + ) + return sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, source=source, - body=self.error_sink if self.has_error else tree, + body=tree, try_parsed=try_parse) def get_input_as_text(self, parser_input, custom_switcher=None, tracker=None): diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index dd44cf7..925ae8c 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -391,10 +391,11 @@ class StrMatch(Match): Matches a literal """ - def __init__(self, to_match, rule_name="", ignore_case=True): + def __init__(self, to_match, rule_name="", ignore_case=True, skip_whitespace=True): super(Match, self).__init__(rule_name=rule_name) self.to_match = to_match self.ignore_case = ignore_case + self.skip_white_space = skip_whitespace def __repr__(self): return self.add_rule_name_if_needed(f"'{self.to_match}'") @@ -415,7 +416,7 @@ class StrMatch(Match): if m: node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value) - parser_helper.next_token() + parser_helper.next_token(self.skip_white_space) return node return None @@ -469,6 +470,8 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor): return self.STOP def visit_StrMatch(self, pe): + if not pe.to_match: + return self.add_first_token(pe.to_match) return self.STOP @@ -947,6 +950,9 @@ class BnfNodeParser(BaseNodeParser): :return: """ + if not isinstance(parser_input, ParserInput): + return None + context.log(f"Parsing '{parser_input}' with BnfNode", self.name) sheerka = context.sheerka diff --git a/src/parsers/BnfParser.py b/src/parsers/BnfParser.py index 79dd750..04a9bff 100644 --- a/src/parsers/BnfParser.py +++ b/src/parsers/BnfParser.py @@ -266,6 +266,17 @@ class BnfParser(BaseParser): expr.rule_name = concept.name return self.eat_rule_name_if_needed(expr) + if token.type == TokenKind.STRING: + self.next_token() + tokens = list(Tokenizer(token.strip_quote, yield_eof=False)) + if len(tokens) == 1: + return self.eat_rule_name_if_needed(StrMatch(tokens[0].str_value)) + else: + elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens] + elements[-1].skip_white_space = True + ret = Sequence(*elements) + return self.eat_rule_name_if_needed(ret) + ret = StrMatch(core.utils.strip_quotes(token.value)) self.next_token() return self.eat_rule_name_if_needed(ret) diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index ed8ebeb..717cd44 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -313,7 +313,6 @@ class InFixToPostFix: """ return len(self.stack) > 0 and isinstance(self.stack[-1], type) - def _make_source_code_with_concept(self, start, rpar_token, end): """ @@ -1068,6 +1067,10 @@ class SyaNodeParser(BaseNodeParser): :param parser_input: :return: """ + + if not isinstance(parser_input, ParserInput): + return None + if parser_input.is_empty(): return context.sheerka.ret( self.name, diff --git a/src/sheerkapickle/SheerkaPickler.py b/src/sheerkapickle/SheerkaPickler.py index 3bcec93..29104ea 100644 --- a/src/sheerkapickle/SheerkaPickler.py +++ b/src/sheerkapickle/SheerkaPickler.py @@ -4,7 +4,6 @@ from logging import Logger import core.utils from core.concept import Concept from core.sheerka.services.SheerkaExecute import ParserInput - from sheerkapickle import utils, tags, handlers diff --git a/tests/core/test_SheerkaCreateNewConcept.py b/tests/core/test_SheerkaCreateNewConcept.py index 4ff516f..32c115c 100644 --- a/tests/core/test_SheerkaCreateNewConcept.py +++ b/tests/core/test_SheerkaCreateNewConcept.py @@ -1,3 +1,4 @@ +import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF from core.sheerka.Sheerka import Sheerka @@ -154,7 +155,32 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka): res = sheerka.create_new_concept(context, concept) assert res.status - assert sheerka.get_by_name(concept.name) == concept # it's not a list, ie the entry is not duplicated + assert sheerka.get_by_name(concept.name) == concept # it's not a list, ie the entry is not duplicated + + def test_i_can_get_first_token_when_not_a_letter(self): + sheerka = self.get_sheerka(cache_only=False) + context = self.get_context(sheerka) + concept = Concept("--filter a").def_var("a") + + res = sheerka.create_new_concept(context, concept) + assert res.status + + # I can get by the first entry + assert sheerka.cache_manager.get(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id] + assert sheerka.cache_manager.get(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id] + + @pytest.mark.parametrize("expression", [ + "--'filter' ('one' | 'two') ", + "'--filter' ('one' | 'two') ", + ]) + def test_i_can_get_first_token_when_bnf_concept_and_not_a_letter(self, expression): + sheerka, context, bnf_concept = self.init_concepts( + Concept("foo", definition=expression), + create_new=True) + + # I can get by the first entry + assert sheerka.cache_manager.get(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id] + assert sheerka.cache_manager.get(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id] class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka): diff --git a/tests/parsers/test_BaseNodeParser.py b/tests/parsers/test_BaseNodeParser.py index 58ace58..6496917 100644 --- a/tests/parsers/test_BaseNodeParser.py +++ b/tests/parsers/test_BaseNodeParser.py @@ -40,6 +40,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka): (Optional(StrMatch("foo")), {"foo": ["1002"]}), (ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}), (OneOrMore(StrMatch("foo")), {"foo": ["1002"]}), + (StrMatch("--filter"), {"--filter": ["1002"]}), # add both entries ]) def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected): sheerka = self.get_sheerka() diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index c2a5401..402a59b 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -1,6 +1,6 @@ import pytest from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC +from core.concept import Concept, ConceptParts, DoNotResolve from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ @@ -30,6 +30,8 @@ cmap = { # sequence of keywords using def definition # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF), # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"), + + "filter": Concept("filter", definition="'--filter' (one | two)") } @@ -125,7 +127,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert res.body.reason == BuiltinConcepts.IS_EMPTY @pytest.mark.parametrize("expr, text", [ - # (StrMatch("foo"), "foo"), + (StrMatch("foo"), "foo"), (StrMatch("'foo'"), "'foo'"), (StrMatch("1"), "1"), (StrMatch("3.14"), "3.14"), @@ -171,6 +173,33 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): expected = [CNC("foo", source=text)] self.validate_get_concepts_sequences(my_map, text, expected) + def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self): + my_map = { + "filter": self.bnf_concept("filter", + Sequence(StrMatch("-", skip_whitespace=False), + StrMatch("-", skip_whitespace=False), + "filter")), + } + + sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True) + parser = BnfNodeParser() + parser.init_from_concepts(context, updated) + + text = "--filter" + expected = [CN("filter", source="--filter")] + expected_array = compute_expected_array(my_map, text, expected) + + parser.reset_parser(context, ParserInput(text)) + bnf_parsers_helpers = parser.get_concepts_sequences() + assert bnf_parsers_helpers[0].sequence == expected_array + assert not bnf_parsers_helpers[0].has_unrecognized + + # but I cannot parse + text = "- - filter" + parser.reset_parser(context, ParserInput(text)) + bnf_parsers_helpers = parser.get_concepts_sequences() + assert bnf_parsers_helpers[0].has_unrecognized + def test_i_can_match_multiple_sequences(self): my_map = { "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))), @@ -760,6 +789,20 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert concepts_nodes == expected_array + def test_i_can_parse_filter(self): + sheerka, context, parser = self.init_parser(init_from_sheerka=True) + + expression = "--filter one" + expected = [CNC("filter", source="--filter one", one="one")] + + res = parser.parse(context, ParserInput(expression)) + expected_array = compute_expected_array(cmap, expression, expected) + parser_result = res.value + concepts_nodes = res.value.value + + assert res.status == True + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert concepts_nodes == expected_array # @pytest.mark.parametrize("parser_input, expected", [ # ("one", [ diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index da2be80..389a968 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -84,6 +84,8 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): ("('a'*=x 'b'+=y)=z", Sequence( ZeroOrMore(StrMatch("a"), rule_name="x"), OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")), + ("'--filter'", + Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter"))) ]) def test_i_can_parse_regex(self, expression, expected): sheerka, context, parser = self.init_parser() @@ -141,8 +143,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() res = parser.parse(context, Tokenizer(expression)) - ret_value = res.value.value + ret_value = res.body.body assert parser.has_error + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert not res.status assert ret_value[0] == error diff --git a/tests/parsers/test_DefaultParser.py b/tests/parsers/test_DefaultParser.py index b01d8f0..723721b 100644 --- a/tests/parsers/test_DefaultParser.py +++ b/tests/parsers/test_DefaultParser.py @@ -187,9 +187,9 @@ func(a) return_value = res.value assert not res.status - assert isinstance(return_value, ParserResultConcept) - assert isinstance(return_value.value[0], SyntaxErrorNode) - assert return_value.value[0].message == "Indentation not found." + assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) + assert isinstance(return_value.body[0], SyntaxErrorNode) + assert return_value.body[0].message == "Indentation not found." def test_indentation_is_not_allowed_if_the_colon_is_missing(self): text = """ @@ -213,24 +213,22 @@ def concept add one to a as return_value = res.value assert not res.status - assert isinstance(return_value, ParserResultConcept) - assert isinstance(return_value.value[0], SyntaxErrorNode) - assert return_value.value[0].message == "Name is mandatory" + assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) + assert isinstance(return_value.body[0], SyntaxErrorNode) + assert return_value.body[0].message == "Name is mandatory" def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self): text = "def hello as a where b pre c post d" - expected_concept = get_def_concept(name="hello", body="a ", where="b ", pre="c ", post="d") sheerka, context, parser = self.init_parser() res = parser.parse(context, ParserInput(text)) return_value = res.value assert not res.status - assert isinstance(return_value, ParserResultConcept) - assert isinstance(return_value.value[0], UnexpectedTokenErrorNode) - assert return_value.value[0].message == "Syntax error." - assert return_value.value[0].expected_tokens == [Keywords.CONCEPT] - assert return_value.try_parsed == expected_concept + assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) + assert isinstance(return_value.body[0], UnexpectedTokenErrorNode) + assert return_value.body[0].message == "Syntax error." + assert return_value.body[0].expected_tokens == [Keywords.CONCEPT] @pytest.mark.parametrize("text", [ "def concept hello where 1+", @@ -254,7 +252,8 @@ def concept add one to a as return_value = res.value assert not res.status - assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")] + assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) + assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")] def test_i_can_parse_def_concept_from_bnf(self): text = "def concept name from bnf a_concept | 'a_string' as __definition[0]" @@ -289,7 +288,8 @@ def concept add one to a as res = parser.parse(context, ParserInput(text)) assert not res.status - assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration") + assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR) + assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration") @pytest.mark.parametrize("text", [ "def concept addition from a plus b as a + b", @@ -341,7 +341,7 @@ def concept add one to a as res = parser.parse(context, ParserInput(text)) assert not res.status - assert isinstance(res.body, ParserResultConcept) + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert isinstance(res.body.body[0], UnexpectedTokenErrorNode) @pytest.mark.parametrize("text, error_msg, error_text", [