From 3ce6ce2a7635142bb24f708906ea45c97af643c4 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Fri, 22 May 2020 15:46:04 +0200 Subject: [PATCH] I can now use keyword in concept definition and parsing --- src/core/concept.py | 2 +- src/core/tokenizer.py | 24 ++++++++++---- src/parsers/AtomNodeParser.py | 4 +-- src/parsers/BaseNodeParser.py | 4 +-- src/parsers/BnfNodeParser.py | 8 ++--- src/parsers/BnfParser.py | 15 ++++----- src/parsers/SyaNodeParser.py | 21 ++++--------- tests/core/test_concept.py | 1 + tests/parsers/parsers_utils.py | 2 +- tests/parsers/test_AtomsParser.py | 18 +++++++++++ tests/parsers/test_BnfNodeParser.py | 40 ++++++++++++++++++++---- tests/parsers/test_BnfParser.py | 3 +- tests/parsers/test_ExactConceptParser.py | 26 +++++++++++++++ tests/parsers/test_SyaNodeParser.py | 4 +++ 14 files changed, 127 insertions(+), 45 deletions(-) diff --git a/src/core/concept.py b/src/core/concept.py index 37592b8..4a7ca87 100644 --- a/src/core/concept.py +++ b/src/core/concept.py @@ -246,7 +246,7 @@ class Concept: key += VARIABLE_PREFIX + str(variables.index(token.value)) else: # value = token.value[1:-1] if token.type == TokenKind.STRING else token.value - key += token.value + key += token.value.value if token.type == TokenKind.KEYWORD else token.value first = False self.metadata.key = key diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index 181361d..6d4be35 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -59,6 +59,7 @@ class Token: line: int column: int + _strip_quote: str = field(default=None, repr=False, compare=False, hash=None) _str_value: str = field(default=None, repr=False, compare=False, hash=None) def __repr__(self): @@ -75,23 +76,34 @@ class Token: return f"Token({value})" + @property + def strip_quote(self): + if self._strip_quote: + return self._strip_quote + + self._strip_quote = self._to_str(True) + return self._strip_quote + @property def str_value(self): if self._str_value: return self._str_value - if self.type == TokenKind.STRING: - self._str_value = self.value[1:-1] - elif self.type == TokenKind.KEYWORD: - self._str_value = self.value.value - else: - self._str_value = str(self.value) + self._str_value = self._to_str(False) return self._str_value @staticmethod def is_whitespace(token): return token and token.type == TokenKind.WHITESPACE + def _to_str(self, strip_quote): + if strip_quote and self.type == TokenKind.STRING: + return self.value[1:-1] + elif self.type == TokenKind.KEYWORD: + return self.value.value + else: + return str(self.value) + @dataclass() class LexerError(Exception): diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index 64c04f8..465054e 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -92,7 +92,7 @@ class AtomConceptParserHelper: self.debug.append(token) - if self.expected_tokens[0] != token.str_value: + if self.expected_tokens[0] != token.strip_quote: self.errors.append(UnexpectedTokenErrorNode( f"Found '{token}' while expecting '{self.expected_tokens[0]}'", token, @@ -120,7 +120,7 @@ class AtomConceptParserHelper: forked.eat_concept(concept, pos) concept_node = ConceptNode(concept, pos, pos) - expected = [t.str_value for t in Tokenizer(concept.name)][1:-1] + expected = [t.strip_quote for t in Tokenizer(concept.name)][1:-1] if not expected: # the concept is already matched diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index 235a732..2b62af3 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -6,7 
+6,7 @@ import core.utils from core.builtin_concepts import BuiltinConcepts from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts from core.sheerka.ExecutionContext import ExecutionContext -from core.tokenizer import TokenKind, LexerError, Token +from core.tokenizer import TokenKind, LexerError, Token, Keywords from parsers.BaseParser import Node, BaseParser, ErrorNode DEBUG_COMPILED = True @@ -440,7 +440,7 @@ class CN(HelperWithPos): self.concept = concept if isinstance(concept, Concept) else None def fix_source(self, str_tokens): - self.source = "".join(str_tokens) + self.source = "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens]) return self def __eq__(self, other): diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index e18f9e2..dd2d24d 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -400,11 +400,11 @@ class StrMatch(Match): def _parse(self, parser_helper): token = parser_helper.get_token() - m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \ - else token.value == self.to_match + m = token.str_value.lower() == self.to_match.lower() if self.ignore_case \ + else token.strip_quote == self.to_match if m: - node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.value) + node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value) parser_helper.next_token() return node @@ -882,7 +882,7 @@ class BnfNodeParser(BaseNodeParser): else: # regular concepts tokens = Tokenizer(concept.name) - nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] + nodes = [StrMatch(token.strip_quote) for token in list(tokens)[:-1]] pe = inner_resolve(nodes[0] if len(nodes) == 1 else Sequence(nodes), inner_already_seen) if not isinstance(pe, ParsingExpression): diff --git a/src/parsers/BnfParser.py b/src/parsers/BnfParser.py index 18e7141..8382b35 100644 --- a/src/parsers/BnfParser.py +++ b/src/parsers/BnfParser.py @@ -74,13 +74,13 @@ class BnfParser(BaseParser): try: self._current = self.after_current or next(self.lexer_iter) - self.source += str(self._current.value) + self.source += self._current.str_value self.after_current = None if skip_whitespace: while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: self._current = next(self.lexer_iter) - self.source += str(self._current.value) + self.source += self._current.str_value except StopIteration: self._current = Token(TokenKind.EOF, "", -1, -1, -1) @@ -90,7 +90,7 @@ class BnfParser(BaseParser): try: self.after_current = next(self.lexer_iter) - # self.source += str(self.after_current.value) + # self.source += self.after_current.str_value return self.after_current except StopIteration: self.after_current = Token(TokenKind.EOF, "", -1, -1, -1) @@ -99,13 +99,13 @@ class BnfParser(BaseParser): def eat_white_space(self): if self.after_current is not None: self._current = self.after_current - self.source += str(self._current.value) + self.source += self._current.str_value self.after_current = None try: while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: self._current = next(self.lexer_iter) - self.source += str(self._current.value) + self.source += self._current.str_value except StopIteration: self._current = None @@ -239,10 +239,10 @@ class BnfParser(BaseParser): # else ConceptExpression(concept) return self.eat_rule_name_if_needed(expr) - if token.type == TokenKind.IDENTIFIER: + if token.type in 
(TokenKind.IDENTIFIER, TokenKind.KEYWORD): self.next_token() - concept_name = str(token.value) + concept_name = token.str_value # we are trying to match against a concept which is still under construction ! # (for example of recursive bnf definition) @@ -283,3 +283,4 @@ class BnfParser(BaseParser): expression.rule_name = token.value self.next_token() return expression + diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index 581c427..d81c70d 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -131,15 +131,15 @@ class SyaConceptParserHelper: # True if the next token is the one that is expected # Or if the next token is a whitespace and the expected one is the one after # (whitespace are sometimes not mandatory) - return token.str_value == self.expected[0].str_value or \ - self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value + return token.strip_quote == self.expected[0].strip_quote or \ + self.expected[0].type == TokenKind.WHITESPACE and token.strip_quote == self.expected[1].strip_quote def is_expected(self, token): if self.is_matched() or token.type == TokenKind.WHITESPACE: return False for expected in self.expected: - if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value: + if expected.type != TokenKind.VAR_DEF and expected.strip_quote == token.strip_quote: return True return False @@ -154,7 +154,7 @@ class SyaConceptParserHelper: :return: """ # No check, as it is used only after is_expected() or is_next() - while self.expected[0].str_value != until_token.str_value: + while self.expected[0].strip_quote != until_token.strip_quote: del self.expected[0] del self.expected[0] @@ -193,15 +193,6 @@ class SyaConceptParserHelper: self.concept = self.concept.concept return self - # @staticmethod - # def _get_token_value(token): - # if token.type == TokenKind.STRING: - # return token.value[1:-1] - # elif token.type == TokenKind.KEYWORD: - # return token.value.value - # else: - # return token.value - def clone(self): clone = SyaConceptParserHelper(self.concept, self.start, self.end) clone.expected = self.expected[:] @@ -296,7 +287,7 @@ class InFixToPostFix: if item.expected[0].type == TokenKind.VAR_DEF: item.error = "Not enough suffix parameters" else: - item.error = f"token '{item.expected[0].str_value}' not found" + item.error = f"token '{item.expected[0].strip_quote}' not found" if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1: self.out.insert(item.potential_pos, item) @@ -431,7 +422,7 @@ class InFixToPostFix: while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF: # eat everything that was expected if len(self.parameters_list) == 0: - current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'" + current_concept.error = f"Failed to match parameter '{current_concept.expected[0].strip_quote}'" return del self.parameters_list[0] del current_concept.expected[0] diff --git a/tests/core/test_concept.py b/tests/core/test_concept.py index 14a0acb..5521d6c 100644 --- a/tests/core/test_concept.py +++ b/tests/core/test_concept.py @@ -19,6 +19,7 @@ from core.concept import Concept, ConceptParts, DEFINITION_TYPE_DEF ("a | b", ["a", "b"], "__var__0 | __var__1"), ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"), ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"), + ("def concept a", ["a"], "def concept __var__0"), ]) def test_i_can_compute_the_key(name, variables, expected): concept 
= Concept(name) diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index 312a064..6211cb9 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -154,7 +154,7 @@ def get_node( if concept_found: concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests if sya and len(concept_found.metadata.variables) > 0 and not is_bnf: - return SyaConceptParserHelper(concept_found, start) + return SyaConceptParserHelper(concept_found, start, start + length - 1) elif init_empty_body: node = CNC(concept_found, start, start + length - 1, source=sub_expr, exclude_body=exclude_body) init_body(node, concept_found, sub_expr) diff --git a/tests/parsers/test_AtomsParser.py b/tests/parsers/test_AtomsParser.py index 345ccc7..4d95668 100644 --- a/tests/parsers/test_AtomsParser.py +++ b/tests/parsers/test_AtomsParser.py @@ -215,6 +215,24 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka): expected_array = compute_expected_array(concepts_map, "one", ["one"]) assert lexer_nodes == expected_array + def test_i_can_parse_concepts_with_keyword(self): + concepts_map = { + "a special concept": Concept("a special concept"), + "isa": Concept("isa"), + } + + sheerka, context, parser = self.init_parser(concepts_map) + + res = parser.parse(context, "a special concept") + lexer_nodes = res.body.body + expected_array = compute_expected_array(concepts_map, "a special concept", ["a special concept"]) + assert lexer_nodes == expected_array + + res = parser.parse(context, "isa") + lexer_nodes = res.body.body + expected_array = compute_expected_array(concepts_map, "isa", ["isa"]) + assert lexer_nodes == expected_array + @pytest.mark.parametrize("text", [ "foo", f"foo one", diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index ef92c69..6fe7466 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -1,6 +1,6 @@ import pytest from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, ConceptParts, DoNotResolve +from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ Optional, ZeroOrMore, OneOrMore, ConceptExpression @@ -18,7 +18,17 @@ cmap = { 'one or more three': Concept("one or more three", definition="three+"), 'two or four': Concept("two or four", definition="two | 'four'"), "twenties": Concept("twenties", definition="'twenty' c:two or four:=unit"), - "one or more plus": Concept("one or more plus", definition="c:a plus b:+"), + "one or more plus": Concept("one or more plus", definition="c:a plus b:+"), # TODO + + # testing keywords + "def_only": Concept("def"), + "def number": Concept("def number", definition="def (one|two)=number"), + # sequence of keywords using bnf definition + # "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"), + # "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"), + # sequence of keywords using def definition + # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF), + # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"), } @@ -40,12 +50,13 @@ def u(parsing_expression, start, end, children=None): return NonTerminalNode(parsing_expression, start, end, [], 
children) -def compute_expected_array(my_concepts_map, expression, expected): +def compute_expected_array(my_concepts_map, expression, expected, exclude_body=False): return tests.parsers.parsers_utils.compute_expected_array( my_concepts_map, expression, expected, - init_empty_body=True) + init_empty_body=True, + exclude_body=exclude_body) class TestBnfNodeParser(TestUsingMemoryBasedSheerka): @@ -136,7 +147,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert res.body.reason == BuiltinConcepts.IS_EMPTY @pytest.mark.parametrize("expr, text", [ - (StrMatch("foo"), "foo"), + # (StrMatch("foo"), "foo"), (StrMatch("'foo'"), "'foo'"), (StrMatch("1"), "1"), (StrMatch("3.14"), "3.14"), @@ -741,7 +752,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): ("twenty four", True, [CN("twenties", source="twenty four")]), ("twenty one", False, [UTN("twenty "), CN("bnf one", source="one")]), ("twenty two + 1", True, [CN("twenties", source="twenty two"), " + 1"]), - ]) def test_i_can_parse(self, parser_input, expected_status, expected): sheerka, context, parser = self.init_parser(init_from_sheerka=True) @@ -755,6 +765,24 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert concepts_nodes == expected_array + def test_i_can_parse_when_keyword(self): + sheerka, context, parser = self.init_parser(init_from_sheerka=True) + + parser_input = "def one" + expected = [CNC("def number", source="def one", number="one", one="one")] + + res = parser.parse(context, parser_input) + expected_array = compute_expected_array(cmap, parser_input, expected) + expected_array[0].compiled["def"] = cmap["def_only"] + + parser_result = res.value + concepts_nodes = res.value.value + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert concepts_nodes == expected_array + + # @pytest.mark.parametrize("parser_input, expected", [ # ("one", [ # (True, [CNC("bnf_one", source="one", one="one", body="one")]), diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index 7a5230e..4e9a98b 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -103,9 +103,10 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))), ("foo=f", c("foo", "f")), ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))), + ("def 'concept'", Sequence(c("def"), StrMatch("concept"))), ]) def test_i_can_parse_regex_with_concept(self, expression, expected): - sheerka, context, parser, foo, bar, var = self.init_parser("foo", "bar", "var") + sheerka, context, parser, foo, bar, var, _def = self.init_parser("foo", "bar", "var", "def") res = parser.parse(context, Tokenizer(expression)) diff --git a/tests/parsers/test_ExactConceptParser.py b/tests/parsers/test_ExactConceptParser.py index 1e90fca..e3a0cdf 100644 --- a/tests/parsers/test_ExactConceptParser.py +++ b/tests/parsers/test_ExactConceptParser.py @@ -161,6 +161,32 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka): assert concept_found.metadata.need_validation assert not concept_found.metadata.is_evaluated + def test_i_can_parse_when_expression_contains_keyword(self): + sheerka, context, isa, def_concept = self.init_concepts( + Concept("c is a concept").def_var("c"), + Concept("def concept a").def_var("a"), + ) + + source = "z is a concept" + results = ExactConceptParser().parse(context, source) + concept_found = results[0].value.value + + 
assert len(results) == 1 + assert results[0].status + assert concept_found == CMV(isa, c="z") + assert concept_found.metadata.need_validation + assert not concept_found.metadata.is_evaluated + + source = "def concept z" + results = ExactConceptParser().parse(context, source) + concept_found = results[0].value.value + + assert len(results) == 1 + assert results[0].status + assert concept_found == CMV(def_concept, a="z") + assert concept_found.metadata.need_validation + assert not concept_found.metadata.is_evaluated + def test_i_can_manage_unknown_concept(self): context = self.get_context(self.get_sheerka(singleton=True)) source = "def concept hello" # this is not a concept by itself diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index e460b64..ff6b060 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -34,6 +34,7 @@ cmap = { "foo bar": Concept("foo bar(a)").def_var("a"), "long infixed": Concept("a long infixed b").def_var("a").def_var("b"), "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), + "is a concept": Concept("c is a concept").def_var("c"), } @@ -1013,6 +1014,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ]), ("function(suffixed x$!#)", False, [ SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]), + ("one is a concept", True, [CNC("is a concept", c="one")]), + ("a is a concept", False, [CNC("is a concept", c=UTN("a"))]), ]) def test_i_can_parse_when_one_result(self, text, expected_status, expected_result): sheerka, context, parser = self.init_parser() @@ -1142,3 +1145,4 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) +
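
Reviewer note: the core of this patch is the Token.str_value / Token.strip_quote split in src/core/tokenizer.py. Below is a minimal, self-contained sketch of that logic — not the real module: the caching fields, line/column bookkeeping, and the full TokenKind/Keywords enums are omitted, and the stand-in enum members here are assumptions for illustration only.

from dataclasses import dataclass
from enum import Enum


class TokenKind(Enum):  # stand-in; simplified subset of the real enum
    STRING = "string"
    KEYWORD = "keyword"
    IDENTIFIER = "identifier"


class Keywords(Enum):  # stand-in; the real enum lives in core.tokenizer
    DEF = "def"
    CONCEPT = "concept"


@dataclass
class Token:
    type: TokenKind
    value: object  # str, or a Keywords member for KEYWORD tokens

    def _to_str(self, strip_quote):
        # Only STRING tokens carry surrounding quotes to strip; a KEYWORD
        # token stores an enum member whose .value is the raw keyword text.
        if strip_quote and self.type == TokenKind.STRING:
            return self.value[1:-1]
        elif self.type == TokenKind.KEYWORD:
            return self.value.value
        else:
            return str(self.value)

    @property
    def str_value(self):
        # string form that keeps quotes on STRING tokens
        return self._to_str(False)

    @property
    def strip_quote(self):
        # string form with quotes stripped, used when matching literals
        return self._to_str(True)


s = Token(TokenKind.STRING, "'foo'")
k = Token(TokenKind.KEYWORD, Keywords.DEF)
assert s.str_value == "'foo'" and s.strip_quote == "foo"
assert k.str_value == "def" and k.strip_quote == "def"

This is why the call sites that match tokens against concept-name literals (AtomNodeParser, BnfNodeParser, SyaNodeParser) switch from str_value to strip_quote: both 'foo' and foo should match the literal foo, while a keyword such as def compares by its text either way.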
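The CN.fix_source change in src/parsers/BaseNodeParser.py guards against the same representation issue: a token stream may now contain Keywords enum members, which str.join() cannot concatenate directly. A small sketch, again using a stand-in Keywords enum (an assumption, not the real definition):

from enum import Enum

class Keywords(Enum):  # stand-in; the real enum lives in core.tokenizer
    DEF = "def"

def fix_source(str_tokens):
    # map enum members to their string value before joining, as the patch does;
    # joining a Keywords member directly would raise TypeError
    return "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens])

assert fix_source(["x ", Keywords.DEF, " y"]) == "x def y"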
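And the one-line change in src/core/concept.py makes the concept-key computation keyword-aware, which is what the new test case ("def concept a", ["a"], "def concept __var__0") exercises. A rough sketch under simplifying assumptions — whitespace-split tokenization instead of the real Tokenizer, and the same stand-in Keywords enum:

from enum import Enum

class Keywords(Enum):  # stand-in for core.tokenizer.Keywords
    DEF = "def"
    CONCEPT = "concept"

VARIABLE_PREFIX = "__var__"  # same constant the patch imports from core.concept
BY_TEXT = {k.value: k for k in Keywords}

def compute_key(name, variables):
    parts = []
    for word in name.split():
        if word in variables:
            parts.append(VARIABLE_PREFIX + str(variables.index(word)))
        else:
            value = BY_TEXT.get(word, word)  # keyword tokens carry the enum member
            # the patched line: concatenate the keyword's text, not the member itself
            parts.append(value.value if isinstance(value, Keywords) else value)
    return " ".join(parts)

# mirrors the case added to tests/core/test_concept.py
assert compute_key("def concept a", ["a"]) == "def concept __var__0"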