Keywords can now be used in concept definitions and parsing

This commit is contained in:
2020-05-22 15:46:04 +02:00
parent 37d3d16e21
commit 3ce6ce2a76
14 changed files with 127 additions and 45 deletions
+1 -1
View File
@@ -246,7 +246,7 @@ class Concept:
key += VARIABLE_PREFIX + str(variables.index(token.value))
else:
# value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += token.value
key += token.value.value if token.type == TokenKind.KEYWORD else token.value
first = False
self.metadata.key = key
+18 -6
View File
@@ -59,6 +59,7 @@ class Token:
line: int
column: int
_strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
def __repr__(self):
@@ -75,23 +76,34 @@ class Token:
return f"Token({value})"
@property
def strip_quote(self):
    """String form of the token value with surrounding quotes stripped for STRING tokens.

    The result is computed once via ``_to_str(True)`` and cached in
    ``self._strip_quote``.

    Returns:
        str: the unquoted string representation of the token value.
    """
    # `is not None` (not truthiness): a legitimately cached empty string ""
    # must not trigger recomputation on every access.
    if self._strip_quote is None:
        self._strip_quote = self._to_str(True)
    return self._strip_quote
@property
def str_value(self):
    """String form of the token value (quotes kept for STRING tokens), cached.

    Delegates to ``_to_str(False)`` and memoizes the result in
    ``self._str_value``.

    Returns:
        str: the string representation of the token value.
    """
    # NOTE(review): the old inline STRING/KEYWORD/else branch was removed —
    # it was dead code, unconditionally overwritten by the _to_str(False)
    # assignment that replaced it.
    # `is not None` so a cached empty string is not recomputed each access.
    if self._str_value is None:
        self._str_value = self._to_str(False)
    return self._str_value
@staticmethod
def is_whitespace(token):
    """Tell whether *token* is a non-None WHITESPACE token.

    Mirrors the short-circuit `token and token.type == ...` idiom: a falsy
    *token* is returned as-is, otherwise the boolean comparison result.
    """
    if token:
        return token.type == TokenKind.WHITESPACE
    return token
def _to_str(self, strip_quote):
    """Render the token value as a plain string.

    Args:
        strip_quote: when True, STRING token values lose their surrounding
            quote characters; otherwise they are stringified verbatim.

    Returns:
        str: the rendered value (KEYWORD tokens use their enum ``.value``).
    """
    kind = self.type
    # Guard-clause form of the original if/elif/else chain.
    if kind == TokenKind.STRING and strip_quote:
        # Drop the leading and trailing quote characters.
        return self.value[1:-1]
    if kind == TokenKind.KEYWORD:
        # Keyword values are enum members; unwrap to their payload.
        return self.value.value
    return str(self.value)
@dataclass()
class LexerError(Exception):
+2 -2
View File
@@ -92,7 +92,7 @@ class AtomConceptParserHelper:
self.debug.append(token)
if self.expected_tokens[0] != token.str_value:
if self.expected_tokens[0] != token.strip_quote:
self.errors.append(UnexpectedTokenErrorNode(
f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
token,
@@ -120,7 +120,7 @@ class AtomConceptParserHelper:
forked.eat_concept(concept, pos)
concept_node = ConceptNode(concept, pos, pos)
expected = [t.str_value for t in Tokenizer(concept.name)][1:-1]
expected = [t.strip_quote for t in Tokenizer(concept.name)][1:-1]
if not expected:
# the concept is already matched
+2 -2
View File
@@ -6,7 +6,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from core.tokenizer import TokenKind, LexerError, Token, Keywords
from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True
@@ -440,7 +440,7 @@ class CN(HelperWithPos):
self.concept = concept if isinstance(concept, Concept) else None
def fix_source(self, str_tokens):
self.source = "".join(str_tokens)
self.source = "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens])
return self
def __eq__(self, other):
+4 -4
View File
@@ -400,11 +400,11 @@ class StrMatch(Match):
def _parse(self, parser_helper):
token = parser_helper.get_token()
m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
else token.value == self.to_match
m = token.str_value.lower() == self.to_match.lower() if self.ignore_case \
else token.strip_quote == self.to_match
if m:
node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.value)
node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value)
parser_helper.next_token()
return node
@@ -882,7 +882,7 @@ class BnfNodeParser(BaseNodeParser):
else:
# regular concepts
tokens = Tokenizer(concept.name)
nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
nodes = [StrMatch(token.strip_quote) for token in list(tokens)[:-1]]
pe = inner_resolve(nodes[0] if len(nodes) == 1 else Sequence(nodes), inner_already_seen)
if not isinstance(pe, ParsingExpression):
+8 -7
View File
@@ -74,13 +74,13 @@ class BnfParser(BaseParser):
try:
self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value)
self.source += self._current.str_value
self.after_current = None
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
self.source += self._current.str_value
except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
@@ -90,7 +90,7 @@ class BnfParser(BaseParser):
try:
self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value)
# self.source += self.after_current.str_value
return self.after_current
except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
@@ -99,13 +99,13 @@ class BnfParser(BaseParser):
def eat_white_space(self):
if self.after_current is not None:
self._current = self.after_current
self.source += str(self._current.value)
self.source += self._current.str_value
self.after_current = None
try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
self.source += self._current.str_value
except StopIteration:
self._current = None
@@ -239,10 +239,10 @@ class BnfParser(BaseParser):
# else ConceptExpression(concept)
return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.IDENTIFIER:
if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD):
self.next_token()
concept_name = str(token.value)
concept_name = token.str_value
# we are trying to match against a concept which is still under construction !
# (for example of recursive bnf definition)
@@ -283,3 +283,4 @@ class BnfParser(BaseParser):
expression.rule_name = token.value
self.next_token()
return expression
+6 -15
View File
@@ -131,15 +131,15 @@ class SyaConceptParserHelper:
# True if the next token is the one that is expected
# Or if the next token is a whitespace and the expected one is the one after
# (whitespace are sometimes not mandatory)
return token.str_value == self.expected[0].str_value or \
self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value
return token.strip_quote == self.expected[0].strip_quote or \
self.expected[0].type == TokenKind.WHITESPACE and token.strip_quote == self.expected[1].strip_quote
def is_expected(self, token):
if self.is_matched() or token.type == TokenKind.WHITESPACE:
return False
for expected in self.expected:
if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value:
if expected.type != TokenKind.VAR_DEF and expected.strip_quote == token.strip_quote:
return True
return False
@@ -154,7 +154,7 @@ class SyaConceptParserHelper:
:return:
"""
# No check, as it is used only after is_expected() or is_next()
while self.expected[0].str_value != until_token.str_value:
while self.expected[0].strip_quote != until_token.strip_quote:
del self.expected[0]
del self.expected[0]
@@ -193,15 +193,6 @@ class SyaConceptParserHelper:
self.concept = self.concept.concept
return self
# @staticmethod
# def _get_token_value(token):
# if token.type == TokenKind.STRING:
# return token.value[1:-1]
# elif token.type == TokenKind.KEYWORD:
# return token.value.value
# else:
# return token.value
def clone(self):
clone = SyaConceptParserHelper(self.concept, self.start, self.end)
clone.expected = self.expected[:]
@@ -296,7 +287,7 @@ class InFixToPostFix:
if item.expected[0].type == TokenKind.VAR_DEF:
item.error = "Not enough suffix parameters"
else:
item.error = f"token '{item.expected[0].str_value}' not found"
item.error = f"token '{item.expected[0].strip_quote}' not found"
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item)
@@ -431,7 +422,7 @@ class InFixToPostFix:
while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
# eat everything that was expected
if len(self.parameters_list) == 0:
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'"
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].strip_quote}'"
return
del self.parameters_list[0]
del current_concept.expected[0]