First implementation of Debugger for SyaNodeParser

This commit is contained in:
2020-12-03 21:50:48 +01:00
parent 4f899280c4
commit 8b86998225
48 changed files with 1781 additions and 1795 deletions
+144 -64
View File
@@ -11,7 +11,7 @@ from core.global_symbols import CONCEPT_COMPARISON_CONTEXT
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_n_clones
from core.utils import get_n_clones, get_text_from_tokens, NextIdManager
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode
@@ -25,6 +25,7 @@ DEBUG_PUSH_UNREC = "PUSH_UNREC"
DEBUG_POP = "POP"
DEBUG_EAT = "EAT"
DEBUG_RECOG = "RECOG"
DEBUG_CAN_POP = "CAN_POP"
@dataclass()
@@ -42,12 +43,13 @@ class DebugInfo:
token: Token = None # current token
concept: Concept = None # current concept if any
action: str = None # action taken
level: str = None
def __repr__(self):
token_repr = self.token.repr_value if isinstance(self.token, Token) else self.token
msg = f"{self.pos:3}:{token_repr}" if self.pos != -1 else " _:"
if self.concept:
msg += f"({self.concept})"
msg += f" {self.concept.short_repr()}"
return msg + f" => {self.action}"
@@ -118,6 +120,36 @@ class SyaConceptDef:
precedence: int = SheerkaComparisonManager.DEFAULT_COMPARISON_VALUE
associativity: SyaAssociativity = SyaAssociativity.Right
@staticmethod
def get_sya_concept_def(concept, parser, sheerka):
    """Build the :class:`SyaConceptDef` (precedence + associativity) for *concept*.

    Resolution order:
    1. ``parser.sya_definitions`` — explicit (precedence, associativity) pairs,
       used by the unit tests.
    2. ``sheerka`` — production path: precedence comes from the concept weights,
       associativity from the concept's own ASSOCIATIVITY property.

    :param concept: the concept to describe
    :param parser: owning parser, may be ``None`` (e.g. outside unit tests)
    :param sheerka: sheerka service, may be ``None``
    :return: a populated ``SyaConceptDef``
    """
    sya_concept_def = SyaConceptDef(concept)
    # first, try to look in the parser
    # it is where to find the data during the unit tests
    if parser and concept.id in parser.sya_definitions:
        # Manage when precedence and associativity are given in the unit tests
        sya_def = parser.sya_definitions.get(concept.id)
        if sya_def[0] is not None:
            sya_concept_def.precedence = sya_def[0]
        if sya_def[1] is not None:
            sya_concept_def.associativity = sya_def[1]
    # otherwise, use sheerka
    if sheerka:
        # BUGFIX: use the `sheerka` parameter, not `parser.sheerka` —
        # `parser` may be None here, and the point of this refactor is that
        # sheerka is passed in explicitly.
        concept_weight = sheerka.get_concepts_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
        if concept.str_id in concept_weight:
            sya_concept_def.precedence = concept_weight[concept.str_id]
        # in the case of Sheerka, the associativity is managed by the concept itself
        # There is no conflict with the settings of the unit test, as I don't use the props in the unit tests
        if associativity := concept.get_prop(BuiltinConcepts.ASSOCIATIVITY):
            sya_concept_def.associativity = SyaAssociativity(associativity)
    return sya_concept_def
def short_repr(self):
    """Compact one-line rendering: concept, precedence and associativity."""
    return "({}, prio={}, assoc={})".format(self.concept, self.precedence, self.associativity)
@dataclass()
class SyaConceptParserHelper:
@@ -248,9 +280,19 @@ class SyaConceptParserHelper:
class InFixToPostFix:
def __init__(self, context, debug_enabled=False):
def __init__(self, context, next_id_manager, debugger=None):
self.context = context
self.debug_enabled = debug_enabled
self.next_id_manager = next_id_manager
self.id = self.next_id_manager.get_next_id()
self.debugger = debugger
if debugger:
self.debug_enabled = debugger.is_enabled()
self.enabled_debug_levels = debugger.get_enabled_vars()
else:
self.debug_enabled = False
self.enabled_debug_levels = None
self.is_locked = False # when locked, cannot process input
@@ -284,9 +326,15 @@ class InFixToPostFix:
def _add_error(self, error):
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"=> ERROR {error}"))
self._add_debug(DebugInfo(action=f"=> ERROR {error}"))
self.errors.append(error)
def _add_debug(self, debug_info: DebugInfo):
    """Append *debug_info* to the debug trace, honoring the enabled debug levels.

    An unleveled entry is always recorded. A leveled entry is recorded only
    when its qualified name ("#<parser id>.<level>") or the wildcard "*"
    appears in the enabled debug levels.
    """
    if debug_info.level is None:
        self.debug.append(debug_info)
        return
    enabled = self.enabled_debug_levels
    if not enabled:
        return
    if "*" in enabled or f"#{self.id}.{debug_info.level}" in enabled:
        self.debug.append(debug_info)
def _is_lpar(self, token):
"""
True if the token is a left parenthesis '('
@@ -337,10 +385,10 @@ class InFixToPostFix:
else:
item.error = f"token '{item.expected[0].strip_quote}' not found"
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"ERROR {item.error}"))
self._add_debug(DebugInfo(action=f"ERROR {item.error}"))
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"{DEBUG_POP} {item}"))
self._add_debug(DebugInfo(action=f"{DEBUG_POP} {item}"))
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item)
else:
@@ -402,6 +450,11 @@ class InFixToPostFix:
self.debug.pop()
def _debug_nodes(self, nodes_sequences):
"""
Returns a debug representation of a sequence of LexerNodes
:param nodes_sequences:
:return:
"""
res = "["
first = True
for sequence in nodes_sequences:
@@ -520,7 +573,7 @@ class InFixToPostFix:
# There are more than one solution found
# In the case, we create a new InfixToPostfix for each new possibility
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"{DEBUG_RECOG} {self._debug_nodes(nodes_sequences)}"))
self._add_debug(DebugInfo(action=f"{DEBUG_RECOG} {self._debug_nodes(nodes_sequences)}"))
if len(nodes_sequences) > 1:
for node_sequence in nodes_sequences[1:]:
clone = self.clone()
@@ -599,33 +652,52 @@ class InFixToPostFix:
self.stack.pop()
self._put_to_out(item)
def i_can_pop(self, concept_node):
def i_can_pop(self, sya_parser_helper):
"""
Validate the Shunting Yard Algorithm conditions to pop out from the stack
Note that it's a custom implementation as I need to manage UnrecognizedTokensNode
:param concept_node:
:param sya_parser_helper:
:return:
"""
if len(self.stack) == 0:
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"No stack. {DEBUG_CAN_POP} false.", level="can_pop"))
return False
stack_head = self.stack[-1]
if not isinstance(stack_head, SyaConceptParserHelper): # mostly left parenthesis
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"No concept. {DEBUG_CAN_POP} false.", level="can_pop"))
return False
current = concept_node.concept
current = sya_parser_helper.concept
stack = stack_head.concept
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
self._add_error(NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, concept_node.start))
self._add_error(
NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, sya_parser_helper.start))
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
if self.debug_enabled:
current_debug = f"{current.concept.id}({current.precedence})"
stack_debug = f"{stack.concept.id}({stack.precedence})"
self._add_debug(
DebugInfo(action=f"assoc=Left and {current_debug} <= {stack_debug}. {DEBUG_CAN_POP} True.",
level="can_pop"))
return True
if current.associativity == SyaAssociativity.Right and current.precedence < stack.precedence:
if self.debug_enabled:
current_debug = f"{current.concept.id}({current.precedence})"
stack_debug = f"{stack.concept.id}({stack.precedence})"
self._add_debug(
DebugInfo(action=f"assoc=Right and {current_debug} < {stack_debug}. {DEBUG_CAN_POP} True.",
level="can_pop"))
return True
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"No rule. {DEBUG_CAN_POP} False.", level="can_pop"))
return False
def handle_expected_token(self, token, pos):
@@ -693,7 +765,7 @@ class InFixToPostFix:
current_concept.end = pos
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, "??"))
self._add_debug(DebugInfo(pos, token, None, "??"))
self.manage_unrecognized()
# manage that some clones may have been forked
for forked in self.forked:
@@ -755,7 +827,7 @@ class InFixToPostFix:
if self.parsing_function:
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self.unrecognized_tokens.add_token(token, pos)
@@ -790,13 +862,13 @@ class InFixToPostFix:
# if the token 'bar' is found, it has to be considered as part of the concept foo
if self.debug_enabled:
self._remove_debug_info_if_needed()
self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT))
self._add_debug(DebugInfo(pos, token, None, DEBUG_EAT))
return True
elif self._is_lpar(token):
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
if self.unrecognized_tokens.is_empty() or self.unrecognized_tokens.is_whitespace():
@@ -865,7 +937,7 @@ class InFixToPostFix:
elif self._is_rpar(token):
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT))
self._add_debug(DebugInfo(pos, token, None, DEBUG_EAT))
# first, remove what was in the buffer
self.manage_unrecognized()
@@ -933,7 +1005,7 @@ class InFixToPostFix:
if first_pass:
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, sya_concept_def, "??"))
self._add_debug(DebugInfo(pos, token, sya_concept_def, "??"))
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
@@ -970,7 +1042,7 @@ class InFixToPostFix:
else:
if self.debug_enabled:
self._remove_debug_info_if_needed()
self.debug.append(DebugInfo(pos, token, sya_concept_def, DEBUG_PUSH))
self._add_debug(DebugInfo(pos, token, sya_concept_def, DEBUG_PUSH))
self.stack.append(parser_helper)
self.manage_parameters_when_new_concept(parser_helper)
@@ -985,7 +1057,7 @@ class InFixToPostFix:
return
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self.unrecognized_tokens.add_token(token, pos)
@@ -1005,7 +1077,7 @@ class InFixToPostFix:
return # no need to pop the buffer, as no concept is found
if self.debug_enabled:
self.debug.append(DebugInfo(pos, "<EOF>", None, "??"))
self._add_debug(DebugInfo(pos, "<EOF>", None, "??"))
while len(self.stack) > 0:
parser_helper = self.stack[-1]
@@ -1036,7 +1108,7 @@ class InFixToPostFix:
forked.finalize(pos)
def clone(self):
clone = InFixToPostFix(self.context, self.debug_enabled)
clone = InFixToPostFix(self.context, self.next_id_manager, self.debugger)
clone.is_locked = self.is_locked
clone.out = self.out[:]
clone.stack = [i.clone() if hasattr(i, "clone") else i for i in self.stack]
@@ -1054,6 +1126,7 @@ class PostFixToItem:
start: int
end: int
has_unrecognized: bool
source: str
class SyaNodeParser(BaseNodeParser):
@@ -1069,14 +1142,6 @@ class SyaNodeParser(BaseNodeParser):
self.concepts_by_first_keyword = {}
self.sya_definitions = {}
# self.token = None
# self.pos = -1
# self.tokens = None
#
# self.context: ExecutionContext = None
# self.text = None
# self.sheerka = None
def init_from_concepts(self, context, concepts, **kwargs):
super().init_from_concepts(context, concepts)
@@ -1093,27 +1158,8 @@ class SyaNodeParser(BaseNodeParser):
"""
# We only accept concepts that have parameters (refuse atoms)
# Bnf definitions are not supposed to be managed by this parser either
return len(concept.get_metadata().variables) > 0 and concept.get_metadata().definition_type != DEFINITION_TYPE_BNF
# NOTE(review): in this diff this method is the removed legacy version; it was
# replaced by the SyaConceptDef.get_sya_concept_def staticmethod.
def _get_sya_concept_def(self, parser, concept):
# Build the SyaConceptDef (precedence + associativity) for `concept`.
# Unit-test overrides in parser.sya_definitions win first; otherwise the
# sheerka weights / concept properties are consulted.
sya_concept_def = SyaConceptDef(concept)
if concept.id in parser.sya_definitions:
# Manage when precedence and associativity are given in the unit tests
sya_def = parser.sya_definitions.get(concept.id)
# (precedence, associativity) pair; each slot may independently be None
if sya_def[0] is not None:
sya_concept_def.precedence = sya_def[0]
if sya_def[1] is not None:
sya_concept_def.associativity = sya_def[1]
if parser.sheerka:
# Production path: precedence comes from the concept weights
concept_weight = parser.sheerka.get_concepts_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
if concept.str_id in concept_weight:
sya_concept_def.precedence = concept_weight[concept.str_id]
# Associativity is carried by the concept itself as a property
if associativity := concept.get_prop(BuiltinConcepts.ASSOCIATIVITY):
sya_concept_def.associativity = SyaAssociativity(associativity)
return sya_concept_def
return len(
concept.get_metadata().variables) > 0 and concept.get_metadata().definition_type != DEFINITION_TYPE_BNF
def infix_to_postfix(self, context, parser_input: ParserInput):
"""
@@ -1126,6 +1172,9 @@ class SyaNodeParser(BaseNodeParser):
if not self.reset_parser(context, parser_input):
return None
debugger = context.get_debugger(self.NAME, "parse")
debugger.debug_entering(source=self.parser_input.as_text())
forked = []
def _add_forked_to_res():
@@ -1138,16 +1187,21 @@ class SyaNodeParser(BaseNodeParser):
res.extend(forked)
forked.clear()
res = [InFixToPostFix(context, context.debug_enabled)]
res = [InFixToPostFix(context, NextIdManager(), debugger)]
while self.parser_input.next_token(False):
for infix_to_postfix in res:
infix_to_postfix.reset()
token = self.parser_input.token
if debugger.is_enabled():
debug_prefix = f"pos={self.parser_input.pos}, {token=}, {len(res)} parser(s)"
try:
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
# little optim, no need to lock, unlock or get the concept when parenthesis
if debugger.is_enabled():
debugger.debug_log(debug_prefix + ", eat token.")
for infix_to_postfix in res:
infix_to_postfix.eat_token(token, self.parser_input.pos)
continue
@@ -1156,21 +1210,34 @@ class SyaNodeParser(BaseNodeParser):
if infix_to_postfix.eat_token(token, self.parser_input.pos):
infix_to_postfix.lock()
concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def)
if not concepts:
nb_locked = len([itp for itp in res if itp.is_locked])
if nb_locked == len(res):
if debugger.is_enabled():
debugger.debug_log(debug_prefix + f", all parsers are locked")
continue
concepts_def = self.get_concepts(token, self._is_eligible, to_map=SyaConceptDef.get_sya_concept_def)
if not concepts_def:
if debugger.is_enabled():
debugger.debug_log(debug_prefix + f", no concept found")
for infix_to_postfix in res:
infix_to_postfix.eat_unrecognized(token, self.parser_input.pos)
continue
if len(concepts) == 1:
if debugger.is_enabled():
found = [cd.short_repr() for cd in concepts_def]
debugger.debug_log(debug_prefix + f", concept(s) found={found}")
if len(concepts_def) == 1:
for infix_to_postfix in res:
infix_to_postfix.eat_concept(concepts[0], token, self.parser_input.pos)
infix_to_postfix.eat_concept(concepts_def[0], token, self.parser_input.pos)
continue
# make the cartesian product
temp_res = []
for infix_to_postfix in res:
for concept in concepts:
for concept in concepts_def:
clone = infix_to_postfix.clone()
temp_res.append(clone)
clone.eat_concept(concept, token, self.parser_input.pos)
@@ -1185,13 +1252,13 @@ class SyaNodeParser(BaseNodeParser):
infix_to_postfix.finalize(self.parser_input.pos)
_add_forked_to_res()
if context.debug_enabled:
context.debug(self.name, "infix_to_postfix", None, f"Parsing {parser_input}")
context.debug(self.name, "infix_to_postfix", "nb_found", f"{len(res)} InfixToPostFix(s) found")
for i, r in enumerate(res):
context.debug(self.name, "infix_to_postfix", "infix_to_postfix", f"#{i}")
if debugger.is_enabled():
for r in res:
for line in r.debug:
context.debug(self.name, "infix_to_postfix", "infix_to_postfix", line)
if line.level:
debugger.debug_var(f"#{r.id}.{line.level}", line)
else:
debugger.debug_var(f"#{r.id}", line)
return res
@@ -1222,6 +1289,7 @@ class SyaNodeParser(BaseNodeParser):
end = item.end
has_unrecognized = False
concept = sheerka.new_from_template(item.concept, item.concept.key)
concept_metadata = []
for param_index in reversed(range(len(concept.get_metadata().variables))):
inner_item = self.postfix_to_item(sheerka, postfixed)
if inner_item.start < start:
@@ -1237,8 +1305,20 @@ class SyaNodeParser(BaseNodeParser):
inner_item
concept.get_compiled()[param_name] = param_value
concept_metadata.append((param_name, inner_item.source))
return PostFixToItem(concept, start, end, has_unrecognized)
# update the metadata
concept_metadata.reverse()
# ---- Sanity check. To remove at some point
assert len(concept_metadata) == len(concept.get_metadata().variables)
for meta_orig, meta_new in zip(concept.get_metadata().variables, concept_metadata):
assert meta_orig[0] == meta_new[0]
# ---- Sanity check. To remove at some point
concept.get_metadata().variables = concept_metadata
source = get_text_from_tokens(self.parser_input.tokens[start:end + 1])
return PostFixToItem(concept, start, end, has_unrecognized, source)
def parse(self, context, parser_input: ParserInput):
"""