from collections import namedtuple
from dataclasses import dataclass
from enum import Enum

import core.utils
from core.concept import Concept, ConceptParts
from core.rule import Rule
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import Node, BaseParser, ParsingError

# When True, ConceptNode.__repr__ also dumps the compiled parts of its concept.
DEBUG_COMPILED = True


@dataclass()
class LexerNode(Node):
    """
    Base class of the nodes produced by the lexer based parsers.
    A node remembers the tokens it was built from and their text.
    """
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        # when no explicit source is given, it is rebuilt from the tokens
        if self.source is None:
            self.source = core.utils.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.tokens == other.tokens

    def fix_source(self, force=True):
        """
        Recomputes the source from the tokens
        :param force: when False, the source is only computed if it is still None
        :return: self (to allow chaining)
        """
        if force or self.source is None:
            self.source = core.utils.get_text_from_tokens(self.tokens)
        return self

    def clone(self):
        """
        Placeholder, returns None. The concrete nodes provide their own version
        """
        pass

    def to_short_str(self):
        """
        Compact representation of the node. Must be implemented by the subclasses
        """
        raise NotImplementedError

    def get_source_to_parse(self):
        return self.source


class UnrecognizedTokensNode(LexerNode):
    """
    Accumulates the tokens that were not recognized
    The tokens are added one by one using add_token()
    """

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False  # TODO: Remove as it seems to now be useless
        self.parenthesis_count = 0  # number of LPAR minus number of RPAR added so far

    def freeze(self):
        # TODO: Remove as it seems to now be useless
        self.is_frozen = True

    def reset(self):
        """
        Puts the node back to its empty state (start == end == -1, no token)
        Note that the tokens list is cleared in place
        """
        self.start = self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0
        self.source = ""

    def add_token(self, token, pos):
        """
        Appends a token to the node
        :param token: token to add
        :param pos: position of the token (becomes the new end of the node)
        :return: self (to allow chaining)
        :raises Exception: if the node is frozen
        """
        if self.is_frozen:
            raise Exception("The node is frozen")

        if self.end != -1 and pos == self.end + 2:
            # exactly one position was skipped: add the missing whitespace
            p = self.tokens[-1]  # previous token
            self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1))

        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            # first token after creation with start == -1 (or after a reset)
            self.start = pos

        if token.type == TokenKind.LPAR:
            self.parenthesis_count += 1
        if token.type == TokenKind.RPAR:
            self.parenthesis_count -= 1

        return self

    def pop(self, token_kind):
        """
        Removes the last token, but only if it is of the requested kind
        The node is reset when it becomes empty
        :param token_kind: expected kind of the last token
        :raises Exception: if the node is frozen
        """
        if self.is_frozen:
            raise Exception("The node is frozen")

        if len(self.tokens) > 0 and self.tokens[-1].type == token_kind:
            self.tokens.pop()
            if len(self.tokens) == 0:
                self.reset()
            else:
                self.end -= 1

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        """
        True when all the tokens are whitespaces or newlines (also True when there is no token)
        """
        for t in self.tokens:
            if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                return False
        return True

    def is_empty(self):
        return len(self.tokens) == 0

    def last_token_type(self):
        """
        :return: type of the last token, None when there is no token
        """
        if len(self.tokens) == 0:
            return None
        return self.tokens[-1].type

    def __eq__(self, other):
        if isinstance(other, utnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, UTN):
            # the tester class knows how to compare itself with a node
            return other == self

        if not isinstance(other, UnrecognizedTokensNode):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(source='{self.source}', start={self.start}, end={self.end})"

    def clone(self):
        """
        :return: a new node working on a copy of the tokens list
        """
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        clone.is_frozen = self.is_frozen
        clone.parenthesis_count = self.parenthesis_count
        return clone

    def to_short_str(self):
        return f"UTN('{self.source}')"


class RuleNode(LexerNode):
    """
    Node that holds a rule
    """

    def __init__(self, rule, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        self.rule = rule
        self.fix_source(False)

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, RN):
            # the tester class knows how to compare itself with a node
            return other == self

        if not isinstance(other, RuleNode):
            return False

        return self.rule == other.rule and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.rule, self.start, self.end, self.source))

    def __repr__(self):
        return f"RuleNode(rule='{self.rule}', source='{self.source}', start={self.start}, end={self.end})"

    def clone(self):
        # the tokens list is shared with the clone (it is not copied)
        return RuleNode(self.rule, self.start, self.end, self.tokens, self.source)

    def to_short_str(self):
        return f'RN({self.rule})'


class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying
        self.fix_source(False)

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, (CN, CNC)):
            # the tester classes know how to compare themselves with a node
            return other == self

        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                   self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return self.concept == other.concept and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
        if DEBUG_COMPILED:
            for k, v in self.concept.get_compiled().items():
                text += f", {k}='{v}'"
        return text + ")"

    def clone(self):
        # do we need to clone the concept as well ?
        # the concept, the tokens list and the underlying are shared with the clone
        clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
        return clone

    def as_bag(self):
        """
        Creates a dictionary with the useful properties of the ConceptNode
        see Concept.as_bag() for extra information
        :return: a new dictionary holding all the instance attributes
        """
        bag = {}
        for k, v in self.__dict__.items():
            bag[k] = v
        # if isinstance(self.concept, Concept):
        #     bag["compiled"] = self.concept.get_compiled()
        return bag

    def to_short_str(self):
        return f'CN({self.concept})'


class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized
    """

    def __init__(self, start, end, tokens=None, source=None, python_node=None, return_value=None):
        """
        :param start: start position (index of the first token)
        :param end: end position (index of the last token)
        :param tokens:
        :param source: tokens as string
        :param python_node: PythonNode found (when the SourceCodeNode is validated)
        :param return_value: ReturnValueConcept returned when the source was validated
                             When return_value is provided,
                             You should have return_value.body.body == node
        """
        super().__init__(start, end, tokens, source)
        self.python_node = python_node  # The PythonNode (or whatever language node) that is found
        self.return_value = return_value  # original result of the parsing

    def __eq__(self, other):
        if isinstance(other, scnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, SCN):
            # the tester class knows how to compare itself with a node
            return other == self

        if not isinstance(other, SourceCodeNode):
            return False

        return self.python_node == other.python_node and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"

    def to_short_str(self):
        return f"SCN('{self.source}')"

    def get_python_node(self):
        return self.python_node

    def get_source_to_parse(self):
        # the python_node must be set before calling this method
        return self.python_node.source


class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of temporary version for SourceCodeNode
    I know that there is some code, I know that there are some concepts
    I just don't want to make the glue yet
    So I push all the nodes into one big bag
    """

    def __init__(self, first_node, last_node, content_nodes=None, has_unrecognized=False):
        # start / end are initialized with values that any real position will replace (see fix_pos)
        super().__init__(9999, -1, None)  # why not sys.maxint ?
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = has_unrecognized
        self._all_nodes = None  # cache used by get_all_nodes()
        self.fix_all_pos()

        self.python_node = None  # if the source code node is validated against a python parse, here is the PythonNode
        self.return_value = None  # return_value that produced the PythonNode

    def add_node(self, node):
        """
        Adds a content node, and updates the positions accordingly
        :param node:
        :return: self (to allow chaining)
        """
        self.nodes.append(node)
        self.fix_pos(node)
        self._all_nodes = None  # the cache is no longer valid
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, SCWC):
            # the tester class knows how to compare itself with a node
            return other == self

        if not isinstance(other, SourceCodeWithConceptNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if self.first != other.first:
            return False

        if self.last != other.last:
            return False

        if len(self.nodes) != len(other.nodes):
            return False

        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False

        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        """
        Updates start and end using the first node, the last node and all the content nodes
        """
        if self.first is None:
            # to ease some unit test where only the python_node is necessary
            return

        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        """
        Widens [start, end] so that it includes the positions of the node
        Nodes without start / end (or with None values) are ignored
        :param node:
        :return: self (to allow chaining)
        """
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start

        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end

        return self

    def pseudo_fix_source(self):
        """
        Rebuilds the source from the first node, the content nodes and the last node
        pseudo because the code is not that clean !
        :return: self (to allow chaining)
        """
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def get_all_nodes(self):
        """
        :return: list made of the first node, the content nodes and the last node (cached)
        """
        if self._all_nodes:
            return self._all_nodes

        self._all_nodes = [self.first, *self.nodes, self.last]
        return self._all_nodes

    def clone(self):
        # python_node and return_value are not propagated to the clone
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes.copy(), self.has_unrecognized)
        return clone

    def to_short_str(self):
        # to_short_str must be called: joining the bound methods themselves raises a TypeError
        return f"SCWC({self.first}" + ", ".join(n.to_short_str() for n in self.nodes) + f"{self.last})"

    def get_python_node(self):
        return self.python_node

    def get_source_to_parse(self):
        # the python_node must be set before calling this method
        return self.python_node.source


@dataclass()
class GrammarErrorNode(ParsingError):
    message: str


class SyaAssociativity(Enum):
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        return self.value


# Lightweight tuples that the nodes accept in their __eq__
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")


class HelperWithPos:
    """
    Base class of the tester classes
    It holds an optional start and an optional end position
    A position given to the constructor is 'fixed': fix_pos() will never change it
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end
        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    def fix_pos(self, node):
        """
        Widens [start, end] using the positions of the node
        :param node: object with start / end attributes, or a tuple (start, end, ...)
        :return: self (to allow chaining)
        """
        if not self.start_is_fixed:
            start = node.start if hasattr(node, "start") else \
                node[0] if isinstance(node, tuple) else None

            if start is not None and (self.start is None or start < self.start):
                self.start = start

        if not self.end_is_fixed:
            end = node.end if hasattr(node, "end") else \
                node[1] if isinstance(node, tuple) else None

            if end is not None and (self.end is None or end > self.end):
                self.end = end

        return self


class SCN(HelperWithPos):
    """
    SourceCodeNode tester class
    It matches with SourceCodeNode but with less constraints

    SCN == SourceCodeNode if source, start, end (start and end are not validated when None)
    """

    def __init__(self, source, start=None, end=None):
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, SourceCodeNode):
            if self.source != other.source:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return True

        # compare with another SCN (the check used to be made against CN by mistake)
        if not isinstance(other, SCN):
            return False

        return self.source == other.source and \
               self.start == other.start and \
               self.end == other.end

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"SCN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class SCWC(HelperWithPos):
    """
    SourceCodeWithConceptNode tester class
    It matches with a SourceCodeWithConceptNode
    but it's easier to instantiate during the tests
    """

    def __init__(self, first, last, *args):
        """
        :param first: first node (or its source as a string)
        :param last: last node (or its source as a string)
        :param args: content nodes
        """
        super().__init__(None, None)
        self.first = first
        self.last = last
        self.content = args

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, SourceCodeWithConceptNode):
            if self.first != other.first:
                return False

            if self.last != other.last:
                return False

            if len(self.content) != len(other.nodes):
                return False

            for self_node, other_node in zip(self.content, other.nodes):
                if self_node != other_node:
                    return False

            # at last
            return True

        # explicit answer for any other type (used to implicitly return None)
        return False

    def __repr__(self):
        txt = "SCWC("
        if self.start is not None:
            txt += f"start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        txt += f", source='{self.source}'"
        return txt + ")"

    @property
    def source(self):
        """
        this code is a copy and paste from SourceCodeWithConceptNode.pseudo_fix_source
        TODO: create a common function or whatever...
        :return: the source rebuilt from first, the content nodes and last
        """
        source = self.first.source if hasattr(self.first, "source") else self.first
        for n in self.content:
            source += " "
            if hasattr(n, "source"):
                source += n.source
            elif hasattr(n, "concept"):
                source += str(n.concept)
            else:
                source += " unknown"
        source += self.last.source if hasattr(self.last, "source") else self.last
        return source


class CN(HelperWithPos):
    """
    ConceptNode tester class
    It matches with ConceptNode but with less constraints

    CN == ConceptNode if concept key, start, end and source are the same
    (start, end and source are not validated when None)
    """

    def __init__(self, concept, start=None, end=None, source=None):
        """
        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.source = source
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        """
        Sets the source by joining the given strings
        :param str_tokens: iterable of strings
        :return: self (to allow chaining)
        """
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            if self.source is not None and self.source != other.source:
                return False
            return True

        if not isinstance(other, CN):
            return False

        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CN(concept='{self.concept}'"
        else:
            txt = f"CN(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class CNC(CN):
    """
    ConceptNode for Compiled tester class
    It matches with ConceptNode
    But focuses on the 'compiled' property of the concept

    CNC == ConceptNode if CNC.compiled == ConceptNode.concept.get_compiled()
    """

    def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
        """
        :param concept_key: Concept or concept_key
        :param start:
        :param end:
        :param source:
        :param exclude_body: when True, the body of the concept is not compared
        :param kwargs: expected compiled parts ('body' is stored under ConceptParts.BODY)
        """
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs
        self.exclude_body = exclude_body
        if "body" in self.compiled:
            self.compiled[ConceptParts.BODY] = self.compiled["body"]
            del self.compiled["body"]

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            if self.source is not None and self.source != other.source:
                return False

            if self.exclude_body:
                to_compare = {k: v for k, v in other.concept.get_compiled().items() if k != ConceptParts.BODY}
            else:
                to_compare = other.concept.get_compiled()

            if self.compiled == to_compare:  # expanded form to ease the debug
                return True
            else:
                return False

        if not isinstance(other, CNC):
            return False

        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.compiled == other.compiled

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"

        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"


class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode
    It compares the source, the start and the end

    NOTE(review): unlike the other tester classes, start and end are always compared,
    even when they are None. To confirm whether this is intended
    """

    def __init__(self, source, start=None, end=None):
        """
        :param source:
        :param start:
        :param end:
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, UnrecognizedTokensNode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if not isinstance(other, UTN):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class RN(HelperWithPos):
    """
    Helper class to test RuleNode

    RN == RuleNode if rule id, start, end and source are the same
    (start, end and source are not validated when None)
    """

    def __init__(self, rule, start=None, end=None, source=None):
        """
        :param rule: Rule or rule id (only the id is used for the comparisons)
        :param start:
        :param end:
        :param source: when not provided, it is computed from the rule id
        """
        super().__init__(start, end)
        self.rule_id = rule.id if isinstance(rule, Rule) else rule
        self.source = source or core.utils.str_concept((None, self.rule_id), prefix="r:")
        self.rule = rule if isinstance(rule, Rule) else None

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, RuleNode):
            if other.rule is None:
                return False
            if other.rule.id != self.rule_id:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            if self.source is not None and self.source != other.source:
                return False
            return True

        if not isinstance(other, RN):
            return False

        return self.rule_id == other.rule_id and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.rule_id, self.start, self.end, self.source))

    def __repr__(self):
        if self.rule:
            txt = f"RN(rule='{self.rule}'"
        else:
            txt = f"RN(rule_id='{self.rule_id}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class BaseNodeParser(BaseParser):
    """
    Parser that return LexerNode
    """

    def __init__(self, name, priority, **kwargs):
        # kwargs is accepted but not used here
        super().__init__(name, priority, yield_eof=True)

    def init_from_concepts(self, context, concepts, **kwargs):
        """
        Initialize the parser with a list of concepts
        For unit tests convenience
        :param context
        :param concepts
        :return:
        """
        # imported here rather than at the top of the file
        from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager

        concepts_by_first_keyword = SheerkaConceptManager.compute_concepts_by_first_token(context, concepts).body
        resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
        context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved)