from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import debug_nodes, update_concepts_hints
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.global_symbols import NotFound
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Tokenizer
from core.utils import make_unique, strip_tokens
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, SourceCodeNode, UnrecognizedTokensCache, \
    UnrecognizedTokensNode
from parsers.BaseParser import ParsingError, UnexpectedTokenParsingError
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SyaNodeParser import SyaNodeParser

# Parser names handled by the UnrecognizedTokensCache (see SequenceNodeParser.__init__).
PARSERS = [BnfNodeParser.NAME, SyaNodeParser.NAME, "Python"]


@dataclass()
class TokensNodeFoundError(ParsingError):
    """Error recorded when a concept was only partially matched: some of its
    keyword tokens were consumed, but the remaining expected tokens never
    showed up before the end of the input.
    """
    # Keyword tokens that were still expected when parsing stopped.
    expected_tokens: list

    def __eq__(self, other):
        # NOTE(review): the isinstance check targets UnexpectedTokenParsingError,
        # not TokensNodeFoundError itself — two TokensNodeFoundError instances
        # compare equal only by identity. Looks like a copy-paste artifact; confirm.
        if id(other) == id(self):
            return True
        if not isinstance(other, UnexpectedTokenParsingError):
            return False
        if self.message != other.message:
            return False
        if self.token.type != other.token.type or self.token.value != other.token.value:
            return False
        if len(self.expected_tokens) != len(other.expected_tokens):
            return False
        for i, t in enumerate(self.expected_tokens):
            if t != other.expected_tokens[i]:
                return False
        return True

    def __hash__(self):
        # BUGFIX: expected_tokens is a list (unhashable); hashing an instance
        # previously raised TypeError. Convert it to a tuple first.
        return hash((self.message, self.token, tuple(self.expected_tokens)))


class AtomConceptParserHelper:
    """Accumulates the parsing state for one candidate interpretation of the
    token stream: the sequence of nodes recognized so far, the concept whose
    keyword tokens are currently being matched, and a buffer of tokens that
    could not be recognized.
    """

    def __init__(self, parser):
        self.parser = parser
        self.context = parser.context
        self.debug = []
        self.sequence = []  # sequence of concepts already found
        self.current_concept: ConceptNode = None  # concept being parsed
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of tokens positions
        self.expected_tokens = None  # expected tokens for this concept
        self.is_locked = False
        self.errors = []
        self.has_unrecognized = False
        self.forked = []  # used to duplicate AtomConceptParserHelper. See manage_unrecognized()

    def __eq__(self, other):
        # Two helpers are equal when their recognized sequences are element-wise equal.
        if id(other) == id(self):
            return True
        if not isinstance(other, AtomConceptParserHelper):
            return False
        if len(self.sequence) != len(other.sequence):
            return False
        for item_self, item_other in zip(self.sequence, other.sequence):
            if item_self != item_other:
                return False
        return True

    def __hash__(self):
        # Coarse hash (length only); __eq__ does the real element-wise comparison.
        return hash(len(self.sequence))

    def __repr__(self):
        return f"{debug_nodes(self.sequence)}"

    def lock(self):
        # A locked helper already consumed the current token; eat_concept /
        # eat_unrecognized become no-ops until reset() at the next token.
        self.is_locked = True

    def reset(self):
        self.is_locked = False

    def has_error(self):
        return len(self.errors) > 0

    def eat_token(self, token, pos):
        """Try to consume `token` as the next keyword of the concept being
        matched. Returns True only when the concept becomes fully matched.
        """
        if not self.expected_tokens:
            return False
        self.debug.append(token)
        if self.expected_tokens[0] != token.strip_quote:
            self.errors.append(UnexpectedTokenParsingError(
                f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
                token,
                [self.expected_tokens[0]]))
            return False
        self.current_concept.end = pos
        del self.expected_tokens[0]
        if not self.expected_tokens:
            # the concept is fully matched
            self.sequence.append(self.current_concept)
            self.expected_tokens = None
            return True
        # More keyword tokens are still expected (was an implicit None before;
        # callers only truth-test the result, so behavior is unchanged).
        return False

    def eat_concept(self, concept, pos):
        """Start (or complete) the recognition of `concept` at position `pos`."""
        if self.is_locked:
            return
        self.debug.append(concept)
        self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.eat_concept(concept, pos)
        concept_node = ConceptNode(concept, pos, pos)
        # Remaining keyword tokens of the concept name; the slice drops the
        # first token (just matched) and the last one — presumably an
        # end-of-input marker emitted by Tokenizer (TODO confirm).
        expected = [t.strip_quote for t in Tokenizer(concept.name)][1:-1]
        if not expected:
            # the concept is already matched
            self.sequence.append(concept_node)
        else:
            self.current_concept = concept_node
            self.expected_tokens = expected

    def manage_unrecognized(self):
        """Flush the unrecognized-tokens buffer: try to re-parse it through the
        cache; if that yields several node sequences, fork one clone per extra
        sequence; otherwise append the buffer as an UnrecognizedTokensNode.
        """
        if self.unrecognized_tokens.is_empty():
            return  # do not put empty UnrecognizedToken in out
        if self.unrecognized_tokens.is_whitespace():
            self.unrecognized_tokens.reset()
            return
        self.unrecognized_tokens.fix_source()
        # try to recognize concepts
        nodes_sequences = self.parser.cache.get_lexer_nodes_from_unrecognized(
            self.context, self.unrecognized_tokens)
        if nodes_sequences:
            # One instance per candidate sequence: self takes the first, clones
            # take the rest (the clones are exposed through self.forked).
            instances = [self]
            for i in range(len(nodes_sequences) - 1):
                clone = self.clone()
                instances.append(clone)
                self.forked.append(clone)
            for instance, node_sequence in zip(instances, nodes_sequences):
                for node in node_sequence:
                    instance.sequence.append(node)
                    if isinstance(node, (UnrecognizedTokensNode, SourceCodeNode)) or \
                            hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
                        instance.has_unrecognized = True
                instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
        else:
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True
            # create another instance
            self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def eat_unrecognized(self, token, pos):
        """Buffer a token that matched no concept."""
        if self.is_locked:
            return
        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
        """Flush remaining buffered tokens and record an error if a concept
        was left half-matched at end of input.
        """
        if len(self.sequence) > 0:
            self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.finalize()
        if self.expected_tokens:
            # NOTE(review): single positional arg — assumes ParsingError's first
            # dataclass field accepts it; confirm against ParsingError's definition.
            self.errors.append(TokensNodeFoundError(self.expected_tokens))

    def clone(self):
        """Deep-enough copy for forking: lists are copied, nodes are cloned."""
        clone = AtomConceptParserHelper(self.parser)
        clone.debug = self.debug[:]
        clone.sequence = self.sequence[:]
        clone.current_concept = self.current_concept.clone() if self.current_concept else None
        clone.unrecognized_tokens = self.unrecognized_tokens.clone()
        clone.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
        clone.is_locked = self.is_locked
        clone.errors = self.errors[:]
        clone.has_unrecognized = self.has_unrecognized
        return clone


class SequenceNodeParser(BaseNodeParser):
    """
    Parser used to recognize atoms concepts or sequence of atoms concepts
    An atom concept is concept that does not have any property thought it may have a body
    So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter)
    This parser can recognize the sequence 'one two three' as
        [ConceptNode(one), ConceptNode(two), ConceptNode(three)]
    It can partly recognized 'one x$1!! two three' as
        [ConceptNode(one), UnrecognizedTokensNode(x$1!!), [ConceptNode(two), [ConceptNode(three)]
    It cannot recognize concepts with parameters (non atom)
        ex: 'one plus two' won't be recognized as ConceptNode(plus, one, two)
        it will be [ConceptNode(one), UnrecognizedTokensNode(plus), [ConceptNode(two)]
    Note 'one plus two' will be recognized by the SyaParser
    """
    NAME = "Sequence"

    def __init__(self, **kwargs):
        super().__init__(SequenceNodeParser.NAME, 50, **kwargs)
        self.cache = UnrecognizedTokensCache(PARSERS)

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that select concepts that must handled by SequenceNodeParser
        :param concept:
        :return:
        """
        return len(concept.get_metadata().parameters) == 0 \
            and concept.get_metadata().definition_type != DEFINITION_TYPE_BNF

    def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
        """Return the concepts matching `token`, merging name-based resolution
        with first-keyword lookup (deduplicated by concept id).
        Returns None for whitespace or when nothing matches.
        """
        def new_instances(list_of_concepts):
            if list_of_concepts is None:
                return None
            return [self.context.sheerka.new_from_template(c, c.id) for c in list_of_concepts]

        if token.type == TokenKind.WHITESPACE:
            return None
        concepts_by_name = self.as_list(self.sheerka.fast_resolve(token))
        concepts_by_first_keyword = new_instances(
            self.sheerka.get_concepts_by_first_token(token, self._is_eligible))
        if concepts_by_name is None:
            return concepts_by_first_keyword
        if concepts_by_first_keyword is None:
            return concepts_by_name
        return make_unique(concepts_by_name + concepts_by_first_keyword, lambda c: c.id)

    def get_concepts_sequences(self):
        """
        Tries to find the concept.
        TODO: KSI 20201206 I think that the code can be optimized as we create a new
        instance of each concept before validating that we are going to keep it.
        :return: the list of AtomConceptParserHelper candidates
        """
        forked = []

        def _add_forked_to_concept_parser_helpers():
            # collect the clones forked by manage_unrecognized() and promote
            # them to full candidates
            for parser in concept_parser_helpers:
                if len(parser.forked) > 0:
                    forked.extend(parser.forked)
                    parser.forked.clear()
            if len(forked) > 0:
                concept_parser_helpers.extend(forked)
                forked.clear()

        concept_parser_helpers = [AtomConceptParserHelper(self)]
        while self.parser_input.next_token(False):
            for concept_parser in concept_parser_helpers:
                concept_parser.reset()
            token = self.parser_input.token
            pos = self.parser_input.pos
            try:
                # First, let helpers that are mid-concept consume the token as
                # a keyword; a full match locks the helper for this token.
                for concept_parser in concept_parser_helpers:
                    if concept_parser.eat_token(token, pos):
                        concept_parser.lock()
                concepts = self.get_concepts(token, self._is_eligible)
                # self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
                if not concepts:
                    concepts = self.get_plural(token)
                if not concepts:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_unrecognized(token, pos)
                    continue
                if len(concepts) == 1:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_concept(concepts[0], pos)
                    continue
                # len(concepts) > 1, make the cartesian product
                temp_res = []
                for concept_parser in concept_parser_helpers:
                    if concept_parser.is_locked:
                        # It means that it already eat the token
                        # so simply add it, do not clone
                        temp_res.append(concept_parser)
                        continue
                    for concept in concepts:
                        clone = concept_parser.clone()
                        temp_res.append(clone)
                        clone.eat_concept(concept, pos)
                concept_parser_helpers = temp_res
            finally:
                _add_forked_to_concept_parser_helpers()
        # make sure that remaining items in stack are moved to out
        for concept_parser in concept_parser_helpers:
            concept_parser.reset()
            concept_parser.finalize()
        _add_forked_to_concept_parser_helpers()
        return concept_parser_helpers

    def get_by_name(self):
        """
        Use the whole input to recognize the concepts
        It will use the name of the concept, but also its compact form (c::)
        :return: a list of AtomConceptParserHelper, or None when nothing resolves
        """
        source = self.parser_input.as_text()
        concepts = self.sheerka.fast_resolve(source.strip())
        if concepts is None:
            return None
        update_concepts_hints(concepts,
                              # recognized_by=RECOGNIZED_BY_NAME,  # keep fast_resolve settings
                              is_instance=False,
                              is_evaluated=True)
        concepts = [concepts] if isinstance(concepts, Concept) else concepts
        res = []
        start, end = self.get_tokens_boundaries(self.parser_input.as_tokens())
        for concept in concepts:
            parser_helper = AtomConceptParserHelper(self)
            parser_helper.sequence.append(
                ConceptNode(concept, start, end,
                            strip_tokens(self.parser_input.as_tokens(), True),
                            source))
            res.append(parser_helper)
        return res

    def get_valid(self, concept_parser_helpers):
        """Filter out erroneous/empty helpers, fix up node tokens/sources, and
        deduplicate helpers whose sequences are structurally identical.
        """
        valid_parser_helpers = []  # be careful, it will be a list of list
        already_seen = set()

        def compute_hash_code(ph):
            """
            compute a hash code for already seen parser helper
            :param ph:
            :return:
            """
            return "#".join(
                [f"c:|{n.concept.id}:" if isinstance(n, ConceptNode) else n.source
                 for n in ph.sequence])

        for parser_helper in concept_parser_helpers:
            if parser_helper.has_error():
                continue
            if len(parser_helper.sequence) == 0:
                continue
            for node in parser_helper.sequence:
                # if isinstance(node, ConceptNode):
                #     if len(node.concept.get_metadata().variables) > 0:
                #         node.concept.get_hints().is_evaluated = True  # Do not try to evaluate those concepts
                node.tokens = self.parser_input.tokens[node.start:node.end + 1]
                node.fix_source()
                if isinstance(node, ConceptNode):
                    node.concept.get_hints().use_copy = True
            parser_helper_hash_code = compute_hash_code(parser_helper)
            if parser_helper_hash_code in already_seen:
                continue
            valid_parser_helpers.append(parser_helper)
            already_seen.add(parser_helper_hash_code)
        return valid_parser_helpers

    def parse(self, context, parser_input: ParserInput):
        """Entry point: parse `parser_input` into one or several parser results.
        Returns a single result, a list of results, or a NOT_FOR_ME/ERROR/IS_EMPTY
        wrapper depending on what was recognized.
        """
        if not isinstance(parser_input, ParserInput):
            return None
        if parser_input.is_empty():
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        debugger = context.get_debugger(self.NAME, "parse")
        debugger.debug_entering(source=self.parser_input.as_text())
        sequences = self.get_concepts_sequences()
        if by_name := self.get_by_name():
            # note that concepts by names must be appended, not prepended
            # In case of conflict, we want to keep the one found by get_concepts_sequences()
            sequences.extend(by_name)
        parser_helpers = self.get_valid(sequences)
        if debugger.is_enabled():
            debugger.debug_var("stats", self.cache.to_dict())
        debugger.debug_leaving(result=parser_helpers)
        if len(parser_helpers):
            ret = []
            for parser_helper in parser_helpers:
                ret.append(
                    self.sheerka.ret(
                        self.name,
                        not parser_helper.has_unrecognized,
                        self.sheerka.new(
                            BuiltinConcepts.PARSER_RESULT,
                            parser=self,
                            source=parser_input.as_text(),
                            body=parser_helper.sequence,
                            try_parsed=parser_helper.sequence)))
            if len(ret) == 1:
                self.log_result(context, parser_input, ret[0])
                return ret[0]
            else:
                self.log_multiple_results(context, parser_input, ret)
                return ret
        else:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))

    def get_plural(self, token):
        """Try to interpret an identifier ending in 's' as the plural of a
        known concept; returns dynamic PLURAL concepts, or None.
        """
        if not token.type == TokenKind.IDENTIFIER:
            return None
        if not token.value.endswith("s"):
            return None
        concept_name = token.value[:-1]  # remove the trailing 's'
        concepts = self.as_list(self.sheerka.fast_resolve(concept_name))
        if concepts is None:
            return None
        eligible = [c for c in concepts if self.sheerka.known_plural(c) == NotFound]
        if not eligible:
            return None
        # NOTE(review): `eligible` only gates the early return; the plurals below
        # are built from `concepts`, not `eligible` — confirm this is intended.
        plural_concepts = [self.sheerka.new_dynamic(c, BuiltinConcepts.PLURAL,
                                                    name=token.value,
                                                    props={BuiltinConcepts.PLURAL: c})
                           for c in concepts]
        for concept in plural_concepts:
            underlying_concept = concept.get_prop(BuiltinConcepts.PLURAL)
            if self.sheerka.isaset(self.context, underlying_concept):
                concept.get_metadata().body = f"get_set_elements(c:|{underlying_concept.id}:)"
        return plural_concepts

    @staticmethod
    def as_list(obj):
        # Normalize to a list; None stays None so callers can distinguish "no result".
        if obj is None:
            return None
        return obj if isinstance(obj, list) else [obj]