from dataclasses import dataclass
from core.builtin_concepts_ids import BuiltinConcepts
from core.builtin_helpers import debug_nodes, get_new_variables_definitions, update_compiled
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.global_symbols import CONCEPT_COMPARISON_CONTEXT, SyaAssociativity
from core.sheerka.Sheerka import RECOGNIZED_BY_KEY
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Tokenizer
from core.utils import flatten, get_text_from_tokens, strip_tokens
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensCache, UnrecognizedTokensNode
from parsers.BaseParser import ParsingError

# Names of the parsers whose results can be consulted when trying to
# re-interpret unrecognized token runs (see UnrecognizedTokensCache usage below).
PARSERS = ["Sequence", "Bnf", "Python"]


class SyaNodeException(Exception):
    """Base for exceptions that abort a single Sya state-machine run.

    Both SyaTokensParser.parse() and SyaConceptParser.parse() catch this
    type, record it in ``self.errors`` and stop the state machine.
    """
    pass


class NoSyaConceptFound(ParsingError):
    """Raised/recorded when a tokens parser produced no eligible Sya concept."""
    pass


@dataclass
class NotEnoughParameters(ParsingError, SyaNodeException):
    """Fewer parameters were available on the stack than the concept expects."""
    concept: Concept
    tokens: str  # token before which the parameters are expected
    pos: int  # position of the token
    nb_expected: int
    parameters: list = None  # snapshot of the parameters found (optional)

    def __repr__(self):
        return f"not enough parameters found when parsing {self.concept}."

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Custom equality: ``parameters`` is only compared when the *other*
        # side provides it, so a test can match without listing parameters.
        if not isinstance(other, NotEnoughParameters):
            return False
        return (
            self.concept == other.concept
            and self.tokens == other.tokens
            and self.pos == other.pos
            and self.nb_expected == other.nb_expected
            and (other.parameters is None or self.parameters == other.parameters)
        )


@dataclass
class TooManyParameters(ParsingError, SyaNodeException):
    """More parameters were found on the stack than the concept expects."""
    concept: Concept
    tokens: str  # token before which the parameters are expected
    pos: int  # position of the token
    nb_expected: int
    parameters: list = None  # snapshot of the parameters found (optional)

    def __repr__(self):
        return f"Too many parameters found when parsing {self.concept}."

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Same relaxed comparison rule as NotEnoughParameters.__eq__.
        if not isinstance(other, TooManyParameters):
            return False
        return (
            self.concept == other.concept
            and self.tokens == other.tokens
            and self.pos == other.pos
            and self.nb_expected == other.nb_expected
            and (other.parameters is None or self.parameters == other.parameters)
        )


@dataclass
class TokensNotFound(ParsingError, SyaNodeException):
    """An expected fixed part of the concept definition was never matched."""
    concept: Concept  # concept being parsed
    tokens: str  # textual form of the expected tokens

    def __repr__(self):
        return f"Failed to find '{self.tokens}' when parsing {self.concept}."

    def __str__(self):
        return repr(self)


@dataclass
class NoneAssociativeConceptsError(ParsingError, SyaNodeException):
    """Two non-associative concepts met: precedence cannot be resolved."""
    concept_a: Concept
    concept_b: Concept


@dataclass
class FunctionDetected(ParsingError):
    """Marker error: the input looks like a function call, not Sya input."""
    pass


@dataclass
class DebugItem:
    """One recorded debug event (message plus a snapshot of parser state)."""
    text: str
    is_error: bool
    args: dict  # snapshot: current token, position, stack copy, extras

    def __repr__(self):
        return f"(DebugItem '{self.text}')"


class SyaState:
    """Base class for one state of the Sya parsing state machine.

    A state has a ``run()`` side-effect step and a ``next()`` transition
    that returns the following state (or None to stop the machine).
    ``owner`` is the parser (tokens or concept parser) driving the machine.
    """

    def __init__(self, name, owner, next_state=None):
        self.name = name
        self.owner = owner
        self.next_state = next_state  # static successor, when the state has one

    def __repr__(self):
        return f"({self.name}, token={self.owner.parser_input.token}, pos={self.owner.parser_input.pos})"

    def next(self):
        pass

    def run(self):
        pass


class EatUnrecognizedTokenState(SyaState):
    """Append the current token to the owner's unrecognized buffer."""

    def run(self):
        token = self.owner.parser_input.token
        self.owner.unrecognized_tokens.add_token(token, self.owner.parser_input.pos)

    def next(self):
        return self.owner.all_states.read_next_token


class ManageUnrecognizedState(SyaState):
    """Flush the unrecognized-token buffer before moving to ``next_state``.

    Non-whitespace unrecognized runs are first handed to the cache, which may
    re-lex them into node sequences; each alternative sequence is explored by
    a forked clone of the owner.  Otherwise the buffer is pushed on the stack
    as a plain UnrecognizedTokensNode.
    """

    def run(self):
        if self.owner.unrecognized_tokens.is_empty():
            return
        self.owner.unrecognized_tokens.fix_source()
        if not self.owner.unrecognized_tokens.is_whitespace():
            # try to recognize concepts
            cache = self.owner.get_unrecognized_tokens_requests_cache()
            nodes_sequences = cache.get_lexer_nodes_from_unrecognized(self.owner.context, self.owner.unrecognized_tokens)
            if nodes_sequences:
                if self.owner.debugger.is_enabled():
                    nodes_sequences_as_dbg = [debug_nodes(nodes) for nodes in nodes_sequences]
                    debug_text = f"from '{self.owner.unrecognized_tokens.source}', recognized {nodes_sequences_as_dbg}"
                    self.owner.debug(debug_text, nodes_sequences=nodes_sequences)
                # one clone per alternative node sequence (first clone is the owner itself)
                concept_parser_clones = self.owner.n_clones(len(nodes_sequences))
                for concept_parser, node_sequence in zip(concept_parser_clones, nodes_sequences):
                    concept_parser.stack.extend(node_sequence)
                    concept_parser.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
                    if concept_parser != self.owner:
                        # forks restart at this state's successor
                        concept_parser.set_state(self.next_state)
                return
        # nothing recognized (or whitespace only): keep as unrecognized node
        self.owner.stack.append(self.owner.unrecognized_tokens)
        self.owner.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def next(self):
        return self.next_state


class CreateNewConceptParserState(SyaState):
    """Wrap the pending concept (in state_context) into a SyaConceptParser."""

    def run(self):
        concept = self.owner.state_context
        # the owner's stack becomes the prefix parameters of the sub-parser
        concept_parser = SyaConceptParser(self.owner.get_tokens_parser(), concept, self.owner.stack)
        self.owner.state_context = concept_parser

    def next(self):
        return self.owner.all_states.parse_concept


class ParseConceptState(SyaState):
    """Run a sub SyaConceptParser and integrate (or roll back) its result."""

    def run(self):
        start_pos = self.owner.parser_input.pos
        concept_parser = self.owner.state_context
        concept_parser.parse()
        if concept_parser.has_error():
            # we did not parse the concept we thought we parsed. Let's rollback
            token = self.owner.parser_input.tokens[start_pos]
            if self.owner.debugger.is_enabled():
                msg = f"Rollbacking {concept_parser.concept}. "
                msg += f"Token '{token.str_value}' is now unrecognized. pos={start_pos}."
                self.owner.debug(msg, is_error=True)
            self.owner.unrecognized_tokens.add_token(token, start_pos)
            self.owner.parser_input.seek(start_pos)
            # restore the prefix parameters consumed by the failed sub-parser
            concept_parser.prefix_parameters.clear()
            concept_parser.prefix_parameters.extend(concept_parser.prefix_parameters_snapshot)
        else:
            self.owner.stack.append(concept_parser.concept_node)
            if isinstance(self.owner, SyaConceptParser) and self.owner.expected and self.owner.expected[0][0]:
                # the parent (owner) is a concept parser that still need some tokens
                # What was recognized by the current concept_parser may be irrelevant
                self.owner.parser_input.seek(concept_parser.concept_node.end)
            else:
                self.owner.stack.extend(concept_parser.parameters)  # append the parameters parsed but not used
        if concept_parser.after_parsing_hint:
            # the sub-parser asked to resume at a specific state (precedence pop)
            self.next_state = self.owner.all_states.get_state(concept_parser.after_parsing_hint.next_state)
            self.owner.state_context = concept_parser.state_context
        else:
            self.next_state = self.owner.all_states.read_next_token
        # manage when there are remaining parameters. It means that we may not have parsed the correct concept
        # Concept("one x").def_var("x"),
        # Concept("a plus b").def_var("a").def_var("b"),
        # and parsing one plus two
        if (not concept_parser.has_error() and concept_parser.parameters and concept_parser.sub_concept_detected):
            # not the longest match and possible concept misread: explore an
            # alternative where the start token is treated as unrecognized
            assert isinstance(self.owner, SyaTokensParser)
            fork = self.owner.sya_node_parser.fork_tokens_parser(self.owner)
            fork.stack.clear()
            token = self.owner.parser_input.tokens[start_pos]
            fork.unrecognized_tokens.add_token(token, start_pos)
            fork.parser_input.seek(start_pos)
            fork.set_state(fork.all_states.read_next_token)

    def next(self):
        return self.next_state


class TokensParserStartState(SyaState):
    """Entry state of the tokens parser: no work, go read the first token."""

    def run(self):
        pass

    def next(self):
        return self.owner.all_states.read_next_token


class TokensParserReadNextTokenState(SyaState):
    """Advance one token; branch on whether it can start a known concept."""

    def run(self):
        if not self.owner.parser_input.next_token(False):
            # end of input
            self.next_state = self.owner.all_states.finalize_end
            return
        token = self.owner.parser_input.token
        concepts = self.owner.sya_node_parser.get_concepts(self.owner.context, token)
        if concepts:
            # one tokens-parser instance per candidate concept
            instances = self.owner.sya_node_parser.fork(self.owner, len(concepts))
            for instance, concept in zip(instances, concepts):
                instance.debug(f"concepts found. {concepts=}. Will parse {concept}.", concepts=concepts)
                instance.state_context = concept
                if instance != self.owner:
                    instance.set_state(self.owner.all_states.on_new_concept)
            self.next_state = self.owner.all_states.on_new_concept
            return
        self.next_state = self.owner.all_states.eat_unrecognized_token

    def next(self):
        return self.next_state


class TokensParserFinalizeState(SyaState):
    """Terminal state of the tokens parser: merge unrecognized runs, stop."""

    def run(self):
        self.owner.finalize()

    def next(self):
        return None


class ConceptParserStartState(SyaState):
    """Entry state of the concept parser: compute the expected parts."""

    def run(self):
        self.owner.initialize_expected_parameters()

    def next(self):
        return self.owner.all_states.read_next_token


class ConceptParserReadNextTokenState(SyaState):
    """Advance one token inside a concept; match definition parts, detect
    nested concepts, or buffer the token as unrecognized."""

    def run(self):
        if not self.owner.expected:
            # nothing left to match: the concept is complete
            self.next_state = self.owner.all_states.finalize_end
            self.owner.checkpoint = self.owner.parser_input.pos
            if self.owner.debugger.is_enabled():
                self.owner.debug("The last part is found. Let's finalize.")
            return
        if not self.owner.parser_input.next_token(False):
            # input exhausted while parts are still expected
            self.next_state = self.owner.all_states.finalize_start
            self.owner.checkpoint = self.owner.parser_input.pos
            return
        token = self.owner.parser_input.token
        self.owner.checkpoint = self.owner.parser_input.pos
        # Are we parsing the current concept ?
        if self.owner.is_part_of_concept_definition():
            if self.owner.debugger.is_enabled():
                msg = f"'{get_text_from_tokens(self.owner.expected[0][0])}'"
                msg += f" (from expected {self.owner.nb_expected_popped}) successfully recognized."
                self.owner.debug(msg, expected=self.owner.expected[0][0])
            self.next_state = self.owner.all_states.concept_tokens_start
            return
        else:
            if self.owner.expected[0][0] and len(self.owner.stack) >= self.owner.expected[0][1]:
                # Improvement: the number of parameters is already found, so we must look for concept parts
                raise TokensNotFound(self.owner.concept, get_text_from_tokens(self.owner.expected[0][0]))
        # is it a new concept ?
        concepts = self.owner.parent_tokens_parser.sya_node_parser.get_concepts(self.owner.context, token)
        if concepts:
            concept_parser_clones = self.owner.n_clones(len(concepts))
            for concept_parser, concept in zip(concept_parser_clones, concepts):
                concept_parser.debug(f"concepts found. {concepts=}. Will parse {concept}.", concepts=concepts)
                concept_parser.state_context = concept
                # remember where a nested sya concept started (see create_concept_node)
                self.owner.sub_concepts_start_positions.append(self.owner.parser_input.pos)
                if concept_parser != self.owner:
                    concept_parser.set_state(concept_parser.all_states.on_new_concept)
            self.next_state = self.owner.all_states.on_new_concept
            return
        # push the token into unrecognized
        self.next_state = self.owner.all_states.eat_unrecognized_token

    def next(self):
        return self.next_state


class ConceptParserManageParametersState(SyaState):
    """Consume the parameters collected for the part just matched."""

    def run(self):
        self.owner.manage_parameters()

    def next(self):
        return self.next_state


class ConceptParserOnNewConceptState(SyaState):
    """Shunting-yard decision point: precedence says whether the current
    concept must be finalized ("popped") before the new one is parsed."""

    def run(self):
        concept = self.owner.state_context
        if self.owner.must_pop(concept):
            # finish the current concept first; the hint tells the parent
            # where to resume with the new concept
            hint = AfterRunHint(self.owner.all_states.on_new_concept, concept)
            self.owner.after_parsing_hint = hint
            self.next_state = self.owner.all_states.finalize_start
        else:
            self.next_state = self.owner.all_states.new_concept_start

    def next(self):
        return self.next_state


class ConceptParserStartFinalizeState(SyaState):
    """Begin finalization; fail if fixed definition parts are still pending."""

    def run(self):
        if len(self.owner.expected) > 0 and len(self.owner.expected[0][0]) > 0:
            raise TokensNotFound(self.owner.concept, get_text_from_tokens(self.owner.expected[0][0]))

    def next(self):
        return self.owner.all_states.finalize_manage_unrecognized


class ConceptParserEndFinalizeState(SyaState):
    """Terminal state of the concept parser: build the resulting ConceptNode."""

    def run(self):
        self.owner.concept_node = self.owner.create_concept_node()

    def next(self):
        return None


class TokensParserStates:
    """State registry (and transition wiring) for a SyaTokensParser."""

    def __init__(self, owner):
        self.start = TokensParserStartState("start", owner)
        self.read_next_token = TokensParserReadNextTokenState("read_next_token", owner)
        self.finalize_end = TokensParserFinalizeState("finalize_end", owner)
        self.create_concept_parser = CreateNewConceptParserState("create_concept_parser", owner)
        self.on_new_concept = ManageUnrecognizedState("on_new_concept", owner, self.create_concept_parser)
        self.parse_concept = ParseConceptState("parse_concept", owner)
        self.eat_unrecognized_token = EatUnrecognizedTokenState("eat_unrecognized_token", owner)
        # name -> state map used by get_state() to translate states between
        # parser instances (clones own distinct state objects)
        self._all = {prop.name: prop for prop in vars(self).values()}

    def get_state(self, state):
        # translate a (possibly foreign) state into this registry's instance
        return self._all[state.name] if state else None


class ConceptParserStates:
    """State registry (and transition wiring) for a SyaConceptParser."""

    def __init__(self, owner):
        self.start = ConceptParserStartState("start", owner)
        self.read_next_token = ConceptParserReadNextTokenState("read_next_token", owner)
        # after a definition part is matched: flush unrecognized, then consume parameters
        self.concept_tokens_end = ConceptParserManageParametersState("manage_parameters", owner, self.read_next_token)
        self.concept_tokens_start = ManageUnrecognizedState("manage_concept_tokens", owner, self.concept_tokens_end)
        self.on_new_concept = ConceptParserOnNewConceptState("on_new_concept", owner)
        self.create_concept_parser = CreateNewConceptParserState("create_concept_parser", owner)
        self.new_concept_start = ManageUnrecognizedState("new_concept_start", owner, self.create_concept_parser)
        self.parse_concept = ParseConceptState("parse_concept", owner)
        self.eat_unrecognized_token = EatUnrecognizedTokenState("eat_unrecognized_token", owner)
        # finalization chain: check parts -> flush unrecognized -> consume parameters -> build node
        self.finalize_start = ConceptParserStartFinalizeState("finalize_start", owner)
        self.finalize_end = ConceptParserEndFinalizeState("finalize_end", owner)
        self.finalize_manage_parameters = ConceptParserManageParametersState("finalize_manage_parameters", owner,
                                                                             self.finalize_end)
        self.finalize_manage_unrecognized = ManageUnrecognizedState("finalize_manage_unrecognized", owner,
                                                                    self.finalize_manage_parameters)
        self._all = {prop.name: prop for prop in vars(self).values()}

    def get_state(self, state):
        # translate a (possibly foreign) state into this registry's instance
        return self._all[state.name] if state else None


@dataclass()
class AfterRunHint:
    """ What to do when a sub state machine ends """
    next_state: SyaState
    state_context: object


@dataclass()
class SyaConceptDef:
    """Wrapper around a Concept giving its precedence and associativity."""
    concept: Concept
    precedence: int = SheerkaComparisonManager.DEFAULT_COMPARISON_VALUE
    associativity: SyaAssociativity = SyaAssociativity.Right

    @staticmethod
    def get_sya_concept_def(context, concept):
        """Build a SyaConceptDef, reading precedence from the sheerka weights
        and associativity from the concept's ASSOCIATIVITY property."""
        sya_concept_def = SyaConceptDef(concept)
        concept_weight = context.sheerka.get_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
        if concept.str_id in concept_weight:
            sya_concept_def.precedence = concept_weight[concept.str_id]
        if associativity := concept.get_prop(BuiltinConcepts.ASSOCIATIVITY):
            sya_concept_def.associativity = SyaAssociativity(associativity)
        return sya_concept_def

    def short_repr(self):
        return f"({self.concept}, prio={self.precedence}, assoc={self.associativity})"

    def get_concept(self):
        return self.concept


class BaseSyaParser:
    """Common plumbing for SyaTokensParser and SyaConceptParser: input
    cursor, node stack, unrecognized-token buffer, error list, state machine
    bookkeeping and debug recording."""

    def __init__(self, context, parser_input, all_states, start_pos=None, end_pos=None, stack=None,
                 unrecognized_tokens=None, state=None, state_context=None, debug_items=None):
        self.context = context
        self.parser_input = parser_input
        self.start_pos = start_pos if start_pos is not None else parser_input.pos
        self.end_pos = end_pos if end_pos is not None else self.start_pos
        self.stack = stack if stack is not None else []
        self.unrecognized_tokens = unrecognized_tokens or UnrecognizedTokensNode(-1, -1, [])
        self.errors = []
        self.all_states = all_states
        # resume at the given state (translated into our own registry) or start fresh
        self._state = self.all_states.get_state(state) if state else self.all_states.start
        self.state_context = state_context  # state-specific payload (concept, sub-parser, ...)
        self.debug_items = debug_items if debug_items is not None else []

    def has_error(self):
        return len(self.errors) > 0

    def add_to_unrecognized(self, token, pos):
        self.unrecognized_tokens.add_token(token, pos)
        self.end_pos = pos

    @property
    def state(self):
        return self._state

    def set_state(self, state):
        # always store OUR instance of the state, never a foreign parser's
        self._state = self.all_states.get_state(state)

    def get_unrecognized_tokens_requests_cache(self):
        raise NotImplementedError(f"BaseSyaParser.get_unrecognized_tokens_requests_cache()")

    def get_tokens_parser(self):
        raise NotImplementedError(f"BaseSyaParser.get_tokens_parser()")

    def add_debug(self, text, is_error=False, **kwargs):
        """Record a DebugItem with a snapshot of the current parser state."""
        args = {"token": self.parser_input.token, "pos": self.parser_input.pos, "stack": self.stack.copy()}
        args.update(kwargs)
        self.debug_items.append(DebugItem(text, is_error, args))
        return args


class SyaTokensParser(BaseSyaParser):
    """Top-level state machine: walks the token stream, spawning a
    SyaConceptParser whenever a token can start a known concept, and
    forking itself when several interpretations are possible."""

    def __init__(self, context, sya_node_parser, parser_input: ParserInput, start_pos=None, stack=None,
                 unrecognized_tokens=None, state=None, state_context=None, debug_items=None):
        super().__init__(context, parser_input, TokensParserStates(self), start_pos, start_pos, stack,
                         unrecognized_tokens, state, state_context, debug_items)
        self.id = sya_node_parser.get_next_tokens_parser_id()
        self.sya_node_parser = sya_node_parser
        self.has_sya_concept = False  # set by parse_token_parser after validation
        self.has_unrecognized = False  # set by parse_token_parser after validation
        self.debugger = context.get_debugger(sya_node_parser.NAME, "parse", forced_debug_id=self.id)

    def __repr__(self):
        return f"SyaTokensParser({debug_nodes(self.stack)})"

    def parse(self):
        """Run the state machine to completion; SyaNodeExceptions are
        recorded in self.errors instead of propagating."""
        try:
            while self.state:
                self.state.run()
                self._state = self.state.next()
        except SyaNodeException as err:
            self.debug(err, is_error=True, error=err)
            self.errors.append(err)

    def finalize(self):
        """ Merge UnrecognizedTokensNodes when they are following each other """
        # flush the unrecognized
        if not self.unrecognized_tokens.is_empty():
            self.unrecognized_tokens.fix_source()
            self.stack.append(self.unrecognized_tokens)
            self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
        # merge sequential unrecognized
        res = []
        unrecognized_tokens_node = None
        for node in self.stack:
            if isinstance(node, UnrecognizedTokensNode):
                if unrecognized_tokens_node:
                    # extend the open run instead of appending a new node
                    for i, token in enumerate(node.tokens):
                        unrecognized_tokens_node.add_token(token, node.start + i)
                else:
                    unrecognized_tokens_node = node
                    res.append(unrecognized_tokens_node)
            else:
                res.append(node)
                if unrecognized_tokens_node:
                    # a recognized node closes the current unrecognized run
                    unrecognized_tokens_node.fix_source()
                    unrecognized_tokens_node = None
        if unrecognized_tokens_node:
            unrecognized_tokens_node.fix_source()
        if len(res) != len(self.stack) and self.debugger.is_enabled():
            self.debug(f"Transformed stack from {debug_nodes(self.stack)} into {debug_nodes(res)}")
        self.stack = res

    def clone(self):
        # NOTE(review): stack/unrecognized are copied but debug_items is
        # shared between clones — presumably intentional (one merged log);
        # confirm before changing.
        clone = SyaTokensParser(self.context, self.sya_node_parser, self.parser_input.clone(), self.start_pos,
                                self.stack.copy(), self.unrecognized_tokens.clone(), self.state, self.state_context,
                                self.debug_items)
        return clone

    def n_clones(self, nb_clones):
        return self.sya_node_parser.fork(self, nb_clones)

    def get_unrecognized_tokens_requests_cache(self):
        return self.sya_node_parser.cache

    def get_tokens_parser(self):
        return self

    def debug(self, text, is_error=False, **kwargs):
        """Record a debug item and, when enabled, emit it to the debugger."""
        args = self.add_debug(text, is_error, **kwargs)
        if self.debugger.is_enabled():
            debug_prefix = f"pos={self.parser_input.pos}, token='{self.parser_input.token.repr_value}'"
            self.debugger.debug_log(f"{debug_prefix}: {text}", is_error, args)


class SyaConceptParser(BaseSyaParser):
    """State machine that parses one concept instance: matches the fixed
    parts of the concept definition, collects parameter nodes in between,
    and builds the final ConceptNode."""

    def __init__(self, tokens_parser: SyaTokensParser, concept: Concept, prefix_parameters, stack=None,
                 unrecognized_tokens=None, start_pos=None, end_pos=None, sub_concepts_start_positions=None,
                 sub_concept_detected=False, debug_items=None):
        super().__init__(tokens_parser.context, tokens_parser.parser_input, ConceptParserStates(self), start_pos,
                         end_pos, stack, unrecognized_tokens, debug_items=debug_items)
        self.parent_tokens_parser = tokens_parser
        self.concept = concept
        # nodes already parsed by the parent that may serve as leading parameters
        self.prefix_parameters = prefix_parameters
        self.prefix_parameters_snapshot = prefix_parameters.copy()  # kept for rollback
        self.checkpoint = self.start_pos  # position reported in parameter errors
        self.expected = []  # list of (tokens, nb_vars) pairs, see compute_expected_parameters()
        self.nb_expected_popped = 0
        self.parameters = []  # parameters found during parsing
        self.concept_node = None  # concept_node returned
        self.sub_concepts_start_positions = sub_concepts_start_positions or []  # sya concepts found during parsing
        self.sub_concept_detected = sub_concept_detected
        self.after_parsing_hint = None  # AfterRunHint for the parent, set on precedence pop
        self.debugger = self.parent_tokens_parser.debugger

    def __repr__(self):
        if self.concept_node:
            return f"SyaConceptParser(concept_node={self.concept_node})"
        text = f"SyaConceptParser(concept={self.concept}, "
        if self.has_error():
            text += f"errors={self.errors}"
        else:
            res = []
            for tokens, nb_vars in self.expected:
                debug_tokens = "".join([t.repr_value for t in tokens])
                res.append((debug_tokens, nb_vars))
            text += f"expected={res}"
        text += f", stack={debug_nodes(self.stack)}"
        return text + ")"

    @staticmethod
    def compute_expected_parameters(concept_key):
        """ Return a list of pairs (expected tokens, number of expected variables before these tokens)
        ex: 'if x y then z end' => ('if', 0), ('then', 2), ('end', 1) """

        def custom_strip_tokens(_tokens):
            # like strip_tokens but keeps at most one pending whitespace
            # between non-whitespace tokens
            res = []
            buffer = None
            for t in _tokens:
                if t.type == TokenKind.WHITESPACE:
                    buffer = t
                else:
                    if buffer:
                        res.append(buffer)
                        buffer = None
                    res.append(t)
            if res and buffer:
                # add the buffer only if the result is not empty
                res.append(buffer)
            return res

        expected = []  # tuple of expected token and number of expected variables before this token
        tokens = []
        nb_variables = 0
        under_tokens = None  # True once a fixed (non-variable) token has been seen in the current part
        for token in Tokenizer(concept_key, yield_eof=False):
            if token.type == TokenKind.WHITESPACE:
                tokens.append(token)
            elif token.type == TokenKind.VAR_DEF:
                if under_tokens is not None and under_tokens:
                    # a variable after fixed tokens closes the current part
                    expected.append((custom_strip_tokens(tokens), nb_variables))
                    nb_variables = 1
                    tokens = []
                    under_tokens = False
                else:
                    nb_variables += 1
            else:
                tokens.append(token)
                under_tokens = True
        if tokens or nb_variables:
            expected.append((custom_strip_tokens(tokens), nb_variables))
        return expected

    def initialize_expected_parameters(self):
        """Compute self.expected and consume the required prefix parameters.

        Raises NotEnoughParameters when the parent stack does not hold the
        parameters expected before the concept's first fixed token.
        """
        self.expected = self.compute_expected_parameters(self.concept.key)
        expected_parameters_before_first_token = self.expected[0][1]
        # remove the trailing whitespace before counting the parameters
        if len(self.prefix_parameters) > 0 and self.prefix_parameters[-1].source.isspace():
            self.prefix_parameters.pop()
        # Check the number of prefixed parameters
        if len(self.prefix_parameters) < expected_parameters_before_first_token:
            raise NotEnoughParameters(self.concept, get_text_from_tokens(self.expected[0][0]), self.start_pos,
                                      expected_parameters_before_first_token, self.prefix_parameters.copy())
        # add the previous parameters to the list of available parameters
        for i in range(expected_parameters_before_first_token):
            self.stack.insert(0, self.prefix_parameters.pop())
        # prepare the tokens to recognize
        if self.expected[0][0][0].type == TokenKind.WHITESPACE:
            # remove white space before the first token if any
            self.expected[0][0].pop(0)
        self.expected[0][0].pop(0)  # pop the first token (it already triggered this parser)

    def parse(self):
        """Run the state machine to completion; SyaNodeExceptions are
        recorded in self.errors instead of propagating."""
        try:
            while self.state:
                self.state.run()
                self._state = self.state.next()
        except SyaNodeException as err:
            self.debug(err, is_error=True, error=err)
            self.errors.append(err)

    def recognize(self, start_pos, tokens):
        """Return True when the input at start_pos matches ``tokens``
        (type and value compared token by token)."""
        for i in range(len(tokens)):
            pi_input_token = self.parser_input.tokens[i + start_pos]
            token = tokens[i]
            if pi_input_token.type != token.type or pi_input_token.value != token.value:
                return False
        return True

    def is_part_of_concept_definition(self):
        """Try to match the next expected fixed part at the current position.

        On success the cursor is advanced past the matched tokens and
        end_pos is updated.
        """
        to_be_recognized = self.expected[0][0]
        if not to_be_recognized:
            return False
        # NOTE(review): refuses a match that would end exactly at the input's
        # last token — intent unclear from here, confirm before touching.
        if self.parser_input.pos == self.parser_input.length - len(to_be_recognized):
            return False
        if self.recognize(self.parser_input.pos, to_be_recognized):
            self.parser_input.seek(self.parser_input.pos + len(to_be_recognized) - 1)
            self.end_pos = self.parser_input.pos
            return True
        return False

    def must_pop(self, other_concept: Concept):
        """ Compare the priority of the 'other' concept against self.concept """
        current = SyaConceptDef.get_sya_concept_def(self.context, self.concept)
        other = SyaConceptDef.get_sya_concept_def(self.context, other_concept)
        if current.associativity == SyaAssociativity.No and other.associativity == SyaAssociativity.No:
            raise NoneAssociativeConceptsError(self.concept, other_concept)
        # classic shunting-yard rule: left-assoc pops on <=, right-assoc on <
        if other.associativity == SyaAssociativity.Left and other.precedence <= current.precedence:
            return True
        if other.associativity == SyaAssociativity.Right and other.precedence < current.precedence:
            return True
        return False

    def manage_parameters(self):
        """Move the stack content into self.parameters for the current
        expected part; raise when the count does not match."""
        # check the number of expected parameters
        if not self.expected:
            return
        nb_expected_parameters = self.expected[0][1]
        if len(self.stack) < nb_expected_parameters:
            raise NotEnoughParameters(self.concept, get_text_from_tokens(self.expected[0][0]), self.checkpoint,
                                      nb_expected_parameters, self.stack.copy())
        if len(self.stack) > nb_expected_parameters and len(self.expected[0][0]) > 0:
            # Only raise an exception when there are too many parameters for ternary like concepts
            raise TooManyParameters(self.concept, get_text_from_tokens(self.expected[0][0]), self.checkpoint,
                                    nb_expected_parameters, self.stack.copy())
        self.parameters.extend(self.stack)
        self.stack.clear()
        self.expected.pop(0)
        self.nb_expected_popped += 1

    def create_concept_node(self):
        """Instantiate the concept with the collected parameters and wrap it
        in a ConceptNode spanning the full source range of the match."""
        new_concept = self.context.sheerka.new_from_template(self.concept, self.concept.key)
        new_concept.get_hints().use_copy = True
        new_concept.get_hints().need_validation = True
        new_concept.get_hints().recognized_by = RECOGNIZED_BY_KEY
        start_pos = self.start_pos
        end_pos = self.end_pos
        assert len(new_concept.get_metadata().parameters) <= len(self.parameters)
        # update the parameters
        for param_name, param_value in zip(new_concept.get_metadata().parameters, self.parameters):
            new_concept.get_compiled()[param_name] = param_value
            # set sub_concept_detected to True if we have eaten an sya concept as an unrecognized token
            # It may help if we fail to validate the current concept
            # See test_i_always_look_for_the_longest_match() for more explanations
            if param_value.start in self.sub_concepts_start_positions:
                self.sub_concept_detected = True
            # widen the node's range to cover parameters parsed before start_pos
            if param_value.start < start_pos:
                start_pos = param_value.start
            if param_value.end > end_pos:
                end_pos = param_value.end
        # remove the parameters that are already used
        for i in range(len(new_concept.get_metadata().parameters)):
            self.parameters.pop(0)
        # update variable metadata
        new_concept.get_metadata().variables = get_new_variables_definitions(new_concept)
        tokens = self.parser_input.tokens[start_pos:end_pos + 1]
        concept_node = ConceptNode(new_concept, start_pos, end_pos, tokens, get_text_from_tokens(tokens))
        if self.debugger.is_enabled():
            msg = f"concept node {new_concept}"
            for param_name, param_value in new_concept.get_compiled().items():
                msg += f", {param_name}='{param_value.source}'"
            msg += " successfully created."
            self.debug(msg, concept_node=concept_node)
        return concept_node

    def clone(self):
        """Clone this concept parser onto a forked tokens parser so an
        alternative interpretation can be explored independently."""
        fork = self.parent_tokens_parser.sya_node_parser.fork_tokens_parser(self.parent_tokens_parser)
        clone = SyaConceptParser(fork, self.concept, self.prefix_parameters.copy(), self.stack.copy(),
                                 self.unrecognized_tokens.clone(), self.start_pos, self.end_pos,
                                 self.sub_concepts_start_positions.copy(), self.sub_concept_detected, self.debug_items)
        clone.expected = self.expected.copy()
        clone.nb_expected_popped = self.nb_expected_popped
        clone.parameters = self.parameters.copy()
        clone.state_context = self.state_context
        # the fork resumes by parsing this cloned concept parser
        fork.state_context = clone
        return clone

    def n_clones(self, nb_clones):
        """Return nb_clones parsers, self included (no clone when nb is 1)."""
        if nb_clones == 1:
            return [self]
        return [self] + [self.clone() for _ in range(nb_clones - 1)]

    def get_unrecognized_tokens_requests_cache(self):
        return self.parent_tokens_parser.sya_node_parser.cache

    def get_tokens_parser(self):
        return self.parent_tokens_parser

    def debug(self, text, is_error=False, **kwargs):
        """Record a debug item and, when enabled, emit it to the debugger."""
        args = self.add_debug(text, is_error, **kwargs)
        if self.debugger.is_enabled():
            debug_prefix = f"pos={self.parser_input.pos}, token='{self.parser_input.token.repr_value}'"
            self.debugger.debug_log(f"{debug_prefix}: {text}", is_error=is_error, args=args)


class SyaNodeParser(BaseNodeParser):
    """Node parser ("Sya") that recognizes concepts with parameter variables
    in a token stream, exploring alternative interpretations via forked
    SyaTokensParser instances."""

    NAME = "Sya"

    def __init__(self, **kwargs):
        super().__init__(SyaNodeParser.NAME, 50, enabled=True, **kwargs)
        self.cache = UnrecognizedTokensCache(PARSERS)
        self.forks = []  # pending forked tokens parsers, drained by parse_token_parser
        self.tokens_parser_next_id = 0

    @staticmethod
    def _is_eligible(concept):
        """ Predicate that selects concepts that must be handled by SyaNodeParser
        :param concept:
        :return:
        """
        # We only consider concepts that have parameter variables (refuse atoms)
        # Bnf definitions are not supposed to be managed by this parser either
        return (concept.get_metadata().definition_type != DEFINITION_TYPE_BNF
                and len(concept.get_metadata().parameters) > 0)

    @staticmethod
    def _function_detected(tokens):
        """ Returns True if we think that the result of the tokens parser is a function """
        tokens = strip_tokens(tokens, True)
        # 'name(...)' shape
        if (len(tokens) >= 3 and tokens[0].type == TokenKind.IDENTIFIER and tokens[1].value == "("
                and tokens[-1].value == ")"):
            return True
        # 'name (...)' shape (whitespace between name and parenthesis)
        if (len(tokens) >= 4 and tokens[0].type == TokenKind.IDENTIFIER and tokens[1].type == TokenKind.WHITESPACE
                and tokens[2].value == "(" and tokens[-1].value == ")"):
            return True
        return False

    @staticmethod
    def _merge_errors(sheerka, errors):
        """Flatten per-parser error lists into a single error result."""
        res = flatten(errors)
        if len(res) == 1:
            return res[0]
        else:
            return sheerka.err([e.body for e in res])

    def get_next_tokens_parser_id(self):
        # monotonically increasing id used for per-instance debug tracing
        self.tokens_parser_next_id += 1
        return self.tokens_parser_next_id

    def get_concepts(self, context, token):
        """Eligible concepts whose definition starts with ``token``."""
        return context.sheerka.get_concepts_by_first_token(token, self._is_eligible)

    def fork(self, tokens_parser, number_of_forks):
        """Return ``number_of_forks`` tokens parsers (the original first);
        clones are queued in self.forks for later parsing."""
        if number_of_forks == 1:
            return [tokens_parser]
        forks = [tokens_parser.clone() for _ in range(number_of_forks - 1)]
        self.forks.extend(forks)
        return [tokens_parser] + forks

    def fork_tokens_parser(self, tokens_parser):
        """Clone a single tokens parser and queue it for later parsing."""
        clone = tokens_parser.clone()
        self.forks.append(clone)
        return clone

    def parse_token_parser(self, context, tokens_parser: SyaTokensParser, result, in_error, not_for_me):
        """Run one tokens parser, classify it into result/in_error/not_for_me,
        then recursively process any forks it spawned."""
        tokens_parser.parse()
        # validate the nodes
        for node in tokens_parser.stack:
            if isinstance(node, ConceptNode):
                errors = []
                update_compiled(context, node.concept, errors)
                tokens_parser.errors.extend(errors)
                if self._is_eligible(node.concept) and node.concept.get_compiled():
                    tokens_parser.has_sya_concept = True
            elif isinstance(node, UnrecognizedTokensNode):
                tokens_parser.has_unrecognized = True
        # put the tokens_parser in the correct bag
        if tokens_parser.has_sya_concept and not tokens_parser.has_error():
            result.append(tokens_parser)
        elif tokens_parser.has_sya_concept and tokens_parser.has_error():
            in_error.append(tokens_parser)
        else:
            tokens_parser.errors.append(NoSyaConceptFound())
            not_for_me.append(tokens_parser)
        # recurse on the forks
        if self.forks:
            forks = self.forks.copy()
            self.forks.clear()  # forks may themselves fork; drain iteratively
            for fork in forks:
                self.parse_token_parser(context, fork, result, in_error, not_for_me)

    # @profile(filename="sya_node_parser_parse")
    def parse(self, context, parser_input: ParserInput):
        """Parser entry point.

        Returns a single sheerka result when there is exactly one outcome,
        a list of results when several interpretations survived, or None
        when the input is not a ParserInput.
        """
        if not isinstance(parser_input, ParserInput):
            return None
        if parser_input.is_empty():
            return context.sheerka.ret(
                self.name, False, context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(self.name, False,
                                    context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        debugger = context.get_debugger(self.NAME, "parse")
        debugger.debug_entering(source=self.parser_input.as_text())
        # function-call looking input is delegated to other parsers
        if self._function_detected(parser_input.tokens):
            return self.sheerka.ret(self.name, False,
                                    context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(),
                                                        reason=[FunctionDetected()]))
        tokens_parser = SyaTokensParser(context, self, parser_input)
        valid_tokens_parser, in_errors, not_for_me = [], [], []
        self.parse_token_parser(context, tokens_parser, valid_tokens_parser, in_errors, not_for_me)
        if debugger.is_enabled():
            debugger.debug_var("stats", self.cache.to_dict())
        debugger.debug_leaving(valid=valid_tokens_parser, error=in_errors, not_for_me=not_for_me)
        ret = []
        if valid_tokens_parser:
            # one PARSER_RESULT per surviving interpretation; fully recognized
            # input (no unrecognized tokens) is flagged as a success
            for tokens_parser in valid_tokens_parser:
                ret.append(
                    self.sheerka.ret(self.name, not tokens_parser.has_unrecognized,
                                     self.sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self,
                                                      source=parser_input.as_text(), body=tokens_parser.stack,
                                                      try_parsed=tokens_parser.stack)))
        elif in_errors:
            errors = self._merge_errors(self.sheerka, [tp.errors for tp in in_errors])
            ret.append(self.sheerka.ret(self.name, False, errors))
        else:
            errors = []
            for tp in not_for_me:
                errors.extend(tp.errors)
            ret.append(self.sheerka.ret(self.name, False,
                                        context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(),
                                                            reason=errors)))
        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret