945807b375
Fixed #74 : Keyword parameters are no longer recognized when a concept that redefines equality is created Fixed #118 : RecursionError: maximum recursion depth exceeded Fixed #119 : PreventCircularReferenceEvaluator Fixed #121 : Plural are not updated when new elements are added Fixed #123 : BaseCache : Values in cache can be evicted before being committed Fixed #105 : TOO_MANY_ERROR is not the relevant error when results are filtered
1088 lines
41 KiB
Python
1088 lines
41 KiB
Python
from dataclasses import dataclass
|
|
|
|
from core.builtin_concepts_ids import BuiltinConcepts
|
|
from core.builtin_helpers import debug_nodes, get_new_variables_definitions, update_compiled
|
|
from core.concept import Concept, DEFINITION_TYPE_BNF
|
|
from core.global_symbols import CONCEPT_COMPARISON_CONTEXT, SyaAssociativity
|
|
from core.sheerka.Sheerka import RECOGNIZED_BY_KEY
|
|
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
from core.tokenizer import TokenKind, Tokenizer
|
|
from core.utils import flatten, get_text_from_tokens, strip_tokens
|
|
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensCache, UnrecognizedTokensNode
|
|
from parsers.BaseParser import ParsingError
|
|
|
|
# Names of the sub-parsers handed to UnrecognizedTokensCache (see SyaNodeParser.__init__),
# used when trying to re-recognize buffered unrecognized tokens.
PARSERS = ["Sequence", "Bnf", "Python"]
|
|
class SyaNodeException(Exception):
    """Base exception for errors raised while a Sya parser state machine runs.

    Parsers catch this type in parse() and record it instead of propagating.
    """
|
|
|
|
|
class NoSyaConceptFound(ParsingError):
    """Parsing error signalling that no Sya concept was found."""
|
|
|
@dataclass
class NotEnoughParameters(ParsingError, SyaNodeException):
    """Raised when a concept expects more parameters than were collected.

    Fields:
        concept: the concept being parsed.
        tokens: token before which the parameters are expected.
        pos: position of the token.
        nb_expected: number of parameters that were expected.
        parameters: parameters actually collected; None acts as a wildcard
            on the right-hand side of an equality comparison.
    """
    concept: Concept
    tokens: str  # token before which the parameters are expected
    pos: int  # position of the token
    nb_expected: int
    parameters: list = None

    def __repr__(self):
        return f"not enough parameters found when parsing {self.concept}."

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Fix: return NotImplemented (not False) for foreign types so Python
        # can try the reflected comparison; results against unrelated objects
        # are unchanged (the fallback is still inequality).
        if not isinstance(other, NotEnoughParameters):
            return NotImplemented

        return (
            self.concept == other.concept and
            self.tokens == other.tokens and
            self.pos == other.pos and
            self.nb_expected == other.nb_expected and
            # None on the right-hand side matches any collected parameters
            (other.parameters is None or self.parameters == other.parameters)
        )
|
|
|
|
|
|
@dataclass
class TooManyParameters(ParsingError, SyaNodeException):
    """Raised when more parameters than expected were collected for a part.

    Fields:
        concept: the concept being parsed.
        tokens: token before which the parameters are expected.
        pos: position of the token.
        nb_expected: number of parameters that were expected.
        parameters: parameters actually collected; None acts as a wildcard
            on the right-hand side of an equality comparison.
    """
    concept: Concept
    tokens: str  # token before which the parameters are expected
    pos: int  # position of the token
    nb_expected: int
    parameters: list = None

    def __repr__(self):
        return f"Too many parameters found when parsing {self.concept}."

    def __str__(self):
        return repr(self)

    def __eq__(self, other):
        # Fix: return NotImplemented (not False) for foreign types so Python
        # can try the reflected comparison; results against unrelated objects
        # are unchanged (the fallback is still inequality).
        if not isinstance(other, TooManyParameters):
            return NotImplemented

        return (
            self.concept == other.concept and
            self.tokens == other.tokens and
            self.pos == other.pos and
            self.nb_expected == other.nb_expected and
            # None on the right-hand side matches any collected parameters
            (other.parameters is None or self.parameters == other.parameters)
        )
|
|
|
|
|
|
@dataclass
class TokensNotFound(ParsingError, SyaNodeException):
    """Parsing error: an expected token sequence was absent from the input."""
    concept: Concept  # concept being parsed
    tokens: str

    def __repr__(self):
        return "Failed to find '{}' when parsing {}.".format(self.tokens, self.concept)

    # str() and repr() render identically for this error
    __str__ = __repr__
|
|
|
|
|
|
@dataclass
class NoneAssociativeConceptsError(ParsingError, SyaNodeException):
    """Two non-associative concepts competed for the same position."""
    concept_a: Concept
    concept_b: Concept
|
|
|
|
|
|
@dataclass
class FunctionDetected(ParsingError):
    """Parsing error raised when a function is detected."""
|
|
|
|
|
|
@dataclass
class DebugItem:
    """One entry of a parser's debug trace."""
    text: str       # message recorded
    is_error: bool  # True when this entry records an error
    args: dict      # contextual values captured with the message

    def __repr__(self):
        return "(DebugItem '{}')".format(self.text)
|
|
|
|
|
|
class SyaState:
    """Base class for one state of a Sya parser state machine.

    A state knows its owning parser and, optionally, the state to move to
    once it has run.
    """

    def __init__(self, name, owner, next_state=None):
        self.name = name
        self.owner = owner
        self.next_state = next_state

    def __repr__(self):
        parser_input = self.owner.parser_input
        return "({}, token={}, pos={})".format(self.name, parser_input.token, parser_input.pos)

    def run(self):
        """Execute the state's action. Default: do nothing."""

    def next(self):
        """Return the state to transition to. Default: None (stop)."""
|
|
|
|
|
|
class EatUnrecognizedTokenState(SyaState):
    """Push the current token onto the owner's unrecognized-tokens buffer."""

    def run(self):
        parser_input = self.owner.parser_input
        self.owner.unrecognized_tokens.add_token(parser_input.token, parser_input.pos)

    def next(self):
        # Always resume reading the next token.
        return self.owner.all_states.read_next_token
|
|
|
|
|
|
class ManageUnrecognizedState(SyaState):
    """Flush the buffered unrecognized tokens before moving to ``next_state``.

    When the buffer is non-whitespace, the cache is first asked to
    re-recognize the buffered tokens as node sequences; one parser clone is
    created per candidate sequence. Otherwise the buffer is pushed onto the
    stack as a plain UnrecognizedTokensNode.
    """

    def run(self):
        # Nothing buffered: nothing to do.
        if self.owner.unrecognized_tokens.is_empty():
            return

        self.owner.unrecognized_tokens.fix_source()

        if not self.owner.unrecognized_tokens.is_whitespace():
            # try to recognize concepts
            cache = self.owner.get_unrecognized_tokens_requests_cache()
            nodes_sequences = cache.get_lexer_nodes_from_unrecognized(self.owner.context,
                                                                     self.owner.unrecognized_tokens)

            if nodes_sequences:
                if self.owner.debugger.is_enabled():
                    nodes_sequences_as_dbg = [debug_nodes(nodes) for nodes in nodes_sequences]
                    debug_text = f"from '{self.owner.unrecognized_tokens.source}', recognized {nodes_sequences_as_dbg}"
                    self.owner.debug(debug_text, nodes_sequences=nodes_sequences)

                # One parser per candidate sequence (the first one is self);
                # only the clones need their state set explicitly, self
                # proceeds through next() as usual.
                concept_parser_clones = self.owner.n_clones(len(nodes_sequences))
                for concept_parser, node_sequence in zip(concept_parser_clones, nodes_sequences):
                    concept_parser.stack.extend(node_sequence)
                    # reset the buffer: its content is now on the stack
                    concept_parser.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
                    if concept_parser != self.owner:
                        concept_parser.set_state(self.next_state)

                return

        # Whitespace-only buffer, or nothing recognized: keep the buffer on
        # the stack as-is and start a fresh one.
        self.owner.stack.append(self.owner.unrecognized_tokens)
        self.owner.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def next(self):
        # next_state was provided at construction time (see *States classes).
        return self.next_state
|
|
|
|
|
|
class CreateNewConceptParserState(SyaState):
    """Build a SyaConceptParser for the concept stored in ``state_context``.

    The freshly built parser replaces the concept in ``state_context`` so the
    following parse_concept state can drive it.
    """

    def run(self):
        owner = self.owner
        concept = owner.state_context
        owner.state_context = SyaConceptParser(owner.get_tokens_parser(), concept, owner.stack)

    def next(self):
        return self.owner.all_states.parse_concept
|
|
|
|
|
|
class ParseConceptState(SyaState):
    """Drive the sub SyaConceptParser held in ``state_context`` to completion.

    On failure the input is rolled back and the starting token is re-queued
    as unrecognized; on success the produced concept node is pushed onto the
    stack. May additionally fork a tokens parser to retry when the match was
    possibly not the longest one.
    """

    def run(self):
        start_pos = self.owner.parser_input.pos
        concept_parser = self.owner.state_context
        concept_parser.parse()

        if concept_parser.has_error():
            # we did not parse the concept we thought we parsed. Let's rollback
            token = self.owner.parser_input.tokens[start_pos]

            if self.owner.debugger.is_enabled():
                msg = f"Rollbacking {concept_parser.concept}. "
                msg += f"Token '{token.str_value}' is now unrecognized. pos={start_pos}."
                self.owner.debug(msg, is_error=True)

            self.owner.unrecognized_tokens.add_token(token, start_pos)
            self.owner.parser_input.seek(start_pos)
            # restore the prefix parameters the failed parser consumed
            concept_parser.prefix_parameters.clear()
            concept_parser.prefix_parameters.extend(concept_parser.prefix_parameters_snapshot)

        else:
            self.owner.stack.append(concept_parser.concept_node)
            if isinstance(self.owner, SyaConceptParser) and self.owner.expected and self.owner.expected[0][0]:
                # the parent (owner) is a concept parser that still needs some tokens
                # What was recognized by the current concept_parser may be irrelevant
                self.owner.parser_input.seek(concept_parser.concept_node.end)
            else:
                self.owner.stack.extend(concept_parser.parameters)  # append the parameters parsed but not used

        if concept_parser.after_parsing_hint:
            # the sub parser asked its parent to resume in a specific state
            # (precedence pop, see ConceptParserOnNewConceptState)
            self.next_state = self.owner.all_states.get_state(concept_parser.after_parsing_hint.next_state)
            self.owner.state_context = concept_parser.state_context
        else:
            self.next_state = self.owner.all_states.read_next_token

        # manage when there are remaining parameters. It means that we may not have parsed the correct concept
        # Concept("one x").def_var("x"),
        # Concept("a plus b").def_var("a").def_var("b"),
        # and parsing one plus two
        if (not concept_parser.has_error() and
                concept_parser.parameters and
                concept_parser.sub_concept_detected):  # not the longest match and possible concept misread
            assert isinstance(self.owner, SyaTokensParser)

            # fork a tokens parser that retries from start_pos, treating the
            # first token as unrecognized this time
            fork = self.owner.sya_node_parser.fork_tokens_parser(self.owner)
            fork.stack.clear()
            token = self.owner.parser_input.tokens[start_pos]
            fork.unrecognized_tokens.add_token(token, start_pos)
            fork.parser_input.seek(start_pos)
            fork.set_state(fork.all_states.read_next_token)

    def next(self):
        return self.next_state
|
|
|
|
|
|
class TokensParserStartState(SyaState):
    """Entry state of the tokens parser: no action, just start reading."""

    def run(self):
        return None

    def next(self):
        return self.owner.all_states.read_next_token
|
|
|
|
|
|
class TokensParserReadNextTokenState(SyaState):
    """Advance the tokens parser by one token and route accordingly."""

    def run(self):
        # No more tokens: go finalize.
        if not self.owner.parser_input.next_token(False):
            self.next_state = self.owner.all_states.finalize_end
            return

        token = self.owner.parser_input.token
        concepts = self.owner.sya_node_parser.get_concepts(self.owner.context, token)
        if concepts:
            # One parser instance per candidate concept (the first is self);
            # only the forks need their state set explicitly.
            instances = self.owner.sya_node_parser.fork(self.owner, len(concepts))
            for instance, concept in zip(instances, concepts):
                instance.debug(f"concepts found. {concepts=}. Will parse {concept}.", concepts=concepts)
                instance.state_context = concept
                if instance != self.owner:
                    instance.set_state(self.owner.all_states.on_new_concept)
            self.next_state = self.owner.all_states.on_new_concept
            return

        # Token starts no known concept: buffer it as unrecognized.
        self.next_state = self.owner.all_states.eat_unrecognized_token

    def next(self):
        return self.next_state
|
|
|
|
|
|
class TokensParserFinalizeState(SyaState):
    """Terminal state of the tokens parser: finalize the owner and stop."""

    def run(self):
        self.owner.finalize()

    def next(self):
        # Returning None halts the parse loop.
        return None
|
|
|
|
|
|
class ConceptParserStartState(SyaState):
    """Entry state of a concept parser: compute the expected parts first."""

    def run(self):
        self.owner.initialize_expected_parameters()

    def next(self):
        return self.owner.all_states.read_next_token
|
|
|
|
|
|
class ConceptParserReadNextTokenState(SyaState):
    """Advance the concept parser by one token.

    In order of priority: finalize when nothing is expected anymore, match
    the next expected part of the concept definition, spawn sub concept
    parsers for a nested concept, or buffer the token as unrecognized.
    """

    def run(self):

        if not self.owner.expected:
            # Every part of the concept definition has been matched.
            self.next_state = self.owner.all_states.finalize_end
            self.owner.checkpoint = self.owner.parser_input.pos
            if self.owner.debugger.is_enabled():
                self.owner.debug("The last part is found. Let's finalize.")
            return

        if not self.owner.parser_input.next_token(False):
            # Input exhausted while parts are still expected.
            self.next_state = self.owner.all_states.finalize_start
            self.owner.checkpoint = self.owner.parser_input.pos
            return

        token = self.owner.parser_input.token
        self.owner.checkpoint = self.owner.parser_input.pos

        # Are we parsing the current concept ?
        if self.owner.is_part_of_concept_definition():
            if self.owner.debugger.is_enabled():
                msg = f"'{get_text_from_tokens(self.owner.expected[0][0])}'"
                msg += f" (from expected {self.owner.nb_expected_popped}) successfully recognized."
                self.owner.debug(msg, expected=self.owner.expected[0][0])
            self.next_state = self.owner.all_states.concept_tokens_start
            return
        else:
            if self.owner.expected[0][0] and len(self.owner.stack) >= self.owner.expected[0][1]:
                # Improvement: the number of parameter is already found, so we must look for concept parts
                raise TokensNotFound(self.owner.concept, get_text_from_tokens(self.owner.expected[0][0]))

        # is it a new concept ?
        concepts = self.owner.parent_tokens_parser.sya_node_parser.get_concepts(self.owner.context, token)
        if concepts:
            # One clone of this concept parser per candidate concept (the
            # first one is self); only the clones get their state set here.
            concept_parser_clones = self.owner.n_clones(len(concepts))
            for concept_parser, concept in zip(concept_parser_clones, concepts):
                concept_parser.debug(f"concepts found. {concepts=}. Will parse {concept}.", concepts=concepts)
                concept_parser.state_context = concept
                self.owner.sub_concepts_start_positions.append(self.owner.parser_input.pos)
                if concept_parser != self.owner:
                    concept_parser.set_state(concept_parser.all_states.on_new_concept)
            self.next_state = self.owner.all_states.on_new_concept
            return

        # push the token into unrecognized
        self.next_state = self.owner.all_states.eat_unrecognized_token

    def next(self):
        return self.next_state
|
|
|
|
|
|
class ConceptParserManageParametersState(SyaState):
    """Validate and collect the parameters gathered for the current part."""

    def run(self):
        self.owner.manage_parameters()

    def next(self):
        # next_state was provided at construction time.
        return self.next_state
|
|
|
|
|
|
class ConceptParserOnNewConceptState(SyaState):
    """Decide, by precedence/associativity, whether the new concept pops the
    one currently being parsed."""

    def run(self):
        concept = self.owner.state_context
        if not self.owner.must_pop(concept):
            # Same or lower priority: just parse the new concept nested.
            self.next_state = self.owner.all_states.new_concept_start
            return

        # The new concept wins: finalize the current concept first and leave
        # a hint so the parent resumes with the new concept afterwards.
        self.owner.after_parsing_hint = AfterRunHint(self.owner.all_states.on_new_concept, concept)
        self.next_state = self.owner.all_states.finalize_start

    def next(self):
        return self.next_state
|
|
|
|
|
|
class ConceptParserStartFinalizeState(SyaState):
    """Check that no expected tokens remain before finalizing."""

    def run(self):
        expected = self.owner.expected
        if expected and expected[0][0]:
            # Some literal tokens of the definition were never matched.
            raise TokensNotFound(self.owner.concept, get_text_from_tokens(expected[0][0]))

    def next(self):
        return self.owner.all_states.finalize_manage_unrecognized
|
|
|
|
|
|
class ConceptParserEndFinalizeState(SyaState):
    """Terminal state: build the resulting concept node and stop."""

    def run(self):
        owner = self.owner
        owner.concept_node = owner.create_concept_node()

    def next(self):
        # Returning None halts the parse loop.
        return None
|
|
|
|
|
|
class TokensParserStates:
    """All the states of a SyaTokensParser state machine, wired together."""

    def __init__(self, owner):
        self.start = TokensParserStartState("start", owner)
        self.read_next_token = TokensParserReadNextTokenState("read_next_token", owner)
        self.finalize_end = TokensParserFinalizeState("finalize_end", owner)
        # on_new_concept flushes unrecognized tokens, then creates the parser
        self.create_concept_parser = CreateNewConceptParserState("create_concept_parser", owner)
        self.on_new_concept = ManageUnrecognizedState("on_new_concept", owner, self.create_concept_parser)
        self.parse_concept = ParseConceptState("parse_concept", owner)
        self.eat_unrecognized_token = EatUnrecognizedTokenState("eat_unrecognized_token", owner)

        # Index every state by name so a state coming from another machine
        # can be mapped to this machine's equivalent (see get_state).
        self._all = {prop.name: prop for prop in vars(self).values()}

    def get_state(self, state):
        """Return this machine's state sharing *state*'s name, or None."""
        return self._all[state.name] if state else None
|
|
|
|
|
|
class ConceptParserStates:
    """All the states of a SyaConceptParser state machine, wired together."""

    def __init__(self, owner):
        self.start = ConceptParserStartState("start", owner)
        self.read_next_token = ConceptParserReadNextTokenState("read_next_token", owner)

        # After a concept part is matched: flush unrecognized tokens, then
        # collect the parameters, then keep reading.
        self.concept_tokens_end = ConceptParserManageParametersState("manage_parameters", owner, self.read_next_token)
        self.concept_tokens_start = ManageUnrecognizedState("manage_concept_tokens", owner, self.concept_tokens_end)

        # Nested concept: decide on precedence, flush, then create its parser.
        self.on_new_concept = ConceptParserOnNewConceptState("on_new_concept", owner)
        self.create_concept_parser = CreateNewConceptParserState("create_concept_parser", owner)
        self.new_concept_start = ManageUnrecognizedState("new_concept_start", owner, self.create_concept_parser)
        self.parse_concept = ParseConceptState("parse_concept", owner)

        self.eat_unrecognized_token = EatUnrecognizedTokenState("eat_unrecognized_token", owner)

        # Finalization chain: check remaining parts, flush unrecognized,
        # collect the last parameters, then build the concept node.
        self.finalize_start = ConceptParserStartFinalizeState("finalize_start", owner)
        self.finalize_end = ConceptParserEndFinalizeState("finalize_end", owner)
        self.finalize_manage_parameters = ConceptParserManageParametersState("finalize_manage_parameters", owner,
                                                                             self.finalize_end)
        self.finalize_manage_unrecognized = ManageUnrecognizedState("finalize_manage_unrecognized", owner,
                                                                    self.finalize_manage_parameters)

        # Index every state by name so a state coming from another machine
        # can be mapped to this machine's equivalent (see get_state).
        self._all = {prop.name: prop for prop in vars(self).values()}

    def get_state(self, state):
        """Return this machine's state sharing *state*'s name, or None."""
        return self._all[state.name] if state else None
|
|
|
|
|
|
@dataclass()
class AfterRunHint:
    """
    What to do when a sub state machine ends.

    Carries the state the parent machine should resume in, and the value to
    restore into its state_context (see ParseConceptState.run).
    """
    next_state: SyaState
    state_context: object  # value restored into the parent's state_context
|
|
|
|
|
|
@dataclass()
class SyaConceptDef:
    """
    Wrapper to concept

    It gives the precedence and the associativity for the concept
    """
    concept: Concept
    precedence: int = SheerkaComparisonManager.DEFAULT_COMPARISON_VALUE
    associativity: SyaAssociativity = SyaAssociativity.Right

    @staticmethod
    def get_sya_concept_def(context, concept):
        """Build a SyaConceptDef for *concept*.

        Precedence comes from the context's PRECEDENCE weights (when the
        concept has one) and associativity from the concept's ASSOCIATIVITY
        property (defaults kept otherwise)."""
        sya_concept_def = SyaConceptDef(concept)

        concept_weight = context.sheerka.get_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
        if concept.str_id in concept_weight:
            sya_concept_def.precedence = concept_weight[concept.str_id]

        if associativity := concept.get_prop(BuiltinConcepts.ASSOCIATIVITY):
            sya_concept_def.associativity = SyaAssociativity(associativity)

        return sya_concept_def

    def short_repr(self):
        """Compact one-line representation for debug output."""
        return f"({self.concept}, prio={self.precedence}, assoc={self.associativity})"

    def get_concept(self):
        """Return the wrapped concept."""
        return self.concept
|
|
|
|
|
|
class BaseSyaParser:
    """Common base for the Sya tokens parser and concept parsers.

    Holds the parsing positions, the node stack, the buffer of unrecognized
    tokens, the current state of the state machine and the debug trace.
    """

    def __init__(self,
                 context,
                 parser_input,
                 all_states,
                 start_pos=None,
                 end_pos=None,
                 stack=None,
                 unrecognized_tokens=None,
                 state=None,
                 state_context=None,
                 debug_items=None):
        self.context = context
        self.parser_input = parser_input

        # both positions default to the current input position
        self.start_pos = start_pos if start_pos is not None else parser_input.pos
        self.end_pos = end_pos if end_pos is not None else self.start_pos

        self.stack = stack if stack is not None else []
        self.unrecognized_tokens = unrecognized_tokens or UnrecognizedTokensNode(-1, -1, [])

        self.errors = []

        self.all_states = all_states

        # *state* may belong to another machine: get_state maps it onto this
        # machine's equivalent state (matched by name).
        self._state = self.all_states.get_state(state) if state else self.all_states.start
        self.state_context = state_context
        self.debug_items = debug_items if debug_items is not None else []

    def has_error(self):
        """Return True when at least one error was recorded."""
        return len(self.errors) > 0

    def add_to_unrecognized(self, token, pos):
        """Buffer *token* as unrecognized and advance end_pos to *pos*."""
        self.unrecognized_tokens.add_token(token, pos)
        self.end_pos = pos

    @property
    def state(self):
        # current state of the state machine (read-only; use set_state)
        return self._state

    def set_state(self, state):
        # Map the given state onto this machine's own instance of it.
        self._state = self.all_states.get_state(state)

    def get_unrecognized_tokens_requests_cache(self):
        """Return the cache used to re-recognize unrecognized tokens (abstract)."""
        raise NotImplementedError(f"BaseSyaParser.get_unrecognized_tokens_requests_cache()")

    def get_tokens_parser(self):
        """Return the tokens parser backing this parser (abstract)."""
        raise NotImplementedError(f"BaseSyaParser.get_tokens_parser()")

    def add_debug(self, text, is_error=False, **kwargs):
        """Record a DebugItem capturing the current token, position and stack.

        Extra keyword arguments are merged into the captured values. Returns
        the argument dict so callers can reuse it when logging."""
        args = {"token": self.parser_input.token,
                "pos": self.parser_input.pos,
                "stack": self.stack.copy()}

        args.update(kwargs)
        self.debug_items.append(DebugItem(text, is_error, args))
        return args
|
|
|
|
|
|
class SyaTokensParser(BaseSyaParser):
    """Top-level Sya parser: consumes the token stream and builds a stack of
    concept nodes and unrecognized-token nodes."""

    def __init__(self,
                 context,
                 sya_node_parser,
                 parser_input: ParserInput,
                 start_pos=None,
                 stack=None,
                 unrecognized_tokens=None,
                 state=None,
                 state_context=None,
                 debug_items=None):
        super().__init__(context,
                         parser_input,
                         TokensParserStates(self),
                         start_pos,
                         start_pos,  # end_pos starts where start_pos starts
                         stack,
                         unrecognized_tokens,
                         state,
                         state_context,
                         debug_items)
        self.id = sya_node_parser.get_next_tokens_parser_id()
        self.sya_node_parser = sya_node_parser
        # both flags are filled in after parsing (see SyaNodeParser.parse_token_parser)
        self.has_sya_concept = False
        self.has_unrecognized = False
        self.debugger = context.get_debugger(sya_node_parser.NAME, "parse", forced_debug_id=self.id)

    def __repr__(self):
        return f"SyaTokensParser({debug_nodes(self.stack)})"

    def parse(self):
        """Run the state machine until it halts, recording SyaNodeExceptions
        as errors instead of letting them propagate."""
        try:
            while self.state:
                self.state.run()
                self._state = self.state.next()

        except SyaNodeException as err:
            self.debug(err, is_error=True, error=err)
            self.errors.append(err)

    def finalize(self):
        """
        Merge UnrecognizedTokensNodes when there are following each other
        """

        # flush the unrecognized
        if not self.unrecognized_tokens.is_empty():
            self.unrecognized_tokens.fix_source()
            self.stack.append(self.unrecognized_tokens)
            self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

        # merge sequential unrecognized
        res = []
        unrecognized_tokens_node = None
        for node in self.stack:
            if isinstance(node, UnrecognizedTokensNode):
                if unrecognized_tokens_node:
                    # previous node was unrecognized too: merge into it
                    for i, token in enumerate(node.tokens):
                        unrecognized_tokens_node.add_token(token, node.start + i)
                else:
                    unrecognized_tokens_node = node
                    res.append(unrecognized_tokens_node)
            else:
                res.append(node)
                # a recognized node closes the current merged run
                if unrecognized_tokens_node:
                    unrecognized_tokens_node.fix_source()
                    unrecognized_tokens_node = None

        # close a trailing merged run, if any
        if unrecognized_tokens_node:
            unrecognized_tokens_node.fix_source()

        if len(res) != len(self.stack) and self.debugger.is_enabled():
            self.debug(f"Transformed stack from {debug_nodes(self.stack)} into {debug_nodes(res)}")

        self.stack = res

    def clone(self):
        """Return a copy with cloned input/stack/buffer; the debug trace
        (debug_items) is shared with the original."""
        clone = SyaTokensParser(self.context,
                                self.sya_node_parser,
                                self.parser_input.clone(),
                                self.start_pos,
                                self.stack.copy(),
                                self.unrecognized_tokens.clone(),
                                self.state,
                                self.state_context,
                                self.debug_items)

        return clone

    def n_clones(self, nb_clones):
        """Fork this parser into *nb_clones* instances (self included)."""
        return self.sya_node_parser.fork(self, nb_clones)

    def get_unrecognized_tokens_requests_cache(self):
        # the cache is shared through the owning SyaNodeParser
        return self.sya_node_parser.cache

    def get_tokens_parser(self):
        # a tokens parser is its own tokens parser
        return self

    def debug(self, text, is_error=False, **kwargs):
        """Record a debug item and log it when the debugger is enabled."""
        args = self.add_debug(text, is_error, **kwargs)
        if self.debugger.is_enabled():
            debug_prefix = f"pos={self.parser_input.pos}, token='{self.parser_input.token.repr_value}'"
            self.debugger.debug_log(f"{debug_prefix}: {text}", is_error, args)
|
|
|
|
|
|
class SyaConceptParser(BaseSyaParser):
    """Parses one concept occurrence: matches the literal parts of the
    concept definition and collects the parameters between them.

    The result is exposed as ``concept_node`` once finalization succeeds;
    ``errors`` is filled when parsing fails.
    """

    def __init__(self,
                 tokens_parser: SyaTokensParser,
                 concept: Concept,
                 prefix_parameters,
                 stack=None,
                 unrecognized_tokens=None,
                 start_pos=None,
                 end_pos=None,
                 sub_concepts_start_positions=None,
                 sub_concept_detected=False,
                 debug_items=None):
        super().__init__(tokens_parser.context,
                         tokens_parser.parser_input,
                         ConceptParserStates(self),
                         start_pos,
                         end_pos,
                         stack,
                         unrecognized_tokens,
                         debug_items=debug_items)

        self.parent_tokens_parser = tokens_parser
        self.concept = concept
        # parameters parsed before the first concept token (shared list);
        # the snapshot allows ParseConceptState to roll a failed parse back
        self.prefix_parameters = prefix_parameters
        self.prefix_parameters_snapshot = prefix_parameters.copy()
        self.checkpoint = self.start_pos

        self.expected = []           # pairs (tokens, nb variables) still to match
        self.nb_expected_popped = 0  # number of parts already matched
        self.parameters = []  # parameters found during parsing
        self.concept_node = None  # concept_node returned
        self.sub_concepts_start_positions = sub_concepts_start_positions or []  # sya concepts found during parsing
        self.sub_concept_detected = sub_concept_detected

        # set when the parent machine must resume in a specific state
        self.after_parsing_hint = None
        self.debugger = self.parent_tokens_parser.debugger

    def __repr__(self):
        if self.concept_node:
            return f"SyaConceptParser(concept_node={self.concept_node})"

        text = f"SyaConceptParser(concept={self.concept}, "
        if self.has_error():
            text += f"errors={self.errors}"
        else:
            # render the remaining expected parts in a readable form
            res = []
            for tokens, nb_vars in self.expected:
                debug_tokens = "".join([t.repr_value for t in tokens])
                res.append((debug_tokens, nb_vars))
            text += f"expected={res}"
            text += f", stack={debug_nodes(self.stack)}"
        return text + ")"

    @staticmethod
    def compute_expected_parameters(concept_key):
        """
        Return of list of pairs
        expected token and number of expected variable before this token
        ex: 'if x y then z end' => ('if', 0), ('then', 2), ('end', 1)
        """

        def custom_strip_tokens(_tokens):
            # Collapse each whitespace run into its last token; returns []
            # when _tokens contains only whitespace.
            res = []
            buffer = None
            for t in _tokens:
                if t.type == TokenKind.WHITESPACE:
                    buffer = t
                else:
                    if buffer:
                        res.append(buffer)
                        buffer = None
                    res.append(t)

            if res and buffer:  # add the buffer only is the result is not empty
                res.append(buffer)

            return res

        expected = []  # tuple of expected token and number of expected variables before this token
        tokens = []
        nb_variables = 0
        under_tokens = None  # True while consuming literal tokens of the current part
        for token in Tokenizer(concept_key, yield_eof=False):
            if token.type == TokenKind.WHITESPACE:
                tokens.append(token)
            elif token.type == TokenKind.VAR_DEF:
                if under_tokens is not None and under_tokens:
                    # a variable right after literal tokens closes the part
                    expected.append((custom_strip_tokens(tokens), nb_variables))
                    nb_variables = 1
                    tokens = []
                    under_tokens = False
                else:
                    nb_variables += 1
            else:
                tokens.append(token)
                under_tokens = True

        if tokens or nb_variables:
            expected.append((custom_strip_tokens(tokens), nb_variables))

        return expected

    def initialize_expected_parameters(self):
        """Compute the expected parts and consume the prefix parameters
        required before the first concept token.

        Raises NotEnoughParameters when too few prefix parameters exist."""
        self.expected = self.compute_expected_parameters(self.concept.key)

        expected_parameters_before_first_token = self.expected[0][1]

        # remove the trailing whitespace before counting the parameters
        if len(self.prefix_parameters) > 0 and self.prefix_parameters[-1].source.isspace():
            self.prefix_parameters.pop()

        # Check the number of prefixed parameters
        if len(self.prefix_parameters) < expected_parameters_before_first_token:
            raise NotEnoughParameters(self.concept,
                                      get_text_from_tokens(self.expected[0][0]),
                                      self.start_pos,
                                      expected_parameters_before_first_token,
                                      self.prefix_parameters.copy())

        # add the previous parameters to the list of available parameters
        for i in range(expected_parameters_before_first_token):
            self.stack.insert(0, self.prefix_parameters.pop())

        # prepare the tokens to recognize
        if self.expected[0][0][0].type == TokenKind.WHITESPACE:
            # remove white space before the first token if any
            self.expected[0][0].pop(0)
        self.expected[0][0].pop(0)  # pop the first token

    def parse(self):
        """Run the state machine until it halts, recording SyaNodeExceptions
        as errors instead of letting them propagate."""
        try:
            while self.state:
                self.state.run()
                self._state = self.state.next()

        except SyaNodeException as err:
            self.debug(err, is_error=True, error=err)
            self.errors.append(err)

    def recognize(self, start_pos, tokens):
        """Return True when the input at *start_pos* matches *tokens* exactly
        (same type and value, position by position)."""
        for i in range(len(tokens)):
            pi_input_token = self.parser_input.tokens[i + start_pos]
            token = tokens[i]
            if pi_input_token.type != token.type or pi_input_token.value != token.value:
                return False

        return True

    def is_part_of_concept_definition(self):
        """Try to match the next expected part at the current position.

        On success, advances the input past the part, updates end_pos and
        returns True; otherwise returns False."""
        to_be_recognized = self.expected[0][0]
        if not to_be_recognized:
            return False

        # refuse a match when the part would consume exactly the remaining input
        if self.parser_input.pos == self.parser_input.length - len(to_be_recognized):
            return False

        if self.recognize(self.parser_input.pos, to_be_recognized):
            self.parser_input.seek(self.parser_input.pos + len(to_be_recognized) - 1)
            self.end_pos = self.parser_input.pos
            return True

        return False

    def must_pop(self, other_concept: Concept):
        """
        Compare the priority of the 'other' concept against self.concept
        """
        current = SyaConceptDef.get_sya_concept_def(self.context, self.concept)
        other = SyaConceptDef.get_sya_concept_def(self.context, other_concept)

        # two non-associative concepts cannot be ordered
        if current.associativity == SyaAssociativity.No and other.associativity == SyaAssociativity.No:
            raise NoneAssociativeConceptsError(self.concept, other_concept)

        if other.associativity == SyaAssociativity.Left and other.precedence <= current.precedence:
            return True

        if other.associativity == SyaAssociativity.Right and other.precedence < current.precedence:
            return True

        return False

    def manage_parameters(self):
        """Move the stack content into self.parameters for the current part.

        Raises NotEnoughParameters / TooManyParameters when the stack size
        does not fit the part's declared variable count."""
        # check the number of expected parameters
        if not self.expected:
            return

        nb_expected_parameters = self.expected[0][1]
        if len(self.stack) < nb_expected_parameters:
            raise NotEnoughParameters(self.concept,
                                      get_text_from_tokens(self.expected[0][0]),
                                      self.checkpoint,
                                      nb_expected_parameters,
                                      self.stack.copy())

        if len(self.stack) > nb_expected_parameters and len(self.expected[0][0]) > 0:
            # Only raise an exception when there are too many parameters for ternary like concepts
            raise TooManyParameters(self.concept,
                                    get_text_from_tokens(self.expected[0][0]),
                                    self.checkpoint,
                                    nb_expected_parameters,
                                    self.stack.copy())

        self.parameters.extend(self.stack)
        self.stack.clear()
        self.expected.pop(0)
        self.nb_expected_popped += 1

    def create_concept_node(self):
        """Instantiate the concept from its template, bind the collected
        parameters, and wrap it in a ConceptNode spanning the parsed tokens."""
        new_concept = self.context.sheerka.new_from_template(self.concept, self.concept.key)
        new_concept.get_hints().use_copy = True
        new_concept.get_hints().need_validation = True
        new_concept.get_hints().recognized_by = RECOGNIZED_BY_KEY
        start_pos = self.start_pos
        end_pos = self.end_pos

        assert len(new_concept.get_metadata().parameters) <= len(self.parameters)

        # update the parameters
        for param_name, param_value in zip(new_concept.get_metadata().parameters, self.parameters):
            new_concept.get_compiled()[param_name] = param_value

            # set sub_concept_detected to True if we have eaten an sya concept as an unrecognized token
            # It may help if we fail to validate the current concept
            # See test_i_always_look_for_the_longest_match() for more explanations
            if param_value.start in self.sub_concepts_start_positions:
                self.sub_concept_detected = True

            # widen the node span so it covers the parameters too
            if param_value.start < start_pos:
                start_pos = param_value.start
            if param_value.end > end_pos:
                end_pos = param_value.end

        # remove the parameters that are already used
        for i in range(len(new_concept.get_metadata().parameters)):
            self.parameters.pop(0)

        # update variable metadata
        new_concept.get_metadata().variables = get_new_variables_definitions(new_concept)

        tokens = self.parser_input.tokens[start_pos:end_pos + 1]
        concept_node = ConceptNode(new_concept,
                                   start_pos,
                                   end_pos,
                                   tokens,
                                   get_text_from_tokens(tokens))

        if self.debugger.is_enabled():
            msg = f"concept node {new_concept}"
            for param_name, param_value in new_concept.get_compiled().items():
                msg += f", {param_name}='{param_value.source}'"

            msg += " successfully created."
            self.debug(msg, concept_node=concept_node)

        return concept_node

    def clone(self):
        """Clone this concept parser on top of a fork of its tokens parser."""
        fork = self.parent_tokens_parser.sya_node_parser.fork_tokens_parser(self.parent_tokens_parser)
        clone = SyaConceptParser(fork,
                                 self.concept,
                                 self.prefix_parameters.copy(),
                                 self.stack.copy(),
                                 self.unrecognized_tokens.clone(),
                                 self.start_pos,
                                 self.end_pos,
                                 self.sub_concepts_start_positions.copy(),
                                 self.sub_concept_detected,
                                 self.debug_items)

        clone.expected = self.expected.copy()
        clone.nb_expected_popped = self.nb_expected_popped
        clone.parameters = self.parameters.copy()
        clone.state_context = self.state_context

        # the forked tokens parser will drive the cloned concept parser
        fork.state_context = clone
        return clone

    def n_clones(self, nb_clones):
        """Return *nb_clones* parsers: self followed by nb_clones - 1 clones."""
        if nb_clones == 1:
            return [self]

        return [self] + [self.clone() for _ in range(nb_clones - 1)]

    def get_unrecognized_tokens_requests_cache(self):
        # the cache is shared through the owning SyaNodeParser
        return self.parent_tokens_parser.sya_node_parser.cache

    def get_tokens_parser(self):
        return self.parent_tokens_parser

    def debug(self, text, is_error=False, **kwargs):
        """Record a debug item and log it when the debugger is enabled."""
        args = self.add_debug(text, is_error, **kwargs)
        if self.debugger.is_enabled():
            debug_prefix = f"pos={self.parser_input.pos}, token='{self.parser_input.token.repr_value}'"
            self.debugger.debug_log(f"{debug_prefix}: {text}", is_error=is_error, args=args)
|
|
|
|
|
|
class SyaNodeParser(BaseNodeParser):
    """
    Node parser dedicated to "Sya" concepts: parameterized (non-BNF) concepts
    recognized with the help of a ``SyaTokensParser``.

    A single parse may fork its tokens parser to explore alternative
    interpretations; the forks accumulate in ``self.forks`` and are parsed in
    turn by ``parse_token_parser``.  Results are split into three bags:
    valid parsers, parsers in error, and parsers that found no Sya concept
    at all ("not for me").
    """

    NAME = "Sya"

    def __init__(self, **kwargs):
        # Registers the parser under NAME with priority 50, always enabled.
        super().__init__(SyaNodeParser.NAME, 50, enabled=True, **kwargs)
        # Cache of unrecognized-tokens sub-parsing requests, one slot per
        # parser listed in PARSERS.
        self.cache = UnrecognizedTokensCache(PARSERS)
        # Forked tokens parsers awaiting a parse pass; drained (and possibly
        # refilled) by parse_token_parser().
        self.forks = []
        # Monotonic counter backing get_next_tokens_parser_id().
        self.tokens_parser_next_id = 0

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that selects the concepts that must be handled by SyaNodeParser
        :param concept:
        :return: True when the concept belongs to this parser
        """
        # We only consider concepts that have parameter variables (refuse atoms)
        # Bnf definitions are not supposed to be managed by this parser either
        return (concept.get_metadata().definition_type != DEFINITION_TYPE_BNF and
                len(concept.get_metadata().parameters) > 0)

    @staticmethod
    def _function_detected(tokens):
        """
        Returns True if we think that the result of the tokens parser is a function
        call, i.e. an identifier directly followed by a parenthesized argument
        list — either ``name(...)`` or ``name (...)`` with one whitespace token.
        """
        tokens = strip_tokens(tokens, True)
        # ``name(...)``: identifier, "(", ..., ")"
        if (len(tokens) >= 3 and
                tokens[0].type == TokenKind.IDENTIFIER and
                tokens[1].value == "(" and
                tokens[-1].value == ")"):
            return True

        # ``name (...)``: identifier, whitespace, "(", ..., ")"
        if (len(tokens) >= 4 and
                tokens[0].type == TokenKind.IDENTIFIER and
                tokens[1].type == TokenKind.WHITESPACE and
                tokens[2].value == "(" and
                tokens[-1].value == ")"):
            return True

        return False

    @staticmethod
    def _merge_errors(sheerka, errors):
        """
        Flatten a list of per-parser error lists into a single result:
        the lone error itself when there is only one, otherwise a new
        error node aggregating every error body.
        """
        res = flatten(errors)
        if len(res) == 1:
            return res[0]
        else:
            return sheerka.err([e.body for e in res])

    def get_next_tokens_parser_id(self):
        # Hand out a fresh, strictly increasing id for a tokens parser.
        self.tokens_parser_next_id += 1
        return self.tokens_parser_next_id

    def get_concepts(self, context, token):
        # Candidate concepts are looked up by their first token, restricted
        # to the ones _is_eligible accepts.
        return context.sheerka.get_concepts_by_first_token(token, self._is_eligible)

    def fork(self, tokens_parser, number_of_forks):
        """
        Return ``number_of_forks`` tokens parsers: the original plus clones.
        The clones are also queued in ``self.forks`` so that
        parse_token_parser() will parse them later.
        """
        if number_of_forks == 1:
            return [tokens_parser]

        forks = [tokens_parser.clone() for _ in range(number_of_forks - 1)]
        self.forks.extend(forks)

        return [tokens_parser] + forks

    def fork_tokens_parser(self, tokens_parser):
        """Clone a single tokens parser and queue the clone for parsing."""
        clone = tokens_parser.clone()
        self.forks.append(clone)
        return clone

    def parse_token_parser(self, context, tokens_parser: SyaTokensParser, result, in_error, not_for_me):
        """
        Parse one tokens parser, classify it into one of the three output
        bags (``result`` / ``in_error`` / ``not_for_me``), then recurse on
        any forks the parse created.

        NOTE(review): ``self.forks`` is drained via a copy because parsing a
        fork may itself append new forks — the recursion keeps going until no
        fork remains.
        """
        tokens_parser.parse()

        # validate the nodes
        for node in tokens_parser.stack:
            if isinstance(node, ConceptNode):
                # Recompile the concept if needed; compilation errors become
                # parser errors.
                errors = []
                update_compiled(context, node.concept, errors)
                tokens_parser.errors.extend(errors)

                if self._is_eligible(node.concept) and node.concept.get_compiled():
                    tokens_parser.has_sya_concept = True

            elif isinstance(node, UnrecognizedTokensNode):
                tokens_parser.has_unrecognized = True

        # put the tokens_parser in the correct bag
        if tokens_parser.has_sya_concept and not tokens_parser.has_error():
            result.append(tokens_parser)
        elif tokens_parser.has_sya_concept and tokens_parser.has_error():
            in_error.append(tokens_parser)
        else:
            # No Sya concept recognized at all: this input is not for us.
            tokens_parser.errors.append(NoSyaConceptFound())
            not_for_me.append(tokens_parser)

        # recurse on the forks
        if self.forks:
            forks = self.forks.copy()
            self.forks.clear()
            for fork in forks:
                self.parse_token_parser(context, fork, result, in_error, not_for_me)

    # @profile(filename="sya_node_parser_parse")
    def parse(self, context, parser_input: ParserInput):
        """
        Entry point of the parser.

        Returns ``None`` when the input is not a ``ParserInput``, a single
        result when exactly one outcome exists, or a list of results when
        several valid interpretations were found.

        NOTE(review): the method mixes ``context.sheerka`` and
        ``self.sheerka`` — presumably ``self.sheerka`` is bound by
        ``reset_parser`` (defined outside this view); confirm they refer to
        the same object.
        """
        if not isinstance(parser_input, ParserInput):
            return None

        # Empty input: answer with a dedicated IS_EMPTY concept.
        if parser_input.is_empty():
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )

        # reset_parser prepares internal state; on failure the accumulated
        # error sink is returned as an ERROR concept.
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        debugger = context.get_debugger(self.NAME, "parse")
        debugger.debug_entering(source=self.parser_input.as_text())

        # Function-call-looking inputs are delegated to another parser.
        if self._function_detected(parser_input.tokens):
            return self.sheerka.ret(self.name,
                                    False,
                                    context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                                                        body=parser_input.as_text(),
                                                        reason=[FunctionDetected()]))

        # Parse the input (and every fork it spawns) into three bags.
        tokens_parser = SyaTokensParser(context, self, parser_input)
        valid_tokens_parser, in_errors, not_for_me = [], [], []
        self.parse_token_parser(context, tokens_parser, valid_tokens_parser, in_errors, not_for_me)

        if debugger.is_enabled():
            debugger.debug_var("stats", self.cache.to_dict())
        debugger.debug_leaving(valid=valid_tokens_parser, error=in_errors, not_for_me=not_for_me)

        ret = []
        if valid_tokens_parser:
            # One PARSER_RESULT per valid interpretation; the boolean flag is
            # True only when every token was recognized.
            for tokens_parser in valid_tokens_parser:
                ret.append(
                    self.sheerka.ret(self.name,
                                     not tokens_parser.has_unrecognized,
                                     self.sheerka.new(
                                         BuiltinConcepts.PARSER_RESULT,
                                         parser=self,
                                         source=parser_input.as_text(),
                                         body=tokens_parser.stack,
                                         try_parsed=tokens_parser.stack)))

        elif in_errors:
            # No valid parse but Sya concepts were found: report merged errors.
            errors = self._merge_errors(self.sheerka, [tp.errors for tp in in_errors])
            ret.append(self.sheerka.ret(self.name, False, errors))

        else:
            # Nothing recognized at all: NOT_FOR_ME with every collected reason.
            errors = []
            for tp in not_for_me:
                errors.extend(tp.errors)
            ret.append(self.sheerka.ret(self.name, False, context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                                                                              body=parser_input.as_text(),
                                                                              reason=errors)))

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret
|