# Sheerka-Old/src/parsers/SyaNodeParser.py

from collections import namedtuple
from dataclasses import dataclass, field
from operator import attrgetter
from typing import List
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import update_compiled
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.global_symbols import CONCEPT_COMPARISON_CONTEXT, SyaAssociativity
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_n_clones, get_text_from_tokens, NextIdManager, replace_after
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, \
SourceCodeWithConceptNode, BaseNodeParser, VariableNode
from parsers.BaseParser import ParsingError
PARSERS = ["Sequence", "Bnf", "Python"]
function_parser_res = namedtuple("FunctionParserRes", 'to_out function')
DEBUG_PUSH = "PUSH"
DEBUG_PUSH_UNREC = "PUSH_UNREC"
DEBUG_POP = "POP"
DEBUG_EAT = "EAT"
DEBUG_RECOG = "RECOG"
DEBUG_CAN_POP = "CAN_POP"
@dataclass()
class DebugInfo:
"""
Debug item to trace how the sya parser worked
    Possible actions:
PUSH: push the token or the concept to the stack
PUSH_UNREC: push the token to the UnrecognizedTokensNode
POP: pop item to out
EAT: eat the current token (it means that it was part of the concept currently being parsed)
RECOG: when tokens from UnrecognizedTokensNode are parsed and recognized
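
    A rendered entry looks like this (illustrative values):
        "  3:'plus' (plus, prio=10, assoc=Right) => PUSH"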
"""
pos: int = -1 # position of the parser input
token: Token = None # current token
concept: Concept = None # current concept if any
action: str = None # action taken
level: str = None
def __repr__(self):
token_repr = self.token.repr_value if isinstance(self.token, Token) else self.token
msg = f"{self.pos:3}:{token_repr}" if self.pos != -1 else " _:"
if self.concept:
msg += f" {self.concept.short_repr()}"
return msg + f" => {self.action}"
class ParenthesisMismatchError(ParsingError):
def __init__(self, error_int):
if isinstance(error_int, tuple):
if isinstance(error_int[0], Token):
self.token_value = error_int[0].value
self.token = error_int[0]
else:
self.token_value = error_int[0]
self.token = None
self.pos = error_int[1]
elif isinstance(error_int, Token):
self.token = error_int
self.token_value = error_int.value
self.pos = -1
        else:  # isinstance(error_int, UnrecognizedTokensNode)
for i, t in reversed(list(enumerate(error_int.tokens))):
if t.type == TokenKind.LPAR:
self.token = t
self.token_value = t.value
self.pos = i + error_int.start
def __eq__(self, other):
if id(self) == id(other):
return True
if not isinstance(other, ParenthesisMismatchError):
return False
return self.token_value == other.token_value and self.pos == other.pos
def __hash__(self):
return hash(self.pos)
def __repr__(self):
return f"ParenthesisMismatchError('{self.token_value}', {self.pos}"
@dataclass()
class NoneAssociativeSequenceError(ParsingError):
concept: Concept
first: int
second: int
tokens: List[Token] = None
@dataclass()
class TooManyParametersFoundError(ParsingError):
concept: Concept
pos: int # position of the concept
token: Token # token of the concept where the error was noticed
parameters: list # list of unmatched parameters
def __repr__(self):
return f"Too many parameters found for '{self.concept}' before token '{self.token}'"
@dataclass()
class SyaConceptDef:
"""
Wrapper to concept
It gives the precedence and the associativity for the concept
"""
concept: Concept
precedence: int = SheerkaComparisonManager.DEFAULT_COMPARISON_VALUE
associativity: SyaAssociativity = SyaAssociativity.Right
@staticmethod
def get_sya_concept_def(concept, parser, sheerka):
sya_concept_def = SyaConceptDef(concept)
# first, try to look in the parser
# it is where to find the data during the unit tests
if parser and concept.id in parser.test_only_sya_definitions:
# Manage when precedence and associativity are given in the unit tests
sya_def = parser.test_only_sya_definitions.get(concept.id)
if sya_def[0] is not None:
sya_concept_def.precedence = sya_def[0]
if sya_def[1] is not None:
sya_concept_def.associativity = sya_def[1]
# otherwise, use sheerka # KSI 20210109 otherwise or override ??
if sheerka:
concept_weight = parser.sheerka.get_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
if concept.str_id in concept_weight:
sya_concept_def.precedence = concept_weight[concept.str_id]
# in the case of Sheerka, the associativity is managed by the concept itself
# There is no conflict with the settings of the unit test, as I don't use the props in the unit tests
if associativity := concept.get_prop(BuiltinConcepts.ASSOCIATIVITY):
sya_concept_def.associativity = SyaAssociativity(associativity)
return sya_concept_def
def short_repr(self):
return f"({self.concept}, prio={self.precedence}, assoc={self.associativity})"
@dataclass()
class SyaConceptParserHelper:
"""
    Used because there is not enough information yet to create the final ConceptNode
"""
concept: Concept
start: int # position of the token in the tokenizer (Caution, it is not token.index)
end: int = field(default=-1, repr=False, compare=False, hash=None)
expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None)
expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None)
last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None)
potential_pos: int = field(default=-1, repr=False, compare=False, hash=None)
parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None)
tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten
remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None)
error: str = None
def __post_init__(self):
concept = self.concept.concept if isinstance(self.concept, SyaConceptDef) else self.concept
if self.end == -1:
self.end = self.start
first_keyword_found = None
for token in Tokenizer(concept.key, yield_eof=False):
if not first_keyword_found and token.type != TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF:
first_keyword_found = token
if first_keyword_found:
self.expected.append(token)
else:
self.last_token_before_first_token = token
if token.type != TokenKind.WHITESPACE:
self.expected_parameters_before_first_token += 1
# remove useless whitespaces (spaces that are between VAR_DEF)
if len(self.expected) > 2:
temp = [self.expected[0]]
for i in range(1, len(self.expected) - 1):
token = self.expected[i]
if (token.type == TokenKind.WHITESPACE and
self.expected[i - 1].type == TokenKind.VAR_DEF and
self.expected[i + 1].type == TokenKind.VAR_DEF):
continue # skip it
temp.append(token)
temp.append(self.expected[-1])
self.expected = temp
self.eat_token(first_keyword_found) # remove the first token
self.tokens.append(first_keyword_found)
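
    # Illustrative walk-through of __post_init__ (reusing the example from
    # handle_expected_token() below): for Concept("foo a bar b") with 'a' and
    # 'b' declared as variables, the first keyword 'foo' is eaten right away,
    # so self.expected keeps the 'a' VAR_DEF, 'bar' and the 'b' VAR_DEF (plus
    # separating whitespace), and expected_parameters_before_first_token
    # stays 0 because no variable precedes 'foo'.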
def is_matched(self):
return len(self.expected) == 0
def is_atom(self):
return len(self.concept.concept.get_metadata().variables) == 0 and len(self.expected) == 0
def is_next(self, token):
"""
To match long named concepts
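
        Illustrative case (hypothetical concept key): for "x is greater than y"
        with 'x' and 'y' as variables, once 'is' has been matched, is_next() is
        True for the token 'greater' (the whitespace before it being optional).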
:param token:
:return:
"""
        if self.is_matched():  # is_matched() already means len(self.expected) == 0
            return False
# True if the next token is the one that is expected
# Or if the next token is a whitespace and the expected one is the one after
# (whitespace are sometimes not mandatory)
        return token.strip_quote == self.expected[0].strip_quote or \
            (self.expected[0].type == TokenKind.WHITESPACE and token.strip_quote == self.expected[1].strip_quote)
def is_expected(self, token):
if self.is_matched() or token.type == TokenKind.WHITESPACE:
return False
for expected in self.expected:
if expected.type != TokenKind.VAR_DEF and expected.strip_quote == token.strip_quote:
return True
return False
def expected_parameters(self):
return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected))
def eat_token(self, until_token):
"""
eat until token 'until'
:param until_token:
:return:
"""
# No check, as it is used only after is_expected() or is_next()
while self.expected[0].strip_quote != until_token.strip_quote:
del self.expected[0]
del self.expected[0]
        # return True if a whole sequence of keywords is eaten
# example
# Concept("foo a bar baz qux b").def_var("a").def_var("b")
# 'bar' is just eaten. We will return False because 'baz' and 'qux' are still waiting
if len(self.expected) == 0:
return True
# also return True at the end of a name sequence
# ... <var0> bar baz qux <var1>
# return True after 'qux', to indicate all the parameters from <var0> must be processed
return self.expected[0].type == TokenKind.VAR_DEF
def eat_parameter(self, parameter):
if self.is_matched() and parameter == self:
            return  # not an error
if self.is_matched():
self.error = "No more parameter expected"
return
if self.expected[0].type != TokenKind.VAR_DEF:
self.error = "Parameter was not expected"
return
del self.expected[0]
def fix_concept(self):
"""
When the SYA is done, we only need the concept, not the sya concept
:return:
"""
if isinstance(self.concept, SyaConceptDef):
self.concept = self.concept.concept
return self
def clone(self):
clone = SyaConceptParserHelper(self.concept, self.start, self.end)
clone.expected = self.expected[:]
clone.expected_parameters_before_first_token = self.expected_parameters_before_first_token
clone.potential_pos = self.potential_pos
clone.parameters_list_at_init = self.parameters_list_at_init
clone.error = self.error
return clone
class InFixToPostFix:
def __init__(self, context, next_id_manager, debugger=None):
self.context = context
self.next_id_manager = next_id_manager
self.id = self.next_id_manager.get_next_id()
self.debugger = debugger
if debugger:
self.debug_enabled = debugger.is_enabled()
self.enabled_debug_levels = debugger.get_enabled_vars()
else:
self.debug_enabled = False
self.enabled_debug_levels = None
self.is_locked = False # when locked, cannot process input
self.out = [] # shunting yard algo out
self.stack = [] # shunting yard algo stack
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of token positions
self.parameters_list = [] # list of the parameters that need to be associated to a concept
self.errors = [] # Not quite sure that I can handle more than one error
self.debug = []
        self.false_positives = []  # concepts that look like known ones, but are not (for debug purposes)
        self.forked = []  # used to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
        self.parsing_function = False  # indicates that we are currently parsing a function
def __repr__(self):
return f"InFixToPostFix({self.debug})"
def __eq__(self, other):
if id(self) == id(other):
return True
if not isinstance(other, InFixToPostFix):
return False
return self.out == other.out and self.errors == other.errors
    def __hash__(self):
        # note: InFixToPostFix has no 'sequence' attribute; hash on out/errors instead
        return len(self.out) + len(self.errors)
def _add_error(self, error):
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"=> ERROR {error}"))
self.errors.append(error)
def _add_debug(self, debug_info: DebugInfo):
if debug_info.level is None or (self.enabled_debug_levels and
(f"#{self.id}.{debug_info.level}" in self.enabled_debug_levels or
"*" in self.enabled_debug_levels)):
self.debug.append(debug_info)
def _is_lpar(self, token):
"""
True if the token is a left parenthesis '('
        Note that while we are parsing unrecognized tokens,
        the parentheses are considered part of the unrecognized sequence
:param token:
:return:
"""
# return isinstance(token, Token) and token.type == TokenKind.LPAR
if isinstance(token, Token) and token.type == TokenKind.LPAR:
return True
if isinstance(token, tuple) and token[0].type == TokenKind.LPAR:
return True
if isinstance(token, UnrecognizedTokensNode) and token.parenthesis_count > 0:
return True
return False
def _is_rpar(self, token):
"""
True if the token is a right parenthesis ')'
        Note that while we are parsing unrecognized tokens,
        the parentheses are considered part of the unrecognized sequence
:param token:
:return:
"""
return isinstance(token, Token) and token.type == TokenKind.RPAR
def _concepts(self):
"""
        Return the concepts currently being parsed
:return:
"""
res = []
for item in self.stack:
if isinstance(item, SyaConceptParserHelper):
res.append(item)
return res
def _put_to_out(self, item):
"""
        Helper function that puts an item in out
:param item:
:return:
"""
if isinstance(item, SyaConceptParserHelper) and len(item.expected) > 0 and not item.error:
if item.expected[0].type == TokenKind.VAR_DEF:
item.error = "Not enough suffix parameters"
else:
item.error = f"token '{item.expected[0].strip_quote}' not found"
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"ERROR {item.error}"))
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"{DEBUG_POP} {item}"))
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item)
else:
self.out.append(item)
# put the item to the list of awaiting parameters only if it's not the end of function marker
if item != ")":
self.parameters_list.append(item)
if len(self._concepts()) > 0:
# try to predict the final position of the current concept
# This position can be altered by concept associativity and precedence
# So it's only a prediction
current = self._concepts()[-1]
if current.expected_parameters() == len(self.parameters_list) - len(current.parameters_list_at_init):
self._concepts()[-1].potential_pos = len(self.out)
    def _stack_isinstance(self, node_type):
        """
        Check the type of the top item in the stack
        :param node_type:
        :return:
        """
        return len(self.stack) > 0 and isinstance(self.stack[-1], node_type)
def _make_source_code_with_concept(self, start, rpar_token, end):
"""
:param start:
:param rpar_token:
:param end:
:return:
"""
source_code = SourceCodeWithConceptNode(
self.stack.pop(),
UnrecognizedTokensNode(end, end, [rpar_token]),
self.out[start + 1:]
).pseudo_fix_source()
return source_code
def _transform_to_unrecognized(self, parser_helper):
        # recover an UnrecognizedTokensNode that was sent to out prematurely
if len(self.out) > 0 and isinstance(self.out[-1], UnrecognizedTokensNode):
self.unrecognized_tokens = self.out.pop()
if parser_helper.remember_whitespace:
self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1)
for i, token in enumerate(parser_helper.tokens):
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
def _remove_debug_info_if_needed(self):
"""
        Before trying to manage the unrecognized tokens, a debug line is added to show the token
        that triggered the recognition attempt
        This line is useless if self.unrecognized_tokens turned out to be irrelevant
:return:
"""
if len(self.debug) > 0 and self.debug[-1].action == "??":
self.debug.pop()
def _debug_nodes(self, nodes_sequences):
"""
Returns a debug representation of a sequence of LexerNodes
:param nodes_sequences:
:return:
"""
res = "["
first = True
for sequence in nodes_sequences:
if not first:
res += ", "
res += "[" + ", ".join([n.to_short_str() for n in sequence]) + "]"
first = False
return res + "]"
def get_errors(self):
def has_error(item):
if isinstance(item, SyaConceptParserHelper) and item.error:
return True
if isinstance(item, SourceCodeWithConceptNode):
for n in item.nodes:
if hasattr(n, "error") and n.error:
return True
return False
res = []
res.extend(self.errors)
res.extend([item for item in self.out if has_error(item)])
return res
def lock(self):
self.is_locked = True
def reset(self):
if len(self.errors) > 0:
return
self.is_locked = False
def manage_parameters_when_new_concept(self, parser_helper):
"""
        When a new concept is created, we need to check what to do with the parameters
that were queued
:param parser_helper: new concept
:return:
"""
if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token:
            # There are not enough parameters to fill the new concept
            # Try to develop the UnrecognizedTokensNode, to see if it can match
            developed_param_list = self.develop_parameter_list(self.parameters_list)
            if len(developed_param_list) < parser_helper.expected_parameters_before_first_token:
                # The new concept expects some prefix parameters, but there are not enough
parser_helper.error = "Not enough prefix parameters"
return
# the developed_param_list does the job. Let's replace the previous values
pivot = self.parameters_list[0]
replace_after(self.parameters_list, pivot, developed_param_list)
replace_after(self.out, pivot, developed_param_list)
if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token:
            # There are more parameters than needed by the new concept
            # These other parameters are either
            #  - parameters for the previous suffixed concept (if any)
            #  - concepts on their own
            #  - syntax errors
            # In all cases, the only thing that matters is to pop what is expected by the new concept
for i in range(parser_helper.expected_parameters_before_first_token):
self.parameters_list.pop()
parser_helper.parameters_list_at_init.extend(self.parameters_list)
return
        # len(self.parameters_list) == parser_helper.expected_parameters_before_first_token
        # => We consider that the parameters are part of the new concept
if len(self._concepts()) > 1:
# The new concept is a parameter of the previous one.
# So reset the potential_pos of the previous concept
self._concepts()[-2].potential_pos = -1
# eat them all
self.parameters_list.clear()
def manage_parameters(self):
"""
Some new parameters were added to the list.
        What to do with them?
:return:
"""
def nb_expected_parameters(expected):
"""
Count the number of successive variables that are expected
:param expected:
:return:
"""
i = 0
for token in expected:
if token.type == TokenKind.VAR_DEF:
i += 1
else:
break
return i
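        # e.g. (illustrative token shapes) for expected == [<a>, <b>, 'bar', <c>],
        # nb_expected_parameters() returns 2: only the leading run of VAR_DEF
        # tokens counts.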
# manage parenthesis that didn't find any match
if self._is_lpar(self.stack[-1]):
self._add_error(ParenthesisMismatchError(self.stack[-1]))
        # The parameter must be part of the current concept being parsed
assert len(self._concepts()) != 0 # sanity check
current_concept = self._concepts()[-1]
if (nb_expected := nb_expected_parameters(current_concept.expected)) > len(self.parameters_list):
            # There are not enough parameters in the list to fill the concept
# Try to develop the UnrecognizedTokensNode to see if it can match
developed_param_list = self.develop_parameter_list(self.parameters_list)
if nb_expected == len(developed_param_list):
pivot = self.parameters_list[0]
replace_after(self.parameters_list, pivot, developed_param_list)
replace_after(self.out, pivot, developed_param_list)
while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
# eat everything that was expected
if len(self.parameters_list) == 0:
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].strip_quote}'"
return
del self.parameters_list[0]
del current_concept.expected[0]
def manage_unrecognized(self):
if self.unrecognized_tokens.is_empty():
return
# do not put empty UnrecognizedToken in out
if self.unrecognized_tokens.is_whitespace():
self.unrecognized_tokens.reset()
return
self.unrecognized_tokens.fix_source()
if self.unrecognized_tokens.parenthesis_count > 0:
# parenthesis mismatch detected, do not try to resolve the unrecognized
self._add_error(ParenthesisMismatchError(self.unrecognized_tokens))
self._put_to_out(self.unrecognized_tokens)
else:
# try to recognize concepts
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)
if nodes_sequences:
                # More than one solution may be found
                # In that case, we create a new InFixToPostFix for each new possibility
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"{DEBUG_RECOG} {self._debug_nodes(nodes_sequences)}"))
if len(nodes_sequences) > 1:
for node_sequence in nodes_sequences[1:]:
clone = self.clone()
for node in node_sequence:
clone._put_to_out(node)
clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
self.forked.append(clone)
# Do not forget the first result that will go with the current InfixToPostfix
for node in nodes_sequences[0]:
self._put_to_out(node)
else:
self._put_to_out(self.unrecognized_tokens)
# create another instance
self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
def get_functions_names_from_unrecognized(self, token, pos):
"""
The unrecognized ends with an lpar '('
        It means that it's a function call like foo(something)
The problem is that we need to know if there are other concepts before the function
ex : suffix one function(x)
        'suffix' and 'one' are not / may not be part of the name of the function
We need to call the function to recognize the parts and act accordingly
:return: list of function_parser_res
"""
self.unrecognized_tokens.fix_source()
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)
if not nodes_sequences:
nodes_sequences = [[self.unrecognized_tokens.clone()]]
res = []
for sequence in nodes_sequences:
last_node = sequence[-1]
if len(last_node.tokens) > 1:
if isinstance(last_node, UnrecognizedTokensNode):
to_out = [UnrecognizedTokensNode(last_node.start, pos - 2, last_node.tokens[:-1]).fix_source()]
function_name = UnrecognizedTokensNode(pos - 1, pos - 1, [last_node.tokens[-1]])
function_name.add_token(token, pos)
else:
to_out = [last_node.fix_source()]
function_name = None
else: # len(last_node.tokens) == 1
if not isinstance(last_node, UnrecognizedTokensNode):
function_name = UnrecognizedTokensNode(last_node.start, last_node.end, last_node.tokens)
else:
function_name = last_node
function_name.add_token(token, pos)
to_out = []
res.append(function_parser_res(sequence[:-1] + to_out, function_name))
return res
def pop_stack_to_out(self):
"""
        Helper function that pops the stack and puts the item to the output, if needed
:return:
"""
item = self.stack[-1]
        # fix the concept if needed
if isinstance(item, SyaConceptParserHelper):
if len(item.expected) > 0:
# make sure the expected parameters of this item are eaten
if len(item.expected) <= len(self.parameters_list):
self.manage_parameters()
else:
                    # second chance to match the parameter list when it contains unrecognized tokens
developed_param_list = self.develop_parameter_list(self.parameters_list)
if len(item.expected) <= len(developed_param_list):
pivot = self.parameters_list[0]
replace_after(self.parameters_list, pivot, developed_param_list)
replace_after(self.out, pivot, developed_param_list)
self.manage_parameters()
item.fix_concept()
self.stack.pop()
self._put_to_out(item)
def i_can_pop(self, sya_parser_helper):
"""
Validate the Shunting Yard Algorithm conditions to pop out from the stack
Note that it's a custom implementation as I need to manage UnrecognizedTokensNode
:param sya_parser_helper:
:return:
"""
if len(self.stack) == 0:
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"No stack. {DEBUG_CAN_POP} false.", level="can_pop"))
return False
stack_head = self.stack[-1]
if not isinstance(stack_head, SyaConceptParserHelper): # mostly left parenthesis
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"No concept. {DEBUG_CAN_POP} false.", level="can_pop"))
return False
current = sya_parser_helper.concept
stack = stack_head.concept
if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
self._add_error(
NoneAssociativeSequenceError(current.concept, stack_head.start, sya_parser_helper.start))
if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
if self.debug_enabled:
current_debug = f"{current.concept.id}({current.precedence})"
stack_debug = f"{stack.concept.id}({stack.precedence})"
self._add_debug(
DebugInfo(action=f"assoc=Left and {current_debug} <= {stack_debug}. {DEBUG_CAN_POP} True.",
level="can_pop"))
return True
if current.associativity == SyaAssociativity.Right and current.precedence < stack.precedence:
if self.debug_enabled:
current_debug = f"{current.concept.id}({current.precedence})"
stack_debug = f"{stack.concept.id}({stack.precedence})"
self._add_debug(
DebugInfo(action=f"assoc=Right and {current_debug} < {stack_debug}. {DEBUG_CAN_POP} True.",
level="can_pop"))
return True
if self.debug_enabled:
self._add_debug(DebugInfo(action=f"No rule. {DEBUG_CAN_POP} False.", level="can_pop"))
return False
def handle_expected_token(self, token, pos):
"""
True if the token is part of the concept being parsed and the last token in a sequence is eaten
Example : Concept("foo a bar b").def_var("a").def_var("b")
The expected tokens are 'foo' and 'bar' (as a and b are parameters)
Example: Concept("foo a bar baz b").def_var("a").def_var("b")
If the token is 'bar', it will be eaten but handle_expected_token() will return False
as we still expect 'baz'
:param token:
:param pos:
:return:
"""
        def _pop_stack(c):
            # pop everything above the helper c, stopping early on a pending '('
            # (note: test the stack head, not c, so the mismatch check below can fire)
            while self.stack[-1] != c and not self._is_lpar(self.stack[-1]):
                self.pop_stack_to_out()
            if self._is_lpar(self.stack[-1]):
                self._add_error(ParenthesisMismatchError(self.stack[-1]))
                return False
# Manage concepts ending with long names
if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched():
self.pop_stack_to_out()
for current_concept in reversed(self._concepts()):
            # As I may lose memory again ;-)
            # it's a reversed loop to manage cases like
            #   if a plus b then ...
            # The current concept is 'plus', but the token is 'then'
            # It means that I have finished parsing the 'plus' and started the second part of the 'if'
if current_concept.is_next(token):
current_concept.end = pos
current_concept.tokens.append(token)
if current_concept.eat_token(token):
_pop_stack(current_concept)
return True
if len(current_concept.expected) > 0 and current_concept.expected[0].type != TokenKind.VAR_DEF:
if current_concept.expected[0].type == TokenKind.WHITESPACE:
# drop it. It's the case where an optional whitespace is missing
del (current_concept.expected[0])
else:
# error
# We are not parsing the concept we thought we were parsing.
# Transform the eaten tokens into unrecognized
# and discard the current SyaConceptParserHelper
# TODO: manage the pending LPAR, RPAR ?
self._transform_to_unrecognized(current_concept)
self.false_positives.append(current_concept)
self.stack.pop()
return False
if current_concept.is_expected(token):
# Fix the whitespace between var and expected if needed
# current_concept.expected[0] is '<var>'
# current_concept.expected[1] is what separate var from expected (normally a whitespace)
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
current_concept.end = pos
if self.debug_enabled:
self._add_debug(DebugInfo(pos, token, None, "??"))
self.manage_unrecognized()
# manage that some clones may have been forked
for forked in self.forked:
forked.handle_expected_token(token, pos)
# manage concepts found between tokens (of ternary for example)
self.manage_parameters()
if len(self.parameters_list) > len(current_concept.parameters_list_at_init):
# we have eaten the parameters expected between two expected tokens
# But there are some remaining parameters
self._add_error(TooManyParametersFoundError(
current_concept.concept.concept,
current_concept.start,
token,
self.parameters_list[:]))
return True # no need to continue
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1] != current_concept:
current = self.stack[-1]
if current.error:
self._transform_to_unrecognized(current)
self.false_positives.append(current)
self.stack.pop()
if current_concept.expected[1].type == TokenKind.WHITESPACE:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
self.manage_unrecognized()
# manage that some clones may have been forked
for forked in self.forked:
forked.handle_expected_token(token, pos)
else:
self.pop_stack_to_out()
self.manage_parameters()
# maybe eat whitespace that was between <var> and expected token
if current_concept.expected[0].type == TokenKind.WHITESPACE:
del current_concept.expected[0]
if current_concept.eat_token(token):
_pop_stack(current_concept)
return True
return False
def eat_token(self, token, pos):
"""
        Receives a token.
        It will be processed if it's expected by a concept or if it's a parenthesis
:param token:
:param pos:
:return:
"""
if self.is_locked:
return
if self.parsing_function:
if self.debug_enabled:
self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self.unrecognized_tokens.add_token(token, pos)
if self.unrecognized_tokens.parenthesis_count == 0:
self.unrecognized_tokens.fix_source()
res = self.context.sheerka.parse_function(self.context,
self.unrecognized_tokens.source,
self.unrecognized_tokens.tokens[:],
self.unrecognized_tokens.start)
instances = get_n_clones(self, len(res))
self.forked.extend(instances[1:])
for instance, res_i in zip(instances, res):
if res_i.status or instance.context.sheerka.isinstance(res_i.body, BuiltinConcepts.PARSER_RESULT):
# 1. we manage to recognize a function
# 2. we almost manage, ex func(one two). It's not a function but almost
instance._put_to_out(res_i.body.body)
instance.unrecognized_tokens.reset()
else:
                        # it is not a function, try to recognize the tokens
# This situation is unlikely to occur
instance.manage_unrecognized()
instance.parsing_function = False
return True
if self.handle_expected_token(token, pos):
            # a token was found; check whether it's part of a concept being parsed
# example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b")
# if the token 'bar' is found, it has to be considered as part of the concept foo
if self.debug_enabled:
self._remove_debug_info_if_needed()
self._add_debug(DebugInfo(pos, token, None, DEBUG_EAT))
return True
elif self._is_lpar(token):
if self.debug_enabled:
self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
if self.unrecognized_tokens.is_empty() or self.unrecognized_tokens.is_whitespace():
# first, remove what was in the buffer
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.eat_token(token, pos)
self.stack.append((token, pos))
else:
# the parenthesis is part of the unrecognized
# So it's maybe a function call
list_of_results = self.get_functions_names_from_unrecognized(token, pos)
instances = [self]
for i in range(len(list_of_results) - 1):
clone = self.clone()
self.forked.append(clone)
instances.append(clone)
# Manage the result for self and its clones
for instance, parsing_res in zip(instances, list_of_results):
for to_out in parsing_res.to_out:
instance._put_to_out(to_out)
if parsing_res.function:
instance.unrecognized_tokens = parsing_res.function
instance.parsing_function = True
else:
# special case of "twenty two(". It's not considered as a function
                        # The manage_unrecognized() work was somewhat done by get_functions_names_from_unrecognized()
# So we just put the unrecognized to out
instance.unrecognized_tokens.reset()
# make sure to pop the current concept
if self._stack_isinstance(SyaConceptParserHelper):
self.pop_stack_to_out()
instance.stack.append((token, pos))
# # instance._put_to_out(")") # mark where the function should end
# # instance.stack.append(parsing_res.function)
# # instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized
# else:
# # handle when there are multiple pending tokens
# if len(self.unrecognized_tokens.tokens) > 1:
# unrecognized = UnrecognizedTokensNode(self.unrecognized_tokens.start,
# pos - 2,
# self.unrecognized_tokens.tokens[:-1])
# unrecognized.fix_source()
# self._put_to_out(unrecognized)
# last_token = self.unrecognized_tokens.tokens[-1]
# self.unrecognized_tokens.reset()
# self.unrecognized_tokens.add_token(last_token, pos - 1)
#
# self.eat_unrecognized(token, pos) # add the '(' to the rest of the unknown
# self.parsing_function = True
# # self.stack.append(self.unrecognized_tokens.fix_source())
# # self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
return True
elif self._is_rpar(token):
if self.debug_enabled:
self._add_debug(DebugInfo(pos, token, None, DEBUG_EAT))
# first, remove what was in the buffer
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.eat_token(token, pos)
# pop everything but the lpar from stack to 'out'
while len(self.stack) > 0 and not self._is_lpar(self.stack[-1]):
self.pop_stack_to_out()
            # consistency checks: an lpar must have been found
if len(self.stack) == 0:
self._add_error(ParenthesisMismatchError((token, pos)))
return None
if self._stack_isinstance(UnrecognizedTokensNode):
                # the parenthesis closed a function call
# we need to return a SourceCodeWithConceptNode
for i in range(len(self.out) - 1, -1, -1):
if self.out[i] == ')':
start = i
break
else:
self._add_error(ParenthesisMismatchError((token, pos)))
return None
source_code = self._make_source_code_with_concept(start, token, pos)
for item in self.out[start:]:
# update the parameter list
try:
self.parameters_list.remove(item)
except ValueError:
pass
del self.out[start:]
self._put_to_out(source_code)
# self.pop_stack_to_out()
# # Replace the ')' marker by its real position
# for i in range(len(self.out) - 1, -1, -1):
# if self.out[i] == ')':
# self.out[i] = UnrecognizedTokensNode(pos, pos, [token])
else:
self.stack.pop() # discard the lpar
return True
return False
def eat_concept(self, sya_concept_def, token, pos, first_pass=True):
"""
a concept is found
:param sya_concept_def:
:param token:
:param pos:
        :param first_pass: False when called on a fork after manage_unrecognized()
:return:
"""
if self.is_locked:
return
parser_helper = SyaConceptParserHelper(sya_concept_def, pos)
if first_pass:
if self.debug_enabled:
self._add_debug(DebugInfo(pos, token, sya_concept_def, "??"))
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
if Token.is_whitespace(parser_helper.last_token_before_first_token):
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
# First, try to recognize the tokens that are waiting
self.manage_unrecognized()
for forked in self.forked:
# manage the fact that some clone may have been forked
forked.eat_concept(sya_concept_def, token, pos, first_pass=False)
# then, check if this new concept is linked to the previous ones
# ie, is the previous concept fully matched ?
if parser_helper.expected_parameters_before_first_token == 0:
            # => does not expect pending parameters (it's a suffixed concept)
while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1:
# => previous seems to have everything it needs in the parameter list
self.pop_stack_to_out()
if parser_helper.is_atom():
self._put_to_out(parser_helper.fix_concept())
else:
# call shunting yard algorithm
while self.i_can_pop(parser_helper):
self.pop_stack_to_out()
if parser_helper.is_matched():
# case of a prefix concept which has found happiness with self.parameters_list
# directly put it in out
self.manage_parameters_when_new_concept(parser_helper)
self._put_to_out(parser_helper.fix_concept())
else:
if self.debug_enabled:
self._remove_debug_info_if_needed()
self._add_debug(DebugInfo(pos, token, sya_concept_def, DEBUG_PUSH))
self.stack.append(parser_helper)
self.manage_parameters_when_new_concept(parser_helper)
def eat_unrecognized(self, token, pos):
"""
        The token was not recognized; add it to the current UnrecognizedTokensNode
:param token:
:param pos:
:return:
"""
if self.is_locked:
return
if self.debug_enabled:
self._add_debug(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self.unrecognized_tokens.add_token(token, pos)
def finalize(self, pos):
"""
Put the remaining items from the stack to out
:return:
"""
if self.is_locked:
return
if len(self.stack) == 0 and len(self.out) == 0:
# check for parenthesis mismatch
if self.unrecognized_tokens.parenthesis_count > 0:
self._add_error(ParenthesisMismatchError(self.unrecognized_tokens))
return # no need to pop the buffer, as no concept is found
if self.debug_enabled:
self._add_debug(DebugInfo(pos, "<EOF>", None, "??"))
while len(self.stack) > 0:
parser_helper = self.stack[-1]
# validate parenthesis
if self._is_lpar(parser_helper) or self._is_rpar(parser_helper):
self._add_error(ParenthesisMismatchError(parser_helper))
return None
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize(pos)
failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
if failed_to_match > 0:
# didn't manage to read all tokens.
# Transform them into unrecognized
self._transform_to_unrecognized(parser_helper)
self.false_positives.append(parser_helper)
self.stack.pop() # discard the parser helper
else:
self.pop_stack_to_out() # process it
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize(pos)
def clone(self):
clone = InFixToPostFix(self.context, self.next_id_manager, self.debugger)
clone.is_locked = self.is_locked
clone.out = self.out[:]
clone.stack = [i.clone() if hasattr(i, "clone") else i for i in self.stack]
clone.unrecognized_tokens = self.unrecognized_tokens.clone()
clone.parameters_list = self.parameters_list[:]
clone.errors = self.errors[:]
clone.debug = self.debug[:]
# clone.forked = self.forked
return clone
@staticmethod
def develop_parameter_list(parameter_list):
"""
        Given a list of parameters (coming solely from self.parameters_list),
        develop UnrecognizedTokensNode parameters that contain whitespace
:param parameter_list:
:return:
"""
temp = []
for parameter in parameter_list:
if isinstance(parameter, UnrecognizedTokensNode):
for i, token in [(i, t) for i, t in enumerate(parameter.tokens) if t.type != TokenKind.WHITESPACE]:
temp.append(UnrecognizedTokensNode(parameter.start + i,
parameter.start + i,
[token]))
else:
temp.append(parameter)
return temp
@dataclass()
class PostFixToItem:
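    """
    Intermediate result of postfix_to_item(): a concept rebuilt from the
    postfix output, together with its token span and source text
    """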
concept: Concept
start: int
end: int
has_unrecognized: bool
source: str
class SyaNodeParser(BaseNodeParser):
NAME = "Sya"
def __init__(self, **kwargs):
super().__init__(SyaNodeParser.NAME, 50, **kwargs)
self.test_only_sya_definitions = {}
def init_from_concepts(self, context, concepts, **kwargs):
super().init_from_concepts(context, concepts)
sya_definitions = kwargs.get("sya", None)
if sya_definitions:
self.test_only_sya_definitions = sya_definitions
return self
@staticmethod
def _is_eligible(concept):
"""
        Predicate that selects concepts that must be handled by SyaNodeParser
:param concept:
:return:
"""
        # We only keep concepts that have parameters (refuse atoms)
# Bnf definitions are not supposed to be managed by this parser either
return len(
concept.get_metadata().variables) > 0 and concept.get_metadata().definition_type != DEFINITION_TYPE_BNF
def infix_to_postfix(self, context, parser_input: ParserInput):
"""
        Implements the Shunting Yard Algorithm (see the standalone sketch at the end of this module)
:param context:
:param parser_input:
:return:
"""
if not self.reset_parser(context, parser_input):
return None
debugger = context.get_debugger(self.NAME, "parse")
debugger.debug_entering(source=self.parser_input.as_text())
forked = []
def _add_forked_to_res():
            # check whether some new InFixToPostFix instances were created
for in_to_post in res:
if len(in_to_post.forked) > 0:
forked.extend(in_to_post.forked)
in_to_post.forked.clear()
if len(forked) > 0:
res.extend(forked)
forked.clear()
res = [InFixToPostFix(context, NextIdManager(), debugger)]
while self.parser_input.next_token(False):
for infix_to_postfix in res:
infix_to_postfix.reset()
token = self.parser_input.token
if debugger.is_enabled():
debug_prefix = f"pos={self.parser_input.pos}, {token=}, {len(res)} parser(s)"
try:
if token.type in (TokenKind.LPAR, TokenKind.RPAR):
# little optim, no need to lock, unlock or get the concept when parenthesis
if debugger.is_enabled():
debugger.debug_log(debug_prefix + ", eat token.")
for infix_to_postfix in res:
infix_to_postfix.eat_token(token, self.parser_input.pos)
continue
for infix_to_postfix in res:
if infix_to_postfix.eat_token(token, self.parser_input.pos):
infix_to_postfix.lock()
nb_locked = len([itp for itp in res if itp.is_locked])
if nb_locked == len(res):
if debugger.is_enabled():
debugger.debug_log(debug_prefix + f", all parsers are locked")
continue
concepts_def = context.sheerka.get_concepts_by_first_token(token,
self._is_eligible,
to_map=SyaConceptDef.get_sya_concept_def,
parser=self)
if not concepts_def:
if debugger.is_enabled():
debugger.debug_log(debug_prefix + f", no concept found")
for infix_to_postfix in res:
infix_to_postfix.eat_unrecognized(token, self.parser_input.pos)
continue
if debugger.is_enabled():
found = [cd.short_repr() for cd in concepts_def]
debugger.debug_log(debug_prefix + f", concept(s) found={found}")
if len(concepts_def) == 1:
for infix_to_postfix in res:
infix_to_postfix.eat_concept(concepts_def[0], token, self.parser_input.pos)
continue
# make the cartesian product
temp_res = []
for infix_to_postfix in res:
for concept in concepts_def:
clone = infix_to_postfix.clone()
temp_res.append(clone)
clone.eat_concept(concept, token, self.parser_input.pos)
res = temp_res
finally:
_add_forked_to_res()
# make sure that remaining items in stack are moved to out
for infix_to_postfix in res:
infix_to_postfix.reset()
infix_to_postfix.finalize(self.parser_input.pos)
_add_forked_to_res()
if debugger.is_enabled():
for r in res:
for line in r.debug:
if line.level:
debugger.debug_var(f"#{r.id}.{line.level}", line)
else:
debugger.debug_var(f"#{r.id}", line)
return res
def postfix_to_item(self, sheerka, postfixed):
item = postfixed.pop()
if isinstance(item, (UnrecognizedTokensNode, SourceCodeNode, ConceptNode, VariableNode)):
return item
if isinstance(item, SourceCodeWithConceptNode):
items = []
while len(item.nodes) > 0:
res = self.postfix_to_item(sheerka, item.nodes)
if isinstance(res, PostFixToItem):
items.append(ConceptNode(res.concept,
res.start,
res.end,
self.parser_input.tokens[res.start: res.end + 1]))
else:
items.append(res)
                item.has_unrecognized |= (hasattr(res, "has_unrecognized") and res.has_unrecognized) or \
                    isinstance(res, UnrecognizedTokensNode)
item.nodes = items
item.fix_all_pos()
item.tokens = self.parser_input.tokens[item.start:item.end + 1]
item.fix_source(True)
return item
        # item is a SyaConceptParserHelper
start = item.start
end = item.end
has_unrecognized = False
concept = sheerka.new_from_template(item.concept, item.concept.key)
concept_metadata = []
for param_index in reversed(range(len(concept.get_metadata().variables))):
inner_item = self.postfix_to_item(sheerka, postfixed)
if inner_item.start < start:
start = inner_item.start
if inner_item.end > end:
end = inner_item.end
            has_unrecognized |= isinstance(inner_item, (UnrecognizedTokensNode, SourceCodeWithConceptNode)) or \
                (hasattr(inner_item, "has_unrecognized") and inner_item.has_unrecognized)
param_name = concept.get_metadata().variables[param_index][0]
param_value = inner_item.concept if hasattr(inner_item, "concept") else \
[inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \
inner_item
concept.get_compiled()[param_name] = param_value
concept_metadata.append((param_name, inner_item.source))
# update the metadata
concept_metadata.reverse()
# ---- Sanity check. To remove at some point
assert len(concept_metadata) == len(concept.get_metadata().variables)
for meta_orig, meta_new in zip(concept.get_metadata().variables, concept_metadata):
assert meta_orig[0] == meta_new[0]
# ---- Sanity check. To remove at some point
concept.get_metadata().variables = concept_metadata
concept.get_hints().use_copy = True
concept.get_hints().need_validation = True
source = get_text_from_tokens(self.parser_input.tokens[start:end + 1])
return PostFixToItem(concept, start, end, has_unrecognized, source)
def parse(self, context, parser_input: ParserInput):
"""
:param context:
:param parser_input:
:return:
"""
if not isinstance(parser_input, ParserInput):
return None
if parser_input.is_empty():
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)
ret = []
valid_infix_to_postfixs = self.get_valid(self.infix_to_postfix(context, parser_input))
if valid_infix_to_postfixs is None:
# token error
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
if len(valid_infix_to_postfixs) == 0:
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))
for infix_to_postfix in valid_infix_to_postfixs:
sequence = []
has_unrecognized = False
errors = []
while len(infix_to_postfix.out) > 0:
item = self.postfix_to_item(context.sheerka, infix_to_postfix.out)
                has_unrecognized |= (hasattr(item, "has_unrecognized") and item.has_unrecognized) or \
                    isinstance(item, UnrecognizedTokensNode)
if isinstance(item, PostFixToItem):
to_insert = ConceptNode(item.concept,
item.start,
item.end,
self.parser_input.tokens[item.start: item.end + 1])
# validate the concept
update_compiled(context, item.concept, errors)
if errors:
break
else:
to_insert = item
sequence.insert(0, to_insert)
if errors:
if len(errors) == 1:
ret.append(
self.sheerka.ret(
self.name,
False,
errors[0]))
else:
ret.append(
self.sheerka.ret(
self.name,
False,
self.sheerka.err([e.body for e in errors])))
continue
if has_unrecognized:
            # Manage some sick cases where a missing parenthesis messes up the order of the sequence
            # example "foo bar(one plus two"
            # too lazy to fix the why...
sequence.sort(key=attrgetter("start"))
ret.append(
self.sheerka.ret(
self.name,
not has_unrecognized,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input.as_text(),
body=sequence,
try_parsed=sequence)))
if len(ret) == 1:
self.log_result(context, parser_input, ret[0])
return ret[0]
else:
self.log_multiple_results(context, parser_input, ret)
return ret
@staticmethod
def get_valid(infix_to_postfixs):
"""
        Gets the valid InFixToPostFix instances
:param infix_to_postfixs:
:return:
"""
def _has_sya(items):
for item in items:
if isinstance(item, SourceCodeWithConceptNode):
return _has_sya(item.nodes)
if isinstance(item, SyaConceptParserHelper):
return True
return False
if infix_to_postfixs is None:
return None
result = []
for infix_to_postfix in infix_to_postfixs:
if len(infix_to_postfix.get_errors()) > 0:
continue
if len(infix_to_postfix.out) == 0:
continue
if infix_to_postfix in result:
continue
if not _has_sya(infix_to_postfix.out):
# refuses the result if it does not involve SYA
continue
result.append(infix_to_postfix)
return result
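
# A self-contained sketch of the classic shunting-yard core that
# InFixToPostFix.i_can_pop() adapts. Illustrative only: plain strings stand in
# for Concept/SyaConceptDef, the precedence table is assumed, and all of the
# unrecognized-token machinery above is omitted.
if __name__ == "__main__":
    _DEMO_OPS = {"plus": (10, "Left"), "times": (20, "Left"), "pow": (30, "Right")}

    def _demo_infix_to_postfix(tokens):
        out, stack = [], []
        for token in tokens:
            if token not in _DEMO_OPS:
                out.append(token)  # operand: straight to out
                continue
            precedence, associativity = _DEMO_OPS[token]
            # same pop conditions as i_can_pop(): Left pops on <=, Right pops on <
            while stack and (
                    (associativity == "Left" and precedence <= _DEMO_OPS[stack[-1]][0]) or
                    (associativity == "Right" and precedence < _DEMO_OPS[stack[-1]][0])):
                out.append(stack.pop())
            stack.append(token)
        while stack:  # finalize(): flush what remains on the stack
            out.append(stack.pop())
        return out

    # 'times' binds tighter than 'plus' in this assumed table
    print(_demo_infix_to_postfix(["1", "plus", "2", "times", "3"]))
    # -> ['1', '2', '3', 'times', 'plus']
    print(_demo_infix_to_postfix(["a", "pow", "b", "pow", "c"]))
    # right-associative: -> ['a', 'b', 'c', 'pow', 'pow']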