Resolved some remaining chicken-and-egg problems when parsing BNF nodes
This commit is contained in:
+117
-25
@@ -158,7 +158,7 @@ class ParsingExpression:
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.elements = args
|
||||
|
||||
nodes = kwargs.get('nodes', [])
|
||||
nodes = kwargs.get('nodes', []) or []
|
||||
if not hasattr(nodes, '__iter__'):
|
||||
nodes = [nodes]
|
||||
self.nodes = nodes
|
||||
@@ -201,8 +201,8 @@ class ConceptExpression(ParsingExpression):
|
||||
When the grammar is created, it is replaced by the actual concept
|
||||
"""
|
||||
|
||||
def __init__(self, concept, rule_name="", recurse_id=None):
|
||||
super().__init__(rule_name=rule_name)
|
||||
def __init__(self, concept, rule_name="", recurse_id=None, nodes=None):
|
||||
super().__init__(rule_name=rule_name, nodes=nodes)
|
||||
self.concept = concept
|
||||
self.recurse_id = recurse_id
|
||||
|
||||
@@ -216,6 +216,10 @@ class ConceptExpression(ParsingExpression):
|
||||
if not isinstance(other, ConceptExpression):
|
||||
return False
|
||||
|
||||
# TODO: re-enable the recurse_id comparison once recurse_id is correctly implemented
|
||||
# if self.recurse_id != other.recurse_id:
|
||||
# return False
|
||||
|
||||
if isinstance(self.concept, Concept):
|
||||
return self.concept.id == other.concept.id
|
||||
|
||||
@@ -425,6 +429,12 @@ class Repetition(ParsingExpression):
|
||||
super(Repetition, self).__init__(*elements, **kwargs)
|
||||
self.sep = kwargs.get('sep', None)
|
||||
|
||||
def clone(self):
    """
    Return a new Repetition built from this one's elements, rule name, nodes and separator.

    The elements are unpacked into the constructor call: the constructor
    forwards its positional arguments as ``*elements``, so passing the tuple
    itself would give the copy a single element holding the whole original tuple.

    NOTE(review): ``self.nodes`` is passed as-is, so the copy refers to the same
    nodes object as the original -- confirm whether a separate list is wanted.
    NOTE(review): a plain ``Repetition`` is built even when this is called on a
    subclass instance -- confirm that this is intended.

    :return: the new Repetition
    """
    return Repetition(*self.elements,
                      rule_name=self.rule_name,
                      nodes=self.nodes,
                      sep=self.sep)
|
||||
|
||||
|
||||
class ZeroOrMore(Repetition):
|
||||
"""
|
||||
@@ -711,6 +721,14 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
|
||||
return self.STOP
|
||||
|
||||
|
||||
class BnfNodeConceptExpressionVisitor(ParsingExpressionVisitor):
    """Visitor that records the concept of each ConceptExpression it visits."""

    def __init__(self):
        # Concepts are stored in visit order; duplicates are kept.
        self.references = []

    def visit_ConceptExpression(self, pe):
        """Append ``pe.concept`` to ``self.references``."""
        concept = pe.concept
        self.references.append(concept)
|
||||
|
||||
|
||||
class BnfConceptParserHelper:
|
||||
def __init__(self, parser):
|
||||
self.parser = parser
|
||||
@@ -1137,14 +1155,84 @@ class BnfNodeParser(BaseNodeParser):
|
||||
|
||||
return concept_parser_helpers
|
||||
|
||||
def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False):
|
||||
def fix_infinite_recursions(self, context, grammar, concept_id, parsing_expression):
    """
    Check the newly created parsing expression for infinite recursion.

    Some infinite recursions can be resolved simply by removing the parsing
    expression that causes the loop. This method looks for those cases and
    replaces the offending node in place, then checks again until either no
    recursion is reported or the reported recursion is not one it can fix.

    :param context: execution context; ``context.push`` opens a sub context for each fix
    :param grammar: mapping from which a copy is taken for each fix (``grammar.copy()``)
    :param concept_id: id of the concept whose parsing expression is checked;
        it is the first entry of the recursion path
    :param parsing_expression: root parsing expression to check; ``nodes`` lists
        below it may be modified in place
    :return: the list filled by the last call to ``check_for_infinite_recursion``
        (left empty when that call reported no recursion)
    """

    def _find(expression_, path_):
        # Follow path_ downwards from expression_. For each id, look among the
        # ConceptExpression children of the current expression for one whose
        # recurse_id or concept id equals it, and descend into that child.
        # Returns (parent of the last match, its index in parent.nodes, the match).
        # With an empty path_ nothing is searched and (None, -1, expression_) is returned.
        index_ = -1
        parent_ = None
        for node_id in path_:
            # a ConceptExpression is stepped through: the search uses its first node
            expression_ = expression_.nodes[0] if isinstance(expression_, ConceptExpression) else expression_
            for i, node in [(i, n) for i, n in enumerate(expression_.nodes) if isinstance(n, ConceptExpression)]:
                if node.recurse_id == node_id or node.concept.id == node_id:
                    index_ = i
                    parent_ = expression_
                    expression_ = node  # take the child of the ConceptExpression found
                    break
            else:
                # NOTE(review): read as the ``else`` of the inner ``for`` (no child
                # matched this id) -- confirm against the original indentation
                raise IndexError(f"path {path_} cannot be found in '{expression_}'")

        return parent_, index_, expression_

    def _fix_node(expression, path):
        # Locate the node to rebuild: the first id of the path and its last two
        # ids are left out of the lookup.
        parent, index, expression_update = _find(expression, path[1:-2])

        assert isinstance(expression_update, ConceptExpression)

        desc = f"Fixing circular reference {path}"
        # concepts_to_skip is handed to the sub context together with concept_id
        # NOTE(review): presumably read back through context.concepts_to_skip
        # while resolving -- confirm
        with context.push(BuiltinConcepts.INIT_BNF,
                          expression_update.concept,
                          who=self.name,
                          obj=expression_update.concept,
                          concepts_to_skip=[concept_id],
                          desc=desc) as sub_context:
            # work on a copy of the grammar from which the last two ids of the
            # path have been removed, then resolve the concept again
            new_grammar = grammar.copy()
            for node_id in path[-2:]:
                del new_grammar[node_id]
            new_nodes = self.resolve_concept_parsing_expression(sub_context,
                                                                expression_update.concept,
                                                                expression_update.rule_name, new_grammar, set())
            new = ConceptExpression(expression_update.concept,
                                    rule_name=expression_update.rule_name,
                                    recurse_id=expression_update.recurse_id,
                                    nodes=new_nodes)

        # swap the rebuilt expression in place of the one that was found
        parent.nodes[index] = new

    while True:
        # every pass starts a fresh check from the concept being defined
        already_found = [concept_id]
        concepts_in_recursion = []
        if self.check_for_infinite_recursion(parsing_expression, already_found, concepts_in_recursion):
            # the last entry repeats the id where the cycle starts, so [-2] is
            # the last id reached before the repetition
            if "#" in concepts_in_recursion[-2]:
                # a "#" means the id is a recurse_id, i.e. the concept is an isaset concept
                _fix_node(parsing_expression, concepts_in_recursion[:-1])
            else:
                break
        else:
            break

    return concepts_in_recursion
|
||||
|
||||
def check_for_infinite_recursion(self, parsing_expression, already_found, in_recursion, only_first=False):
|
||||
|
||||
if isinstance(parsing_expression, ConceptExpression):
|
||||
id_to_use = parsing_expression.recurse_id or parsing_expression.concept.id
|
||||
if id_to_use in already_found:
|
||||
already_found.append(id_to_use) # add the id again, to know where the cycle starts
|
||||
in_recursion.extend(already_found)
|
||||
return True
|
||||
already_found.add(id_to_use)
|
||||
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, only_first)
|
||||
already_found.append(id_to_use)
|
||||
return self.check_for_infinite_recursion(
|
||||
parsing_expression.nodes[0], already_found, in_recursion, only_first)
|
||||
|
||||
if isinstance(parsing_expression, Sequence):
|
||||
# for sequence, we need to check all nodes
|
||||
@@ -1154,8 +1242,7 @@ class BnfNodeParser(BaseNodeParser):
|
||||
nodes = parsing_expression.nodes
|
||||
for node in nodes:
|
||||
already_found_for_current_node = already_found.copy()
|
||||
if self.check_for_infinite_recursion(node, already_found_for_current_node, False):
|
||||
already_found.update(already_found_for_current_node)
|
||||
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, False):
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -1164,8 +1251,7 @@ class BnfNodeParser(BaseNodeParser):
|
||||
# we are safe
|
||||
for node in parsing_expression.nodes:
|
||||
already_found_for_current_node = already_found.copy()
|
||||
if self.check_for_infinite_recursion(node, already_found, True):
|
||||
already_found.update(already_found_for_current_node)
|
||||
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
@@ -1174,8 +1260,7 @@ class BnfNodeParser(BaseNodeParser):
|
||||
if isinstance(parsing_expression, UnOrderedChoice):
|
||||
for node in parsing_expression.nodes:
|
||||
already_found_for_current_node = already_found.copy()
|
||||
if self.check_for_infinite_recursion(node, already_found_for_current_node, True):
|
||||
already_found.update(already_found_for_current_node)
|
||||
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -1215,13 +1300,20 @@ class BnfNodeParser(BaseNodeParser):
|
||||
if isinstance(node, UnderConstruction):
|
||||
pe.nodes[i] = grammar.get(node.concept_id)
|
||||
|
||||
# check for infinite recursion.
|
||||
# We are adding a new concept. Does it create an infinite recursion ?
|
||||
concepts_in_recursion = set()
|
||||
if self.check_for_infinite_recursion(ret, concepts_in_recursion):
|
||||
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
|
||||
# # check for infinite recursions.
|
||||
# # and try to fix them when possible
|
||||
# already_found = [concept.id]
|
||||
# concepts_in_recursion = []
|
||||
# if self.check_for_infinite_recursion(ret, already_found, concepts_in_recursion):
|
||||
# chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
|
||||
# for concept_id in concepts_in_recursion:
|
||||
# grammar[concept_id] = chicken_anf_egg
|
||||
|
||||
concepts_in_recursion = self.fix_infinite_recursions(context, grammar, concept.id, ret)
|
||||
if concepts_in_recursion:
|
||||
chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
|
||||
for concept_id in concepts_in_recursion:
|
||||
grammar[concept_id] = cycle
|
||||
grammar[concept_id] = chicken_anf_egg
|
||||
|
||||
# update, in case of infinite circular recursion
|
||||
ret = grammar[concept.id]
|
||||
@@ -1287,13 +1379,16 @@ class BnfNodeParser(BaseNodeParser):
|
||||
if c.id == context.obj.id:
|
||||
continue
|
||||
|
||||
# c_pe = self.resolve_concept_parsing_expression(context, c, None, grammar, to_update, to_keep)
|
||||
# if self.check_for_infinite_recursion(c_pe, {concept.id}, True):
|
||||
# continue
|
||||
if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip:
|
||||
continue
|
||||
|
||||
valid_concepts.append(c)
|
||||
|
||||
nodes = [ConceptExpression(c, rule_name=c.name, recurse_id=key_to_use) for c in valid_concepts]
|
||||
nodes = []
|
||||
for c in valid_concepts:
|
||||
c_recurse_id = f"{c.id}#{c.name}#{concept.id}" if self.sheerka.isaset(context, c) else None
|
||||
nodes.append(ConceptExpression(c, rule_name=c.name, recurse_id=c_recurse_id))
|
||||
|
||||
resolved = self.resolve_parsing_expression(ssc,
|
||||
UnOrderedChoice(*nodes),
|
||||
grammar,
|
||||
@@ -1341,9 +1436,6 @@ class BnfNodeParser(BaseNodeParser):
|
||||
|
||||
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
|
||||
return pe # an error is detected, escalate it
|
||||
#
|
||||
# if isinstance(pe, UnderConstruction) and expression.concept.id == pe.concept_id:
|
||||
# return pe # we are looking for ourself, just return it
|
||||
|
||||
if isinstance(pe, UnderConstruction):
|
||||
to_update.add(ToUpdate(id(expression), expression))
|
||||
|
||||
+31
-10
@@ -35,7 +35,7 @@ class BnfParser(BaseParser):
|
||||
|
||||
self.lexer_iter = None
|
||||
self._current = None
|
||||
self.after_current = None
|
||||
self.after_current = None # one look ahead
|
||||
self.nb_open_par = 0
|
||||
self.context = None
|
||||
self.source = ""
|
||||
@@ -283,16 +283,37 @@ class BnfParser(BaseParser):
|
||||
|
||||
def eat_rule_name_if_needed(self, expression):
|
||||
token = self.get_token()
|
||||
if token is None or token.type != TokenKind.EQUALS:
|
||||
return expression
|
||||
if token is not None and token.type == TokenKind.EQUALS:
|
||||
self.next_token() # eat equals
|
||||
token = self.get_token()
|
||||
|
||||
self.next_token() # eat equals
|
||||
token = self.get_token()
|
||||
if token is None or token.type != TokenKind.IDENTIFIER:
|
||||
return self.add_error(
|
||||
UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
|
||||
|
||||
if token is None or token.type != TokenKind.IDENTIFIER:
|
||||
return self.add_error(
|
||||
UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
|
||||
expression.rule_name = token.value
|
||||
self.next_token()
|
||||
|
||||
if BnfParser.is_expression_a_set(self.context, expression):
|
||||
root_concept = self.context.search(
|
||||
start_with_self=True,
|
||||
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
|
||||
get_obj=lambda ec: ec.action_context,
|
||||
stop=lambda ec: ec.action == BuiltinConcepts.INIT_BNF)
|
||||
root_concept = list(root_concept)
|
||||
if root_concept and hasattr(root_concept[0], "id"):
|
||||
expression.recurse_id = f"{expression.concept.id}#{expression.rule_name}#{root_concept[0].id}"
|
||||
|
||||
expression.rule_name = token.value
|
||||
self.next_token()
|
||||
return expression
|
||||
|
||||
@staticmethod
def is_expression_a_set(context, expression):
    """
    Tell whether ``expression`` is a ConceptExpression that sheerka reports as a set.

    :param context: execution context; must expose ``sheerka.isaset``
    :param expression: object to test
    :return: False when ``expression`` is not a ConceptExpression, otherwise the
        result of ``context.sheerka.isaset(context, expression.concept)``
    """
    if not isinstance(expression, ConceptExpression):
        return False
    return context.sheerka.isaset(context, expression.concept)
|
||||
|
||||
@staticmethod
def update_recurse_id(context, concept_id, expression):
    """
    Set ``recurse_id`` on ``expression`` and, recursively, on its elements.

    Only expressions for which ``BnfParser.is_expression_a_set`` is true get a
    new ``recurse_id``; every entry of ``expression.elements`` is visited in turn.

    :param context: execution context forwarded to ``is_expression_a_set``
    :param concept_id: id used as the last part of the generated recurse_id
    :param expression: expression to update in place
    """
    needs_recurse_id = BnfParser.is_expression_a_set(context, expression)
    if needs_recurse_id:
        expression.recurse_id = f"{expression.concept.id}#{expression.rule_name}#{concept_id}"

    # descend into the sub expressions
    for child in expression.elements:
        BnfParser.update_recurse_id(context, concept_id, child)
|
||||
|
||||
@@ -355,14 +355,14 @@ class DefaultParser(BaseParser):
|
||||
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
|
||||
return None, NotInitializedNode()
|
||||
|
||||
regex_parser = BnfParser()
|
||||
bnf_regex_parser = BnfParser()
|
||||
desc = f"Resolving BNF {current_concept_def.definition}"
|
||||
with self.context.push(BuiltinConcepts.INIT_BNF,
|
||||
current_concept_def,
|
||||
who=self.name,
|
||||
obj=current_concept_def,
|
||||
desc=desc) as sub_context:
|
||||
parsing_result = regex_parser.parse(sub_context, tokens)
|
||||
parsing_result = bnf_regex_parser.parse(sub_context, tokens)
|
||||
sub_context.add_values(return_values=parsing_result)
|
||||
|
||||
if not parsing_result.status:
|
||||
|
||||
Reference in New Issue
Block a user