Resolved some remaining chicken-and-egg issues when parsing BNF nodes

This commit is contained in:
2020-07-05 11:03:03 +02:00
parent ad8a997942
commit 71f753c925
14 changed files with 310 additions and 56 deletions
+117 -25
View File
@@ -158,7 +158,7 @@ class ParsingExpression:
def __init__(self, *args, **kwargs):
self.elements = args
nodes = kwargs.get('nodes', [])
nodes = kwargs.get('nodes', []) or []
if not hasattr(nodes, '__iter__'):
nodes = [nodes]
self.nodes = nodes
@@ -201,8 +201,8 @@ class ConceptExpression(ParsingExpression):
When the grammar is created, it is replaced by the actual concept
"""
def __init__(self, concept, rule_name="", recurse_id=None):
super().__init__(rule_name=rule_name)
def __init__(self, concept, rule_name="", recurse_id=None, nodes=None):
super().__init__(rule_name=rule_name, nodes=nodes)
self.concept = concept
self.recurse_id = recurse_id
@@ -216,6 +216,10 @@ class ConceptExpression(ParsingExpression):
if not isinstance(other, ConceptExpression):
return False
# TODO : enable self.recurse_id when it will be correctly implemented
# if self.recurse_id != other.recurse_id:
# return False
if isinstance(self.concept, Concept):
return self.concept.id == other.concept.id
@@ -425,6 +429,12 @@ class Repetition(ParsingExpression):
super(Repetition, self).__init__(*elements, **kwargs)
self.sep = kwargs.get('sep', None)
def clone(self):
    """
    Return a new repetition configured like this one.

    The elements are unpacked into the constructor: it takes them as
    ``*elements``, so handing over the tuple itself would wrap it in another
    one-item tuple.
    The concrete class of ``self`` is used, so cloning a subclass yields an
    instance of that same subclass.

    :return: a new instance with the same elements, rule name, nodes and separator
    """
    # NOTE(review): ``self.nodes`` is handed over as is, it is not copied here.
    return type(self)(*self.elements,
                      rule_name=self.rule_name,
                      nodes=self.nodes,
                      sep=self.sep)
class ZeroOrMore(Repetition):
"""
@@ -711,6 +721,14 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
return self.STOP
class BnfNodeConceptExpressionVisitor(ParsingExpressionVisitor):
    """
    Visitor that records the concept of every ConceptExpression it is given.
    The concepts are accumulated in ``references``, in the order of the calls.
    """

    def __init__(self):
        # concepts collected so far
        self.references = list()

    def visit_ConceptExpression(self, pe):
        """
        Remember the concept referenced by the concept expression ``pe``
        """
        concept = pe.concept
        self.references.append(concept)
class BnfConceptParserHelper:
def __init__(self, parser):
self.parser = parser
@@ -1137,14 +1155,84 @@ class BnfNodeParser(BaseNodeParser):
return concept_parser_helpers
def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False):
def fix_infinite_recursions(self, context, grammar, concept_id, parsing_expression):
    """
    Check the newly created parsing expression for infinite recursions and
    try to repair them.

    Some infinite recursions can be resolved simply by replacing the concept
    expression that causes the loop: it is resolved again against a copy of
    the grammar from which the last two ids of the cycle have been removed.
    The check is repeated until no recursion is reported, or until the
    reported one is not of the fixable kind.

    :param context: execution context; a sub context is pushed for every repair
    :param grammar: mapping concept id -> parsing expression (only a copy is altered here)
    :param concept_id: id of the concept owning ``parsing_expression``
    :param parsing_expression: expression to check; its nodes may be replaced in place
    :return: the ids reported by the last recursion check (empty list when
             no recursion remains)
    """

    def _find(expression_, path_):
        # Walk down the expression following the ids of path_.
        # Returns the parent of the last concept expression found, its index
        # in the parent's nodes, and the concept expression itself.
        index_ = -1
        parent_ = None
        for node_id in path_:
            if isinstance(expression_, ConceptExpression):
                # look into the first node of the concept expression
                expression_ = expression_.nodes[0]

            for i, node in enumerate(expression_.nodes):
                if not isinstance(node, ConceptExpression):
                    continue
                if node.recurse_id == node_id or node.concept.id == node_id:
                    index_ = i
                    parent_ = expression_
                    expression_ = node  # take the child of the ConceptExpression found
                    break
            else:
                raise IndexError(f"path {path_} cannot be found in '{expression_}'")

        return parent_, index_, expression_

    def _fix_node(expression, path):
        # path[0] is the id the check started from, the last two ids are
        # the ones that are removed from the grammar copy below
        parent, index, expression_update = _find(expression, path[1:-2])
        assert isinstance(expression_update, ConceptExpression)

        desc = f"Fixing circular reference {path}"
        with context.push(BuiltinConcepts.INIT_BNF,
                          expression_update.concept,
                          who=self.name,
                          obj=expression_update.concept,
                          concepts_to_skip=[concept_id],
                          desc=desc) as sub_context:
            # work on a copy, the caller's grammar must stay untouched
            new_grammar = grammar.copy()
            for node_id in path[-2:]:
                del new_grammar[node_id]

            new_nodes = self.resolve_concept_parsing_expression(sub_context,
                                                                expression_update.concept,
                                                                expression_update.rule_name, new_grammar, set())
            new = ConceptExpression(expression_update.concept,
                                    rule_name=expression_update.rule_name,
                                    recurse_id=expression_update.recurse_id,
                                    nodes=new_nodes)
            parent.nodes[index] = new

    # NOTE(review): the loop only ends when the check stops reporting a
    # fixable recursion; it assumes every fix removes the reported cycle.
    while True:
        already_found = [concept_id]
        concepts_in_recursion = []
        if not self.check_for_infinite_recursion(parsing_expression, already_found, concepts_in_recursion):
            break

        if "#" not in concepts_in_recursion[-2]:
            break

        # means that it's isaset concept
        _fix_node(parsing_expression, concepts_in_recursion[:-1])

    return concepts_in_recursion
def check_for_infinite_recursion(self, parsing_expression, already_found, in_recursion, only_first=False):
if isinstance(parsing_expression, ConceptExpression):
id_to_use = parsing_expression.recurse_id or parsing_expression.concept.id
if id_to_use in already_found:
already_found.append(id_to_use) # add the id again, to know where the cycle starts
in_recursion.extend(already_found)
return True
already_found.add(id_to_use)
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, only_first)
already_found.append(id_to_use)
return self.check_for_infinite_recursion(
parsing_expression.nodes[0], already_found, in_recursion, only_first)
if isinstance(parsing_expression, Sequence):
# for sequence, we need to check all nodes
@@ -1154,8 +1242,7 @@ class BnfNodeParser(BaseNodeParser):
nodes = parsing_expression.nodes
for node in nodes:
already_found_for_current_node = already_found.copy()
if self.check_for_infinite_recursion(node, already_found_for_current_node, False):
already_found.update(already_found_for_current_node)
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, False):
return True
return False
@@ -1164,8 +1251,7 @@ class BnfNodeParser(BaseNodeParser):
# we are safe
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
if self.check_for_infinite_recursion(node, already_found, True):
already_found.update(already_found_for_current_node)
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
return True
else:
return False
@@ -1174,8 +1260,7 @@ class BnfNodeParser(BaseNodeParser):
if isinstance(parsing_expression, UnOrderedChoice):
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
if self.check_for_infinite_recursion(node, already_found_for_current_node, True):
already_found.update(already_found_for_current_node)
if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True):
return True
return False
@@ -1215,13 +1300,20 @@ class BnfNodeParser(BaseNodeParser):
if isinstance(node, UnderConstruction):
pe.nodes[i] = grammar.get(node.concept_id)
# check for infinite recursion.
# We are adding a new concept. Does it create an infinite recursion ?
concepts_in_recursion = set()
if self.check_for_infinite_recursion(ret, concepts_in_recursion):
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
# # check for infinite recursions.
# # and try to fix them when possible
# already_found = [concept.id]
# concepts_in_recursion = []
# if self.check_for_infinite_recursion(ret, already_found, concepts_in_recursion):
# chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
# for concept_id in concepts_in_recursion:
# grammar[concept_id] = chicken_anf_egg
concepts_in_recursion = self.fix_infinite_recursions(context, grammar, concept.id, ret)
if concepts_in_recursion:
chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
for concept_id in concepts_in_recursion:
grammar[concept_id] = cycle
grammar[concept_id] = chicken_anf_egg
# update, in case of infinite circular recursion
ret = grammar[concept.id]
@@ -1287,13 +1379,16 @@ class BnfNodeParser(BaseNodeParser):
if c.id == context.obj.id:
continue
# c_pe = self.resolve_concept_parsing_expression(context, c, None, grammar, to_update, to_keep)
# if self.check_for_infinite_recursion(c_pe, {concept.id}, True):
# continue
if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip:
continue
valid_concepts.append(c)
nodes = [ConceptExpression(c, rule_name=c.name, recurse_id=key_to_use) for c in valid_concepts]
nodes = []
for c in valid_concepts:
c_recurse_id = f"{c.id}#{c.name}#{concept.id}" if self.sheerka.isaset(context, c) else None
nodes.append(ConceptExpression(c, rule_name=c.name, recurse_id=c_recurse_id))
resolved = self.resolve_parsing_expression(ssc,
UnOrderedChoice(*nodes),
grammar,
@@ -1341,9 +1436,6 @@ class BnfNodeParser(BaseNodeParser):
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it
#
# if isinstance(pe, UnderConstruction) and expression.concept.id == pe.concept_id:
# return pe # we are looking for ourself, just return it
if isinstance(pe, UnderConstruction):
to_update.add(ToUpdate(id(expression), expression))
+31 -10
View File
@@ -35,7 +35,7 @@ class BnfParser(BaseParser):
self.lexer_iter = None
self._current = None
self.after_current = None
self.after_current = None # one look ahead
self.nb_open_par = 0
self.context = None
self.source = ""
@@ -283,16 +283,37 @@ class BnfParser(BaseParser):
def eat_rule_name_if_needed(self, expression):
token = self.get_token()
if token is None or token.type != TokenKind.EQUALS:
return expression
if token is not None and token.type == TokenKind.EQUALS:
self.next_token() # eat equals
token = self.get_token()
self.next_token() # eat equals
token = self.get_token()
if token is None or token.type != TokenKind.IDENTIFIER:
return self.add_error(
UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
if token is None or token.type != TokenKind.IDENTIFIER:
return self.add_error(
UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
expression.rule_name = token.value
self.next_token()
if BnfParser.is_expression_a_set(self.context, expression):
root_concept = self.context.search(
start_with_self=True,
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
get_obj=lambda ec: ec.action_context,
stop=lambda ec: ec.action == BuiltinConcepts.INIT_BNF)
root_concept = list(root_concept)
if root_concept and hasattr(root_concept[0], "id"):
expression.recurse_id = f"{expression.concept.id}#{expression.rule_name}#{root_concept[0].id}"
expression.rule_name = token.value
self.next_token()
return expression
@staticmethod
def is_expression_a_set(context, expression):
    """
    Tell whether ``expression`` is a ConceptExpression whose concept is
    reported as a set by ``context.sheerka.isaset``.

    :param context: execution context giving access to ``sheerka``
    :param expression: the parsing expression to test
    :return: False when ``expression`` is not a ConceptExpression,
             otherwise the result of ``isaset`` for its concept
    """
    if not isinstance(expression, ConceptExpression):
        return False

    return context.sheerka.isaset(context, expression.concept)
@staticmethod
def update_recurse_id(context, concept_id, expression):
    """
    Set the ``recurse_id`` of ``expression`` when it is a set concept
    expression, then do the same for all of its elements, recursively.

    The id has the form ``<concept id>#<rule name>#<concept_id>``.

    :param context: execution context, forwarded to ``is_expression_a_set``
    :param concept_id: id used as the last part of the recurse id
    :param expression: expression to update in place
    """
    needs_recurse_id = BnfParser.is_expression_a_set(context, expression)
    if needs_recurse_id:
        recurse_id = f"{expression.concept.id}#{expression.rule_name}#{concept_id}"
        expression.recurse_id = recurse_id

    for child in expression.elements:
        BnfParser.update_recurse_id(context, concept_id, child)
+2 -2
View File
@@ -355,14 +355,14 @@ class DefaultParser(BaseParser):
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return None, NotInitializedNode()
regex_parser = BnfParser()
bnf_regex_parser = BnfParser()
desc = f"Resolving BNF {current_concept_def.definition}"
with self.context.push(BuiltinConcepts.INIT_BNF,
current_concept_def,
who=self.name,
obj=current_concept_def,
desc=desc) as sub_context:
parsing_result = regex_parser.parse(sub_context, tokens)
parsing_result = bnf_regex_parser.parse(sub_context, tokens)
sub_context.add_values(return_values=parsing_result)
if not parsing_result.status: