Fixed infinite recursion when parsing complex BNF nodes

This commit is contained in:
2020-06-23 15:22:27 +02:00
parent 912455c343
commit 7310bc5522
28 changed files with 1082 additions and 276 deletions
+42 -52
View File
@@ -1,6 +1,7 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
from typing import Set
import core.utils
from core.builtin_concepts import BuiltinConcepts
@@ -12,6 +13,11 @@ from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True
@dataclass
class ChickenAndEggError(Exception):
    """Raised while resolving concepts by their first token when a concept's
    first token depends (directly or transitively) on a concept already being
    resolved — i.e. a circular definition that would otherwise recurse forever.
    """

    # The set of concept ids already visited when the cycle was detected
    # (the `already_seen` set at the raise site).
    concepts: Set[str]
@dataclass()
class LexerNode(Node):
start: int # starting index in the tokens list
@@ -422,7 +428,7 @@ class CN(HelperWithPos):
ConceptNode tester class
It matches with ConceptNode but with less constraints
CNC == ConceptNode if concept key, start, end and source are the same
CN == ConceptNode if concept key, start, end and source are the same
"""
def __init__(self, concept, start=None, end=None, source=None):
@@ -496,6 +502,9 @@ class CNC(CN):
super().__init__(concept_key, start, end, source)
self.compiled = kwargs
self.exclude_body = exclude_body
if "body" in self.compiled:
self.compiled[ConceptParts.BODY] = self.compiled["body"]
del self.compiled["body"]
def __eq__(self, other):
if id(self) == id(other):
@@ -516,7 +525,10 @@ class CNC(CN):
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.compiled
return self.compiled == to_compare
if self.compiled == to_compare:
return True
else:
return False
if not isinstance(other, CNC):
return False
@@ -613,7 +625,7 @@ class BaseNodeParser(BaseParser):
:param concepts
:return:
"""
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
concepts_by_first_keyword = self.get_concepts_by_first_token(context, concepts).body
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
def reset_parser(self, context, parser_input: ParserInput):
@@ -626,38 +638,6 @@ class BaseNodeParser(BaseParser):
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
return True
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
#
#
# self.token = None
# self.pos = -1
# return True
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.parser_input.next_token()
# return error
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
"""
@@ -698,7 +678,7 @@ class BaseNodeParser(BaseParser):
return custom_concepts if custom else None
@staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
def get_concepts_by_first_token(context, concepts, use_sheerka=False):
"""
Create the map describing the first token expected by a concept
:param context:
@@ -718,22 +698,26 @@ class BaseNodeParser(BaseParser):
for keyword in keywords:
res.setdefault(keyword, []).append(concept.id)
# 'uniquify' the lists
for k, v in res.items():
res[k] = core.utils.make_unique(v)
return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
sheerka = context.sheerka
def _make_unique(elements):
keys = {}
for e in elements:
keys[e] = 1
return list(keys.keys())
def _resolve_concepts(concept_str):
resolved = []
to_resolve = []
def resolve_concepts(concept_str):
resolved = set()
to_resolve = set()
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
if concept.id in already_seen:
raise ChickenAndEggError(already_seen)
else:
already_seen.add(concept.id)
if sheerka.isaset(context, concept):
concepts = sheerka.get_set_elements(context, concept)
else:
@@ -743,25 +727,31 @@ class BaseNodeParser(BaseParser):
BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
for keyword in keywords:
(to_resolve if keyword.startswith("c:|") else resolved).append(keyword)
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
for concept_to_resolve_str in to_resolve:
resolved += _resolve_concepts(concept_to_resolve_str)
resolved |= resolve_concepts(concept_to_resolve_str)
return resolved
res = {}
for k, v in concepts_by_first_keyword.items():
if k.startswith("c:|"):
resolved_keywords = _resolve_concepts(k)
for resolved in resolved_keywords:
res.setdefault(resolved, []).extend(v)
try:
already_seen = set()
resolved_keywords = resolve_concepts(k)
for resolved in resolved_keywords:
res.setdefault(resolved, []).extend(v)
except ChickenAndEggError as ex:
context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
# res[k] = sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG,
# body=[sheerka.get_by_id(c) for c in ex.concepts])
else:
res.setdefault(k, []).extend(v)
# 'uniquify' the lists
for k, v in res.items():
res[k] = _make_unique(v)
res[k] = core.utils.make_unique(v)
return sheerka.ret("BaseNodeParser", True, res)
@@ -797,7 +787,7 @@ class BaseNodeParser(BaseParser):
if concept.metadata.definition_type == DEFINITION_TYPE_BNF and not concept.bnf:
from parsers.BnfParser import BnfParser
regex_parser = BnfParser()
desc = f"Resolving BNF {concept.metadata.definition}"
desc = f"Resolving BNF '{concept.metadata.definition}'"
with context.push(BuiltinConcepts.INIT_BNF,
concept,
who=parser_name,