Fixed infinite recursion when parsing complex BNF node
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Set
|
||||
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
@@ -12,6 +13,11 @@ from parsers.BaseParser import Node, BaseParser, ErrorNode
|
||||
DEBUG_COMPILED = True
|
||||
|
||||
|
||||
@dataclass(eq=False)
class ChickenAndEggError(Exception):
    """
    Raised when resolving a concept leads back to a concept that was already seen.

    ``eq=False`` is deliberate: with the default ``eq=True`` the dataclass machinery
    sets ``__hash__`` to ``None``, which makes the exception instances unhashable.
    Keeping the identity based ``__eq__`` / ``__hash__`` inherited from ``Exception``
    lets the error be stored in sets or used as a dictionary key like any other exception.
    """

    # ids of the concepts that were already seen when the cycle was detected
    concepts: Set[str]
|
||||
|
||||
|
||||
@dataclass()
|
||||
class LexerNode(Node):
|
||||
start: int # starting index in the tokens list
|
||||
@@ -422,7 +428,7 @@ class CN(HelperWithPos):
|
||||
ConceptNode tester class
|
||||
It matches with ConceptNode but with less constraints
|
||||
|
||||
CNC == ConceptNode if concept key, start, end and source are the same
|
||||
CN == ConceptNode if concept key, start, end and source are the same
|
||||
"""
|
||||
|
||||
def __init__(self, concept, start=None, end=None, source=None):
|
||||
@@ -496,6 +502,9 @@ class CNC(CN):
|
||||
super().__init__(concept_key, start, end, source)
|
||||
self.compiled = kwargs
|
||||
self.exclude_body = exclude_body
|
||||
if "body" in self.compiled:
|
||||
self.compiled[ConceptParts.BODY] = self.compiled["body"]
|
||||
del self.compiled["body"]
|
||||
|
||||
def __eq__(self, other):
|
||||
if id(self) == id(other):
|
||||
@@ -516,7 +525,10 @@ class CNC(CN):
|
||||
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
|
||||
else:
|
||||
to_compare = other.concept.compiled
|
||||
return self.compiled == to_compare
|
||||
if self.compiled == to_compare:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
if not isinstance(other, CNC):
|
||||
return False
|
||||
@@ -613,7 +625,7 @@ class BaseNodeParser(BaseParser):
|
||||
:param concepts
|
||||
:return:
|
||||
"""
|
||||
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
|
||||
concepts_by_first_keyword = self.get_concepts_by_first_token(context, concepts).body
|
||||
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
|
||||
|
||||
def reset_parser(self, context, parser_input: ParserInput):
|
||||
@@ -626,38 +638,6 @@ class BaseNodeParser(BaseParser):
|
||||
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
|
||||
return False
|
||||
return True
|
||||
# self.text = text
|
||||
#
|
||||
# try:
|
||||
# self.tokens = list(self.get_input_as_tokens(text))
|
||||
#
|
||||
#
|
||||
# self.token = None
|
||||
# self.pos = -1
|
||||
# return True
|
||||
|
||||
# def add_error(self, error, next_token=True):
|
||||
# self.error_sink.append(error)
|
||||
# if next_token:
|
||||
# self.parser_input.next_token()
|
||||
# return error
|
||||
|
||||
# def get_token(self) -> Token:
|
||||
# return self.token
|
||||
#
|
||||
# def next_token(self, skip_whitespace=True):
|
||||
# if self.token and self.token.type == TokenKind.EOF:
|
||||
# return False
|
||||
#
|
||||
# self.pos += 1
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# if skip_whitespace:
|
||||
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
|
||||
# self.pos += 1
|
||||
# self.token = self.tokens[self.pos]
|
||||
#
|
||||
# return self.token.type != TokenKind.EOF
|
||||
|
||||
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
|
||||
"""
|
||||
@@ -698,7 +678,7 @@ class BaseNodeParser(BaseParser):
|
||||
return custom_concepts if custom else None
|
||||
|
||||
@staticmethod
|
||||
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
|
||||
def get_concepts_by_first_token(context, concepts, use_sheerka=False):
|
||||
"""
|
||||
Create the map describing the first token expected by a concept
|
||||
:param context:
|
||||
@@ -718,22 +698,26 @@ class BaseNodeParser(BaseParser):
|
||||
for keyword in keywords:
|
||||
res.setdefault(keyword, []).append(concept.id)
|
||||
|
||||
# 'uniquify' the lists
|
||||
for k, v in res.items():
|
||||
res[k] = core.utils.make_unique(v)
|
||||
|
||||
return sheerka.ret("BaseNodeParser", True, res)
|
||||
|
||||
@staticmethod
|
||||
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
|
||||
sheerka = context.sheerka
|
||||
|
||||
def _make_unique(elements):
|
||||
keys = {}
|
||||
for e in elements:
|
||||
keys[e] = 1
|
||||
return list(keys.keys())
|
||||
|
||||
def _resolve_concepts(concept_str):
|
||||
resolved = []
|
||||
to_resolve = []
|
||||
def resolve_concepts(concept_str):
|
||||
resolved = set()
|
||||
to_resolve = set()
|
||||
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
|
||||
|
||||
if concept.id in already_seen:
|
||||
raise ChickenAndEggError(already_seen)
|
||||
else:
|
||||
already_seen.add(concept.id)
|
||||
|
||||
if sheerka.isaset(context, concept):
|
||||
concepts = sheerka.get_set_elements(context, concept)
|
||||
else:
|
||||
@@ -743,25 +727,31 @@ class BaseNodeParser(BaseParser):
|
||||
BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail
|
||||
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
|
||||
for keyword in keywords:
|
||||
(to_resolve if keyword.startswith("c:|") else resolved).append(keyword)
|
||||
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
|
||||
|
||||
for concept_to_resolve_str in to_resolve:
|
||||
resolved += _resolve_concepts(concept_to_resolve_str)
|
||||
resolved |= resolve_concepts(concept_to_resolve_str)
|
||||
|
||||
return resolved
|
||||
|
||||
res = {}
|
||||
for k, v in concepts_by_first_keyword.items():
|
||||
if k.startswith("c:|"):
|
||||
resolved_keywords = _resolve_concepts(k)
|
||||
for resolved in resolved_keywords:
|
||||
res.setdefault(resolved, []).extend(v)
|
||||
try:
|
||||
already_seen = set()
|
||||
resolved_keywords = resolve_concepts(k)
|
||||
for resolved in resolved_keywords:
|
||||
res.setdefault(resolved, []).extend(v)
|
||||
except ChickenAndEggError as ex:
|
||||
context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
|
||||
# res[k] = sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG,
|
||||
# body=[sheerka.get_by_id(c) for c in ex.concepts])
|
||||
else:
|
||||
res.setdefault(k, []).extend(v)
|
||||
|
||||
# 'uniquify' the lists
|
||||
for k, v in res.items():
|
||||
res[k] = _make_unique(v)
|
||||
res[k] = core.utils.make_unique(v)
|
||||
|
||||
return sheerka.ret("BaseNodeParser", True, res)
|
||||
|
||||
@@ -797,7 +787,7 @@ class BaseNodeParser(BaseParser):
|
||||
if concept.metadata.definition_type == DEFINITION_TYPE_BNF and not concept.bnf:
|
||||
from parsers.BnfParser import BnfParser
|
||||
regex_parser = BnfParser()
|
||||
desc = f"Resolving BNF {concept.metadata.definition}"
|
||||
desc = f"Resolving BNF '{concept.metadata.definition}'"
|
||||
with context.push(BuiltinConcepts.INIT_BNF,
|
||||
concept,
|
||||
who=parser_name,
|
||||
|
||||
+256
-91
@@ -154,18 +154,6 @@ class ConceptExpression(ParsingExpression):
|
||||
[node])
|
||||
|
||||
|
||||
# class ConceptGroupExpression(ConceptExpression):
|
||||
# def _parse(self, parser_helper):
|
||||
# node = self.nodes[0].parse(parser_helper)
|
||||
# if node is None:
|
||||
# return None
|
||||
# return NonTerminalNode(self,
|
||||
# node.start,
|
||||
# node.end,
|
||||
# node.tokens, # node is an OrderedChoice
|
||||
# [node])
|
||||
|
||||
|
||||
class Sequence(ParsingExpression):
|
||||
"""
|
||||
Will match sequence of parser expressions in exact order they are defined.
|
||||
@@ -422,6 +410,69 @@ class StrMatch(Match):
|
||||
return None
|
||||
|
||||
|
||||
# class RegExMatch(Match):
|
||||
# '''
|
||||
# This Match class will perform input matching based on Regular Expressions.
|
||||
#
|
||||
# Args:
|
||||
# to_match (regex string): A regular expression string to match.
|
||||
# It will be used to create regular expression using re.compile.
|
||||
# ignore_case(bool): If case insensitive match is needed.
|
||||
# Default is None to support propagation from global parser setting.
|
||||
# multiline(bool): allow the regex to work across multiple lines
|
||||
# (re.DOTALL flag). Default is None to support propagation from
|
||||
# global parser setting.
|
||||
# str_repr(str): A string that is used to represent this regex.
|
||||
# re_flags: flags parameter for re.compile if neither ignore_case
|
||||
# or multiple are set.
|
||||
#
|
||||
# '''
|
||||
# def __init__(self, to_match, rule_name='', root=False, ignore_case=None,
|
||||
# multiline=None, str_repr=None, re_flags=re.MULTILINE):
|
||||
# super(RegExMatch, self).__init__(rule_name, root)
|
||||
# self.to_match_regex = to_match
|
||||
# self.ignore_case = ignore_case
|
||||
# self.multiline = multiline
|
||||
# self.explicit_flags = re_flags
|
||||
#
|
||||
# self.to_match = str_repr if str_repr is not None else to_match
|
||||
#
|
||||
# def compile(self):
|
||||
# flags = self.explicit_flags
|
||||
# if self.multiline is True:
|
||||
# flags |= re.DOTALL
|
||||
# if self.multiline is False and flags & re.DOTALL:
|
||||
# flags -= re.DOTALL
|
||||
# if self.ignore_case is True:
|
||||
# flags |= re.IGNORECASE
|
||||
# if self.ignore_case is False and flags & re.IGNORECASE:
|
||||
# flags -= re.IGNORECASE
|
||||
# self.regex = re.compile(self.to_match_regex, flags)
|
||||
#
|
||||
# def __str__(self):
|
||||
# return self.to_match
|
||||
#
|
||||
# def __unicode__(self):
|
||||
# return self.__str__()
|
||||
#
|
||||
# def _parse(self, parser):
|
||||
# c_pos = parser.position
|
||||
# m = self.regex.match(parser.input, c_pos)
|
||||
# if m:
|
||||
# matched = m.group()
|
||||
# if parser.debug:
|
||||
# parser.dprint(
|
||||
# "++ Match '%s' at %d => '%s'" %
|
||||
# (matched, c_pos, parser.context(len(matched))))
|
||||
# parser.position += len(matched)
|
||||
# if matched:
|
||||
# return Terminal(self, c_pos, matched, extra_info=m)
|
||||
# else:
|
||||
# if parser.debug:
|
||||
# parser.dprint("-- NoMatch at {}".format(c_pos))
|
||||
# parser._nm_raise(self, c_pos, parser)
|
||||
|
||||
|
||||
class ParsingExpressionVisitor:
|
||||
"""
|
||||
visit ParsingExpression
|
||||
@@ -550,7 +601,7 @@ class BnfConceptParserHelper:
|
||||
forked.eat_concept(concept, token)
|
||||
|
||||
# init
|
||||
parsing_expression = self.parser.get_parsing_expression(concept)
|
||||
parsing_expression = self.parser.get_parsing_expression(self.parser.context, concept)
|
||||
if not isinstance(parsing_expression, ParsingExpression):
|
||||
self.debug.append(concept)
|
||||
error_msg = f"Failed to parse concept '{concept}'"
|
||||
@@ -733,6 +784,11 @@ class BnfConceptParserHelper:
|
||||
return concept
|
||||
|
||||
|
||||
@dataclass()
class UnderConstruction:
    """
    Placeholder for a parsing expression that is not resolved yet.

    It only carries the id of the concept whose parsing expression is still being built.
    """

    # id of the concept whose parsing expression is under construction
    concept_id: str
|
||||
|
||||
|
||||
class BnfNodeParser(BaseNodeParser):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__("BnfNode", 50, **kwargs)
|
||||
@@ -769,6 +825,11 @@ class BnfNodeParser(BaseNodeParser):
|
||||
return valid_parser_helpers
|
||||
|
||||
def get_concepts_sequences(self):
|
||||
"""
|
||||
Main method that parses the tokens and extract the concepts
|
||||
:return:
|
||||
"""
|
||||
|
||||
def _add_forked_to_concept_parser_helpers():
|
||||
# check that if some new InfixToPostfix are created
|
||||
for parser in concept_parser_helpers:
|
||||
@@ -836,110 +897,214 @@ class BnfNodeParser(BaseNodeParser):
|
||||
|
||||
return concept_parser_helpers
|
||||
|
||||
def get_parsing_expression(self, concept, already_seen=None):
|
||||
def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False):
|
||||
|
||||
if isinstance(parsing_expression, ConceptExpression):
|
||||
if parsing_expression.concept in already_found:
|
||||
return True
|
||||
already_found.add(parsing_expression.concept)
|
||||
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, False)
|
||||
|
||||
if isinstance(parsing_expression, Sequence):
|
||||
# for sequence, we need to check all nodes
|
||||
if only_first:
|
||||
nodes = [] if len(parsing_expression.nodes) == 0 else [parsing_expression.nodes[0]]
|
||||
else:
|
||||
nodes = parsing_expression.nodes
|
||||
for node in nodes:
|
||||
already_found_for_current_node = already_found.copy()
|
||||
if self.check_for_infinite_recursion(node, already_found_for_current_node, False):
|
||||
already_found.update(already_found_for_current_node)
|
||||
return True
|
||||
return False
|
||||
|
||||
if isinstance(parsing_expression, OrderedChoice):
|
||||
# for ordered choice, if there is at least one node that does not resolve to a recursion
|
||||
# we are safe
|
||||
for node in parsing_expression.nodes:
|
||||
already_found_for_current_node = already_found.copy()
|
||||
if self.check_for_infinite_recursion(node, already_found, True):
|
||||
already_found.update(already_found_for_current_node)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
return False
|
||||
|
||||
return False
|
||||
|
||||
def get_parsing_expression(self, context, concept):
|
||||
if concept.id in self.concepts_grammars:
|
||||
return self.concepts_grammars.get(concept.id)
|
||||
|
||||
if not concept.bnf:
|
||||
BaseNodeParser.ensure_bnf(self.context, concept, self.name)
|
||||
grammar = self.concepts_grammars.copy()
|
||||
to_resolve = {} # the key is the instance id of the parsing expression
|
||||
isa_concepts = set()
|
||||
self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts)
|
||||
|
||||
expression = concept.bnf
|
||||
desc = f"Resolving parsing expression {expression}"
|
||||
with self.context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
|
||||
sub_context.add_inputs(expression=expression)
|
||||
resolved = self.resolve_parsing_expression(expression, already_seen or set())
|
||||
sub_context.add_values(return_values=resolved)
|
||||
for _id, pe in to_resolve.items():
|
||||
for i, node in enumerate(pe.nodes):
|
||||
if isinstance(node, UnderConstruction):
|
||||
pe.nodes[i] = grammar.get(node.concept_id)
|
||||
|
||||
self.concepts_grammars.put(concept.id, resolved)
|
||||
concepts_in_recursion = set()
|
||||
if self.check_for_infinite_recursion(pe, concepts_in_recursion):
|
||||
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body={c.id for c in concepts_in_recursion})
|
||||
for concept in concepts_in_recursion:
|
||||
grammar[concept.id] = cycle
|
||||
|
||||
if self.has_error:
|
||||
return None
|
||||
# Make sure you do not put isa concepts in cache
|
||||
# why :
|
||||
# twenties = 'twenty' number where number < 10
|
||||
# hundreds = number 'hundred' where number < 99
|
||||
# the concept of number depends on its utilisation
|
||||
for concept_id in [c for c in grammar if c not in isa_concepts]:
|
||||
self.concepts_grammars.put(concept_id, grammar[concept_id])
|
||||
|
||||
return self.concepts_grammars.get(concept.id)
|
||||
|
||||
def resolve_parsing_expression(self, parsing_expression, already_seen):
|
||||
def resolve_concept_parsing_expression(self, context, concept, grammar, to_resolve, isa_concepts):
|
||||
if concept.id in grammar:
|
||||
return grammar.get(concept.id)
|
||||
|
||||
def inner_resolve(expression, inner_already_seen):
|
||||
# if isinstance(expression, Concept):
|
||||
# if self.sheerka.isaset(self.context, expression):
|
||||
# ret = ConceptGroupExpression(expression, rule_name=expression.name)
|
||||
# else:
|
||||
# ret = ConceptExpression(expression, rule_name=expression.name)
|
||||
# possible_recursion.add(expression)
|
||||
if isinstance(expression, str):
|
||||
ret = StrMatch(expression, ignore_case=self.ignore_case)
|
||||
desc = f"Get parsing expression for '{concept}'"
|
||||
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
|
||||
if not concept.bnf: # to save a function call. Not sure it worth it.
|
||||
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
|
||||
|
||||
elif not isinstance(expression, ParsingExpression):
|
||||
return expression # escalate the error
|
||||
grammar[concept.id] = UnderConstruction(concept.id)
|
||||
sheerka = context.sheerka
|
||||
|
||||
elif isinstance(expression, ConceptExpression):
|
||||
concept = self.get_concept(expression.concept)
|
||||
if concept in inner_already_seen:
|
||||
return self.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concept)
|
||||
expression.concept = concept
|
||||
inner_already_seen.add(concept)
|
||||
if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
|
||||
expression = concept.bnf
|
||||
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
|
||||
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
||||
ssc.add_inputs(expression=expression)
|
||||
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts)
|
||||
ssc.add_values(return_values=resolved)
|
||||
|
||||
if not self.sheerka.is_known(concept):
|
||||
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
|
||||
return self.add_error(unknown_concept)
|
||||
elif sheerka.isaset(context, concept):
|
||||
desc = f"Concept is a group. Resolving parsing expression using 'isa'"
|
||||
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
||||
ssc.add_inputs(concept=concept)
|
||||
isa_concepts.add(concept.id)
|
||||
concepts_in_group = self.sheerka.get_set_elements(ssc, concept)
|
||||
|
||||
# bnf concept
|
||||
elif concept.metadata.definition_type == DEFINITION_TYPE_BNF:
|
||||
pe = self.get_parsing_expression(concept, inner_already_seen)
|
||||
# concepts_in_group comes from a set, so the order of its elements is not guaranteed
|
||||
# to avoid random failures (i.e. a random CHICKEN_AND_EGG), we need to rearrange them
|
||||
# We also remove the root concept (the one from get_parsing_expression())
|
||||
|
||||
elif self.sheerka.isaset(self.context, concept):
|
||||
concepts_in_group = self.sheerka.get_set_elements(self.context, concept)
|
||||
nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
|
||||
pe = inner_resolve(OrderedChoice(*nodes), inner_already_seen)
|
||||
root_concept_as_set = set(context.search(
|
||||
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
|
||||
get_obj=lambda ec: ec.obj,
|
||||
stop=lambda ec: ec.action != BuiltinConcepts.INIT_BNF)) # there only one item in the set
|
||||
root_concept = list(root_concept_as_set)[0]
|
||||
reordered = []
|
||||
for c in concepts_in_group:
|
||||
if c.id == root_concept.id:
|
||||
continue
|
||||
|
||||
else:
|
||||
# regular concepts
|
||||
tokens = Tokenizer(concept.name)
|
||||
nodes = [StrMatch(token.strip_quote) for token in list(tokens)[:-1]]
|
||||
pe = inner_resolve(nodes[0] if len(nodes) == 1 else Sequence(nodes), inner_already_seen)
|
||||
# I do not guarantee the same order every time, but I minimize the random ChickenAndEgg issue
|
||||
if c.metadata.definition_type == DEFINITION_TYPE_BNF or sheerka.isaset(ssc, c):
|
||||
reordered.append(c)
|
||||
else:
|
||||
reordered.insert(0, c)
|
||||
|
||||
if not isinstance(pe, ParsingExpression):
|
||||
return pe
|
||||
expression.nodes = [pe]
|
||||
expression.rule_name = expression.rule_name or concept.name
|
||||
ret = expression
|
||||
|
||||
elif isinstance(expression, StrMatch):
|
||||
ret = expression
|
||||
if ret.ignore_case is None:
|
||||
ret.ignore_case = self.ignore_case
|
||||
|
||||
elif isinstance(expression, Sequence) or \
|
||||
isinstance(expression, OrderedChoice) or \
|
||||
isinstance(expression, ZeroOrMore) or \
|
||||
isinstance(expression, OneOrMore) or \
|
||||
isinstance(expression, Optional):
|
||||
ret = expression
|
||||
ret.nodes = []
|
||||
for e in ret.elements:
|
||||
pe = inner_resolve(e, already_seen.copy())
|
||||
if not isinstance(pe, ParsingExpression):
|
||||
return pe
|
||||
ret.nodes.append(pe)
|
||||
nodes = [ConceptExpression(c, rule_name=c.name) for c in reordered]
|
||||
resolved = self.resolve_parsing_expression(ssc,
|
||||
OrderedChoice(*nodes),
|
||||
grammar,
|
||||
to_resolve,
|
||||
isa_concepts)
|
||||
ssc.add_values(concepts_in_group=concepts_in_group)
|
||||
ssc.add_values(return_values=resolved)
|
||||
|
||||
else:
|
||||
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
||||
desc = f"Concept is a simple concept."
|
||||
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
||||
tokens = Tokenizer(concept.name, yield_eof=False)
|
||||
nodes = [StrMatch(token.strip_quote) for token in tokens]
|
||||
expression = nodes[0] if len(nodes) == 1 else Sequence(nodes)
|
||||
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts)
|
||||
|
||||
# Translate separator expression.
|
||||
if isinstance(expression, Repetition) and expression.sep:
|
||||
expression.sep = inner_resolve(expression.sep, already_seen)
|
||||
grammar[concept.id] = resolved
|
||||
|
||||
return ret
|
||||
if self.has_error:
|
||||
sub_context.add_values(errors=self.error_sink)
|
||||
return None
|
||||
|
||||
parsing_expression = inner_resolve(parsing_expression, already_seen)
|
||||
return parsing_expression
|
||||
sub_context.add_values(return_values=resolved)
|
||||
return resolved
|
||||
|
||||
def get_concept(self, concept):
|
||||
def resolve_parsing_expression(self, context, expression, grammar, to_resolve, isa_concepts):
|
||||
|
||||
if isinstance(expression, str):
|
||||
ret = StrMatch(expression, ignore_case=self.ignore_case)
|
||||
|
||||
elif not isinstance(expression, ParsingExpression):
|
||||
return expression # escalate the error
|
||||
|
||||
elif isinstance(expression, ConceptExpression):
|
||||
concept = self.get_concept(context, expression.concept)
|
||||
expression.concept = concept
|
||||
|
||||
if not self.sheerka.is_known(concept):
|
||||
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
|
||||
return self.add_error(unknown_concept)
|
||||
|
||||
pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts)
|
||||
|
||||
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
|
||||
return pe # an error is detected, escalate it
|
||||
#
|
||||
# if isinstance(pe, UnderConstruction) and expression.concept.id == pe.concept_id:
|
||||
# return pe # we are looking for ourself, just return it
|
||||
|
||||
if isinstance(pe, UnderConstruction):
|
||||
to_resolve[id(expression)] = expression
|
||||
|
||||
expression.nodes = [pe]
|
||||
expression.rule_name = expression.rule_name or concept.name
|
||||
ret = expression
|
||||
|
||||
elif isinstance(expression, StrMatch):
|
||||
ret = expression
|
||||
if ret.ignore_case is None:
|
||||
ret.ignore_case = self.ignore_case
|
||||
|
||||
elif isinstance(expression, Sequence) or \
|
||||
isinstance(expression, OrderedChoice) or \
|
||||
isinstance(expression, ZeroOrMore) or \
|
||||
isinstance(expression, OneOrMore) or \
|
||||
isinstance(expression, Optional):
|
||||
ret = expression
|
||||
ret.nodes = []
|
||||
for e in ret.elements:
|
||||
pe = self.resolve_parsing_expression(context, e, grammar, to_resolve, isa_concepts)
|
||||
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
|
||||
return pe # an error is detected, escalate it
|
||||
if isinstance(pe, UnderConstruction):
|
||||
to_resolve[id(ret)] = ret # remember that there is an unresolved parsing expression
|
||||
ret.nodes.append(pe)
|
||||
|
||||
else:
|
||||
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
||||
|
||||
# Translate separator expression.
|
||||
if isinstance(ret, Repetition) and expression.sep:
|
||||
expression.sep = self.resolve_parsing_expression(context,
|
||||
expression.sep,
|
||||
grammar,
|
||||
to_resolve,
|
||||
isa_concepts)
|
||||
|
||||
return ret
|
||||
|
||||
def get_concept(self, context, concept):
|
||||
if isinstance(concept, Concept):
|
||||
return concept
|
||||
|
||||
if concept in self.context.concepts:
|
||||
return self.context.concepts[concept]
|
||||
if concept in context.concepts:
|
||||
return context.concepts[concept]
|
||||
return self.sheerka.get_by_key(concept)
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
|
||||
Reference in New Issue
Block a user