Fixed BnfNodeParser to allow expressions like 'number hundred' when number is a group
@@ -36,6 +36,7 @@ class Sheerka(Concept):
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"
RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY = "Resolved_Concepts_Sya_Definitions"
CONCEPTS_GRAMMARS_ENTRY = "Concepts_Grammars"
CHICKEN_AND_EGG_CONCEPTS_ENTRY = "Chicken_And_Egg_Concepts"

CONCEPTS_KEYS_ENTRY = "Concepts_Keys"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
@@ -105,6 +106,10 @@ class Sheerka(Concept):

@property
def concepts_grammars(self):
return self.cache_manager.caches[self.CONCEPTS_GRAMMARS_ENTRY].cache

@property
def chicken_and_eggs(self):
return self.cache_manager.caches[self.CHICKEN_AND_EGG_CONCEPTS_ENTRY].cache

def bind_service_method(self, bound_method, as_name=None):
@@ -227,6 +232,9 @@ class Sheerka(Concept):
cache = Cache()
self.cache_manager.register_cache(self.CONCEPTS_GRAMMARS_ENTRY, cache, persist=False)

cache = Cache()
self.cache_manager.register_cache(self.CHICKEN_AND_EGG_CONCEPTS_ENTRY, cache, persist=False)

def initialize_services(self):
"""
Introspect to find services and bind them

@@ -26,7 +26,7 @@ class SheerkaEvaluateConcept(BaseService):

parent = context.get_parent()
while parent is not None:
if parent.who == context.who and parent.obj == concept:
if parent.who == context.who and parent.obj == concept and parent.obj.compiled == concept.compiled:
return True

parent = parent.get_parent()
@@ -147,6 +147,11 @@ class SheerkaEvaluateConcept(BaseService):

def resolve(self, context, to_resolve, current_prop, current_concept, force_evaluation):

def get_path(context_, prop_name):
prefix = context_.path if hasattr(context_, "path") else "<N/A>"
value = prop_name.name if isinstance(current_prop, ConceptParts) else prop_name
return prefix + "." + value

if isinstance(to_resolve, DoNotResolve):
return to_resolve.value

@@ -161,12 +166,14 @@ class SheerkaEvaluateConcept(BaseService):
sub_context.add_values(return_values=ret_val)
return ret_val.body

desc = f"Evaluating {current_prop} (concept={current_concept})"
path = get_path(context, current_prop)
desc = f"Evaluating {path} (concept={current_concept})"
context.log(desc, self.NAME)
with context.push(BuiltinConcepts.EVALUATING_CONCEPT,
current_prop,
desc=desc,
obj=current_concept) as sub_context:
obj=current_concept,
path=path) as sub_context:

if force_evaluation:
sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED)
@@ -76,6 +76,7 @@ class SheerkaSetsManager(BaseService):
self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set))

self.sets.put(concept_set.id, concept.id)
self.concepts_in_set.delete(concept_set.id)
return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))

def add_concepts_to_set(self, context, concepts, concept_set):
@@ -95,6 +96,7 @@ class SheerkaSetsManager(BaseService):
concept_set=concept_set)
else:
body = self.sheerka.new(BuiltinConcepts.SUCCESS)
self.concepts_in_set.delete(concept_set.id)

return self.sheerka.ret(self.NAME, len(already_in_set) != len(concepts), body)
@@ -205,9 +205,9 @@ def make_unique(lst, get_id=None):
yield x
else:
for x in seq:
x = get_id(x)
if x not in seen:
seen.add(x)
_id = get_id(x)
if _id not in seen:
seen.add(_id)
yield x

return list(_make_unique(lst, get_id))
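
The make_unique hunk above is easy to misread: the old loop overwrote x with get_id(x), so callers got back ids instead of the original items; the fix computes the id only for deduplication and keeps yielding the objects themselves. A simplified, list-based sketch of the two behaviours (the Concept class below is an illustrative stand-in, not the real one):

# Illustrative example, not part of the diff.
def make_unique_old(lst, get_id):
    seen, out = set(), []
    for x in lst:
        x = get_id(x)          # bug: the element is replaced by its id...
        if x not in seen:
            seen.add(x)
            out.append(x)      # ...so ids are returned instead of the objects
    return out

def make_unique_new(lst, get_id):
    seen, out = set(), []
    for x in lst:
        _id = get_id(x)        # fix: the id is only used for deduplication
        if _id not in seen:
            seen.add(_id)
            out.append(x)      # the original object is kept
    return out

class Concept:
    def __init__(self, id_):
        self.id = id_

a, b = Concept("c1"), Concept("c2")
assert make_unique_old([a, b, a], lambda c: c.id) == ["c1", "c2"]   # ids, deduplicated
assert make_unique_new([a, b, a], lambda c: c.id) == [a, b]         # concepts, deduplicated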
@@ -264,6 +264,7 @@ class AtomNodeParser(BaseNodeParser):
concept_parser.lock()

concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
#self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
if not concepts:
for concept_parser in concept_parser_helpers:
concept_parser.eat_unrecognized(token, pos)

@@ -525,7 +525,7 @@ class CNC(CN):
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.compiled
if self.compiled == to_compare:
if self.compiled == to_compare: # expanded form to ease debugging
return True
else:
return False
@@ -673,7 +673,8 @@ class BaseNodeParser(BaseParser):

concept = to_map(self, concept) if to_map else concept
result.append(concept)
return result + custom_concepts
return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)

return custom_concepts if custom else None
@@ -707,16 +708,20 @@ class BaseNodeParser(BaseParser):
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
sheerka = context.sheerka
res = {}

def resolve_concepts(concept_str):
c_key, c_id = core.utils.unstr_concept(concept_str)
if c_id in already_seen:
return ChickenAndEggError(already_seen)

already_seen.add(c_id)

resolved = set()
to_resolve = set()
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
chicken_and_egg = set()

if concept.id in already_seen:
raise ChickenAndEggError(already_seen)
else:
already_seen.add(concept.id)
concept = sheerka.get_by_id(c_id)

if sheerka.isaset(context, concept):
concepts = sheerka.get_set_elements(context, concept)
@@ -730,11 +735,18 @@ class BaseNodeParser(BaseParser):
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword)

for concept_to_resolve_str in to_resolve:
resolved |= resolve_concepts(concept_to_resolve_str)
res = resolve_concepts(concept_to_resolve_str)
if isinstance(res, ChickenAndEggError):
chicken_and_egg |= res.concepts
else:
resolved |= res
to_resolve.clear()

return resolved
if len(resolved) == 0 and len(chicken_and_egg) > 0:
raise ChickenAndEggError(chicken_and_egg)
else:
return resolved

res = {}
for k, v in concepts_by_first_keyword.items():
if k.startswith("c:|"):
try:
@@ -744,8 +756,16 @@ class BaseNodeParser(BaseParser):
res.setdefault(resolved, []).extend(v)
except ChickenAndEggError as ex:
context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
# res[k] = sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG,
# body=[sheerka.get_by_id(c) for c in ex.concepts])
concepts_in_recursion = ex.concepts
# make sure to have all the parents
for parent in v:
concepts_in_recursion.add(parent)

for concept_id in concepts_in_recursion:
# make sure we keep the longest chain
old = sheerka.chicken_and_eggs.get(concept_id)
if old is None or len(old) < len(ex.concepts):
sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)
else:
res.setdefault(k, []).extend(v)
@@ -755,6 +775,53 @@ class BaseNodeParser(BaseParser):

return sheerka.ret("BaseNodeParser", True, res)

@staticmethod
def get_referenced_concepts(context, concept_id, already_seen):
"""
Gets all the tokens that may allow recognizing concept concept_id
Basically, it returns all the starting tokens for concept concept_id
CHICKEN_AND_EGG is returned when circular references are found
:param context:
:param concept_id:
:param already_seen:
:return:
"""
if concept_id in already_seen:
return ChickenAndEggError(already_seen)

already_seen.add(concept_id)

resolved = set()
to_resolve = set()
chicken_and_egg = set()
sheerka = context.sheerka
concept = sheerka.get_by_id(concept_id)

if sheerka.isaset(context, concept):
concepts = sheerka.get_set_elements(context, concept)
else:
concepts = [concept]

for concept in concepts:
BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
for keyword in keywords:
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword)

for concept_to_resolve_str in to_resolve:
c_key, c_id = core.utils.unstr_concept(concept_to_resolve_str)
res = BaseNodeParser.get_referenced_concepts(context, c_id, already_seen)
if isinstance(res, ChickenAndEggError):
chicken_and_egg |= res.concepts
else:
resolved |= res
to_resolve.clear()

if len(resolved) == 0 and len(chicken_and_egg) > 0:
raise ChickenAndEggError(chicken_and_egg)
else:
return resolved
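
get_referenced_concepts above collects the starting tokens of a concept by walking its references; when it meets a concept already on the current path it does not raise immediately, it returns a ChickenAndEggError and lets the caller keep whatever the other branches resolved, raising only if nothing at all could be resolved. A standalone sketch of that resolve-or-report pattern over a plain dependency dict (the data and the "c:" prefix convention here are illustrative, not taken from the source):

# Illustrative example, not part of the diff.
class ChickenAndEggError(Exception):
    def __init__(self, concepts):
        super().__init__(f"circular reference: {concepts}")
        self.concepts = concepts

def referenced_tokens(deps, concept_id, already_seen):
    if concept_id in already_seen:
        return ChickenAndEggError(set(already_seen))    # report the cycle, do not raise yet
    already_seen.add(concept_id)

    resolved, chicken_and_egg = set(), set()
    for keyword in deps[concept_id]:
        if not keyword.startswith("c:"):
            resolved.add(keyword)                       # a literal starting token
            continue
        res = referenced_tokens(deps, keyword[2:], already_seen)
        if isinstance(res, ChickenAndEggError):
            chicken_and_egg |= res.concepts             # remember who is in the loop
        else:
            resolved |= res

    if not resolved and chicken_and_egg:
        raise ChickenAndEggError(chicken_and_egg)       # nothing usable at all: escalate
    return resolved

deps = {"hundreds": {"c:number"}, "number": {"'one'", "c:hundreds"}}
print(referenced_tokens(deps, "hundreds", set()))       # {"'one'"}: usable despite the cycle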

@staticmethod
def resolve_sya_associativity_and_precedence(context, sya):
pass
@@ -38,7 +38,7 @@ class NonTerminalNode(LexerNode):
self.children = children

def __repr__(self):
name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
name = "Node:" + (self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__)
if len(self.children) > 0:
sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
else:
@@ -69,7 +69,7 @@ class TerminalNode(LexerNode):
self.value = value

def __repr__(self):
name = self.parsing_expression.rule_name or ""
name = "Node:" + (self.parsing_expression.rule_name or "")
return name + f"'{self.value}'"

def __eq__(self, other):
@@ -186,7 +186,7 @@ class Sequence(ParsingExpression):

class OrderedChoice(ParsingExpression):
"""
Will match one among multiple
Will match the first one among multiple
It will stop at the first match (so the order of definition is important)
"""
@@ -211,6 +211,42 @@ class OrderedChoice(ParsingExpression):
return self.add_rule_name_if_needed(f"({to_str})")


class LongestChoice(ParsingExpression):
"""
Will match the longest one among multiple
All elements will be tested, so the order is not important
The behaviour when multiple candidates are found is not defined yet
"""

def _parse(self, parser_helper):
init_pos = parser_helper.pos
longest_node = None
end_pos = -1

for e in self.nodes:
node = e.parse(parser_helper)
if node:
if longest_node is None or node.end > longest_node.end:
longest_node = node
end_pos = parser_helper.pos

parser_helper.seek(init_pos) # backtrack

if longest_node is None:
return None

parser_helper.seek(end_pos)
return NonTerminalNode(self,
init_pos,
longest_node.end,
parser_helper.parser.parser_input.tokens[init_pos: longest_node.end + 1],
[longest_node])

def __repr__(self):
to_str = "# ".join(repr(n) for n in self.elements)
return self.add_rule_name_if_needed(f"({to_str})")
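
LongestChoice is the piece that makes the case in the commit title work: when a group such as 'number' sits in front of 'hundred', every member of the group is tried from the same position and the longest match wins, whereas an ordered choice stops at the first member that matches, which may be too short for the rest of the sequence (and, for members coming from a set, which member comes "first" is not even deterministic). A minimal token-level illustration, independent of the real ParsingExpression classes (all names below are illustrative):

# Illustrative example, not part of the diff.
def match_words(words):
    """Return a matcher: (tokens, pos) -> position after the words, or None."""
    def matcher(tokens, pos):
        return pos + len(words) if tokens[pos:pos + len(words)] == words else None
    return matcher

def ordered_choice(alternatives, tokens, pos):
    for alt in alternatives:                    # stops at the first alternative that matches
        end = alt(tokens, pos)
        if end is not None:
            return end
    return None

def longest_choice(alternatives, tokens, pos):
    ends = [alt(tokens, pos) for alt in alternatives]   # every alternative is tried
    ends = [e for e in ends if e is not None]
    return max(ends, default=None)                      # the longest match wins

# 'number' as a group of alternatives; the short form happens to come first.
number = [match_words(["twenty"]), match_words(["twenty", "five"])]
hundred = match_words(["hundred"])
tokens = ["twenty", "five", "hundred"]

pos = ordered_choice(number, tokens, 0)   # matches only "twenty"
print(hundred(tokens, pos))               # None: "hundred" is not the next token, the sequence fails

pos = longest_choice(number, tokens, 0)   # matches "twenty five"
print(hundred(tokens, pos))               # 3: the whole input is consumed

The docstring above leaves the behaviour with several candidates of the same length undefined; this sketch simply takes the maximum end position.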


class Optional(ParsingExpression):
"""
Will match or not the elements
@@ -386,7 +422,12 @@ class StrMatch(Match):
self.skip_white_space = skip_whitespace

def __repr__(self):
return self.add_rule_name_if_needed(f"'{self.to_match}'")
text = self.to_match
if not self.ignore_case:
text += "#!ic"
if not self.skip_white_space:
text += "#!sw"
return self.add_rule_name_if_needed(f"'{text}'")

def __eq__(self, other):
if not super().__eq__(other):
@@ -395,7 +436,9 @@ class StrMatch(Match):
if not isinstance(other, StrMatch):
return False

return self.to_match == other.to_match and self.ignore_case == other.ignore_case
return self.to_match == other.to_match and \
self.ignore_case == other.ignore_case and \
self.skip_white_space == other.skip_white_space

def _parse(self, parser_helper):
token = parser_helper.get_token()
@@ -766,7 +809,7 @@ class BnfConceptParserHelper:
_add_prop(_concept, _underlying.parsing_expression.rule_name, value)
_concept.metadata.need_validation = True

if isinstance(_underlying, NonTerminalNode):
elif isinstance(_underlying, NonTerminalNode):
for child in _underlying.children:
_process_rule_name(_concept, child)

@@ -789,6 +832,15 @@ class UnderConstruction:
concept_id: str


@dataclass()
class ToUpdate:
parent_id: int
parsing_expression: ParsingExpression

def __hash__(self):
return hash(self.parent_id)
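
ToUpdate hashes on parent_id only while the dataclass-generated equality still compares both fields, so a set of ToUpdate entries can hold several pending parsing expressions for the same parent concept and only collapses exact duplicates. A small standalone check of that behaviour (the parsing_expression field is simplified to a string here):

# Illustrative example, not part of the diff.
from dataclasses import dataclass

@dataclass()
class ToUpdate:
    parent_id: int
    parsing_expression: str    # simplified stand-in for the real ParsingExpression

    def __hash__(self):        # hash on the parent only; equality still uses both fields
        return hash(self.parent_id)

pending = {ToUpdate(1, "a"), ToUpdate(1, "b"), ToUpdate(1, "a")}
print(len(pending))            # 2: same parent_id, but only exact duplicates collapse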


class BnfNodeParser(BaseNodeParser):
def __init__(self, **kwargs):
super().__init__("BnfNode", 50, **kwargs)
@@ -824,6 +876,34 @@ class BnfNodeParser(BaseNodeParser):

return valid_parser_helpers

@staticmethod
def get_expression_from_concept_name(name):
"""
Create the parsing expression from the name
This function differs from BNFParser.parse() as it does not try to resolve identifiers into concepts
>>> assert get_expression_from_concept_name('one hundred') == Sequence(StrMatch("one"), StrMatch("hundred"))
while BNFParser.parse("one hundred") will look for concept 'one' and concept 'hundred'
:param name:
:return:
"""
if name is None or name.strip() == "":
return []

res = []
tokens = Tokenizer(name, yield_eof=False)
for token in tokens:
if token.type == TokenKind.WHITESPACE:
continue
elif token.type == TokenKind.STRING:
sub_tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
for sub_token in sub_tokens[:-1]:
res.append(StrMatch(sub_token.str_value, skip_whitespace=False))
res.append(StrMatch(sub_tokens[-1].str_value))
else:
res.append(StrMatch(token.str_value))

return res[0] if len(res) == 1 else Sequence(*res)

def get_concepts_sequences(self):
"""
Main method that parses the tokens and extracts the concepts
@@ -900,10 +980,10 @@ class BnfNodeParser(BaseNodeParser):
def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False):

if isinstance(parsing_expression, ConceptExpression):
if parsing_expression.concept in already_found:
if parsing_expression.concept.id in already_found:
return True
already_found.add(parsing_expression.concept)
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, False)
already_found.add(parsing_expression.concept.id)
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, only_first)

if isinstance(parsing_expression, Sequence):
# for sequence, we need to check all nodes
@@ -930,43 +1010,93 @@ class BnfNodeParser(BaseNodeParser):
return False
return False

if isinstance(parsing_expression, LongestChoice):
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
if self.check_for_infinite_recursion(node, already_found_for_current_node, True):
already_found.update(already_found_for_current_node)
return True
return False

if isinstance(parsing_expression, UnderConstruction):
if parsing_expression.concept_id in already_found:
return True
already_found.add(parsing_expression.concept_id)

return False
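
Note how the LongestChoice branch of check_for_infinite_recursion copies already_found for each alternative: two sibling branches may legitimately reach the same concept without forming a cycle, so the visited set must only grow along a single path and is merged back only when a cycle is actually found. A standalone sketch of that idea on a small expression tree (the tuple encoding and the data are illustrative):

# Illustrative example, not part of the diff.
def has_cycle(expr, visited):
    kind = expr[0]
    if kind == "concept":                        # ("concept", id, sub_expression)
        _, cid, sub = expr
        if cid in visited:
            return True
        visited.add(cid)
        return has_cycle(sub, visited)
    if kind == "choice":                         # ("choice", branch, branch, ...)
        for branch in expr[1:]:
            branch_visited = set(visited)        # each branch starts from the same path
            if has_cycle(branch, branch_visited):
                visited |= branch_visited        # keep the ids involved in the cycle
                return True
        return False
    return False                                 # ("match", text): a terminal never recurses

# The same concept in two sibling branches is not a cycle...
two_branches = ("choice",
                ("concept", "number", ("match", "'one'")),
                ("concept", "hundreds", ("concept", "number", ("match", "'one'"))))
print(has_cycle(two_branches, set()))            # False (a shared visited set would say True)

# ...but a concept that reaches itself is.
looping = ("concept", "hundreds",
           ("choice", ("match", "'hundred'"), ("concept", "hundreds", ("match", "'x'"))))
print(has_cycle(looping, set()))                 # True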

def get_parsing_expression(self, context, concept):
"""
Compute the parsing expression for a given concept
:param context:
:param concept:
:return:
"""
if concept.id in self.concepts_grammars:
return self.concepts_grammars.get(concept.id)

grammar = self.concepts_grammars.copy()
to_resolve = {} # the key is the instance id of the parsing expression
isa_concepts = set()
self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts)
# internal cache of already computed parsing expressions to use during the recursion
grammar = {}

for _id, pe in to_resolve.items():
for i, node in enumerate(pe.nodes):
if isinstance(node, UnderConstruction):
pe.nodes[i] = grammar.get(node.concept_id)
# concepts that are not totally resolved, because they reference parsing expressions under construction
to_update = set() # set of ToUpdate entries for parsing expressions that still contain an UnderConstruction node

# during the parsing of the concept, we will resolve other concepts
# keep track of the concepts that can safely be added to self.concepts_grammars
to_keep = {concept.id}
desc = f"Get parsing expression for concept {concept}"
with context.push(BuiltinConcepts.INIT_BNF, concept,
who=self.name,
obj=concept,
root_concept=concept,
desc=desc) as sub_context:
# get the parsing expression
ret = self.resolve_concept_parsing_expression(sub_context, concept, grammar, to_update, to_keep)

# check and update parsing expressions that are still under construction
# Note that we only update the concepts that will update concepts_grammars
# because pe.nodes may be large
for item in to_update:
if item.parent_id in to_keep:
pe = item.parsing_expression
for i, node in enumerate(pe.nodes):
if isinstance(node, UnderConstruction):
pe.nodes[i] = grammar.get(node.concept_id)

# check for infinite recursion.
# We are adding a new concept. Does it create an infinite recursion?
concepts_in_recursion = set()
if self.check_for_infinite_recursion(pe, concepts_in_recursion):
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body={c.id for c in concepts_in_recursion})
for concept in concepts_in_recursion:
grammar[concept.id] = cycle
if self.check_for_infinite_recursion(ret, concepts_in_recursion):
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
for concept_id in concepts_in_recursion:
grammar[concept_id] = cycle

# Make sure you do not put isa concepts in cache
# why:
# twenties = 'twenty' number where number < 10
# hundreds = number 'hundred' where number < 99
# the concept of number depends on its usage
for concept_id in [c for c in grammar if c not in isa_concepts]:
self.concepts_grammars.put(concept_id, grammar[concept_id])
# update, in case of infinite circular recursion
ret = grammar[concept.id]

return self.concepts_grammars.get(concept.id)
# finally, update concept grammar
for k, v in grammar.items():
if k in to_keep:
self.concepts_grammars.put(k, v)

def resolve_concept_parsing_expression(self, context, concept, grammar, to_resolve, isa_concepts):
if concept.id in grammar:
# not quite sure that it is a good idea.
# Why do we want to corrupt previous valid entries?
if context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG):
self.concepts_grammars.put(k, v)

sub_context.add_values(return_values=ret)

return ret
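
get_parsing_expression now builds everything in a local grammar dict: a concept that is referenced while it is still being resolved is represented by an UnderConstruction placeholder, the expressions holding such placeholders are recorded in to_update, and once the recursion is finished the placeholders are swapped for the real expressions before the entries listed in to_keep are published to concepts_grammars. A standalone, simplified sketch of that placeholder-then-patch pattern for mutually referencing definitions (the data and names are illustrative; the to_keep filtering and cycle handling are left out):

# Illustrative example, not part of the diff.
from dataclasses import dataclass, field

@dataclass
class UnderConstruction:
    name: str

@dataclass
class Rule:                                    # stand-in for a ParsingExpression with sub-nodes
    name: str
    nodes: list = field(default_factory=list)

defs = {"number": ["'one'", "hundreds"],       # mutually referencing definitions
        "hundreds": ["number", "'hundred'"]}
grammar, to_update = {}, []

def build(name):
    if name in grammar:                        # already resolved, or still under construction
        return grammar[name]
    grammar[name] = UnderConstruction(name)    # mark as under construction before recursing
    rule = Rule(name)
    for ref in defs[name]:
        node = build(ref) if ref in defs else ref
        if isinstance(node, UnderConstruction):
            to_update.append(rule)             # this rule will need patching later
        rule.nodes.append(node)
    grammar[name] = rule                       # the real expression replaces the placeholder
    return rule

build("number")
for rule in to_update:                         # patch the placeholders once everything is built
    rule.nodes = [grammar[n.name] if isinstance(n, UnderConstruction) else n for n in rule.nodes]

print(grammar["hundreds"].nodes[0] is grammar["number"])   # True: the cycle is closed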

def resolve_concept_parsing_expression(self, context, concept, grammar, to_update, to_keep):
if concept.id in self.concepts_grammars: # validated entry
return self.concepts_grammars.get(concept.id)

if concept.id in grammar: # under construction entry
return grammar.get(concept.id)

desc = f"Get parsing expression for '{concept}'"
desc = f"Resolve concept parsing expression for '{concept}'"
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
if not concept.bnf: # to save a function call. Not sure it's worth it.
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
@@ -979,52 +1109,41 @@ class BnfNodeParser(BaseNodeParser):
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
ssc.add_inputs(expression=expression)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep)
ssc.add_values(return_values=resolved)

elif sheerka.isaset(context, concept):
desc = f"Concept is a group. Resolving parsing expression using 'isa'"
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
ssc.add_inputs(concept=concept)
isa_concepts.add(concept.id)
concepts_in_group = self.sheerka.get_set_elements(ssc, concept)

# concepts_in_group comes from a set, so the order of its elements is not guaranteed
# to avoid random failure (i.e. random CHICKEN_AND_EGG), we need to rearrange
# We also remove the root concept (the one from get_parsing_expression())

root_concept_as_set = set(context.search(
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
get_obj=lambda ec: ec.obj,
stop=lambda ec: ec.action != BuiltinConcepts.INIT_BNF)) # there is only one item in the set
root_concept = list(root_concept_as_set)[0]
reordered = []
valid_concepts = []
for c in concepts_in_group:
if c.id == root_concept.id:
if c.id == context.root_concept.id:
continue

# I do not guarantee the same order every time, but I minimize the ChickenAndEgg random issue
if c.metadata.definition_type == DEFINITION_TYPE_BNF or sheerka.isaset(ssc, c):
reordered.append(c)
else:
reordered.insert(0, c)
c_pe = self.resolve_concept_parsing_expression(context, c, grammar, to_update, to_keep)
if self.check_for_infinite_recursion(c_pe, {concept.id}, True):
continue

nodes = [ConceptExpression(c, rule_name=c.name) for c in reordered]
valid_concepts.append(c)

nodes = [ConceptExpression(c, rule_name=c.name) for c in valid_concepts]
resolved = self.resolve_parsing_expression(ssc,
OrderedChoice(*nodes),
LongestChoice(*nodes),
grammar,
to_resolve,
isa_concepts)
to_update,
to_keep)
ssc.add_values(concepts_in_group=concepts_in_group)
ssc.add_values(return_values=resolved)

else:
desc = f"Concept is a simple concept."
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
tokens = Tokenizer(concept.name, yield_eof=False)
nodes = [StrMatch(token.strip_quote) for token in tokens]
expression = nodes[0] if len(nodes) == 1 else Sequence(nodes)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts)
to_keep.add(concept.id)
expression = self.get_expression_from_concept_name(concept.name)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep)

grammar[concept.id] = resolved
@@ -1035,7 +1154,7 @@ class BnfNodeParser(BaseNodeParser):
sub_context.add_values(return_values=resolved)
return resolved

def resolve_parsing_expression(self, context, expression, grammar, to_resolve, isa_concepts):
def resolve_parsing_expression(self, context, expression, grammar, to_update, to_keep):

if isinstance(expression, str):
ret = StrMatch(expression, ignore_case=self.ignore_case)
@@ -1051,7 +1170,7 @@ class BnfNodeParser(BaseNodeParser):
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
return self.add_error(unknown_concept)

pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts)
pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_update, to_keep)

if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it
@@ -1060,7 +1179,7 @@ class BnfNodeParser(BaseNodeParser):
# return pe # we are looking for ourself, just return it

if isinstance(pe, UnderConstruction):
to_resolve[id(expression)] = expression
to_update.add(ToUpdate(context.obj.id, expression))

expression.nodes = [pe]
expression.rule_name = expression.rule_name or concept.name
@@ -1073,17 +1192,18 @@ class BnfNodeParser(BaseNodeParser):

elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \
isinstance(expression, LongestChoice) or \
isinstance(expression, ZeroOrMore) or \
isinstance(expression, OneOrMore) or \
isinstance(expression, Optional):
ret = expression
ret.nodes = []
for e in ret.elements:
pe = self.resolve_parsing_expression(context, e, grammar, to_resolve, isa_concepts)
pe = self.resolve_parsing_expression(context, e, grammar, to_update, to_keep)
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it
if isinstance(pe, UnderConstruction):
to_resolve[id(ret)] = ret # remember that there is an unresolved parsing expression
to_update.add(ToUpdate(context.obj.id, ret))
ret.nodes.append(pe)

else:
@@ -1094,8 +1214,8 @@ class BnfNodeParser(BaseNodeParser):
expression.sep = self.resolve_parsing_expression(context,
expression.sep,
grammar,
to_resolve,
isa_concepts)
to_update,
to_keep)

return ret