Fixed BnfNodeParser to allow expressions like 'number hundred' when number is a group

This commit is contained in:
2020-06-27 18:56:04 +02:00
parent d4468da8a3
commit 2c5840752a
14 changed files with 593 additions and 228 deletions
+6 -2
View File
@@ -71,8 +71,12 @@ def concept nineties from bnf ninety number where number < 10 as ninety + number
nineties isa number nineties isa number
# def concept hundreds1 from number 'hundred' where number < 10 as number * 100 # def concept hundreds1 from number 'hundred' where number < 10 as number * 100
# def concept hundreds2 from number=number1 'hundred' 'and' number=number2 where number1 < 10 and number2 < 100 as number1 * 100 + number2 # def concept hundreds2 from number=number1 'hundred' 'and' number=number2 where number1 < 10 and number2 < 100 as number1 * 100 + number2
# def concept one hundred as 100 def concept one hundred as 100
# c:one hundred: isa number one hundred isa number
# hundreds1 isa number # hundreds1 isa number
# hundreds2 isa number # hundreds2 isa number
def concept thousands from bnf number 'thousand' where number < 1000 as number * 1000
thousands isa number
def concept history as history() def concept history as history()
def concept plus from a plus b as a + b
def concept mult from a mult b as a * b
+8
View File
@@ -36,6 +36,7 @@ class Sheerka(Concept):
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions" CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"
RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY = "Resolved_Concepts_Sya_Definitions" RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY = "Resolved_Concepts_Sya_Definitions"
CONCEPTS_GRAMMARS_ENTRY = "Concepts_Grammars" CONCEPTS_GRAMMARS_ENTRY = "Concepts_Grammars"
CHICKEN_AND_EGG_CONCEPTS_ENTRY = "Chicken_And_Egg_Concepts"
CONCEPTS_KEYS_ENTRY = "Concepts_Keys" CONCEPTS_KEYS_ENTRY = "Concepts_Keys"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
@@ -105,6 +106,10 @@ class Sheerka(Concept):
@property
def concepts_grammars(self):
    """
    Cache registered under CONCEPTS_GRAMMARS_ENTRY
    (the parsers store the parsing expression of a concept under its concept id)
    :return: the underlying cache object
    """
    return self.cache_manager.caches[self.CONCEPTS_GRAMMARS_ENTRY].cache

@property
def chicken_and_eggs(self):
    """
    Cache registered under CHICKEN_AND_EGG_CONCEPTS_ENTRY
    (for a concept id, the ids of the concepts found in the same circular reference)
    :return: the underlying cache object
    """
    return self.cache_manager.caches[self.CHICKEN_AND_EGG_CONCEPTS_ENTRY].cache
def bind_service_method(self, bound_method, as_name=None): def bind_service_method(self, bound_method, as_name=None):
@@ -227,6 +232,9 @@ class Sheerka(Concept):
cache = Cache() cache = Cache()
self.cache_manager.register_cache(self.CONCEPTS_GRAMMARS_ENTRY, cache, persist=False) self.cache_manager.register_cache(self.CONCEPTS_GRAMMARS_ENTRY, cache, persist=False)
cache = Cache()
self.cache_manager.register_cache(self.CHICKEN_AND_EGG_CONCEPTS_ENTRY, cache, persist=False)
def initialize_services(self): def initialize_services(self):
""" """
Introspect to find services and bind them Introspect to find services and bind them
@@ -26,7 +26,7 @@ class SheerkaEvaluateConcept(BaseService):
parent = context.get_parent() parent = context.get_parent()
while parent is not None: while parent is not None:
if parent.who == context.who and parent.obj == concept: if parent.who == context.who and parent.obj == concept and parent.obj.compiled == concept.compiled:
return True return True
parent = parent.get_parent() parent = parent.get_parent()
@@ -147,6 +147,11 @@ class SheerkaEvaluateConcept(BaseService):
def resolve(self, context, to_resolve, current_prop, current_concept, force_evaluation): def resolve(self, context, to_resolve, current_prop, current_concept, force_evaluation):
def get_path(context_, prop_name):
    """
    Build the dotted path of the property being evaluated (used in the log description)
    :param context_: execution context; its 'path' attribute is the prefix, "<N/A>" when it has none
    :param prop_name: property name, or a ConceptParts member (its .name is used in that case)
    :return: "<prefix>.<property name>"
    """
    prefix = getattr(context_, "path", "<N/A>")
    # test the argument that was passed in, not the 'current_prop' of the enclosing function
    value = prop_name.name if isinstance(prop_name, ConceptParts) else prop_name
    return prefix + "." + value
if isinstance(to_resolve, DoNotResolve): if isinstance(to_resolve, DoNotResolve):
return to_resolve.value return to_resolve.value
@@ -161,12 +166,14 @@ class SheerkaEvaluateConcept(BaseService):
sub_context.add_values(return_values=ret_val) sub_context.add_values(return_values=ret_val)
return ret_val.body return ret_val.body
desc = f"Evaluating {current_prop} (concept={current_concept})" path = get_path(context, current_prop)
desc = f"Evaluating {path} (concept={current_concept})"
context.log(desc, self.NAME) context.log(desc, self.NAME)
with context.push(BuiltinConcepts.EVALUATING_CONCEPT, with context.push(BuiltinConcepts.EVALUATING_CONCEPT,
current_prop, current_prop,
desc=desc, desc=desc,
obj=current_concept) as sub_context: obj=current_concept,
path=path) as sub_context:
if force_evaluation: if force_evaluation:
sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED)
@@ -76,6 +76,7 @@ class SheerkaSetsManager(BaseService):
self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set)) self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set))
self.sets.put(concept_set.id, concept.id) self.sets.put(concept_set.id, concept.id)
self.concepts_in_set.delete(concept_set.id)
return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS)) return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
def add_concepts_to_set(self, context, concepts, concept_set): def add_concepts_to_set(self, context, concepts, concept_set):
@@ -95,6 +96,7 @@ class SheerkaSetsManager(BaseService):
concept_set=concept_set) concept_set=concept_set)
else: else:
body = self.sheerka.new(BuiltinConcepts.SUCCESS) body = self.sheerka.new(BuiltinConcepts.SUCCESS)
self.concepts_in_set.delete(concept_set.id)
return self.sheerka.ret(self.NAME, len(already_in_set) != len(concepts), body) return self.sheerka.ret(self.NAME, len(already_in_set) != len(concepts), body)
+3 -3
View File
@@ -205,9 +205,9 @@ def make_unique(lst, get_id=None):
yield x yield x
else: else:
for x in seq: for x in seq:
x = get_id(x) _id = get_id(x)
if x not in seen: if _id not in seen:
seen.add(x) seen.add(_id)
yield x yield x
return list(_make_unique(lst, get_id)) return list(_make_unique(lst, get_id))
+1
View File
@@ -264,6 +264,7 @@ class AtomNodeParser(BaseNodeParser):
concept_parser.lock() concept_parser.lock()
concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name) concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
#self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
if not concepts: if not concepts:
for concept_parser in concept_parser_helpers: for concept_parser in concept_parser_helpers:
concept_parser.eat_unrecognized(token, pos) concept_parser.eat_unrecognized(token, pos)
+78 -11
View File
@@ -525,7 +525,7 @@ class CNC(CN):
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY} to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
else: else:
to_compare = other.concept.compiled to_compare = other.concept.compiled
if self.compiled == to_compare: if self.compiled == to_compare: # expanded form to ease the debug
return True return True
else: else:
return False return False
@@ -673,7 +673,8 @@ class BaseNodeParser(BaseParser):
concept = to_map(self, concept) if to_map else concept concept = to_map(self, concept) if to_map else concept
result.append(concept) result.append(concept)
return result + custom_concepts return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)
return custom_concepts if custom else None return custom_concepts if custom else None
@@ -707,16 +708,20 @@ class BaseNodeParser(BaseParser):
@staticmethod @staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword): def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
sheerka = context.sheerka sheerka = context.sheerka
res = {}
def resolve_concepts(concept_str): def resolve_concepts(concept_str):
c_key, c_id = core.utils.unstr_concept(concept_str)
if c_id in already_seen:
return ChickenAndEggError(already_seen)
already_seen.add(c_id)
resolved = set() resolved = set()
to_resolve = set() to_resolve = set()
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1]) chicken_and_egg = set()
if concept.id in already_seen: concept = sheerka.get_by_id(c_id)
raise ChickenAndEggError(already_seen)
else:
already_seen.add(concept.id)
if sheerka.isaset(context, concept): if sheerka.isaset(context, concept):
concepts = sheerka.get_set_elements(context, concept) concepts = sheerka.get_set_elements(context, concept)
@@ -730,11 +735,18 @@ class BaseNodeParser(BaseParser):
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword) (to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
for concept_to_resolve_str in to_resolve: for concept_to_resolve_str in to_resolve:
resolved |= resolve_concepts(concept_to_resolve_str) res = resolve_concepts(concept_to_resolve_str)
if isinstance(res, ChickenAndEggError):
chicken_and_egg |= res.concepts
else:
resolved |= res
to_resolve.clear()
if len(resolved) == 0 and len(chicken_and_egg) > 0:
raise ChickenAndEggError(chicken_and_egg)
else:
return resolved return resolved
res = {}
for k, v in concepts_by_first_keyword.items(): for k, v in concepts_by_first_keyword.items():
if k.startswith("c:|"): if k.startswith("c:|"):
try: try:
@@ -744,8 +756,16 @@ class BaseNodeParser(BaseParser):
res.setdefault(resolved, []).extend(v) res.setdefault(resolved, []).extend(v)
except ChickenAndEggError as ex: except ChickenAndEggError as ex:
context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}") context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
# res[k] = sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, concepts_in_recursion = ex.concepts
# body=[sheerka.get_by_id(c) for c in ex.concepts]) # make sure to have all the parents
for parent in v:
concepts_in_recursion.add(parent)
for concept_id in concepts_in_recursion:
# make sure we keep the longest chain
old = sheerka.chicken_and_eggs.get(concept_id)
if old is None or len(old) < len(ex.concepts):
sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)
else: else:
res.setdefault(k, []).extend(v) res.setdefault(k, []).extend(v)
@@ -755,6 +775,53 @@ class BaseNodeParser(BaseParser):
return sheerka.ret("BaseNodeParser", True, res) return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def get_referenced_concepts(context, concept_id, already_seen):
    """
    Gets all the tokens that may start concept concept_id
    First tokens that are themselves concept references ('c:|...') are followed recursively
    :param context: execution context (gives access to sheerka)
    :param concept_id: id of the concept to inspect
    :param already_seen: set of the concept ids already visited, updated in place
    :return: the set of starting tokens,
             or a ChickenAndEggError instance (returned, not raised) when concept_id was already visited
    :raises ChickenAndEggError: when no token was resolved and at least one circular reference was found
    """
    if concept_id in already_seen:
        return ChickenAndEggError(already_seen)
    already_seen.add(concept_id)

    sheerka = context.sheerka
    first_tokens = set()
    references = set()
    in_cycle = set()

    root = sheerka.get_by_id(concept_id)
    # a group is expanded into its elements
    members = sheerka.get_set_elements(context, root) if sheerka.isaset(context, root) else [root]

    for member in members:
        BaseNodeParser.ensure_bnf(context, member)  # need to make sure that it cannot fail
        for keyword in BaseNodeParser.get_first_tokens(sheerka, member):
            if keyword.startswith("c:|"):
                references.add(keyword)
            else:
                first_tokens.add(keyword)

    for reference in references:
        _, referenced_id = core.utils.unstr_concept(reference)
        outcome = BaseNodeParser.get_referenced_concepts(context, referenced_id, already_seen)
        if isinstance(outcome, ChickenAndEggError):
            in_cycle |= outcome.concepts
        else:
            first_tokens |= outcome
    references.clear()

    # only fail when the circular references left nothing usable
    if not first_tokens and in_cycle:
        raise ChickenAndEggError(in_cycle)
    return first_tokens
@staticmethod @staticmethod
def resolve_sya_associativity_and_precedence(context, sya): def resolve_sya_associativity_and_precedence(context, sya):
pass pass
+181 -61
View File
@@ -38,7 +38,7 @@ class NonTerminalNode(LexerNode):
self.children = children self.children = children
def __repr__(self): def __repr__(self):
name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__ name = "Node:" + (self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__)
if len(self.children) > 0: if len(self.children) > 0:
sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")" sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
else: else:
@@ -69,7 +69,7 @@ class TerminalNode(LexerNode):
self.value = value self.value = value
def __repr__(self): def __repr__(self):
name = self.parsing_expression.rule_name or "" name = "Node:" + (self.parsing_expression.rule_name or "")
return name + f"'{self.value}'" return name + f"'{self.value}'"
def __eq__(self, other): def __eq__(self, other):
@@ -186,7 +186,7 @@ class Sequence(ParsingExpression):
class OrderedChoice(ParsingExpression): class OrderedChoice(ParsingExpression):
""" """
Will match one among multiple Will match the first one among multiple
It will stop at the first match (so the order of definition is important) It will stop at the first match (so the order of definition is important)
""" """
@@ -211,6 +211,42 @@ class OrderedChoice(ParsingExpression):
return self.add_rule_name_if_needed(f"({to_str})") return self.add_rule_name_if_needed(f"({to_str})")
class LongestChoice(ParsingExpression):
    """
    Will match the longest one among multiple
    Every alternative is tried from the same start position, so the order of definition
    does not select the winner
    The behaviour when several candidates have the same length is not defined yet
    """

    def _parse(self, parser_helper):
        start = parser_helper.pos
        best = None
        best_pos = -1

        for alternative in self.nodes:
            candidate = alternative.parse(parser_helper)
            if candidate and (best is None or candidate.end > best.end):
                # remember the node and where the parser stood right after it
                best, best_pos = candidate, parser_helper.pos
            parser_helper.seek(start)  # backtrack

        if best is None:
            return None

        # move the parser just after the retained alternative
        parser_helper.seek(best_pos)
        matched_tokens = parser_helper.parser.parser_input.tokens[start: best.end + 1]
        return NonTerminalNode(self, start, best.end, matched_tokens, [best])

    def __repr__(self):
        alternatives = "# ".join([repr(element) for element in self.elements])
        return self.add_rule_name_if_needed(f"({alternatives})")
class Optional(ParsingExpression): class Optional(ParsingExpression):
""" """
Will match or not the elements Will match or not the elements
@@ -386,7 +422,12 @@ class StrMatch(Match):
self.skip_white_space = skip_whitespace self.skip_white_space = skip_whitespace
def __repr__(self): def __repr__(self):
return self.add_rule_name_if_needed(f"'{self.to_match}'") text = self.to_match
if not self.ignore_case:
text += "#!ic"
if not self.skip_white_space:
text += "#!sw"
return self.add_rule_name_if_needed(f"'{text}'")
def __eq__(self, other): def __eq__(self, other):
if not super().__eq__(other): if not super().__eq__(other):
@@ -395,7 +436,9 @@ class StrMatch(Match):
if not isinstance(other, StrMatch): if not isinstance(other, StrMatch):
return False return False
return self.to_match == other.to_match and self.ignore_case == other.ignore_case return self.to_match == other.to_match and \
self.ignore_case == other.ignore_case and \
self.skip_white_space == other.skip_white_space
def _parse(self, parser_helper): def _parse(self, parser_helper):
token = parser_helper.get_token() token = parser_helper.get_token()
@@ -766,7 +809,7 @@ class BnfConceptParserHelper:
_add_prop(_concept, _underlying.parsing_expression.rule_name, value) _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
_concept.metadata.need_validation = True _concept.metadata.need_validation = True
if isinstance(_underlying, NonTerminalNode): elif isinstance(_underlying, NonTerminalNode):
for child in _underlying.children: for child in _underlying.children:
_process_rule_name(_concept, child) _process_rule_name(_concept, child)
@@ -789,6 +832,15 @@ class UnderConstruction:
concept_id: str concept_id: str
@dataclass()
class ToUpdate:
    """
    Parsing expression that still holds an UnderConstruction node,
    recorded with the id of the concept that was being resolved at that time
    """
    # id of the concept being resolved (context.obj.id);
    # typed like UnderConstruction.concept_id, as both hold concept ids
    parent_id: str
    # expression whose nodes have to be patched once the grammar is known
    parsing_expression: ParsingExpression

    def __hash__(self):
        # explicit hash so that instances can be stored in a set; only the parent id is hashed
        return hash(self.parent_id)
class BnfNodeParser(BaseNodeParser): class BnfNodeParser(BaseNodeParser):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super().__init__("BnfNode", 50, **kwargs) super().__init__("BnfNode", 50, **kwargs)
@@ -824,6 +876,34 @@ class BnfNodeParser(BaseNodeParser):
return valid_parser_helpers return valid_parser_helpers
@staticmethod
def get_expression_from_concept_name(name):
    """
    Create the parsing expression from the name
    This function differs from BNFParser.parse() as it does not try to resolve identifiers into concepts
    Example:
        get_expression_from_concept_name('one hundred') == Sequence(StrMatch("one"), StrMatch("hundred"))
    while BNFParser.parse("one hundred") will look for concept 'one' and concept 'hundred'
    :param name: name of the concept
    :return: [] when name is None or blank, a single StrMatch when the name has one token,
             otherwise a Sequence of StrMatch
    """
    if name is None or name.strip() == "":
        return []

    res = []
    for token in Tokenizer(name, yield_eof=False):
        if token.type == TokenKind.WHITESPACE:
            continue

        if token.type == TokenKind.STRING:
            sub_tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
            if not sub_tokens:
                # empty quoted string: nothing to match (sub_tokens[-1] would raise IndexError)
                continue
            # inside the quotes, only the last token may be followed by skipped whitespace
            for sub_token in sub_tokens[:-1]:
                res.append(StrMatch(sub_token.str_value, skip_whitespace=False))
            res.append(StrMatch(sub_tokens[-1].str_value))
        else:
            res.append(StrMatch(token.str_value))

    return res[0] if len(res) == 1 else Sequence(*res)
def get_concepts_sequences(self): def get_concepts_sequences(self):
""" """
Main method that parses the tokens and extract the concepts Main method that parses the tokens and extract the concepts
@@ -900,10 +980,10 @@ class BnfNodeParser(BaseNodeParser):
def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False): def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False):
if isinstance(parsing_expression, ConceptExpression): if isinstance(parsing_expression, ConceptExpression):
if parsing_expression.concept in already_found: if parsing_expression.concept.id in already_found:
return True return True
already_found.add(parsing_expression.concept) already_found.add(parsing_expression.concept.id)
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, False) return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, only_first)
if isinstance(parsing_expression, Sequence): if isinstance(parsing_expression, Sequence):
# for sequence, we need to check all nodes # for sequence, we need to check all nodes
@@ -930,43 +1010,93 @@ class BnfNodeParser(BaseNodeParser):
return False return False
return False return False
if isinstance(parsing_expression, LongestChoice):
for node in parsing_expression.nodes:
already_found_for_current_node = already_found.copy()
if self.check_for_infinite_recursion(node, already_found_for_current_node, True):
already_found.update(already_found_for_current_node)
return True
return False
if isinstance(parsing_expression, UnderConstruction):
if parsing_expression.concept_id in already_found:
return True
already_found.add(parsing_expression.concept_id)
return False return False
def get_parsing_expression(self, context, concept): def get_parsing_expression(self, context, concept):
"""
Compute the parsing expression for a given concept
:param context:
:param concept:
:return:
"""
if concept.id in self.concepts_grammars: if concept.id in self.concepts_grammars:
return self.concepts_grammars.get(concept.id) return self.concepts_grammars.get(concept.id)
grammar = self.concepts_grammars.copy() # internal cache of already computed parsing expression to use during the recursion
to_resolve = {} # the key is the instance id of the parsing expression grammar = {}
isa_concepts = set()
self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts)
for _id, pe in to_resolve.items(): # concept that are not totally resolved, because they reference parsing expression under construction
to_update = set() # the key is the instance id of the parsing expression
# during the parsing of concept, we will resolve other concepts
# keep the track of the concepts that can safely be added to self.concept_grammars
to_keep = {concept.id}
desc = f"Get parsing expression for concept {concept}"
with context.push(BuiltinConcepts.INIT_BNF, concept,
who=self.name,
obj=concept,
root_concept=concept,
desc=desc) as sub_context:
# get the parsing expression
ret = self.resolve_concept_parsing_expression(sub_context, concept, grammar, to_update, to_keep)
# check and update parsing expression that are still under construction
# Note that we only update the concept that will update concepts_grammars
# because pe.node may be large
for item in to_update:
if item.parent_id in to_keep:
pe = item.parsing_expression
for i, node in enumerate(pe.nodes): for i, node in enumerate(pe.nodes):
if isinstance(node, UnderConstruction): if isinstance(node, UnderConstruction):
pe.nodes[i] = grammar.get(node.concept_id) pe.nodes[i] = grammar.get(node.concept_id)
# check for infinite recursion.
# We are adding a new concept. Does it create an infinite recursion ?
concepts_in_recursion = set() concepts_in_recursion = set()
if self.check_for_infinite_recursion(pe, concepts_in_recursion): if self.check_for_infinite_recursion(ret, concepts_in_recursion):
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body={c.id for c in concepts_in_recursion}) cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
for concept in concepts_in_recursion: for concept_id in concepts_in_recursion:
grammar[concept.id] = cycle grammar[concept_id] = cycle
# Make sure you do not put isa concepts in cache # update, in case of infinite circular recursion
# why : ret = grammar[concept.id]
# twenties = 'twenty' number where number < 10
# hundreds = number 'hundred' where number < 99
# the concept of number depends on its utilisation
for concept_id in [c for c in grammar if c not in isa_concepts]:
self.concepts_grammars.put(concept_id, grammar[concept_id])
# finally, update concept grammar
for k, v in grammar.items():
if k in to_keep:
self.concepts_grammars.put(k, v)
# not quite sure that it is a good idea.
# Why do we want to corrupt previous valid entries ?
if context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG):
self.concepts_grammars.put(k, v)
sub_context.add_values(return_values=ret)
return ret
def resolve_concept_parsing_expression(self, context, concept, grammar, to_update, to_keep):
if concept.id in self.concepts_grammars: # validated entry
return self.concepts_grammars.get(concept.id) return self.concepts_grammars.get(concept.id)
def resolve_concept_parsing_expression(self, context, concept, grammar, to_resolve, isa_concepts): if concept.id in grammar: # under construction entry
if concept.id in grammar:
return grammar.get(concept.id) return grammar.get(concept.id)
desc = f"Get parsing expression for '{concept}'" desc = f"Resolve concept parsing expression for '{concept}'"
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context: with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
if not concept.bnf: # to save a function call. Not sure it worth it. if not concept.bnf: # to save a function call. Not sure it worth it.
BaseNodeParser.ensure_bnf(sub_context, concept, self.name) BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
@@ -979,52 +1109,41 @@ class BnfNodeParser(BaseNodeParser):
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'" desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
ssc.add_inputs(expression=expression) ssc.add_inputs(expression=expression)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts) resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep)
ssc.add_values(return_values=resolved) ssc.add_values(return_values=resolved)
elif sheerka.isaset(context, concept): elif sheerka.isaset(context, concept):
desc = f"Concept is a group. Resolving parsing expression using 'isa'" desc = f"Concept is a group. Resolving parsing expression using 'isa'"
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
ssc.add_inputs(concept=concept) ssc.add_inputs(concept=concept)
isa_concepts.add(concept.id)
concepts_in_group = self.sheerka.get_set_elements(ssc, concept) concepts_in_group = self.sheerka.get_set_elements(ssc, concept)
# concepts_in_group comes from a set, so the order of its elements is not guaranteed valid_concepts = []
# to avoid random failure (ie random CHICKEN_AND_EGG), we need to rearrange
# We also remove the root concept (the one from get_parsing_expression())
root_concept_as_set = set(context.search(
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
get_obj=lambda ec: ec.obj,
stop=lambda ec: ec.action != BuiltinConcepts.INIT_BNF)) # there only one item in the set
root_concept = list(root_concept_as_set)[0]
reordered = []
for c in concepts_in_group: for c in concepts_in_group:
if c.id == root_concept.id: if c.id == context.root_concept.id:
continue continue
# I do not guaranty the same order every time, but I minimize the ChickenAndEgg random issue c_pe = self.resolve_concept_parsing_expression(context, c, grammar, to_update, to_keep)
if c.metadata.definition_type == DEFINITION_TYPE_BNF or sheerka.isaset(ssc, c): if self.check_for_infinite_recursion(c_pe, {concept.id}, True):
reordered.append(c) continue
else:
reordered.insert(0, c)
nodes = [ConceptExpression(c, rule_name=c.name) for c in reordered] valid_concepts.append(c)
nodes = [ConceptExpression(c, rule_name=c.name) for c in valid_concepts]
resolved = self.resolve_parsing_expression(ssc, resolved = self.resolve_parsing_expression(ssc,
OrderedChoice(*nodes), LongestChoice(*nodes),
grammar, grammar,
to_resolve, to_update,
isa_concepts) to_keep)
ssc.add_values(concepts_in_group=concepts_in_group) ssc.add_values(concepts_in_group=concepts_in_group)
ssc.add_values(return_values=resolved) ssc.add_values(return_values=resolved)
else: else:
desc = f"Concept is a simple concept." desc = f"Concept is a simple concept."
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
tokens = Tokenizer(concept.name, yield_eof=False) to_keep.add(concept.id)
nodes = [StrMatch(token.strip_quote) for token in tokens] expression = self.get_expression_from_concept_name(concept.name)
expression = nodes[0] if len(nodes) == 1 else Sequence(nodes) resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep)
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts)
grammar[concept.id] = resolved grammar[concept.id] = resolved
@@ -1035,7 +1154,7 @@ class BnfNodeParser(BaseNodeParser):
sub_context.add_values(return_values=resolved) sub_context.add_values(return_values=resolved)
return resolved return resolved
def resolve_parsing_expression(self, context, expression, grammar, to_resolve, isa_concepts): def resolve_parsing_expression(self, context, expression, grammar, to_update, to_keep):
if isinstance(expression, str): if isinstance(expression, str):
ret = StrMatch(expression, ignore_case=self.ignore_case) ret = StrMatch(expression, ignore_case=self.ignore_case)
@@ -1051,7 +1170,7 @@ class BnfNodeParser(BaseNodeParser):
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept) unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
return self.add_error(unknown_concept) return self.add_error(unknown_concept)
pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts) pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_update, to_keep)
if not isinstance(pe, (ParsingExpression, UnderConstruction)): if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it return pe # an error is detected, escalate it
@@ -1060,7 +1179,7 @@ class BnfNodeParser(BaseNodeParser):
# return pe # we are looking for ourself, just return it # return pe # we are looking for ourself, just return it
if isinstance(pe, UnderConstruction): if isinstance(pe, UnderConstruction):
to_resolve[id(expression)] = expression to_update.add(ToUpdate(context.obj.id, expression))
expression.nodes = [pe] expression.nodes = [pe]
expression.rule_name = expression.rule_name or concept.name expression.rule_name = expression.rule_name or concept.name
@@ -1073,17 +1192,18 @@ class BnfNodeParser(BaseNodeParser):
elif isinstance(expression, Sequence) or \ elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \ isinstance(expression, OrderedChoice) or \
isinstance(expression, LongestChoice) or \
isinstance(expression, ZeroOrMore) or \ isinstance(expression, ZeroOrMore) or \
isinstance(expression, OneOrMore) or \ isinstance(expression, OneOrMore) or \
isinstance(expression, Optional): isinstance(expression, Optional):
ret = expression ret = expression
ret.nodes = [] ret.nodes = []
for e in ret.elements: for e in ret.elements:
pe = self.resolve_parsing_expression(context, e, grammar, to_resolve, isa_concepts) pe = self.resolve_parsing_expression(context, e, grammar, to_update, to_keep)
if not isinstance(pe, (ParsingExpression, UnderConstruction)): if not isinstance(pe, (ParsingExpression, UnderConstruction)):
return pe # an error is detected, escalate it return pe # an error is detected, escalate it
if isinstance(pe, UnderConstruction): if isinstance(pe, UnderConstruction):
to_resolve[id(ret)] = ret # remember that there is an unresolved parsing expression to_update.add(ToUpdate(context.obj.id, ret))
ret.nodes.append(pe) ret.nodes.append(pe)
else: else:
@@ -1094,8 +1214,8 @@ class BnfNodeParser(BaseNodeParser):
expression.sep = self.resolve_parsing_expression(context, expression.sep = self.resolve_parsing_expression(context,
expression.sep, expression.sep,
grammar, grammar,
to_resolve, to_update,
isa_concepts) to_keep)
return ret return ret
+29
View File
@@ -494,3 +494,32 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka):
evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one) evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one)
assert evaluated.key == one.key assert evaluated.key == one.key
assert evaluated.body == 1 assert evaluated.body == 1
def test_i_can_evaluate_thousand_concept(self):
    """
    'forty one thousand', assembled by hand from nested concepts, must evaluate to 41000
    """
    definitions = [
        Concept("thousand", body="number * 1000").def_var("number"),
        Concept("number"),
        Concept("forties", body="forty + number").def_var("forty").def_var("number"),
        Concept("forty", body="40"),
        Concept("one", body="1"),
    ]
    sheerka, context, _thousand, _number, _forties, _forty, _one = self.init_concepts(
        *definitions,
        eval_body=True
    )

    # innermost 'number' : wraps 'one'
    unit = sheerka.new("one")
    units = sheerka.new("number")
    units.compiled["one"] = unit
    units.compiled[ConceptParts.BODY] = unit

    # 'forties' : forty + number
    forty_one = sheerka.new("forties")
    forty_one.compiled["forty"] = sheerka.new("forty")
    forty_one.compiled["number"] = units

    # outer 'number' : wraps 'forties'
    multiplier = sheerka.new("number")
    multiplier.compiled["forties"] = forty_one
    multiplier.compiled[ConceptParts.BODY] = forty_one

    forty_one_thousand = sheerka.new("thousand")
    forty_one_thousand.compiled["number"] = multiplier

    result = sheerka.evaluate_concept(context, forty_one_thousand)
    assert result.body == 41000
+17
View File
@@ -1,9 +1,20 @@
from dataclasses import dataclass
import core.utils import core.utils
import pytest import pytest
from core.concept import ConceptParts, Concept from core.concept import ConceptParts, Concept
from core.tokenizer import Token, TokenKind from core.tokenizer import Token, TokenKind
@dataclass
class Obj:
    """Two-field value object used as a fixture for the make_unique tests."""

    prop1: str
    prop2: str

    def __hash__(self):
        # Hash the same fields the dataclass-generated __eq__ compares.
        # Hashing prop1 twice made every object sharing prop1 collide.
        return hash((self.prop1, self.prop2))
def get_tokens(lst): def get_tokens(lst):
res = [] res = []
for e in lst: for e in lst:
@@ -229,3 +240,9 @@ def test_decode_concept_key_id():
]) ])
def test_dict_product(a, b, expected): def test_dict_product(a, b, expected):
assert core.utils.dict_product(a, b) == expected assert core.utils.dict_product(a, b) == expected
def test_i_can_make_unique():
    """make_unique drops duplicates, optionally comparing through a key function."""
    # plain hashable values
    letters = ["a", "a", "b", "c", "c"]
    assert core.utils.make_unique(letters) == ["a", "b", "c"]

    # objects compared through their own equality
    with_duplicate = [Obj("a", "b"), Obj("a", "c"), Obj("a", "b")]
    assert core.utils.make_unique(with_duplicate) == [Obj("a", "b"), Obj("a", "c")]

    # objects compared through a key function: both share prop1, one survives
    same_prop1 = [Obj("a", "b"), Obj("a", "c")]
    assert core.utils.make_unique(same_prop1, lambda o: o.prop1) == [Obj("a", "b")]
@@ -118,3 +118,4 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka):
assert not context.sheerka.is_success(error_concept) # it's indeed an error assert not context.sheerka.is_success(error_concept) # it's indeed an error
assert result.status assert result.status
assert result.value == error_concept assert result.value == error_concept
+72 -40
View File
@@ -137,28 +137,38 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
} }
def test_i_can_resolve_when_concepts_are_sets(self): def test_i_can_resolve_when_concepts_are_sets(self):
sheerka, context, one, two, three, number, foo = self.init_concepts( sheerka, context, number, *concepts = self.init_concepts(
"number",
"one", "one",
"two", "two",
"three", "twenty",
"number", "hundred",
Concept("foo", definition="number three"), Concept("twenties", definition="twenty number"),
Concept("hundreds", definition="number hundred"),
create_new=True # mandatory because set_isa() needs it create_new=True # mandatory because set_isa() needs it
) )
sheerka.set_isa(context, sheerka.new("one"), number) sheerka.set_isa(context, sheerka.new("one"), number)
sheerka.set_isa(context, sheerka.new("two"), number) sheerka.set_isa(context, sheerka.new("two"), number)
sheerka.set_isa(context, sheerka.new("twenty"), number)
sheerka.set_isa(context, sheerka.new("thirty"), number)
sheerka.set_isa(context, sheerka.new("hundred"), number)
sheerka.set_isa(context, sheerka.new("twenties"), number)
sheerka.set_isa(context, sheerka.new("hundreds"), number)
cbfk = BaseNodeParser.get_concepts_by_first_token(context, [one, two, three, number, foo]).body sheerka.concepts_grammars.clear() # reset all the grammar to simulate Sheerka restart
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbfk) # cbft : concept_by_first_token (I usually don't use abbreviation)
cbft = BaseNodeParser.get_concepts_by_first_token(context, [number] + concepts).body
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbft)
assert resolved_ret_val.status assert resolved_ret_val.status
assert resolved_ret_val.body == { assert resolved_ret_val.body == {
"one": ["1001", "1005"], 'number': ['1001'],
"two": ["1002", "1005"], 'one': ['1002', '1007'],
"three": ["1003"], 'two': ['1003', '1007'],
"number": ["1004"], 'twenty': ['1004', '1006', '1007'],
'hundred': ['1005', '1007'],
} }
def test_concepts_are_defined_once(self): def test_concepts_are_defined_once(self):
@@ -196,36 +206,58 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
"one": ["1001", "1002"], "one": ["1001", "1002"],
} }
# def tests_i_can_detect_direct_recursion(self): def tests_i_can_detect_direct_recursion(self):
# sheerka = self.get_sheerka() sheerka, context, good, foo, bar = self.init_concepts(
# good = self.get_concept(sheerka, "good") "good",
# foo = self.get_concept(sheerka, "foo", ConceptExpression("bar")) self.bnf_concept("foo", ConceptExpression("bar")),
# bar = self.get_concept(sheerka, "bar", ConceptExpression("foo")) self.bnf_concept("bar", ConceptExpression("foo")),
# )
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, foo, bar]).body
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, foo, bar]).body
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
# assert resolved_ret_val.status assert resolved_ret_val.status
# assert resolved_ret_val.body == { assert resolved_ret_val.body == {
# "good": ["1001"], "good": ["1001"],
# BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"] }
# } assert sheerka.chicken_and_eggs.get(foo.id) == {foo.id, bar.id}
# assert sheerka.chicken_and_eggs.get(bar.id) == {foo.id, bar.id}
# def test_i_can_detect_indirect_infinite_recursion(self):
# sheerka = self.get_sheerka() def test_i_can_detect_indirect_infinite_recursion(self):
# good = self.get_concept(sheerka, "good") sheerka, context, good, one, two, three = self.init_concepts(
# one = self.get_concept(sheerka, "one", ConceptExpression("two")) "good",
# two = self.get_concept(sheerka, "two", ConceptExpression("three")) self.bnf_concept("one", ConceptExpression("two")),
# three = self.get_concept(sheerka, "three", ConceptExpression("two")) self.bnf_concept("two", ConceptExpression("three")),
# self.bnf_concept("three", ConceptExpression("two")),
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two, three]).body )
#
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, one, two, three]).body
# assert resolved_ret_val.status resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
# assert resolved_ret_val.body == { assert resolved_ret_val.status
# "good": ["1001"], assert resolved_ret_val.body == {
# BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1004", "1003"] "good": ["1001"],
# } }
assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}
def test_i_can_detect_the_longest_infinite_recursion_chain(self):
    """'two' and 'three' reference each other, and 'one' joins the cycle through 'three'.

    All three concepts must be recorded in the same chicken-and-egg set.
    """
    # Locals are named after the concept declared at the same position
    # (presumably init_concepts returns them in declaration order; confirm).
    sheerka, context, good, two, three, one = self.init_concepts(
        "good",
        self.bnf_concept("two", ConceptExpression("three")),
        self.bnf_concept("three", ConceptExpression("two")),
        self.bnf_concept("one", ConceptExpression("three")),
    )

    by_first_token = BaseNodeParser.get_concepts_by_first_token(context, [good, two, three, one]).body
    resolved = BaseNodeParser.resolve_concepts_by_first_keyword(context, by_first_token)

    assert resolved.status
    assert resolved.body == {
        "good": ["1001"],
    }

    whole_cycle = {two.id, three.id, one.id}
    assert sheerka.chicken_and_eggs.get(two.id) == whole_cycle
    assert sheerka.chicken_and_eggs.get(three.id) == whole_cycle
    assert sheerka.chicken_and_eggs.get(one.id) == whole_cycle
# #
# def test_i_can_detect_infinite_recursion_from_ordered_choice(self): # def test_i_can_detect_infinite_recursion_from_ordered_choice(self):
# sheerka = self.get_sheerka() # sheerka = self.get_sheerka()
+177 -100
View File
@@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression Optional, ZeroOrMore, OneOrMore, ConceptExpression, LongestChoice
from parsers.BnfParser import BnfParser from parsers.BnfParser import BnfParser
import tests.parsers.parsers_utils import tests.parsers.parsers_utils
@@ -16,13 +16,15 @@ cmap = {
"two": Concept("two"), "two": Concept("two"),
"three": Concept("three"), "three": Concept("three"),
"four": Concept("four"), "four": Concept("four"),
"thirty": Concept("thirty", body=30), "thirty": Concept("thirty", body="30"),
"forty": Concept("forty", body=40), "forty": Concept("forty", body="40"),
"fifty": Concept("fifty", body=50), "fifty": Concept("fifty", body="50"),
"number": Concept("number"), "number": Concept("number"),
"foo": Concept("foo"), "foo": Concept("foo"),
"bar": Concept("bar"), "bar": Concept("bar"),
"baz": Concept("baz"), "baz": Concept("baz"),
"one hundred": Concept("one hundred", body="100"),
"one_hundred": Concept("'one hundred'", body="100"),
"bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen "bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen
@@ -80,17 +82,6 @@ def compute_expected_array(my_concepts_map, expression, expected, exclude_body=F
class TestBnfNodeParser(TestUsingMemoryBasedSheerka): class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka = None sheerka = None
@staticmethod
def update_bnf(context, concept):
bnf_parser = BnfParser()
res = bnf_parser.parse(context, concept.metadata.definition)
if res.status:
concept.bnf = res.value.value
concept.metadata.definition_type = DEFINITION_TYPE_BNF
else:
raise Exception(res)
return concept
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
t = cls() t = cls()
@@ -109,6 +100,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number")) sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number")) sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number")) sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number"))
thirties = cls.update_bnf(context, Concept("thirties", thirties = cls.update_bnf(context, Concept("thirties",
definition="thirty number", definition="thirty number",
@@ -131,6 +123,24 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body
sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number")) sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number"))
thousands = cls.update_bnf(context, Concept("thousands",
definition="number 'thousand'",
where="number < 999",
body="number * 1000").def_var("number"))
cmap["thousands"] = sheerka.create_new_concept(context, thousands).body.body
sheerka.set_isa(context, sheerka.new("thousands"), sheerka.new("number"))
@staticmethod
def update_bnf(context, concept):
    """Parse the concept's textual definition and attach the result as its BNF.

    :param context: execution context handed to the BNF parser
    :param concept: concept whose ``metadata.definition`` is parsed
    :return: the same concept, with ``bnf`` set and its definition type
             switched to DEFINITION_TYPE_BNF
    :raises Exception: carrying the parser result when parsing fails
    """
    parsed = BnfParser().parse(context, concept.metadata.definition)
    if not parsed.status:
        raise Exception(parsed)

    concept.bnf = parsed.value.value
    concept.metadata.definition_type = DEFINITION_TYPE_BNF
    return concept
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs): def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
if my_concepts_map is not None: if my_concepts_map is not None:
sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs) sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs)
@@ -351,6 +361,21 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
self.validate_get_concepts_sequences(my_map, text, expected) self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
    ("one", [CNC("foo", source="one")]),
    ("one two", [CNC("foo", source="one two")]),
    ("three", []),
])
def test_i_can_parse_longest_choice(self, text, expected):
    """LongestChoice must match the alternative that covers the most input."""
    # Both alternatives start with 'one'; only the second also covers 'two'.
    one_or_one_two = LongestChoice(
        StrMatch("one"),
        Sequence(StrMatch("one"), StrMatch("two")))
    my_map = {"foo": self.bnf_concept("foo", one_or_one_two)}

    self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
("one", [CNC("foo", source="one")]), ("one", [CNC("foo", source="one")]),
("", []), ("", []),
@@ -724,7 +749,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"foo": self.bnf_concept("foo", "foo": self.bnf_concept("foo",
Sequence( Sequence(
StrMatch("twenty"), StrMatch("twenty"),
OrderedChoice(ConceptExpression("one"), ConceptExpression("two")))), OrderedChoice(
ConceptExpression("one"),
ConceptExpression("two"),
rule_name="unit"))),
} }
text = "twenty one" text = "twenty one"
@@ -733,7 +761,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
concept_foo = sequences[0].concept concept_foo = sequences[0].concept
assert concept_foo.compiled == { assert concept_foo.compiled == {
ConceptParts.BODY: DoNotResolve("twenty one"), ConceptParts.BODY: DoNotResolve("twenty one"),
"one": my_map["one"], "unit": my_map["one"],
} }
def test_i_can_refer_to_group_concepts(self): def test_i_can_refer_to_group_concepts(self):
@@ -755,8 +783,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# explicit validations of the compiled # explicit validations of the compiled
concept_foo = sequences[0].concept concept_foo = sequences[0].concept
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
assert concept_foo.compiled == {'number': my_map["number"], assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["two"], two=my_map["two"]),
'two': my_map["two"],
ConceptParts.BODY: DoNotResolve(value='twenty two')} ConceptParts.BODY: DoNotResolve(value='twenty two')}
text = "twenty one" text = "twenty one"
@@ -766,8 +793,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
# explicit validations of the compiled # explicit validations of the compiled
concept_foo = sequences[0].concept concept_foo = sequences[0].concept
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
assert concept_foo.compiled == {'number': my_map["number"], assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
'one': my_map["one"],
ConceptParts.BODY: DoNotResolve(value='twenty one')} ConceptParts.BODY: DoNotResolve(value='twenty one')}
@pytest.mark.parametrize("bar_expr, expected", [ @pytest.mark.parametrize("bar_expr, expected", [
@@ -842,8 +868,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
res = parser.get_parsing_expression(context, my_map["foo"]) res = parser.get_parsing_expression(context, my_map["foo"])
assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected
def test_i_can_get_parsing_expression_when_ending_by_concept_isa(self):
def test_i_can_get_parsing_expression_when_concept_isa(self):
my_map = { my_map = {
"one": Concept("one"), "one": Concept("one"),
"twenty": Concept("twenty"), "twenty": Concept("twenty"),
@@ -869,42 +894,43 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
number_nodes = parsing_expression.nodes[1].nodes number_nodes = parsing_expression.nodes[1].nodes
assert len(number_nodes) == 1 assert len(number_nodes) == 1
assert isinstance(number_nodes[0], OrderedChoice) assert isinstance(number_nodes[0], LongestChoice)
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
assert my_map["number"].id not in parser.concepts_grammars assert my_map["number"].id not in parser.concepts_grammars
# def test_i_can_get_parsing_expression_when_starting_by_isa_concept(self):
# def test_i_cannot_get_parsing_expression_when_concept_is_part_of_a_group(self): my_map = {
# """ "one": Concept("one"),
# In this test, twenties isa number "two": Concept("two"),
# # So 'number' in Sequence(thirty, number) will spawn 'twenties' which, because there is no other indication, "number": Concept("number"),
# # will create an infinite loop "hundreds": self.bnf_concept("hundreds", Sequence(ConceptExpression("number"), StrMatch("hundred")))
# :return: }
# """
# my_map = { sheerka, context, parser = self.init_parser(my_map, singleton=True)
# "one": Concept("one"), parser.context = context
# "twenty": Concept("twenty"), parser.sheerka = sheerka
# "number": Concept("number"), sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
# "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number"))) sheerka.set_isa(context, sheerka.new("two"), my_map["number"])
# } sheerka.set_isa(context, sheerka.new("hundreds"), my_map["number"])
# sheerka, context, parser = self.init_parser(my_map, singleton=True)
# parser.context = context parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
# parser.sheerka = sheerka parsing_expression = parser.get_parsing_expression(context, my_map["hundreds"])
# sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
# sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"]) assert parsing_expression == Sequence(
# sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"]) # <- twenties is also a number ConceptExpression(my_map["number"], rule_name="number"),
# StrMatch("hundred"))
# parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
# assert len(parsing_expression.nodes) == len(parsing_expression.elements)
# parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
# assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG) number_nodes = parsing_expression.nodes[0].nodes
# assert parsing_expression.body == {my_map["twenties"].id, my_map["number"].id} assert len(number_nodes) == 1
# assert isinstance(number_nodes[0], LongestChoice)
# assert isinstance(parser.concepts_grammars.get(my_map["one"].id), ParsingExpression) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
# assert isinstance(parser.concepts_grammars.get(my_map["twenty"].id), ParsingExpression) assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes
def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self): def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
my_map = { my_map = {
@@ -933,7 +959,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
number_nodes = parsing_expression.nodes[1].nodes number_nodes = parsing_expression.nodes[1].nodes
assert len(number_nodes) == 1 assert len(number_nodes) == 1
assert isinstance(number_nodes[0], OrderedChoice) assert isinstance(number_nodes[0], LongestChoice)
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
@@ -994,7 +1020,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]), ("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]),
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]), ("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]), ("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
("twenty one", True, [CNC("t1", source="twenty one", unit="one", one="one")]), ("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
]) ])
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected): def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
@@ -1008,7 +1034,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array assert concepts_nodes == expected_array
def test_i_can_when_multiple_times_the_same_variable(self): def test_i_can_parse_when_multiple_times_the_same_variable(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
text = "foo foo foo" text = "foo foo foo"
@@ -1032,8 +1058,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
unit=CC("three_four", unit=CC("three_four",
source="four", source="four",
four=CC("four", body=DoNotResolve("four")), four=CC("four", body=DoNotResolve("four")),
body=CC("four", body=DoNotResolve("four"))), body=CC("four", body=DoNotResolve("four"))))
four="four")
expected_array = compute_expected_array(cmap, text, [expected]) expected_array = compute_expected_array(cmap, text, [expected])
res = parser.parse(context, ParserInput(text)) res = parser.parse(context, ParserInput(text))
@@ -1044,40 +1069,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array assert concepts_nodes == expected_array
# def test_i_cannot_parse_bnf_concept_mixed_with_isa_concepts(self):
# sheerka, context, parser = self.init_parser(init_from_sheerka=True)
#
# # thirties = cls.update_bnf(context, Concept("thirties",
# # definition="thirty number",
# # where="number < 10",
# # body="thirty + number").def_var("thirty").def_var("number"))
# # with thirties isa number
# # So number in 'thirty number' will spawn 'thirties' which, because there is no other indication, will
# # create an infinite loop
#
# text = "thirty one"
# expected = CNC("thirties",
# source=text,
# number=CC("number",
# source="one",
# one=CC("one", body=DoNotResolve("one")),
# body=CC("one", body=DoNotResolve("one"))),
# one=CC("one", body=DoNotResolve("one")),
# thirty="thirty")
# expected_array = compute_expected_array(cmap, text, [expected])
#
# res = parser.parse(context, ParserInput(text))
# not_for_me = res.value
# reason = res.value.body
#
# assert not res.status
# assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME)
# assert sheerka.isinstance(reason, BuiltinConcepts.CHICKEN_AND_EGG)
# assert reason.body == {cmap["thirties"].id, cmap["number"].id}
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self): def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
# thirties is defined in the global variable cmap as
# thirties = cls.update_bnf(context, Concept("thirties", # thirties = cls.update_bnf(context, Concept("thirties",
# definition="thirty number", # definition="thirty number",
# where="number < 10", # where="number < 10",
@@ -1090,7 +1084,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
source="one", source="one",
one=CC("one", body=DoNotResolve("one")), one=CC("one", body=DoNotResolve("one")),
body=CC("one", body=DoNotResolve("one"))), body=CC("one", body=DoNotResolve("one"))),
one=CC("one", body=DoNotResolve("one")),
thirty="thirty") thirty="thirty")
expected_array = compute_expected_array(cmap, text, [expected]) expected_array = compute_expected_array(cmap, text, [expected])
@@ -1113,7 +1106,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
source="three", source="three",
three=CC("three", body=DoNotResolve("three")), three=CC("three", body=DoNotResolve("three")),
body=CC("three", body=DoNotResolve("three"))), body=CC("three", body=DoNotResolve("three"))),
three=CC("three", body=DoNotResolve("three")),
thirty="thirty") thirty="thirty")
expected_array = compute_expected_array(cmap, text, [expected]) expected_array = compute_expected_array(cmap, text, [expected])
@@ -1125,6 +1117,56 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array assert concepts_nodes == expected_array
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_when_concept_starts_with_isa(self):
    """Parse 'thousands' (definition "number 'thousand'"), whose definition starts with a group.

    Three inputs are checked: a plain member of the group, a BNF member of the
    group after the grammars were cleared, and a member with a multi-word name.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    def parse(source):
        return parser.parse(context, ParserInput(source))

    one = CC("one", body=DoNotResolve("one"))

    # --- 'one thousand': the leading number resolves to the concept 'one'
    text = "one thousand"
    expected = CNC("thousands",
                   source=text,
                   number=CC("number",
                             source="one",
                             one=one,
                             body=one))
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parse(text)
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array

    # --- 'fifty one thousand': the leading number resolves to the BNF concept 'fifties'
    sheerka.concepts_grammars.clear()  # to simulate restart
    text = "fifty one thousand"
    fifty_one = CC("fifties",
                   source="fifty one",
                   fifty="fifty",
                   number=CC("number", source="one", body=one, one=one))
    expected = CNC("thousands",
                   source=text,
                   number=CC("number",
                             source="fifty one",
                             fifties=fifty_one,
                             body=fifty_one))
    expected_array = compute_expected_array(cmap, text, [expected])

    res = parse(text)
    parser_result = res.value
    concepts_nodes = res.value.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array

    # --- 'one hundred thousand': only success and the result type are checked
    text = "one hundred thousand"
    res = parse(text)
    parser_result = res.value

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self): def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
sheerka.concepts_grammars.clear() # simulate restart sheerka.concepts_grammars.clear() # simulate restart
@@ -1138,7 +1180,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
source="three", source="three",
three=CC("three", body=DoNotResolve("three")), three=CC("three", body=DoNotResolve("three")),
body=CC("three", body=DoNotResolve("three"))), body=CC("three", body=DoNotResolve("three"))),
three=CC("three", body=DoNotResolve("three")),
thirty="thirty") thirty="thirty")
expected_array = compute_expected_array(cmap, text, [expected]) expected_array = compute_expected_array(cmap, text, [expected])
@@ -1157,7 +1198,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
source="one", source="one",
one=CC("one", body=DoNotResolve("one")), one=CC("one", body=DoNotResolve("one")),
body=CC("one", body=DoNotResolve("one"))), body=CC("one", body=DoNotResolve("one"))),
one=CC("one", body=DoNotResolve("one")),
forty="forty") forty="forty")
expected_array = compute_expected_array(cmap, text, [expected]) expected_array = compute_expected_array(cmap, text, [expected])
@@ -1173,7 +1213,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
parser_input = "def one" parser_input = "def one"
expected = [CNC("def number", source="def one", number="one", one="one")] expected = [CNC("def number", source="def one", number="one")]
res = parser.parse(context, ParserInput(parser_input)) res = parser.parse(context, ParserInput(parser_input))
expected_array = compute_expected_array(cmap, parser_input, expected) expected_array = compute_expected_array(cmap, parser_input, expected)
@@ -1190,7 +1230,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
expression = "--filter one" expression = "--filter one"
expected = [CNC("filter", source="--filter one", one="one")] expected = [CN("filter", source="--filter one")]
res = parser.parse(context, ParserInput(expression)) res = parser.parse(context, ParserInput(expression))
expected_array = compute_expected_array(cmap, expression, expected) expected_array = compute_expected_array(cmap, expression, expected)
@@ -1233,10 +1273,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
CC(factor, body=DoNotResolve("2")), CC(factor, body=DoNotResolve("2")),
CC(factor, body=DoNotResolve("3")), CC(factor, body=DoNotResolve("3")),
])], ])],
factor=[
CC(factor, body=DoNotResolve("1")),
CC(factor, body=DoNotResolve("2")),
CC(factor, body=DoNotResolve("3"))],
body=DoNotResolve("1 + 2 * 3"))] body=DoNotResolve("1 + 2 * 3"))]
def test_i_can_parse_recursive_descent_grammar(self): def test_i_can_parse_recursive_descent_grammar(self):
@@ -1255,8 +1291,34 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
text = "1 + 2 * 3" text = "1 + 2 * 3"
res = parser.parse(context, ParserInput(text)) res = parser.parse(context, ParserInput(text))
parser_result = res.value
concepts_nodes = res.value.value
factor = my_map["factor"]
term = my_map["term"]
expr = my_map["expr"]
# concepts_nodes = res.value.value is too complicated to be validated # concepts_nodes = res.value.value is too complicated to be validated
assert res.status assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == [CNC(expr,
term=CC(term,
body=CC(factor, body=DoNotResolve("1")),
factor=CC(factor, body=DoNotResolve("1"))),
expr=CC(expr,
body=CC(term,
body=DoNotResolve("2 * 3"),
factor=CC(factor, body=DoNotResolve("2")),
term=CC(term,
body=CC(factor, body=DoNotResolve("3")),
factor=CC(factor, body=DoNotResolve("3")))),
term=CC(term,
body=DoNotResolve("2 * 3"),
factor=CC(factor, body=DoNotResolve("2")),
term=CC(term,
body=CC(factor, body=DoNotResolve("3")),
factor=CC(factor, body=DoNotResolve("3"))))),
body=DoNotResolve("1 + 2 * 3"))]
def test_i_can_parse_simple_recursive_grammar(self): def test_i_can_parse_simple_recursive_grammar(self):
my_map = { my_map = {
@@ -1271,6 +1333,21 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert parser.parse(context, ParserInput("foo foo foo bar")).status assert parser.parse(context, ParserInput("foo foo foo bar")).status
assert not parser.parse(context, ParserInput("foo baz")).status assert not parser.parse(context, ParserInput("foo baz")).status
# Expected expression for "'foo bar baz' qux": the quoted part is matched token
# by token with whitespace kept, except its last token and the trailing word.
_QUOTED_NAME_EXPRESSION = Sequence(
    StrMatch("foo", skip_whitespace=False),
    StrMatch(" ", skip_whitespace=False),
    StrMatch("bar", skip_whitespace=False),
    StrMatch(" ", skip_whitespace=False),
    StrMatch("baz"),
    StrMatch("qux"),
)


@pytest.mark.parametrize("name, expected", [
    (None, []),
    ("", []),
    ("foo", StrMatch("foo")),
    ("foo bar", Sequence(StrMatch("foo"), StrMatch("bar"))),
    ("'foo bar baz' qux", _QUOTED_NAME_EXPRESSION),
])
def test_i_can_get_expression_from_concept_name(self, name, expected):
    """A concept name is turned into the parsing expression that matches it."""
    actual = BnfNodeParser.get_expression_from_concept_name(name)

    assert actual == expected
# @pytest.mark.parametrize("parser_input, expected", [ # @pytest.mark.parametrize("parser_input, expected", [
# ("one", [ # ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]), # (True, [CNC("bnf_one", source="one", one="one", body="one")]),
+3 -3
View File
@@ -128,7 +128,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
expected_nodes = compute_expected_array( expected_nodes = compute_expected_array(
concepts_map, concepts_map,
" twenty one ", " twenty one ",
[CNC("twenties", source="twenty one", unit="one", one="one")]) [CNC("twenties", source="twenty one", unit="one")])
assert concept.compiled["c"][0].body.body == expected_nodes assert concept.compiled["c"][0].body.body == expected_nodes
assert len(concept.compiled["d"]) == 1 assert len(concept.compiled["d"]) == 1
@@ -185,7 +185,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
expected_nodes = compute_expected_array( expected_nodes = compute_expected_array(
concepts_map, concepts_map,
" twenty two", " twenty two",
[CNC("twenties", source="twenty two", unit="two", two="two")]) [CNC("twenties", source="twenty two", unit="two")])
assert res.body.concept.compiled["b"].compiled["b"][0].body.body == expected_nodes assert res.body.concept.compiled["b"].compiled["b"][0].body.body == expected_nodes
# def test_i_can_validate_and_evaluate_a_concept_node_with_python(self): # def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
@@ -281,7 +281,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
assert len(actual_nodes) == 1 assert len(actual_nodes) == 1
expected_array = compute_expected_array( expected_array = compute_expected_array(
concepts_map, concepts_map,
expression, [CNC("twenties", source=expression, unit="one", one="one")]) expression, [CNC("twenties", source=expression, unit="one")])
assert actual_nodes == expected_array assert actual_nodes == expected_array
def test_i_can_parse_unrecognized_sya_concept_node(self): def test_i_can_parse_unrecognized_sya_concept_node(self):