diff --git a/_concepts.txt b/_concepts.txt index 8a5e8d0..e0dd1bc 100644 --- a/_concepts.txt +++ b/_concepts.txt @@ -71,8 +71,12 @@ def concept nineties from bnf ninety number where number < 10 as ninety + number nineties isa number # def concept hundreds1 from number 'hundred' where number < 10 as number * 100 # def concept hundreds2 from number=number1 'hundred' 'and' number=number2 where number1 < 10 and number2 < 100 as number1 * 100 + number2 -# def concept one hundred as 100 -# c:one hundred: isa number +def concept one hundred as 100 +one hundred isa number # hundreds1 isa number # hundreds2 isa number +def concept thousands from bnf number 'thousand' where number < 1000 as number * 1000 +thousands isa number def concept history as history() +def concept plus from a plus b as a + b +def concept mult from a mult b as a * b diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index c605170..ed9ae53 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -36,6 +36,7 @@ class Sheerka(Concept): CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions" RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY = "Resolved_Concepts_Sya_Definitions" CONCEPTS_GRAMMARS_ENTRY = "Concepts_Grammars" + CHICKEN_AND_EGG_CONCEPTS_ENTRY = "Chicken_And_Egg_Concepts" CONCEPTS_KEYS_ENTRY = "Concepts_Keys" BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts @@ -105,6 +106,10 @@ class Sheerka(Concept): @property def concepts_grammars(self): + return self.cache_manager.caches[self.CHICKEN_AND_EGG_CONCEPTS_ENTRY].cache + + @property + def chicken_and_eggs(self): return self.cache_manager.caches[self.CONCEPTS_GRAMMARS_ENTRY].cache def bind_service_method(self, bound_method, as_name=None): @@ -227,6 +232,9 @@ class Sheerka(Concept): cache = Cache() self.cache_manager.register_cache(self.CONCEPTS_GRAMMARS_ENTRY, cache, persist=False) + cache = Cache() + self.cache_manager.register_cache(self.CHICKEN_AND_EGG_CONCEPTS_ENTRY, cache, persist=False) + def initialize_services(self): """ Introspect to find services and bind them diff --git a/src/core/sheerka/services/SheerkaEvaluateConcept.py b/src/core/sheerka/services/SheerkaEvaluateConcept.py index 2fa3df8..ed18458 100644 --- a/src/core/sheerka/services/SheerkaEvaluateConcept.py +++ b/src/core/sheerka/services/SheerkaEvaluateConcept.py @@ -26,7 +26,7 @@ class SheerkaEvaluateConcept(BaseService): parent = context.get_parent() while parent is not None: - if parent.who == context.who and parent.obj == concept: + if parent.who == context.who and parent.obj == concept and parent.obj.compiled == concept.compiled: return True parent = parent.get_parent() @@ -147,6 +147,11 @@ class SheerkaEvaluateConcept(BaseService): def resolve(self, context, to_resolve, current_prop, current_concept, force_evaluation): + def get_path(context_, prop_name): + prefix = context_.path if hasattr(context_, "path") else "" + value = prop_name.name if isinstance(current_prop, ConceptParts) else prop_name + return prefix + "." + value + if isinstance(to_resolve, DoNotResolve): return to_resolve.value @@ -161,12 +166,14 @@ class SheerkaEvaluateConcept(BaseService): sub_context.add_values(return_values=ret_val) return ret_val.body - desc = f"Evaluating {current_prop} (concept={current_concept})" + path = get_path(context, current_prop) + desc = f"Evaluating {path} (concept={current_concept})" context.log(desc, self.NAME) with context.push(BuiltinConcepts.EVALUATING_CONCEPT, current_prop, desc=desc, - obj=current_concept) as sub_context: + obj=current_concept, + path=path) as sub_context: if force_evaluation: sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) diff --git a/src/core/sheerka/services/SheerkaSetsManager.py b/src/core/sheerka/services/SheerkaSetsManager.py index fac9fce..b12d7cc 100644 --- a/src/core/sheerka/services/SheerkaSetsManager.py +++ b/src/core/sheerka/services/SheerkaSetsManager.py @@ -76,6 +76,7 @@ class SheerkaSetsManager(BaseService): self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set)) self.sets.put(concept_set.id, concept.id) + self.concepts_in_set.delete(concept_set.id) return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS)) def add_concepts_to_set(self, context, concepts, concept_set): @@ -95,6 +96,7 @@ class SheerkaSetsManager(BaseService): concept_set=concept_set) else: body = self.sheerka.new(BuiltinConcepts.SUCCESS) + self.concepts_in_set.delete(concept_set.id) return self.sheerka.ret(self.NAME, len(already_in_set) != len(concepts), body) diff --git a/src/core/utils.py b/src/core/utils.py index c993d48..6e7b7b1 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -205,9 +205,9 @@ def make_unique(lst, get_id=None): yield x else: for x in seq: - x = get_id(x) - if x not in seen: - seen.add(x) + _id = get_id(x) + if _id not in seen: + seen.add(_id) yield x return list(_make_unique(lst, get_id)) diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index 7a6a567..6d8d81b 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -264,6 +264,7 @@ class AtomNodeParser(BaseNodeParser): concept_parser.lock() concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name) + #self.context.log(f"concepts found for {token=}: {concepts}", who=self.name) if not concepts: for concept_parser in concept_parser_helpers: concept_parser.eat_unrecognized(token, pos) diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index f0b92b8..762b050 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -525,7 +525,7 @@ class CNC(CN): to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY} else: to_compare = other.concept.compiled - if self.compiled == to_compare: + if self.compiled == to_compare: # expanded form to ease the debug return True else: return False @@ -673,7 +673,8 @@ class BaseNodeParser(BaseParser): concept = to_map(self, concept) if to_map else concept result.append(concept) - return result + custom_concepts + return core.utils.make_unique(result + custom_concepts, + lambda c: c.concept.id if hasattr(c, "concept") else c.id) return custom_concepts if custom else None @@ -707,16 +708,20 @@ class BaseNodeParser(BaseParser): @staticmethod def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword): sheerka = context.sheerka + res = {} def resolve_concepts(concept_str): + c_key, c_id = core.utils.unstr_concept(concept_str) + if c_id in already_seen: + return ChickenAndEggError(already_seen) + + already_seen.add(c_id) + resolved = set() to_resolve = set() - concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1]) + chicken_and_egg = set() - if concept.id in already_seen: - raise ChickenAndEggError(already_seen) - else: - already_seen.add(concept.id) + concept = sheerka.get_by_id(c_id) if sheerka.isaset(context, concept): concepts = sheerka.get_set_elements(context, concept) @@ -730,11 +735,18 @@ class BaseNodeParser(BaseParser): (to_resolve if keyword.startswith("c:|") else resolved).add(keyword) for concept_to_resolve_str in to_resolve: - resolved |= resolve_concepts(concept_to_resolve_str) + res = resolve_concepts(concept_to_resolve_str) + if isinstance(res, ChickenAndEggError): + chicken_and_egg |= res.concepts + else: + resolved |= res + to_resolve.clear() - return resolved + if len(resolved) == 0 and len(chicken_and_egg) > 0: + raise ChickenAndEggError(chicken_and_egg) + else: + return resolved - res = {} for k, v in concepts_by_first_keyword.items(): if k.startswith("c:|"): try: @@ -744,8 +756,16 @@ class BaseNodeParser(BaseParser): res.setdefault(resolved, []).extend(v) except ChickenAndEggError as ex: context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}") - # res[k] = sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, - # body=[sheerka.get_by_id(c) for c in ex.concepts]) + concepts_in_recursion = ex.concepts + # make sure to have all the parents + for parent in v: + concepts_in_recursion.add(parent) + + for concept_id in concepts_in_recursion: + # make sure we keep the longest chain + old = sheerka.chicken_and_eggs.get(concept_id) + if old is None or len(old) < len(ex.concepts): + sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion) else: res.setdefault(k, []).extend(v) @@ -755,6 +775,53 @@ class BaseNodeParser(BaseParser): return sheerka.ret("BaseNodeParser", True, res) + @staticmethod + def get_referenced_concepts(context, concept_id, already_seen): + """ + Gets all the tokens that may allow to recognize concept concept_id + Basically, it returns all the starting tokens for concept concept_id + CHICKEN_AND_EGG is returned when a circular references are found + :param context: + :param concept_id: + :param already_seen: + :return: + """ + if concept_id in already_seen: + return ChickenAndEggError(already_seen) + + already_seen.add(concept_id) + + resolved = set() + to_resolve = set() + chicken_and_egg = set() + sheerka = context.sheerka + concept = sheerka.get_by_id(concept_id) + + if sheerka.isaset(context, concept): + concepts = sheerka.get_set_elements(context, concept) + else: + concepts = [concept] + + for concept in concepts: + BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail + keywords = BaseNodeParser.get_first_tokens(sheerka, concept) + for keyword in keywords: + (to_resolve if keyword.startswith("c:|") else resolved).add(keyword) + + for concept_to_resolve_str in to_resolve: + c_key, c_id = core.utils.unstr_concept(concept_to_resolve_str) + res = BaseNodeParser.get_referenced_concepts(context, c_id, already_seen) + if isinstance(res, ChickenAndEggError): + chicken_and_egg |= res.concepts + else: + resolved |= res + to_resolve.clear() + + if len(resolved) == 0 and len(chicken_and_egg) > 0: + raise ChickenAndEggError(chicken_and_egg) + else: + return resolved + @staticmethod def resolve_sya_associativity_and_precedence(context, sya): pass diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index 26eeb69..42a00be 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -38,7 +38,7 @@ class NonTerminalNode(LexerNode): self.children = children def __repr__(self): - name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__ + name = "Node:" + (self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__) if len(self.children) > 0: sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")" else: @@ -69,7 +69,7 @@ class TerminalNode(LexerNode): self.value = value def __repr__(self): - name = self.parsing_expression.rule_name or "" + name = "Node:" + (self.parsing_expression.rule_name or "") return name + f"'{self.value}'" def __eq__(self, other): @@ -186,7 +186,7 @@ class Sequence(ParsingExpression): class OrderedChoice(ParsingExpression): """ - Will match one among multiple + Will match the first one among multiple It will stop at the first match (so the order of definition is important) """ @@ -211,6 +211,42 @@ class OrderedChoice(ParsingExpression): return self.add_rule_name_if_needed(f"({to_str})") +class LongestChoice(ParsingExpression): + """ + Will match the longest one among multiple + All elements will be tested, so the order is not important + The behaviour when multiple candidate is found is not defined yet + """ + + def _parse(self, parser_helper): + init_pos = parser_helper.pos + longest_node = None + end_pos = -1 + + for e in self.nodes: + node = e.parse(parser_helper) + if node: + if longest_node is None or node.end > longest_node.end: + longest_node = node + end_pos = parser_helper.pos + + parser_helper.seek(init_pos) # backtrack + + if longest_node is None: + return None + + parser_helper.seek(end_pos) + return NonTerminalNode(self, + init_pos, + longest_node.end, + parser_helper.parser.parser_input.tokens[init_pos: longest_node.end + 1], + [longest_node]) + + def __repr__(self): + to_str = "# ".join(repr(n) for n in self.elements) + return self.add_rule_name_if_needed(f"({to_str})") + + class Optional(ParsingExpression): """ Will match or not the elements @@ -386,7 +422,12 @@ class StrMatch(Match): self.skip_white_space = skip_whitespace def __repr__(self): - return self.add_rule_name_if_needed(f"'{self.to_match}'") + text = self.to_match + if not self.ignore_case: + text += "#!ic" + if not self.skip_white_space: + text += "#!sw" + return self.add_rule_name_if_needed(f"'{text}'") def __eq__(self, other): if not super().__eq__(other): @@ -395,7 +436,9 @@ class StrMatch(Match): if not isinstance(other, StrMatch): return False - return self.to_match == other.to_match and self.ignore_case == other.ignore_case + return self.to_match == other.to_match and \ + self.ignore_case == other.ignore_case and \ + self.skip_white_space == other.skip_white_space def _parse(self, parser_helper): token = parser_helper.get_token() @@ -766,7 +809,7 @@ class BnfConceptParserHelper: _add_prop(_concept, _underlying.parsing_expression.rule_name, value) _concept.metadata.need_validation = True - if isinstance(_underlying, NonTerminalNode): + elif isinstance(_underlying, NonTerminalNode): for child in _underlying.children: _process_rule_name(_concept, child) @@ -789,6 +832,15 @@ class UnderConstruction: concept_id: str +@dataclass() +class ToUpdate: + parent_id: int + parsing_expression: ParsingExpression + + def __hash__(self): + return hash(self.parent_id) + + class BnfNodeParser(BaseNodeParser): def __init__(self, **kwargs): super().__init__("BnfNode", 50, **kwargs) @@ -824,6 +876,34 @@ class BnfNodeParser(BaseNodeParser): return valid_parser_helpers + @staticmethod + def get_expression_from_concept_name(name): + """ + Create the parsing expression from the name + This function differs from BNFParser.parse() as it does not try to resolve identifiers into concepts + >>> assert get_expression_from_concept_name('one hundred') == Sequence(StrMatch("one"), StrMatch("hundred")) + while BNFParser.parse("one hundred") will look for concept 'one' and concept 'hundred' + :param name: + :return: + """ + if name is None or name.strip() == "": + return [] + + res = [] + tokens = Tokenizer(name, yield_eof=False) + for token in tokens: + if token.type == TokenKind.WHITESPACE: + continue + elif token.type == TokenKind.STRING: + sub_tokens = list(Tokenizer(token.strip_quote, yield_eof=False)) + for sub_token in sub_tokens[:-1]: + res.append(StrMatch(sub_token.str_value, skip_whitespace=False)) + res.append(StrMatch(sub_tokens[-1].str_value)) + else: + res.append(StrMatch(token.str_value)) + + return res[0] if len(res) == 1 else Sequence(*res) + def get_concepts_sequences(self): """ Main method that parses the tokens and extract the concepts @@ -900,10 +980,10 @@ class BnfNodeParser(BaseNodeParser): def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False): if isinstance(parsing_expression, ConceptExpression): - if parsing_expression.concept in already_found: + if parsing_expression.concept.id in already_found: return True - already_found.add(parsing_expression.concept) - return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, False) + already_found.add(parsing_expression.concept.id) + return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, only_first) if isinstance(parsing_expression, Sequence): # for sequence, we need to check all nodes @@ -930,43 +1010,93 @@ class BnfNodeParser(BaseNodeParser): return False return False + if isinstance(parsing_expression, LongestChoice): + for node in parsing_expression.nodes: + already_found_for_current_node = already_found.copy() + if self.check_for_infinite_recursion(node, already_found_for_current_node, True): + already_found.update(already_found_for_current_node) + return True + return False + + if isinstance(parsing_expression, UnderConstruction): + if parsing_expression.concept_id in already_found: + return True + already_found.add(parsing_expression.concept_id) + return False def get_parsing_expression(self, context, concept): + """ + Compute the parsing expression for a given concept + :param context: + :param concept: + :return: + """ if concept.id in self.concepts_grammars: return self.concepts_grammars.get(concept.id) - grammar = self.concepts_grammars.copy() - to_resolve = {} # the key is the instance id of the parsing expression - isa_concepts = set() - self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts) + # internal cache of already computed parsing expression to use during the recursion + grammar = {} - for _id, pe in to_resolve.items(): - for i, node in enumerate(pe.nodes): - if isinstance(node, UnderConstruction): - pe.nodes[i] = grammar.get(node.concept_id) + # concept that are not totally resolved, because they reference parsing expression under construction + to_update = set() # the key is the instance id of the parsing expression + # during the parsing of concept, we will resolve other concepts + # keep the track of the concepts that can safely be added to self.concept_grammars + to_keep = {concept.id} + + desc = f"Get parsing expression for concept {concept}" + with context.push(BuiltinConcepts.INIT_BNF, concept, + who=self.name, + obj=concept, + root_concept=concept, + desc=desc) as sub_context: + # get the parsing expression + ret = self.resolve_concept_parsing_expression(sub_context, concept, grammar, to_update, to_keep) + + # check and update parsing expression that are still under construction + # Note that we only update the concept that will update concepts_grammars + # because pe.node may be large + for item in to_update: + if item.parent_id in to_keep: + pe = item.parsing_expression + for i, node in enumerate(pe.nodes): + if isinstance(node, UnderConstruction): + pe.nodes[i] = grammar.get(node.concept_id) + + # check for infinite recursion. + # We are adding a new concept. Does it create an infinite recursion ? concepts_in_recursion = set() - if self.check_for_infinite_recursion(pe, concepts_in_recursion): - cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body={c.id for c in concepts_in_recursion}) - for concept in concepts_in_recursion: - grammar[concept.id] = cycle + if self.check_for_infinite_recursion(ret, concepts_in_recursion): + cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion) + for concept_id in concepts_in_recursion: + grammar[concept_id] = cycle - # Make sure you do not put isa concepts in cache - # why : - # twenties = 'twenty' number where number < 10 - # hundreds = number 'hundred' where number < 99 - # the concept of number depends on its utilisation - for concept_id in [c for c in grammar if c not in isa_concepts]: - self.concepts_grammars.put(concept_id, grammar[concept_id]) + # update, in case of infinite circular recursion + ret = grammar[concept.id] - return self.concepts_grammars.get(concept.id) + # finally, update concept grammar + for k, v in grammar.items(): + if k in to_keep: + self.concepts_grammars.put(k, v) - def resolve_concept_parsing_expression(self, context, concept, grammar, to_resolve, isa_concepts): - if concept.id in grammar: + # not quite sure that it is a good idea. + # Why do we want to corrupt previous valid entries ? + if context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG): + self.concepts_grammars.put(k, v) + + sub_context.add_values(return_values=ret) + + return ret + + def resolve_concept_parsing_expression(self, context, concept, grammar, to_update, to_keep): + if concept.id in self.concepts_grammars: # validated entry + return self.concepts_grammars.get(concept.id) + + if concept.id in grammar: # under construction entry return grammar.get(concept.id) - desc = f"Get parsing expression for '{concept}'" + desc = f"Resolve concept parsing expression for '{concept}'" with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context: if not concept.bnf: # to save a function call. Not sure it worth it. BaseNodeParser.ensure_bnf(sub_context, concept, self.name) @@ -979,52 +1109,41 @@ class BnfNodeParser(BaseNodeParser): desc = f"Bnf concept detected. Resolving parsing expression '{expression}'" with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: ssc.add_inputs(expression=expression) - resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts) + resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep) ssc.add_values(return_values=resolved) elif sheerka.isaset(context, concept): desc = f"Concept is a group. Resolving parsing expression using 'isa'" with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: ssc.add_inputs(concept=concept) - isa_concepts.add(concept.id) concepts_in_group = self.sheerka.get_set_elements(ssc, concept) - # concepts_in_group comes from a set, so the order of its elements is not guaranteed - # to avoid random failure (ie random CHICKEN_AND_EGG), we need to rearrange - # We also remove the root concept (the one from get_parsing_expression()) - - root_concept_as_set = set(context.search( - predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF, - get_obj=lambda ec: ec.obj, - stop=lambda ec: ec.action != BuiltinConcepts.INIT_BNF)) # there only one item in the set - root_concept = list(root_concept_as_set)[0] - reordered = [] + valid_concepts = [] for c in concepts_in_group: - if c.id == root_concept.id: + if c.id == context.root_concept.id: continue - # I do not guaranty the same order every time, but I minimize the ChickenAndEgg random issue - if c.metadata.definition_type == DEFINITION_TYPE_BNF or sheerka.isaset(ssc, c): - reordered.append(c) - else: - reordered.insert(0, c) + c_pe = self.resolve_concept_parsing_expression(context, c, grammar, to_update, to_keep) + if self.check_for_infinite_recursion(c_pe, {concept.id}, True): + continue - nodes = [ConceptExpression(c, rule_name=c.name) for c in reordered] + valid_concepts.append(c) + + nodes = [ConceptExpression(c, rule_name=c.name) for c in valid_concepts] resolved = self.resolve_parsing_expression(ssc, - OrderedChoice(*nodes), + LongestChoice(*nodes), grammar, - to_resolve, - isa_concepts) + to_update, + to_keep) ssc.add_values(concepts_in_group=concepts_in_group) ssc.add_values(return_values=resolved) else: desc = f"Concept is a simple concept." with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: - tokens = Tokenizer(concept.name, yield_eof=False) - nodes = [StrMatch(token.strip_quote) for token in tokens] - expression = nodes[0] if len(nodes) == 1 else Sequence(nodes) - resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts) + to_keep.add(concept.id) + expression = self.get_expression_from_concept_name(concept.name) + resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep) grammar[concept.id] = resolved @@ -1035,7 +1154,7 @@ class BnfNodeParser(BaseNodeParser): sub_context.add_values(return_values=resolved) return resolved - def resolve_parsing_expression(self, context, expression, grammar, to_resolve, isa_concepts): + def resolve_parsing_expression(self, context, expression, grammar, to_update, to_keep): if isinstance(expression, str): ret = StrMatch(expression, ignore_case=self.ignore_case) @@ -1051,7 +1170,7 @@ class BnfNodeParser(BaseNodeParser): unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept) return self.add_error(unknown_concept) - pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts) + pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_update, to_keep) if not isinstance(pe, (ParsingExpression, UnderConstruction)): return pe # an error is detected, escalate it @@ -1060,7 +1179,7 @@ class BnfNodeParser(BaseNodeParser): # return pe # we are looking for ourself, just return it if isinstance(pe, UnderConstruction): - to_resolve[id(expression)] = expression + to_update.add(ToUpdate(context.obj.id, expression)) expression.nodes = [pe] expression.rule_name = expression.rule_name or concept.name @@ -1073,17 +1192,18 @@ class BnfNodeParser(BaseNodeParser): elif isinstance(expression, Sequence) or \ isinstance(expression, OrderedChoice) or \ + isinstance(expression, LongestChoice) or \ isinstance(expression, ZeroOrMore) or \ isinstance(expression, OneOrMore) or \ isinstance(expression, Optional): ret = expression ret.nodes = [] for e in ret.elements: - pe = self.resolve_parsing_expression(context, e, grammar, to_resolve, isa_concepts) + pe = self.resolve_parsing_expression(context, e, grammar, to_update, to_keep) if not isinstance(pe, (ParsingExpression, UnderConstruction)): return pe # an error is detected, escalate it if isinstance(pe, UnderConstruction): - to_resolve[id(ret)] = ret # remember that there is an unresolved parsing expression + to_update.add(ToUpdate(context.obj.id, ret)) ret.nodes.append(pe) else: @@ -1094,8 +1214,8 @@ class BnfNodeParser(BaseNodeParser): expression.sep = self.resolve_parsing_expression(context, expression.sep, grammar, - to_resolve, - isa_concepts) + to_update, + to_keep) return ret diff --git a/tests/core/test_SheerkaEvaluateConcept.py b/tests/core/test_SheerkaEvaluateConcept.py index 0711379..1f87533 100644 --- a/tests/core/test_SheerkaEvaluateConcept.py +++ b/tests/core/test_SheerkaEvaluateConcept.py @@ -494,3 +494,32 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one) assert evaluated.key == one.key assert evaluated.body == 1 + + def test_i_can_evaluate_thousand_concept(self): + sheerka, context, thousand, number, forties, forty, one = self.init_concepts( + Concept("thousand", body="number * 1000").def_var("number"), + Concept("number"), + Concept("forties", body="forty + number").def_var("forty").def_var("number"), + Concept("forty", body="40"), + Concept("one", body="1"), + eval_body=True + ) + + one = sheerka.new("one") + number2 = sheerka.new("number") + number2.compiled["one"] = one + number2.compiled[ConceptParts.BODY] = one + forties = sheerka.new("forties") + forties.compiled["forty"] = sheerka.new("forty") + forties.compiled["number"] = number2 + + number1 = sheerka.new("number") + number1.compiled["forties"] = forties + number1.compiled[ConceptParts.BODY] = forties + + forty_one_thousand = sheerka.new("thousand") + forty_one_thousand.compiled["number"] = number1 + + evaluated = sheerka.evaluate_concept(context, forty_one_thousand) + + assert evaluated.body == 41000 diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 7325b83..58f423d 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -1,9 +1,20 @@ +from dataclasses import dataclass + import core.utils import pytest from core.concept import ConceptParts, Concept from core.tokenizer import Token, TokenKind +@dataclass +class Obj: + prop1: str + prop2: str + + def __hash__(self): + return hash((self.prop1, self.prop1)) + + def get_tokens(lst): res = [] for e in lst: @@ -229,3 +240,9 @@ def test_decode_concept_key_id(): ]) def test_dict_product(a, b, expected): assert core.utils.dict_product(a, b) == expected + + +def test_i_can_make_unique(): + assert core.utils.make_unique(["a", "a", "b", "c", "c"]) == ["a", "b", "c"] + assert core.utils.make_unique([Obj("a", "b"), Obj("a", "c"), Obj("a", "b")]) == [Obj("a", "b"), Obj("a", "c")] + assert core.utils.make_unique([Obj("a", "b"), Obj("a", "c")], lambda o: o.prop1) == [Obj("a", "b")] diff --git a/tests/evaluators/test_ConceptEvaluator.py b/tests/evaluators/test_ConceptEvaluator.py index 767b58c..3ed57e3 100644 --- a/tests/evaluators/test_ConceptEvaluator.py +++ b/tests/evaluators/test_ConceptEvaluator.py @@ -118,3 +118,4 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka): assert not context.sheerka.is_success(error_concept) # it's indeed an error assert result.status assert result.value == error_concept + diff --git a/tests/parsers/test_BaseNodeParser.py b/tests/parsers/test_BaseNodeParser.py index 8929263..bf3e7be 100644 --- a/tests/parsers/test_BaseNodeParser.py +++ b/tests/parsers/test_BaseNodeParser.py @@ -137,28 +137,38 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka): } def test_i_can_resolve_when_concepts_are_sets(self): - sheerka, context, one, two, three, number, foo = self.init_concepts( + sheerka, context, number, *concepts = self.init_concepts( + "number", "one", "two", - "three", - "number", - Concept("foo", definition="number three"), + "twenty", + "hundred", + Concept("twenties", definition="twenty number"), + Concept("hundreds", definition="number hundred"), create_new=True # mandatory because set_isa() needs it ) sheerka.set_isa(context, sheerka.new("one"), number) sheerka.set_isa(context, sheerka.new("two"), number) + sheerka.set_isa(context, sheerka.new("twenty"), number) + sheerka.set_isa(context, sheerka.new("thirty"), number) + sheerka.set_isa(context, sheerka.new("hundred"), number) + sheerka.set_isa(context, sheerka.new("twenties"), number) + sheerka.set_isa(context, sheerka.new("hundreds"), number) - cbfk = BaseNodeParser.get_concepts_by_first_token(context, [one, two, three, number, foo]).body + sheerka.concepts_grammars.clear() # reset all the grammar to simulate Sheerka restart - resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbfk) + # cbft : concept_by_first_token (I usually don't use abbreviation) + cbft = BaseNodeParser.get_concepts_by_first_token(context, [number] + concepts).body + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbft) assert resolved_ret_val.status assert resolved_ret_val.body == { - "one": ["1001", "1005"], - "two": ["1002", "1005"], - "three": ["1003"], - "number": ["1004"], + 'number': ['1001'], + 'one': ['1002', '1007'], + 'two': ['1003', '1007'], + 'twenty': ['1004', '1006', '1007'], + 'hundred': ['1005', '1007'], } def test_concepts_are_defined_once(self): @@ -196,36 +206,58 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka): "one": ["1001", "1002"], } - # def tests_i_can_detect_direct_recursion(self): - # sheerka = self.get_sheerka() - # good = self.get_concept(sheerka, "good") - # foo = self.get_concept(sheerka, "foo", ConceptExpression("bar")) - # bar = self.get_concept(sheerka, "bar", ConceptExpression("foo")) - # - # concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, foo, bar]).body - # - # resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) - # assert resolved_ret_val.status - # assert resolved_ret_val.body == { - # "good": ["1001"], - # BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"] - # } - # - # def test_i_can_detect_indirect_infinite_recursion(self): - # sheerka = self.get_sheerka() - # good = self.get_concept(sheerka, "good") - # one = self.get_concept(sheerka, "one", ConceptExpression("two")) - # two = self.get_concept(sheerka, "two", ConceptExpression("three")) - # three = self.get_concept(sheerka, "three", ConceptExpression("two")) - # - # concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two, three]).body - # - # resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords) - # assert resolved_ret_val.status - # assert resolved_ret_val.body == { - # "good": ["1001"], - # BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1004", "1003"] - # } + def tests_i_can_detect_direct_recursion(self): + sheerka, context, good, foo, bar = self.init_concepts( + "good", + self.bnf_concept("foo", ConceptExpression("bar")), + self.bnf_concept("bar", ConceptExpression("foo")), + ) + + concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, foo, bar]).body + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords) + assert resolved_ret_val.status + assert resolved_ret_val.body == { + "good": ["1001"], + } + assert sheerka.chicken_and_eggs.get(foo.id) == {foo.id, bar.id} + assert sheerka.chicken_and_eggs.get(bar.id) == {foo.id, bar.id} + + def test_i_can_detect_indirect_infinite_recursion(self): + sheerka, context, good, one, two, three = self.init_concepts( + "good", + self.bnf_concept("one", ConceptExpression("two")), + self.bnf_concept("two", ConceptExpression("three")), + self.bnf_concept("three", ConceptExpression("two")), + ) + + concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, one, two, three]).body + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords) + assert resolved_ret_val.status + assert resolved_ret_val.body == { + "good": ["1001"], + } + assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id} + assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id} + assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id} + + def test_i_can_detect_the_longest_infinite_recursion_chain(self): + sheerka, context, good, one, two, three = self.init_concepts( + "good", + self.bnf_concept("two", ConceptExpression("three")), + self.bnf_concept("three", ConceptExpression("two")), + self.bnf_concept("one", ConceptExpression("three")), + ) + + concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, one, two, three]).body + resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords) + assert resolved_ret_val.status + assert resolved_ret_val.body == { + "good": ["1001"], + } + assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id} + assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id} + assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id} + # # def test_i_can_detect_infinite_recursion_from_ordered_choice(self): # sheerka = self.get_sheerka() diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index fe72fa0..00d8aba 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ - Optional, ZeroOrMore, OneOrMore, ConceptExpression + Optional, ZeroOrMore, OneOrMore, ConceptExpression, LongestChoice from parsers.BnfParser import BnfParser import tests.parsers.parsers_utils @@ -16,13 +16,15 @@ cmap = { "two": Concept("two"), "three": Concept("three"), "four": Concept("four"), - "thirty": Concept("thirty", body=30), - "forty": Concept("forty", body=40), - "fifty": Concept("fifty", body=50), + "thirty": Concept("thirty", body="30"), + "forty": Concept("forty", body="40"), + "fifty": Concept("fifty", body="50"), "number": Concept("number"), "foo": Concept("foo"), "bar": Concept("bar"), "baz": Concept("baz"), + "one hundred": Concept("one hundred", body="100"), + "one_hundred": Concept("'one hundred'", body="100"), "bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen @@ -80,17 +82,6 @@ def compute_expected_array(my_concepts_map, expression, expected, exclude_body=F class TestBnfNodeParser(TestUsingMemoryBasedSheerka): sheerka = None - @staticmethod - def update_bnf(context, concept): - bnf_parser = BnfParser() - res = bnf_parser.parse(context, concept.metadata.definition) - if res.status: - concept.bnf = res.value.value - concept.metadata.definition_type = DEFINITION_TYPE_BNF - else: - raise Exception(res) - return concept - @classmethod def setup_class(cls): t = cls() @@ -109,6 +100,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number")) sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number")) sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number")) + sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number")) thirties = cls.update_bnf(context, Concept("thirties", definition="thirty number", @@ -131,6 +123,24 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number")) + thousands = cls.update_bnf(context, Concept("thousands", + definition="number 'thousand'", + where="number < 999", + body="number * 1000").def_var("number")) + cmap["thousands"] = sheerka.create_new_concept(context, thousands).body.body + sheerka.set_isa(context, sheerka.new("thousands"), sheerka.new("number")) + + @staticmethod + def update_bnf(context, concept): + bnf_parser = BnfParser() + res = bnf_parser.parse(context, concept.metadata.definition) + if res.status: + concept.bnf = res.value.value + concept.metadata.definition_type = DEFINITION_TYPE_BNF + else: + raise Exception(res) + return concept + def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs): if my_concepts_map is not None: sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs) @@ -351,6 +361,21 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): self.validate_get_concepts_sequences(my_map, text, expected) + @pytest.mark.parametrize("text, expected", [ + ("one", [CNC("foo", source="one")]), + ("one two", [CNC("foo", source="one two")]), + ("three", []), + + ]) + def test_i_can_parse_longest_choice(self, text, expected): + my_map = { + "foo": self.bnf_concept("foo", LongestChoice( + StrMatch("one"), + Sequence(StrMatch("one"), StrMatch("two")))), + } + + self.validate_get_concepts_sequences(my_map, text, expected) + @pytest.mark.parametrize("text, expected", [ ("one", [CNC("foo", source="one")]), ("", []), @@ -724,7 +749,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): "foo": self.bnf_concept("foo", Sequence( StrMatch("twenty"), - OrderedChoice(ConceptExpression("one"), ConceptExpression("two")))), + OrderedChoice( + ConceptExpression("one"), + ConceptExpression("two"), + rule_name="unit"))), } text = "twenty one" @@ -733,7 +761,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): concept_foo = sequences[0].concept assert concept_foo.compiled == { ConceptParts.BODY: DoNotResolve("twenty one"), - "one": my_map["one"], + "unit": my_map["one"], } def test_i_can_refer_to_group_concepts(self): @@ -755,8 +783,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): # explicit validations of the compiled concept_foo = sequences[0].concept assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED - assert concept_foo.compiled == {'number': my_map["number"], - 'two': my_map["two"], + assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["two"], two=my_map["two"]), ConceptParts.BODY: DoNotResolve(value='twenty two')} text = "twenty one" @@ -766,8 +793,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): # explicit validations of the compiled concept_foo = sequences[0].concept assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED - assert concept_foo.compiled == {'number': my_map["number"], - 'one': my_map["one"], + assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]), ConceptParts.BODY: DoNotResolve(value='twenty one')} @pytest.mark.parametrize("bar_expr, expected", [ @@ -842,8 +868,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): res = parser.get_parsing_expression(context, my_map["foo"]) assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected - - def test_i_can_get_parsing_expression_when_concept_isa(self): + def test_i_can_get_parsing_expression_when_ending_by_concept_isa(self): my_map = { "one": Concept("one"), "twenty": Concept("twenty"), @@ -869,42 +894,43 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): number_nodes = parsing_expression.nodes[1].nodes assert len(number_nodes) == 1 - assert isinstance(number_nodes[0], OrderedChoice) + assert isinstance(number_nodes[0], LongestChoice) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes assert my_map["number"].id not in parser.concepts_grammars - # - # def test_i_cannot_get_parsing_expression_when_concept_is_part_of_a_group(self): - # """ - # In this test, twenties isa number - # # So 'number' in Sequence(thirty, number) will spawn 'twenties' which, because there is no other indication, - # # will create an infinite loop - # :return: - # """ - # my_map = { - # "one": Concept("one"), - # "twenty": Concept("twenty"), - # "number": Concept("number"), - # "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number"))) - # } - # sheerka, context, parser = self.init_parser(my_map, singleton=True) - # parser.context = context - # parser.sheerka = sheerka - # sheerka.set_isa(context, sheerka.new("one"), my_map["number"]) - # sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"]) - # sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"]) # <- twenties is also a number - # - # parser.concepts_grammars.clear() # make sure parsing expression is created from scratch - # - # parsing_expression = parser.get_parsing_expression(context, my_map["twenties"]) - # assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG) - # assert parsing_expression.body == {my_map["twenties"].id, my_map["number"].id} - # - # assert isinstance(parser.concepts_grammars.get(my_map["one"].id), ParsingExpression) - # assert isinstance(parser.concepts_grammars.get(my_map["twenty"].id), ParsingExpression) + def test_i_can_get_parsing_expression_when_starting_by_isa_concept(self): + my_map = { + "one": Concept("one"), + "two": Concept("two"), + "number": Concept("number"), + "hundreds": self.bnf_concept("hundreds", Sequence(ConceptExpression("number"), StrMatch("hundred"))) + } + + sheerka, context, parser = self.init_parser(my_map, singleton=True) + parser.context = context + parser.sheerka = sheerka + sheerka.set_isa(context, sheerka.new("one"), my_map["number"]) + sheerka.set_isa(context, sheerka.new("two"), my_map["number"]) + sheerka.set_isa(context, sheerka.new("hundreds"), my_map["number"]) + + parser.concepts_grammars.clear() # make sure parsing expression is created from scratch + parsing_expression = parser.get_parsing_expression(context, my_map["hundreds"]) + + assert parsing_expression == Sequence( + ConceptExpression(my_map["number"], rule_name="number"), + StrMatch("hundred")) + + assert len(parsing_expression.nodes) == len(parsing_expression.elements) + + number_nodes = parsing_expression.nodes[0].nodes + assert len(number_nodes) == 1 + assert isinstance(number_nodes[0], LongestChoice) + assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) + assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes + assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self): my_map = { @@ -933,7 +959,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): number_nodes = parsing_expression.nodes[1].nodes assert len(number_nodes) == 1 - assert isinstance(number_nodes[0], OrderedChoice) + assert isinstance(number_nodes[0], LongestChoice) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes @@ -994,7 +1020,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): ("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]), ("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]), ("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]), - ("twenty one", True, [CNC("t1", source="twenty one", unit="one", one="one")]), + ("twenty one", True, [CNC("t1", source="twenty one", unit="one")]), ]) def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected): sheerka, context, parser = self.init_parser(init_from_sheerka=True) @@ -1008,7 +1034,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert concepts_nodes == expected_array - def test_i_can_when_multiple_times_the_same_variable(self): + def test_i_can_parse_when_multiple_times_the_same_variable(self): sheerka, context, parser = self.init_parser(init_from_sheerka=True) text = "foo foo foo" @@ -1032,8 +1058,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): unit=CC("three_four", source="four", four=CC("four", body=DoNotResolve("four")), - body=CC("four", body=DoNotResolve("four"))), - four="four") + body=CC("four", body=DoNotResolve("four")))) expected_array = compute_expected_array(cmap, text, [expected]) res = parser.parse(context, ParserInput(text)) @@ -1044,40 +1069,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert concepts_nodes == expected_array - # def test_i_cannot_parse_bnf_concept_mixed_with_isa_concepts(self): - # sheerka, context, parser = self.init_parser(init_from_sheerka=True) - # - # # thirties = cls.update_bnf(context, Concept("thirties", - # # definition="thirty number", - # # where="number < 10", - # # body="thirty + number").def_var("thirty").def_var("number")) - # # with thirties isa number - # # So number in 'thirty number' will spawn 'thirties' which, because there is no other indication, will - # # create an infinite loop - # - # text = "thirty one" - # expected = CNC("thirties", - # source=text, - # number=CC("number", - # source="one", - # one=CC("one", body=DoNotResolve("one")), - # body=CC("one", body=DoNotResolve("one"))), - # one=CC("one", body=DoNotResolve("one")), - # thirty="thirty") - # expected_array = compute_expected_array(cmap, text, [expected]) - # - # res = parser.parse(context, ParserInput(text)) - # not_for_me = res.value - # reason = res.value.body - # - # assert not res.status - # assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME) - # assert sheerka.isinstance(reason, BuiltinConcepts.CHICKEN_AND_EGG) - # assert reason.body == {cmap["thirties"].id, cmap["number"].id} - def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self): sheerka, context, parser = self.init_parser(init_from_sheerka=True) - + # thirties is defined in the global variable cmap as # thirties = cls.update_bnf(context, Concept("thirties", # definition="thirty number", # where="number < 10", @@ -1090,7 +1084,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): source="one", one=CC("one", body=DoNotResolve("one")), body=CC("one", body=DoNotResolve("one"))), - one=CC("one", body=DoNotResolve("one")), thirty="thirty") expected_array = compute_expected_array(cmap, text, [expected]) @@ -1113,7 +1106,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): source="three", three=CC("three", body=DoNotResolve("three")), body=CC("three", body=DoNotResolve("three"))), - three=CC("three", body=DoNotResolve("three")), thirty="thirty") expected_array = compute_expected_array(cmap, text, [expected]) @@ -1125,6 +1117,56 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert concepts_nodes == expected_array + def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_when_concept_starts_with_isa(self): + sheerka, context, parser = self.init_parser(init_from_sheerka=True) + one = CC("one", body=DoNotResolve("one")) + + text = "one thousand" + expected = CNC("thousands", + source=text, + number=CC("number", + source="one", + one=one, + body=one)) + expected_array = compute_expected_array(cmap, text, [expected]) + + res = parser.parse(context, ParserInput(text)) + parser_result = res.value + concepts_nodes = res.value.value + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert concepts_nodes == expected_array + + sheerka.concepts_grammars.clear() # to simulate restart + text = "fifty one thousand" + fifty_one = CC("fifties", + source="fifty one", + fifty="fifty", + number=CC("number", source="one", body=one, one=one)) + expected = CNC("thousands", + source=text, + number=CC("number", + source="fifty one", + fifties=fifty_one, + body=fifty_one)) + expected_array = compute_expected_array(cmap, text, [expected]) + + res = parser.parse(context, ParserInput(text)) + parser_result = res.value + concepts_nodes = res.value.value + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert concepts_nodes == expected_array + + text = "one hundred thousand" + res = parser.parse(context, ParserInput(text)) + parser_result = res.value + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self): sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka.concepts_grammars.clear() # simulate restart @@ -1138,7 +1180,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): source="three", three=CC("three", body=DoNotResolve("three")), body=CC("three", body=DoNotResolve("three"))), - three=CC("three", body=DoNotResolve("three")), thirty="thirty") expected_array = compute_expected_array(cmap, text, [expected]) @@ -1157,7 +1198,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): source="one", one=CC("one", body=DoNotResolve("one")), body=CC("one", body=DoNotResolve("one"))), - one=CC("one", body=DoNotResolve("one")), forty="forty") expected_array = compute_expected_array(cmap, text, [expected]) @@ -1173,7 +1213,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(init_from_sheerka=True) parser_input = "def one" - expected = [CNC("def number", source="def one", number="one", one="one")] + expected = [CNC("def number", source="def one", number="one")] res = parser.parse(context, ParserInput(parser_input)) expected_array = compute_expected_array(cmap, parser_input, expected) @@ -1190,7 +1230,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser(init_from_sheerka=True) expression = "--filter one" - expected = [CNC("filter", source="--filter one", one="one")] + expected = [CN("filter", source="--filter one")] res = parser.parse(context, ParserInput(expression)) expected_array = compute_expected_array(cmap, expression, expected) @@ -1233,10 +1273,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): CC(factor, body=DoNotResolve("2")), CC(factor, body=DoNotResolve("3")), ])], - factor=[ - CC(factor, body=DoNotResolve("1")), - CC(factor, body=DoNotResolve("2")), - CC(factor, body=DoNotResolve("3"))], body=DoNotResolve("1 + 2 * 3"))] def test_i_can_parse_recursive_descent_grammar(self): @@ -1255,8 +1291,34 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): text = "1 + 2 * 3" res = parser.parse(context, ParserInput(text)) + parser_result = res.value + concepts_nodes = res.value.value + factor = my_map["factor"] + term = my_map["term"] + expr = my_map["expr"] + # concepts_nodes = res.value.value is too complicated to be validated assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert concepts_nodes == [CNC(expr, + term=CC(term, + body=CC(factor, body=DoNotResolve("1")), + factor=CC(factor, body=DoNotResolve("1"))), + expr=CC(expr, + body=CC(term, + body=DoNotResolve("2 * 3"), + factor=CC(factor, body=DoNotResolve("2")), + term=CC(term, + body=CC(factor, body=DoNotResolve("3")), + factor=CC(factor, body=DoNotResolve("3")))), + term=CC(term, + body=DoNotResolve("2 * 3"), + factor=CC(factor, body=DoNotResolve("2")), + term=CC(term, + body=CC(factor, body=DoNotResolve("3")), + factor=CC(factor, body=DoNotResolve("3"))))), + + body=DoNotResolve("1 + 2 * 3"))] def test_i_can_parse_simple_recursive_grammar(self): my_map = { @@ -1271,6 +1333,21 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert parser.parse(context, ParserInput("foo foo foo bar")).status assert not parser.parse(context, ParserInput("foo baz")).status + @pytest.mark.parametrize("name, expected", [ + (None, []), + ("", []), + ("foo", StrMatch("foo")), + ("foo bar", Sequence(StrMatch("foo"), StrMatch("bar"))), + ("'foo bar baz' qux", Sequence(StrMatch("foo", skip_whitespace=False), + StrMatch(" ", skip_whitespace=False), + StrMatch("bar", skip_whitespace=False), + StrMatch(" ", skip_whitespace=False), + StrMatch("baz"), + StrMatch("qux"))), + ]) + def test_i_can_get_expression_from_concept_name(self, name, expected): + assert BnfNodeParser.get_expression_from_concept_name(name) == expected + # @pytest.mark.parametrize("parser_input, expected", [ # ("one", [ # (True, [CNC("bnf_one", source="one", one="one", body="one")]), diff --git a/tests/parsers/test_UnrecognizedNodeParser.py b/tests/parsers/test_UnrecognizedNodeParser.py index 0f0e8b4..c17ae57 100644 --- a/tests/parsers/test_UnrecognizedNodeParser.py +++ b/tests/parsers/test_UnrecognizedNodeParser.py @@ -128,7 +128,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): expected_nodes = compute_expected_array( concepts_map, " twenty one ", - [CNC("twenties", source="twenty one", unit="one", one="one")]) + [CNC("twenties", source="twenty one", unit="one")]) assert concept.compiled["c"][0].body.body == expected_nodes assert len(concept.compiled["d"]) == 1 @@ -185,7 +185,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): expected_nodes = compute_expected_array( concepts_map, " twenty two", - [CNC("twenties", source="twenty two", unit="two", two="two")]) + [CNC("twenties", source="twenty two", unit="two")]) assert res.body.concept.compiled["b"].compiled["b"][0].body.body == expected_nodes # def test_i_can_validate_and_evaluate_a_concept_node_with_python(self): @@ -281,7 +281,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): assert len(actual_nodes) == 1 expected_array = compute_expected_array( concepts_map, - expression, [CNC("twenties", source=expression, unit="one", one="one")]) + expression, [CNC("twenties", source=expression, unit="one")]) assert actual_nodes == expected_array def test_i_can_parse_unrecognized_sya_concept_node(self):