Fixed BnfNodeParser to allow expressions like 'number hundred' when number is a group
This commit is contained in:
+6
-2
@@ -71,8 +71,12 @@ def concept nineties from bnf ninety number where number < 10 as ninety + number
|
|||||||
nineties isa number
|
nineties isa number
|
||||||
# def concept hundreds1 from number 'hundred' where number < 10 as number * 100
|
# def concept hundreds1 from number 'hundred' where number < 10 as number * 100
|
||||||
# def concept hundreds2 from number=number1 'hundred' 'and' number=number2 where number1 < 10 and number2 < 100 as number1 * 100 + number2
|
# def concept hundreds2 from number=number1 'hundred' 'and' number=number2 where number1 < 10 and number2 < 100 as number1 * 100 + number2
|
||||||
# def concept one hundred as 100
|
def concept one hundred as 100
|
||||||
# c:one hundred: isa number
|
one hundred isa number
|
||||||
# hundreds1 isa number
|
# hundreds1 isa number
|
||||||
# hundreds2 isa number
|
# hundreds2 isa number
|
||||||
|
def concept thousands from bnf number 'thousand' where number < 1000 as number * 1000
|
||||||
|
thousands isa number
|
||||||
def concept history as history()
|
def concept history as history()
|
||||||
|
def concept plus from a plus b as a + b
|
||||||
|
def concept mult from a mult b as a * b
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ class Sheerka(Concept):
|
|||||||
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"
|
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"
|
||||||
RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY = "Resolved_Concepts_Sya_Definitions"
|
RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY = "Resolved_Concepts_Sya_Definitions"
|
||||||
CONCEPTS_GRAMMARS_ENTRY = "Concepts_Grammars"
|
CONCEPTS_GRAMMARS_ENTRY = "Concepts_Grammars"
|
||||||
|
CHICKEN_AND_EGG_CONCEPTS_ENTRY = "Chicken_And_Egg_Concepts"
|
||||||
|
|
||||||
CONCEPTS_KEYS_ENTRY = "Concepts_Keys"
|
CONCEPTS_KEYS_ENTRY = "Concepts_Keys"
|
||||||
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
|
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
|
||||||
@@ -105,6 +106,10 @@ class Sheerka(Concept):
|
|||||||
|
|
||||||
@property
def concepts_grammars(self):
    """
    Cache registered under CONCEPTS_GRAMMARS_ENTRY.

    :return: the ``cache`` attribute of the cache registered under that entry
    """
    # Each accessor must read the entry that bears its own name;
    # the two keys were previously crossed.
    return self.cache_manager.caches[self.CONCEPTS_GRAMMARS_ENTRY].cache

@property
def chicken_and_eggs(self):
    """
    Cache registered under CHICKEN_AND_EGG_CONCEPTS_ENTRY.

    :return: the ``cache`` attribute of the cache registered under that entry
    """
    return self.cache_manager.caches[self.CHICKEN_AND_EGG_CONCEPTS_ENTRY].cache
|
||||||
|
|
||||||
def bind_service_method(self, bound_method, as_name=None):
|
def bind_service_method(self, bound_method, as_name=None):
|
||||||
@@ -227,6 +232,9 @@ class Sheerka(Concept):
|
|||||||
cache = Cache()
|
cache = Cache()
|
||||||
self.cache_manager.register_cache(self.CONCEPTS_GRAMMARS_ENTRY, cache, persist=False)
|
self.cache_manager.register_cache(self.CONCEPTS_GRAMMARS_ENTRY, cache, persist=False)
|
||||||
|
|
||||||
|
cache = Cache()
|
||||||
|
self.cache_manager.register_cache(self.CHICKEN_AND_EGG_CONCEPTS_ENTRY, cache, persist=False)
|
||||||
|
|
||||||
def initialize_services(self):
|
def initialize_services(self):
|
||||||
"""
|
"""
|
||||||
Introspect to find services and bind them
|
Introspect to find services and bind them
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ class SheerkaEvaluateConcept(BaseService):
|
|||||||
|
|
||||||
parent = context.get_parent()
|
parent = context.get_parent()
|
||||||
while parent is not None:
|
while parent is not None:
|
||||||
if parent.who == context.who and parent.obj == concept:
|
if parent.who == context.who and parent.obj == concept and parent.obj.compiled == concept.compiled:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
parent = parent.get_parent()
|
parent = parent.get_parent()
|
||||||
@@ -147,6 +147,11 @@ class SheerkaEvaluateConcept(BaseService):
|
|||||||
|
|
||||||
def resolve(self, context, to_resolve, current_prop, current_concept, force_evaluation):
|
def resolve(self, context, to_resolve, current_prop, current_concept, force_evaluation):
|
||||||
|
|
||||||
|
def get_path(context_, prop_name):
    """
    Build the dotted path used to describe the property being evaluated.

    :param context_: context whose ``path`` attribute (when present) is the prefix
    :param prop_name: a ConceptParts member (its ``name`` is used) or the property name itself
    :return: "<prefix>.<property>", with "<N/A>" as prefix when the context has no ``path``
    """
    prefix = getattr(context_, "path", "<N/A>")
    # Test the argument itself rather than the enclosing function's current_prop,
    # so the helper stays correct whatever it is called with.
    value = prop_name.name if isinstance(prop_name, ConceptParts) else prop_name
    # f-string instead of '+' so that a non-string part cannot raise TypeError
    return f"{prefix}.{value}"
|
||||||
|
|
||||||
if isinstance(to_resolve, DoNotResolve):
|
if isinstance(to_resolve, DoNotResolve):
|
||||||
return to_resolve.value
|
return to_resolve.value
|
||||||
|
|
||||||
@@ -161,12 +166,14 @@ class SheerkaEvaluateConcept(BaseService):
|
|||||||
sub_context.add_values(return_values=ret_val)
|
sub_context.add_values(return_values=ret_val)
|
||||||
return ret_val.body
|
return ret_val.body
|
||||||
|
|
||||||
desc = f"Evaluating {current_prop} (concept={current_concept})"
|
path = get_path(context, current_prop)
|
||||||
|
desc = f"Evaluating {path} (concept={current_concept})"
|
||||||
context.log(desc, self.NAME)
|
context.log(desc, self.NAME)
|
||||||
with context.push(BuiltinConcepts.EVALUATING_CONCEPT,
|
with context.push(BuiltinConcepts.EVALUATING_CONCEPT,
|
||||||
current_prop,
|
current_prop,
|
||||||
desc=desc,
|
desc=desc,
|
||||||
obj=current_concept) as sub_context:
|
obj=current_concept,
|
||||||
|
path=path) as sub_context:
|
||||||
|
|
||||||
if force_evaluation:
|
if force_evaluation:
|
||||||
sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED)
|
sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED)
|
||||||
|
|||||||
@@ -76,6 +76,7 @@ class SheerkaSetsManager(BaseService):
|
|||||||
self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set))
|
self.sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_IN_SET, body=concept, concept_set=concept_set))
|
||||||
|
|
||||||
self.sets.put(concept_set.id, concept.id)
|
self.sets.put(concept_set.id, concept.id)
|
||||||
|
self.concepts_in_set.delete(concept_set.id)
|
||||||
return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
|
return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
|
||||||
|
|
||||||
def add_concepts_to_set(self, context, concepts, concept_set):
|
def add_concepts_to_set(self, context, concepts, concept_set):
|
||||||
@@ -95,6 +96,7 @@ class SheerkaSetsManager(BaseService):
|
|||||||
concept_set=concept_set)
|
concept_set=concept_set)
|
||||||
else:
|
else:
|
||||||
body = self.sheerka.new(BuiltinConcepts.SUCCESS)
|
body = self.sheerka.new(BuiltinConcepts.SUCCESS)
|
||||||
|
self.concepts_in_set.delete(concept_set.id)
|
||||||
|
|
||||||
return self.sheerka.ret(self.NAME, len(already_in_set) != len(concepts), body)
|
return self.sheerka.ret(self.NAME, len(already_in_set) != len(concepts), body)
|
||||||
|
|
||||||
|
|||||||
+3
-3
@@ -205,9 +205,9 @@ def make_unique(lst, get_id=None):
|
|||||||
yield x
|
yield x
|
||||||
else:
|
else:
|
||||||
for x in seq:
|
for x in seq:
|
||||||
x = get_id(x)
|
_id = get_id(x)
|
||||||
if x not in seen:
|
if _id not in seen:
|
||||||
seen.add(x)
|
seen.add(_id)
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
return list(_make_unique(lst, get_id))
|
return list(_make_unique(lst, get_id))
|
||||||
|
|||||||
@@ -264,6 +264,7 @@ class AtomNodeParser(BaseNodeParser):
|
|||||||
concept_parser.lock()
|
concept_parser.lock()
|
||||||
|
|
||||||
concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
|
concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
|
||||||
|
#self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
|
||||||
if not concepts:
|
if not concepts:
|
||||||
for concept_parser in concept_parser_helpers:
|
for concept_parser in concept_parser_helpers:
|
||||||
concept_parser.eat_unrecognized(token, pos)
|
concept_parser.eat_unrecognized(token, pos)
|
||||||
|
|||||||
@@ -525,7 +525,7 @@ class CNC(CN):
|
|||||||
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
|
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
|
||||||
else:
|
else:
|
||||||
to_compare = other.concept.compiled
|
to_compare = other.concept.compiled
|
||||||
if self.compiled == to_compare:
|
if self.compiled == to_compare: # expanded form to ease the debug
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
@@ -673,7 +673,8 @@ class BaseNodeParser(BaseParser):
|
|||||||
|
|
||||||
concept = to_map(self, concept) if to_map else concept
|
concept = to_map(self, concept) if to_map else concept
|
||||||
result.append(concept)
|
result.append(concept)
|
||||||
return result + custom_concepts
|
return core.utils.make_unique(result + custom_concepts,
|
||||||
|
lambda c: c.concept.id if hasattr(c, "concept") else c.id)
|
||||||
|
|
||||||
return custom_concepts if custom else None
|
return custom_concepts if custom else None
|
||||||
|
|
||||||
@@ -707,16 +708,20 @@ class BaseNodeParser(BaseParser):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
|
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
|
||||||
sheerka = context.sheerka
|
sheerka = context.sheerka
|
||||||
|
res = {}
|
||||||
|
|
||||||
def resolve_concepts(concept_str):
|
def resolve_concepts(concept_str):
|
||||||
|
c_key, c_id = core.utils.unstr_concept(concept_str)
|
||||||
|
if c_id in already_seen:
|
||||||
|
return ChickenAndEggError(already_seen)
|
||||||
|
|
||||||
|
already_seen.add(c_id)
|
||||||
|
|
||||||
resolved = set()
|
resolved = set()
|
||||||
to_resolve = set()
|
to_resolve = set()
|
||||||
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
|
chicken_and_egg = set()
|
||||||
|
|
||||||
if concept.id in already_seen:
|
concept = sheerka.get_by_id(c_id)
|
||||||
raise ChickenAndEggError(already_seen)
|
|
||||||
else:
|
|
||||||
already_seen.add(concept.id)
|
|
||||||
|
|
||||||
if sheerka.isaset(context, concept):
|
if sheerka.isaset(context, concept):
|
||||||
concepts = sheerka.get_set_elements(context, concept)
|
concepts = sheerka.get_set_elements(context, concept)
|
||||||
@@ -730,11 +735,18 @@ class BaseNodeParser(BaseParser):
|
|||||||
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
|
(to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
|
||||||
|
|
||||||
for concept_to_resolve_str in to_resolve:
|
for concept_to_resolve_str in to_resolve:
|
||||||
resolved |= resolve_concepts(concept_to_resolve_str)
|
res = resolve_concepts(concept_to_resolve_str)
|
||||||
|
if isinstance(res, ChickenAndEggError):
|
||||||
|
chicken_and_egg |= res.concepts
|
||||||
|
else:
|
||||||
|
resolved |= res
|
||||||
|
to_resolve.clear()
|
||||||
|
|
||||||
return resolved
|
if len(resolved) == 0 and len(chicken_and_egg) > 0:
|
||||||
|
raise ChickenAndEggError(chicken_and_egg)
|
||||||
|
else:
|
||||||
|
return resolved
|
||||||
|
|
||||||
res = {}
|
|
||||||
for k, v in concepts_by_first_keyword.items():
|
for k, v in concepts_by_first_keyword.items():
|
||||||
if k.startswith("c:|"):
|
if k.startswith("c:|"):
|
||||||
try:
|
try:
|
||||||
@@ -744,8 +756,16 @@ class BaseNodeParser(BaseParser):
|
|||||||
res.setdefault(resolved, []).extend(v)
|
res.setdefault(resolved, []).extend(v)
|
||||||
except ChickenAndEggError as ex:
|
except ChickenAndEggError as ex:
|
||||||
context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
|
context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
|
||||||
# res[k] = sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG,
|
concepts_in_recursion = ex.concepts
|
||||||
# body=[sheerka.get_by_id(c) for c in ex.concepts])
|
# make sure to have all the parents
|
||||||
|
for parent in v:
|
||||||
|
concepts_in_recursion.add(parent)
|
||||||
|
|
||||||
|
for concept_id in concepts_in_recursion:
|
||||||
|
# make sure we keep the longest chain
|
||||||
|
old = sheerka.chicken_and_eggs.get(concept_id)
|
||||||
|
if old is None or len(old) < len(ex.concepts):
|
||||||
|
sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)
|
||||||
else:
|
else:
|
||||||
res.setdefault(k, []).extend(v)
|
res.setdefault(k, []).extend(v)
|
||||||
|
|
||||||
@@ -755,6 +775,53 @@ class BaseNodeParser(BaseParser):
|
|||||||
|
|
||||||
return sheerka.ret("BaseNodeParser", True, res)
|
return sheerka.ret("BaseNodeParser", True, res)
|
||||||
|
|
||||||
|
@staticmethod
def get_referenced_concepts(context, concept_id, already_seen):
    """
    Collect the starting tokens that may allow to recognize concept ``concept_id``.

    When the concept is a set, the first tokens of every element of the set are collected.
    First tokens that are themselves concept references (they start with "c:|")
    are followed recursively.

    :param context: execution context; ``context.sheerka`` is used for the lookups
    :param concept_id: id of the concept to inspect
    :param already_seen: set of the concept ids visited so far (updated in place)
    :return: the set of first tokens, or a ChickenAndEggError instance (returned, not raised)
             when ``concept_id`` was already visited
    :raises ChickenAndEggError: when no token could be resolved and at least one
                                circular reference was met
    """
    if concept_id in already_seen:
        return ChickenAndEggError(already_seen)
    already_seen.add(concept_id)

    sheerka = context.sheerka
    root = sheerka.get_by_id(concept_id)
    if sheerka.isaset(context, root):
        members = sheerka.get_set_elements(context, root)
    else:
        members = [root]

    first_tokens = set()
    references = set()
    for member in members:
        BaseNodeParser.ensure_bnf(context, member)  # need to make sure that it cannot fail
        for keyword in BaseNodeParser.get_first_tokens(sheerka, member):
            if keyword.startswith("c:|"):
                references.add(keyword)
            else:
                first_tokens.add(keyword)

    cycles = set()
    for reference in references:
        _, referenced_id = core.utils.unstr_concept(reference)
        outcome = BaseNodeParser.get_referenced_concepts(context, referenced_id, already_seen)
        if isinstance(outcome, ChickenAndEggError):
            cycles |= outcome.concepts
        else:
            first_tokens |= outcome

    # Only fail when the circular references left us with nothing usable
    if cycles and not first_tokens:
        raise ChickenAndEggError(cycles)
    return first_tokens
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def resolve_sya_associativity_and_precedence(context, sya):
|
def resolve_sya_associativity_and_precedence(context, sya):
|
||||||
pass
|
pass
|
||||||
|
|||||||
+185
-65
@@ -38,7 +38,7 @@ class NonTerminalNode(LexerNode):
|
|||||||
self.children = children
|
self.children = children
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
|
name = "Node:" + (self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__)
|
||||||
if len(self.children) > 0:
|
if len(self.children) > 0:
|
||||||
sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
|
sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
|
||||||
else:
|
else:
|
||||||
@@ -69,7 +69,7 @@ class TerminalNode(LexerNode):
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
name = self.parsing_expression.rule_name or ""
|
name = "Node:" + (self.parsing_expression.rule_name or "")
|
||||||
return name + f"'{self.value}'"
|
return name + f"'{self.value}'"
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
@@ -186,7 +186,7 @@ class Sequence(ParsingExpression):
|
|||||||
|
|
||||||
class OrderedChoice(ParsingExpression):
|
class OrderedChoice(ParsingExpression):
|
||||||
"""
|
"""
|
||||||
Will match one among multiple
|
Will match the first one among multiple
|
||||||
It will stop at the first match (so the order of definition is important)
|
It will stop at the first match (so the order of definition is important)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -211,6 +211,42 @@ class OrderedChoice(ParsingExpression):
|
|||||||
return self.add_rule_name_if_needed(f"({to_str})")
|
return self.add_rule_name_if_needed(f"({to_str})")
|
||||||
|
|
||||||
|
|
||||||
|
class LongestChoice(ParsingExpression):
    """
    Match the longest alternative among several.

    Every alternative is tried from the same starting position, so the order
    of definition is not important.
    The behaviour when several candidates have the same length is not defined yet
    (as written, the first one that was tried is kept).
    """

    def _parse(self, parser_helper):
        start = parser_helper.pos
        best = None
        best_pos = -1

        for alternative in self.nodes:
            candidate = alternative.parse(parser_helper)
            if candidate and (best is None or candidate.end > best.end):
                best, best_pos = candidate, parser_helper.pos
            parser_helper.seek(start)  # backtrack before trying the next alternative

        if best is None:
            return None

        # Leave the parser where the winning alternative stopped
        parser_helper.seek(best_pos)
        matched_tokens = parser_helper.parser.parser_input.tokens[start: best.end + 1]
        return NonTerminalNode(self, start, best.end, matched_tokens, [best])

    def __repr__(self):
        alternatives = [repr(n) for n in self.elements]
        to_str = "# ".join(alternatives)
        return self.add_rule_name_if_needed(f"({to_str})")
|
||||||
|
|
||||||
|
|
||||||
class Optional(ParsingExpression):
|
class Optional(ParsingExpression):
|
||||||
"""
|
"""
|
||||||
Will match or not the elements
|
Will match or not the elements
|
||||||
@@ -386,7 +422,12 @@ class StrMatch(Match):
|
|||||||
self.skip_white_space = skip_whitespace
|
self.skip_white_space = skip_whitespace
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return self.add_rule_name_if_needed(f"'{self.to_match}'")
|
text = self.to_match
|
||||||
|
if not self.ignore_case:
|
||||||
|
text += "#!ic"
|
||||||
|
if not self.skip_white_space:
|
||||||
|
text += "#!sw"
|
||||||
|
return self.add_rule_name_if_needed(f"'{text}'")
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not super().__eq__(other):
|
if not super().__eq__(other):
|
||||||
@@ -395,7 +436,9 @@ class StrMatch(Match):
|
|||||||
if not isinstance(other, StrMatch):
|
if not isinstance(other, StrMatch):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return self.to_match == other.to_match and self.ignore_case == other.ignore_case
|
return self.to_match == other.to_match and \
|
||||||
|
self.ignore_case == other.ignore_case and \
|
||||||
|
self.skip_white_space == other.skip_white_space
|
||||||
|
|
||||||
def _parse(self, parser_helper):
|
def _parse(self, parser_helper):
|
||||||
token = parser_helper.get_token()
|
token = parser_helper.get_token()
|
||||||
@@ -766,7 +809,7 @@ class BnfConceptParserHelper:
|
|||||||
_add_prop(_concept, _underlying.parsing_expression.rule_name, value)
|
_add_prop(_concept, _underlying.parsing_expression.rule_name, value)
|
||||||
_concept.metadata.need_validation = True
|
_concept.metadata.need_validation = True
|
||||||
|
|
||||||
if isinstance(_underlying, NonTerminalNode):
|
elif isinstance(_underlying, NonTerminalNode):
|
||||||
for child in _underlying.children:
|
for child in _underlying.children:
|
||||||
_process_rule_name(_concept, child)
|
_process_rule_name(_concept, child)
|
||||||
|
|
||||||
@@ -789,6 +832,15 @@ class UnderConstruction:
|
|||||||
concept_id: str
|
concept_id: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ToUpdate:
    """
    A parsing expression that still holds UnderConstruction nodes, together with
    the id of the concept on whose behalf it was built.

    Instances are stored in a set: the hash only uses ``parent_id`` while the
    equality generated by ``dataclass`` compares both fields.
    """
    # id of the owning concept
    # NOTE(review): annotated as int, but concept ids look like str elsewhere - confirm
    parent_id: int
    # expression whose nodes must be patched once the grammar is complete
    parsing_expression: ParsingExpression

    def __hash__(self):
        # the parsing expression is deliberately left out of the hash
        return hash(self.parent_id)
|
||||||
|
|
||||||
|
|
||||||
class BnfNodeParser(BaseNodeParser):
|
class BnfNodeParser(BaseNodeParser):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__("BnfNode", 50, **kwargs)
|
super().__init__("BnfNode", 50, **kwargs)
|
||||||
@@ -824,6 +876,34 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
|
|
||||||
return valid_parser_helpers
|
return valid_parser_helpers
|
||||||
|
|
||||||
|
@staticmethod
def get_expression_from_concept_name(name):
    """
    Create the parsing expression from the name of a concept.

    This function differs from BNFParser.parse() as it does not try to resolve
    identifiers into concepts: every token becomes a StrMatch.
    For instance, 'one hundred' gives Sequence(StrMatch("one"), StrMatch("hundred")),
    while BNFParser.parse("one hundred") will look for concept 'one' and concept 'hundred'.

    Quoted parts of the name are tokenized again; all their tokens but the last
    one are matched with skip_whitespace=False.

    :param name: name of the concept (may be None or blank)
    :return: [] when there is nothing to match, the StrMatch itself when there is
             a single token, a Sequence of StrMatch otherwise
    """
    if name is None or name.strip() == "":
        return []

    res = []
    for token in Tokenizer(name, yield_eof=False):
        if token.type == TokenKind.WHITESPACE:
            continue

        if token.type == TokenKind.STRING:
            sub_tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
            if not sub_tokens:
                # empty quoted literal: nothing to match (sub_tokens[-1] would raise IndexError)
                continue
            for sub_token in sub_tokens[:-1]:
                res.append(StrMatch(sub_token.str_value, skip_whitespace=False))
            res.append(StrMatch(sub_tokens[-1].str_value))
        else:
            res.append(StrMatch(token.str_value))

    if not res:
        # same result as for a blank name, rather than an empty Sequence
        return []

    return res[0] if len(res) == 1 else Sequence(*res)
|
||||||
|
|
||||||
def get_concepts_sequences(self):
|
def get_concepts_sequences(self):
|
||||||
"""
|
"""
|
||||||
Main method that parses the tokens and extract the concepts
|
Main method that parses the tokens and extract the concepts
|
||||||
@@ -900,10 +980,10 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False):
|
def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False):
|
||||||
|
|
||||||
if isinstance(parsing_expression, ConceptExpression):
|
if isinstance(parsing_expression, ConceptExpression):
|
||||||
if parsing_expression.concept in already_found:
|
if parsing_expression.concept.id in already_found:
|
||||||
return True
|
return True
|
||||||
already_found.add(parsing_expression.concept)
|
already_found.add(parsing_expression.concept.id)
|
||||||
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, False)
|
return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, only_first)
|
||||||
|
|
||||||
if isinstance(parsing_expression, Sequence):
|
if isinstance(parsing_expression, Sequence):
|
||||||
# for sequence, we need to check all nodes
|
# for sequence, we need to check all nodes
|
||||||
@@ -930,43 +1010,93 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
return False
|
return False
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if isinstance(parsing_expression, LongestChoice):
|
||||||
|
for node in parsing_expression.nodes:
|
||||||
|
already_found_for_current_node = already_found.copy()
|
||||||
|
if self.check_for_infinite_recursion(node, already_found_for_current_node, True):
|
||||||
|
already_found.update(already_found_for_current_node)
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
if isinstance(parsing_expression, UnderConstruction):
|
||||||
|
if parsing_expression.concept_id in already_found:
|
||||||
|
return True
|
||||||
|
already_found.add(parsing_expression.concept_id)
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_parsing_expression(self, context, concept):
|
def get_parsing_expression(self, context, concept):
|
||||||
|
"""
|
||||||
|
Compute the parsing expression for a given concept
|
||||||
|
:param context:
|
||||||
|
:param concept:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
if concept.id in self.concepts_grammars:
|
if concept.id in self.concepts_grammars:
|
||||||
return self.concepts_grammars.get(concept.id)
|
return self.concepts_grammars.get(concept.id)
|
||||||
|
|
||||||
grammar = self.concepts_grammars.copy()
|
# internal cache of already computed parsing expression to use during the recursion
|
||||||
to_resolve = {} # the key is the instance id of the parsing expression
|
grammar = {}
|
||||||
isa_concepts = set()
|
|
||||||
self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts)
|
|
||||||
|
|
||||||
for _id, pe in to_resolve.items():
|
# concept that are not totally resolved, because they reference parsing expression under construction
|
||||||
for i, node in enumerate(pe.nodes):
|
to_update = set() # the key is the instance id of the parsing expression
|
||||||
if isinstance(node, UnderConstruction):
|
|
||||||
pe.nodes[i] = grammar.get(node.concept_id)
|
|
||||||
|
|
||||||
|
# during the parsing of concept, we will resolve other concepts
|
||||||
|
# keep the track of the concepts that can safely be added to self.concept_grammars
|
||||||
|
to_keep = {concept.id}
|
||||||
|
|
||||||
|
desc = f"Get parsing expression for concept {concept}"
|
||||||
|
with context.push(BuiltinConcepts.INIT_BNF, concept,
|
||||||
|
who=self.name,
|
||||||
|
obj=concept,
|
||||||
|
root_concept=concept,
|
||||||
|
desc=desc) as sub_context:
|
||||||
|
# get the parsing expression
|
||||||
|
ret = self.resolve_concept_parsing_expression(sub_context, concept, grammar, to_update, to_keep)
|
||||||
|
|
||||||
|
# check and update parsing expression that are still under construction
|
||||||
|
# Note that we only update the concept that will update concepts_grammars
|
||||||
|
# because pe.node may be large
|
||||||
|
for item in to_update:
|
||||||
|
if item.parent_id in to_keep:
|
||||||
|
pe = item.parsing_expression
|
||||||
|
for i, node in enumerate(pe.nodes):
|
||||||
|
if isinstance(node, UnderConstruction):
|
||||||
|
pe.nodes[i] = grammar.get(node.concept_id)
|
||||||
|
|
||||||
|
# check for infinite recursion.
|
||||||
|
# We are adding a new concept. Does it create an infinite recursion ?
|
||||||
concepts_in_recursion = set()
|
concepts_in_recursion = set()
|
||||||
if self.check_for_infinite_recursion(pe, concepts_in_recursion):
|
if self.check_for_infinite_recursion(ret, concepts_in_recursion):
|
||||||
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body={c.id for c in concepts_in_recursion})
|
cycle = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion)
|
||||||
for concept in concepts_in_recursion:
|
for concept_id in concepts_in_recursion:
|
||||||
grammar[concept.id] = cycle
|
grammar[concept_id] = cycle
|
||||||
|
|
||||||
# Make sure you do not put isa concepts in cache
|
# update, in case of infinite circular recursion
|
||||||
# why :
|
ret = grammar[concept.id]
|
||||||
# twenties = 'twenty' number where number < 10
|
|
||||||
# hundreds = number 'hundred' where number < 99
|
|
||||||
# the concept of number depends on its utilisation
|
|
||||||
for concept_id in [c for c in grammar if c not in isa_concepts]:
|
|
||||||
self.concepts_grammars.put(concept_id, grammar[concept_id])
|
|
||||||
|
|
||||||
return self.concepts_grammars.get(concept.id)
|
# finally, update concept grammar
|
||||||
|
for k, v in grammar.items():
|
||||||
|
if k in to_keep:
|
||||||
|
self.concepts_grammars.put(k, v)
|
||||||
|
|
||||||
def resolve_concept_parsing_expression(self, context, concept, grammar, to_resolve, isa_concepts):
|
# not quite sure that it is a good idea.
|
||||||
if concept.id in grammar:
|
# Why do we want to corrupt previous valid entries ?
|
||||||
|
if context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG):
|
||||||
|
self.concepts_grammars.put(k, v)
|
||||||
|
|
||||||
|
sub_context.add_values(return_values=ret)
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def resolve_concept_parsing_expression(self, context, concept, grammar, to_update, to_keep):
|
||||||
|
if concept.id in self.concepts_grammars: # validated entry
|
||||||
|
return self.concepts_grammars.get(concept.id)
|
||||||
|
|
||||||
|
if concept.id in grammar: # under construction entry
|
||||||
return grammar.get(concept.id)
|
return grammar.get(concept.id)
|
||||||
|
|
||||||
desc = f"Get parsing expression for '{concept}'"
|
desc = f"Resolve concept parsing expression for '{concept}'"
|
||||||
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
|
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
|
||||||
if not concept.bnf: # to save a function call. Not sure it worth it.
|
if not concept.bnf: # to save a function call. Not sure it worth it.
|
||||||
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
|
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
|
||||||
@@ -979,52 +1109,41 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
|
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
|
||||||
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
||||||
ssc.add_inputs(expression=expression)
|
ssc.add_inputs(expression=expression)
|
||||||
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts)
|
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep)
|
||||||
ssc.add_values(return_values=resolved)
|
ssc.add_values(return_values=resolved)
|
||||||
|
|
||||||
elif sheerka.isaset(context, concept):
|
elif sheerka.isaset(context, concept):
|
||||||
desc = f"Concept is a group. Resolving parsing expression using 'isa'"
|
desc = f"Concept is a group. Resolving parsing expression using 'isa'"
|
||||||
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
||||||
ssc.add_inputs(concept=concept)
|
ssc.add_inputs(concept=concept)
|
||||||
isa_concepts.add(concept.id)
|
|
||||||
concepts_in_group = self.sheerka.get_set_elements(ssc, concept)
|
concepts_in_group = self.sheerka.get_set_elements(ssc, concept)
|
||||||
|
|
||||||
# concepts_in_group comes from a set, so the order of its elements is not guaranteed
|
valid_concepts = []
|
||||||
# to avoid random failure (ie random CHICKEN_AND_EGG), we need to rearrange
|
|
||||||
# We also remove the root concept (the one from get_parsing_expression())
|
|
||||||
|
|
||||||
root_concept_as_set = set(context.search(
|
|
||||||
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
|
|
||||||
get_obj=lambda ec: ec.obj,
|
|
||||||
stop=lambda ec: ec.action != BuiltinConcepts.INIT_BNF)) # there only one item in the set
|
|
||||||
root_concept = list(root_concept_as_set)[0]
|
|
||||||
reordered = []
|
|
||||||
for c in concepts_in_group:
|
for c in concepts_in_group:
|
||||||
if c.id == root_concept.id:
|
if c.id == context.root_concept.id:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# I do not guaranty the same order every time, but I minimize the ChickenAndEgg random issue
|
c_pe = self.resolve_concept_parsing_expression(context, c, grammar, to_update, to_keep)
|
||||||
if c.metadata.definition_type == DEFINITION_TYPE_BNF or sheerka.isaset(ssc, c):
|
if self.check_for_infinite_recursion(c_pe, {concept.id}, True):
|
||||||
reordered.append(c)
|
continue
|
||||||
else:
|
|
||||||
reordered.insert(0, c)
|
|
||||||
|
|
||||||
nodes = [ConceptExpression(c, rule_name=c.name) for c in reordered]
|
valid_concepts.append(c)
|
||||||
|
|
||||||
|
nodes = [ConceptExpression(c, rule_name=c.name) for c in valid_concepts]
|
||||||
resolved = self.resolve_parsing_expression(ssc,
|
resolved = self.resolve_parsing_expression(ssc,
|
||||||
OrderedChoice(*nodes),
|
LongestChoice(*nodes),
|
||||||
grammar,
|
grammar,
|
||||||
to_resolve,
|
to_update,
|
||||||
isa_concepts)
|
to_keep)
|
||||||
ssc.add_values(concepts_in_group=concepts_in_group)
|
ssc.add_values(concepts_in_group=concepts_in_group)
|
||||||
ssc.add_values(return_values=resolved)
|
ssc.add_values(return_values=resolved)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
desc = f"Concept is a simple concept."
|
desc = f"Concept is a simple concept."
|
||||||
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
|
||||||
tokens = Tokenizer(concept.name, yield_eof=False)
|
to_keep.add(concept.id)
|
||||||
nodes = [StrMatch(token.strip_quote) for token in tokens]
|
expression = self.get_expression_from_concept_name(concept.name)
|
||||||
expression = nodes[0] if len(nodes) == 1 else Sequence(nodes)
|
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep)
|
||||||
resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_resolve, isa_concepts)
|
|
||||||
|
|
||||||
grammar[concept.id] = resolved
|
grammar[concept.id] = resolved
|
||||||
|
|
||||||
@@ -1035,7 +1154,7 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
sub_context.add_values(return_values=resolved)
|
sub_context.add_values(return_values=resolved)
|
||||||
return resolved
|
return resolved
|
||||||
|
|
||||||
def resolve_parsing_expression(self, context, expression, grammar, to_resolve, isa_concepts):
|
def resolve_parsing_expression(self, context, expression, grammar, to_update, to_keep):
|
||||||
|
|
||||||
if isinstance(expression, str):
|
if isinstance(expression, str):
|
||||||
ret = StrMatch(expression, ignore_case=self.ignore_case)
|
ret = StrMatch(expression, ignore_case=self.ignore_case)
|
||||||
@@ -1051,7 +1170,7 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
|
unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept)
|
||||||
return self.add_error(unknown_concept)
|
return self.add_error(unknown_concept)
|
||||||
|
|
||||||
pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_resolve, isa_concepts)
|
pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_update, to_keep)
|
||||||
|
|
||||||
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
|
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
|
||||||
return pe # an error is detected, escalate it
|
return pe # an error is detected, escalate it
|
||||||
@@ -1060,7 +1179,7 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
# return pe # we are looking for ourself, just return it
|
# return pe # we are looking for ourself, just return it
|
||||||
|
|
||||||
if isinstance(pe, UnderConstruction):
|
if isinstance(pe, UnderConstruction):
|
||||||
to_resolve[id(expression)] = expression
|
to_update.add(ToUpdate(context.obj.id, expression))
|
||||||
|
|
||||||
expression.nodes = [pe]
|
expression.nodes = [pe]
|
||||||
expression.rule_name = expression.rule_name or concept.name
|
expression.rule_name = expression.rule_name or concept.name
|
||||||
@@ -1073,17 +1192,18 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
|
|
||||||
elif isinstance(expression, Sequence) or \
|
elif isinstance(expression, Sequence) or \
|
||||||
isinstance(expression, OrderedChoice) or \
|
isinstance(expression, OrderedChoice) or \
|
||||||
|
isinstance(expression, LongestChoice) or \
|
||||||
isinstance(expression, ZeroOrMore) or \
|
isinstance(expression, ZeroOrMore) or \
|
||||||
isinstance(expression, OneOrMore) or \
|
isinstance(expression, OneOrMore) or \
|
||||||
isinstance(expression, Optional):
|
isinstance(expression, Optional):
|
||||||
ret = expression
|
ret = expression
|
||||||
ret.nodes = []
|
ret.nodes = []
|
||||||
for e in ret.elements:
|
for e in ret.elements:
|
||||||
pe = self.resolve_parsing_expression(context, e, grammar, to_resolve, isa_concepts)
|
pe = self.resolve_parsing_expression(context, e, grammar, to_update, to_keep)
|
||||||
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
|
if not isinstance(pe, (ParsingExpression, UnderConstruction)):
|
||||||
return pe # an error is detected, escalate it
|
return pe # an error is detected, escalate it
|
||||||
if isinstance(pe, UnderConstruction):
|
if isinstance(pe, UnderConstruction):
|
||||||
to_resolve[id(ret)] = ret # remember that there is an unresolved parsing expression
|
to_update.add(ToUpdate(context.obj.id, ret))
|
||||||
ret.nodes.append(pe)
|
ret.nodes.append(pe)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@@ -1094,8 +1214,8 @@ class BnfNodeParser(BaseNodeParser):
|
|||||||
expression.sep = self.resolve_parsing_expression(context,
|
expression.sep = self.resolve_parsing_expression(context,
|
||||||
expression.sep,
|
expression.sep,
|
||||||
grammar,
|
grammar,
|
||||||
to_resolve,
|
to_update,
|
||||||
isa_concepts)
|
to_keep)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|||||||
@@ -494,3 +494,32 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka):
|
|||||||
evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one)
|
evaluated = sheerka.evaluate_concept(self.get_context(sheerka, True), one)
|
||||||
assert evaluated.key == one.key
|
assert evaluated.key == one.key
|
||||||
assert evaluated.body == 1
|
assert evaluated.body == 1
|
||||||
|
|
||||||
|
def test_i_can_evaluate_thousand_concept(self):
|
||||||
|
sheerka, context, thousand, number, forties, forty, one = self.init_concepts(
|
||||||
|
Concept("thousand", body="number * 1000").def_var("number"),
|
||||||
|
Concept("number"),
|
||||||
|
Concept("forties", body="forty + number").def_var("forty").def_var("number"),
|
||||||
|
Concept("forty", body="40"),
|
||||||
|
Concept("one", body="1"),
|
||||||
|
eval_body=True
|
||||||
|
)
|
||||||
|
|
||||||
|
one = sheerka.new("one")
|
||||||
|
number2 = sheerka.new("number")
|
||||||
|
number2.compiled["one"] = one
|
||||||
|
number2.compiled[ConceptParts.BODY] = one
|
||||||
|
forties = sheerka.new("forties")
|
||||||
|
forties.compiled["forty"] = sheerka.new("forty")
|
||||||
|
forties.compiled["number"] = number2
|
||||||
|
|
||||||
|
number1 = sheerka.new("number")
|
||||||
|
number1.compiled["forties"] = forties
|
||||||
|
number1.compiled[ConceptParts.BODY] = forties
|
||||||
|
|
||||||
|
forty_one_thousand = sheerka.new("thousand")
|
||||||
|
forty_one_thousand.compiled["number"] = number1
|
||||||
|
|
||||||
|
evaluated = sheerka.evaluate_concept(context, forty_one_thousand)
|
||||||
|
|
||||||
|
assert evaluated.body == 41000
|
||||||
|
|||||||
@@ -1,9 +1,20 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
import core.utils
|
import core.utils
|
||||||
import pytest
|
import pytest
|
||||||
from core.concept import ConceptParts, Concept
|
from core.concept import ConceptParts, Concept
|
||||||
from core.tokenizer import Token, TokenKind
|
from core.tokenizer import Token, TokenKind
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Obj:
    """Small two-field value object used as a fixture by the ``make_unique`` tests.

    Equality is the dataclass-generated field-by-field comparison of
    ``prop1`` and ``prop2``; the explicit ``__hash__`` keeps instances
    hashable even though the dataclass is mutable.
    """

    prop1: str
    prop2: str

    def __hash__(self):
        # Hash both fields, mirroring what __eq__ compares. The previous
        # implementation hashed prop1 twice, so objects differing only in
        # prop2 always collided.
        return hash((self.prop1, self.prop2))
|
||||||
|
|
||||||
|
|
||||||
def get_tokens(lst):
|
def get_tokens(lst):
|
||||||
res = []
|
res = []
|
||||||
for e in lst:
|
for e in lst:
|
||||||
@@ -229,3 +240,9 @@ def test_decode_concept_key_id():
|
|||||||
])
|
])
|
||||||
def test_dict_product(a, b, expected):
|
def test_dict_product(a, b, expected):
|
||||||
assert core.utils.dict_product(a, b) == expected
|
assert core.utils.dict_product(a, b) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_make_unique():
|
||||||
|
assert core.utils.make_unique(["a", "a", "b", "c", "c"]) == ["a", "b", "c"]
|
||||||
|
assert core.utils.make_unique([Obj("a", "b"), Obj("a", "c"), Obj("a", "b")]) == [Obj("a", "b"), Obj("a", "c")]
|
||||||
|
assert core.utils.make_unique([Obj("a", "b"), Obj("a", "c")], lambda o: o.prop1) == [Obj("a", "b")]
|
||||||
|
|||||||
@@ -118,3 +118,4 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka):
|
|||||||
assert not context.sheerka.is_success(error_concept) # it's indeed an error
|
assert not context.sheerka.is_success(error_concept) # it's indeed an error
|
||||||
assert result.status
|
assert result.status
|
||||||
assert result.value == error_concept
|
assert result.value == error_concept
|
||||||
|
|
||||||
|
|||||||
@@ -137,28 +137,38 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def test_i_can_resolve_when_concepts_are_sets(self):
|
def test_i_can_resolve_when_concepts_are_sets(self):
|
||||||
sheerka, context, one, two, three, number, foo = self.init_concepts(
|
sheerka, context, number, *concepts = self.init_concepts(
|
||||||
|
"number",
|
||||||
"one",
|
"one",
|
||||||
"two",
|
"two",
|
||||||
"three",
|
"twenty",
|
||||||
"number",
|
"hundred",
|
||||||
Concept("foo", definition="number three"),
|
Concept("twenties", definition="twenty number"),
|
||||||
|
Concept("hundreds", definition="number hundred"),
|
||||||
create_new=True # mandatory because set_isa() needs it
|
create_new=True # mandatory because set_isa() needs it
|
||||||
)
|
)
|
||||||
|
|
||||||
sheerka.set_isa(context, sheerka.new("one"), number)
|
sheerka.set_isa(context, sheerka.new("one"), number)
|
||||||
sheerka.set_isa(context, sheerka.new("two"), number)
|
sheerka.set_isa(context, sheerka.new("two"), number)
|
||||||
|
sheerka.set_isa(context, sheerka.new("twenty"), number)
|
||||||
|
sheerka.set_isa(context, sheerka.new("thirty"), number)
|
||||||
|
sheerka.set_isa(context, sheerka.new("hundred"), number)
|
||||||
|
sheerka.set_isa(context, sheerka.new("twenties"), number)
|
||||||
|
sheerka.set_isa(context, sheerka.new("hundreds"), number)
|
||||||
|
|
||||||
cbfk = BaseNodeParser.get_concepts_by_first_token(context, [one, two, three, number, foo]).body
|
sheerka.concepts_grammars.clear() # reset all the grammar to simulate Sheerka restart
|
||||||
|
|
||||||
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbfk)
|
# cbft: concepts_by_first_token (I usually don't use abbreviations)
|
||||||
|
cbft = BaseNodeParser.get_concepts_by_first_token(context, [number] + concepts).body
|
||||||
|
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, cbft)
|
||||||
|
|
||||||
assert resolved_ret_val.status
|
assert resolved_ret_val.status
|
||||||
assert resolved_ret_val.body == {
|
assert resolved_ret_val.body == {
|
||||||
"one": ["1001", "1005"],
|
'number': ['1001'],
|
||||||
"two": ["1002", "1005"],
|
'one': ['1002', '1007'],
|
||||||
"three": ["1003"],
|
'two': ['1003', '1007'],
|
||||||
"number": ["1004"],
|
'twenty': ['1004', '1006', '1007'],
|
||||||
|
'hundred': ['1005', '1007'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def test_concepts_are_defined_once(self):
|
def test_concepts_are_defined_once(self):
|
||||||
@@ -196,36 +206,58 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
"one": ["1001", "1002"],
|
"one": ["1001", "1002"],
|
||||||
}
|
}
|
||||||
|
|
||||||
# def tests_i_can_detect_direct_recursion(self):
|
def tests_i_can_detect_direct_recursion(self):
|
||||||
# sheerka = self.get_sheerka()
|
sheerka, context, good, foo, bar = self.init_concepts(
|
||||||
# good = self.get_concept(sheerka, "good")
|
"good",
|
||||||
# foo = self.get_concept(sheerka, "foo", ConceptExpression("bar"))
|
self.bnf_concept("foo", ConceptExpression("bar")),
|
||||||
# bar = self.get_concept(sheerka, "bar", ConceptExpression("foo"))
|
self.bnf_concept("bar", ConceptExpression("foo")),
|
||||||
#
|
)
|
||||||
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, foo, bar]).body
|
|
||||||
#
|
concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, foo, bar]).body
|
||||||
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
|
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
|
||||||
# assert resolved_ret_val.status
|
assert resolved_ret_val.status
|
||||||
# assert resolved_ret_val.body == {
|
assert resolved_ret_val.body == {
|
||||||
# "good": ["1001"],
|
"good": ["1001"],
|
||||||
# BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1003"]
|
}
|
||||||
# }
|
assert sheerka.chicken_and_eggs.get(foo.id) == {foo.id, bar.id}
|
||||||
#
|
assert sheerka.chicken_and_eggs.get(bar.id) == {foo.id, bar.id}
|
||||||
# def test_i_can_detect_indirect_infinite_recursion(self):
|
|
||||||
# sheerka = self.get_sheerka()
|
def test_i_can_detect_indirect_infinite_recursion(self):
|
||||||
# good = self.get_concept(sheerka, "good")
|
sheerka, context, good, one, two, three = self.init_concepts(
|
||||||
# one = self.get_concept(sheerka, "one", ConceptExpression("two"))
|
"good",
|
||||||
# two = self.get_concept(sheerka, "two", ConceptExpression("three"))
|
self.bnf_concept("one", ConceptExpression("two")),
|
||||||
# three = self.get_concept(sheerka, "three", ConceptExpression("two"))
|
self.bnf_concept("two", ConceptExpression("three")),
|
||||||
#
|
self.bnf_concept("three", ConceptExpression("two")),
|
||||||
# concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(sheerka, [good, one, two, three]).body
|
)
|
||||||
#
|
|
||||||
# resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(sheerka, concepts_by_first_keywords)
|
concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, one, two, three]).body
|
||||||
# assert resolved_ret_val.status
|
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
|
||||||
# assert resolved_ret_val.body == {
|
assert resolved_ret_val.status
|
||||||
# "good": ["1001"],
|
assert resolved_ret_val.body == {
|
||||||
# BuiltinConcepts.CHICKEN_AND_EGG: ["1002", "1004", "1003"]
|
"good": ["1001"],
|
||||||
# }
|
}
|
||||||
|
assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
|
||||||
|
assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
|
||||||
|
assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}
|
||||||
|
|
||||||
|
def test_i_can_detect_the_longest_infinite_recursion_chain(self):
|
||||||
|
sheerka, context, good, one, two, three = self.init_concepts(
|
||||||
|
"good",
|
||||||
|
self.bnf_concept("two", ConceptExpression("three")),
|
||||||
|
self.bnf_concept("three", ConceptExpression("two")),
|
||||||
|
self.bnf_concept("one", ConceptExpression("three")),
|
||||||
|
)
|
||||||
|
|
||||||
|
concepts_by_first_keywords = BaseNodeParser.get_concepts_by_first_token(context, [good, one, two, three]).body
|
||||||
|
resolved_ret_val = BaseNodeParser.resolve_concepts_by_first_keyword(context, concepts_by_first_keywords)
|
||||||
|
assert resolved_ret_val.status
|
||||||
|
assert resolved_ret_val.body == {
|
||||||
|
"good": ["1001"],
|
||||||
|
}
|
||||||
|
assert sheerka.chicken_and_eggs.get(one.id) == {one.id, two.id, three.id}
|
||||||
|
assert sheerka.chicken_and_eggs.get(two.id) == {one.id, two.id, three.id}
|
||||||
|
assert sheerka.chicken_and_eggs.get(three.id) == {one.id, two.id, three.id}
|
||||||
|
|
||||||
#
|
#
|
||||||
# def test_i_can_detect_infinite_recursion_from_ordered_choice(self):
|
# def test_i_can_detect_infinite_recursion_from_ordered_choice(self):
|
||||||
# sheerka = self.get_sheerka()
|
# sheerka = self.get_sheerka()
|
||||||
|
|||||||
+177
-100
@@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
|
|||||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||||
from parsers.BaseNodeParser import CNC, UTN, CN
|
from parsers.BaseNodeParser import CNC, UTN, CN
|
||||||
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
|
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
|
||||||
Optional, ZeroOrMore, OneOrMore, ConceptExpression
|
Optional, ZeroOrMore, OneOrMore, ConceptExpression, LongestChoice
|
||||||
from parsers.BnfParser import BnfParser
|
from parsers.BnfParser import BnfParser
|
||||||
|
|
||||||
import tests.parsers.parsers_utils
|
import tests.parsers.parsers_utils
|
||||||
@@ -16,13 +16,15 @@ cmap = {
|
|||||||
"two": Concept("two"),
|
"two": Concept("two"),
|
||||||
"three": Concept("three"),
|
"three": Concept("three"),
|
||||||
"four": Concept("four"),
|
"four": Concept("four"),
|
||||||
"thirty": Concept("thirty", body=30),
|
"thirty": Concept("thirty", body="30"),
|
||||||
"forty": Concept("forty", body=40),
|
"forty": Concept("forty", body="40"),
|
||||||
"fifty": Concept("fifty", body=50),
|
"fifty": Concept("fifty", body="50"),
|
||||||
"number": Concept("number"),
|
"number": Concept("number"),
|
||||||
"foo": Concept("foo"),
|
"foo": Concept("foo"),
|
||||||
"bar": Concept("bar"),
|
"bar": Concept("bar"),
|
||||||
"baz": Concept("baz"),
|
"baz": Concept("baz"),
|
||||||
|
"one hundred": Concept("one hundred", body="100"),
|
||||||
|
"one_hundred": Concept("'one hundred'", body="100"),
|
||||||
|
|
||||||
"bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen
|
"bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen
|
||||||
|
|
||||||
@@ -80,17 +82,6 @@ def compute_expected_array(my_concepts_map, expression, expected, exclude_body=F
|
|||||||
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||||
sheerka = None
|
sheerka = None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def update_bnf(context, concept):
|
|
||||||
bnf_parser = BnfParser()
|
|
||||||
res = bnf_parser.parse(context, concept.metadata.definition)
|
|
||||||
if res.status:
|
|
||||||
concept.bnf = res.value.value
|
|
||||||
concept.metadata.definition_type = DEFINITION_TYPE_BNF
|
|
||||||
else:
|
|
||||||
raise Exception(res)
|
|
||||||
return concept
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
t = cls()
|
t = cls()
|
||||||
@@ -109,6 +100,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number"))
|
sheerka.set_isa(context, sheerka.new("thirty"), sheerka.new("number"))
|
||||||
sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number"))
|
sheerka.set_isa(context, sheerka.new("forty"), sheerka.new("number"))
|
||||||
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
|
sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number"))
|
||||||
|
sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number"))
|
||||||
|
|
||||||
thirties = cls.update_bnf(context, Concept("thirties",
|
thirties = cls.update_bnf(context, Concept("thirties",
|
||||||
definition="thirty number",
|
definition="thirty number",
|
||||||
@@ -131,6 +123,24 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body
|
cmap["fifties"] = sheerka.create_new_concept(context, fifties).body.body
|
||||||
sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number"))
|
sheerka.set_isa(context, sheerka.new("fifties"), sheerka.new("number"))
|
||||||
|
|
||||||
|
thousands = cls.update_bnf(context, Concept("thousands",
|
||||||
|
definition="number 'thousand'",
|
||||||
|
where="number < 999",
|
||||||
|
body="number * 1000").def_var("number"))
|
||||||
|
cmap["thousands"] = sheerka.create_new_concept(context, thousands).body.body
|
||||||
|
sheerka.set_isa(context, sheerka.new("thousands"), sheerka.new("number"))
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def update_bnf(context, concept):
|
||||||
|
bnf_parser = BnfParser()
|
||||||
|
res = bnf_parser.parse(context, concept.metadata.definition)
|
||||||
|
if res.status:
|
||||||
|
concept.bnf = res.value.value
|
||||||
|
concept.metadata.definition_type = DEFINITION_TYPE_BNF
|
||||||
|
else:
|
||||||
|
raise Exception(res)
|
||||||
|
return concept
|
||||||
|
|
||||||
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
|
def init_parser(self, my_concepts_map=None, init_from_sheerka=False, **kwargs):
|
||||||
if my_concepts_map is not None:
|
if my_concepts_map is not None:
|
||||||
sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs)
|
sheerka, context, *updated = self.init_concepts(*my_concepts_map.values(), **kwargs)
|
||||||
@@ -351,6 +361,21 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
|
|
||||||
self.validate_get_concepts_sequences(my_map, text, expected)
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("text, expected", [
|
||||||
|
("one", [CNC("foo", source="one")]),
|
||||||
|
("one two", [CNC("foo", source="one two")]),
|
||||||
|
("three", []),
|
||||||
|
|
||||||
|
])
|
||||||
|
def test_i_can_parse_longest_choice(self, text, expected):
|
||||||
|
my_map = {
|
||||||
|
"foo": self.bnf_concept("foo", LongestChoice(
|
||||||
|
StrMatch("one"),
|
||||||
|
Sequence(StrMatch("one"), StrMatch("two")))),
|
||||||
|
}
|
||||||
|
|
||||||
|
self.validate_get_concepts_sequences(my_map, text, expected)
|
||||||
|
|
||||||
@pytest.mark.parametrize("text, expected", [
|
@pytest.mark.parametrize("text, expected", [
|
||||||
("one", [CNC("foo", source="one")]),
|
("one", [CNC("foo", source="one")]),
|
||||||
("", []),
|
("", []),
|
||||||
@@ -724,7 +749,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
"foo": self.bnf_concept("foo",
|
"foo": self.bnf_concept("foo",
|
||||||
Sequence(
|
Sequence(
|
||||||
StrMatch("twenty"),
|
StrMatch("twenty"),
|
||||||
OrderedChoice(ConceptExpression("one"), ConceptExpression("two")))),
|
OrderedChoice(
|
||||||
|
ConceptExpression("one"),
|
||||||
|
ConceptExpression("two"),
|
||||||
|
rule_name="unit"))),
|
||||||
}
|
}
|
||||||
|
|
||||||
text = "twenty one"
|
text = "twenty one"
|
||||||
@@ -733,7 +761,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
concept_foo = sequences[0].concept
|
concept_foo = sequences[0].concept
|
||||||
assert concept_foo.compiled == {
|
assert concept_foo.compiled == {
|
||||||
ConceptParts.BODY: DoNotResolve("twenty one"),
|
ConceptParts.BODY: DoNotResolve("twenty one"),
|
||||||
"one": my_map["one"],
|
"unit": my_map["one"],
|
||||||
}
|
}
|
||||||
|
|
||||||
def test_i_can_refer_to_group_concepts(self):
|
def test_i_can_refer_to_group_concepts(self):
|
||||||
@@ -755,8 +783,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
# explicit validations of the compiled
|
# explicit validations of the compiled
|
||||||
concept_foo = sequences[0].concept
|
concept_foo = sequences[0].concept
|
||||||
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
||||||
assert concept_foo.compiled == {'number': my_map["number"],
|
assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["two"], two=my_map["two"]),
|
||||||
'two': my_map["two"],
|
|
||||||
ConceptParts.BODY: DoNotResolve(value='twenty two')}
|
ConceptParts.BODY: DoNotResolve(value='twenty two')}
|
||||||
|
|
||||||
text = "twenty one"
|
text = "twenty one"
|
||||||
@@ -766,8 +793,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
# explicit validations of the compiled
|
# explicit validations of the compiled
|
||||||
concept_foo = sequences[0].concept
|
concept_foo = sequences[0].concept
|
||||||
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
assert concept_foo.body == BuiltinConcepts.NOT_INITIALIZED
|
||||||
assert concept_foo.compiled == {'number': my_map["number"],
|
assert concept_foo.compiled == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
|
||||||
'one': my_map["one"],
|
|
||||||
ConceptParts.BODY: DoNotResolve(value='twenty one')}
|
ConceptParts.BODY: DoNotResolve(value='twenty one')}
|
||||||
|
|
||||||
@pytest.mark.parametrize("bar_expr, expected", [
|
@pytest.mark.parametrize("bar_expr, expected", [
|
||||||
@@ -842,8 +868,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
res = parser.get_parsing_expression(context, my_map["foo"])
|
res = parser.get_parsing_expression(context, my_map["foo"])
|
||||||
assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected
|
assert sheerka.isinstance(res, BuiltinConcepts.CHICKEN_AND_EGG) == expected
|
||||||
|
|
||||||
|
def test_i_can_get_parsing_expression_when_ending_by_concept_isa(self):
|
||||||
def test_i_can_get_parsing_expression_when_concept_isa(self):
|
|
||||||
my_map = {
|
my_map = {
|
||||||
"one": Concept("one"),
|
"one": Concept("one"),
|
||||||
"twenty": Concept("twenty"),
|
"twenty": Concept("twenty"),
|
||||||
@@ -869,42 +894,43 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
|
|
||||||
number_nodes = parsing_expression.nodes[1].nodes
|
number_nodes = parsing_expression.nodes[1].nodes
|
||||||
assert len(number_nodes) == 1
|
assert len(number_nodes) == 1
|
||||||
assert isinstance(number_nodes[0], OrderedChoice)
|
assert isinstance(number_nodes[0], LongestChoice)
|
||||||
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
||||||
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
||||||
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
||||||
|
|
||||||
assert my_map["number"].id not in parser.concepts_grammars
|
assert my_map["number"].id not in parser.concepts_grammars
|
||||||
|
|
||||||
#
|
def test_i_can_get_parsing_expression_when_starting_by_isa_concept(self):
|
||||||
# def test_i_cannot_get_parsing_expression_when_concept_is_part_of_a_group(self):
|
my_map = {
|
||||||
# """
|
"one": Concept("one"),
|
||||||
# In this test, twenties isa number
|
"two": Concept("two"),
|
||||||
# # So 'number' in Sequence(thirty, number) will spawn 'twenties' which, because there is no other indication,
|
"number": Concept("number"),
|
||||||
# # will create an infinite loop
|
"hundreds": self.bnf_concept("hundreds", Sequence(ConceptExpression("number"), StrMatch("hundred")))
|
||||||
# :return:
|
}
|
||||||
# """
|
|
||||||
# my_map = {
|
sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
||||||
# "one": Concept("one"),
|
parser.context = context
|
||||||
# "twenty": Concept("twenty"),
|
parser.sheerka = sheerka
|
||||||
# "number": Concept("number"),
|
sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
|
||||||
# "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
|
sheerka.set_isa(context, sheerka.new("two"), my_map["number"])
|
||||||
# }
|
sheerka.set_isa(context, sheerka.new("hundreds"), my_map["number"])
|
||||||
# sheerka, context, parser = self.init_parser(my_map, singleton=True)
|
|
||||||
# parser.context = context
|
parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
|
||||||
# parser.sheerka = sheerka
|
parsing_expression = parser.get_parsing_expression(context, my_map["hundreds"])
|
||||||
# sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
|
|
||||||
# sheerka.set_isa(context, sheerka.new("twenty"), my_map["number"])
|
assert parsing_expression == Sequence(
|
||||||
# sheerka.set_isa(context, sheerka.new("twenties"), my_map["number"]) # <- twenties is also a number
|
ConceptExpression(my_map["number"], rule_name="number"),
|
||||||
#
|
StrMatch("hundred"))
|
||||||
# parser.concepts_grammars.clear() # make sure parsing expression is created from scratch
|
|
||||||
#
|
assert len(parsing_expression.nodes) == len(parsing_expression.elements)
|
||||||
# parsing_expression = parser.get_parsing_expression(context, my_map["twenties"])
|
|
||||||
# assert sheerka.isinstance(parsing_expression, BuiltinConcepts.CHICKEN_AND_EGG)
|
number_nodes = parsing_expression.nodes[0].nodes
|
||||||
# assert parsing_expression.body == {my_map["twenties"].id, my_map["number"].id}
|
assert len(number_nodes) == 1
|
||||||
#
|
assert isinstance(number_nodes[0], LongestChoice)
|
||||||
# assert isinstance(parser.concepts_grammars.get(my_map["one"].id), ParsingExpression)
|
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
||||||
# assert isinstance(parser.concepts_grammars.get(my_map["twenty"].id), ParsingExpression)
|
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
||||||
|
assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes
|
||||||
|
|
||||||
def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
|
def test_i_can_get_parsing_expression_when_concept_is_part_of_a_group(self):
|
||||||
my_map = {
|
my_map = {
|
||||||
@@ -933,7 +959,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
|
|
||||||
number_nodes = parsing_expression.nodes[1].nodes
|
number_nodes = parsing_expression.nodes[1].nodes
|
||||||
assert len(number_nodes) == 1
|
assert len(number_nodes) == 1
|
||||||
assert isinstance(number_nodes[0], OrderedChoice)
|
assert isinstance(number_nodes[0], LongestChoice)
|
||||||
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
assert len(number_nodes[0].nodes) == len(number_nodes[0].elements)
|
||||||
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes
|
||||||
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes
|
||||||
@@ -994,7 +1020,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]),
|
("foo bar", True, [CNC("foo then bar", source="foo bar", foo="foo", bar="bar")]),
|
||||||
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
|
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
|
||||||
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
|
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
|
||||||
("twenty one", True, [CNC("t1", source="twenty one", unit="one", one="one")]),
|
("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
|
||||||
])
|
])
|
||||||
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
|
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
|
||||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||||
@@ -1008,7 +1034,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert concepts_nodes == expected_array
|
assert concepts_nodes == expected_array
|
||||||
|
|
||||||
def test_i_can_when_multiple_times_the_same_variable(self):
|
def test_i_can_parse_when_multiple_times_the_same_variable(self):
|
||||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||||
|
|
||||||
text = "foo foo foo"
|
text = "foo foo foo"
|
||||||
@@ -1032,8 +1058,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
unit=CC("three_four",
|
unit=CC("three_four",
|
||||||
source="four",
|
source="four",
|
||||||
four=CC("four", body=DoNotResolve("four")),
|
four=CC("four", body=DoNotResolve("four")),
|
||||||
body=CC("four", body=DoNotResolve("four"))),
|
body=CC("four", body=DoNotResolve("four"))))
|
||||||
four="four")
|
|
||||||
expected_array = compute_expected_array(cmap, text, [expected])
|
expected_array = compute_expected_array(cmap, text, [expected])
|
||||||
|
|
||||||
res = parser.parse(context, ParserInput(text))
|
res = parser.parse(context, ParserInput(text))
|
||||||
@@ -1044,40 +1069,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert concepts_nodes == expected_array
|
assert concepts_nodes == expected_array
|
||||||
|
|
||||||
# def test_i_cannot_parse_bnf_concept_mixed_with_isa_concepts(self):
|
|
||||||
# sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
|
||||||
#
|
|
||||||
# # thirties = cls.update_bnf(context, Concept("thirties",
|
|
||||||
# # definition="thirty number",
|
|
||||||
# # where="number < 10",
|
|
||||||
# # body="thirty + number").def_var("thirty").def_var("number"))
|
|
||||||
# # with thirties isa number
|
|
||||||
# # So number in 'thirty number' will spawn 'thirties' which, because there is no other indication, will
|
|
||||||
# # create an infinite loop
|
|
||||||
#
|
|
||||||
# text = "thirty one"
|
|
||||||
# expected = CNC("thirties",
|
|
||||||
# source=text,
|
|
||||||
# number=CC("number",
|
|
||||||
# source="one",
|
|
||||||
# one=CC("one", body=DoNotResolve("one")),
|
|
||||||
# body=CC("one", body=DoNotResolve("one"))),
|
|
||||||
# one=CC("one", body=DoNotResolve("one")),
|
|
||||||
# thirty="thirty")
|
|
||||||
# expected_array = compute_expected_array(cmap, text, [expected])
|
|
||||||
#
|
|
||||||
# res = parser.parse(context, ParserInput(text))
|
|
||||||
# not_for_me = res.value
|
|
||||||
# reason = res.value.body
|
|
||||||
#
|
|
||||||
# assert not res.status
|
|
||||||
# assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME)
|
|
||||||
# assert sheerka.isinstance(reason, BuiltinConcepts.CHICKEN_AND_EGG)
|
|
||||||
# assert reason.body == {cmap["thirties"].id, cmap["number"].id}
|
|
||||||
|
|
||||||
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
|
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts(self):
|
||||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||||
|
# thirties is defined in the global variable cmap as
|
||||||
# thirties = cls.update_bnf(context, Concept("thirties",
|
# thirties = cls.update_bnf(context, Concept("thirties",
|
||||||
# definition="thirty number",
|
# definition="thirty number",
|
||||||
# where="number < 10",
|
# where="number < 10",
|
||||||
@@ -1090,7 +1084,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
source="one",
|
source="one",
|
||||||
one=CC("one", body=DoNotResolve("one")),
|
one=CC("one", body=DoNotResolve("one")),
|
||||||
body=CC("one", body=DoNotResolve("one"))),
|
body=CC("one", body=DoNotResolve("one"))),
|
||||||
one=CC("one", body=DoNotResolve("one")),
|
|
||||||
thirty="thirty")
|
thirty="thirty")
|
||||||
expected_array = compute_expected_array(cmap, text, [expected])
|
expected_array = compute_expected_array(cmap, text, [expected])
|
||||||
|
|
||||||
@@ -1113,7 +1106,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
source="three",
|
source="three",
|
||||||
three=CC("three", body=DoNotResolve("three")),
|
three=CC("three", body=DoNotResolve("three")),
|
||||||
body=CC("three", body=DoNotResolve("three"))),
|
body=CC("three", body=DoNotResolve("three"))),
|
||||||
three=CC("three", body=DoNotResolve("three")),
|
|
||||||
thirty="thirty")
|
thirty="thirty")
|
||||||
expected_array = compute_expected_array(cmap, text, [expected])
|
expected_array = compute_expected_array(cmap, text, [expected])
|
||||||
|
|
||||||
@@ -1125,6 +1117,56 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert concepts_nodes == expected_array
|
assert concepts_nodes == expected_array
|
||||||
|
|
||||||
|
def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_when_concept_starts_with_isa(self):
|
||||||
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||||
|
one = CC("one", body=DoNotResolve("one"))
|
||||||
|
|
||||||
|
text = "one thousand"
|
||||||
|
expected = CNC("thousands",
|
||||||
|
source=text,
|
||||||
|
number=CC("number",
|
||||||
|
source="one",
|
||||||
|
one=one,
|
||||||
|
body=one))
|
||||||
|
expected_array = compute_expected_array(cmap, text, [expected])
|
||||||
|
|
||||||
|
res = parser.parse(context, ParserInput(text))
|
||||||
|
parser_result = res.value
|
||||||
|
concepts_nodes = res.value.value
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||||
|
assert concepts_nodes == expected_array
|
||||||
|
|
||||||
|
sheerka.concepts_grammars.clear() # to simulate restart
|
||||||
|
text = "fifty one thousand"
|
||||||
|
fifty_one = CC("fifties",
|
||||||
|
source="fifty one",
|
||||||
|
fifty="fifty",
|
||||||
|
number=CC("number", source="one", body=one, one=one))
|
||||||
|
expected = CNC("thousands",
|
||||||
|
source=text,
|
||||||
|
number=CC("number",
|
||||||
|
source="fifty one",
|
||||||
|
fifties=fifty_one,
|
||||||
|
body=fifty_one))
|
||||||
|
expected_array = compute_expected_array(cmap, text, [expected])
|
||||||
|
|
||||||
|
res = parser.parse(context, ParserInput(text))
|
||||||
|
parser_result = res.value
|
||||||
|
concepts_nodes = res.value.value
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||||
|
assert concepts_nodes == expected_array
|
||||||
|
|
||||||
|
text = "one hundred thousand"
|
||||||
|
res = parser.parse(context, ParserInput(text))
|
||||||
|
parser_result = res.value
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||||
|
|
||||||
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
|
def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self):
|
||||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||||
sheerka.concepts_grammars.clear() # simulate restart
|
sheerka.concepts_grammars.clear() # simulate restart
|
||||||
@@ -1138,7 +1180,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
source="three",
|
source="three",
|
||||||
three=CC("three", body=DoNotResolve("three")),
|
three=CC("three", body=DoNotResolve("three")),
|
||||||
body=CC("three", body=DoNotResolve("three"))),
|
body=CC("three", body=DoNotResolve("three"))),
|
||||||
three=CC("three", body=DoNotResolve("three")),
|
|
||||||
thirty="thirty")
|
thirty="thirty")
|
||||||
expected_array = compute_expected_array(cmap, text, [expected])
|
expected_array = compute_expected_array(cmap, text, [expected])
|
||||||
|
|
||||||
@@ -1157,7 +1198,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
source="one",
|
source="one",
|
||||||
one=CC("one", body=DoNotResolve("one")),
|
one=CC("one", body=DoNotResolve("one")),
|
||||||
body=CC("one", body=DoNotResolve("one"))),
|
body=CC("one", body=DoNotResolve("one"))),
|
||||||
one=CC("one", body=DoNotResolve("one")),
|
|
||||||
forty="forty")
|
forty="forty")
|
||||||
expected_array = compute_expected_array(cmap, text, [expected])
|
expected_array = compute_expected_array(cmap, text, [expected])
|
||||||
|
|
||||||
@@ -1173,7 +1213,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||||
|
|
||||||
parser_input = "def one"
|
parser_input = "def one"
|
||||||
expected = [CNC("def number", source="def one", number="one", one="one")]
|
expected = [CNC("def number", source="def one", number="one")]
|
||||||
|
|
||||||
res = parser.parse(context, ParserInput(parser_input))
|
res = parser.parse(context, ParserInput(parser_input))
|
||||||
expected_array = compute_expected_array(cmap, parser_input, expected)
|
expected_array = compute_expected_array(cmap, parser_input, expected)
|
||||||
@@ -1190,7 +1230,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
|
||||||
|
|
||||||
expression = "--filter one"
|
expression = "--filter one"
|
||||||
expected = [CNC("filter", source="--filter one", one="one")]
|
expected = [CN("filter", source="--filter one")]
|
||||||
|
|
||||||
res = parser.parse(context, ParserInput(expression))
|
res = parser.parse(context, ParserInput(expression))
|
||||||
expected_array = compute_expected_array(cmap, expression, expected)
|
expected_array = compute_expected_array(cmap, expression, expected)
|
||||||
@@ -1233,10 +1273,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
CC(factor, body=DoNotResolve("2")),
|
CC(factor, body=DoNotResolve("2")),
|
||||||
CC(factor, body=DoNotResolve("3")),
|
CC(factor, body=DoNotResolve("3")),
|
||||||
])],
|
])],
|
||||||
factor=[
|
|
||||||
CC(factor, body=DoNotResolve("1")),
|
|
||||||
CC(factor, body=DoNotResolve("2")),
|
|
||||||
CC(factor, body=DoNotResolve("3"))],
|
|
||||||
body=DoNotResolve("1 + 2 * 3"))]
|
body=DoNotResolve("1 + 2 * 3"))]
|
||||||
|
|
||||||
def test_i_can_parse_recursive_descent_grammar(self):
|
def test_i_can_parse_recursive_descent_grammar(self):
|
||||||
@@ -1255,8 +1291,34 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
text = "1 + 2 * 3"
|
text = "1 + 2 * 3"
|
||||||
|
|
||||||
res = parser.parse(context, ParserInput(text))
|
res = parser.parse(context, ParserInput(text))
|
||||||
|
parser_result = res.value
|
||||||
|
concepts_nodes = res.value.value
|
||||||
|
factor = my_map["factor"]
|
||||||
|
term = my_map["term"]
|
||||||
|
expr = my_map["expr"]
|
||||||
|
|
||||||
# concepts_nodes = res.value.value is too complicated to be validated
|
# concepts_nodes = res.value.value is too complicated to be validated
|
||||||
assert res.status
|
assert res.status
|
||||||
|
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||||
|
assert concepts_nodes == [CNC(expr,
|
||||||
|
term=CC(term,
|
||||||
|
body=CC(factor, body=DoNotResolve("1")),
|
||||||
|
factor=CC(factor, body=DoNotResolve("1"))),
|
||||||
|
expr=CC(expr,
|
||||||
|
body=CC(term,
|
||||||
|
body=DoNotResolve("2 * 3"),
|
||||||
|
factor=CC(factor, body=DoNotResolve("2")),
|
||||||
|
term=CC(term,
|
||||||
|
body=CC(factor, body=DoNotResolve("3")),
|
||||||
|
factor=CC(factor, body=DoNotResolve("3")))),
|
||||||
|
term=CC(term,
|
||||||
|
body=DoNotResolve("2 * 3"),
|
||||||
|
factor=CC(factor, body=DoNotResolve("2")),
|
||||||
|
term=CC(term,
|
||||||
|
body=CC(factor, body=DoNotResolve("3")),
|
||||||
|
factor=CC(factor, body=DoNotResolve("3"))))),
|
||||||
|
|
||||||
|
body=DoNotResolve("1 + 2 * 3"))]
|
||||||
|
|
||||||
def test_i_can_parse_simple_recursive_grammar(self):
|
def test_i_can_parse_simple_recursive_grammar(self):
|
||||||
my_map = {
|
my_map = {
|
||||||
@@ -1271,6 +1333,21 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert parser.parse(context, ParserInput("foo foo foo bar")).status
|
assert parser.parse(context, ParserInput("foo foo foo bar")).status
|
||||||
assert not parser.parse(context, ParserInput("foo baz")).status
|
assert not parser.parse(context, ParserInput("foo baz")).status
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("name, expected", [
|
||||||
|
(None, []),
|
||||||
|
("", []),
|
||||||
|
("foo", StrMatch("foo")),
|
||||||
|
("foo bar", Sequence(StrMatch("foo"), StrMatch("bar"))),
|
||||||
|
("'foo bar baz' qux", Sequence(StrMatch("foo", skip_whitespace=False),
|
||||||
|
StrMatch(" ", skip_whitespace=False),
|
||||||
|
StrMatch("bar", skip_whitespace=False),
|
||||||
|
StrMatch(" ", skip_whitespace=False),
|
||||||
|
StrMatch("baz"),
|
||||||
|
StrMatch("qux"))),
|
||||||
|
])
|
||||||
|
def test_i_can_get_expression_from_concept_name(self, name, expected):
|
||||||
|
assert BnfNodeParser.get_expression_from_concept_name(name) == expected
|
||||||
|
|
||||||
# @pytest.mark.parametrize("parser_input, expected", [
|
# @pytest.mark.parametrize("parser_input, expected", [
|
||||||
# ("one", [
|
# ("one", [
|
||||||
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
|
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
|
||||||
|
|||||||
@@ -128,7 +128,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
expected_nodes = compute_expected_array(
|
expected_nodes = compute_expected_array(
|
||||||
concepts_map,
|
concepts_map,
|
||||||
" twenty one ",
|
" twenty one ",
|
||||||
[CNC("twenties", source="twenty one", unit="one", one="one")])
|
[CNC("twenties", source="twenty one", unit="one")])
|
||||||
assert concept.compiled["c"][0].body.body == expected_nodes
|
assert concept.compiled["c"][0].body.body == expected_nodes
|
||||||
|
|
||||||
assert len(concept.compiled["d"]) == 1
|
assert len(concept.compiled["d"]) == 1
|
||||||
@@ -185,7 +185,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
expected_nodes = compute_expected_array(
|
expected_nodes = compute_expected_array(
|
||||||
concepts_map,
|
concepts_map,
|
||||||
" twenty two",
|
" twenty two",
|
||||||
[CNC("twenties", source="twenty two", unit="two", two="two")])
|
[CNC("twenties", source="twenty two", unit="two")])
|
||||||
assert res.body.concept.compiled["b"].compiled["b"][0].body.body == expected_nodes
|
assert res.body.concept.compiled["b"].compiled["b"][0].body.body == expected_nodes
|
||||||
|
|
||||||
# def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
|
# def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
|
||||||
@@ -281,7 +281,7 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
assert len(actual_nodes) == 1
|
assert len(actual_nodes) == 1
|
||||||
expected_array = compute_expected_array(
|
expected_array = compute_expected_array(
|
||||||
concepts_map,
|
concepts_map,
|
||||||
expression, [CNC("twenties", source=expression, unit="one", one="one")])
|
expression, [CNC("twenties", source=expression, unit="one")])
|
||||||
assert actual_nodes == expected_array
|
assert actual_nodes == expected_array
|
||||||
|
|
||||||
def test_i_can_parse_unrecognized_sya_concept_node(self):
|
def test_i_can_parse_unrecognized_sya_concept_node(self):
|
||||||
|
|||||||
Reference in New Issue
Block a user