Fixed #3: Added sheerka.resolve_rule()
Fixed #5: Refactored SheerkaComparisonManager
Fixed #6: Sya parser no longer works after restart
This commit is contained in:
@@ -1,14 +1,17 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Set
|
||||
|
||||
import core.utils
|
||||
from cache.Cache import Cache
|
||||
from cache.CacheManager import ConceptNotFound
|
||||
from cache.DictionaryCache import DictionaryCache
|
||||
from cache.ListIfNeededCache import ListIfNeededCache
|
||||
from cache.SetCache import SetCache
|
||||
from core.builtin_concepts import ErrorConcept
|
||||
from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, BuiltinUnique
|
||||
from core.builtin_helpers import ensure_concept
|
||||
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata
|
||||
from core.builtin_helpers import ensure_concept, ensure_bnf
|
||||
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
|
||||
VARIABLE_PREFIX
|
||||
from core.error import ErrorObj
|
||||
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound
|
||||
from core.sheerka.services.sheerka_service import BaseService
|
||||
@@ -18,6 +21,17 @@ from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
|
||||
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
|
||||
|
||||
|
||||
@dataclass
class ChickenAndEggError(Exception):
    """
    Circular reference detected while resolving the first tokens of concepts

    It is either returned as a marker value or raised by the resolution of the first keywords.

    :param concepts: ids of the concepts involved in the circular reference
    """
    concepts: Set[str]

    def __post_init__(self):
        # The __init__ generated by @dataclass never calls Exception.__init__().
        # With a keyword construction (ChickenAndEggError(concepts=...)) 'args' would stay empty,
        # which makes str(ex) blank and ex.args[0] raise an IndexError.
        # Forward the payload so that 'args' is always populated.
        super().__init__(self.concepts)
|
||||
|
||||
|
||||
@dataclass
class NoFirstTokenError(ErrorObj):
    """
    Error reported when no first token can be found for a concept
    """
    # the concept for which no first token was found
    concept: Concept
    # key of that concept (built from concept.key by compute_concepts_by_first_token)
    key: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class NoModificationFound(ErrorObj):
|
||||
"""
|
||||
@@ -82,9 +96,11 @@ class SheerkaConceptManager(BaseService):
|
||||
CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash" # store hash of concepts definitions (not values)
|
||||
CONCEPTS_REFERENCES_ENTRY = "ConceptManager:Concepts_References" # tracks references between concepts
|
||||
|
||||
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
|
||||
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
|
||||
|
||||
def __init__(self, sheerka):
|
||||
super().__init__(sheerka)
|
||||
self.bnp = core.utils.get_class(BASE_NODE_PARSER_CLASS) # BaseNodeParser
|
||||
self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
|
||||
self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
|
||||
not attr.startswith("_") and attr not in self.forbidden_meta}
|
||||
@@ -101,6 +117,7 @@ class SheerkaConceptManager(BaseService):
|
||||
self.sheerka.bind_service_method(self.get_by_hash, False, visible=False)
|
||||
self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
|
||||
self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
|
||||
self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
|
||||
|
||||
register_concept_cache = self.sheerka.om.register_concept_cache
|
||||
|
||||
@@ -119,10 +136,22 @@ class SheerkaConceptManager(BaseService):
|
||||
cache = SetCache().auto_configure(self.CONCEPTS_REFERENCES_ENTRY)
|
||||
self.sheerka.om.register_cache(self.CONCEPTS_REFERENCES_ENTRY, cache)
|
||||
|
||||
cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
|
||||
self.sheerka.om.register_cache(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache)
|
||||
|
||||
cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
|
||||
self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)
|
||||
|
||||
def initialize_deferred(self, context, is_first_time):
    """
    Deferred part of the initialization of the service

    On the first start, stores 1000 under USER_CONCEPTS_IDS in the objects ids entry
    (presumably the first id given to user concepts - to be confirmed).
    Then rebuilds the resolved 'concepts by first keyword' dictionary from the stored one.
    :param context: execution context, forwarded to resolve_concepts_by_first_keyword()
    :param is_first_time: True when the ids entry has to be seeded
    :return:
    """
    object_manager = self.sheerka.om

    if is_first_time:
        object_manager.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000)

    # a first read is needed to init the cache with the values from sdp
    raw_entry = self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    object_manager.get(raw_entry, None)
    first_keywords = object_manager.current_cache_manager().copy(raw_entry)

    # resolve the references between concepts, then publish the resolved dictionary
    resolution = self.resolve_concepts_by_first_keyword(context, first_keywords)
    object_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolution.body)
|
||||
|
||||
def initialize_builtin_concepts(self):
|
||||
"""
|
||||
Initializes the builtin concepts
|
||||
@@ -181,18 +210,18 @@ class SheerkaConceptManager(BaseService):
|
||||
|
||||
# check if the bnf definition is correctly computed
|
||||
try:
|
||||
self.bnp.ensure_bnf(context, concept)
|
||||
ensure_bnf(context, concept)
|
||||
except Exception as ex:
|
||||
return sheerka.ret(self.NAME, False, ex.args[0])
|
||||
|
||||
# compute new concepts_by_first_keyword
|
||||
init_ret_value = self.bnp.compute_concepts_by_first_token(context, [concept], True)
|
||||
init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
|
||||
if not init_ret_value.status:
|
||||
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
|
||||
concepts_by_first_keyword = init_ret_value.body
|
||||
|
||||
# computes resolved concepts_by_first_keyword
|
||||
init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
|
||||
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
|
||||
if not init_ret_value.status:
|
||||
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
|
||||
resolved_concepts_by_first_keyword = init_ret_value.body
|
||||
@@ -202,8 +231,8 @@ class SheerkaConceptManager(BaseService):
|
||||
concept.freeze_definition_hash()
|
||||
|
||||
ontology.add_concept(concept)
|
||||
ontology.put(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
|
||||
ontology.put(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
|
||||
ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
|
||||
ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
|
||||
|
||||
if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
|
||||
# allow search by definition when definition relevant
|
||||
@@ -268,8 +297,8 @@ class SheerkaConceptManager(BaseService):
|
||||
|
||||
# To update concept by first keyword
|
||||
# first remove the old references
|
||||
keywords = self.bnp.get_first_tokens(sheerka, concept) # keyword of the old concept
|
||||
concepts_by_first_keyword = cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
|
||||
keywords = self.get_first_tokens(sheerka, concept) # keyword of the old concept
|
||||
concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
|
||||
for keyword in keywords:
|
||||
try:
|
||||
concepts_by_first_keyword[keyword].remove(concept.id)
|
||||
@@ -279,15 +308,15 @@ class SheerkaConceptManager(BaseService):
|
||||
pass
|
||||
|
||||
# and then update
|
||||
init_ret_value = self.bnp.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
|
||||
init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
|
||||
if not init_ret_value.status:
|
||||
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
|
||||
concepts_by_first_keyword = init_ret_value.body
|
||||
|
||||
# computes resolved concepts_by_first_keyword
|
||||
init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context,
|
||||
concepts_by_first_keyword,
|
||||
{new_concept.id: new_concept})
|
||||
init_ret_value = self.resolve_concepts_by_first_keyword(context,
|
||||
concepts_by_first_keyword,
|
||||
{new_concept.id: new_concept})
|
||||
if not init_ret_value.status:
|
||||
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
|
||||
resolved_concepts_by_first_keyword = init_ret_value.body
|
||||
@@ -302,9 +331,8 @@ class SheerkaConceptManager(BaseService):
|
||||
cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
|
||||
|
||||
cache_manager.update_concept(concept, new_concept)
|
||||
cache_manager.put(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
|
||||
cache_manager.put(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False,
|
||||
resolved_concepts_by_first_keyword)
|
||||
cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
|
||||
cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
|
||||
|
||||
# everything seems to be fine. Update the list of attributes
|
||||
# Caution. Must be done AFTER update_concept()
|
||||
@@ -620,8 +648,192 @@ class SheerkaConceptManager(BaseService):
|
||||
concept.get_metadata().key = None
|
||||
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
|
||||
concept.set_bnf(None)
|
||||
self.bnp.ensure_bnf(context, concept)
|
||||
ensure_bnf(context, concept)
|
||||
|
||||
concept.init_key()
|
||||
|
||||
return
|
||||
|
||||
@staticmethod
def get_first_tokens(sheerka, concept):
    """
    Gives the token(s) a concept may start with

    :param sheerka: given to the BnfNodeFirstTokenVisitor when the concept has a bnf
    :param concept: concept to inspect
    :return: the first tokens collected by the visitor when the concept has a bnf,
             otherwise a one element list with the first word of the key that is not a variable,
             or None when the key only contains variables
    """
    if not concept.get_bnf():
        # no bnf: look for the first word of the key which is not a variable
        plain_words = (word for word in concept.key.split() if not word.startswith(VARIABLE_PREFIX))
        first_word = next(plain_words, None)
        return None if first_word is None else [first_word]

    # local import, as in the original code
    from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
    visitor = BnfNodeFirstTokenVisitor(sheerka)
    visitor.visit(concept.get_bnf())
    return visitor.first_tokens
|
||||
|
||||
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
    """
    Builds the map 'first token -> ids of the concepts that start with it'
    ie the dictionary that goes into CONCEPTS_BY_FIRST_KEYWORD_ENTRY

    :param context: execution context (only context.sheerka is used)
    :param concepts: list of concepts to add to the map
    :param use_sheerka: if True, starts from a copy of the map known by sheerka
    :param previous_entries: map to start from when use_sheerka is False (updated in place when not empty)
    :return: a successful return value whose body is the map,
             or a failed one carrying a NoFirstTokenError for the first concept without first token
    """
    sheerka = context.sheerka

    if use_sheerka:
        first_token_map = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    else:
        first_token_map = previous_entries or {}

    for concept in concepts:
        first_tokens = SheerkaConceptManager.get_first_tokens(sheerka, concept)
        if first_tokens is None:
            # no first token found for a concept ?
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

        for first_token in first_tokens:
            first_token_map.setdefault(first_token, []).append(concept.id)

    # remove the duplicates, keys are left untouched
    for first_token in first_token_map:
        first_token_map[first_token] = core.utils.make_unique(first_token_map[first_token])

    return sheerka.ret("BaseNodeParser", True, first_token_map)
|
||||
|
||||
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword, modified_concepts=None):
    """
    From a dictionary of first tokens, creates another dictionary where all the references
    to other concepts are resolved
    For example, from the entries
    {
        'c:|1001:' : 'c:|1002:'  # which means that the concept 1002 starts with the concept 1001
        'foo': 'c:|1001:'
    }
    it will create
    {
        'foo': ['c:|1001:', 'c:|1002:'],
    }
    (in the actual dictionaries, the values are lists of concept ids)

    This dictionary is supposed to go into RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY

    When a circular reference is detected for a key, it is logged with context.log(),
    the concepts involved are recorded in sheerka.chicken_and_eggs and the key is left out.
    :param context: execution context
    :param concepts_by_first_keyword: dictionary to resolve (not modified)
    :param modified_concepts: optional dictionary id -> concept, used before sheerka.get_by_id()
    :return: a successful return value whose body is the resolved dictionary
    """
    sheerka = context.sheerka
    resolved_map = {}

    def get_by_id(c_id):
        # the concepts given by the caller take precedence over the ones known by sheerka
        if modified_concepts and c_id in modified_concepts:
            return modified_concepts[c_id]
        return sheerka.get_by_id(c_id)

    def resolve_concepts(concept_str):
        _, c_id = core.utils.unstr_concept(concept_str)
        if c_id in already_seen:
            # already visited: the error is returned (not raised), the caller decides what to do
            return ChickenAndEggError(already_seen)

        already_seen.add(c_id)

        resolved = set()
        to_resolve = set()
        chicken_and_egg = set()

        concept = get_by_id(c_id)

        # a set of concepts is replaced by its elements
        if sheerka.isaset(context, concept):
            concepts = sheerka.get_set_elements(context, concept)
        else:
            concepts = [concept]

        for member in concepts:
            ensure_bnf(context, member)  # need to make sure that it cannot fail
            for keyword in SheerkaConceptManager.get_first_tokens(sheerka, member):
                if keyword.startswith("c:|"):
                    to_resolve.add(keyword)
                else:
                    resolved.add(keyword)

        for reference in to_resolve:
            nested = resolve_concepts(reference)
            if isinstance(nested, ChickenAndEggError):
                chicken_and_egg |= nested.concepts
            else:
                resolved |= nested
        to_resolve.clear()

        # only fails when nothing at all can be resolved because of the circular references
        if not resolved and chicken_and_egg:
            raise ChickenAndEggError(chicken_and_egg)
        return resolved

    for k, concept_ids in concepts_by_first_keyword.items():
        if not k.startswith("c:|"):
            # plain token, nothing to resolve
            resolved_map.setdefault(k, []).extend(concept_ids)
            continue

        try:
            already_seen = set()  # read by resolve_concepts(), reset for every key
            for keyword in resolve_concepts(k):
                resolved_map.setdefault(keyword, []).extend(concept_ids)
        except ChickenAndEggError as ex:
            context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
            concepts_in_recursion = ex.concepts
            # make sure to have all the parents
            concepts_in_recursion.update(concept_ids)

            for concept_id in concepts_in_recursion:
                # make sure we keep the longest chain
                old = sheerka.chicken_and_eggs.get(concept_id)
                if old is NotFound or len(old) < len(ex.concepts):
                    sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)

    # remove the duplicates, keys are left untouched
    for keyword in resolved_map:
        resolved_map[keyword] = core.utils.make_unique(resolved_map[keyword])

    return sheerka.ret("BaseNodeParser", True, resolved_map)
|
||||
|
||||
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
    """
    Looks for the concepts that may start with the value of the token
    Caution: the concepts are the ones given by sheerka.get_by_id(), no copy is made here
    :param token: token to look up, whitespace tokens never match
    :param to_keep: predicate to tell if the concept is eligible
    :param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
    :param to_map: optional callable (concept, parser, sheerka), its result replaces the concept kept
    :param strip_quotes: Remove quotes from strings
    :param parser: If needed, parser which requested the concepts
    :return: None for a whitespace token, or when nothing is found and there is no 'custom'
             the custom concepts alone when nothing is found
             otherwise the kept concepts followed by the custom ones, passed through
             core.utils.make_unique() keyed on the concept id
    """
    if token.type == TokenKind.WHITESPACE:
        return None

    # only string tokens can lose their first and last characters
    name = token.value
    if strip_quotes and token.type == TokenKind.STRING:
        name = name[1:-1]

    # to get extra concepts using an alternative method
    custom_concepts = [] if not custom else custom(name)

    concepts_ids = self.sheerka.om.get(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, name)
    if concepts_ids is NotFound:
        if custom:
            return custom_concepts
        return None

    result = []
    for concept_id in concepts_ids:
        concept = self.sheerka.get_by_id(concept_id)
        if to_keep(concept):
            result.append(to_map(concept, parser, self.sheerka) if to_map else concept)

    def unique_key(item):
        # an item may hold the concept in a 'concept' attribute
        return item.concept.id if hasattr(item, "concept") else item.id

    return core.utils.make_unique(result + custom_concepts, unique_key)
|
||||
|
||||
Reference in New Issue
Block a user