Fixed #3: Added sheerka.resolve_rule()

Fixed #5: Refactored SheerkaComparisonManager
Fixed #6: Sya parser no longer works after restart
This commit is contained in:
2021-01-15 07:11:04 +01:00
parent e26c83a825
commit 821dbed189
44 changed files with 1617 additions and 1068 deletions
@@ -1,14 +1,17 @@
from dataclasses import dataclass
from typing import Set
import core.utils
from cache.Cache import Cache
from cache.CacheManager import ConceptNotFound
from cache.DictionaryCache import DictionaryCache
from cache.ListIfNeededCache import ListIfNeededCache
from cache.SetCache import SetCache
from core.builtin_concepts import ErrorConcept
from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, BuiltinUnique
from core.builtin_helpers import ensure_concept
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata
from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX
from core.error import ErrorObj
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound
from core.sheerka.services.sheerka_service import BaseService
@@ -18,6 +21,17 @@ from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@dataclass
class ChickenAndEggError(Exception):
    """
    Signals a circular reference detected while resolving the first tokens of concepts.

    In resolve_concepts_by_first_keyword it is either returned as a plain value
    (when an already visited concept is met again) or raised (when nothing could be resolved).
    """
    # identifiers of the concepts that take part in the circular reference
    concepts: Set[str]
@dataclass
class NoFirstTokenError(ErrorObj):
    """
    Error reported by compute_concepts_by_first_token when no first token can be found for a concept.
    """
    # concept for which no first token was found
    concept: Concept
    # key of that concept at the time of the failure
    key: str
@dataclass
class NoModificationFound(ErrorObj):
"""
@@ -82,9 +96,11 @@ class SheerkaConceptManager(BaseService):
CONCEPTS_BY_HASH_ENTRY = "ConceptManager:Concepts_By_Hash" # store hash of concepts definitions (not values)
CONCEPTS_REFERENCES_ENTRY = "ConceptManager:Concepts_References" # tracks references between concepts
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
def __init__(self, sheerka):
super().__init__(sheerka)
self.bnp = core.utils.get_class(BASE_NODE_PARSER_CLASS) # BaseNodeParser
self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
not attr.startswith("_") and attr not in self.forbidden_meta}
@@ -101,6 +117,7 @@ class SheerkaConceptManager(BaseService):
self.sheerka.bind_service_method(self.get_by_hash, False, visible=False)
self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
register_concept_cache = self.sheerka.om.register_concept_cache
@@ -119,10 +136,22 @@ class SheerkaConceptManager(BaseService):
cache = SetCache().auto_configure(self.CONCEPTS_REFERENCES_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_REFERENCES_ENTRY, cache)
cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache)
cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)
def initialize_deferred(self, context, is_first_time):
    """
    Deferred part of the service initialization.

    :param context: execution context, forwarded to resolve_concepts_by_first_keyword
    :param is_first_time: when True, the USER_CONCEPTS_IDS counter is seeded with 1000
    :return: None
    """
    if is_first_time:
        self.sheerka.om.put(self.sheerka.OBJECTS_IDS_ENTRY, self.USER_CONCEPTS_IDS, 1000)

    # The resolved cache is registered with persist=False, so it is rebuilt here from the
    # first-keyword entries.
    # NOTE(review): nesting reconstructed from a flattened listing; the statements below are
    # assumed to run on every start, not only the first one - confirm.
    # initialize the dictionary of first tokens
    self.sheerka.om.get(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, None) # to init the cache with the values from sdp
    concepts_by_first_keyword = self.sheerka.om.current_cache_manager().copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
    # res.status is not checked here; only the body is stored
    self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
def initialize_builtin_concepts(self):
"""
Initializes the builtin concepts
@@ -181,18 +210,18 @@ class SheerkaConceptManager(BaseService):
# check if the bnf definition is correctly computed
try:
self.bnp.ensure_bnf(context, concept)
ensure_bnf(context, concept)
except Exception as ex:
return sheerka.ret(self.NAME, False, ex.args[0])
# compute new concepts_by_first_keyword
init_ret_value = self.bnp.compute_concepts_by_first_token(context, [concept], True)
init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
@@ -202,8 +231,8 @@ class SheerkaConceptManager(BaseService):
concept.freeze_definition_hash()
ontology.add_concept(concept)
ontology.put(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
ontology.put(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
# allow search by definition when definition relevant
@@ -268,8 +297,8 @@ class SheerkaConceptManager(BaseService):
# To update concept by first keyword
# first remove the old references
keywords = self.bnp.get_first_tokens(sheerka, concept) # keyword of the old concept
concepts_by_first_keyword = cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
keywords = self.get_first_tokens(sheerka, concept) # keyword of the old concept
concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
for keyword in keywords:
try:
concepts_by_first_keyword[keyword].remove(concept.id)
@@ -279,15 +308,15 @@ class SheerkaConceptManager(BaseService):
pass
# and then update
init_ret_value = self.bnp.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.bnp.resolve_concepts_by_first_keyword(context,
concepts_by_first_keyword,
{new_concept.id: new_concept})
init_ret_value = self.resolve_concepts_by_first_keyword(context,
concepts_by_first_keyword,
{new_concept.id: new_concept})
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
@@ -302,9 +331,8 @@ class SheerkaConceptManager(BaseService):
cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
cache_manager.update_concept(concept, new_concept)
cache_manager.put(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
cache_manager.put(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False,
resolved_concepts_by_first_keyword)
cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
# everything seems to be fine. Update the list of attributes
# Caution. Must be done AFTER update_concept()
@@ -620,8 +648,192 @@ class SheerkaConceptManager(BaseService):
concept.get_metadata().key = None
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
concept.set_bnf(None)
self.bnp.ensure_bnf(context, concept)
ensure_bnf(context, concept)
concept.init_key()
return
@staticmethod
def get_first_tokens(sheerka, concept):
    """
    Gives the token(s) a concept may start with.

    :param sheerka: passed to the BnfNodeFirstTokenVisitor when the concept has a bnf
    :param concept: concept to inspect
    :return: for a concept with a bnf, the first_tokens collected by the visitor;
             otherwise a one-element list with the first word of the key that does not
             start with VARIABLE_PREFIX, or None when every word is a variable
             (or the key has no word at all)
    """
    if not concept.get_bnf():
        # no bnf: the first non-variable word of the key is the first token
        for word in concept.key.split():
            if not word.startswith(VARIABLE_PREFIX):
                return [word]
        return None

    # imported locally, as in the original code
    from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor

    visitor = BnfNodeFirstTokenVisitor(sheerka)
    visitor.visit(concept.get_bnf())
    return visitor.first_tokens
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
    """
    Builds the map 'first token -> ids of the concepts that start with it',
    eg the dictionary that goes into CONCEPTS_BY_FIRST_KEYWORD_ENTRY

    :param context: execution context; context.sheerka is used
    :param concepts: list of concepts to add to the map
    :param use_sheerka: if True, start from a copy of the entries currently held by sheerka
    :param previous_entries: entries to start from when use_sheerka is False
                             (updated in place when given and not empty)
    :return: a failed return value holding a NoFirstTokenError as soon as a concept
             has no first token, otherwise a successful return value holding the map
    """
    sheerka = context.sheerka

    # choose the starting point of the map
    if use_sheerka:
        by_first_token = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    else:
        by_first_token = previous_entries or {}

    for concept in concepts:
        first_tokens = SheerkaConceptManager.get_first_tokens(sheerka, concept)
        if first_tokens is None:
            # no first token found for a concept ?
            error = NoFirstTokenError(concept, concept.key)
            return sheerka.ret(sheerka.name, False, error)

        for first_token in first_tokens:
            by_first_token.setdefault(first_token, []).append(concept.id)

    # remove the duplicates from every list of ids
    for first_token, concept_ids in by_first_token.items():
        by_first_token[first_token] = core.utils.make_unique(concept_ids)

    return sheerka.ret("BaseNodeParser", True, by_first_token)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword, modified_concepts=None):
    """
    From a dictionary of first tokens, create another dictionary where all references to other concepts
    are resolved.
    For example, from the entries
    {
        'c:|1001:' : 'c:|1002:' # which means that the concept 1002 starts with the concept 1001
        'foo': 'c:|1001:'
    }
    it will create
    {
        'foo': ['c:|1001:', 'c:|1002:'],
    }
    The callers visible in this file store the result under RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY.

    When a circular reference prevents the resolution of an entry, the entry is left out of the result,
    the problem is logged on the context and the concepts involved are recorded in sheerka.chicken_and_eggs.

    :param context: execution context; context.sheerka and context.log are used
    :param concepts_by_first_keyword: dictionary 'first token -> list of concept ids' to resolve
    :param modified_concepts: optional dictionary 'concept id -> concept'; its concepts are used
                              instead of the ones returned by sheerka.get_by_id
    :return: a successful return value whose body is the resolved dictionary
    """
    sheerka = context.sheerka
    res = {}

    def get_by_id(c_id):
        # concepts being modified take precedence over the ones known by sheerka
        if modified_concepts and c_id in modified_concepts:
            return modified_concepts[c_id]
        return sheerka.get_by_id(c_id)

    def resolve_concepts(concept_str):
        # Returns the set of plain keywords the concept may start with.
        # 'already_seen' is read from the enclosing scope; it is reset by the main loop
        # before each top level call.
        c_key, c_id = core.utils.unstr_concept(concept_str)
        if c_id in already_seen:
            # cycle: reported as a returned value (not raised) so the caller can go on
            return ChickenAndEggError(already_seen)
        already_seen.add(c_id)
        resolved = set()
        to_resolve = set()
        chicken_and_egg = set()
        concept = get_by_id(c_id)
        if sheerka.isaset(context, concept):
            concepts = sheerka.get_set_elements(context, concept)
        else:
            concepts = [concept]
        # NOTE(review): nesting of the loops below was reconstructed from a flattened listing - confirm
        for concept in concepts:
            ensure_bnf(context, concept) # need to make sure that it cannot fail
            # NOTE(review): get_first_tokens may return None, which would make the next loop
            # raise a TypeError - confirm that this cannot happen for the concepts reaching this point
            keywords = SheerkaConceptManager.get_first_tokens(sheerka, concept)
            for keyword in keywords:
                (to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
            for concept_to_resolve_str in to_resolve:
                # this 'res' is local to resolve_concepts, it is not the result dictionary
                res = resolve_concepts(concept_to_resolve_str)
                if isinstance(res, ChickenAndEggError):
                    chicken_and_egg |= res.concepts
                else:
                    resolved |= res
            to_resolve.clear()
        if len(resolved) == 0 and len(chicken_and_egg) > 0:
            # nothing but circular references: give up for this concept
            raise ChickenAndEggError(chicken_and_egg)
        else:
            return resolved

    for k, v in concepts_by_first_keyword.items():
        if k.startswith("c:|"):
            try:
                already_seen = set()
                resolved_keywords = resolve_concepts(k)
                for resolved in resolved_keywords:
                    res.setdefault(resolved, []).extend(v)
            except ChickenAndEggError as ex:
                context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
                # same set object as ex.concepts: the additions below are visible through both names
                concepts_in_recursion = ex.concepts
                # make sure to have all the parents
                for parent in v:
                    concepts_in_recursion.add(parent)
                for concept_id in concepts_in_recursion:
                    # make sure we keep the longest chain
                    old = sheerka.chicken_and_eggs.get(concept_id)
                    if old is NotFound or len(old) < len(ex.concepts):
                        sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)
        else:
            # plain keyword: nothing to resolve
            res.setdefault(k, []).extend(v)

    # 'uniquify' the lists
    for k, v in res.items():
        res[k] = core.utils.make_unique(v)
    return sheerka.ret("BaseNodeParser", True, res)
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
    """
    Looks for the concepts that may start with the value of the token,
    using the RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY cache

    :param token: token to look up; whitespace tokens never match
    :param to_keep: predicate to tell if the concept is eligible
    :param custom: lambda name -> List[Concepts] that gives extra concepts, according to the name
    :param to_map: optional callable (concept, parser, sheerka) applied to every concept kept
    :param strip_quotes: Remove the first and last characters of string tokens
    :param parser: If needed, parser which requested the concepts (only forwarded to to_map)
    :return: None for a whitespace token, or when nothing is cached for the name and
             no custom is given; the custom concepts alone when nothing is cached;
             otherwise the kept concepts followed by the custom ones, without duplicates
    """
    if token.type == TokenKind.WHITESPACE:
        return None

    name = token.value
    if strip_quotes and token.type == TokenKind.STRING:
        name = name[1:-1]

    # to get extra concepts using an alternative method
    extra_concepts = custom(name) if custom else []

    cached_ids = self.sheerka.om.get(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, name)
    if cached_ids is NotFound:
        return extra_concepts if custom else None

    kept = []
    for cached_id in cached_ids:
        candidate = self.sheerka.get_by_id(cached_id)
        if to_keep(candidate):
            kept.append(to_map(candidate, parser, self.sheerka) if to_map else candidate)

    def concept_id_of(item):
        # mapped items may wrap the concept in a 'concept' attribute
        if hasattr(item, "concept"):
            return item.concept.id
        return item.id

    return core.utils.make_unique(kept + extra_concepts, concept_id_of)