Fixed #3: Added sheerka.resolve_rule()

Fixed #5: Refactored SheerkaComparisonManager
Fixed #6: Sya parser not working after restart
This commit is contained in:
2021-01-15 07:11:04 +01:00
parent e26c83a825
commit 821dbed189
44 changed files with 1617 additions and 1068 deletions
+5 -257
View File
@@ -1,12 +1,9 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
from typing import Set
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.global_symbols import NotFound
from core.concept import Concept, ConceptParts
from core.rule import Rule
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import Node, BaseParser, ParsingError
@@ -14,17 +11,6 @@ from parsers.BaseParser import Node, BaseParser, ParsingError
# Module-level debug switch; presumably enables extra checks/logging for
# compiled concepts — TODO(review): confirm where it is consumed.
DEBUG_COMPILED = True


@dataclass
class ChickenAndEggError(Exception):
    """Signals a circular reference between concepts.

    NOTE(review): resolution code both *returns* instances of this class
    (to accumulate cycle members during recursion) and *raises* it (when a
    keyword resolves to nothing but cycles).
    """
    # Ids of the concepts involved in the detected cycle.
    concepts: Set[str]
@dataclass
class NoFirstTokenError(ParsingError):
    """Error reported when no first token can be determined for a concept.

    Produced by compute_concepts_by_first_token when get_first_tokens
    yields nothing for a concept.
    """
    # The concept that could not produce a first token.
    concept: Concept
    # The concept's key (callers pass concept.key for error reporting).
    key: str
@dataclass()
class LexerNode(Node):
start: int # starting index in the tokens list
@@ -827,248 +813,10 @@ class BaseNodeParser(BaseParser):
:param concepts
:return:
"""
concepts_by_first_keyword = self.compute_concepts_by_first_token(context, concepts).body
resolved = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
concepts_by_first_keyword = SheerkaConceptManager.compute_concepts_by_first_token(context, concepts).body
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
context.sheerka.om.put(context.sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
False,
resolved)
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
    """
    Look up the concepts whose first keyword matches the token's value.

    Caution: may return entries straight from the resolution cache, not copies.

    :param token: the token whose value is used as the lookup key
    :param to_keep: predicate deciding whether a found concept is eligible
    :param custom: optional ``name -> List[Concept]`` lambda supplying extra concepts
    :param to_map: optional ``(concept, parser, sheerka) -> concept`` transform
    :param strip_quotes: when True, strip the surrounding quotes of string tokens
    :return: de-duplicated list of concepts, or None when nothing applies
    """
    # Whitespace can never start a concept.
    if token.type == TokenKind.WHITESPACE:
        return None
    name = token.value
    if token.type == TokenKind.STRING and strip_quotes:
        # Drop the surrounding quote characters.
        name = token.value[1:-1]
    # Extra concepts supplied by the caller's alternative lookup, if any.
    extra = custom(name) if custom else []
    concept_ids = self.sheerka.om.get(self.sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, name)
    if concept_ids is NotFound:
        # No cached entry: only the custom lookup (if provided) can answer.
        return extra if custom else None
    matches = []
    for c_id in concept_ids:
        candidate = self.sheerka.get_by_id(c_id)
        if to_keep(candidate):
            matches.append(to_map(candidate, self, self.sheerka) if to_map else candidate)
    # De-duplicate by id, tolerating wrapper objects that expose `.concept`.
    return core.utils.make_unique(matches + extra,
                                  lambda c: c.concept.id if hasattr(c, "concept") else c.id)
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
    """
    Build the map from first token to the ids of the concepts it can start.

    :param context: current evaluation context (provides `sheerka`)
    :param concepts: concepts to index
    :param use_sheerka: when True, start from a copy of the cached
        CONCEPTS_BY_FIRST_KEYWORD_ENTRY map instead of `previous_entries`
    :param previous_entries: optional map to extend (ignored if use_sheerka)
    :return: sheerka return value wrapping the keyword -> [concept ids] map,
        or a failure wrapping NoFirstTokenError
    """
    sheerka = context.sheerka
    if use_sheerka:
        entries = sheerka.om.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    else:
        entries = previous_entries or {}
    for concept in concepts:
        first_tokens = BaseNodeParser.get_first_tokens(sheerka, concept)
        if first_tokens is None:
            # A concept without a recognizable first token cannot be indexed.
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))
        for tok in first_tokens:
            entries.setdefault(tok, []).append(concept.id)
    # De-duplicate every bucket while preserving insertion order.
    for key in entries:
        entries[key] = core.utils.make_unique(entries[key])
    return sheerka.ret("BaseNodeParser", True, entries)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword, modified_concepts=None):
    """
    Expand indirect "c:|" keys of a keyword -> concept-ids map into the
    concrete first tokens they resolve to.

    Plain keys are copied through; "c:|" keys are recursively resolved and
    their concept-id lists re-filed under each resolved token. Cycles
    ("chicken and egg") are logged and recorded in sheerka.chicken_and_eggs.

    :param context: current evaluation context (provides `sheerka` and `log`)
    :param concepts_by_first_keyword: map keyword -> list of concept ids
    :param modified_concepts: optional id -> concept overrides consulted
        before the sheerka cache
    :return: sheerka return value wrapping the resolved map
    """
    sheerka = context.sheerka
    res = {}

    def get_by_id(c_id):
        # Prefer caller-supplied overrides, fall back to the sheerka cache.
        if modified_concepts and c_id in modified_concepts:
            return modified_concepts[c_id]
        return sheerka.get_by_id(c_id)

    def resolve_concepts(concept_str):
        # NOTE: closes over `already_seen`, which is (re)bound in the
        # driving loop below before each top-level resolution.
        c_key, c_id = core.utils.unstr_concept(concept_str)
        if c_id in already_seen:
            # Cycle detected: hand the cycle members back to the caller
            # (returned, not raised, so siblings can still resolve).
            return ChickenAndEggError(already_seen)
        already_seen.add(c_id)
        resolved = set()
        to_resolve = set()
        chicken_and_egg = set()
        concept = get_by_id(c_id)
        if sheerka.isaset(context, concept):
            concepts = sheerka.get_set_elements(context, concept)
        else:
            concepts = [concept]
        for concept in concepts:
            BaseNodeParser.ensure_bnf(context, concept)  # need to make sure that it cannot fail
            keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
            for keyword in keywords:
                # "c:|" keywords are indirect references needing recursion.
                (to_resolve if keyword.startswith("c:|") else resolved).add(keyword)
        for concept_to_resolve_str in to_resolve:
            # NOTE(review): this `res` is local to the closure and shadows
            # the outer result dict.
            res = resolve_concepts(concept_to_resolve_str)
            if isinstance(res, ChickenAndEggError):
                chicken_and_egg |= res.concepts
            else:
                resolved |= res
        to_resolve.clear()
        if len(resolved) == 0 and len(chicken_and_egg) > 0:
            # Nothing concrete was found and a cycle exists: escalate.
            raise ChickenAndEggError(chicken_and_egg)
        else:
            return resolved

    for k, v in concepts_by_first_keyword.items():
        if k.startswith("c:|"):
            try:
                already_seen = set()
                resolved_keywords = resolve_concepts(k)
                for resolved in resolved_keywords:
                    res.setdefault(resolved, []).extend(v)
            except ChickenAndEggError as ex:
                context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
                concepts_in_recursion = ex.concepts
                # make sure to have all the parents
                for parent in v:
                    concepts_in_recursion.add(parent)
                for concept_id in concepts_in_recursion:
                    # make sure we keep the longest chain
                    old = sheerka.chicken_and_eggs.get(concept_id)
                    if old is NotFound or len(old) < len(ex.concepts):
                        sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)
        else:
            # Plain keyword: keep as-is.
            res.setdefault(k, []).extend(v)
    # 'uniquify' the lists
    for k, v in res.items():
        res[k] = core.utils.make_unique(v)
    return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def get_referenced_concepts(context, concept_id, already_seen):
    """
    Collect every token that can start the concept `concept_id`.

    Indirect "c:|" references are followed recursively. When a circular
    reference is found, a ChickenAndEggError carrying the cycle members is
    returned (so callers can keep accumulating); it is raised only when the
    concept resolves to nothing but cycles.

    :param context: current evaluation context (provides `sheerka`)
    :param concept_id: id of the concept to resolve
    :param already_seen: set of ids visited on this resolution path (mutated)
    :return: set of starting tokens, or a ChickenAndEggError on a cycle
    """
    if concept_id in already_seen:
        # Cycle: report the members instead of recursing forever.
        return ChickenAndEggError(already_seen)
    already_seen.add(concept_id)
    direct_tokens = set()
    pending_refs = set()
    cycle_members = set()
    sheerka = context.sheerka
    root = sheerka.get_by_id(concept_id)
    if sheerka.isaset(context, root):
        members = sheerka.get_set_elements(context, root)
    else:
        members = [root]
    for member in members:
        BaseNodeParser.ensure_bnf(context, member)  # need to make sure that it cannot fail
        for token in BaseNodeParser.get_first_tokens(sheerka, member):
            if token.startswith("c:|"):
                # Indirect reference: resolve it in a second pass.
                pending_refs.add(token)
            else:
                direct_tokens.add(token)
    for ref in pending_refs:
        _ref_key, ref_id = core.utils.unstr_concept(ref)
        sub = BaseNodeParser.get_referenced_concepts(context, ref_id, already_seen)
        if isinstance(sub, ChickenAndEggError):
            cycle_members |= sub.concepts
        else:
            direct_tokens |= sub
    pending_refs.clear()
    if not direct_tokens and cycle_members:
        # Only cycles were found: escalate.
        raise ChickenAndEggError(cycle_members)
    return direct_tokens
@staticmethod
def resolve_sya_associativity_and_precedence(context, sya):
    # TODO(review): not implemented yet — currently a no-op placeholder.
    pass
@staticmethod
def get_first_tokens(sheerka, concept):
    """
    Compute the tokens that can begin `concept`.

    Uses the concept's BNF when available; otherwise falls back to the
    first non-variable word of the concept's key.

    :param sheerka: sheerka instance handed to the BNF visitor
    :param concept: concept to inspect
    :return: list/collection of first tokens, or None when none can be found
    """
    if concept.get_bnf():
        # A BNF definition exists: let the dedicated visitor extract the
        # possible first tokens from it.
        from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
        visitor = BnfNodeFirstTokenVisitor(sheerka)
        visitor.visit(concept.get_bnf())
        return visitor.first_tokens
    # No BNF: the first keyword of the key that is not a variable wins.
    for word in concept.key.split():
        if not word.startswith(VARIABLE_PREFIX):
            return [word]
    return None
@staticmethod
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
    """
    Lazily parse and attach the BNF of a concept defined via BNF.

    Does nothing when the concept is not BNF-defined or already carries a
    parsed BNF. On parse failure, raises with the parser's error value.

    :param context: current evaluation context (provides `push` and `sheerka`)
    :param concept: concept whose BNF should be ensured (mutated via set_bnf)
    :param parser_name: reported as `who` in the pushed sub-context
    """
    meta = concept.get_metadata()
    if meta.definition_type != DEFINITION_TYPE_BNF or concept.get_bnf():
        # Nothing to do: either not a BNF definition, or already parsed.
        return
    from parsers.BnfDefinitionParser import BnfDefinitionParser
    bnf_parser = BnfDefinitionParser()
    desc = f"Resolving BNF '{concept.get_metadata().definition}'"
    with context.push(BuiltinConcepts.INIT_BNF,
                      concept,
                      who=parser_name,
                      obj=concept,
                      desc=desc) as sub_context:
        sub_context.add_inputs(parser_input=concept.get_metadata().definition)
        parse_ret = bnf_parser.parse(sub_context, concept.get_metadata().definition)
        sub_context.add_values(return_values=parse_ret)
        if not parse_ret.status:
            raise Exception(parse_ret.value)
        concept.set_bnf(parse_ret.body.body)
        if concept.id:
            context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf())  # update bnf in cache