Fixed #3: Added sheerka.resolve_rule()
Fixed #5: Refactored SheerkaComparisonManager
Fixed #6: Sya parser no longer works after restart
This commit is contained in:
@@ -1,12 +1,9 @@
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Set
|
||||
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
|
||||
from core.global_symbols import NotFound
|
||||
from core.concept import Concept, ConceptParts
|
||||
from core.rule import Rule
|
||||
from core.tokenizer import TokenKind, Token
|
||||
from parsers.BaseParser import Node, BaseParser, ParsingError
|
||||
@@ -14,17 +11,6 @@ from parsers.BaseParser import Node, BaseParser, ParsingError
|
||||
DEBUG_COMPILED = True
|
||||
|
||||
|
||||
@dataclass
class ChickenAndEggError(Exception):
    """
    Circular reference found while resolving the first tokens of concepts
    """

    # ids of the concepts that were visited when the circular reference was hit
    concepts: Set[str]
|
||||
|
||||
|
||||
@dataclass
class NoFirstTokenError(ParsingError):
    """
    Error reported when no first token can be found for a concept
    """

    concept: Concept  # the concept for which no first token was found
    key: str  # filled with concept.key where this error is created
|
||||
|
||||
|
||||
@dataclass()
|
||||
class LexerNode(Node):
|
||||
start: int # starting index in the tokens list
|
||||
@@ -827,248 +813,10 @@ class BaseNodeParser(BaseParser):
|
||||
:param concepts
|
||||
:return:
|
||||
"""
|
||||
concepts_by_first_keyword = self.compute_concepts_by_first_token(context, concepts).body
|
||||
resolved = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
|
||||
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
|
||||
concepts_by_first_keyword = SheerkaConceptManager.compute_concepts_by_first_token(context, concepts).body
|
||||
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
|
||||
|
||||
context.sheerka.om.put(context.sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
|
||||
context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
|
||||
False,
|
||||
resolved)
|
||||
|
||||
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
    """
    Looks up the concepts whose first keyword is the value of the token
    The ids come from the RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY entry of sheerka.om
    :param token: token to look up (whitespace tokens are ignored)
    :param to_keep: predicate, only the concepts for which it is truthy are kept
    :param custom: lambda name -> List[Concepts], extra concepts for the name
    :param to_map: optional callable (concept, parser, sheerka) applied to every kept concept
    :param strip_quotes: if True, drop the first and last characters of STRING tokens
    :return: None for a whitespace token,
             the custom concepts (or None when there is no custom) if the name is not known,
             otherwise the kept concepts followed by the custom ones, made unique by concept id
    """
    if token.type == TokenKind.WHITESPACE:
        return None

    name = token.value
    if token.type == TokenKind.STRING and strip_quotes:
        name = name[1:-1]

    # extra concepts provided by an alternative method
    extra_concepts = custom(name) if custom else []

    known_ids = self.sheerka.om.get(self.sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, name)
    if known_ids is NotFound:
        return extra_concepts if custom else None

    kept = []
    for known_id in known_ids:
        candidate = self.sheerka.get_by_id(known_id)
        if to_keep(candidate):
            kept.append(to_map(candidate, self, self.sheerka) if to_map else candidate)

    def concept_id_of(item):
        # mapped items may wrap the concept in a 'concept' attribute
        return item.concept.id if hasattr(item, "concept") else item.id

    return core.utils.make_unique(kept + extra_concepts, concept_id_of)
|
||||
|
||||
@staticmethod
|
||||
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
    """
    Builds the map first token -> ids of the concepts that can start with this token
    :param context:
    :param concepts: list of concepts to add to the map
    :param use_sheerka: if True, start from a copy of sheerka's CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    :param previous_entries: map to start from when use_sheerka is False (a new one if falsy)
    :return: sheerka.ret(..., True, map) or
             sheerka.ret(..., False, NoFirstTokenError) for the first concept with no first token
    """
    sheerka = context.sheerka

    if use_sheerka:
        by_first_token = sheerka.om.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    else:
        by_first_token = previous_entries or {}

    for concept in concepts:
        first_tokens = BaseNodeParser.get_first_tokens(sheerka, concept)
        if first_tokens is None:
            # the whole computation is aborted for a concept that has no first token
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

        for first_token in first_tokens:
            by_first_token.setdefault(first_token, []).append(concept.id)

    # remove the duplicates from every list of ids
    for first_token, concept_ids in by_first_token.items():
        by_first_token[first_token] = core.utils.make_unique(concept_ids)

    return sheerka.ret("BaseNodeParser", True, by_first_token)
|
||||
|
||||
@staticmethod
|
||||
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword, modified_concepts=None):
    """
    Replaces the first keywords that are references to concepts by the tokens they can start with

    Keys that start with 'c:|' are references to other concepts. They are expanded, recursively,
    into the first tokens of the referenced concepts (or of their elements when the
    referenced concept is a set). The other keys are kept as they are.
    When the expansion of a key only leads to circular references, the concepts involved are
    put in sheerka.chicken_and_eggs and the key does not appear in the result.
    :param context:
    :param concepts_by_first_keyword: map first keyword -> list of concept ids
    :param modified_concepts: optional map concept id -> concept, used before asking sheerka
    :return: sheerka.ret("BaseNodeParser", True, map token -> list of unique concept ids)
    """
    sheerka = context.sheerka
    res = {}

    def get_by_id(c_id):
        if modified_concepts and c_id in modified_concepts:
            return modified_concepts[c_id]
        return sheerka.get_by_id(c_id)

    def resolve_concepts(concept_str, already_seen):
        _, c_id = core.utils.unstr_concept(concept_str)
        if c_id in already_seen:
            # returned (not raised), so the caller can still use what the other branches found
            return ChickenAndEggError(already_seen)

        already_seen.add(c_id)

        resolved = set()
        to_resolve = set()
        chicken_and_egg = set()

        concept = get_by_id(c_id)
        if sheerka.isaset(context, concept):
            concepts = sheerka.get_set_elements(context, concept)
        else:
            concepts = [concept]

        for concept in concepts:
            BaseNodeParser.ensure_bnf(context, concept)  # need to make sure that it cannot fail
            # get_first_tokens() returns None when the concept has no first token,
            # such a concept simply brings no keyword
            for keyword in BaseNodeParser.get_first_tokens(sheerka, concept) or ():
                (to_resolve if keyword.startswith("c:|") else resolved).add(keyword)

        for concept_to_resolve_str in to_resolve:
            sub_result = resolve_concepts(concept_to_resolve_str, already_seen)
            if isinstance(sub_result, ChickenAndEggError):
                chicken_and_egg |= sub_result.concepts
            else:
                resolved |= sub_result

        # a circular reference is only an error when nothing else was found
        if not resolved and chicken_and_egg:
            raise ChickenAndEggError(chicken_and_egg)

        return resolved

    for k, v in concepts_by_first_keyword.items():
        if not k.startswith("c:|"):
            res.setdefault(k, []).extend(v)
            continue

        try:
            for resolved_keyword in resolve_concepts(k, set()):
                res.setdefault(resolved_keyword, []).extend(v)
        except ChickenAndEggError as ex:
            context.log(f"Chicken and egg detected for {k}, concepts={ex.concepts}")
            concepts_in_recursion = ex.concepts
            # make sure to have all the parents
            concepts_in_recursion.update(v)

            for concept_id in concepts_in_recursion:
                # make sure we keep the longest chain
                old = sheerka.chicken_and_eggs.get(concept_id)
                if old is NotFound or len(old) < len(concepts_in_recursion):
                    sheerka.chicken_and_eggs.put(concept_id, concepts_in_recursion)

    # 'uniquify' the lists
    for k, v in res.items():
        res[k] = core.utils.make_unique(v)

    return sheerka.ret("BaseNodeParser", True, res)
|
||||
|
||||
@staticmethod
|
||||
def get_referenced_concepts(context, concept_id, already_seen):
    """
    Gets all the tokens that may allow to recognize concept concept_id
    Basically, it returns all the starting tokens for concept concept_id
    (or for its elements when the concept is a set).
    The starting tokens that are references to other concepts ('c:|' prefix) are followed recursively.
    :param context:
    :param concept_id: id of the concept to resolve
    :param already_seen: set of the concept ids already visited, updated by this call
    :return: the set of starting tokens, or a ChickenAndEggError instance (returned, not raised)
             when concept_id was already visited
    :raises ChickenAndEggError: when no token is found and circular references were found
    """
    if concept_id in already_seen:
        return ChickenAndEggError(already_seen)

    already_seen.add(concept_id)

    resolved = set()
    to_resolve = set()
    chicken_and_egg = set()
    sheerka = context.sheerka
    concept = sheerka.get_by_id(concept_id)

    if sheerka.isaset(context, concept):
        concepts = sheerka.get_set_elements(context, concept)
    else:
        concepts = [concept]

    for concept in concepts:
        BaseNodeParser.ensure_bnf(context, concept)  # need to make sure that it cannot fail
        # get_first_tokens() returns None when the concept has no first token,
        # such a concept simply brings no keyword
        for keyword in BaseNodeParser.get_first_tokens(sheerka, concept) or ():
            (to_resolve if keyword.startswith("c:|") else resolved).add(keyword)

    for concept_to_resolve_str in to_resolve:
        _, c_id = core.utils.unstr_concept(concept_to_resolve_str)
        sub_result = BaseNodeParser.get_referenced_concepts(context, c_id, already_seen)
        if isinstance(sub_result, ChickenAndEggError):
            chicken_and_egg |= sub_result.concepts
        else:
            resolved |= sub_result

    # a circular reference is only an error when nothing else was found
    if not resolved and chicken_and_egg:
        raise ChickenAndEggError(chicken_and_egg)

    return resolved
|
||||
|
||||
@staticmethod
|
||||
def resolve_sya_associativity_and_precedence(context, sya):
    # No-op: the body is empty, both parameters are ignored and None is returned
    pass
|
||||
|
||||
@staticmethod
|
||||
def get_first_tokens(sheerka, concept):
    """
    Gets the tokens a concept can start with
    :param sheerka: given to the BNF visitor, not used otherwise
    :param concept:
    :return: the first tokens collected by BnfNodeFirstTokenVisitor when the concept has a BNF,
             otherwise a list with the first word of concept.key that is not a variable,
             or None when the key has no such word
    """
    if not concept.get_bnf():
        # no BNF: the first word of the key that is not a variable is the only candidate
        first_word = next(
            (word for word in concept.key.split() if not word.startswith(VARIABLE_PREFIX)),
            None)
        return None if first_word is None else [first_word]

    from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
    visitor = BnfNodeFirstTokenVisitor(sheerka)
    visitor.visit(concept.get_bnf())
    return visitor.first_tokens
|
||||
|
||||
@staticmethod
|
||||
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
    """
    Makes sure that the BNF of a concept defined by a BNF is parsed
    Nothing is done when the definition type of the concept is not DEFINITION_TYPE_BNF
    or when the concept already has a BNF.
    :param context:
    :param concept: concept to update (the concept returned by sheerka.get_by_id is updated as well
                    when the concept has an id)
    :param parser_name: given as 'who' to the sub context
    :return: None
    :raises Exception: with the value of the parsing result when the definition cannot be parsed
    """
    defined_by_bnf = concept.get_metadata().definition_type == DEFINITION_TYPE_BNF
    if not defined_by_bnf or concept.get_bnf():
        return

    from parsers.BnfDefinitionParser import BnfDefinitionParser
    definition_parser = BnfDefinitionParser()
    desc = f"Resolving BNF '{concept.get_metadata().definition}'"

    with context.push(BuiltinConcepts.INIT_BNF,
                      concept,
                      who=parser_name,
                      obj=concept,
                      desc=desc) as sub_context:
        sub_context.add_inputs(parser_input=concept.get_metadata().definition)
        parsing_result = definition_parser.parse(sub_context, concept.get_metadata().definition)
        sub_context.add_values(return_values=parsing_result)

    # NOTE(review): the indentation of the source was lost, the status check is assumed
    # to be outside of the 'with' block - to confirm
    if not parsing_result.status:
        raise Exception(parsing_result.value)

    concept.set_bnf(parsing_result.body.body)
    if concept.id:
        cached_concept = context.sheerka.get_by_id(concept.id)
        cached_concept.set_bnf(concept.get_bnf())  # update bnf in cache
|
||||
|
||||
@@ -1291,7 +1291,7 @@ class BnfNodeParser(BaseNodeParser):
|
||||
debugger.debug_log(debug_prefix + ", all parsers are locked. Nothing to do.")
|
||||
continue
|
||||
|
||||
concepts = self.get_concepts(token, self._is_eligible, strip_quotes=False)
|
||||
concepts = context.sheerka.get_concepts_by_first_token(token, self._is_eligible, strip_quotes=False)
|
||||
|
||||
if not concepts:
|
||||
if debugger.is_enabled():
|
||||
@@ -1475,7 +1475,7 @@ class BnfNodeParser(BaseNodeParser):
|
||||
desc = f"Resolve concept parsing expression for '{concept}'. {key_to_use=}"
|
||||
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
|
||||
if not concept.get_bnf(): # 'if' is done outside to save a function call. Not sure it worth it.
|
||||
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
|
||||
core.builtin_helpers.ensure_bnf(sub_context, concept, self.name)
|
||||
|
||||
grammar[key_to_use] = UnderConstruction(concept.id)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.rule import Rule, ACTION_TYPE_DEFERRED
|
||||
from core.rule import Rule
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from core.tokenizer import TokenKind
|
||||
from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError
|
||||
@@ -70,7 +70,7 @@ class RuleParser(BaseParser):
|
||||
rule = sheerka.get_rule_by_id(token.value[1])
|
||||
else:
|
||||
rule = Rule().set_id(token.value[1])
|
||||
rule.metadata.action_type = ACTION_TYPE_DEFERRED
|
||||
rule.metadata.id_is_unresolved = True
|
||||
|
||||
if sheerka.isinstance(rule, BuiltinConcepts.UNKNOWN_RULE):
|
||||
return sheerka.ret(self.name,
|
||||
|
||||
@@ -250,7 +250,7 @@ class SequenceNodeParser(BaseNodeParser):
|
||||
return a if isinstance(a, list) else [a]
|
||||
|
||||
concepts_by_name = as_list(self.sheerka.resolve(token))
|
||||
concepts_by_first_keyword = new_instances(super().get_concepts(token, self._is_eligible))
|
||||
concepts_by_first_keyword = new_instances(self.sheerka.get_concepts_by_first_token(token, self._is_eligible))
|
||||
|
||||
if concepts_by_name is None:
|
||||
return concepts_by_first_keyword
|
||||
|
||||
@@ -136,7 +136,7 @@ class SyaConceptDef:
|
||||
|
||||
# otherwise, use sheerka # KSI 20210109 otherwise or override ??
|
||||
if sheerka:
|
||||
concept_weight = parser.sheerka.get_concepts_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
|
||||
concept_weight = parser.sheerka.get_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
|
||||
if concept.str_id in concept_weight:
|
||||
sya_concept_def.precedence = concept_weight[concept.str_id]
|
||||
|
||||
@@ -1210,7 +1210,10 @@ class SyaNodeParser(BaseNodeParser):
|
||||
debugger.debug_log(debug_prefix + f", all parsers are locked")
|
||||
continue
|
||||
|
||||
concepts_def = self.get_concepts(token, self._is_eligible, to_map=SyaConceptDef.get_sya_concept_def)
|
||||
concepts_def = context.sheerka.get_concepts_by_first_token(token,
|
||||
self._is_eligible,
|
||||
to_map=SyaConceptDef.get_sya_concept_def,
|
||||
parser=self)
|
||||
if not concepts_def:
|
||||
if debugger.is_enabled():
|
||||
debugger.debug_log(debug_prefix + f", no concept found")
|
||||
|
||||
Reference in New Issue
Block a user