Fixed #30: Add variable support in BNF concept definition
Fixed #31: Add regex support in BNF concepts
Fixed #33: Do not memorize objects during restore
@@ -342,8 +342,9 @@ def evaluate(context,

 def get_lexer_nodes(return_values, start, tokens):
     """
-    From a parser result, return the corresponding LexerNode
-    either ConceptNode, UnrecognizedTokensNode or SourceCodeNode
+    Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
+    On the contrary of the other method (get_lexer_nodes_using_positions),
+    all created lexer nodes will use the same offset (start)
     :param return_values:
     :param start:
     :param tokens:
@@ -360,13 +361,12 @@ def get_lexer_nodes(return_values, start, tokens):
                continue

            end = start + len(tokens) - 1
-            lexer_nodes.append(
-                [SourceCodeNode(start,
-                                end,
-                                tokens,
-                                ret_val.body.source,
-                                python_node=ret_val.body.body,
-                                return_value=ret_val)])
+            lexer_nodes.append([SourceCodeNode(start,
+                                               end,
+                                               tokens,
+                                               ret_val.body.source,
+                                               python_node=ret_val.body.body,
+                                               return_value=ret_val)])

        elif ret_val.who == "parsers.ExactConcept":
            concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -379,6 +379,11 @@ def get_lexer_nodes(return_values, start, tokens):
            for node in nodes:
                node.start += start
                node.end += start
+                if isinstance(node, ConceptNode):
+                    for k, v in node.concept.get_compiled().items():
+                        if hasattr(v, "start"):
+                            v.start += start
+                            v.end += start

            # but append the whole sequence when it's a sequence
            lexer_nodes.append(nodes)
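A note on the block just added above: it shifts not only each node's own span but also the spans of its compiled sub-values. A minimal standalone sketch of that remapping, with a made-up Node class standing in for the project's ConceptNode:

class Node:
    # toy stand-in: positions are relative to the sub-parse (0-based)
    def __init__(self, start, end, compiled=None):
        self.start, self.end = start, end
        self.compiled = compiled or {}

start = 5  # offset of the sub-parse inside the full token stream
node = Node(0, 2, compiled={"a": Node(0, 0), "b": Node(2, 2)})

node.start += start
node.end += start
for v in node.compiled.values():
    if hasattr(v, "start"):  # only position-carrying values are shifted
        v.start += start
        v.end += start

print(node.start, node.end)      # 5 7
print(node.compiled["b"].start)  # 7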
@@ -397,9 +402,15 @@ def get_lexer_nodes(return_values, start, tokens):

 def get_lexer_nodes_using_positions(return_values, positions):
     """
-    Transform all elements from return_values into lexer nodes
-    use positions to remap the exact positions
+    Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
+    Use positions to compute the exact new positions
+    On the contrary of the other method (get_lexer_nodes),
+    one return value is mapped with one position. It's not an offset, but an absolute position
     :param return_values:
     :param positions: is a list of triplets (start, end, tokens)
     :return:
     """

     lexer_nodes = []
     for ret_val, position in zip(return_values, positions):
         if ret_val.who in ("parsers.Python", 'parsers.PythonWithConcepts'):
@@ -425,6 +436,11 @@ def get_lexer_nodes_using_positions(return_values, positions):
            for node in nodes:
                node.start = position.start
                node.end = position.end
+                if isinstance(node, ConceptNode):
+                    for k, v in node.concept.get_compiled().items():
+                        if hasattr(v, "start"):
+                            v.start += position.start
+                            v.end += position.start

            # but append the whole sequence when it's a sequence
            lexer_nodes.extend(nodes)
@@ -493,9 +509,10 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers

 def update_compiled(context, concept, errors, parsers=None):
     """
-    recursively iterate over concept.get_compiled() to replace LexerNode into concepts or list of ReturnValueConcept
-    When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...)
-    the result will be a LexerNode.
+    TL;DR;
+    Recursively iterate over concept.get_compiled() to replace LexerNode into concepts or list of ReturnValueConcept
+    Long version:
+    When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...) the result will be a LexerNode.
     In the specific case of a ConceptNode, the compiled variables will also be LexerNode (UnrecognizedTokensNode...)
     This function iterates over the compiled values to transform these nodes into concepts of the compiled AST
     :param context:
@@ -518,9 +535,12 @@ def update_compiled(context, concept, errors, parsers=None):
                _validate_concept(v)

            elif isinstance(v, SourceCodeWithConceptNode):
-                from parsers.PythonWithConceptsParser import PythonWithConceptsParser
-                parser_helper = PythonWithConceptsParser()
-                res = parser_helper.parse_nodes(context, v.get_all_nodes())
+                if v.return_value:
+                    res = v.return_value
+                else:
+                    from parsers.PythonWithConceptsParser import PythonWithConceptsParser
+                    parser_helper = PythonWithConceptsParser()
+                    res = parser_helper.parse_nodes(context, v.get_all_nodes())
                if res.status:
                    c.get_compiled()[k] = [res]
                else:
@@ -556,7 +576,7 @@ def update_compiled(context, concept, errors, parsers=None):
    # example : Concept("a plus b").def_var("a").def_var("b")
    # and the user has entered 'a plus b'
    # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
-    # This means that 'a' and 'b' don't have any real value
+    # This means that 'a' and 'b' don't have any real values
    if len(concept.get_metadata().variables) > 0:
        for name, value in concept.get_metadata().variables:
            if _get_source(concept.get_compiled(), name) != name:
@@ -633,7 +653,7 @@ def ensure_concept_or_rule(*items):
     raise TypeError(f"'{items}' must be a concept or rule")


-def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
+def ensure_bnf(context, concept, parser_name="BaseNodeParser", update_bnf_for_cached_concept=True):
     if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF and not concept.get_bnf():
         from parsers.BnfDefinitionParser import BnfDefinitionParser
         regex_parser = BnfDefinitionParser()
@@ -651,7 +671,7 @@ def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
        raise Exception(bnf_parsing_ret_val.value)

    concept.set_bnf(bnf_parsing_ret_val.body.body)
-    if concept.id:
+    if concept.id and update_bnf_for_cached_concept:
        context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf())  # update bnf in cache
@@ -694,6 +694,33 @@ class CC:
         self.end = end
         return self

+    def to_compare(self, other, to_compare_delegate):
+        """
+        Transform other into CC, to ease the comparison
+        :param other:
+        :param to_compare_delegate:
+        :return:
+        """
+
+        if isinstance(other, CC):
+            return other
+
+        if isinstance(other, Concept):
+            if self.exclude_body:
+                compiled = {k: v for k, v in other.get_compiled().items() if k != ConceptParts.BODY}
+            else:
+                compiled = other.get_compiled()
+
+            self_compile_to_use = self.compiled or compiled
+
+            compiled = to_compare_delegate(self_compile_to_use, compiled, to_compare_delegate)
+            return CC(other,
+                      self.source,
+                      self.exclude_body,
+                      **compiled)
+
+        raise NotImplementedError(f"CC, {other=}")


 @dataclass()
 class CB:
@@ -825,8 +852,8 @@ class CIO:
        self.concept_id = concept.id
        self.concept = concept
        self.source = source
-        self.start = -1
-        self.end = -1
+        self.start = None
+        self.end = None

    def set_concept(self, concept):
        self.concept = concept
@@ -47,9 +47,15 @@ class RemovedType(CustomType):
         super(RemovedType, self).__init__("**Removed**")


+class NoFirstTokenType(CustomType):
+    def __init__(self):
+        super(NoFirstTokenType, self).__init__("**NoFirstToken**")
+
+
 NotInit = NotInitType()
 NotFound = NotFoundType()
 Removed = RemovedType()
+NoFirstToken = NoFirstTokenType()


 class ErrorObj:

@@ -199,6 +199,16 @@ class ExecutionContext:
         self._push = None

     def add_preprocess(self, name, **kwargs):
+        """
+        PreProcess items are used during the parsing and the evaluation of the ReturnValueConcept
+        Using them, you can tweak the behaviour of the parser and evaluator (you can disable them, for instance)
+        example :
+            context.add_preprocess(BaseEvaluator.get_name("priority15"), enabled=False)
+            context.add_preprocess(BaseEvaluator.get_name("all_priority15"), priority=99)
+        :param name:
+        :param kwargs:
+        :return:
+        """
         preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS)
         preprocess.set_value("preprocess_name", name)
         for k, v in kwargs.items():
@@ -734,7 +734,7 @@ class Sheerka(Concept):
         if not isinstance(obj, Concept):
             return True

-        return obj.key not in (BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
+        return obj.key not in (None, BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)

     @staticmethod
     def isinstance(a, b):
@@ -879,6 +879,13 @@ class Sheerka(Concept):

         return concept

+    @staticmethod
+    def deepdiff(a, b):
+        from deepdiff import DeepDiff
+        ddiff = DeepDiff(a, b, ignore_order=True)
+        print(ddiff)
+        return ddiff
+

 def to_profile():
     sheerka = Sheerka()
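The new deepdiff helper wraps the third-party deepdiff package. A quick standalone example of the call it makes (the dictionaries are made up; the output shown is approximate):

from deepdiff import DeepDiff

old = {"key": "a plus b", "variables": ["a", "b"]}
new = {"key": "a plus b", "variables": ["b", "a"], "bnf": "a '+' b"}

# ignore_order=True treats the reordered 'variables' list as unchanged,
# so only the added 'bnf' entry is reported
print(DeepDiff(old, new, ignore_order=True))
# roughly: {'dictionary_item_added': [root['bnf']]}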
@@ -1,5 +1,6 @@
+import re
 from dataclasses import dataclass
-from typing import Set
+from typing import Set, List, Union

 import core.utils
 from cache.Cache import Cache
@@ -12,9 +13,10 @@ from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, Built
 from core.builtin_helpers import ensure_concept, ensure_bnf
 from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
     VARIABLE_PREFIX
-from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED
+from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED, NoFirstToken
 from core.sheerka.services.sheerka_service import BaseService
 from core.tokenizer import Tokenizer, TokenKind
+from parsers.BnfNodeParser import RegExDef
 from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError

 BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@@ -98,6 +100,8 @@ class SheerkaConceptManager(BaseService):
     CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
     RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"

+    CONCEPTS_BY_REGEX_ENTRY = "ConceptManager:Concepts_By_Regex"
+
     CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions"

     def __init__(self, sheerka):
@@ -105,6 +109,7 @@ class SheerkaConceptManager(BaseService):
        self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
        self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
                             not attr.startswith("_") and attr not in self.forbidden_meta}
+        self.compiled_concepts_by_regex = []

    def initialize(self):
        self.sheerka.bind_service_method(self.create_new_concept, True)
@@ -119,6 +124,7 @@ class SheerkaConceptManager(BaseService):
        self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
        self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
        self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
+        self.sheerka.bind_service_method(self.get_concepts_by_first_regex, False, visible=False)
        self.sheerka.bind_service_method(self.get_concepts_bnf_definitions, False, visible=False)
        self.sheerka.bind_service_method(self.clear_bnf_definition, True, visible=False)

@@ -145,6 +151,9 @@ class SheerkaConceptManager(BaseService):
        cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
        self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)

+        cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_REGEX_ENTRY)
+        self.sheerka.om.register_cache(self.CONCEPTS_BY_REGEX_ENTRY, cache)
+
        cache = Cache().auto_configure(self.CONCEPTS_BNF_DEFINITIONS_ENTRY)
        self.sheerka.om.register_cache(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, cache, persist=False)

@@ -158,6 +167,14 @@ class SheerkaConceptManager(BaseService):
        res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
        self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)

+        # init the regular expressions
+        self.sheerka.om.get(self.CONCEPTS_BY_REGEX_ENTRY, None)
+        from_db = self.sheerka.om.current_cache_manager().copy(self.CONCEPTS_BY_REGEX_ENTRY)
+        concepts_by_first_regex = {RegExDef().deserialize(k): v for k, v in from_db.items()}
+        res = self.compile_concepts_by_first_regex(context, concepts_by_first_regex)
+        self.compiled_concepts_by_regex.clear()
+        self.compiled_concepts_by_regex.extend(res.body)
+
    def initialize_builtin_concepts(self):
        """
        Initializes the builtin concepts
@@ -201,9 +218,9 @@ class SheerkaConceptManager(BaseService):
        concept.init_key()
        init_bnf_ret_value = None

-        ontology = sheerka.om
+        om = sheerka.om

-        if ontology.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
+        if om.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
            error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
            return sheerka.ret(
                self.NAME,
@@ -220,33 +237,44 @@ class SheerkaConceptManager(BaseService):
        except Exception as ex:
            return sheerka.ret(self.NAME, False, ex.args[0])

-        # compute new concepts_by_first_keyword
-        init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
+        # compute first token and/or first regex
+        init_ret_value = self.compute_concepts_by_first_item(context, [concept], True)
        if not init_ret_value.status:
            return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
-        concepts_by_first_keyword = init_ret_value.body
+        by_first_keyword, by_first_regex = init_ret_value.body

        # computes resolved concepts_by_first_keyword
-        init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
+        init_ret_value = self.resolve_concepts_by_first_keyword(context, by_first_keyword)
        if not init_ret_value.status:
            return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
        resolved_concepts_by_first_keyword = init_ret_value.body

+        # compile regex
+        compile_ret = self.compile_concepts_by_first_regex(context, by_first_regex)
+        if not compile_ret.status:
+            return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
+        compiled_concepts_by_first_regex = compile_ret.body
+
        # if everything is fine
        freeze_concept_attrs(concept)
        concept.freeze_definition_hash()

-        ontology.add_concept(concept)
-        ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
-        ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
+        om.add_concept(concept)
+        om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
+        om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
+        om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in by_first_regex.items()})
+
+        # update the compiled regex
+        self.compiled_concepts_by_regex.clear()
+        self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)

        if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
            # allow search by definition when definition relevant
-            ontology.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
+            om.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)

        # update references
        for ref in self.compute_references(concept):
-            ontology.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
+            om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)

        # TODO : this line seems to be useless
        # The grammar is never reset
@@ -286,7 +314,7 @@ class SheerkaConceptManager(BaseService):
        # }
        #
        sheerka = self.sheerka
-        cache_manager = self.sheerka.om
+        om = self.sheerka.om

        if not to_add and not to_remove:
            return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))
@@ -301,23 +329,19 @@ class SheerkaConceptManager(BaseService):
        if res is not None:
            return res

-        # To update concept by first keyword
-        # first remove the old references
-        keywords = self.get_first_tokens(sheerka, concept)  # keyword of the old concept
-        concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
-        for keyword in keywords:
-            try:
-                concepts_by_first_keyword[keyword].remove(concept.id)
-                if len(concepts_by_first_keyword[keyword]) == 0:
-                    del concepts_by_first_keyword[keyword]
-            except KeyError:  # only occurs in unit tests when concepts are created without create_new()
-                pass
+        # To update concept by first keyword and first regex
+        # first remove old first token and first regex entries
+        concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)

        # and then update
-        init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
+        init_ret_value = self.compute_concepts_by_first_item(context,
+                                                             [new_concept],
+                                                             False,
+                                                             concepts_by_first_keyword,
+                                                             concepts_by_regex)
        if not init_ret_value.status:
            return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
-        concepts_by_first_keyword = init_ret_value.body
+        concepts_by_first_keyword, concepts_by_regex = init_ret_value.body

        # computes resolved concepts_by_first_keyword
        init_ret_value = self.resolve_concepts_by_first_keyword(context,
@@ -327,18 +351,30 @@ class SheerkaConceptManager(BaseService):
            return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
        resolved_concepts_by_first_keyword = init_ret_value.body

+        # compile new regex
+        compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
+        if not compile_ret.status:
+            return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
+        compiled_concepts_by_first_regex = compile_ret.body
+
        # update concept that referenced the old concept and clear old references
        self.update_references(context, concept, new_concept, to_add)
        for ref in self.compute_references(concept):
-            cache_manager.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
+            om.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)

        # compute new references
        for ref in self.compute_references(new_concept):
-            cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
+            om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)

-        cache_manager.update_concept(concept, new_concept)
-        cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
-        cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
+        # everything is ok, update the caches
+        om.update_concept(concept, new_concept)
+        om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
+        om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
+        om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
+
+        # update the compiled regex
+        self.compiled_concepts_by_regex.clear()
+        self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)

        # everything seems to be fine. Update the list of attributes
        # Caution. Must be done AFTER update_concept()
@@ -349,6 +385,7 @@ class SheerkaConceptManager(BaseService):
        if modify_source:
            self._update_concept(context, concept, to_add, to_remove)

+        # KSI 2021-02-16 publish the modification of the concept only when someone needs it
        ret = sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept))
        return ret
@@ -362,17 +399,44 @@ class SheerkaConceptManager(BaseService):
        # TODO : resolve concept first

        sheerka = context.sheerka
-        refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)

        if not sheerka.is_known(concept):
            return sheerka.ret(self.NAME, False, sheerka.err(ConceptNotFound(concept)))

+        om = sheerka.om
+
+        refs = om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
        if refs is not NotFound:
            refs_instances = [sheerka.new_from_template(c, c.key) for c in [self.get_by_id(ref) for ref in refs]]
            return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(refs_instances)))

-        try:
-            sheerka.om.remove_concept(concept)
-            sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
-            return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
-        except ConceptNotFound as ex:
-            return sheerka.ret(self.NAME, False, sheerka.err(ex))
+        concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
+
+        # computes resolved concepts_by_first_keyword
+        init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
+        if not init_ret_value.status:
+            return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
+        resolved_concepts_by_first_keyword = init_ret_value.body
+
+        # compile new regex
+        compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
+        if not compile_ret.status:
+            return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
+        compiled_concepts_by_first_regex = compile_ret.body
+
+        # everything seems fine. I can commit the modification and remove
+        om.remove_concept(concept)
+
+        om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
+        om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
+        om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
+
+        # update the compiled regex
+        self.compiled_concepts_by_regex.clear()
+        self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
+
+        sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
+        return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))

    def set_attr(self, concept, attribute, value):
        """
@@ -497,7 +561,7 @@ class SheerkaConceptManager(BaseService):
            if c.id == concept_id:
                return c

-        metadata = [(index_name, key), ("id", concept_id)] if concept_id else (index_name, key)
+        metadata = {index_name: key, "id": concept_id} if concept_id else {index_name: key}
        return self.sheerka.get_unknown(metadata)

    def update_references(self, context, concept, modified_concept=None, modifications=None):
@@ -663,12 +727,39 @@ class SheerkaConceptManager(BaseService):
            concept.get_metadata().key = None
            if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
                concept.set_bnf(None)
-                ensure_bnf(context, concept)
+                ensure_bnf(context, concept, update_bnf_for_cached_concept=False)

            concept.init_key()

        return

+    def _remove_concept_first_token_and_first_regex(self, concept):
+        keywords_or_regex = self.get_first_items(self.sheerka, concept)  # keywords of the old concept
+        concepts_by_first_keyword = self.sheerka.om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
+        concepts_by_regex = self.sheerka.om.copy(self.CONCEPTS_BY_REGEX_ENTRY)
+        for item in keywords_or_regex:
+            try:
+                if isinstance(item, RegExDef):
+                    serialized = item.serialize()
+                    copy = concepts_by_regex[serialized].copy()
+                    copy.remove(concept.id)
+                    if len(copy) == 0:
+                        del concepts_by_regex[serialized]
+                    else:
+                        concepts_by_regex[serialized] = copy
+                else:
+                    copy = concepts_by_first_keyword[item].copy()
+                    copy.remove(concept.id)
+                    if len(copy) == 0:
+                        del concepts_by_first_keyword[item]
+                    else:
+                        concepts_by_first_keyword[item] = copy
+            except KeyError:  # only occurs in unit tests when concepts are created without create_new()
+                pass
+
+        # return concepts_by_first_keyword, concepts_by_regex
+        return concepts_by_first_keyword, {RegExDef().deserialize(k): v for k, v in concepts_by_regex.items()}

    @staticmethod
    def get_first_tokens(sheerka, concept):
        """
@@ -677,6 +768,30 @@ class SheerkaConceptManager(BaseService):
        :param concept:
        :return:
        """
        if concept.get_bnf():
            from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
            bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
            bnf_visitor.visit(concept.get_bnf())
            return [t for t in bnf_visitor.first_tokens if t is not NoFirstToken]
        else:
            keywords = concept.key.split()
            for keyword in keywords:
                if keyword.startswith(VARIABLE_PREFIX):
                    continue

                return [keyword]

        return None

+    @staticmethod
+    def get_first_items(sheerka, concept) -> List[Union[str, RegExDef]]:
+        """
+        Get all the first items needed by the concept
+        An item can either be a token, or a regular expression
+        :param sheerka:
+        :param concept:
+        :return: List of str (if it's a token) or RegExDef (if it's the definition of a regex)
+        """
+        if concept.get_bnf():
+            from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
+            bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
@@ -692,6 +807,55 @@ class SheerkaConceptManager(BaseService):

        return None

+    @staticmethod
+    def compute_concepts_by_first_item(context,
+                                       concepts,
+                                       use_sheerka=False,
+                                       previous_first_keywords=None,
+                                       previous_first_regex=None):
+        """
+        Create two maps,
+        one describing the first token expected by a concept,
+        one for the first regular expression,
+        e.g. the dictionaries that go into CONCEPTS_BY_FIRST_KEYWORD_ENTRY and CONCEPTS_BY_REGEX_ENTRY
+        :param context:
+        :param concepts: lists of concepts to parse
+        :param use_sheerka: if True, updates sheerka
+        :param previous_first_keywords:
+        :param previous_first_regex:
+        :return: Returns two dictionaries: one for ALL first item entries, another one for all first regex entries
+        """
+        sheerka = context.sheerka
+        if use_sheerka:
+            previous_first_keywords = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
+            previous_first_regex = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY)
+            previous_first_regex = {RegExDef().deserialize(k): v for k, v in previous_first_regex.items()}
+        else:
+            previous_first_keywords = previous_first_keywords or {}
+            previous_first_regex = previous_first_regex or {}
+
+        for concept in concepts:
+            items = SheerkaConceptManager.get_first_items(sheerka, concept)
+
+            if items is None:
+                # no first token found for a concept ?
+                return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))
+
+            for item in items:
+                if isinstance(item, RegExDef):
+                    previous_first_regex.setdefault(item, []).append(concept.id)
+                else:
+                    previous_first_keywords.setdefault(item, []).append(concept.id)
+
+        # 'uniquify' the lists
+        for k, v in previous_first_keywords.items():
+            previous_first_keywords[k] = core.utils.make_unique(v)
+
+        for k, v in previous_first_regex.items():
+            previous_first_regex[k] = core.utils.make_unique(v)
+
+        return sheerka.ret("BaseNodeParser", True, (previous_first_keywords, previous_first_regex))
+
    @staticmethod
    def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
        """
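A standalone sketch of the dispatch these two maps enable: a concept becomes a parse candidate when either its first token or its first regex matches at the current position. Every name below is illustrative, not the project's API:

import re

concepts_by_first_keyword = {"plus": [101], "minus": [102]}
concepts_by_regex = [(re.compile(r"\d+"), [201])]  # (compiled pattern, concept ids)

def candidate_ids(text, pos=0):
    # collect concept ids whose first token or first regex matches at pos
    words = text[pos:].split()
    ids = list(concepts_by_first_keyword.get(words[0] if words else "", []))
    for pattern, concept_ids in concepts_by_regex:
        if pattern.match(text, pos):  # re.Pattern.match anchors at pos
            ids.extend(concept_ids)
    return ids

print(candidate_ids("plus 10"))    # [101]
print(candidate_ids("10 plus 2"))  # [201]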
@@ -812,6 +976,19 @@ class SheerkaConceptManager(BaseService):

        return sheerka.ret("BaseNodeParser", True, res)

+    @staticmethod
+    def compile_concepts_by_first_regex(context, concepts_by_first_regex):
+        res = []
+
+        try:
+            for k, v in concepts_by_first_regex.items():
+                flags = RegExDef.compile_flags(k.ignore_case, k.multiline, k.explicit_flags)
+                res.append((re.compile(k.to_match, flags), v))
+        except Exception as ex:
+            return context.sheerka.ret("BaseNodeParser", False, ex)
+
+        return context.sheerka.ret("BaseNodeParser", True, res)
+
    def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
        """
        Tries to find if there are concepts that match the value of the token
@@ -853,5 +1030,19 @@ class SheerkaConceptManager(BaseService):
        return core.utils.make_unique(result + custom_concepts,
                                      lambda c: c.concept.id if hasattr(c, "concept") else c.id)

+    def get_concepts_by_first_regex(self, expr, pos):
+        """
+        Go through all the declared regular expressions and try to see if there is a match
+        :param expr:
+        :param pos:
+        :return:
+        """
+        result = []
+        for compiled_regex, concept_ids in self.compiled_concepts_by_regex:
+            if compiled_regex.match(expr, pos):
+                result.extend([self.sheerka.get_by_id(concept_id) for concept_id in concept_ids])
+
+        return result
+
    def get_concepts_bnf_definitions(self):
        return self.sheerka.om.current_cache_manager().caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY].cache
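compile_concepts_by_first_regex above leans on RegExDef.compile_flags, which is not shown in this diff. A plausible standalone sketch of such a flag combiner, written as an assumption about its behaviour rather than the actual implementation:

import re

def compile_flags(ignore_case, multiline, explicit_flags=0):
    # combine the boolean conveniences with any explicitly supplied re flags
    flags = explicit_flags
    if ignore_case:
        flags |= re.IGNORECASE
    if multiline:
        flags |= re.MULTILINE
    return flags

pattern = re.compile(r"-?\d+", compile_flags(ignore_case=True, multiline=False))
print(bool(pattern.match("42 plus 1")))  # True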
@@ -239,14 +239,17 @@ class ConsoleDebugLogger(BaseDebugLogger):
        :param kwargs:
        :return:
        """
-        raw = kwargs.pop('raw', None)
        if not self.debug_manager.compute_debug_concept(self.context,
                                                        self.service_name,
                                                        self.method_name,
                                                        concept.id,
                                                        self.debug_id):
            return
+        raw = kwargs.pop('raw', None)
+        color = kwargs.pop('color', None)
        str_vars = raw if raw else pp.pformat(kwargs) if kwargs else ""
+        if color:
+            str_vars = CCM[color] + str_vars + CCM['reset']
        text = " - " + text if text is not None else ""
        colon = ": " if str_vars else ""
        str_text = f"{CCM['cyan']}..concept#{concept.id}{text}{colon} {CCM['reset']}"
@@ -5,6 +5,7 @@ from core.builtin_helpers import expect_one, only_successful, evaluate, ensure_c
 from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \
     concept_part_value
 from core.global_symbols import NotInit
+from core.rule import Rule
 from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
 from core.sheerka.services.SheerkaExecute import ParserInput
 from core.sheerka.services.sheerka_service import BaseService
@@ -421,6 +422,9 @@ class SheerkaEvaluateConcept(BaseService):
            else:
                return evaluated

+        elif isinstance(to_resolve, Rule):
+            raise NotImplementedError()  # how to resolve rules ?
+
        # otherwise, execute all return values to find out what is the value
        else:
            # update short term memory with current concept variables
@@ -22,7 +22,7 @@ class ParserInput:
    Helper class that tokenizes the input once and for all
    """

-    def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
+    def __init__(self, text, tokens=None, length=None, start=None, end=None, yield_oef=True):
        self.text = text
        self.tokens = tokens or None
        if self.tokens:
@@ -38,13 +38,13 @@ class ParserInput:
                                last_token.line,
                                last_token.column + 1)]

-        self.length = None  # to be computed in reset()
+        self.length = length  # to be computed (again) in reset()
        self.yield_oef = yield_oef

        self.start = start or 0
        if end:
-            self.original_end = end + 1
-            self.end = self.original_end
+            self.original_end = end  # forced index of the last token
+            self.end = self.original_end  # index of the last token => len(tokens) - 1 if full tokens
        else:
            self.original_end = self.end = None
@@ -61,30 +61,43 @@ class ParserInput:
        return f"ParserInput({from_tokens}'{self.text}')"

    def reset(self, yield_oef=None):

+        def _get_end_from_yield_eof(_length, _yield_oef):
+            return _length - 1 if _yield_oef else _length - 2
+
        if yield_oef is None:
            yield_oef = self.yield_oef

        # make sure tokens is correctly initialized
        if self.tokens is None:
            # the eof is forced here, but will not be yielded unless requested
            self.tokens = list(Tokenizer(self.text, yield_eof=True))

        self.length = len(self.tokens)

        if self.original_end is None:
-            self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
+            self.end = _get_end_from_yield_eof(self.length, yield_oef)
        else:
-            self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
+            self.end = self.original_end if self.original_end < self.length else \
+                _get_end_from_yield_eof(self.length, yield_oef)

        self.pos = self.start - 1
        self.token = None
        return self

    def as_text(self, custom_switcher=None, tracker=None):
+        if not self.tokens or self.end is None:
+            # as_text is requested before reset().
+            # It means that we want the original text
+            return self.text
+
        if custom_switcher is None:
            if self.sub_text:
                return self.sub_text
-            if self.start == 0 and self.end == self.length:
+            if self.start == 0 and self.end == self.length - 1:
                self.sub_text = self.text
                return self.sub_text
-            self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end])
+            self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end + 1])
            return self.sub_text
        else:
            return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)
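The reset/as_text changes move self.end from an exclusive bound to the inclusive index of the last token, which is why the slices gain a + 1. The arithmetic in isolation, with a made-up token list standing in for the Tokenizer output:

tokens = ["10", "plus", "2", "<EOF>"]  # the tokenizer always appends an EOF here
length = len(tokens)

def end_index(length, yield_eof):
    # inclusive index of the last token to consume
    return length - 1 if yield_eof else length - 2

end = end_index(length, yield_eof=False)  # 2 -> stops before <EOF>
print(tokens[0:end + 1])                  # ['10', 'plus', '2'] (inclusive slice)

end = end_index(length, yield_eof=True)   # 3 -> includes <EOF>
print(tokens[0:end + 1])                  # ['10', 'plus', '2', '<EOF>']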
@@ -92,16 +105,16 @@ class ParserInput:
    def as_tokens(self):
        if self.sub_tokens:
            return self.sub_tokens
-        if self.start == 0 and self.end == self.length:
+        if self.start == 0 and self.end == self.length - 1:
            self.sub_tokens = self.tokens
            return self.sub_tokens
-        self.sub_tokens = self.tokens[self.start:self.end]
+        self.sub_tokens = self.tokens[self.start:self.end + 1]
        return self.sub_tokens

    def next_token(self, skip_whitespace=True):
        self.pos += 1

-        if self.pos >= self.end:
+        if self.pos > self.end:
            return False

        self.token = self.tokens[self.pos]
@@ -111,11 +124,11 @@ class ParserInput:
        if skip_whitespace:
            while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                self.pos += 1
-                if self.pos == self.end:
+                if self.pos > self.end:
                    return False
                self.token = self.tokens[self.pos]

-        return self.pos < self.end
+        return self.pos <= self.end

    def the_token_after(self, skip_whitespace=True):
        """
@@ -123,13 +136,13 @@ class ParserInput:
        Never returns None (returns TokenKind.EOF instead)
        """
        my_pos = self.pos + 1
-        if my_pos >= self.end:
+        if my_pos > self.end:
            return Token(TokenKind.EOF, "", -1, -1, -1)

        if skip_whitespace:
            while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                my_pos += 1
-                if my_pos == self.end:
+                if my_pos > self.end:
                    return Token(TokenKind.EOF, "", -1, -1, -1)

        return self.tokens[my_pos]
@@ -140,7 +153,7 @@ class ParserInput:
        :param pos:
        :return: True if pos is a valid position, False otherwise
        """
-        if pos < 0 or pos >= self.end:
+        if pos < 0 or pos > self.end:
            self.token = None
            return False
@@ -355,10 +368,10 @@ class SheerkaExecute(BaseService):
            if pi is NotFound:  # when CacheManager.cache_only is True
                pi = ParserInput(text)
                self.pi_cache.put(text, pi)
-            return ParserInput(text, pi.tokens)  # new instance, but no need to tokenize the text again
+            return ParserInput(text, tokens=pi.tokens, length=pi.length)  # new instance, but no need to tokenize the text again

        key = text or core.utils.get_text_from_tokens(tokens)
-        pi = ParserInput(key, tokens)
+        pi = ParserInput(key, tokens=tokens, length=len(tokens))
        self.pi_cache.put(key, pi)
        return pi
@@ -144,7 +144,7 @@ class SheerkaMemory(BaseService):
        :param concept:
        :return:
        """
-        if self.sheerka.during_initialisation:
+        if self.sheerka.during_initialisation or self.sheerka.during_restore:
            return
        self.registration[key] = concept
@@ -49,8 +49,8 @@ class TokenKind(Enum):
    DEGREE = "degree"  # °
    WORD = "word"
    EQUALSEQUALS = "=="
-    VAR_DEF = "__var__"
-    REGEX = "r'xxx' or r\"xxx\" or r|xxx| or r/xxx/"
+    VAR_DEF = "concept variable"  # __var__
+    REGEX = "regex"  # r'xxx' or r"xxx" or r|xxx| or r/xxx/ but not r:xxx: which means rules


@dataclass()
@@ -73,7 +73,10 @@ class Token:
        if self._strip_quote:
            return self._strip_quote

-        self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
+        if self.type in (TokenKind.STRING, TokenKind.REGEX):
+            self._strip_quote = self.value[1:-1]
+        else:
+            self._strip_quote = self.value
        return self._strip_quote

    @property
@@ -120,6 +123,8 @@ class Token:
        elif self.type == TokenKind.RULE:
            from core.utils import str_concept
            return str_concept(self.value, prefix="r:")
+        elif self.type == TokenKind.REGEX:
+            return "r" + self.value
        else:
            return str(self.value)
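The strip_quote change treats REGEX literals like STRING literals: the surrounding delimiters are dropped, whatever form they take (r'xxx', r"xxx", r|xxx| or r/xxx/). Reduced to a standalone function with string token types instead of the TokenKind enum:

def strip_quote(token_type, value):
    # STRING and REGEX token values carry their delimiters; drop them
    if token_type in ("string", "regex"):
        return value[1:-1]
    return value

print(strip_quote("regex", "/-?\\d+/"))  # -?\d+
print(strip_quote("word", "plus"))       # plus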
@@ -1,3 +1,5 @@
+from dataclasses import dataclass
+
 import core.utils
 from core.ast_helpers import UnreferencedVariablesVisitor
 from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
@@ -11,6 +13,29 @@ from parsers.DefConceptParser import DefConceptNode, NameNode
 from parsers.PythonParser import get_python_node


+@dataclass(eq=True, frozen=True)
+class MandatoryVariable:
+    """
+    When we are searching for variables, we are searching for potential variables
+    So if the variable found has no match in the concept definition, it's not a problem
+    for example:
+        def concept foo x as isinstance(x, str)
+    {x, str} will be detected as potential variables, but 'str' will find no match.
+
+    But there are cases where the variable found must exist, otherwise it's an error
+    example:
+        def concept foo from bnf xxx
+    'xxx' is detected as a variable (assuming that there is no concept named 'xxx') and a match must be
+    found in the name of the variable
+
+    To distinguish between mandatory and non-mandatory variables, we use MandatoryVariable
+    """
+    name: str
+
+    def __hash__(self):
+        return hash(("MandatoryVariable", self.name))
+
+
 class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
    """
    Gets the concepts referenced by BNF
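Why MandatoryVariable is a frozen dataclass with an explicit __hash__: the visitor stores plain strings and mandatory markers in the same names set, and the custom hash keeps MandatoryVariable('xxx') distinct from the bare string 'xxx'. A self-contained demonstration (the names are made up):

from dataclasses import dataclass

@dataclass(eq=True, frozen=True)
class MandatoryVariable:
    name: str

    def __hash__(self):
        return hash(("MandatoryVariable", self.name))

names = {"foo", MandatoryVariable("xxx"), "xxx"}
# 'xxx' and MandatoryVariable('xxx') are different set members
print(len(names))                         # 3
print(MandatoryVariable("xxx") in names)  # True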
@@ -29,6 +54,9 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
        else:
            self.names.add(node.concept)

+    def visit_VariableExpression(self, node):
+        self.names.add(MandatoryVariable(node.rule_name))
+
    def visit_all(self, node):
        if node.rule_name:
            self.names.add(node.rule_name)
@@ -60,11 +88,13 @@ class DefConceptEvaluator(OneReturnValueEvaluator):

        # validate the node
        variables_found = set()
+        mandatory_variables = set()  # these variables MUST have a match in the name (if the name is not None)

        concept = Concept(str(def_concept_node.name))
        concept.get_metadata().definition_type = def_concept_node.definition_type
        name_to_use = self.get_name_to_use(def_concept_node)

        # get variables
        for prop in ("definition", "where", "pre", "post", "body", "ret"):

            part_ret_val = getattr(def_concept_node, prop)
@@ -87,13 +117,26 @@ class DefConceptEvaluator(OneReturnValueEvaluator):

            # try to find what can be a property
            for p in self.get_variables(context, part_ret_val, name_to_use):
-                variables_found.add(p)
+                if isinstance(p, MandatoryVariable):
+                    variables_found.add(p.name)
+                    mandatory_variables.add(p.name)
+                else:
+                    variables_found.add(p)

        # add variables by order of appearance when possible
        for name_part in name_to_use:
            if name_part in variables_found:
                concept.def_var(name_part, None)

+        # check that all mandatory variables are defined in the name
+        # KSI: 2021-02-17
+        # The mandatory variables come from bnf definitions where it was not possible to resolve to a concept
+        # So rather than issuing an 'UnresolvedVariableError' I prefer UNKNOWN_CONCEPT
+        if (diff := mandatory_variables.difference(set(name_to_use))) != set():
+            unknown_concepts = [sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": c}) for c in sorted(diff)]
+            error = sheerka.new(BuiltinConcepts.ERROR, body=unknown_concepts)
+            return sheerka.ret(self.name, False, error, parents=[return_value])
+
        # add the remaining properties
        # They mainly come from BNF definition
        for p in variables_found:
@@ -441,6 +441,11 @@ class GrammarErrorNode(ParsingError):
    message: str


+@dataclass()
+class NoMatchingTokenError(ParsingError):
+    pos: int
+
+
class SyaAssociativity(Enum):
    Left = "left"
    Right = "right"
@@ -720,6 +725,35 @@ class CNC(CN):
            txt += f", {k}='{v}'"
        return txt + ")"

+    def to_compare(self, other, to_compare_delegate):
+        """
+        Transform other into CNC, to ease the comparison
+        :param other:
+        :param to_compare_delegate:
+        :return:
+        """
+
+        if isinstance(other, CNC):
+            return other
+
+        if isinstance(other, ConceptNode):
+            if self.exclude_body:
+                compiled = {k: v for k, v in other.concept.get_compiled().items() if k != ConceptParts.BODY}
+            else:
+                compiled = other.concept.get_compiled()
+
+            self_compile_to_use = self.compiled or compiled
+
+            compiled = to_compare_delegate(self_compile_to_use, compiled, to_compare_delegate)
+            return CNC(other.concept,
+                       other.start if self.start is not None else None,
+                       other.end if self.end is not None else None,
+                       other.source if self.source is not None else None,
+                       self.exclude_body,
+                       **compiled)
+
+        raise NotImplementedError("CNC")
+

class UTN(HelperWithPos):
    """
@@ -763,6 +797,24 @@ class UTN(HelperWithPos):
        txt += f", end={self.end}"
        return txt + ")"

+    def to_compare(self, other, to_compare_delegate):
+        """
+        Transform other into UTN, to ease the comparison
+        :param other:
+        :param to_compare_delegate:
+        :return:
+        """
+
+        if isinstance(other, UTN):
+            return other
+
+        if isinstance(other, UnrecognizedTokensNode):
+            return UTN(other.source,
+                       other.start,
+                       other.end)
+
+        raise NotImplementedError("UTN")
+

class RN(HelperWithPos):
    """
@@ -840,9 +892,19 @@ class BaseNodeParser(BaseParser):
        :return:
        """
        from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
-        concepts_by_first_keyword = SheerkaConceptManager.compute_concepts_by_first_token(context, concepts).body
-        resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
+        service = context.sheerka.services[SheerkaConceptManager.NAME]
+        by_token, by_regex = SheerkaConceptManager.compute_concepts_by_first_item(context, concepts).body
+
+        context.sheerka.om.put(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY,
+                               False,
+                               {k.serialize(): v for k, v in by_regex.items()})
+        compiled = service.compile_concepts_by_first_regex(context, by_regex).body
+        service.compiled_concepts_by_regex.clear()
+        service.compiled_concepts_by_regex.extend(compiled)
+
+        resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, by_token).body
        context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
                               False,
                               resolved)

        return self
@@ -4,7 +4,7 @@ from core.sheerka.Sheerka import ExecutionContext
 from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
 from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
 from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \
-    ConceptExpression, StrMatch
+    ConceptExpression, StrMatch, RegExMatch, VariableExpression


 class BnfDefinitionParser(BaseParser):
@@ -231,9 +231,11 @@ class BnfDefinitionParser(BaseParser):
        if token.type == TokenKind.CONCEPT:
            self.next_token()
            concept = self.sheerka.new((token.value[0], token.value[1]))
-            expr = ConceptExpression(concept)
-            # expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
-            #     else ConceptExpression(concept)
+            if not self.sheerka.is_known(concept):
+                self.add_error(concept)
+                return None
+
+            expr = ConceptExpression(concept, rule_name=concept.name)
            return self.eat_rule_name_if_needed(expr)

        if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD):
@@ -245,20 +247,19 @@ class BnfDefinitionParser(BaseParser):
            # (for example of recursive bnf definition)
            if self.context.obj and hasattr(self.context.obj, "name"):
                if concept_name == str(self.context.obj.name):
-                    return self.eat_rule_name_if_needed(ConceptExpression(concept_name))
+                    return self.eat_rule_name_if_needed(ConceptExpression(concept_name))  # 2021-02-17 no rule name ?

            concept = self.context.get_concept(concept_name)
            if not self.sheerka.is_known(concept):
-                self.add_error(concept)
-                return None
+                expr = VariableExpression(concept_name)
+                return self.eat_rule_name_if_needed(expr)
            elif hasattr(concept, "__iter__"):
                self.add_error(
                    self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
                                     body=("key", concept_name)))
                return None
            else:
-                expr = ConceptExpression(concept)
-                expr.rule_name = concept.name
+                expr = ConceptExpression(concept, rule_name=concept.name)
                return self.eat_rule_name_if_needed(expr)

        if token.type == TokenKind.STRING:
@@ -272,6 +273,11 @@ class BnfDefinitionParser(BaseParser):
            ret = Sequence(*elements)
            return self.eat_rule_name_if_needed(ret)

+        if token.type == TokenKind.REGEX:
+            self.next_token()
+            ret = RegExMatch(core.utils.strip_quotes(token.strip_quote))
+            return self.eat_rule_name_if_needed(ret)
+
        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
        return self.eat_rule_name_if_needed(ret)
File diff suppressed because it is too large (+600 / -181)
@@ -30,6 +30,10 @@ class PythonErrorNode(ParsingError):

+@dataclass()
+class ConceptDetectedError(ParsingError):
+    """
+    When the Python parser finds an identifier, and that identifier is a concept
+    So it's not up to the PythonParser to respond
+    """
+    name: str


@@ -1142,6 +1142,8 @@ class SyaNodeParser(BaseNodeParser):
        if sya_definitions:
            self.test_only_sya_definitions = sya_definitions

        return self

+    @staticmethod
+    def _is_eligible(concept):
        """
@@ -1,7 +1,7 @@
 import json

 import core.utils
-from core.global_symbols import NotInit, NotFound, Removed
+from core.global_symbols import NotInit, NotFound, Removed, NoFirstToken
 from sheerkapickle import tags, utils, handlers
@@ -54,6 +54,8 @@ class SheerkaUnpickler:
            instance = NotFound
        elif obj[tags.CUSTOM] == Removed.value:
            instance = Removed
+        elif obj[tags.CUSTOM] == NoFirstToken.value:
+            instance = NoFirstToken
        else:
            raise KeyError(f"unknown {obj[tags.CUSTOM]}")
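The unpickler branch above restores the module-level singleton rather than building a new object, so identity checks survive a restore. The pattern in miniature, with simplified stand-ins for the sheerkapickle machinery:

class SentinelType:
    def __init__(self, value):
        self.value = value

NoFirstToken = SentinelType("**NoFirstToken**")

def restore(tag):
    # map the serialized tag back to the singleton so that
    # checks like `t is NoFirstToken` still hold after unpickling
    if tag == NoFirstToken.value:
        return NoFirstToken
    raise KeyError(f"unknown {tag}")

assert restore("**NoFirstToken**") is NoFirstToken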