Fixed #30: Add variable support in BNF concept definition

Fixed #31: Add regex support in BNF Concept
Fixed #33: Do not memorize objects during restore
2021-02-24 17:23:03 +01:00
parent cac2dad17f
commit 646c428edb
32 changed files with 2107 additions and 360 deletions
+40 -20
@@ -342,8 +342,9 @@ def evaluate(context,
def get_lexer_nodes(return_values, start, tokens):
"""
From a parser result, return the corresponding LexerNode
either ConceptNode, UnrecognizedTokensNode or SourceCodeNode
Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
Unlike the other method (get_lexer_nodes_using_positions),
all created lexer nodes will use the same offset (start)
:param return_values:
:param start:
:param tokens:
@@ -360,13 +361,12 @@ def get_lexer_nodes(return_values, start, tokens):
continue
end = start + len(tokens) - 1
lexer_nodes.append(
[SourceCodeNode(start,
end,
tokens,
ret_val.body.source,
python_node=ret_val.body.body,
return_value=ret_val)])
lexer_nodes.append([SourceCodeNode(start,
end,
tokens,
ret_val.body.source,
python_node=ret_val.body.body,
return_value=ret_val)])
elif ret_val.who == "parsers.ExactConcept":
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -379,6 +379,11 @@ def get_lexer_nodes(return_values, start, tokens):
for node in nodes:
node.start += start
node.end += start
if isinstance(node, ConceptNode):
for k, v in node.concept.get_compiled().items():
if hasattr(v, "start"):
v.start += start
v.end += start
# but append the whole sequence when it's a sequence
lexer_nodes.append(nodes)
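As a side note, a minimal stand-alone sketch of the offset remapping this hunk performs (Node is a hypothetical stand-in for the real lexer node classes):

class Node:
    def __init__(self, start, end):
        self.start = start  # position relative to the sub-parse, starts at 0
        self.end = end

def shift(nodes, start):
    # rebase every node produced by the sub-parse onto the full token stream
    for node in nodes:
        node.start += start
        node.end += start
    return nodes

nodes = shift([Node(0, 1), Node(2, 4)], 10)
assert (nodes[0].start, nodes[1].end) == (10, 14)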
@@ -397,9 +402,15 @@ def get_lexer_nodes(return_values, start, tokens):
def get_lexer_nodes_using_positions(return_values, positions):
"""
Transform all elements from return_values into lexer nodes
use positions to remap the exact positions
Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
Use positions to compute the exact new positions
Unlike the other method (get_lexer_nodes),
each return value is mapped to one position; it's not an offset but an absolute position
:param return_values:
:param positions: a list of triplets (start, end, tokens)
:return:
"""
lexer_nodes = []
for ret_val, position in zip(return_values, positions):
if ret_val.who in ("parsers.Python", 'parsers.PythonWithConcepts'):
@@ -425,6 +436,11 @@ def get_lexer_nodes_using_positions(return_values, positions):
for node in nodes:
node.start = position.start
node.end = position.end
if isinstance(node, ConceptNode):
for k, v in node.concept.get_compiled().items():
if hasattr(v, "start"):
v.start += position.start
v.end += position.start
# but append the whole sequence when it's a sequence
lexer_nodes.extend(nodes)
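For contrast, a sketch of the absolute remapping used here, where each return value carries its own (start, end, tokens) triplet (SimpleNamespace stands in for the real position and node types):

from types import SimpleNamespace

def remap(nodes, positions):
    # one node per position; start/end are absolute, not offsets
    for node, pos in zip(nodes, positions):
        node.start, node.end = pos.start, pos.end
    return nodes

nodes = remap([SimpleNamespace(start=0, end=3)],
              [SimpleNamespace(start=7, end=10, tokens=[])])
assert (nodes[0].start, nodes[0].end) == (7, 10)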
@@ -493,9 +509,10 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
def update_compiled(context, concept, errors, parsers=None):
"""
recursively iterate over concept.get_compiled() to replace LexerNode into concepts or list of ReturnValueConcept
When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...)
the result will be a LexerNode.
TL;DR:
Recursively iterate over concept.get_compiled() to replace LexerNodes with concepts or lists of ReturnValueConcept
Long version:
When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...) the result will be a LexerNode.
In the specific case of a ConceptNode, the compiled variables will also be LexerNodes (UnrecognizedTokensNode...)
This function iterates over the compiled parts to transform these nodes into concepts or compiled AST
:param context:
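The recursion it describes can be sketched roughly like this (is_lexer_node and resolve are hypothetical stand-ins for the parser lookups the real method performs):

def update_compiled_sketch(concept, is_lexer_node, resolve):
    # walk the compiled parts; replace any remaining lexer node,
    # and recurse into values that are themselves concepts
    compiled = concept.get_compiled()
    for key, value in compiled.items():
        if is_lexer_node(value):
            compiled[key] = resolve(value)
        elif hasattr(value, "get_compiled"):
            update_compiled_sketch(value, is_lexer_node, resolve)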
@@ -518,9 +535,12 @@ def update_compiled(context, concept, errors, parsers=None):
_validate_concept(v)
elif isinstance(v, SourceCodeWithConceptNode):
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
parser_helper = PythonWithConceptsParser()
res = parser_helper.parse_nodes(context, v.get_all_nodes())
if v.return_value:
res = v.return_value
else:
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
parser_helper = PythonWithConceptsParser()
res = parser_helper.parse_nodes(context, v.get_all_nodes())
if res.status:
c.get_compiled()[k] = [res]
else:
@@ -556,7 +576,7 @@ def update_compiled(context, concept, errors, parsers=None):
# example : Concept("a plus b").def_var("a").def_var("b")
# and the user has entered 'a plus b'
# Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
# This means that 'a' and 'b' don't have any real value
# This means that 'a' and 'b' don't have any real values
if len(concept.get_metadata().variables) > 0:
for name, value in concept.get_metadata().variables:
if _get_source(concept.get_compiled(), name) != name:
@@ -633,7 +653,7 @@ def ensure_concept_or_rule(*items):
raise TypeError(f"'{items}' must be a concept or rule")
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
def ensure_bnf(context, concept, parser_name="BaseNodeParser", update_bnf_for_cached_concept=True):
if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF and not concept.get_bnf():
from parsers.BnfDefinitionParser import BnfDefinitionParser
regex_parser = BnfDefinitionParser()
@@ -651,7 +671,7 @@ def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
raise Exception(bnf_parsing_ret_val.value)
concept.set_bnf(bnf_parsing_ret_val.body.body)
if concept.id:
if concept.id and update_bnf_for_cached_concept:
context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf()) # update bnf in cache
+29 -2
@@ -694,6 +694,33 @@ class CC:
self.end = end
return self
def to_compare(self, other, to_compare_delegate):
"""
Transform other into CC, to ease the comparison
:param other:
:param to_compare_delegate:
:return:
"""
if isinstance(other, CC):
return other
if isinstance(other, Concept):
if self.exclude_body:
compiled = {k: v for k, v in other.get_compiled().items() if k != ConceptParts.BODY}
else:
compiled = other.get_compiled()
self_compile_to_use = self.compiled or compiled
compiled = to_compare_delegate(self_compile_to_use, compiled, to_compare_delegate)
return CC(other,
self.source,
self.exclude_body,
**compiled)
raise NotImplementedError(f"CC, {other=}")
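A plausible usage in a test helper, assuming CC defines __eq__ over its compiled parts (illustrative only):

def assert_concept_matches(expected_cc, concept, delegate):
    # normalize the Concept into a CC before the field-by-field comparison
    assert expected_cc == expected_cc.to_compare(concept, delegate)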
@dataclass()
class CB:
@@ -825,8 +852,8 @@ class CIO:
self.concept_id = concept.id
self.concept = concept
self.source = source
self.start = -1
self.end = -1
self.start = None
self.end = None
def set_concept(self, concept):
self.concept = concept
+6
@@ -47,9 +47,15 @@ class RemovedType(CustomType):
super(RemovedType, self).__init__("**Removed**")
class NoFirstTokenType(CustomType):
def __init__(self):
super(NoFirstTokenType, self).__init__("**NoFirstToken**")
NotInit = NotInitType()
NotFound = NotFoundType()
Removed = RemovedType()
NoFirstToken = NoFirstTokenType()
class ErrorObj:
+10
@@ -199,6 +199,16 @@ class ExecutionContext:
self._push = None
def add_preprocess(self, name, **kwargs):
"""
PreProcess items are used during the parsing and the evaluation of the ReturnValueConcept
Using them, you can tweak the behaviour of the parser and evaluator (you can disable them, for instance)
example :
context.add_preprocess(BaseEvaluator.get_name("priority15"), enabled=False)
context.add_preprocess(BaseEvaluator.get_name("all_priority15"), priority=99)
:param name:
:param kwargs:
:return:
"""
preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS)
preprocess.set_value("preprocess_name", name)
for k, v in kwargs.items():
+8 -1
@@ -734,7 +734,7 @@ class Sheerka(Concept):
if not isinstance(obj, Concept):
return True
return obj.key not in (BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
return obj.key not in (None, BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
@staticmethod
def isinstance(a, b):
@@ -879,6 +879,13 @@ class Sheerka(Concept):
return concept
@staticmethod
def deepdiff(a, b):
from deepdiff import DeepDiff
ddiff = DeepDiff(a, b, ignore_order=True)
print(ddiff)
return ddiff
def to_profile():
sheerka = Sheerka()
@@ -1,5 +1,6 @@
import re
from dataclasses import dataclass
from typing import Set
from typing import Set, List, Union
import core.utils
from cache.Cache import Cache
@@ -12,9 +13,10 @@ from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, Built
from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED, NoFirstToken
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind
from parsers.BnfNodeParser import RegExDef
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@@ -98,6 +100,8 @@ class SheerkaConceptManager(BaseService):
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
CONCEPTS_BY_REGEX_ENTRY = "ConceptManager:Concepts_By_Regex"
CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions"
def __init__(self, sheerka):
@@ -105,6 +109,7 @@ class SheerkaConceptManager(BaseService):
self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
not attr.startswith("_") and attr not in self.forbidden_meta}
self.compiled_concepts_by_regex = []
def initialize(self):
self.sheerka.bind_service_method(self.create_new_concept, True)
@@ -119,6 +124,7 @@ class SheerkaConceptManager(BaseService):
self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_regex, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_bnf_definitions, False, visible=False)
self.sheerka.bind_service_method(self.clear_bnf_definition, True, visible=False)
@@ -145,6 +151,9 @@ class SheerkaConceptManager(BaseService):
cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)
cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_REGEX_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BY_REGEX_ENTRY, cache)
cache = Cache().auto_configure(self.CONCEPTS_BNF_DEFINITIONS_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, cache, persist=False)
@@ -158,6 +167,14 @@ class SheerkaConceptManager(BaseService):
res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# init the regular expression
self.sheerka.om.get(self.CONCEPTS_BY_REGEX_ENTRY, None)
from_db = self.sheerka.om.current_cache_manager().copy(self.CONCEPTS_BY_REGEX_ENTRY)
concepts_by_first_regex = {RegExDef().deserialize(k): v for k, v in from_db.items()}
res = self.compile_concepts_by_first_regex(context, concepts_by_first_regex)
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(res.body)
def initialize_builtin_concepts(self):
"""
Initializes the builtin concepts
@@ -201,9 +218,9 @@ class SheerkaConceptManager(BaseService):
concept.init_key()
init_bnf_ret_value = None
ontology = sheerka.om
om = sheerka.om
if ontology.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
if om.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
return sheerka.ret(
self.NAME,
@@ -220,33 +237,44 @@ class SheerkaConceptManager(BaseService):
except Exception as ex:
return sheerka.ret(self.NAME, False, ex.args[0])
# compute new concepts_by_first_keyword
init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
# compute first token and/or first regex
init_ret_value = self.compute_concepts_by_first_item(context, [concept], True)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
by_first_keyword, by_first_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
init_ret_value = self.resolve_concepts_by_first_keyword(context, by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile regex
compile_ret = self.compile_concepts_by_first_regex(context, by_first_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# if everything is fine
freeze_concept_attrs(concept)
concept.freeze_definition_hash()
ontology.add_concept(concept)
ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.add_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in by_first_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
# allow search by definition when the definition is relevant
ontology.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
om.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
# update references
for ref in self.compute_references(concept):
ontology.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# TODO : this line seems to be useless
# The grammar is never reset
@@ -286,7 +314,7 @@ class SheerkaConceptManager(BaseService):
# }
#
sheerka = self.sheerka
cache_manager = self.sheerka.om
om = self.sheerka.om
if not to_add and not to_remove:
return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))
@@ -301,23 +329,19 @@ class SheerkaConceptManager(BaseService):
if res is not None:
return res
# To update concept by first keyword
# first remove the old references
keywords = self.get_first_tokens(sheerka, concept) # keyword of the old concept
concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
for keyword in keywords:
try:
concepts_by_first_keyword[keyword].remove(concept.id)
if len(concepts_by_first_keyword[keyword]) == 0:
del concepts_by_first_keyword[keyword]
except KeyError: # only occurs in unit tests when concepts are created without create_new()
pass
# To update concept by first keyword and first regex
# first remove old first token and first regex entries
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# and then update
init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
init_ret_value = self.compute_concepts_by_first_item(context,
[new_concept],
False,
concepts_by_first_keyword,
concepts_by_regex)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
concepts_by_first_keyword, concepts_by_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context,
@@ -327,18 +351,30 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# update concepts that referenced the old concept and clear old references
self.update_references(context, concept, new_concept, to_add)
for ref in self.compute_references(concept):
cache_manager.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# compute new references
for ref in self.compute_references(new_concept):
cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
cache_manager.update_concept(concept, new_concept)
cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
# everything is ok, update the caches
om.update_concept(concept, new_concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
# everything seems to be fine. Update the list of attributes
# Caution. Must be done AFTER update_concept()
@@ -349,6 +385,7 @@ class SheerkaConceptManager(BaseService):
if modify_source:
self._update_concept(context, concept, to_add, to_remove)
# KSI 2021-02-16 publish the modification of the concept only when someone needs it
ret = sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept))
return ret
@@ -362,17 +399,44 @@ class SheerkaConceptManager(BaseService):
# TODO : resolve concept first
sheerka = context.sheerka
refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if not sheerka.is_known(concept):
return sheerka.ret(self.NAME, False, sheerka.err(ConceptNotFound(concept)))
om = sheerka.om
refs = om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if refs is not NotFound:
refs_instances = [sheerka.new_from_template(c, c.key) for c in [self.get_by_id(ref) for ref in refs]]
return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(refs_instances)))
try:
sheerka.om.remove_concept(concept)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
except ConceptNotFound as ex:
return sheerka.ret(self.NAME, False, sheerka.err(ex))
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# everything seems fine. I can commit the modifications and remove the concept
om.remove_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
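create_new(), update and remove now share the same compute-then-commit shape; a condensed sketch of that pattern as a hypothetical helper (om.put signature as used above):

def commit_first_item_caches(service, om, by_keyword, resolved, by_regex, compiled):
    # every computation already succeeded; only now touch the caches
    om.put(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_keyword)
    om.put(service.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved)
    om.put(service.CONCEPTS_BY_REGEX_ENTRY, False,
           {k.serialize(): v for k, v in by_regex.items()})
    service.compiled_concepts_by_regex.clear()
    service.compiled_concepts_by_regex.extend(compiled)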
def set_attr(self, concept, attribute, value):
"""
@@ -497,7 +561,7 @@ class SheerkaConceptManager(BaseService):
if c.id == concept_id:
return c
metadata = [(index_name, key), ("id", concept_id)] if concept_id else (index_name, key)
metadata = {index_name: key, "id": concept_id} if concept_id else {index_name: key}
return self.sheerka.get_unknown(metadata)
def update_references(self, context, concept, modified_concept=None, modifications=None):
@@ -663,12 +727,39 @@ class SheerkaConceptManager(BaseService):
concept.get_metadata().key = None
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
concept.set_bnf(None)
ensure_bnf(context, concept)
ensure_bnf(context, concept, update_bnf_for_cached_concept=False)
concept.init_key()
return
def _remove_concept_first_token_and_first_regex(self, concept):
keywords_or_regex = self.get_first_items(self.sheerka, concept) # first items (keywords or regexes) of the old concept
concepts_by_first_keyword = self.sheerka.om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
concepts_by_regex = self.sheerka.om.copy(self.CONCEPTS_BY_REGEX_ENTRY)
for item in keywords_or_regex:
try:
if isinstance(item, RegExDef):
serialized = item.serialize()
copy = concepts_by_regex[serialized].copy()
copy.remove(concept.id)
if len(copy) == 0:
del concepts_by_regex[serialized]
else:
concepts_by_regex[serialized] = copy
else:
copy = concepts_by_first_keyword[item].copy()
copy.remove(concept.id)
if len(copy) == 0:
del concepts_by_first_keyword[item]
else:
concepts_by_first_keyword[item] = copy
except KeyError: # only occurs in unit tests when concepts are created without create_new()
pass
# return concepts_by_first_keyword, concepts_by_regex
return concepts_by_first_keyword, {RegExDef().deserialize(k): v for k, v in concepts_by_regex.items()}
@staticmethod
def get_first_tokens(sheerka, concept):
"""
@@ -677,6 +768,30 @@ class SheerkaConceptManager(BaseService):
:param concept:
:return:
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.get_bnf())
return [t for t in bnf_visitor.first_tokens if t is not NoFirstToken]
else:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
return [keyword]
return None
@staticmethod
def get_first_items(sheerka, concept) -> List[Union[str, RegExDef]]:
"""
Get all the first items needed by the concept
An item can be either a token or a regular expression
:param sheerka:
:param concept:
:return: list of strings (tokens) and/or RegExDef instances (regex definitions)
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
@@ -692,6 +807,55 @@ class SheerkaConceptManager(BaseService):
return None
@staticmethod
def compute_concepts_by_first_item(context,
concepts,
use_sheerka=False,
previous_first_keywords=None,
previous_first_regex=None):
"""
Create two maps:
one describing the first token expected by a concept,
one for the first regular expression,
i.e. the dictionaries that go into CONCEPTS_BY_FIRST_KEYWORD_ENTRY and CONCEPTS_BY_REGEX_ENTRY
:param context:
:param concepts: list of concepts to parse
:param use_sheerka: if True, updates sheerka
:param previous_first_keywords:
:param previous_first_regex:
:return: two dictionaries: one for ALL first keyword entries, another one for all first regex entries
"""
sheerka = context.sheerka
if use_sheerka:
previous_first_keywords = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
previous_first_regex = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY)
previous_first_regex = {RegExDef().deserialize(k): v for k, v in previous_first_regex.items()}
else:
previous_first_keywords = previous_first_keywords or {}
previous_first_regex = previous_first_regex or {}
for concept in concepts:
items = SheerkaConceptManager.get_first_items(sheerka, concept)
if items is None:
# no first token found for a concept ?
return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))
for item in items:
if isinstance(item, RegExDef):
previous_first_regex.setdefault(item, []).append(concept.id)
else:
previous_first_keywords.setdefault(item, []).append(concept.id)
# 'uniquify' the lists
for k, v in previous_first_keywords.items():
previous_first_keywords[k] = core.utils.make_unique(v)
for k, v in previous_first_regex.items():
previous_first_regex[k] = core.utils.make_unique(v)
return sheerka.ret("BaseNodeParser", True, (previous_first_keywords, previous_first_regex))
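The two maps have roughly these shapes (made-up keys and ids; the regex map is keyed by RegExDef rather than a plain string in the real code):

by_keyword = {"plus": [12], "minus": [13]}   # first token -> concept ids
by_regex = {r"[0-9]+": [14]}                 # regex definition -> concept ids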
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
"""
@@ -812,6 +976,19 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def compile_concepts_by_first_regex(context, concepts_by_first_regex):
res = []
try:
for k, v in concepts_by_first_regex.items():
flags = RegExDef.compile_flags(k.ignore_case, k.multiline, k.explicit_flags)
res.append((re.compile(k.to_match, flags), v))
except Exception as ex:
return context.sheerka.ret("BaseNodeParser", False, ex)
return context.sheerka.ret("BaseNodeParser", True, res)
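A stand-alone equivalent using plain string patterns instead of RegExDef keys (the real method derives the flags from the RegExDef fields):

import re

def compile_by_regex(concepts_by_first_regex, flags=0):
    # each entry becomes a (compiled_pattern, concept_ids) pair;
    # one bad pattern aborts the whole batch, as in the method above
    return [(re.compile(pattern, flags), ids)
            for pattern, ids in concepts_by_first_regex.items()]

compiled = compile_by_regex({r"[0-9]+": [14]}, re.IGNORECASE)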
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
"""
Tries to find if there are concepts that match the value of the token
@@ -853,5 +1030,19 @@ class SheerkaConceptManager(BaseService):
return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)
def get_concepts_by_first_regex(self, expr, pos):
"""
Go through all the declared regular expressions and check whether there is a match
:param expr:
:param pos:
:return:
"""
result = []
for compiled_regex, concept_ids in self.compiled_concepts_by_regex:
if compiled_regex.match(expr, pos):
result.extend([self.sheerka.get_by_id(concept_id) for concept_id in concept_ids])
return result
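Note that Pattern.match(expr, pos) anchors the pattern at pos, which is what makes the per-position lookup cheap; a quick illustration with the standard library:

import re

pattern = re.compile(r"[0-9]+")
assert pattern.match("x 42", 2)         # anchored match at position 2
assert pattern.match("x 42") is None    # nothing numeric at position 0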
def get_concepts_bnf_definitions(self):
return self.sheerka.om.current_cache_manager().caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY].cache
@@ -239,14 +239,17 @@ class ConsoleDebugLogger(BaseDebugLogger):
:param kwargs:
:return:
"""
raw = kwargs.pop('raw', None)
if not self.debug_manager.compute_debug_concept(self.context,
self.service_name,
self.method_name,
concept.id,
self.debug_id):
return
raw = kwargs.pop('raw', None)
color = kwargs.pop('color', None)
str_vars = raw if raw else pp.pformat(kwargs) if kwargs else ""
if color:
str_vars = CCM[color] + str_vars + CCM['reset']
text = " - " + text if text is not None else ""
colon = ": " if str_vars else ""
str_text = f"{CCM['cyan']}..concept#{concept.id}{text}{colon} {CCM['reset']}"
@@ -5,6 +5,7 @@ from core.builtin_helpers import expect_one, only_successful, evaluate, ensure_c
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \
concept_part_value
from core.global_symbols import NotInit
from core.rule import Rule
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.sheerka_service import BaseService
@@ -421,6 +422,9 @@ class SheerkaEvaluateConcept(BaseService):
else:
return evaluated
elif isinstance(to_resolve, Rule):
raise NotImplementedError() # how to resolve rules ?
# otherwise, execute all return values to find out what is the value
else:
# update short term memory with current concept variables
+31 -18
@@ -22,7 +22,7 @@ class ParserInput:
Helper class that tokenizes the input once and for all
"""
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
def __init__(self, text, tokens=None, length=None, start=None, end=None, yield_oef=True):
self.text = text
self.tokens = tokens or None
if self.tokens:
@@ -38,13 +38,13 @@ class ParserInput:
last_token.line,
last_token.column + 1)]
self.length = None # to be computed in reset()
self.length = length # to be computed (again) in reset()
self.yield_oef = yield_oef
self.start = start or 0
if end:
self.original_end = end + 1
self.end = self.original_end
self.original_end = end # forced index of the last token
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
else:
self.original_end = self.end = None
@@ -61,30 +61,43 @@ class ParserInput:
return f"ParserInput({from_tokens}'{self.text}')"
def reset(self, yield_oef=None):
def _get_end_from_yield_eof(_length, _yield_oef):
return _length - 1 if _yield_oef else _length - 2
if yield_oef is None:
yield_oef = self.yield_oef
# make sure tokens is correctly initialized
if self.tokens is None:
# the eof is forced, but will not be yielded unless requested.
self.tokens = list(Tokenizer(self.text, yield_eof=True))
self.length = len(self.tokens)
if self.original_end is None:
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
self.end = _get_end_from_yield_eof(self.length, yield_oef)
else:
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
self.end = self.original_end if self.original_end < self.length else \
_get_end_from_yield_eof(self.length, yield_oef)
self.pos = self.start - 1
self.token = None
return self
def as_text(self, custom_switcher=None, tracker=None):
if not self.tokens or self.end is None:
# as_text is requested before reset().
# It means that we want the original text
return self.text
if custom_switcher is None:
if self.sub_text:
return self.sub_text
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_text = self.text
return self.sub_text
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end])
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end + 1])
return self.sub_text
else:
return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)
@@ -92,16 +105,16 @@ class ParserInput:
def as_tokens(self):
if self.sub_tokens:
return self.sub_tokens
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_tokens = self.tokens
return self.sub_tokens
self.sub_tokens = self.tokens[self.start:self.end]
self.sub_tokens = self.tokens[self.start:self.end + 1]
return self.sub_tokens
def next_token(self, skip_whitespace=True):
self.pos += 1
if self.pos >= self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
@@ -111,11 +124,11 @@ class ParserInput:
if skip_whitespace:
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.pos += 1
if self.pos == self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
return self.pos < self.end
return self.pos <= self.end
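The hunks above switch end from "one past the last token" to "index of the last token" (inclusive), so comparisons become > / <= and slices gain a + 1. A worked illustration:

tokens = ["a", "+", "b", "<EOF>"]
length = len(tokens)
end = length - 2                      # yield_oef False: drop the forced EOF
assert tokens[0:end + 1] == ["a", "+", "b"]
end = length - 1                      # yield_oef True: keep the EOF
assert tokens[0:end + 1] == tokens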
def the_token_after(self, skip_whitespace=True):
"""
@@ -123,13 +136,13 @@ class ParserInput:
Never returns None (returns TokenKind.EOF instead)
"""
my_pos = self.pos + 1
if my_pos >= self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
if skip_whitespace:
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1
if my_pos == self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
return self.tokens[my_pos]
@@ -140,7 +153,7 @@ class ParserInput:
:param pos:
:return: True if pos is a valid position, False otherwise
"""
if pos < 0 or pos >= self.end:
if pos < 0 or pos > self.end:
self.token = None
return False
@@ -355,10 +368,10 @@ class SheerkaExecute(BaseService):
if pi is NotFound: # when CacheManager.cache_only is True
pi = ParserInput(text)
self.pi_cache.put(text, pi)
return ParserInput(text, pi.tokens) # new instance, but no need to tokenize the text again
return ParserInput(text, tokens=pi.tokens, length=pi.length) # new instance, but no need to tokenize the text again
key = text or core.utils.get_text_from_tokens(tokens)
pi = ParserInput(key, tokens)
pi = ParserInput(key, tokens=tokens, length=len(tokens))
self.pi_cache.put(key, pi)
return pi
+1 -1
@@ -144,7 +144,7 @@ class SheerkaMemory(BaseService):
:param concept:
:return:
"""
if self.sheerka.during_initialisation:
if self.sheerka.during_initialisation or self.sheerka.during_restore:
return
self.registration[key] = concept
+8 -3
@@ -49,8 +49,8 @@ class TokenKind(Enum):
DEGREE = "degree" # °
WORD = "word"
EQUALSEQUALS = "=="
VAR_DEF = "__var__"
REGEX = "r'xxx' or r\"xxx\" or r|xxx| or r/xxx/"
VAR_DEF = "concept variable" # __var__
REGEX = "regex" # r'xxx' or r\"xxx\" or r|xxx| or r/xxx/ but not r:xxx: which means rules
@dataclass()
@@ -73,7 +73,10 @@ class Token:
if self._strip_quote:
return self._strip_quote
self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
if self.type in (TokenKind.STRING, TokenKind.REGEX):
self._strip_quote = self.value[1:-1]
else:
self._strip_quote = self.value
return self._strip_quote
@property
@@ -120,6 +123,8 @@ class Token:
elif self.type == TokenKind.RULE:
from core.utils import str_concept
return str_concept(self.value, prefix="r:")
elif self.type == TokenKind.REGEX:
return "r" + self.value
else:
return str(self.value)
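A small round-trip illustration of the REGEX handling above (the stored token value is assumed to keep its quotes but not the r prefix):

value = "'[0-9]+'"                  # TokenKind.REGEX value as stored
stripped = value[1:-1]              # what strip_quote now returns
assert stripped == "[0-9]+"
assert "r" + value == "r'[0-9]+'"   # what __str__ reconstructs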
+44 -1
@@ -1,3 +1,5 @@
from dataclasses import dataclass
import core.utils
from core.ast_helpers import UnreferencedVariablesVisitor
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
@@ -11,6 +13,29 @@ from parsers.DefConceptParser import DefConceptNode, NameNode
from parsers.PythonParser import get_python_node
@dataclass(eq=True, frozen=True)
class MandatoryVariable:
"""
When we are searching for variables, we are searching for potential variables
So if the variable found has no match in the concept definition, it's not a problem
for example:
def concept foo x as isinstance(x, str)
{x, str} will be detected as potential variables, but 'str' will find no match.
But there are cases where the variable found must exist, otherwise it's an error
example:
def concept foo from bnf xxx
'xxx' is detected as a variable (assuming that there is no concept named 'xxx') and a match must be
found in the name of the concept
To distinguish between mandatory and non-mandatory variables, we use MandatoryVariable
"""
name: str
def __hash__(self):
return hash(("MandatoryVariable", self.name))
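The custom hash keeps the marker distinct from the plain string of the same name when both land in the same set; a stand-alone re-creation, not the real class:

from dataclasses import dataclass

@dataclass(eq=True, frozen=True)
class MV:
    name: str
    def __hash__(self):
        return hash(("MandatoryVariable", self.name))

found = {"x", MV("x")}
assert len(found) == 2        # the string and the marker coexist
assert MV("x") == MV("x")     # marker equality still works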
class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
"""
Gets the concepts referenced by BNF
@@ -29,6 +54,9 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
else:
self.names.add(node.concept)
def visit_VariableExpression(self, node):
self.names.add(MandatoryVariable(node.rule_name))
def visit_all(self, node):
if node.rule_name:
self.names.add(node.rule_name)
@@ -60,11 +88,13 @@ class DefConceptEvaluator(OneReturnValueEvaluator):
# validate the node
variables_found = set()
mandatory_variables = set() # these variables MUST have a match in the name (if the name is not None)
concept = Concept(str(def_concept_node.name))
concept.get_metadata().definition_type = def_concept_node.definition_type
name_to_use = self.get_name_to_use(def_concept_node)
# get variables
for prop in ("definition", "where", "pre", "post", "body", "ret"):
part_ret_val = getattr(def_concept_node, prop)
@@ -87,13 +117,26 @@ class DefConceptEvaluator(OneReturnValueEvaluator):
# try to find what can be a property
for p in self.get_variables(context, part_ret_val, name_to_use):
variables_found.add(p)
if isinstance(p, MandatoryVariable):
variables_found.add(p.name)
mandatory_variables.add(p.name)
else:
variables_found.add(p)
# add variables by order of appearance when possible
for name_part in name_to_use:
if name_part in variables_found:
concept.def_var(name_part, None)
# check that all mandatory variables are defined in the name
# KSI: 2021-02-17
# The mandatory variables come from bnf definitions where it was not possible to resolve to a concept
# So rather than issuing an 'UnresolvedVariableError' I prefer UNKNOWN_CONCEPT
if (diff := mandatory_variables.difference(set(name_to_use))) != set():
unknown_concepts = [sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": c}) for c in sorted(diff)]
error = sheerka.new(BuiltinConcepts.ERROR, body=unknown_concepts)
return sheerka.ret(self.name, False, error, parents=[return_value])
# add the remaining properties
# They mainly come from BNF definition
for p in variables_found:
+64 -2
@@ -441,6 +441,11 @@ class GrammarErrorNode(ParsingError):
message: str
@dataclass()
class NoMatchingTokenError(ParsingError):
pos: int
class SyaAssociativity(Enum):
Left = "left"
Right = "right"
@@ -720,6 +725,35 @@ class CNC(CN):
txt += f", {k}='{v}'"
return txt + ")"
def to_compare(self, other, to_compare_delegate):
"""
Transform other into CNC, to ease the comparison
:param other:
:param to_compare_delegate:
:return:
"""
if isinstance(other, CNC):
return other
if isinstance(other, ConceptNode):
if self.exclude_body:
compiled = {k: v for k, v in other.concept.get_compiled().items() if k != ConceptParts.BODY}
else:
compiled = other.concept.get_compiled()
self_compile_to_use = self.compiled or compiled
compiled = to_compare_delegate(self_compile_to_use, compiled, to_compare_delegate)
return CNC(other.concept,
other.start if self.start is not None else None,
other.end if self.end is not None else None,
other.source if self.source is not None else None,
self.exclude_body,
**compiled)
raise NotImplementedError("CNC")
class UTN(HelperWithPos):
"""
@@ -763,6 +797,24 @@ class UTN(HelperWithPos):
txt += f", end={self.end}"
return txt + ")"
def to_compare(self, other, to_compare_delegate):
"""
Transform other into UTN, to ease the comparison
:param other:
:param to_compare_delegate:
:return:
"""
if isinstance(other, UTN):
return other
if isinstance(other, UnrecognizedTokensNode):
return UTN(other.source,
other.start,
other.end)
raise NotImplementedError("UTN")
class RN(HelperWithPos):
"""
@@ -840,9 +892,19 @@ class BaseNodeParser(BaseParser):
:return:
"""
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
concepts_by_first_keyword = SheerkaConceptManager.compute_concepts_by_first_token(context, concepts).body
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
service = context.sheerka.services[SheerkaConceptManager.NAME]
by_token, by_regex = SheerkaConceptManager.compute_concepts_by_first_item(context, concepts).body
context.sheerka.om.put(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY,
False,
{k.serialize(): v for k, v in by_regex.items()})
compiled = service.compile_concepts_by_first_regex(context, by_regex).body
service.compiled_concepts_by_regex.clear()
service.compiled_concepts_by_regex.extend(compiled)
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, by_token).body
context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
False,
resolved)
return self
+15 -9
@@ -4,7 +4,7 @@ from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \
ConceptExpression, StrMatch
ConceptExpression, StrMatch, RegExMatch, VariableExpression
class BnfDefinitionParser(BaseParser):
@@ -231,9 +231,11 @@ class BnfDefinitionParser(BaseParser):
if token.type == TokenKind.CONCEPT:
self.next_token()
concept = self.sheerka.new((token.value[0], token.value[1]))
expr = ConceptExpression(concept)
# expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
# else ConceptExpression(concept)
if not self.sheerka.is_known(concept):
self.add_error(concept)
return None
expr = ConceptExpression(concept, rule_name=concept.name)
return self.eat_rule_name_if_needed(expr)
if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD):
@@ -245,20 +247,19 @@ class BnfDefinitionParser(BaseParser):
# (for example of recursive bnf definition)
if self.context.obj and hasattr(self.context.obj, "name"):
if concept_name == str(self.context.obj.name):
return self.eat_rule_name_if_needed(ConceptExpression(concept_name))
return self.eat_rule_name_if_needed(ConceptExpression(concept_name)) # 2021-02-17 no rule name ?
concept = self.context.get_concept(concept_name)
if not self.sheerka.is_known(concept):
self.add_error(concept)
return None
expr = VariableExpression(concept_name)
return self.eat_rule_name_if_needed(expr)
elif hasattr(concept, "__iter__"):
self.add_error(
self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
body=("key", concept_name)))
return None
else:
expr = ConceptExpression(concept)
expr.rule_name = concept.name
expr = ConceptExpression(concept, rule_name=concept.name)
return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.STRING:
@@ -272,6 +273,11 @@ class BnfDefinitionParser(BaseParser):
ret = Sequence(*elements)
return self.eat_rule_name_if_needed(ret)
if token.type == TokenKind.REGEX:
self.next_token()
ret = RegExMatch(core.utils.strip_quotes(token.strip_quote))
return self.eat_rule_name_if_needed(ret)
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return self.eat_rule_name_if_needed(ret)
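With this branch a concept definition can embed a regex terminal directly. An illustrative (hypothetical) definition, combining the 'def concept ... from bnf' form seen above with the r'xxx' token syntax from the tokenizer:

def concept number from bnf r'[0-9]+'

The quoted token is stripped and wrapped in a RegExMatch node; the first-item computation later exposes it as a RegExDef entry in CONCEPTS_BY_REGEX_ENTRY.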
File diff suppressed because it is too large
+4
@@ -30,6 +30,10 @@ class PythonErrorNode(ParsingError):
@dataclass()
class ConceptDetectedError(ParsingError):
"""
Raised when the Python parser finds an identifier, and that identifier is a concept.
In that case it's not up to the PythonParser to handle it
"""
name: str
+2
@@ -1142,6 +1142,8 @@ class SyaNodeParser(BaseNodeParser):
if sya_definitions:
self.test_only_sya_definitions = sya_definitions
return self
@staticmethod
def _is_eligible(concept):
"""
+3 -1
@@ -1,7 +1,7 @@
import json
import core.utils
from core.global_symbols import NotInit, NotFound, Removed
from core.global_symbols import NotInit, NotFound, Removed, NoFirstToken
from sheerkapickle import tags, utils, handlers
@@ -54,6 +54,8 @@ class SheerkaUnpickler:
instance = NotFound
elif obj[tags.CUSTOM] == Removed.value:
instance = Removed
elif obj[tags.CUSTOM] == NoFirstToken.value:
instance = NoFirstToken
else:
raise KeyError(f"unknown {obj[tags.CUSTOM]}")
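A condensed re-creation of the singleton round-trip this enables (names mirror global_symbols; the real unpickler dispatches on the tags.CUSTOM field):

class NoFirstTokenType:
    value = "**NoFirstToken**"

NoFirstToken = NoFirstTokenType()

def restore_custom(tag):
    # map the serialized tag back to the module-level singleton so that
    # identity checks ('x is NoFirstToken') survive a save/restore cycle
    if tag == NoFirstToken.value:
        return NoFirstToken
    raise KeyError(f"unknown {tag}")

assert restore_custom("**NoFirstToken**") is NoFirstToken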