Fixed #30 : Add variable support in BNF concept definition

Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
This commit is contained in:
2021-02-24 17:23:03 +01:00
parent cac2dad17f
commit 646c428edb
32 changed files with 2107 additions and 360 deletions
+11
View File
@@ -37,3 +37,14 @@ woman is a female
def concept human
man is a human
woman is a human
# days of the week
def concept monday
def concept tuesday
def concept wednesday
def concept thursday
def concept friday
def concept saturday
def concept sunday
+3 -1
View File
@@ -1,4 +1,6 @@
def concept x is a string pre is_question() as isinstance(x, str)
def concept x is a int pre is_question() as isinstance(x, int)
def concept x is a integer pre is_question() as isinstance(x, int)
def concept x starts with y pre is_question() where x is a string as x.startswith(y)
def concept x starts with y pre is_question() where x is a string as x.startswith(y)
def concept sha256 from bnf r'[a-f0-9]{64}'
def concept sha512 from bnf r'[a-f0-9]{128}'
+40 -20
View File
@@ -342,8 +342,9 @@ def evaluate(context,
def get_lexer_nodes(return_values, start, tokens):
"""
From a parser result, return the corresponding LexerNode
either ConceptNode, UnrecognizedTokensNode or SourceCodeNode
Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
Unlike the other method (get_lexer_nodes_using_positions),
all created lexer nodes use the same offset (start)
:param return_values:
:param start:
:param tokens:
@@ -360,13 +361,12 @@ def get_lexer_nodes(return_values, start, tokens):
continue
end = start + len(tokens) - 1
lexer_nodes.append(
[SourceCodeNode(start,
end,
tokens,
ret_val.body.source,
python_node=ret_val.body.body,
return_value=ret_val)])
lexer_nodes.append([SourceCodeNode(start,
end,
tokens,
ret_val.body.source,
python_node=ret_val.body.body,
return_value=ret_val)])
elif ret_val.who == "parsers.ExactConcept":
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -379,6 +379,11 @@ def get_lexer_nodes(return_values, start, tokens):
for node in nodes:
node.start += start
node.end += start
if isinstance(node, ConceptNode):
for k, v in node.concept.get_compiled().items():
if hasattr(v, "start"):
v.start += start
v.end += start
# but append the whole sequence when it's a sequence
lexer_nodes.append(nodes)
@@ -397,9 +402,15 @@ def get_lexer_nodes(return_values, start, tokens):
def get_lexer_nodes_using_positions(return_values, positions):
"""
Transform all elements from return_values into lexer nodes
use positions to remap the exact positions
Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
Use positions to compute the exact new positions
Unlike the other method (get_lexer_nodes),
each return value is mapped to one position. It is not an offset, but an absolute position
:param return_values:
:param positions: is a list of triplets (start, end, tokens)
:return:
"""
lexer_nodes = []
for ret_val, position in zip(return_values, positions):
if ret_val.who in ("parsers.Python", 'parsers.PythonWithConcepts'):
@@ -425,6 +436,11 @@ def get_lexer_nodes_using_positions(return_values, positions):
for node in nodes:
node.start = position.start
node.end = position.end
if isinstance(node, ConceptNode):
for k, v in node.concept.get_compiled().items():
if hasattr(v, "start"):
v.start += position.start
v.end += position.start
# but append the whole sequence when it's a sequence
lexer_nodes.extend(nodes)
@@ -493,9 +509,10 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
def update_compiled(context, concept, errors, parsers=None):
"""
recursively iterate over concept.get_compiled() to replace LexerNode into concepts or list of ReturnValueConcept
When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...)
the result will be a LexerNode.
TL;DR;
Recursively iterate over concept.get_compiled() to replace LexerNode into concepts or list of ReturnValueConcept
Long version:
When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...) the result will be a LexerNode.
In the specific case of a ConceptNode, the compiled variables will also be LexerNode (UnrecognizedTokensNode...)
This function iterates over the compiled variables to transform these nodes into concepts or compiled AST
:param context:
@@ -518,9 +535,12 @@ def update_compiled(context, concept, errors, parsers=None):
_validate_concept(v)
elif isinstance(v, SourceCodeWithConceptNode):
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
parser_helper = PythonWithConceptsParser()
res = parser_helper.parse_nodes(context, v.get_all_nodes())
if v.return_value:
res = v.return_value
else:
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
parser_helper = PythonWithConceptsParser()
res = parser_helper.parse_nodes(context, v.get_all_nodes())
if res.status:
c.get_compiled()[k] = [res]
else:
@@ -556,7 +576,7 @@ def update_compiled(context, concept, errors, parsers=None):
# example : Concept("a plus b").def_var("a").def_var("b")
# and the user has entered 'a plus b'
# Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
# This means that 'a' and 'b' don't have any real value
# This means that 'a' and 'b' don't have any real values
if len(concept.get_metadata().variables) > 0:
for name, value in concept.get_metadata().variables:
if _get_source(concept.get_compiled(), name) != name:
@@ -633,7 +653,7 @@ def ensure_concept_or_rule(*items):
raise TypeError(f"'{items}' must be a concept or rule")
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
def ensure_bnf(context, concept, parser_name="BaseNodeParser", update_bnf_for_cached_concept=True):
if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF and not concept.get_bnf():
from parsers.BnfDefinitionParser import BnfDefinitionParser
regex_parser = BnfDefinitionParser()
@@ -651,7 +671,7 @@ def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
raise Exception(bnf_parsing_ret_val.value)
concept.set_bnf(bnf_parsing_ret_val.body.body)
if concept.id:
if concept.id and update_bnf_for_cached_concept:
context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf()) # update bnf in cache
+29 -2
View File
@@ -694,6 +694,33 @@ class CC:
self.end = end
return self
def to_compare(self, other, to_compare_delegate):
    """
    Transform other into a CC, to ease the comparison
    A CC is returned untouched. A Concept is wrapped into a new CC built from its
    compiled items (without the BODY entry when self.exclude_body is set).
    :param other: the object to compare with (CC or Concept)
    :param to_compare_delegate: callable invoked as
        to_compare_delegate(reference_compiled, other_compiled, to_compare_delegate);
        its result is expanded as keyword arguments of the new CC
    :return: a CC
    :raises NotImplementedError: when other is neither a CC nor a Concept
    """
    if isinstance(other, CC):
        return other

    if not isinstance(other, Concept):
        raise NotImplementedError(f"CC, {other=}")

    if self.exclude_body:
        other_compiled = {name: item
                          for name, item in other.get_compiled().items()
                          if name != ConceptParts.BODY}
    else:
        other_compiled = other.get_compiled()

    # when self has no compiled items of its own, the other's items are used as the reference
    reference = self.compiled or other_compiled
    normalized = to_compare_delegate(reference, other_compiled, to_compare_delegate)

    return CC(other, self.source, self.exclude_body, **normalized)
@dataclass()
class CB:
@@ -825,8 +852,8 @@ class CIO:
self.concept_id = concept.id
self.concept = concept
self.source = source
self.start = -1
self.end = -1
self.start = None
self.end = None
def set_concept(self, concept):
self.concept = concept
+6
View File
@@ -47,9 +47,15 @@ class RemovedType(CustomType):
super(RemovedType, self).__init__("**Removed**")
class NoFirstTokenType(CustomType):
    """
    Custom type initialised with the label "**NoFirstToken**"
    """

    def __init__(self):
        super().__init__("**NoFirstToken**")
NotInit = NotInitType()
NotFound = NotFoundType()
Removed = RemovedType()
NoFirstToken = NoFirstTokenType()
class ErrorObj:
+10
View File
@@ -199,6 +199,16 @@ class ExecutionContext:
self._push = None
def add_preprocess(self, name, **kwargs):
"""
PreProcess items are used during the parsing and the evaluation of the ReturnValueConcept
Using them, you can tweak the behaviour of parsers and evaluators (you can disable them, for instance)
example :
context.add_preprocess(BaseEvaluator.get_name("priority15"), enabled=False)
context.add_preprocess(BaseEvaluator.get_name("all_priority15"), priority=99)
:param name:
:param kwargs:
:return:
"""
preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS)
preprocess.set_value("preprocess_name", name)
for k, v in kwargs.items():
+8 -1
View File
@@ -734,7 +734,7 @@ class Sheerka(Concept):
if not isinstance(obj, Concept):
return True
return obj.key not in (BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
return obj.key not in (None, BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
@staticmethod
def isinstance(a, b):
@@ -879,6 +879,13 @@ class Sheerka(Concept):
return concept
@staticmethod
def deepdiff(a, b):
    """
    Debug helper : compute the DeepDiff between a and b (ignoring order), print it and return it
    :param a:
    :param b:
    :return: the DeepDiff result
    """
    from deepdiff import DeepDiff

    differences = DeepDiff(a, b, ignore_order=True)
    print(differences)
    return differences
def to_profile():
sheerka = Sheerka()
@@ -1,5 +1,6 @@
import re
from dataclasses import dataclass
from typing import Set
from typing import Set, List, Union
import core.utils
from cache.Cache import Cache
@@ -12,9 +13,10 @@ from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, Built
from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED, NoFirstToken
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind
from parsers.BnfNodeParser import RegExDef
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@@ -98,6 +100,8 @@ class SheerkaConceptManager(BaseService):
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
CONCEPTS_BY_REGEX_ENTRY = "ConceptManager:Concepts_By_Regex"
CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions"
def __init__(self, sheerka):
@@ -105,6 +109,7 @@ class SheerkaConceptManager(BaseService):
self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
not attr.startswith("_") and attr not in self.forbidden_meta}
self.compiled_concepts_by_regex = []
def initialize(self):
self.sheerka.bind_service_method(self.create_new_concept, True)
@@ -119,6 +124,7 @@ class SheerkaConceptManager(BaseService):
self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_regex, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_bnf_definitions, False, visible=False)
self.sheerka.bind_service_method(self.clear_bnf_definition, True, visible=False)
@@ -145,6 +151,9 @@ class SheerkaConceptManager(BaseService):
cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)
cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_REGEX_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BY_REGEX_ENTRY, cache)
cache = Cache().auto_configure(self.CONCEPTS_BNF_DEFINITIONS_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, cache, persist=False)
@@ -158,6 +167,14 @@ class SheerkaConceptManager(BaseService):
res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# init the regular expression
self.sheerka.om.get(self.CONCEPTS_BY_REGEX_ENTRY, None)
from_db = self.sheerka.om.current_cache_manager().copy(self.CONCEPTS_BY_REGEX_ENTRY)
concepts_by_first_regex = {RegExDef().deserialize(k): v for k, v in from_db.items()}
res = self.compile_concepts_by_first_regex(context, concepts_by_first_regex)
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(res.body)
def initialize_builtin_concepts(self):
"""
Initializes the builtin concepts
@@ -201,9 +218,9 @@ class SheerkaConceptManager(BaseService):
concept.init_key()
init_bnf_ret_value = None
ontology = sheerka.om
om = sheerka.om
if ontology.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
if om.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
return sheerka.ret(
self.NAME,
@@ -220,33 +237,44 @@ class SheerkaConceptManager(BaseService):
except Exception as ex:
return sheerka.ret(self.NAME, False, ex.args[0])
# compute new concepts_by_first_keyword
init_ret_value = self.compute_concepts_by_first_token(context, [concept], True)
# compute first token and/or first regex
init_ret_value = self.compute_concepts_by_first_item(context, [concept], True)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
by_first_keyword, by_first_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
init_ret_value = self.resolve_concepts_by_first_keyword(context, by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile regex
compile_ret = self.compile_concepts_by_first_regex(context, by_first_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# if everything is fine
freeze_concept_attrs(concept)
concept.freeze_definition_hash()
ontology.add_concept(concept)
ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.add_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in by_first_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
# allow search by definition when definition relevant
ontology.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
om.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
# update references
for ref in self.compute_references(concept):
ontology.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# TODO : this line seems to be useless
# The grammar is never reset
@@ -286,7 +314,7 @@ class SheerkaConceptManager(BaseService):
# }
#
sheerka = self.sheerka
cache_manager = self.sheerka.om
om = self.sheerka.om
if not to_add and not to_remove:
return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))
@@ -301,23 +329,19 @@ class SheerkaConceptManager(BaseService):
if res is not None:
return res
# To update concept by first keyword
# first remove the old references
keywords = self.get_first_tokens(sheerka, concept) # keyword of the old concept
concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
for keyword in keywords:
try:
concepts_by_first_keyword[keyword].remove(concept.id)
if len(concepts_by_first_keyword[keyword]) == 0:
del concepts_by_first_keyword[keyword]
except KeyError: # only occurs in unit tests when concepts are created without create_new()
pass
# To update concept by first keyword and first regex
# first remove old first token and first regex entries
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# and then update
init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword)
init_ret_value = self.compute_concepts_by_first_item(context,
[new_concept],
False,
concepts_by_first_keyword,
concepts_by_regex)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body
concepts_by_first_keyword, concepts_by_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context,
@@ -327,18 +351,30 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# update concept that referenced the old concept and clear old references
self.update_references(context, concept, new_concept, to_add)
for ref in self.compute_references(concept):
cache_manager.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
om.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# compute new references
for ref in self.compute_references(new_concept):
cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
cache_manager.update_concept(concept, new_concept)
cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
# everything is ok, update the caches
om.update_concept(concept, new_concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
# everything seems to be fine. Update the list of attributes
# Caution. Must be done AFTER update_concept()
@@ -349,6 +385,7 @@ class SheerkaConceptManager(BaseService):
if modify_source:
self._update_concept(context, concept, to_add, to_remove)
# KSI 2021-02-16 publish the modification of the concept only when someone needs it
ret = sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept))
return ret
@@ -362,17 +399,44 @@ class SheerkaConceptManager(BaseService):
# TODO : resolve concept first
sheerka = context.sheerka
refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if not sheerka.is_known(concept):
return sheerka.ret(self.NAME, False, sheerka.err(ConceptNotFound(concept)))
om = sheerka.om
refs = om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if refs is not NotFound:
refs_instances = [sheerka.new_from_template(c, c.key) for c in [self.get_by_id(ref) for ref in refs]]
return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(refs_instances)))
try:
sheerka.om.remove_concept(concept)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
except ConceptNotFound as ex:
return sheerka.ret(self.NAME, False, sheerka.err(ex))
concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
# computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# everything seems fine. I can commit the modification and remove
om.remove_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
def set_attr(self, concept, attribute, value):
"""
@@ -497,7 +561,7 @@ class SheerkaConceptManager(BaseService):
if c.id == concept_id:
return c
metadata = [(index_name, key), ("id", concept_id)] if concept_id else (index_name, key)
metadata = {index_name: key, "id": concept_id} if concept_id else {index_name: key}
return self.sheerka.get_unknown(metadata)
def update_references(self, context, concept, modified_concept=None, modifications=None):
@@ -663,12 +727,39 @@ class SheerkaConceptManager(BaseService):
concept.get_metadata().key = None
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
concept.set_bnf(None)
ensure_bnf(context, concept)
ensure_bnf(context, concept, update_bnf_for_cached_concept=False)
concept.init_key()
return
def _remove_concept_first_token_and_first_regex(self, concept):
    """
    Build copies of the 'by first keyword' and 'by first regex' maps where the id of
    the given concept no longer appears. Nothing is written back to the cache here.
    :param concept: concept whose first items (tokens or RegExDef) must be dropped
    :return: tuple (concepts_by_first_keyword, concepts_by_regex)
             the keys of concepts_by_regex are deserialized RegExDef
    """
    first_items = self.get_first_items(self.sheerka, concept)  # items of the old concept
    by_keyword = self.sheerka.om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    by_regex = self.sheerka.om.copy(self.CONCEPTS_BY_REGEX_ENTRY)

    def drop_concept_id(index, key):
        # work on a copy of the id list, so the list object read from the index is left untouched
        remaining = index[key].copy()
        remaining.remove(concept.id)
        if len(remaining) == 0:
            del index[key]
        else:
            index[key] = remaining

    for first_item in first_items:
        try:
            if isinstance(first_item, RegExDef):
                # the regex map is keyed by the serialized form of the RegExDef
                drop_concept_id(by_regex, first_item.serialize())
            else:
                drop_concept_id(by_keyword, first_item)
        except KeyError:  # only occurs in unit tests when concepts are created without create_new()
            pass

    return by_keyword, {RegExDef().deserialize(key): ids for key, ids in by_regex.items()}
@staticmethod
def get_first_tokens(sheerka, concept):
"""
@@ -677,6 +768,30 @@ class SheerkaConceptManager(BaseService):
:param concept:
:return:
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.get_bnf())
return [t for t in bnf_visitor.first_tokens if t is not NoFirstToken]
else:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
return [keyword]
return None
@staticmethod
def get_first_items(sheerka, concept) -> List[Union[str, RegExDef]]:
"""
Get all the first item needed by the concept
An item can either be a token, or regular expression
:param sheerka:
:param concept:
:return: List of string (if it's token or RegExDef if it's the definition of a regex)
"""
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
@@ -692,6 +807,55 @@ class SheerkaConceptManager(BaseService):
return None
@staticmethod
def compute_concepts_by_first_item(context,
                                   concepts,
                                   use_sheerka=False,
                                   previous_first_keywords=None,
                                   previous_first_regex=None):
    """
    Create two maps,
    one describing the first token expected by a concept
    one for the first regular expression
    eg the dictionaries that go into CONCEPTS_BY_FIRST_KEYWORD_ENTRY and CONCEPTS_BY_REGEX_ENTRY
    :param context:
    :param concepts: list of concepts to parse
    :param use_sheerka: if True, the starting maps are copied from sheerka.om
                        (previous_first_keywords and previous_first_regex are then ignored)
    :param previous_first_keywords: starting map {token: [concept ids]} when use_sheerka is False
    :param previous_first_regex: starting map {RegExDef: [concept ids]} when use_sheerka is False
    :return: a return value whose body is the tuple (first keywords map, first regex map),
             or a failed return value carrying a NoFirstTokenError when a concept has no first item
    """
    sheerka = context.sheerka

    if use_sheerka:
        by_keyword = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
        serialized_regex = sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY)
        # the regex map is stored with serialized keys
        by_regex = {RegExDef().deserialize(key): ids for key, ids in serialized_regex.items()}
    else:
        by_keyword = previous_first_keywords or {}
        by_regex = previous_first_regex or {}

    for concept in concepts:
        first_items = SheerkaConceptManager.get_first_items(sheerka, concept)
        if first_items is None:
            # no first token found for a concept ?
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

        for first_item in first_items:
            target = by_regex if isinstance(first_item, RegExDef) else by_keyword
            target.setdefault(first_item, []).append(concept.id)

    # 'uniquify' the lists (existing keys are reassigned, the size of the maps does not change)
    for mapping in (by_keyword, by_regex):
        for key, ids in mapping.items():
            mapping[key] = core.utils.make_unique(ids)

    return sheerka.ret("BaseNodeParser", True, (by_keyword, by_regex))
@staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
"""
@@ -812,6 +976,19 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def compile_concepts_by_first_regex(context, concepts_by_first_regex):
    """
    Compile every regular expression definition of the map
    :param context:
    :param concepts_by_first_regex: map {RegExDef: [concept ids]}
    :return: a return value whose body is a list of (compiled pattern, [concept ids]),
             or a failed return value carrying the exception raised during the compilation
    """
    compiled = []
    try:
        for regex_def, concept_ids in concepts_by_first_regex.items():
            flags = RegExDef.compile_flags(regex_def.ignore_case,
                                           regex_def.multiline,
                                           regex_def.explicit_flags)
            pattern = re.compile(regex_def.to_match, flags)
            compiled.append((pattern, concept_ids))
    except Exception as ex:
        # any failure is reported to the caller through the return value
        return context.sheerka.ret("BaseNodeParser", False, ex)

    return context.sheerka.ret("BaseNodeParser", True, compiled)
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
"""
Tries to find if there are concepts that match the value of the token
@@ -853,5 +1030,19 @@ class SheerkaConceptManager(BaseService):
return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)
def get_concepts_by_first_regex(self, expr, pos):
    """
    Try every compiled regular expression against expr, starting at pos,
    and collect the concepts attached to the ones that match
    :param expr: text to match
    :param pos: index in expr where the match must start
    :return: list of concepts (one per concept id of each matching regex, in declaration order)
    """
    return [self.sheerka.get_by_id(concept_id)
            for pattern, concept_ids in self.compiled_concepts_by_regex
            if pattern.match(expr, pos)
            for concept_id in concept_ids]
def get_concepts_bnf_definitions(self):
    """
    Give access to the content of the BNF definitions cache of the current cache manager
    :return: the 'cache' attribute of the cache registered under CONCEPTS_BNF_DEFINITIONS_ENTRY
    """
    cache_manager = self.sheerka.om.current_cache_manager()
    bnf_definitions = cache_manager.caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY]
    return bnf_definitions.cache
@@ -239,14 +239,17 @@ class ConsoleDebugLogger(BaseDebugLogger):
:param kwargs:
:return:
"""
raw = kwargs.pop('raw', None)
if not self.debug_manager.compute_debug_concept(self.context,
self.service_name,
self.method_name,
concept.id,
self.debug_id):
return
raw = kwargs.pop('raw', None)
color = kwargs.pop('color', None)
str_vars = raw if raw else pp.pformat(kwargs) if kwargs else ""
if color:
str_vars = CCM[color] + str_vars + CCM['reset']
text = " - " + text if text is not None else ""
colon = ": " if str_vars else ""
str_text = f"{CCM['cyan']}..concept#{concept.id}{text}{colon} {CCM['reset']}"
@@ -5,6 +5,7 @@ from core.builtin_helpers import expect_one, only_successful, evaluate, ensure_c
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \
concept_part_value
from core.global_symbols import NotInit
from core.rule import Rule
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.sheerka_service import BaseService
@@ -421,6 +422,9 @@ class SheerkaEvaluateConcept(BaseService):
else:
return evaluated
elif isinstance(to_resolve, Rule):
raise NotImplementedError() # how to resolve rules ?
# otherwise, execute all return values to find out what is the value
else:
# update short term memory with current concept variables
+31 -18
View File
@@ -22,7 +22,7 @@ class ParserInput:
Helper class that tokenizes the input once for all
"""
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
def __init__(self, text, tokens=None, length=None, start=None, end=None, yield_oef=True):
self.text = text
self.tokens = tokens or None
if self.tokens:
@@ -38,13 +38,13 @@ class ParserInput:
last_token.line,
last_token.column + 1)]
self.length = None # to be computed in reset()
self.length = length # to be computed (again) in reset()
self.yield_oef = yield_oef
self.start = start or 0
if end:
self.original_end = end + 1
self.end = self.original_end
self.original_end = end # forced index of the last token
self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
else:
self.original_end = self.end = None
@@ -61,30 +61,43 @@ class ParserInput:
return f"ParserInput({from_tokens}'{self.text}')"
def reset(self, yield_oef=None):
def _get_end_from_yield_eof(_length, _yield_oef):
return _length - 1 if _yield_oef else _length - 2
if yield_oef is None:
yield_oef = self.yield_oef
# make sure tokens is correctly initialized
if self.tokens is None:
# the EOF token is always generated here, but it will not be yielded unless yield_oef is set
self.tokens = list(Tokenizer(self.text, yield_eof=True))
self.length = len(self.tokens)
if self.original_end is None:
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
self.end = _get_end_from_yield_eof(self.length, yield_oef)
else:
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
self.end = self.original_end if self.original_end < self.length else \
_get_end_from_yield_eof(self.length, yield_oef)
self.pos = self.start - 1
self.token = None
return self
def as_text(self, custom_switcher=None, tracker=None):
if not self.tokens or self.end is None:
# as_text is requested before reset().
# It means that we want the original text
return self.text
if custom_switcher is None:
if self.sub_text:
return self.sub_text
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_text = self.text
return self.sub_text
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end])
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end + 1])
return self.sub_text
else:
return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)
@@ -92,16 +105,16 @@ class ParserInput:
def as_tokens(self):
if self.sub_tokens:
return self.sub_tokens
if self.start == 0 and self.end == self.length:
if self.start == 0 and self.end == self.length - 1:
self.sub_tokens = self.tokens
return self.sub_tokens
self.sub_tokens = self.tokens[self.start:self.end]
self.sub_tokens = self.tokens[self.start:self.end + 1]
return self.sub_tokens
def next_token(self, skip_whitespace=True):
self.pos += 1
if self.pos >= self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
@@ -111,11 +124,11 @@ class ParserInput:
if skip_whitespace:
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.pos += 1
if self.pos == self.end:
if self.pos > self.end:
return False
self.token = self.tokens[self.pos]
return self.pos < self.end
return self.pos <= self.end
def the_token_after(self, skip_whitespace=True):
"""
@@ -123,13 +136,13 @@ class ParserInput:
Never returns None (returns TokenKind.EOF instead)
"""
my_pos = self.pos + 1
if my_pos >= self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
if skip_whitespace:
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1
if my_pos == self.end:
if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1)
return self.tokens[my_pos]
@@ -140,7 +153,7 @@ class ParserInput:
:param pos:
:return: True is pos is a valid position False otherwise
"""
if pos < 0 or pos >= self.end:
if pos < 0 or pos > self.end:
self.token = None
return False
@@ -355,10 +368,10 @@ class SheerkaExecute(BaseService):
if pi is NotFound: # when CacheManager.cache_only is True
pi = ParserInput(text)
self.pi_cache.put(text, pi)
return ParserInput(text, pi.tokens) # new instance, but no need to tokenize the text again
return ParserInput(text, tokens=pi.tokens, length=pi.length) # new instance, but no need to tokenize the text again
key = text or core.utils.get_text_from_tokens(tokens)
pi = ParserInput(key, tokens)
pi = ParserInput(key, tokens=tokens, length=len(tokens))
self.pi_cache.put(key, pi)
return pi
+1 -1
View File
@@ -144,7 +144,7 @@ class SheerkaMemory(BaseService):
:param concept:
:return:
"""
if self.sheerka.during_initialisation:
if self.sheerka.during_initialisation or self.sheerka.during_restore:
return
self.registration[key] = concept
+8 -3
View File
@@ -49,8 +49,8 @@ class TokenKind(Enum):
DEGREE = "degree" # °
WORD = "word"
EQUALSEQUALS = "=="
VAR_DEF = "__var__"
REGEX = "r'xxx' or r\"xxx\" or r|xxx| or r/xxx/"
VAR_DEF = "concept variable" # __var__
REGEX = "regex" # r'xxx' or r\"xxx\" or r|xxx| or r/xxx/ but not r:xxx: which means rules
@dataclass()
@@ -73,7 +73,10 @@ class Token:
if self._strip_quote:
return self._strip_quote
self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
if self.type in (TokenKind.STRING, TokenKind.REGEX):
self._strip_quote = self.value[1:-1]
else:
self._strip_quote = self.value
return self._strip_quote
@property
@@ -120,6 +123,8 @@ class Token:
elif self.type == TokenKind.RULE:
from core.utils import str_concept
return str_concept(self.value, prefix="r:")
elif self.type == TokenKind.REGEX:
return "r" + self.value
else:
return str(self.value)
+44 -1
View File
@@ -1,3 +1,5 @@
from dataclasses import dataclass
import core.utils
from core.ast_helpers import UnreferencedVariablesVisitor
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
@@ -11,6 +13,29 @@ from parsers.DefConceptParser import DefConceptNode, NameNode
from parsers.PythonParser import get_python_node
@dataclass(frozen=True)
class MandatoryVariable:
    """
    Marks a variable that MUST have a match in the concept definition.

    When we search for variables, we only collect *potential* variables: it is not
    a problem if one of them has no match in the concept definition.
    For example:
        def concept foo x as isinstance(x, str)
        {x, str} are detected as potential variables, but 'str' will find no match.

    There are cases where the variable found must exist, otherwise it is an error.
    For example:
        def concept foo from bnf xxx
        'xxx' is detected as a variable (assuming that there is no concept named 'xxx'),
        and a match must be found in the name of the concept.

    MandatoryVariable is used to tell the mandatory variables from the optional ones.
    """
    name: str

    def __hash__(self):
        # the class name is part of the hashed tuple
        return hash(("MandatoryVariable", self.name))
class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
"""
Gets the concepts referenced by BNF
@@ -29,6 +54,9 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
else:
self.names.add(node.concept)
def visit_VariableExpression(self, node):
    """
    Register the rule name of a variable expression as a mandatory variable

    :param node: visited expression; only its rule_name is read
    """
    mandatory = MandatoryVariable(node.rule_name)
    self.names.add(mandatory)
def visit_all(self, node):
if node.rule_name:
self.names.add(node.rule_name)
@@ -60,11 +88,13 @@ class DefConceptEvaluator(OneReturnValueEvaluator):
# validate the node
variables_found = set()
mandatory_variables = set() # these variable MUST have a match in the name (if the name is not None)
concept = Concept(str(def_concept_node.name))
concept.get_metadata().definition_type = def_concept_node.definition_type
name_to_use = self.get_name_to_use(def_concept_node)
# get variables
for prop in ("definition", "where", "pre", "post", "body", "ret"):
part_ret_val = getattr(def_concept_node, prop)
@@ -87,13 +117,26 @@ class DefConceptEvaluator(OneReturnValueEvaluator):
# try to find what can be a property
for p in self.get_variables(context, part_ret_val, name_to_use):
variables_found.add(p)
if isinstance(p, MandatoryVariable):
variables_found.add(p.name)
mandatory_variables.add(p.name)
else:
variables_found.add(p)
# add variables by order of appearance when possible
for name_part in name_to_use:
if name_part in variables_found:
concept.def_var(name_part, None)
# check that all mandatory variables are defined in the name
# KSI: 2021-02-17
# The mandatory variables come from the bnf definition, where it was not possible to resolve them to a concept
# So rather than issuing an 'UnresolvedVariableError', I prefer UNKNOWN_CONCEPT
if (diff := mandatory_variables.difference(set(name_to_use))) != set():
unknown_concepts = [sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": c}) for c in sorted(diff)]
error = sheerka.new(BuiltinConcepts.ERROR, body=unknown_concepts)
return sheerka.ret(self.name, False, error, parents=[return_value])
# add the remaining properties
# They mainly come from BNF definition
for p in variables_found:
+64 -2
View File
@@ -441,6 +441,11 @@ class GrammarErrorNode(ParsingError):
message: str
@dataclass
class NoMatchingTokenError(ParsingError):
    """
    Parsing error that carries a position.

    NOTE(review): presumably 'pos' is the index of the token for which no match
    was found - confirm against the places where this error is created.
    """
    pos: int
class SyaAssociativity(Enum):
Left = "left"
Right = "right"
@@ -720,6 +725,35 @@ class CNC(CN):
txt += f", {k}='{v}'"
return txt + ")"
def to_compare(self, other, to_compare_delegate):
    """
    Build, from other, a CNC that can be compared with this one

    Positions and source are copied from other only when they are set on self,
    the other ones are left to None.

    :param other: a CNC (returned as is) or a ConceptNode
    :param to_compare_delegate: called as delegate(expected, actual, delegate) on the compiled items
    :return: a CNC
    :raises NotImplementedError: when other is neither a CNC nor a ConceptNode
    """
    if isinstance(other, CNC):
        return other

    if not isinstance(other, ConceptNode):
        raise NotImplementedError("CNC")

    other_compiled = other.concept.get_compiled()
    if self.exclude_body:
        other_compiled = {name: value
                          for name, value in other_compiled.items()
                          if name != ConceptParts.BODY}

    # when nothing is expected on our side, other's compiled is compared with itself
    expected_compiled = self.compiled or other_compiled
    converted = to_compare_delegate(expected_compiled, other_compiled, to_compare_delegate)

    start = None if self.start is None else other.start
    end = None if self.end is None else other.end
    source = None if self.source is None else other.source

    return CNC(other.concept, start, end, source, self.exclude_body, **converted)
class UTN(HelperWithPos):
"""
@@ -763,6 +797,24 @@ class UTN(HelperWithPos):
txt += f", end={self.end}"
return txt + ")"
def to_compare(self, other, to_compare_delegate):
    """
    Build, from other, a UTN that can be compared with this one

    :param other: a UTN (returned as is) or an UnrecognizedTokensNode
    :param to_compare_delegate: not used here
    :return: a UTN
    :raises NotImplementedError: when other is neither a UTN nor an UnrecognizedTokensNode
    """
    if isinstance(other, UTN):
        return other

    if not isinstance(other, UnrecognizedTokensNode):
        raise NotImplementedError("UTN")

    return UTN(other.source, other.start, other.end)
class RN(HelperWithPos):
"""
@@ -840,9 +892,19 @@ class BaseNodeParser(BaseParser):
:return:
"""
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
concepts_by_first_keyword = SheerkaConceptManager.compute_concepts_by_first_token(context, concepts).body
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
service = context.sheerka.services[SheerkaConceptManager.NAME]
by_token, by_regex = SheerkaConceptManager.compute_concepts_by_first_item(context, concepts).body
context.sheerka.om.put(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY,
False,
{k.serialize(): v for k, v in by_regex.items()})
compiled = service.compile_concepts_by_first_regex(context, by_regex).body
service.compiled_concepts_by_regex.clear()
service.compiled_concepts_by_regex.extend(compiled)
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, by_token).body
context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
False,
resolved)
return self
+15 -9
View File
@@ -4,7 +4,7 @@ from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \
ConceptExpression, StrMatch
ConceptExpression, StrMatch, RegExMatch, VariableExpression
class BnfDefinitionParser(BaseParser):
@@ -231,9 +231,11 @@ class BnfDefinitionParser(BaseParser):
if token.type == TokenKind.CONCEPT:
self.next_token()
concept = self.sheerka.new((token.value[0], token.value[1]))
expr = ConceptExpression(concept)
# expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
# else ConceptExpression(concept)
if not self.sheerka.is_known(concept):
self.add_error(concept)
return None
expr = ConceptExpression(concept, rule_name=concept.name)
return self.eat_rule_name_if_needed(expr)
if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD):
@@ -245,20 +247,19 @@ class BnfDefinitionParser(BaseParser):
# (for example of recursive bnf definition)
if self.context.obj and hasattr(self.context.obj, "name"):
if concept_name == str(self.context.obj.name):
return self.eat_rule_name_if_needed(ConceptExpression(concept_name))
return self.eat_rule_name_if_needed(ConceptExpression(concept_name)) # 2021-02-17 no rule name ?
concept = self.context.get_concept(concept_name)
if not self.sheerka.is_known(concept):
self.add_error(concept)
return None
expr = VariableExpression(concept_name)
return self.eat_rule_name_if_needed(expr)
elif hasattr(concept, "__iter__"):
self.add_error(
self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
body=("key", concept_name)))
return None
else:
expr = ConceptExpression(concept)
expr.rule_name = concept.name
expr = ConceptExpression(concept, rule_name=concept.name)
return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.STRING:
@@ -272,6 +273,11 @@ class BnfDefinitionParser(BaseParser):
ret = Sequence(*elements)
return self.eat_rule_name_if_needed(ret)
if token.type == TokenKind.REGEX:
self.next_token()
ret = RegExMatch(core.utils.strip_quotes(token.strip_quote))
return self.eat_rule_name_if_needed(ret)
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return self.eat_rule_name_if_needed(ret)
File diff suppressed because it is too large Load Diff
+4
View File
@@ -30,6 +30,10 @@ class PythonErrorNode(ParsingError):
@dataclass
class ConceptDetectedError(ParsingError):
    """
    The Python parser found an identifier, and that identifier is a concept.
    So it is not up to the PythonParser to respond.
    """
    name: str
+2
View File
@@ -1142,6 +1142,8 @@ class SyaNodeParser(BaseNodeParser):
if sya_definitions:
self.test_only_sya_definitions = sya_definitions
return self
@staticmethod
def _is_eligible(concept):
"""
+3 -1
View File
@@ -1,7 +1,7 @@
import json
import core.utils
from core.global_symbols import NotInit, NotFound, Removed
from core.global_symbols import NotInit, NotFound, Removed, NoFirstToken
from sheerkapickle import tags, utils, handlers
@@ -54,6 +54,8 @@ class SheerkaUnpickler:
instance = NotFound
elif obj[tags.CUSTOM] == Removed.value:
instance = Removed
elif obj[tags.CUSTOM] == NoFirstToken.value:
instance = NoFirstToken
else:
raise KeyError(f"unknown {obj[tags.CUSTOM]}")
+177 -12
View File
@@ -8,7 +8,8 @@ from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF,
from core.global_symbols import NotInit, NotFound
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager, NoModificationFound, ForbiddenAttribute, \
UnknownAttribute, CannotRemoveMeta, ValueNotFound, ConceptIsReferenced, NoFirstTokenError
from parsers.BnfNodeParser import Sequence, StrMatch, ConceptExpression, OrderedChoice, Optional, ZeroOrMore, OneOrMore
from parsers.BnfNodeParser import Sequence, StrMatch, ConceptExpression, OrderedChoice, Optional, ZeroOrMore, OneOrMore, \
RegExDef, RegExMatch
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -60,6 +61,50 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash())
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+")
def test_i_can_create_a_bnf_concept_that_starts_with_a_regex(self):
# Two bnf concepts, each defined by a single RegExMatch, are created one after the other.
# After each creation, CONCEPTS_BY_REGEX_ENTRY and service.compiled_concepts_by_regex are checked,
# then the same entry is read from sheerka.om.current_sdp() once committed.
sheerka = self.get_sheerka(cache_only=False)
context = self.get_context(sheerka)
service = sheerka.services[SheerkaConceptManager.NAME]
foo = self.bnf_concept("foo", RegExMatch("[a-z]+"))
bar = self.bnf_concept("bar", RegExMatch("[0-9]+"))
res = sheerka.create_new_concept(context, foo)
assert res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.NEW_CONCEPT)
# I can get by the first regex
assert sheerka.om.get(service.CONCEPTS_BY_REGEX_ENTRY, RegExDef("[a-z]+").serialize()) == [foo.id]
assert len(service.compiled_concepts_by_regex) == 1
# I can commit
sheerka.om.commit(context)
# I can load from DB
entry = sheerka.om.current_sdp().get(service.CONCEPTS_BY_REGEX_ENTRY)
assert entry == {RegExDef("[a-z]+").serialize(): [foo.id]}
# I can create another concept
res = sheerka.create_new_concept(context, bar)
assert res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.NEW_CONCEPT)
# I can get by the first regex
assert sheerka.om.get(service.CONCEPTS_BY_REGEX_ENTRY, RegExDef("[0-9]+").serialize()) == [bar.id]
assert sheerka.om.get(service.CONCEPTS_BY_REGEX_ENTRY, RegExDef("[a-z]+").serialize()) == [foo.id]
assert len(service.compiled_concepts_by_regex) == 2
# I can commit
sheerka.om.commit(context)
# I can load from DB
entry = sheerka.om.current_sdp().get(service.CONCEPTS_BY_REGEX_ENTRY)
assert entry == {
RegExDef("[a-z]+").serialize(): [foo.id],
RegExDef("[0-9]+").serialize(): [bar.id]
}
def test_i_cannot_create_a_bnf_concept_that_references_a_concept_that_cannot_be_resolved(self):
sheerka, context, one_1, one_1_0 = self.init_concepts(Concept("one", body="1"), Concept("one", body="1.0"))
twenty_one = Concept("twenty one", definition="'twenty' one", definition_type=DEFINITION_TYPE_BNF)
@@ -361,17 +406,16 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
# sdp is updated
sheerka.om.commit(context)
from_sdp = sheerka.om.current_sdp().get(service.CONCEPTS_BY_ID_ENTRY, new_concept.id)
sdp = sheerka.om.current_sdp()
from_sdp = sdp.get(service.CONCEPTS_BY_ID_ENTRY, new_concept.id)
assert from_sdp.get_metadata().body == "metadata value"
assert from_sdp.get_metadata().variables == [("var_name", "default value")]
assert from_sdp.get_prop(BuiltinConcepts.ISA) == {bar}
assert sheerka.om.current_sdp().get(service.CONCEPTS_BY_NAME_ENTRY,
new_concept.name).get_metadata().body == "metadata value"
assert sheerka.om.current_sdp().get(service.CONCEPTS_BY_KEY_ENTRY,
new_concept.key).get_metadata().body == "metadata value"
assert sheerka.om.current_sdp().get(service.CONCEPTS_BY_HASH_ENTRY,
new_concept.get_definition_hash()).get_metadata().body == "metadata value"
assert sdp.get(service.CONCEPTS_BY_NAME_ENTRY, new_concept.name).get_metadata().body == "metadata value"
assert sdp.get(service.CONCEPTS_BY_KEY_ENTRY, new_concept.key).get_metadata().body == "metadata value"
assert sdp.get(service.CONCEPTS_BY_HASH_ENTRY,
new_concept.get_definition_hash()).get_metadata().body == "metadata value"
def test_caches_are_update_when_i_modify_the_name(self):
sheerka, context, foo = self.init_concepts("foo", cache_only=False)
@@ -496,6 +540,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
Concept("baz", definition="foo"),
create_new=True).unpack()
# sanity check
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"foo": ["1001"],
"bar": ["1002"],
@@ -514,6 +559,71 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
'bar': ['1002', '1001', '1003']}
def test_i_can_modify_bnf_definition_from_first_token_to_first_regex(self):
# The definition of 'foo' is changed from a choice of strings to a regex.
# The concept must leave CONCEPTS_BY_FIRST_KEYWORD_ENTRY and appear in CONCEPTS_BY_REGEX_ENTRY
# (and in service.compiled_concepts_by_regex).
sheerka, context, foo, = self.init_test().with_concepts(
Concept("foo", definition="'hello'|'hola'"), create_new=True).unpack()
service = sheerka.services[SheerkaConceptManager.NAME]
# sanity
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"hello": ["1001"],
"hola": ["1001"]}
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {}
assert len(service.compiled_concepts_by_regex) == 0
to_add = {"meta": {"definition": "r'[a-z]+'"}}
res = sheerka.modify_concept(context, foo, to_add)
assert res.status
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {
RegExDef("[a-z]+").serialize(): ["1001"]
}
assert len(service.compiled_concepts_by_regex) == 1
def test_i_can_modify_bnf_definition_from_first_regex_to_first_token(self):
# The definition of 'foo' is changed from a regex to a choice of strings.
# The concept must leave CONCEPTS_BY_REGEX_ENTRY (and service.compiled_concepts_by_regex)
# and appear in CONCEPTS_BY_FIRST_KEYWORD_ENTRY.
sheerka, context, foo, = self.init_test().with_concepts(
Concept("foo", definition="r'[a-z]+'"), create_new=True).unpack()
service = sheerka.services[SheerkaConceptManager.NAME]
# sanity
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {
RegExDef("[a-z]+").serialize(): ["1001"]
}
assert len(service.compiled_concepts_by_regex) == 1
to_add = {"meta": {"definition": "'hello'|'hola'"}}
res = sheerka.modify_concept(context, foo, to_add)
assert res.status
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"hello": ["1001"],
"hola": ["1001"]}
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {}
assert len(service.compiled_concepts_by_regex) == 0
def test_i_can_modify_when_multiple_bnf_definitions_are_already_defined(self):
# Only 'baz' (defined by strings) is modified.
# The regex entries of 'foo' and 'bar' must be the expected ones after the modification.
sheerka, context, foo, bar, baz = self.init_test().with_concepts(
Concept("foo", definition="r'[a-z]+'"),
Concept("bar", definition="r'[0-1]+'"),
Concept("baz", definition="'one'|'twox'"), create_new=True).unpack()
service = sheerka.services[SheerkaConceptManager.NAME]
# it does not matter that baz is a bnf
to_add = {"meta": {"definition": "'one'|'two'"}}
res = sheerka.modify_concept(context, baz, to_add)
assert res.status
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"one": ["1003"],
"two": ["1003"]}
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {
RegExDef("[a-z]+").serialize(): ["1001"],
RegExDef("[0-1]+").serialize(): ["1002"],
}
assert len(service.compiled_concepts_by_regex) == 2
def test_references_are_updated_after_concept_modification(self):
sheerka, context, one, twenty_one = self.init_test().with_concepts(
"onz",
@@ -602,7 +712,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == NoModificationFound(foo, {"name": "foo", "body": "a body"})
def test_i_cannot_remove_meta_attributes(self):
def test_i_cannot_modify_and_remove_meta_attributes(self):
sheerka, context, foo = self.init_concepts(Concept("foo"))
res = sheerka.modify_concept(context, foo, to_remove={"meta": {"any_value": "foo"}})
@@ -611,7 +721,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == CannotRemoveMeta({"any_value": "foo"})
def test_i_cannot_remove_props_that_does_not_exists(self):
def test_i_cannot_modify_and_remove_props_that_does_not_exists(self):
sheerka, context, foo = self.init_concepts(Concept("foo"))
res = sheerka.modify_concept(context, foo, to_remove={"props": {"any_value": "foo"}})
@@ -620,7 +730,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == UnknownAttribute("any_value")
def test_i_cannot_remove_props_value_that_does_not_exists(self):
def test_i_cannot_modify_and_remove_props_value_that_does_not_exists(self):
# We need to return an error; otherwise, we would save a concept that is not modified
sheerka, context, foo = self.init_concepts(Concept("foo", props={"a": {"value"}}))
@@ -630,7 +740,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == ValueNotFound("a", "dummy")
def test_i_cannot_remove_variable_that_does_not_exists(self):
def test_i_cannot_modify_and_remove_variable_that_does_not_exists(self):
sheerka, context, foo = self.init_concepts(Concept("foo").def_var("a"))
res = sheerka.modify_concept(context, foo, to_remove={"variables": ["b"]})
@@ -649,6 +759,30 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.UNKNOWN_CONCEPT)
def test_i_cannot_modify_with_an_invalid_regex_expression(self):
# Modifying 'foo' with an invalid regex must fail with an ERROR,
# and the first keyword / regex entries must be the same as before the call.
sheerka, context, foo, = self.init_test().with_concepts(
Concept("foo", definition="'hello'|'hola'"), create_new=True).unpack()
service = sheerka.services[SheerkaConceptManager.NAME]
# sanity
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"hello": ["1001"],
"hola": ["1001"]}
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {}
assert len(service.compiled_concepts_by_regex) == 0
to_add = {"meta": {"definition": "r'[a-z+'"}} # invalid regex definition
res = sheerka.modify_concept(context, foo, to_add)
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body.msg == 'unterminated character set'
# nothing must have changed
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"hello": ["1001"],
"hola": ["1001"]}
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {}
assert len(service.compiled_concepts_by_regex) == 0
def test_i_can_get_and_set_attribute(self):
sheerka, context = self.init_concepts()
foo = Concept("foo")
@@ -683,6 +817,8 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.get_by_name(one.name) == one
assert sheerka.get_by_key(one.key) == one
assert sheerka.get_by_hash(one.get_definition_hash()) == one
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) != {}
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) != {}
res = sheerka.remove_concept(context, one)
@@ -694,6 +830,35 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(sheerka.get_by_key(one.key), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.isinstance(sheerka.get_by_hash(one.get_definition_hash()), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
def test_i_can_remove_a_first_regex_concept(self):
# Removes a concept defined by a regex.
# It must no longer be found by id, name, key or hash,
# and the regex entries (CONCEPTS_BY_REGEX_ENTRY, compiled_concepts_by_regex) must be empty.
sheerka, context, one = self.init_test().with_concepts(
Concept("one", definition="r'[a-z]+'"),
create_new=True).unpack()
service = sheerka.services[SheerkaConceptManager.NAME]
# sanity check
assert sheerka.get_by_id(one.id) == one
assert sheerka.get_by_name(one.name) == one
assert sheerka.get_by_key(one.key) == one
assert sheerka.get_by_hash(one.get_definition_hash()) == one
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) != {}
assert len(service.compiled_concepts_by_regex) != 0
res = sheerka.remove_concept(context, one)
assert res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.SUCCESS)
assert sheerka.isinstance(sheerka.get_by_id(one.id), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.isinstance(sheerka.get_by_name(one.name), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.isinstance(sheerka.get_by_key(one.key), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.isinstance(sheerka.get_by_hash(one.get_definition_hash()), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {}
assert len(service.compiled_concepts_by_regex) == 0
def test_i_cannot_remove_a_concept_that_does_not_exist(self):
sheerka, context = self.init_concepts()
one = Concept("one", id="1001")
+5 -5
View File
@@ -73,7 +73,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
assert loaded is not None
assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT)
assert loaded.body == ("key", "key_that_does_not_exist")
assert loaded.body == {"key": "key_that_does_not_exist"}
assert loaded.get_metadata().is_evaluated
def test_i_cannot_get_when_id_is_not_found(self):
@@ -83,7 +83,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
assert loaded is not None
assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT)
assert loaded.body == ("id", "id_that_does_not_exist")
assert loaded.body == {"id": "id_that_does_not_exist"}
assert loaded.get_metadata().is_evaluated
def test_i_can_instantiate_a_builtin_concept_when_it_has_its_own_class(self):
@@ -200,7 +200,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
new = sheerka.new("fake_concept")
assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
assert new.body == ('key', 'fake_concept')
assert new.body == {'key': 'fake_concept'}
def test_i_cannot_instantiate_with_invalid_id(self):
sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"),
@@ -210,7 +210,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
new = sheerka.new(("foo", "invalid_id"))
assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
assert new.body == [('key', 'foo'), ('id', 'invalid_id')]
assert new.body == {'key': 'foo', 'id': 'invalid_id'}
def test_i_cannot_instantiate_with_invalid_key(self):
sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"),
@@ -220,7 +220,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
new = sheerka.new(("invalid_key", "1001"))
assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
assert new.body == [('key', 'invalid_key'), ('id', '1001')]
assert new.body == {'key': 'invalid_key', 'id': '1001'}
def test_concept_id_is_irrelevant_when_only_one_concept(self):
sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"),
+5
View File
@@ -1,4 +1,5 @@
import pytest
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
@@ -172,6 +173,7 @@ def test_i_can_parse_concept_token(text, expected):
assert tokens[0].type == TokenKind.CONCEPT
assert tokens[0].value == expected
@pytest.mark.parametrize("text, expected", [
("r:key:", ("key", None)),
("r:key|id:", ("key", "id")),
@@ -197,3 +199,6 @@ def test_i_can_parse_regex_token(text, expected):
assert tokens[0].type == TokenKind.REGEX
assert tokens[0].value == expected
assert tokens[0].str_value == "r" + expected
assert tokens[0].repr_value == "r" + expected
assert tokens[0].strip_quote == expected[1:-1]
+1 -1
View File
@@ -58,7 +58,7 @@ def pr_ret_val(value, parser="parser", source=None):
def python_ret_val(source):
    """
    Build the return value of the Python parser for the given source

    The PythonNode is built from the stripped source (leading and trailing
    whitespaces removed), but the return value keeps the source as given.

    :param source: python expression, parsed in 'eval' mode
    :return: what pr_ret_val() returns for the PythonNode
    """
    stripped = source.strip()
    python_node = PythonNode(stripped, ast.parse(stripped, "<source>", 'eval'))
    return pr_ret_val(python_node, parser="Python", source=source)
+54 -1
View File
@@ -4,12 +4,13 @@ import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaConceptManager import NoFirstTokenError
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
from evaluators.DefConceptEvaluator import DefConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression
from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression, VariableExpression
from parsers.DefConceptParser import DefConceptNode, NameNode, DefConceptParser
from parsers.PythonParser import PythonNode, PythonParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -277,3 +278,55 @@ class TestDefConceptEvaluator(TestUsingMemoryBasedSheerka):
assert evaluated.status
assert evaluated.body.body.key == "foo2 __var__0"
assert evaluated.body.body.get_metadata().variables == [("x", None)]
def test_i_can_eval_when_bnf_concept_with_regex(self):
# A bnf definition that starts with a regex (whose rule name is 'a') must create a new concept.
# Name, key, definition and definition type of the created concept are checked.
context = self.get_context()
def_ret_val = DefConceptParser().parse(context, ParserInput("def concept hello a from bnf r'[a-z]+'=a 'hello'"))
evaluated = DefConceptEvaluator().eval(context, def_ret_val)
assert evaluated.status
assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
created_concept = evaluated.body.body
assert created_concept.get_metadata().name == "hello a"
assert created_concept.get_metadata().key == "hello __var__0"
assert created_concept.get_metadata().definition == "r'[a-z]+'=a 'hello'"
assert created_concept.get_metadata().definition_type == "bnf"
def test_i_can_eval_when_bnf_concept_with_variable(self):
# 'x' appears in the concept name and in the bnf definition:
# it must be declared as a variable and parsed as a VariableExpression.
context = self.get_context()
def_ret_val = DefConceptParser().parse(context, ParserInput("def concept hello x from bnf 'hello' x"))
evaluated = DefConceptEvaluator().eval(context, def_ret_val)
assert evaluated.status
assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
created_concept = evaluated.body.body
assert created_concept.get_metadata().name == "hello x"
assert created_concept.get_metadata().key == "hello __var__0"
assert created_concept.get_metadata().definition == "'hello' x"
assert created_concept.get_metadata().definition_type == "bnf"
assert created_concept.get_metadata().variables == [("x", None)]
assert created_concept._bnf == Sequence(StrMatch("hello"), VariableExpression("x"))
def test_i_cannot_eval_bnf_concept_with_unknown_variable(self):
# 'unknown' and 'foo' are used in the bnf definition but are not in the concept name ('name').
# The evaluation must fail with an ERROR whose body is one UNKNOWN_CONCEPT per name,
# in the order 'foo' then 'unknown'.
context = self.get_context()
def_ret_val = DefConceptParser().parse(context, ParserInput("def concept name from bnf unknown foo"))
evaluated = DefConceptEvaluator().eval(context, def_ret_val)
assert not evaluated.status
assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.ERROR)
unknown_concepts = [
context.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": "foo"}),
context.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": "unknown"}),
]
assert evaluated.body.body == unknown_concepts
def test_i_cannot_eval_bnf_concept_with_only_variable(self):
# The bnf definition is made of the variable 'x' only:
# the evaluation must fail with an ERROR whose body is a NoFirstTokenError.
sheerka, context = self.init_test().unpack()
def_ret_val = DefConceptParser().parse(context, ParserInput("def concept foo x from bnf x"))
evaluated = DefConceptEvaluator().eval(context, def_ret_val)
assert not evaluated.status
assert sheerka.isinstance(evaluated.body, BuiltinConcepts.ERROR)
assert isinstance(evaluated.body.body, NoFirstTokenError)
+12
View File
@@ -1237,6 +1237,18 @@ as:
assert len(l) > 0
sheerka.save_execution_context = False
def test_i_can_define_a_regex_concept_and_parse_it(self):
# Defines the concept 'binary' from a regex,
# then checks that the user input "01001" is evaluated as an instance of 'binary'.
sheerka, context = self.init_test().unpack()
res = sheerka.evaluate_user_input("def concept binary from bnf r'[01]+'")
assert len(res) == 1
assert res[0].status
res = sheerka.evaluate_user_input("01001")
assert len(res) == 1
assert res[0].status
assert sheerka.isinstance(res[0].body, "binary")
class TestSheerkaNonRegFile(TestUsingFileBasedSheerka):
def test_i_can_def_several_concepts(self):
+34 -3
View File
@@ -1,6 +1,7 @@
import ast
from dataclasses import dataclass
from core.builtin_concepts import ReturnValueConcept
from core.builtin_helpers import CreateObjectIdentifiers
from core.concept import CC, Concept, ConceptParts, DoNotResolve, CIO, CMV
from core.tokenizer import Tokenizer, TokenKind, Token
@@ -256,13 +257,17 @@ def get_node(
if sub_expr == "')'":
return ")"
if isinstance(sub_expr, ReturnValueConcept):
return sub_expr
if isinstance(sub_expr, (scnode, utnode, DoNotResolve)):
return sub_expr
if isinstance(sub_expr, CIO):
sub_expr.set_concept(concepts_map[sub_expr.concept_name])
if sub_expr.source:
node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya)
source = sub_expr.source or sub_expr.concept_name
if source:
node = get_node(concepts_map, expression_as_tokens, source, sya=sya)
sub_expr.start = node.start
sub_expr.end = node.end
return sub_expr
@@ -366,7 +371,7 @@ def get_node(
return CN(concept_found, start, start + length - 1, source=sub_expr)
else:
# else an UnrecognizedTokensNode
return utnode(start, start + length - 1, sub_expr)
return UTN(sub_expr, start, start + length - 1)
def init_body(item, concept, value):
@@ -482,3 +487,29 @@ def get_rete_conditions(*conditions_as_string):
res.append(Condition(identifier, attribute, value))
return AndConditions(res)
def get_test_obj(test_obj, real_obj, to_compare_delegate=None):
    """
    From a production object (Concept, ConceptNode, ....)
    Create a test object (CNC, CC ...) that can be used to validate the unit tests

    Lists and dicts are walked element by element (dicts by the keys of test_obj).
    A test object that exposes ``to_compare`` builds the comparable object itself;
    anything else is replaced by the production object unchanged.

    :param test_obj: expected object (or list / dict of expected objects)
    :param real_obj: production object(s) laid out like test_obj
    :param to_compare_delegate: callable handed to ``test_obj.to_compare`` so it can convert
        nested objects; defaults to ``get_test_obj`` itself when omitted
    :return: the object (or list / dict of objects) to compare with test_obj
    :raises Exception: when a list or dict does not have the same size as its production counterpart
    """
    # The delegate used to be silently ignored: honour it, falling back to the historical behaviour.
    delegate = get_test_obj if to_compare_delegate is None else to_compare_delegate

    if isinstance(test_obj, list):
        if len(test_obj) != len(real_obj):
            raise Exception(f"Not the same size ! {test_obj=}, {real_obj=}")
        return [get_test_obj(t, r, to_compare_delegate) for t, r in zip(test_obj, real_obj)]

    if isinstance(test_obj, dict):
        if len(test_obj) != len(real_obj):
            raise Exception(f"Not the same size ! {test_obj=}, {real_obj=}")
        return {k: get_test_obj(v, real_obj[k], to_compare_delegate) for k, v in test_obj.items()}

    if not hasattr(test_obj, "to_compare"):
        # plain values have no test counterpart: compare against the production object itself
        return real_obj

    return test_obj.to_compare(real_obj, delegate)
+445 -37
View File
@@ -1,3 +1,5 @@
import re
import pytest
import tests.parsers.parsers_utils
@@ -6,12 +8,14 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
from core.global_symbols import NotInit
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BaseNodeParser import CNC, UTN, CN, NoMatchingTokenError, SCN
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \
BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression
from tests.BaseTest import BaseTest
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.evaluators.EvaluatorTestsUtils import python_ret_val
cmap = {
"one": Concept("one"),
@@ -42,9 +46,14 @@ cmap = {
"three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
"t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),
# bnf with variable
"one thing": Concept("one x", definition="one x").def_var("x"),
"x shoe": Concept("x shoe", definition="x 'shoe'").def_var("x"),
# testing keywords
"def_only": Concept("def"),
"def number": Concept("def number", definition="def (one|two)=number"),
# sequence of keywords using bnf definition
# "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
# "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),
@@ -68,8 +77,8 @@ def u(parsing_expression, start, end, children=None):
if isinstance(parsing_expression, str):
parsing_expression = StrMatch(parsing_expression)
if isinstance(parsing_expression, StrMatch):
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)
if isinstance(parsing_expression, Match):
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match, parsing_expression.to_match)
return NonTerminalNode(parsing_expression, start, end, [], children)
@@ -105,7 +114,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(context, cmap["one hundred"], cmap["number"])
sheerka.set_isa(context, cmap["hundreds"], cmap["number"])
# Pay attention. 'twenties (t1 and t2) are not set as number
# Pay attention. 'twenties (t1 and t2) are not set as 'number'
thirties = cls.update_bnf(context, Concept("thirties",
definition="thirty number",
@@ -158,7 +167,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
for i, pair in enumerate(my_concepts_map):
my_concepts_map[pair] = updated[i]
parser = BnfNodeParser(sheerka=sheerka) if init_from_sheerka else BnfNodeParser()
if init_from_sheerka:
parser = BnfNodeParser(sheerka=sheerka)
else:
parser = BnfNodeParser().init_from_concepts(context, my_concepts_map.values())
return sheerka, context, parser
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
@@ -198,7 +210,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert len(bnf_parsers_helpers) == len(expected_array)
for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array):
assert parser_helper.sequence == expected_sequence
to_compare = tests.parsers.parsers_utils.get_test_obj(expected_sequence, parser_helper.sequence)
# assert parser_helper.sequence == expected_sequence
assert to_compare == expected_sequence
if len(bnf_parsers_helpers) == 1:
return bnf_parsers_helpers[0].sequence
@@ -221,7 +235,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
(StrMatch("3.14"), "3.14"),
(StrMatch("+"), "+"),
])
def test_i_can_match_simple_bnf(self, expr, text):
def test_i_can_match_str_bnf(self, expr, text):
my_map = {
text: self.bnf_concept("foo", expr)
}
@@ -229,6 +243,57 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sequence = self.validate_get_concepts_sequences(my_map, text, [text])
assert sequence[0].underlying == u(expr, 0, 0)
@pytest.mark.parametrize("expr, text, end", [
    (RegExMatch("bar"), "bar", 0),
    (RegExMatch("[a-z]+"), "xyz", 0),
    (RegExMatch("[a-z=]+"), "uvt=xyz=abc", 4),
])
def test_i_can_match_regex_bnf(self, expr, text, end):
    """A concept defined by a single regex must match the whole text, from token 0 to token ``end``."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    underlying = matched[0].underlying
    assert underlying.start == 0
    assert underlying.end == end
    assert underlying.parsing_expression == expr
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(StrMatch("foo"), RegExMatch("bar")), "foo bar", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z]+")), "foo xyz", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z=]+")), "foo uvt=xyz=abc", 6),
])
def test_i_can_match_sequence_str_regex(self, expr, text, end):
    """A literal followed by a regex must be matched as one sequence ending at token ``end``."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    # The children are taken from the actual node: only the top-level node is pinned here.
    underlying = matched[0].underlying
    assert underlying == u(expr, 0, end, underlying.children)
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(RegExMatch("bar"), StrMatch("foo")), "bar foo", 2),
    (Sequence(RegExMatch("[a-z]+"), StrMatch("foo")), "xyz foo", 2),
    (Sequence(RegExMatch("[a-z=]+"), StrMatch("foo")), "uvt=xyz=abc foo", 6),
])
def test_i_can_match_sequence_regex_str(self, expr, text, end):
    """A regex followed by a literal must be matched as one sequence ending at token ``end``."""
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    # The children are taken from the actual node: only the top-level node is pinned here.
    underlying = matched[0].underlying
    assert underlying == u(expr, 0, end, underlying.children)
def test_i_can_match_sequence_str_regex_str(self):
    """A regex enclosed between two literals must be matched as one sequence spanning tokens 0 to 8."""
    text = "foo uvt=xyz=abc baz"
    expr = Sequence(StrMatch("foo"), RegExMatch("[a-z=]+"), StrMatch("baz"))
    concepts = {text: self.bnf_concept("foo", expr)}

    matched = self.validate_get_concepts_sequences(concepts, text, [text])

    # The children are taken from the actual node: only the top-level node is pinned here.
    underlying = matched[0].underlying
    assert underlying == u(expr, 0, 8, underlying.children)
def test_i_can_match_multiple_concepts_in_one_input(self):
my_map = {
"one": self.bnf_concept("one"),
@@ -356,8 +421,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [
# ("twenty one", [CNC("foo", source="twenty one")]),
# ("twenty three", []), # three does not exist
("twenty one", [CNC("foo", source="twenty one")]),
("twenty three", []), # three does not exist
("twenty four", []), # four exists but should not be seen
])
def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
@@ -388,7 +453,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("three", []),
])
def test_i_can_parse_unordered_choice(self, text, expected):
def test_i_can_match_unordered_choice(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", UnOrderedChoice(
StrMatch("one"),
@@ -402,7 +467,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("", []),
("two", []),
])
def test_i_can_parse_optional(self, text, expected):
def test_i_can_match_optional(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", Optional(StrMatch("one")))
}
@@ -413,7 +478,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("twenty one", [CNC("foo", source="twenty one")]),
("one", [CNC("foo", source="one")]),
])
def test_i_can_parse_sequence_starting_with_optional(self, text, expected):
def test_i_can_match_sequence_starting_with_optional(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -427,7 +492,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two three", [CNC("foo", source="one two three")]),
("one two", [CNC("foo", source="one two")]),
])
def test_i_can_parse_sequence_ending_with_optional(self, text, expected):
def test_i_can_match_sequence_ending_with_optional(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -442,7 +507,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two three", [CNC("foo", source="one two three")]),
("one three", [CNC("foo", source="one three")]),
])
def test_i_can_parse_sequence_with_optional_in_between(self, text, expected):
def test_i_can_match_sequence_with_optional_in_between(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -459,7 +524,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one", [CNC("foo", source="one")]),
("one one", [CNC("foo", source="one one")]),
])
def test_i_can_parse_zero_or_more(self, text, expected):
def test_i_can_match_zero_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one")))
}
@@ -471,7 +536,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two", [CNC("foo", source="one two")]),
("one one two", [CNC("foo", source="one one two")]),
])
def test_i_can_parse_sequence_and_zero_or_more(self, text, expected):
def test_i_can_match_sequence_and_zero_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -485,7 +550,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [
("one, one , one", [CNC("foo", source="one, one , one")]),
])
def test_i_can_parse_zero_or_more_with_separator(self, text, expected):
def test_i_can_match_zero_or_more_with_separator(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=","))
}
@@ -508,7 +573,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one", [CNC("foo", source="one")]),
("one one one", [CNC("foo", source="one one one")]),
])
def test_i_can_parse_one_or_more(self, text, expected):
def test_i_can_match_one_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
}
@@ -520,7 +585,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two", [CNC("foo", source="one two")]),
("one one two", [CNC("foo", source="one one two")]),
])
def test_i_can_parse_sequence_one_and_or_more(self, text, expected):
def test_i_can_match_sequence_one_and_or_more(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo",
Sequence(
@@ -534,7 +599,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [
("one, one , one", [CNC("foo", source="one, one , one")]),
])
def test_i_can_parse_one_or_more_with_separator(self, text, expected):
def test_i_can_match_one_or_more_with_separator(self, text, expected):
my_map = {
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=","))
}
@@ -763,7 +828,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")}
assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")}
def test_i_can_parse_concept_reference_that_is_not_in_grammar(self):
def test_i_can_match_concept_reference_that_is_not_in_grammar(self):
my_map = {
"one": Concept("one"),
"two": Concept("two"),
@@ -817,6 +882,234 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert concept_foo.get_compiled() == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
ConceptParts.BODY: DoNotResolve(value='twenty one')}
@pytest.mark.parametrize("expr, expected", [
    ("one 'car'", [CNC("foo", source="one 'car'", x=python_ret_val("'car'"))]),  # python
    ("one bar", [CNC("foo", source="one bar", x=CC("bar"))]),  # simple concept
    ("one super car", [CNC("foo", source="one super car", x=CC("super car"))]),  # long concept
    ("one shoe", [CNC("foo", source="one shoe", x=CC("thing", source="shoe", body=DoNotResolve("shoe")))]),  # bnf
])
def test_i_can_match_variable_when_ending_with_one_variable(self, expr, expected):
    """'one' followed by variable x: x may be python code, a simple concept, a long concept or a bnf concept."""
    trailing_variable = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", trailing_variable),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "thing": Concept("thing", definition="'shoe'|'skirt'"),
        "super car": Concept("super car"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_ending_with_one_variable_and_sya(self):
    """With an 'x plus y' concept available, two readings of 'one bar plus baz' are expected."""
    trailing_variable = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", trailing_variable),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }
    text = "one bar plus baz"

    # reading 1: the variable only captures 'bar', the rest is left outside of 'foo'
    short_reading = [CNC("foo", source="one bar", x=CC("bar")), UTN(" plus "), CN("baz")]
    # reading 2: the variable captures the whole 'bar plus baz' expression
    long_reading = [
        CNC("foo", source="one bar plus baz", x=CC("plus", source="bar plus baz", x="bar", y="baz")),
    ]

    self.validate_get_concepts_sequences(concepts, text, [short_reading, long_reading], multiple_result=True)
def test_i_can_match_variable_when_ending_with_one_variable_and_multiple_results(self):
    """Two concepts share the name 'pretty big': one result is expected per candidate for x."""
    trailing_variable = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", trailing_variable),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }
    text = "one pretty big"

    with_first_candidate = [CNC("foo", source="one pretty big", x=CC("pretty big"))]
    with_second_candidate = [CNC("foo", source="one pretty big", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [with_first_candidate, with_second_candidate],
                                         multiple_result=True)
def test_i_can_match_variable_when_ending_with_multiple_variables_and_multiple_results(self):
    """'one' followed by variables x and y, where two concepts named 'pretty' can fill x."""
    two_trailing_variables = Sequence(StrMatch("one"), VariableExpression("x"), VariableExpression("y"))
    concepts = {
        "foo": self.bnf_concept("foo", two_trailing_variables),
        "pretty": Concept("pretty", body="pretty"),
        "pretty2": Concept("pretty"),
        "big": Concept("big", body="big"),
    }
    text = "one pretty big"

    with_first_candidate = [CNC("foo", source="one pretty big", x=CC("pretty"), y=CC("big"))]
    with_second_candidate = [
        CNC("foo", source="one pretty big", x=CC("pretty2", source="pretty"), y=CC("big")),
    ]

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [with_first_candidate, with_second_candidate],
                                         multiple_result=True)
@pytest.mark.parametrize("expr, expected", [
    ("'my' shoe", [CNC("foo", source="'my' shoe", x=python_ret_val("'my' "))]),  # python
    ("one shoe", [CNC("foo", source="one shoe", x=CC("one"))]),  # concept
    ("my little shoe", [CNC("foo", source="my little shoe", x=CC("my little"))]),  # long concept
    ("black shoe", [CNC("foo", source="black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
])
def test_i_can_match_variable_when_starting_with_one_variable(self, expr, expected):
    """Variable x followed by 'shoe': x may be python code, a concept, a long concept or a bnf concept."""
    leading_variable = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", leading_variable),
        "one": Concept("one"),
        "my little": Concept("my little"),
        "color": Concept("color", definition="'blue'|'black'"),
        "and": Concept("x and y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_starting_with_one_variable_and_sya(self):
    """With an 'x but y' concept available, two readings of 'tiny but beautiful shoe' are expected."""
    leading_variable = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", leading_variable),
        "tiny": Concept("tiny"),
        "beautiful": Concept("beautiful"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }
    text = "tiny but beautiful shoe"

    # the reading we are after: x captures the whole 'tiny but beautiful' expression
    wanted = [
        CNC("foo",
            source="tiny but beautiful shoe",
            x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful")),
    ]
    # the other reading that is also produced: x only captures 'beautiful'
    unwanted = [CN("tiny"), UTN(" but "), CNC("foo", source="beautiful shoe", x=CC("beautiful"))]

    self.validate_get_concepts_sequences(concepts, text, [unwanted, wanted], multiple_result=True)
def test_i_can_match_variable_when_starting_with_multiple_variables(self):
    """Three leading variables (x, y, z) followed by 'shoe': a concept, python code and an 'x plus y' expression."""
    three_leading_variables = Sequence(VariableExpression("x"),
                                       VariableExpression("y"),
                                       VariableExpression("z"),
                                       StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", three_leading_variables),
        "one": Concept("one"),
        "two": Concept("two"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }
    text = "one 'one' one plus two shoe"

    # the other reading that is also produced: 'foo' is not recognized at all
    unwanted = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")]
    # the reading we are after: every variable is filled and 'foo' spans the whole text
    wanted = [
        CNC("foo",
            source="one 'one' one plus two shoe",
            x=CC("one"),
            y=python_ret_val(" 'one' "),
            z=CC("plus", source="one plus two", x="one", y="two")),
    ]

    self.validate_get_concepts_sequences(concepts, text, [unwanted, wanted], multiple_result=True)
def test_i_can_match_variable_when_starting_with_one_variable_and_longer_str(self):
    """A leading variable followed by three literals must still be matched as one 'foo'."""
    variable_then_literals = Sequence(VariableExpression("x"),
                                      StrMatch("foo"),
                                      StrMatch("bar"),
                                      StrMatch("baz"))
    concepts = {
        "foo": self.bnf_concept("foo", variable_then_literals),
        "one": Concept("one"),
    }
    text = "one foo bar baz"

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [CNC("foo", source="one foo bar baz", x=CC("one"))])
@pytest.mark.parametrize("expr, expected", [
    ("one 'pretty' shoe", [CNC("foo", source="one 'pretty' shoe", x=python_ret_val("'pretty' "))]),  # python
    ("one little shoe", [CNC("foo", source="one little shoe", x=CC("little"))]),  # concept
    ("one very big shoe", [CNC("foo", source="one very big shoe", x=CC("very big"))]),  # long concept
    ("one black shoe",
     [CNC("foo", source="one black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
    ("one tiny but beautiful shoe",
     [CNC("foo",
          source="one tiny but beautiful shoe",
          x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]),
])
def test_i_can_match_variable_in_between(self, expr, expected):
    """Variable x enclosed between the literals 'one' and 'shoe'."""
    enclosed_variable = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", enclosed_variable),
        "little": Concept("little"),
        "very big": Concept("very big"),
        "color": Concept("color", definition="'blue'|'black'"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_multiple_results_in_between(self):
    """Enclosed variable x with two concepts named 'pretty big': one result is expected per candidate."""
    enclosed_variable = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", enclosed_variable),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }
    text = "one pretty big shoe"

    with_first_candidate = [CNC("foo", source="one pretty big shoe", x=CC("pretty big"))]
    with_second_candidate = [CNC("foo", source="one pretty big shoe", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [with_first_candidate, with_second_candidate],
                                         multiple_result=True)
def test_i_can_match_regex_and_variable(self):
    """A regex followed by a variable: 'onyx' feeds the regex, 'shoe' fills x."""
    regex_then_variable = Sequence(RegExMatch("[a-z]+"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", regex_then_variable),
        "shoe": Concept("shoe"),
    }
    text = "onyx shoe"

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [CNC("foo", source="onyx shoe", x=CC("shoe"))])
def test_i_can_match_variable_and_regex(self):
    """A variable followed by a regex: 'one' fills x, 'onyx' feeds the regex."""
    variable_then_regex = Sequence(VariableExpression("x"), RegExMatch("[a-z]+"))
    concepts = {
        "foo": self.bnf_concept("foo", variable_then_regex),
        "one": Concept("one"),
    }
    text = "one onyx"

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [CNC("foo", source="one onyx", x=CC("one"))])
def test_i_can_reuse_the_same_variable(self):
    """The same variable may appear several times in a definition, but only once in concept.compiled."""
    same_variable_twice = Sequence(VariableExpression("x"),
                                   StrMatch("equals"),
                                   VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", same_variable_twice),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    _sheerka, context, *registered = self.init_concepts(*concepts.values())

    parser = BnfNodeParser()
    parser.init_from_concepts(context, registered)

    # same variable appears only once in the compiled variables
    text = "one equals one"
    expected = [CNC("foo", source="one equals one", x=CC("one"))]
    expected_sequence = compute_expected_array(concepts, text, expected)

    parser.reset_parser(context, ParserInput(text))
    helpers = parser.get_concepts_sequences(context)

    actual = tests.parsers.parsers_utils.get_test_obj(expected_sequence, helpers[0].sequence)
    # NOTE(review): validate_get_concepts_sequences compares against expected_sequence,
    # this test compares against expected - confirm the difference is intended.
    assert actual == expected
def test_i_cannot_match_variable_when_variables_discrepancy(self):
    """'x equals x' must not match when the two occurrences of x would receive different values."""
    same_variable_twice = Sequence(VariableExpression("x"),
                                   StrMatch("equals"),
                                   VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", same_variable_twice),
        "one": Concept("one"),
        "one_1": Concept("one", body="1"),
        "two": Concept("two"),
        "two_2": Concept("two", body="2"),
    }
    _sheerka, context, *registered = self.init_concepts(*concepts.values())

    parser = BnfNodeParser()
    parser.init_from_concepts(context, registered)

    parser.reset_parser(context, ParserInput("one equals two"))
    helpers = parser.get_concepts_sequences(context)

    # nothing must be recognized
    assert helpers[0].sequence == []
@pytest.mark.parametrize("bar_expr, expected", [
(ConceptExpression("foo"), {}),
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
@@ -833,7 +1126,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
# get_parsing_expression() also returns CHICKEN_AND_EGG
@@ -858,7 +1150,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -884,7 +1175,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -908,8 +1198,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"foo": self.bnf_concept("foo", expr),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
parser.context = context
parser.sheerka = sheerka
@@ -923,7 +1212,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"number": Concept("number"),
"twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
sheerka, context, parser = self.init_parser(my_map)
parser.context = context
parser.sheerka = sheerka
sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
@@ -1025,8 +1314,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
ConceptExpression(my_map["one"], rule_name="one"))
@pytest.mark.parametrize("expr, text, expected", [
# (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
# (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
(ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
(StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
(StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
])
def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
@@ -1053,7 +1342,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"three": self.bnf_concept("three")
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
parser.reset_parser(context, ParserInput("one three"))
sequences = parser.get_concepts_sequences(context)
@@ -1067,6 +1355,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
("one 'car'", True, [CNC("one thing", source="one 'car'", x=python_ret_val("'car'"), one="one")])
])
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
@@ -1359,8 +1648,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"expr": Concept("expr", definition="term ('+' term)*"),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
text = "1 + 2 * 3"
@@ -1396,8 +1684,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
ConceptExpression("term"))),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
text = "1 + 2 * 3"
@@ -1437,8 +1724,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
}
sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
sheerka, context, parser = self.init_parser(my_map)
assert parser.parse(context, ParserInput("foo bar")).status
assert parser.parse(context, ParserInput("foo foo foo bar")).status
@@ -1475,6 +1761,128 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert res.status
assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)])
def test_i_do_not_eat_unwanted_tokens_at_the_beginning_when_concept_with_variable(self):
    """In 'two one shoe', the leading variable of 'foo' must only capture 'one', leaving 'two' on its own."""
    leading_variable = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", leading_variable),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    sheerka, context, parser = self.init_parser(concepts)
    text = "two one shoe"

    outcome = parser.parse(context, ParserInput(text))

    assert outcome.status
    expected_nodes = [
        CN("two"),
        CNC("foo", source="one shoe", x=CC("one")),
    ]
    assert outcome.value.value == compute_expected_array(concepts, text, expected_nodes)
def test_i_do_not_eat_unwanted_tokens_at_the_end_when_concept_with_variable(self):
    """In 'one bar baz', the trailing variable of 'foo' must only capture 'bar', leaving 'baz' on its own."""
    trailing_variable = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", trailing_variable),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
    }
    sheerka, context, parser = self.init_parser(concepts)
    text = "one bar baz"

    outcome = parser.parse(context, ParserInput(text))

    assert outcome.status
    expected_nodes = [
        CNC("foo", source="one bar", x=CC("bar")),
        CN("baz"),
    ]
    assert outcome.value.value == compute_expected_array(concepts, text, expected_nodes)
@pytest.mark.parametrize("parsing_expression, expected", [
    (RegExMatch("a"), [RegExDef("a")]),
    (OrderedChoice(StrMatch("first"), RegExMatch("a|b")), ["first", RegExDef("a|b")]),
    (OrderedChoice(RegExMatch("a|b"), StrMatch("first")), [RegExDef("a|b"), "first"]),
    (Sequence(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (Sequence(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (OneOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (OneOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (ZeroOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (ZeroOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
])
def test_i_can_get_first_item(self, parsing_expression, expected):
    """BnfNodeFirstTokenVisitor must collect the expected first tokens (plain strings or RegExDef)."""
    first_token_visitor = BnfNodeFirstTokenVisitor(self.get_sheerka())

    first_token_visitor.visit(parsing_expression)

    assert first_token_visitor.first_tokens == expected
def test_i_cannot_parse_regex_when_no_next_matching_token_cannot_be_found(self):
    """Parsing 'abcdef' against the regex concept r'abcd' must be refused with NoMatchingTokenError(4)."""
    regex_concept = Concept("foo", definition="r'abcd'")
    sheerka, context, foo = self.init_test().with_concepts(regex_concept, create_new=True).unpack()
    parser = BnfNodeParser(sheerka=sheerka)

    outcome = parser.parse(context, ParserInput("abcdef"))

    assert not outcome.status
    refusal = outcome.body
    assert sheerka.isinstance(refusal, BuiltinConcepts.NOT_FOR_ME)
    assert refusal.reason == [NoMatchingTokenError(4)]
@pytest.mark.parametrize("text", [
    "one",
    " one",
    "one ",
    " one "
])
def test_i_cannot_parse_empty_variable(self, text):
    """
    'one' alone, with or without surrounding spaces, must be refused by the parser

    :param text: spelling of 'one' under test
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    # Parse the parametrized text: a hard-coded "one" would leave the whitespace variants untested.
    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("bnf, text", [
    (Sequence(VariableExpression("x"), StrMatch("foo")), "one foo"),
    (Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"),
    (Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"),
])
def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text):
    """
    The parser must answer NOT_FOR_ME when 'one', the only candidate for variable x, is not a known concept

    :param bnf: definition of the concept 'foo' (variable first, last or in between)
    :param text: input to parse
    """
    # Build 'foo' from the parametrized definition: a hard-coded one would make
    # the three cases exercise the very same grammar.
    sheerka, context, foo = self.init_test().with_concepts(
        self.bnf_concept("foo", bnf)
    ).unpack()

    parser = BnfNodeParser()
    parser.init_from_concepts(context, [foo])

    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("to_match, ignore_case, multiline, explicit_flags", [
    ("xxy", None, None, re.MULTILINE),
    ("xxy", True, True, re.MULTILINE),
    ("xxy", False, False, re.MULTILINE),
])
def test_i_can_serialize_reg_ex_def(self, to_match, ignore_case, multiline, explicit_flags):
    """A RegExDef must be equal to itself after a serialize / deserialize round trip."""
    original = RegExDef(to_match, ignore_case, multiline, explicit_flags)

    payload = original.serialize()
    restored = RegExDef().deserialize(payload)

    assert original == restored
def test_i_can_resolve_parsing_expression_for_variable_concept(self):
    """Once resolved, a leading variable must hold the expression that follows it as its first node."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    variable_first = Sequence(VariableExpression("x"), StrMatch("x"))

    resolved = parser.resolve_parsing_expression(context, variable_first, {}, set(), set())

    variable_node, following_node = resolved.nodes[0], resolved.nodes[1]
    assert isinstance(variable_node, VariableExpression)
    assert variable_node.nodes[0] == following_node
def test_i_can_resolve_parsing_expression_when_ending_with_variable_concept(self):
    """Resolve a sequence ending with a variable: the last node stays a VariableExpression."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    variable_last = Sequence(StrMatch("x"), VariableExpression("x"))

    resolved = parser.resolve_parsing_expression(context, variable_last, {}, set(), set())

    assert isinstance(resolved.nodes[1], VariableExpression)
    # the leading literal must not have any sub node
    assert resolved.nodes[0].nodes == []
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]),
+49 -5
View File
@@ -1,4 +1,5 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput
@@ -6,10 +7,9 @@ from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeParser
from parsers.BnfNodeParser import BnfNodeParser, RegExMatch, VariableExpression
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \
OneOrMore, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -49,6 +49,7 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("r'str'", RegExMatch("str")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
@@ -84,6 +85,19 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("r'str'=var", RegExMatch("str", rule_name="var")),
("r'foo'?=var", Optional(RegExMatch("foo"), rule_name="var")),
("(r'foo'?)=var", Optional(RegExMatch("foo"), rule_name="var")),
("r'foo'*=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'*)=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'+=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'+)=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'=var?", Optional(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)?", Optional(RegExMatch("foo", rule_name="var"))),
("r'foo'=var*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("r'foo'=var+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
@@ -118,6 +132,8 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("foo=f", c("foo", "f")),
("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
("c:foo:", c("foo")),
("c:|1001:", c("foo")),
])
def test_i_can_parse_regex_with_concept(self, expression, expected):
sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def")
@@ -131,6 +147,29 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
    ("x", VariableExpression("x")),
    ("x bar", Sequence(VariableExpression("x"), c("bar"))),
    ("bar x", Sequence(c("bar"), VariableExpression("x"))),
    ("x 'and' bar", Sequence(VariableExpression("x"), StrMatch("and"), c("bar"))),
    ("x | bar", OrderedChoice(VariableExpression("x"), c("bar"))),
    ("x*", ZeroOrMore(VariableExpression("x"))),
    ("x+", OneOrMore(VariableExpression("x"))),
    ("'str' = x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
    ("'str''='x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
    ("foo=x", VariableExpression("x")),
])
def test_i_can_parse_regex_with_variable(self, expression, expected):
    """Identifiers that do not resolve to a concept are parsed as variables.

    Only the concept ``bar`` is registered, so every other identifier in the
    parametrized expressions is expected to become a VariableExpression.
    """
    sheerka, context, regex_parser, bar = self.init_parser("bar")
    # Patch the expected tree so its concept references carry the ids known to sheerka.
    update_concepts_ids(sheerka, expected)

    res = regex_parser.parse(self.get_context(), expression)
    parsed = res.value

    assert res.status
    assert parsed.value == expected
    assert parsed.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", ConceptExpression("foo")),
("foo=f", ConceptExpression("foo", rule_name="f")),
@@ -208,13 +247,18 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
assert res.value.body == ('key', 'foo')
def test_i_cannot_parse_when_unknown_concept(self):
@pytest.mark.parametrize("text, expected", [
("c:foo:", {'key': 'foo'}),
("c:|1001:", {'id': '1001'}),
("c:foo|1001:", {'key': 'foo', 'id': '1001'}),
])
def test_i_cannot_parse_when_unknown_concept(self, text, expected):
sheerka, context, regex_parser = self.init_parser()
res = regex_parser.parse(self.get_context(), "foo")
res = regex_parser.parse(self.get_context(), text)
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ('key', 'foo')
assert res.value.body == expected
def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
sheerka, context, parser, one, two, number, twenties = self.init_parser(
+95 -13
View File
@@ -2,6 +2,7 @@ import ast
from dataclasses import dataclass
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV
from core.global_symbols import NotInit
@@ -9,13 +10,13 @@ from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.BaseNodeParser import SCWC
from parsers.BaseParser import UnexpectedEofParsingError
from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence, RegExMatch, OneOrMore, \
VariableExpression
from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode
from parsers.DefConceptParser import UnexpectedTokenParsingError, DefConceptNode
from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonParser, PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array
@@ -332,7 +333,7 @@ def concept add one to a as:
"def concept name from bnf ",
"def concept name from bnf as True",
])
def test_i_cannot_parse_empty_bnf_definition(self, text):
def test_i_cannot_parse_empty_bnf_definition_when_no_definition(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
error = res.body
@@ -347,7 +348,8 @@ def concept add one to a as:
node = res.value.value
definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(BnfDefinitionParser(), "a_concept | 'a_string'", None, definition, definition)
parser_result = ParserResultConcept(BnfDefinitionParser(), "a_concept | 'a_string'", None, definition,
definition)
expected = get_def_concept(name="name", body="__definition[0]", bnf_def=parser_result)
assert res.status
@@ -356,6 +358,22 @@ def concept add one to a as:
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_parse_def_concept_from_bnf_when_using_concept_token(self):
    """A ``c:<name>:`` token inside a BNF definition resolves to the named concept."""
    bnf = "c:a_concept: 'xxx'"
    text = "def concept name from bnf " + bnf
    sheerka, context, parser, a_concept = self.init_parser("a_concept")

    res = parser.parse(context, ParserInput(text))
    node = res.value.value

    # Expected definition: the referenced concept followed by the literal 'xxx'.
    definition = Sequence(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("xxx"))
    bnf_result = ParserResultConcept(BnfDefinitionParser(), bnf, None, definition, definition)
    expected = get_def_concept(name="name", bnf_def=bnf_result)

    assert res.status
    assert res.who == parser.name
    assert res.value.source == text
    assert isinstance(res.value, ParserResultConcept)
    assert node == expected
def test_i_can_parse_def_concept_where_bnf_references_itself(self):
text = "def concept name from bnf 'a' + name?"
sheerka, context, parser, a_concept = self.init_parser("a_concept")
@@ -495,15 +513,6 @@ from give me the date !
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
text = "def concept name from bnf unknown"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ("key", "unknown")
def test_i_cannot_parse_bnf_definition_referencing_multiple_concepts_sharing_the_same_name(self):
text = "def concept twenty one from bnf 'twenty' one"
sheerka, context, parser, *concepts = self.init_parser(Concept("one", body="1"), Concept("one", body="1.0"))
@@ -557,5 +566,78 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_parse_bnf_concept_with_regex(self):
    """A BNF definition made of a single ``r'...'`` literal yields a RegExMatch."""
    sheerka, context, parser, number = self.init_parser("number")
    bnf = "r'^[a-f0-9]{128}$'"
    text = "def concept sha512 from bnf " + bnf

    res = parser.parse(context, ParserInput(text))
    parsed = res.value

    assert res.status
    assert res.who == parser.name
    assert parsed.source == text
    assert isinstance(parsed, ParserResultConcept)

    # The regex is stored without the surrounding r'...' quoting.
    regex = RegExMatch("^[a-f0-9]{128}$")
    bnf_result = ParserResultConcept(BnfDefinitionParser(), bnf, None, regex, regex)
    assert parsed.value == get_def_concept(name="sha512", bnf_def=bnf_result)
def test_i_can_parse_bnf_concept_with_a_more_complicated_bnf(self):
    """Concept references, regexes, grouping and repetition can be mixed in one BNF."""
    sheerka, context, parser, number = self.init_parser("number")
    bnf = "number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"
    text = "def concept foo from bnf " + bnf

    res = parser.parse(context, ParserInput(text))
    parsed = res.value

    assert res.status
    assert res.who == parser.name
    assert parsed.source == text
    assert isinstance(parsed, ParserResultConcept)

    # Three alternatives: the concept alone, the regex alone, or one or more
    # (concept, regex) pairs.
    pair = Sequence(ConceptExpression(number, rule_name="number"), RegExMatch("[a-f0-9]+"))
    choice = OrderedChoice(
        ConceptExpression(number, rule_name="number"),
        RegExMatch("[a-f0-9]+"),
        OneOrMore(pair)
    )
    bnf_result = ParserResultConcept(BnfDefinitionParser(), bnf, None, choice, choice)
    assert parsed.value == get_def_concept(name="foo", bnf_def=bnf_result)
def test_i_can_parse_bnf_concept_definition_with_a_variable(self):
    """A BNF definition may mix a known concept with a variable and a ``where`` clause."""
    sheerka, context, parser, number = self.init_parser("number")
    text = "def concept foo from bnf number x where x"

    res = parser.parse(context, ParserInput(text))
    node = res.value.value

    # ``number`` is a registered concept; ``x`` is not, so it is expected as a variable.
    definition = Sequence(ConceptExpression(number, rule_name="number"), VariableExpression("x"))
    bnf_result = ParserResultConcept(BnfDefinitionParser(), "number x", None, definition, definition)
    expected = get_def_concept(name="foo", bnf_def=bnf_result, where="x")

    assert res.status
    assert res.who == parser.name
    assert res.value.source == text
    assert isinstance(res.value, ParserResultConcept)
    assert node == expected
def test_i_can_parse_bnf_definition_referencing_unknown_concept(self):
    """With no concept registered, a bare identifier in the BNF is parsed as a variable."""
    text = "def concept name from bnf unknown"
    sheerka, context, parser, *concepts = self.init_parser()

    res = parser.parse(context, ParserInput(text))
    node = res.value.value

    # Parsing succeeds and the identifier is expected as a VariableExpression.
    variable = VariableExpression("unknown")
    bnf_result = ParserResultConcept(BnfDefinitionParser(), "unknown", None, variable, variable)
    expected = get_def_concept(name="name", bnf_def=bnf_result)

    assert res.status
    assert res.who == parser.name
    assert res.value.source == text
    assert isinstance(res.value, ParserResultConcept)
    assert node == expected
+109
View File
@@ -0,0 +1,109 @@
from core.concept import Concept, ConceptParts, CC
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SyaNodeParser import SyaNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import get_test_obj
class TestParsersUtils(TestUsingMemoryBasedSheerka):
    """Tests for ``get_test_obj``, run against real parser output.

    Each test parses a short input, passes the resulting node (or concept) to
    ``get_test_obj`` together with a CNC / CC template, and checks the object
    that comes back.
    """

    def _parse_one_plus_two(self):
        """Return the first node SyaNodeParser produces for "one plus two"."""
        sheerka, context, one, two, plus = self.init_concepts(
            "one",
            "two",
            Concept("a plus b").def_var("a").def_var("b")
        )
        parser = SyaNodeParser().init_from_concepts(context, [one, two, plus])
        return parser.parse(context, ParserInput("one plus two")).body.body[0]

    def _parse_twenty_one(self):
        """Return the first node BnfNodeParser produces for "twenty one"."""
        sheerka, context, one, two, twenties = self.init_concepts(
            "one",
            "two",
            Concept("twenties", definition="'twenty' (one | two)=unit").def_var("unit").def_var("one").def_var("two")
        )
        parser = BnfNodeParser().init_from_concepts(context, [one, two, twenties])
        return parser.parse(context, ParserInput("twenty one")).body.body[0]

    def test_i_can_get_test_obj_when_CNC_from_sya(self):
        """A CNC template is filled from a node produced by SyaNodeParser."""
        cnode = self._parse_one_plus_two()

        # compare all attributes
        cnc_res = get_test_obj(CNC(concept_key="key", start=0, end=1, source="", exclude_body=False), cnode)
        assert isinstance(cnc_res, CNC)
        assert cnc_res == CNC("__var__0 plus __var__1", 0, 4, "one plus two", False, **cnode.concept.get_compiled())

        # I can discard start, end and source
        cnc_res = get_test_obj(CNC(concept_key="key"), cnode)
        assert isinstance(cnc_res, CNC)
        assert cnc_res == CNC("__var__0 plus __var__1", None, None, None, False, **cnode.concept.get_compiled())

    def test_i_can_get_test_obj_when_CNC_from_bnf(self):
        """A CNC template is filled from a node produced by BnfNodeParser."""
        cnode = self._parse_twenty_one()

        # compare all attributes
        cnc_res = get_test_obj(CNC(concept_key="key", start=0, end=1, source="", exclude_body=False), cnode)
        assert isinstance(cnc_res, CNC)
        assert cnc_res == CNC("twenties", 0, 2, "twenty one", False, **cnode.concept.get_compiled())

        # I can exclude body
        cnc_res = get_test_obj(CNC(concept_key="key", exclude_body=True), cnode)
        # Work on a copy so the concept's own compiled mapping is left untouched.
        expected_compiled = dict(cnode.concept.get_compiled())
        del expected_compiled[ConceptParts.BODY]
        assert isinstance(cnc_res, CNC)
        assert cnc_res == CNC("twenties", None, None, None, False, **expected_compiled)

    def test_i_can_get_test_obj_when_list(self):
        """A list of templates is matched element-wise against a list of nodes."""
        cnode = self._parse_one_plus_two()

        res = get_test_obj([CNC("key1"), CNC("key", 0, 1, "")], [cnode, cnode])

        assert len(res) == 2
        assert isinstance(res[0], CNC)
        assert res[0] == CNC("__var__0 plus __var__1", None, None, None, False, **cnode.concept.get_compiled())
        assert isinstance(res[1], CNC)
        assert res[1] == CNC("__var__0 plus __var__1", 0, 4, "one plus two", False, **cnode.concept.get_compiled())

    def test_i_can_get_test_obj_when_dict(self):
        """A dict of templates is matched key-wise against a dict of nodes."""
        cnode = self._parse_one_plus_two()

        res = get_test_obj({"key1": CNC("key1"), "key2": CNC("key", 0, 1, "")}, {"key1": cnode, "key2": cnode})

        assert len(res) == 2
        assert isinstance(res["key1"], CNC)
        assert res["key1"] == CNC("__var__0 plus __var__1", None, None, None, False, **cnode.concept.get_compiled())
        assert isinstance(res["key2"], CNC)
        assert res["key2"] == CNC("__var__0 plus __var__1", 0, 4, "one plus two", False, **cnode.concept.get_compiled())

    def test_i_can_get_test_obj_when_CC(self):
        """A CC template is filled from the concept carried by a parsed node."""
        cc = self._parse_twenty_one().concept

        # compare all attributes
        cc_res = get_test_obj(CC(concept="key", source="", exclude_body=False), cc)
        assert isinstance(cc_res, CC)
        assert cc_res == CC("twenties", "twenty one", False, **cc.get_compiled())

        # I can exclude body
        cc_res = get_test_obj(CC(concept="key", exclude_body=True), cc)
        # Work on a copy so the concept's own compiled mapping is left untouched.
        expected_compiled = dict(cc.get_compiled())
        del expected_compiled[ConceptParts.BODY]
        assert isinstance(cc_res, CC)
        assert cc_res == CC("twenties", "twenty one", True, **expected_compiled)
+2 -1
View File
@@ -2,7 +2,7 @@ import logging
import pytest
from core.concept import Concept
from core.global_symbols import NotInit, NotFound, Removed
from core.global_symbols import NotInit, NotFound, Removed, NoFirstToken
from core.tokenizer import Keywords
from sheerkapickle import tags
from sheerkapickle.SheerkaPickler import SheerkaPickler
@@ -68,6 +68,7 @@ class TestSheerkaPickler(TestUsingMemoryBasedSheerka):
(NotInit, {tags.CUSTOM: NotInit.value}),
(NotFound, {tags.CUSTOM: NotFound.value}),
(Removed, {tags.CUSTOM: Removed.value}),
(NoFirstToken, {tags.CUSTOM: NoFirstToken.value}),
])
def test_i_can_flatten_and_restore_custom_types(self, obj, expected):
sheerka = self.get_sheerka()