Fixed #30 : Add variable support in BNF concept definition

Fixed #31 : Add regex support in BNF Concept
Fixed #33 : Do not memorize object during restore
This commit is contained in:
2021-02-24 17:23:03 +01:00
parent cac2dad17f
commit 646c428edb
32 changed files with 2107 additions and 360 deletions
+11
View File
@@ -37,3 +37,14 @@ woman is a female
def concept human def concept human
man is a human man is a human
woman is a human woman is a human
# days of the week
def concept monday
def concept tuesday
def concept wednesday
def concept thursday
def concept friday
def concept saturday
def concept sunday
+3 -1
View File
@@ -1,4 +1,6 @@
def concept x is a string pre is_question() as isinstance(x, str) def concept x is a string pre is_question() as isinstance(x, str)
def concept x is a int pre is_question() as isinstance(x, int) def concept x is a int pre is_question() as isinstance(x, int)
def concept x is a integer pre is_question() as isinstance(x, int) def concept x is a integer pre is_question() as isinstance(x, int)
def concept x starts with y pre is_question() where x is a string as x.startswith(y) def concept x starts with y pre is_question() where x is a string as x.startswith(y)
def concept sha256 from bnf r'[a-f0-9]{64}'
def concept sha512 from bnf r'[a-f0-9]{128}'
+40 -20
View File
@@ -342,8 +342,9 @@ def evaluate(context,
def get_lexer_nodes(return_values, start, tokens): def get_lexer_nodes(return_values, start, tokens):
""" """
From a parser result, return the corresponding LexerNode Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
either ConceptNode, UnrecognizedTokensNode or SourceCodeNode On the contrary of the other method (get_lexer_nodes_using_positions),
all created lexer node will use the same offset (start)
:param return_values: :param return_values:
:param start: :param start:
:param tokens: :param tokens:
@@ -360,13 +361,12 @@ def get_lexer_nodes(return_values, start, tokens):
continue continue
end = start + len(tokens) - 1 end = start + len(tokens) - 1
lexer_nodes.append( lexer_nodes.append([SourceCodeNode(start,
[SourceCodeNode(start, end,
end, tokens,
tokens, ret_val.body.source,
ret_val.body.source, python_node=ret_val.body.body,
python_node=ret_val.body.body, return_value=ret_val)])
return_value=ret_val)])
elif ret_val.who == "parsers.ExactConcept": elif ret_val.who == "parsers.ExactConcept":
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body] concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -379,6 +379,11 @@ def get_lexer_nodes(return_values, start, tokens):
for node in nodes: for node in nodes:
node.start += start node.start += start
node.end += start node.end += start
if isinstance(node, ConceptNode):
for k, v in node.concept.get_compiled().items():
if hasattr(v, "start"):
v.start += start
v.end += start
# but append the whole sequence if when it's a sequence # but append the whole sequence if when it's a sequence
lexer_nodes.append(nodes) lexer_nodes.append(nodes)
@@ -397,9 +402,15 @@ def get_lexer_nodes(return_values, start, tokens):
def get_lexer_nodes_using_positions(return_values, positions): def get_lexer_nodes_using_positions(return_values, positions):
""" """
Transform all elements from return_values into lexer nodes Transform all elements from return_values into lexer nodes (ConceptNode, UnrecognizedTokensNode, SourceCodeNode...)
use positions to remap the exact positions Use positions to compute the exact new positions
Unlike the other method (get_lexer_nodes),
each return value is mapped to its own position: it is not an offset, but an absolute position
:param return_values:
:param positions: is a list of triplets (start, end, tokens)
:return:
""" """
lexer_nodes = [] lexer_nodes = []
for ret_val, position in zip(return_values, positions): for ret_val, position in zip(return_values, positions):
if ret_val.who in ("parsers.Python", 'parsers.PythonWithConcepts'): if ret_val.who in ("parsers.Python", 'parsers.PythonWithConcepts'):
@@ -425,6 +436,11 @@ def get_lexer_nodes_using_positions(return_values, positions):
for node in nodes: for node in nodes:
node.start = position.start node.start = position.start
node.end = position.end node.end = position.end
if isinstance(node, ConceptNode):
for k, v in node.concept.get_compiled().items():
if hasattr(v, "start"):
v.start += position.start
v.end += position.start
# but append the whole sequence if when it's a sequence # but append the whole sequence if when it's a sequence
lexer_nodes.extend(nodes) lexer_nodes.extend(nodes)
@@ -493,9 +509,10 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
def update_compiled(context, concept, errors, parsers=None): def update_compiled(context, concept, errors, parsers=None):
""" """
recursively iterate over concept.get_compiled() to replace LexerNode into concepts or list of ReturnValueConcept TL;DR;
When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...) Recursively iterate over concept.get_compiled() to replace LexerNode into concepts or list of ReturnValueConcept
the result will be a LexerNode. Long version:
When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...) the result will be a LexerNode.
In the specific case of a ConceptNode, the compiled variables will also be LexerNode (UnrecognizedTokensNode...) In the specific case of a ConceptNode, the compiled variables will also be LexerNode (UnrecognizedTokensNode...)
This function iterate over the compile to transform these nodes into concept of compiled AST This function iterate over the compile to transform these nodes into concept of compiled AST
:param context: :param context:
@@ -518,9 +535,12 @@ def update_compiled(context, concept, errors, parsers=None):
_validate_concept(v) _validate_concept(v)
elif isinstance(v, SourceCodeWithConceptNode): elif isinstance(v, SourceCodeWithConceptNode):
from parsers.PythonWithConceptsParser import PythonWithConceptsParser if v.return_value:
parser_helper = PythonWithConceptsParser() res = v.return_value
res = parser_helper.parse_nodes(context, v.get_all_nodes()) else:
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
parser_helper = PythonWithConceptsParser()
res = parser_helper.parse_nodes(context, v.get_all_nodes())
if res.status: if res.status:
c.get_compiled()[k] = [res] c.get_compiled()[k] = [res]
else: else:
@@ -556,7 +576,7 @@ def update_compiled(context, concept, errors, parsers=None):
# example : Concept("a plus b").def_var("a").def_var("b") # example : Concept("a plus b").def_var("a").def_var("b")
# and the user has entered 'a plus b' # and the user has entered 'a plus b'
# Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2') # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
# This means that 'a' and 'b' don't have any real value # This means that 'a' and 'b' don't have any real values
if len(concept.get_metadata().variables) > 0: if len(concept.get_metadata().variables) > 0:
for name, value in concept.get_metadata().variables: for name, value in concept.get_metadata().variables:
if _get_source(concept.get_compiled(), name) != name: if _get_source(concept.get_compiled(), name) != name:
@@ -633,7 +653,7 @@ def ensure_concept_or_rule(*items):
raise TypeError(f"'{items}' must be a concept or rule") raise TypeError(f"'{items}' must be a concept or rule")
def ensure_bnf(context, concept, parser_name="BaseNodeParser"): def ensure_bnf(context, concept, parser_name="BaseNodeParser", update_bnf_for_cached_concept=True):
if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF and not concept.get_bnf(): if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF and not concept.get_bnf():
from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfDefinitionParser import BnfDefinitionParser
regex_parser = BnfDefinitionParser() regex_parser = BnfDefinitionParser()
@@ -651,7 +671,7 @@ def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
raise Exception(bnf_parsing_ret_val.value) raise Exception(bnf_parsing_ret_val.value)
concept.set_bnf(bnf_parsing_ret_val.body.body) concept.set_bnf(bnf_parsing_ret_val.body.body)
if concept.id: if concept.id and update_bnf_for_cached_concept:
context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf()) # update bnf in cache context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf()) # update bnf in cache
+29 -2
View File
@@ -694,6 +694,33 @@ class CC:
self.end = end self.end = end
return self return self
def to_compare(self, other, to_compare_delegate):
    """
    Build the CC counterpart of other, to ease the comparison
    :param other: a CC (returned as is) or a Concept
    :param to_compare_delegate: callable invoked as
           to_compare_delegate(expected_compiled, actual_compiled, to_compare_delegate)
           its result is expanded as the keyword arguments of the new CC
    :return: a CC
    :raises NotImplementedError: when other is neither a CC nor a Concept
    """
    if isinstance(other, CC):
        return other

    if not isinstance(other, Concept):
        raise NotImplementedError(f"CC, {other=}")

    other_compiled = other.get_compiled()
    if self.exclude_body:
        # the body entry must not take part in the comparison
        other_compiled = {name: value
                          for name, value in other_compiled.items()
                          if name != ConceptParts.BODY}

    # when self has no compiled entries, other's own entries are used as the reference
    reference = self.compiled or other_compiled
    converted = to_compare_delegate(reference, other_compiled, to_compare_delegate)
    return CC(other, self.source, self.exclude_body, **converted)
@dataclass() @dataclass()
class CB: class CB:
@@ -825,8 +852,8 @@ class CIO:
self.concept_id = concept.id self.concept_id = concept.id
self.concept = concept self.concept = concept
self.source = source self.source = source
self.start = -1 self.start = None
self.end = -1 self.end = None
def set_concept(self, concept): def set_concept(self, concept):
self.concept = concept self.concept = concept
+6
View File
@@ -47,9 +47,15 @@ class RemovedType(CustomType):
super(RemovedType, self).__init__("**Removed**") super(RemovedType, self).__init__("**Removed**")
class NoFirstTokenType(CustomType):
    """
    CustomType initialised with the label "**NoFirstToken**"
    """

    def __init__(self):
        super().__init__("**NoFirstToken**")
NotInit = NotInitType() NotInit = NotInitType()
NotFound = NotFoundType() NotFound = NotFoundType()
Removed = RemovedType() Removed = RemovedType()
NoFirstToken = NoFirstTokenType()
class ErrorObj: class ErrorObj:
+10
View File
@@ -199,6 +199,16 @@ class ExecutionContext:
self._push = None self._push = None
def add_preprocess(self, name, **kwargs): def add_preprocess(self, name, **kwargs):
"""
PreProcess items are used during the parsing and the evaluation of the ReturnValueConcept
Using them, you can tweak the behaviour of parsers and evaluators (you can disable them for instance)
example :
context.add_preprocess(BaseEvaluator.get_name("priority15"), enabled=False)
context.add_preprocess(BaseEvaluator.get_name("all_priority15"), priority=99)
:param name:
:param kwargs:
:return:
"""
preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS) preprocess = self.sheerka.new(BuiltinConcepts.EVALUATOR_PRE_PROCESS)
preprocess.set_value("preprocess_name", name) preprocess.set_value("preprocess_name", name)
for k, v in kwargs.items(): for k, v in kwargs.items():
+8 -1
View File
@@ -734,7 +734,7 @@ class Sheerka(Concept):
if not isinstance(obj, Concept): if not isinstance(obj, Concept):
return True return True
return obj.key not in (BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE) return obj.key not in (None, BuiltinConcepts.UNKNOWN_CONCEPT, BuiltinConcepts.UNKNOWN_RULE)
@staticmethod @staticmethod
def isinstance(a, b): def isinstance(a, b):
@@ -879,6 +879,13 @@ class Sheerka(Concept):
return concept return concept
@staticmethod
def deepdiff(a, b):
    """
    Compare a and b with DeepDiff (ignore_order=True)
    The result is printed, then returned
    :param a:
    :param b:
    :return: the DeepDiff result
    """
    from deepdiff import DeepDiff

    differences = DeepDiff(a, b, ignore_order=True)
    print(differences)
    return differences
def to_profile(): def to_profile():
sheerka = Sheerka() sheerka = Sheerka()
@@ -1,5 +1,6 @@
import re
from dataclasses import dataclass from dataclasses import dataclass
from typing import Set from typing import Set, List, Union
import core.utils import core.utils
from cache.Cache import Cache from cache.Cache import Cache
@@ -12,9 +13,10 @@ from core.builtin_concepts_ids import BuiltinConcepts, AllBuiltinConcepts, Built
from core.builtin_helpers import ensure_concept, ensure_bnf from core.builtin_helpers import ensure_concept, ensure_bnf
from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \ from core.concept import Concept, DEFINITION_TYPE_DEF, DEFINITION_TYPE_BNF, freeze_concept_attrs, ConceptMetadata, \
VARIABLE_PREFIX VARIABLE_PREFIX
from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED from core.global_symbols import EVENT_CONCEPT_CREATED, NotInit, NotFound, ErrorObj, EVENT_CONCEPT_DELETED, NoFirstToken
from core.sheerka.services.sheerka_service import BaseService from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind from core.tokenizer import Tokenizer, TokenKind
from parsers.BnfNodeParser import RegExDef
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser" BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
@@ -98,6 +100,8 @@ class SheerkaConceptManager(BaseService):
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword" CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Concepts_By_First_Keyword"
RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword" RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "ConceptManager:Resolved_Concepts_By_First_Keyword"
CONCEPTS_BY_REGEX_ENTRY = "ConceptManager:Concepts_By_Regex"
CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions" CONCEPTS_BNF_DEFINITIONS_ENTRY = "ConceptManager:Concepts_BNF_Definitions"
def __init__(self, sheerka): def __init__(self, sheerka):
@@ -105,6 +109,7 @@ class SheerkaConceptManager(BaseService):
self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"} self.forbidden_meta = {"is_builtin", "key", "id", "props", "variables"}
self.allowed_meta = {attr for attr in vars(ConceptMetadata) if self.allowed_meta = {attr for attr in vars(ConceptMetadata) if
not attr.startswith("_") and attr not in self.forbidden_meta} not attr.startswith("_") and attr not in self.forbidden_meta}
self.compiled_concepts_by_regex = []
def initialize(self): def initialize(self):
self.sheerka.bind_service_method(self.create_new_concept, True) self.sheerka.bind_service_method(self.create_new_concept, True)
@@ -119,6 +124,7 @@ class SheerkaConceptManager(BaseService):
self.sheerka.bind_service_method(self.get_by_id, False, visible=False) self.sheerka.bind_service_method(self.get_by_id, False, visible=False)
self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False) self.sheerka.bind_service_method(self.is_not_a_variable, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False) self.sheerka.bind_service_method(self.get_concepts_by_first_token, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_by_first_regex, False, visible=False)
self.sheerka.bind_service_method(self.get_concepts_bnf_definitions, False, visible=False) self.sheerka.bind_service_method(self.get_concepts_bnf_definitions, False, visible=False)
self.sheerka.bind_service_method(self.clear_bnf_definition, True, visible=False) self.sheerka.bind_service_method(self.clear_bnf_definition, True, visible=False)
@@ -145,6 +151,9 @@ class SheerkaConceptManager(BaseService):
cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) cache = DictionaryCache().auto_configure(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False) self.sheerka.om.register_cache(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, cache, persist=False)
cache = DictionaryCache().auto_configure(self.CONCEPTS_BY_REGEX_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BY_REGEX_ENTRY, cache)
cache = Cache().auto_configure(self.CONCEPTS_BNF_DEFINITIONS_ENTRY) cache = Cache().auto_configure(self.CONCEPTS_BNF_DEFINITIONS_ENTRY)
self.sheerka.om.register_cache(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, cache, persist=False) self.sheerka.om.register_cache(self.CONCEPTS_BNF_DEFINITIONS_ENTRY, cache, persist=False)
@@ -158,6 +167,14 @@ class SheerkaConceptManager(BaseService):
res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) res = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body) self.sheerka.om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body)
# init the regular expression
self.sheerka.om.get(self.CONCEPTS_BY_REGEX_ENTRY, None)
from_db = self.sheerka.om.current_cache_manager().copy(self.CONCEPTS_BY_REGEX_ENTRY)
concepts_by_first_regex = {RegExDef().deserialize(k): v for k, v in from_db.items()}
res = self.compile_concepts_by_first_regex(context, concepts_by_first_regex)
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(res.body)
def initialize_builtin_concepts(self): def initialize_builtin_concepts(self):
""" """
Initializes the builtin concepts Initializes the builtin concepts
@@ -201,9 +218,9 @@ class SheerkaConceptManager(BaseService):
concept.init_key() concept.init_key()
init_bnf_ret_value = None init_bnf_ret_value = None
ontology = sheerka.om om = sheerka.om
if ontology.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()): if om.exists(self.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()):
error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept) error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_BY_KEY_ENTRY + "." + concept.key, concept)
return sheerka.ret( return sheerka.ret(
self.NAME, self.NAME,
@@ -220,33 +237,44 @@ class SheerkaConceptManager(BaseService):
except Exception as ex: except Exception as ex:
return sheerka.ret(self.NAME, False, ex.args[0]) return sheerka.ret(self.NAME, False, ex.args[0])
# compute new concepts_by_first_keyword # compute first token and/or first regex
init_ret_value = self.compute_concepts_by_first_token(context, [concept], True) init_ret_value = self.compute_concepts_by_first_item(context, [concept], True)
if not init_ret_value.status: if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value)) return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body by_first_keyword, by_first_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword # computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) init_ret_value = self.resolve_concepts_by_first_keyword(context, by_first_keyword)
if not init_ret_value.status: if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value)) return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body resolved_concepts_by_first_keyword = init_ret_value.body
# compile regex
compile_ret = self.compile_concepts_by_first_regex(context, by_first_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# if everything is fine # if everything is fine
freeze_concept_attrs(concept) freeze_concept_attrs(concept)
concept.freeze_definition_hash() concept.freeze_definition_hash()
ontology.add_concept(concept) om.add_concept(concept)
ontology.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword) om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, by_first_keyword)
ontology.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword) om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in by_first_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name: if concept.get_metadata().definition_type == DEFINITION_TYPE_DEF and concept.get_metadata().definition != concept.name:
# allow search by definition when definition relevant # allow search by definition when definition relevant
ontology.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept) om.put(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.get_metadata().definition, concept)
# update references # update references
for ref in self.compute_references(concept): for ref in self.compute_references(concept):
ontology.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id) om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# TODO : this line seems to be useless # TODO : this line seems to be useless
# The grammar is never reset # The grammar is never reset
@@ -286,7 +314,7 @@ class SheerkaConceptManager(BaseService):
# } # }
# #
sheerka = self.sheerka sheerka = self.sheerka
cache_manager = self.sheerka.om om = self.sheerka.om
if not to_add and not to_remove: if not to_add and not to_remove:
return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept))) return sheerka.ret(self.NAME, False, sheerka.err(NoModificationFound(concept)))
@@ -301,23 +329,19 @@ class SheerkaConceptManager(BaseService):
if res is not None: if res is not None:
return res return res
# To update concept by first keyword # To update concept by first keyword and first regex
# first remove the old references # first remove old first token and first regex entries
keywords = self.get_first_tokens(sheerka, concept) # keyword of the old concept concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
concepts_by_first_keyword = cache_manager.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
for keyword in keywords:
try:
concepts_by_first_keyword[keyword].remove(concept.id)
if len(concepts_by_first_keyword[keyword]) == 0:
del concepts_by_first_keyword[keyword]
except KeyError: # only occurs in unit tests when concepts are created without create_new()
pass
# and then update # and then update
init_ret_value = self.compute_concepts_by_first_token(context, [new_concept], False, concepts_by_first_keyword) init_ret_value = self.compute_concepts_by_first_item(context,
[new_concept],
False,
concepts_by_first_keyword,
concepts_by_regex)
if not init_ret_value.status: if not init_ret_value.status:
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value)) return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
concepts_by_first_keyword = init_ret_value.body concepts_by_first_keyword, concepts_by_regex = init_ret_value.body
# computes resolved concepts_by_first_keyword # computes resolved concepts_by_first_keyword
init_ret_value = self.resolve_concepts_by_first_keyword(context, init_ret_value = self.resolve_concepts_by_first_keyword(context,
@@ -327,18 +351,30 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value)) return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# update concept that referenced the old concept and clear old references # update concept that referenced the old concept and clear old references
self.update_references(context, concept, new_concept, to_add) self.update_references(context, concept, new_concept, to_add)
for ref in self.compute_references(concept): for ref in self.compute_references(concept):
cache_manager.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id) om.delete(self.CONCEPTS_REFERENCES_ENTRY, ref, concept.id)
# compute new references # compute new references
for ref in self.compute_references(new_concept): for ref in self.compute_references(new_concept):
cache_manager.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id) om.put(self.CONCEPTS_REFERENCES_ENTRY, ref, new_concept.id)
cache_manager.update_concept(concept, new_concept) # everything is ok, update the caches
cache_manager.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword) om.update_concept(concept, new_concept)
cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword) om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
# everything seems to be fine. Update the list of attributes # everything seems to be fine. Update the list of attributes
# Caution. Must be done AFTER update_concept() # Caution. Must be done AFTER update_concept()
@@ -349,6 +385,7 @@ class SheerkaConceptManager(BaseService):
if modify_source: if modify_source:
self._update_concept(context, concept, to_add, to_remove) self._update_concept(context, concept, to_add, to_remove)
# KSI 2021-02-16 publish the modification of the concept only when someone needs it
ret = sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept)) ret = sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=new_concept))
return ret return ret
@@ -362,17 +399,44 @@ class SheerkaConceptManager(BaseService):
# TODO : resolve concept first # TODO : resolve concept first
sheerka = context.sheerka sheerka = context.sheerka
refs = self.sheerka.om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if not sheerka.is_known(concept):
return sheerka.ret(self.NAME, False, sheerka.err(ConceptNotFound(concept)))
om = sheerka.om
refs = om.get(self.CONCEPTS_REFERENCES_ENTRY, concept.id)
if refs is not NotFound: if refs is not NotFound:
refs_instances = [sheerka.new_from_template(c, c.key) for c in [self.get_by_id(ref) for ref in refs]] refs_instances = [sheerka.new_from_template(c, c.key) for c in [self.get_by_id(ref) for ref in refs]]
return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(refs_instances))) return sheerka.ret(self.NAME, False, sheerka.err(ConceptIsReferenced(refs_instances)))
try: concepts_by_first_keyword, concepts_by_regex = self._remove_concept_first_token_and_first_regex(concept)
sheerka.om.remove_concept(concept)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept) # computes resolved concepts_by_first_keyword
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS)) init_ret_value = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword)
except ConceptNotFound as ex: if not init_ret_value.status:
return sheerka.ret(self.NAME, False, sheerka.err(ex)) return sheerka.ret(self.NAME, False, ErrorConcept(init_ret_value.value))
resolved_concepts_by_first_keyword = init_ret_value.body
# compile new regex
compile_ret = self.compile_concepts_by_first_regex(context, concepts_by_regex)
if not compile_ret.status:
return sheerka.ret(self.NAME, False, ErrorConcept(compile_ret.value))
compiled_concepts_by_first_regex = compile_ret.body
# everything seems fine. I can commit the modification and remove
om.remove_concept(concept)
om.put(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, concepts_by_first_keyword)
om.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, resolved_concepts_by_first_keyword)
om.put(self.CONCEPTS_BY_REGEX_ENTRY, False, {k.serialize(): v for k, v in concepts_by_regex.items()})
# update the compiled regex
self.compiled_concepts_by_regex.clear()
self.compiled_concepts_by_regex.extend(compiled_concepts_by_first_regex)
sheerka.publish(context, EVENT_CONCEPT_DELETED, concept)
return sheerka.ret(self.NAME, True, sheerka.new(BuiltinConcepts.SUCCESS))
def set_attr(self, concept, attribute, value): def set_attr(self, concept, attribute, value):
""" """
@@ -497,7 +561,7 @@ class SheerkaConceptManager(BaseService):
if c.id == concept_id: if c.id == concept_id:
return c return c
metadata = [(index_name, key), ("id", concept_id)] if concept_id else (index_name, key) metadata = {index_name: key, "id": concept_id} if concept_id else {index_name: key}
return self.sheerka.get_unknown(metadata) return self.sheerka.get_unknown(metadata)
def update_references(self, context, concept, modified_concept=None, modifications=None): def update_references(self, context, concept, modified_concept=None, modifications=None):
@@ -663,12 +727,39 @@ class SheerkaConceptManager(BaseService):
concept.get_metadata().key = None concept.get_metadata().key = None
if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF: if self._definition_has_changed(to_add) and concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
concept.set_bnf(None) concept.set_bnf(None)
ensure_bnf(context, concept) ensure_bnf(context, concept, update_bnf_for_cached_concept=False)
concept.init_key() concept.init_key()
return return
def _remove_concept_first_token_and_first_regex(self, concept):
    """
    Compute the first keyword and first regex maps once the id of concept is removed from them
    The method works on the dictionaries returned by om.copy(), and every list of ids
    is copied before being changed. Nothing is put back into the caches here.
    :param concept: concept whose id must be removed from the entries of its first items
    :return: tuple (concepts_by_first_keyword, concepts_by_regex)
             concepts_by_regex is keyed by RegExDef (the serialized keys are deserialized)
    """
    # items (keywords or regex) of the old concept
    # None means that the concept has no first item : nothing to remove
    first_items = self.get_first_items(self.sheerka, concept) or []
    concepts_by_first_keyword = self.sheerka.om.copy(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
    concepts_by_regex = self.sheerka.om.copy(self.CONCEPTS_BY_REGEX_ENTRY)

    for item in first_items:
        # regex are stored under their serialized form, keywords are stored as is
        if isinstance(item, RegExDef):
            index, key = concepts_by_regex, item.serialize()
        else:
            index, key = concepts_by_first_keyword, item

        try:
            remaining = index[key].copy()
            remaining.remove(concept.id)
        except (KeyError, ValueError):
            # only occurs in unit tests when concepts are created without create_new()
            # KeyError : the item is not indexed, ValueError : the id is not in the list
            continue

        if remaining:
            index[key] = remaining
        else:
            del index[key]

    return concepts_by_first_keyword, {RegExDef().deserialize(k): v for k, v in concepts_by_regex.items()}
@staticmethod @staticmethod
def get_first_tokens(sheerka, concept): def get_first_tokens(sheerka, concept):
""" """
@@ -677,6 +768,30 @@ class SheerkaConceptManager(BaseService):
:param concept: :param concept:
:return: :return:
""" """
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.get_bnf())
return [t for t in bnf_visitor.first_tokens if t is not NoFirstToken]
else:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
return [keyword]
return None
@staticmethod
def get_first_items(sheerka, concept) -> List[Union[str, RegExDef]]:
"""
Get all the first item needed by the concept
An item can either be a token, or regular expression
:param sheerka:
:param concept:
:return: List of string (if it's token or RegExDef if it's the definition of a regex)
"""
if concept.get_bnf(): if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka) bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
@@ -692,6 +807,55 @@ class SheerkaConceptManager(BaseService):
return None return None
@staticmethod
def compute_concepts_by_first_item(context,
                                   concepts,
                                   use_sheerka=False,
                                   previous_first_keywords=None,
                                   previous_first_regex=None):
    """
    Build the two "first item" indexes for the given concepts:
      * first keyword (str) -> list of concept ids
      * first regular expression (RegExDef) -> list of concept ids
    i.e. the dictionaries that go into CONCEPTS_BY_FIRST_KEYWORD_ENTRY and CONCEPTS_BY_REGEX_ENTRY

    :param context: execution context, only context.sheerka is used
    :param concepts: concepts to index
    :param use_sheerka: if True, the starting maps are copies of what sheerka currently holds
                        (the regex keys are deserialized into RegExDef) and the previous_* arguments are ignored
    :param previous_first_keywords: starting keyword map when use_sheerka is False.
                                    A non empty dict is updated in place
    :param previous_first_regex: starting regex map when use_sheerka is False.
                                 A non empty dict is updated in place
    :return: successful return value whose body is the tuple (keyword map, regex map),
             or a failed return value holding a NoFirstTokenError for the first concept that has no first item
    """
    sheerka = context.sheerka
    manager = SheerkaConceptManager

    if not use_sheerka:
        by_keyword = previous_first_keywords or {}
        by_regex = previous_first_regex or {}
    else:
        by_keyword = sheerka.om.copy(manager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY)
        serialized_regex = sheerka.om.copy(manager.CONCEPTS_BY_REGEX_ENTRY)
        # regular expressions are stored serialized, the callers expect RegExDef keys
        by_regex = {RegExDef().deserialize(key): ids for key, ids in serialized_regex.items()}

    for concept in concepts:
        first_items = manager.get_first_items(sheerka, concept)
        if first_items is None:
            # a concept without any first item cannot be indexed: stop here
            return sheerka.ret(sheerka.name, False, NoFirstTokenError(concept, concept.key))

        for first_item in first_items:
            index = by_regex if isinstance(first_item, RegExDef) else by_keyword
            index.setdefault(first_item, []).append(concept.id)

    # a concept id must appear only once per entry
    for index in (by_keyword, by_regex):
        for key, concept_ids in index.items():
            index[key] = core.utils.make_unique(concept_ids)

    return sheerka.ret("BaseNodeParser", True, (by_keyword, by_regex))
@staticmethod @staticmethod
def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None): def compute_concepts_by_first_token(context, concepts, use_sheerka=False, previous_entries=None):
""" """
@@ -812,6 +976,19 @@ class SheerkaConceptManager(BaseService):
return sheerka.ret("BaseNodeParser", True, res) return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def compile_concepts_by_first_regex(context, concepts_by_first_regex):
    """
    Compile every regular expression definition of the given map

    :param context: execution context, only context.sheerka is used
    :param concepts_by_first_regex: map RegExDef -> list of concept ids
    :return: successful return value whose body is a list of (compiled pattern, concept ids),
             or a failed return value holding the exception raised while compiling
    """

    def _compile(regex_def):
        flags = RegExDef.compile_flags(regex_def.ignore_case, regex_def.multiline, regex_def.explicit_flags)
        return re.compile(regex_def.to_match, flags)

    try:
        compiled = [(_compile(regex_def), concept_ids)
                    for regex_def, concept_ids in concepts_by_first_regex.items()]
    except Exception as ex:
        # any failure is reported to the caller instead of being raised
        return context.sheerka.ret("BaseNodeParser", False, ex)

    return context.sheerka.ret("BaseNodeParser", True, compiled)
def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None): def get_concepts_by_first_token(self, token, to_keep, custom=None, to_map=None, strip_quotes=False, parser=None):
""" """
Tries to find if there are concepts that match the value of the token Tries to find if there are concepts that match the value of the token
@@ -853,5 +1030,19 @@ class SheerkaConceptManager(BaseService):
return core.utils.make_unique(result + custom_concepts, return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id) lambda c: c.concept.id if hasattr(c, "concept") else c.id)
def get_concepts_by_first_regex(self, expr, pos):
    """
    Collect the concepts whose first regular expression matches the text at a given position

    :param expr: text to match against
    :param pos: index in expr where the match must start
    :return: list of concepts (resolved with sheerka.get_by_id), in declaration order.
             Empty list when no regular expression matches
    """
    matching_ids = (concept_ids
                    for compiled_regex, concept_ids in self.compiled_concepts_by_regex
                    if compiled_regex.match(expr, pos))
    return [self.sheerka.get_by_id(concept_id)
            for concept_ids in matching_ids
            for concept_id in concept_ids]
def get_concepts_bnf_definitions(self): def get_concepts_bnf_definitions(self):
return self.sheerka.om.current_cache_manager().caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY].cache return self.sheerka.om.current_cache_manager().caches[self.CONCEPTS_BNF_DEFINITIONS_ENTRY].cache
@@ -239,14 +239,17 @@ class ConsoleDebugLogger(BaseDebugLogger):
:param kwargs: :param kwargs:
:return: :return:
""" """
raw = kwargs.pop('raw', None)
if not self.debug_manager.compute_debug_concept(self.context, if not self.debug_manager.compute_debug_concept(self.context,
self.service_name, self.service_name,
self.method_name, self.method_name,
concept.id, concept.id,
self.debug_id): self.debug_id):
return return
raw = kwargs.pop('raw', None)
color = kwargs.pop('color', None)
str_vars = raw if raw else pp.pformat(kwargs) if kwargs else "" str_vars = raw if raw else pp.pformat(kwargs) if kwargs else ""
if color:
str_vars = CCM[color] + str_vars + CCM['reset']
text = " - " + text if text is not None else "" text = " - " + text if text is not None else ""
colon = ": " if str_vars else "" colon = ": " if str_vars else ""
str_text = f"{CCM['cyan']}..concept#{concept.id}{text}{colon} {CCM['reset']}" str_text = f"{CCM['cyan']}..concept#{concept.id}{text}{colon} {CCM['reset']}"
@@ -5,6 +5,7 @@ from core.builtin_helpers import expect_one, only_successful, evaluate, ensure_c
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \ from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved, AllConceptParts, \
concept_part_value concept_part_value
from core.global_symbols import NotInit from core.global_symbols import NotInit
from core.rule import Rule
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.sheerka_service import BaseService from core.sheerka.services.sheerka_service import BaseService
@@ -421,6 +422,9 @@ class SheerkaEvaluateConcept(BaseService):
else: else:
return evaluated return evaluated
elif isinstance(to_resolve, Rule):
raise NotImplementedError() # how to resolve rules ?
# otherwise, execute all return values to find out what is the value # otherwise, execute all return values to find out what is the value
else: else:
# update short term memory with current concept variables # update short term memory with current concept variables
+31 -18
View File
@@ -22,7 +22,7 @@ class ParserInput:
Helper class that tokenizes the input once for all Helper class that tokenizes the input once for all
""" """
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True): def __init__(self, text, tokens=None, length=None, start=None, end=None, yield_oef=True):
self.text = text self.text = text
self.tokens = tokens or None self.tokens = tokens or None
if self.tokens: if self.tokens:
@@ -38,13 +38,13 @@ class ParserInput:
last_token.line, last_token.line,
last_token.column + 1)] last_token.column + 1)]
self.length = None # to be computed in reset() self.length = length # to be computed (again) in reset()
self.yield_oef = yield_oef self.yield_oef = yield_oef
self.start = start or 0 self.start = start or 0
if end: if end:
self.original_end = end + 1 self.original_end = end # forced index of the last token
self.end = self.original_end self.end = self.original_end # index of the last token => len(tokens) - 1 if full tokens
else: else:
self.original_end = self.end = None self.original_end = self.end = None
@@ -61,30 +61,43 @@ class ParserInput:
return f"ParserInput({from_tokens}'{self.text}')" return f"ParserInput({from_tokens}'{self.text}')"
def reset(self, yield_oef=None): def reset(self, yield_oef=None):
def _get_end_from_yield_eof(_length, _yield_oef):
return _length - 1 if _yield_oef else _length - 2
if yield_oef is None: if yield_oef is None:
yield_oef = self.yield_oef yield_oef = self.yield_oef
# make sure tokens is correctly initialized # make sure tokens is correctly initialized
if self.tokens is None: if self.tokens is None:
# the eof if forced, but will not be yield if not set to.
self.tokens = list(Tokenizer(self.text, yield_eof=True)) self.tokens = list(Tokenizer(self.text, yield_eof=True))
self.length = len(self.tokens)
if self.original_end is None: if self.original_end is None:
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1 self.end = _get_end_from_yield_eof(self.length, yield_oef)
else: else:
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens self.end = self.original_end if self.original_end < self.length else \
_get_end_from_yield_eof(self.length, yield_oef)
self.pos = self.start - 1 self.pos = self.start - 1
self.token = None self.token = None
return self return self
def as_text(self, custom_switcher=None, tracker=None): def as_text(self, custom_switcher=None, tracker=None):
if not self.tokens or self.end is None:
# as_text is requested before reset().
# It means that we want the original text
return self.text
if custom_switcher is None: if custom_switcher is None:
if self.sub_text: if self.sub_text:
return self.sub_text return self.sub_text
if self.start == 0 and self.end == self.length: if self.start == 0 and self.end == self.length - 1:
self.sub_text = self.text self.sub_text = self.text
return self.sub_text return self.sub_text
self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end]) self.sub_text = core.utils.get_text_from_tokens(self.tokens[self.start:self.end + 1])
return self.sub_text return self.sub_text
else: else:
return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker) return core.utils.get_text_from_tokens(self.as_tokens(), custom_switcher, tracker)
@@ -92,16 +105,16 @@ class ParserInput:
def as_tokens(self): def as_tokens(self):
if self.sub_tokens: if self.sub_tokens:
return self.sub_tokens return self.sub_tokens
if self.start == 0 and self.end == self.length: if self.start == 0 and self.end == self.length - 1:
self.sub_tokens = self.tokens self.sub_tokens = self.tokens
return self.sub_tokens return self.sub_tokens
self.sub_tokens = self.tokens[self.start:self.end] self.sub_tokens = self.tokens[self.start:self.end + 1]
return self.sub_tokens return self.sub_tokens
def next_token(self, skip_whitespace=True): def next_token(self, skip_whitespace=True):
self.pos += 1 self.pos += 1
if self.pos >= self.end: if self.pos > self.end:
return False return False
self.token = self.tokens[self.pos] self.token = self.tokens[self.pos]
@@ -111,11 +124,11 @@ class ParserInput:
if skip_whitespace: if skip_whitespace:
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE): while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.pos += 1 self.pos += 1
if self.pos == self.end: if self.pos > self.end:
return False return False
self.token = self.tokens[self.pos] self.token = self.tokens[self.pos]
return self.pos < self.end return self.pos <= self.end
def the_token_after(self, skip_whitespace=True): def the_token_after(self, skip_whitespace=True):
""" """
@@ -123,13 +136,13 @@ class ParserInput:
Never returns None (returns TokenKind.EOF instead) Never returns None (returns TokenKind.EOF instead)
""" """
my_pos = self.pos + 1 my_pos = self.pos + 1
if my_pos >= self.end: if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1) return Token(TokenKind.EOF, "", -1, -1, -1)
if skip_whitespace: if skip_whitespace:
while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE): while self.tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1 my_pos += 1
if my_pos == self.end: if my_pos > self.end:
return Token(TokenKind.EOF, "", -1, -1, -1) return Token(TokenKind.EOF, "", -1, -1, -1)
return self.tokens[my_pos] return self.tokens[my_pos]
@@ -140,7 +153,7 @@ class ParserInput:
:param pos: :param pos:
:return: True is pos is a valid position False otherwise :return: True is pos is a valid position False otherwise
""" """
if pos < 0 or pos >= self.end: if pos < 0 or pos > self.end:
self.token = None self.token = None
return False return False
@@ -355,10 +368,10 @@ class SheerkaExecute(BaseService):
if pi is NotFound: # when CacheManager.cache_only is True if pi is NotFound: # when CacheManager.cache_only is True
pi = ParserInput(text) pi = ParserInput(text)
self.pi_cache.put(text, pi) self.pi_cache.put(text, pi)
return ParserInput(text, pi.tokens) # new instance, but no need to tokenize the text again return ParserInput(text, tokens=pi.tokens, length=pi.length) # new instance, but no need to tokenize the text again
key = text or core.utils.get_text_from_tokens(tokens) key = text or core.utils.get_text_from_tokens(tokens)
pi = ParserInput(key, tokens) pi = ParserInput(key, tokens=tokens, length=len(tokens))
self.pi_cache.put(key, pi) self.pi_cache.put(key, pi)
return pi return pi
+1 -1
View File
@@ -144,7 +144,7 @@ class SheerkaMemory(BaseService):
:param concept: :param concept:
:return: :return:
""" """
if self.sheerka.during_initialisation: if self.sheerka.during_initialisation or self.sheerka.during_restore:
return return
self.registration[key] = concept self.registration[key] = concept
+8 -3
View File
@@ -49,8 +49,8 @@ class TokenKind(Enum):
DEGREE = "degree" # ° DEGREE = "degree" # °
WORD = "word" WORD = "word"
EQUALSEQUALS = "==" EQUALSEQUALS = "=="
VAR_DEF = "__var__" VAR_DEF = "concept variable" # __var__
REGEX = "r'xxx' or r\"xxx\" or r|xxx| or r/xxx/" REGEX = "regex" # r'xxx' or r\"xxx\" or r|xxx| or r/xxx/ but not r:xxx: which means rules
@dataclass() @dataclass()
@@ -73,7 +73,10 @@ class Token:
if self._strip_quote: if self._strip_quote:
return self._strip_quote return self._strip_quote
self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value if self.type in (TokenKind.STRING, TokenKind.REGEX):
self._strip_quote = self.value[1:-1]
else:
self._strip_quote = self.value
return self._strip_quote return self._strip_quote
@property @property
@@ -120,6 +123,8 @@ class Token:
elif self.type == TokenKind.RULE: elif self.type == TokenKind.RULE:
from core.utils import str_concept from core.utils import str_concept
return str_concept(self.value, prefix="r:") return str_concept(self.value, prefix="r:")
elif self.type == TokenKind.REGEX:
return "r" + self.value
else: else:
return str(self.value) return str(self.value)
+44 -1
View File
@@ -1,3 +1,5 @@
from dataclasses import dataclass
import core.utils import core.utils
from core.ast_helpers import UnreferencedVariablesVisitor from core.ast_helpers import UnreferencedVariablesVisitor
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
@@ -11,6 +13,29 @@ from parsers.DefConceptParser import DefConceptNode, NameNode
from parsers.PythonParser import get_python_node from parsers.PythonParser import get_python_node
@dataclass(frozen=True)
class MandatoryVariable:
    """
    Marks a variable name that MUST have a match in the concept name.

    While looking for variables, we only collect *potential* variables:
    a name that has no match in the concept definition is simply ignored.
    For example:
        def concept foo x as isinstance(x, str)
    {x, str} are detected as potential variables, 'str' finds no match and that is fine.

    In some cases the variable found must exist, otherwise it is an error.
    For example:
        def concept foo from bnf xxx
    'xxx' is detected as a variable (assuming that there is no concept named 'xxx'),
    so a match must be found in the name of the concept.

    Wrapping the name into a MandatoryVariable is how the two cases are told apart.
    """
    name: str

    def __hash__(self):
        # the tag keeps the hash distinct from the hash of the bare name
        key = ("MandatoryVariable", self.name)
        return hash(key)
class ConceptOrRuleNameVisitor(ParsingExpressionVisitor): class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
""" """
Gets the concepts referenced by BNF Gets the concepts referenced by BNF
@@ -29,6 +54,9 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
else: else:
self.names.add(node.concept) self.names.add(node.concept)
def visit_VariableExpression(self, node):
    """
    Record the rule name of a variable expression as a MandatoryVariable
    :param node: visited expression, only node.rule_name is read
    """
    mandatory = MandatoryVariable(node.rule_name)
    self.names.add(mandatory)
def visit_all(self, node): def visit_all(self, node):
if node.rule_name: if node.rule_name:
self.names.add(node.rule_name) self.names.add(node.rule_name)
@@ -60,11 +88,13 @@ class DefConceptEvaluator(OneReturnValueEvaluator):
# validate the node # validate the node
variables_found = set() variables_found = set()
mandatory_variables = set() # these variable MUST have a match in the name (if the name is not None)
concept = Concept(str(def_concept_node.name)) concept = Concept(str(def_concept_node.name))
concept.get_metadata().definition_type = def_concept_node.definition_type concept.get_metadata().definition_type = def_concept_node.definition_type
name_to_use = self.get_name_to_use(def_concept_node) name_to_use = self.get_name_to_use(def_concept_node)
# get variables
for prop in ("definition", "where", "pre", "post", "body", "ret"): for prop in ("definition", "where", "pre", "post", "body", "ret"):
part_ret_val = getattr(def_concept_node, prop) part_ret_val = getattr(def_concept_node, prop)
@@ -87,13 +117,26 @@ class DefConceptEvaluator(OneReturnValueEvaluator):
# try to find what can be a property # try to find what can be a property
for p in self.get_variables(context, part_ret_val, name_to_use): for p in self.get_variables(context, part_ret_val, name_to_use):
variables_found.add(p) if isinstance(p, MandatoryVariable):
variables_found.add(p.name)
mandatory_variables.add(p.name)
else:
variables_found.add(p)
# add variables by order of appearance when possible # add variables by order of appearance when possible
for name_part in name_to_use: for name_part in name_to_use:
if name_part in variables_found: if name_part in variables_found:
concept.def_var(name_part, None) concept.def_var(name_part, None)
# check that all mandatory variables are defined in the name
# KSI: 2021-02-17
# The mandatory variables come for bnf definition where it was not possible to resolve to a concept
# So rather that issuing a 'UnresolvedVariableError' I prefer UNKNOWN_CONCEPT
if (diff := mandatory_variables.difference(set(name_to_use))) != set():
unknown_concepts = [sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": c}) for c in sorted(diff)]
error = sheerka.new(BuiltinConcepts.ERROR, body=unknown_concepts)
return sheerka.ret(self.name, False, error, parents=[return_value])
# add the remaining properties # add the remaining properties
# They mainly come from BNF definition # They mainly come from BNF definition
for p in variables_found: for p in variables_found:
+64 -2
View File
@@ -441,6 +441,11 @@ class GrammarErrorNode(ParsingError):
message: str message: str
@dataclass
class NoMatchingTokenError(ParsingError):
    """
    Parsing error that carries a position
    NOTE(review): presumably reported when no token matches at 'pos' - confirm against the callers
    """
    pos: int
class SyaAssociativity(Enum): class SyaAssociativity(Enum):
Left = "left" Left = "left"
Right = "right" Right = "right"
@@ -720,6 +725,35 @@ class CNC(CN):
txt += f", {k}='{v}'" txt += f", {k}='{v}'"
return txt + ")" return txt + ")"
def to_compare(self, other, to_compare_delegate):
    """
    Turn other into a CNC, to ease the comparison with this helper

    Only the positional information that is set on self (start, end, source) is copied from other,
    the rest is left to None.
    :param other: CNC (returned as is) or ConceptNode
    :param to_compare_delegate: callable used to convert the compiled values.
                                Called with (expected compiled, actual compiled, to_compare_delegate)
    :return: a CNC equivalent to other
    :raises NotImplementedError: when other is neither a CNC nor a ConceptNode
    """
    if isinstance(other, CNC):
        return other

    if not isinstance(other, ConceptNode):
        raise NotImplementedError("CNC")

    other_compiled = other.concept.get_compiled()
    if self.exclude_body:
        other_compiled = {name: value
                          for name, value in other_compiled.items()
                          if name != ConceptParts.BODY}

    # when nothing is expected on our side, other is compared with itself
    expected_compiled = self.compiled or other_compiled
    converted = to_compare_delegate(expected_compiled, other_compiled, to_compare_delegate)

    concept = other.concept
    start = other.start if self.start is not None else None
    end = other.end if self.end is not None else None
    source = other.source if self.source is not None else None
    return CNC(concept, start, end, source, self.exclude_body, **converted)
class UTN(HelperWithPos): class UTN(HelperWithPos):
""" """
@@ -763,6 +797,24 @@ class UTN(HelperWithPos):
txt += f", end={self.end}" txt += f", end={self.end}"
return txt + ")" return txt + ")"
def to_compare(self, other, to_compare_delegate):
    """
    Turn other into a UTN, to ease the comparison with this helper

    :param other: UTN (returned as is) or UnrecognizedTokensNode
    :param to_compare_delegate: not used by this implementation
    :return: a UTN built from the source, start and end of other
    :raises NotImplementedError: when other is neither a UTN nor an UnrecognizedTokensNode
    """
    if isinstance(other, UTN):
        return other

    if not isinstance(other, UnrecognizedTokensNode):
        raise NotImplementedError("UTN")

    return UTN(other.source, other.start, other.end)
class RN(HelperWithPos): class RN(HelperWithPos):
""" """
@@ -840,9 +892,19 @@ class BaseNodeParser(BaseParser):
:return: :return:
""" """
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
concepts_by_first_keyword = SheerkaConceptManager.compute_concepts_by_first_token(context, concepts).body service = context.sheerka.services[SheerkaConceptManager.NAME]
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body by_token, by_regex = SheerkaConceptManager.compute_concepts_by_first_item(context, concepts).body
context.sheerka.om.put(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY,
False,
{k.serialize(): v for k, v in by_regex.items()})
compiled = service.compile_concepts_by_first_regex(context, by_regex).body
service.compiled_concepts_by_regex.clear()
service.compiled_concepts_by_regex.extend(compiled)
resolved = SheerkaConceptManager.resolve_concepts_by_first_keyword(context, by_token).body
context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, context.sheerka.om.put(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
False, False,
resolved) resolved)
return self
+15 -9
View File
@@ -4,7 +4,7 @@ from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \ from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \
ConceptExpression, StrMatch ConceptExpression, StrMatch, RegExMatch, VariableExpression
class BnfDefinitionParser(BaseParser): class BnfDefinitionParser(BaseParser):
@@ -231,9 +231,11 @@ class BnfDefinitionParser(BaseParser):
if token.type == TokenKind.CONCEPT: if token.type == TokenKind.CONCEPT:
self.next_token() self.next_token()
concept = self.sheerka.new((token.value[0], token.value[1])) concept = self.sheerka.new((token.value[0], token.value[1]))
expr = ConceptExpression(concept) if not self.sheerka.is_known(concept):
# expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \ self.add_error(concept)
# else ConceptExpression(concept) return None
expr = ConceptExpression(concept, rule_name=concept.name)
return self.eat_rule_name_if_needed(expr) return self.eat_rule_name_if_needed(expr)
if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD): if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD):
@@ -245,20 +247,19 @@ class BnfDefinitionParser(BaseParser):
# (for example of recursive bnf definition) # (for example of recursive bnf definition)
if self.context.obj and hasattr(self.context.obj, "name"): if self.context.obj and hasattr(self.context.obj, "name"):
if concept_name == str(self.context.obj.name): if concept_name == str(self.context.obj.name):
return self.eat_rule_name_if_needed(ConceptExpression(concept_name)) return self.eat_rule_name_if_needed(ConceptExpression(concept_name)) # 2021-02-17 no rule name ?
concept = self.context.get_concept(concept_name) concept = self.context.get_concept(concept_name)
if not self.sheerka.is_known(concept): if not self.sheerka.is_known(concept):
self.add_error(concept) expr = VariableExpression(concept_name)
return None return self.eat_rule_name_if_needed(expr)
elif hasattr(concept, "__iter__"): elif hasattr(concept, "__iter__"):
self.add_error( self.add_error(
self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT, self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
body=("key", concept_name))) body=("key", concept_name)))
return None return None
else: else:
expr = ConceptExpression(concept) expr = ConceptExpression(concept, rule_name=concept.name)
expr.rule_name = concept.name
return self.eat_rule_name_if_needed(expr) return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.STRING: if token.type == TokenKind.STRING:
@@ -272,6 +273,11 @@ class BnfDefinitionParser(BaseParser):
ret = Sequence(*elements) ret = Sequence(*elements)
return self.eat_rule_name_if_needed(ret) return self.eat_rule_name_if_needed(ret)
if token.type == TokenKind.REGEX:
self.next_token()
ret = RegExMatch(core.utils.strip_quotes(token.strip_quote))
return self.eat_rule_name_if_needed(ret)
ret = StrMatch(core.utils.strip_quotes(token.value)) ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token() self.next_token()
return self.eat_rule_name_if_needed(ret) return self.eat_rule_name_if_needed(ret)
File diff suppressed because it is too large Load Diff
+4
View File
@@ -30,6 +30,10 @@ class PythonErrorNode(ParsingError):
@dataclass() @dataclass()
class ConceptDetectedError(ParsingError): class ConceptDetectedError(ParsingError):
"""
When the Python parser finds an identifier, and that identifier is a concept
So it's not for the PythonParser to respond
"""
name: str name: str
+2
View File
@@ -1142,6 +1142,8 @@ class SyaNodeParser(BaseNodeParser):
if sya_definitions: if sya_definitions:
self.test_only_sya_definitions = sya_definitions self.test_only_sya_definitions = sya_definitions
return self
@staticmethod @staticmethod
def _is_eligible(concept): def _is_eligible(concept):
""" """
+3 -1
View File
@@ -1,7 +1,7 @@
import json import json
import core.utils import core.utils
from core.global_symbols import NotInit, NotFound, Removed from core.global_symbols import NotInit, NotFound, Removed, NoFirstToken
from sheerkapickle import tags, utils, handlers from sheerkapickle import tags, utils, handlers
@@ -54,6 +54,8 @@ class SheerkaUnpickler:
instance = NotFound instance = NotFound
elif obj[tags.CUSTOM] == Removed.value: elif obj[tags.CUSTOM] == Removed.value:
instance = Removed instance = Removed
elif obj[tags.CUSTOM] == NoFirstToken.value:
instance = NoFirstToken
else: else:
raise KeyError(f"unknown {obj[tags.CUSTOM]}") raise KeyError(f"unknown {obj[tags.CUSTOM]}")
+177 -12
View File
@@ -8,7 +8,8 @@ from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF,
from core.global_symbols import NotInit, NotFound from core.global_symbols import NotInit, NotFound
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager, NoModificationFound, ForbiddenAttribute, \ from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager, NoModificationFound, ForbiddenAttribute, \
UnknownAttribute, CannotRemoveMeta, ValueNotFound, ConceptIsReferenced, NoFirstTokenError UnknownAttribute, CannotRemoveMeta, ValueNotFound, ConceptIsReferenced, NoFirstTokenError
from parsers.BnfNodeParser import Sequence, StrMatch, ConceptExpression, OrderedChoice, Optional, ZeroOrMore, OneOrMore from parsers.BnfNodeParser import Sequence, StrMatch, ConceptExpression, OrderedChoice, Optional, ZeroOrMore, OneOrMore, \
RegExDef, RegExMatch
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -60,6 +61,50 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash()) assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_HASH_ENTRY, concept.get_definition_hash())
assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+") assert sheerka.om.current_sdp().exists(service.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+")
def test_i_can_create_a_bnf_concept_that_starts_with_a_regex(self):
    """
    Creating BNF concepts whose definition is a regex must register them under
    CONCEPTS_BY_REGEX_ENTRY (keyed by the serialized RegExDef), add one compiled regex
    per definition to the service, and persist the entry when committing.
    """
    sheerka = self.get_sheerka(cache_only=False)
    context = self.get_context(sheerka)
    service = sheerka.services[SheerkaConceptManager.NAME]
    foo = self.bnf_concept("foo", RegExMatch("[a-z]+"))
    bar = self.bnf_concept("bar", RegExMatch("[0-9]+"))

    res = sheerka.create_new_concept(context, foo)
    assert res.status
    assert sheerka.isinstance(res.value, BuiltinConcepts.NEW_CONCEPT)

    # I can get by the first regex
    assert sheerka.om.get(service.CONCEPTS_BY_REGEX_ENTRY, RegExDef("[a-z]+").serialize()) == [foo.id]
    assert len(service.compiled_concepts_by_regex) == 1

    # I can commit
    sheerka.om.commit(context)

    # I can load from DB
    entry = sheerka.om.current_sdp().get(service.CONCEPTS_BY_REGEX_ENTRY)
    assert entry == {RegExDef("[a-z]+").serialize(): [foo.id]}

    # I can create another concept
    res = sheerka.create_new_concept(context, bar)
    assert res.status
    assert sheerka.isinstance(res.value, BuiltinConcepts.NEW_CONCEPT)

    # I can get by the first regex
    # (the entry of the first concept must still be there)
    assert sheerka.om.get(service.CONCEPTS_BY_REGEX_ENTRY, RegExDef("[0-9]+").serialize()) == [bar.id]
    assert sheerka.om.get(service.CONCEPTS_BY_REGEX_ENTRY, RegExDef("[a-z]+").serialize()) == [foo.id]
    assert len(service.compiled_concepts_by_regex) == 2

    # I can commit
    sheerka.om.commit(context)

    # I can load from DB
    entry = sheerka.om.current_sdp().get(service.CONCEPTS_BY_REGEX_ENTRY)
    assert entry == {
        RegExDef("[a-z]+").serialize(): [foo.id],
        RegExDef("[0-9]+").serialize(): [bar.id]
    }
def test_i_cannot_create_a_bnf_concept_that_references_a_concept_that_cannot_be_resolved(self): def test_i_cannot_create_a_bnf_concept_that_references_a_concept_that_cannot_be_resolved(self):
sheerka, context, one_1, one_1_0 = self.init_concepts(Concept("one", body="1"), Concept("one", body="1.0")) sheerka, context, one_1, one_1_0 = self.init_concepts(Concept("one", body="1"), Concept("one", body="1.0"))
twenty_one = Concept("twenty one", definition="'twenty' one", definition_type=DEFINITION_TYPE_BNF) twenty_one = Concept("twenty one", definition="'twenty' one", definition_type=DEFINITION_TYPE_BNF)
@@ -361,17 +406,16 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
# sdp is updated # sdp is updated
sheerka.om.commit(context) sheerka.om.commit(context)
from_sdp = sheerka.om.current_sdp().get(service.CONCEPTS_BY_ID_ENTRY, new_concept.id) sdp = sheerka.om.current_sdp()
from_sdp = sdp.get(service.CONCEPTS_BY_ID_ENTRY, new_concept.id)
assert from_sdp.get_metadata().body == "metadata value" assert from_sdp.get_metadata().body == "metadata value"
assert from_sdp.get_metadata().variables == [("var_name", "default value")] assert from_sdp.get_metadata().variables == [("var_name", "default value")]
assert from_sdp.get_prop(BuiltinConcepts.ISA) == {bar} assert from_sdp.get_prop(BuiltinConcepts.ISA) == {bar}
assert sheerka.om.current_sdp().get(service.CONCEPTS_BY_NAME_ENTRY, assert sdp.get(service.CONCEPTS_BY_NAME_ENTRY, new_concept.name).get_metadata().body == "metadata value"
new_concept.name).get_metadata().body == "metadata value" assert sdp.get(service.CONCEPTS_BY_KEY_ENTRY, new_concept.key).get_metadata().body == "metadata value"
assert sheerka.om.current_sdp().get(service.CONCEPTS_BY_KEY_ENTRY, assert sdp.get(service.CONCEPTS_BY_HASH_ENTRY,
new_concept.key).get_metadata().body == "metadata value" new_concept.get_definition_hash()).get_metadata().body == "metadata value"
assert sheerka.om.current_sdp().get(service.CONCEPTS_BY_HASH_ENTRY,
new_concept.get_definition_hash()).get_metadata().body == "metadata value"
def test_caches_are_update_when_i_modify_the_name(self): def test_caches_are_update_when_i_modify_the_name(self):
sheerka, context, foo = self.init_concepts("foo", cache_only=False) sheerka, context, foo = self.init_concepts("foo", cache_only=False)
@@ -496,6 +540,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
Concept("baz", definition="foo"), Concept("baz", definition="foo"),
create_new=True).unpack() create_new=True).unpack()
# sanity check
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == { assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
"foo": ["1001"], "foo": ["1001"],
"bar": ["1002"], "bar": ["1002"],
@@ -514,6 +559,71 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == { assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
'bar': ['1002', '1001', '1003']} 'bar': ['1002', '1001', '1003']}
def test_i_can_modify_bnf_definition_from_first_token_to_first_regex(self):
    """
    Replace a keyword based bnf definition by a regex based one
    The concept must leave the first keyword entry and appear in the regex entry
    """
    builder = self.init_test().with_concepts(
        Concept("foo", definition="'hello'|'hola'"),
        create_new=True)
    sheerka, context, foo = builder.unpack()
    service = sheerka.services[SheerkaConceptManager.NAME]
    by_keyword = SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    by_regex = SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY

    # sanity: before the modification, only the first keyword entry is filled
    assert sheerka.om.copy(by_keyword) == {"hello": ["1001"], "hola": ["1001"]}
    assert sheerka.om.copy(by_regex) == {}
    assert len(service.compiled_concepts_by_regex) == 0

    res = sheerka.modify_concept(context, foo, {"meta": {"definition": "r'[a-z]+'"}})

    assert res.status
    assert sheerka.om.copy(by_keyword) == {}
    assert sheerka.om.copy(by_regex) == {RegExDef("[a-z]+").serialize(): ["1001"]}
    assert len(service.compiled_concepts_by_regex) == 1
def test_i_can_modify_bnf_definition_from_first_regex_to_first_token(self):
    """
    Replace a regex based bnf definition by a keyword based one
    The concept must leave the regex entry and appear in the first keyword entry
    """
    builder = self.init_test().with_concepts(
        Concept("foo", definition="r'[a-z]+'"),
        create_new=True)
    sheerka, context, foo = builder.unpack()
    service = sheerka.services[SheerkaConceptManager.NAME]
    by_keyword = SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY
    by_regex = SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY

    # sanity: before the modification, only the regex entry is filled
    assert sheerka.om.copy(by_keyword) == {}
    assert sheerka.om.copy(by_regex) == {RegExDef("[a-z]+").serialize(): ["1001"]}
    assert len(service.compiled_concepts_by_regex) == 1

    res = sheerka.modify_concept(context, foo, {"meta": {"definition": "'hello'|'hola'"}})

    assert res.status
    assert sheerka.om.copy(by_keyword) == {"hello": ["1001"], "hola": ["1001"]}
    assert sheerka.om.copy(by_regex) == {}
    assert len(service.compiled_concepts_by_regex) == 0
def test_i_can_modify_when_multiple_bnf_definitions_are_already_defined(self):
    """
    Modify a keyword based concept while two regex based concepts are defined
    The regex entry and the compiled regexes must be left as they were
    """
    builder = self.init_test().with_concepts(
        Concept("foo", definition="r'[a-z]+'"),
        Concept("bar", definition="r'[0-1]+'"),
        Concept("baz", definition="'one'|'twox'"),
        create_new=True)
    sheerka, context, foo, bar, baz = builder.unpack()
    service = sheerka.services[SheerkaConceptManager.NAME]

    # it does not matter that baz is a bnf
    new_definition = {"meta": {"definition": "'one'|'two'"}}
    res = sheerka.modify_concept(context, baz, new_definition)

    assert res.status
    assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
        "one": ["1003"],
        "two": ["1003"],
    }
    assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {
        RegExDef("[a-z]+").serialize(): ["1001"],
        RegExDef("[0-1]+").serialize(): ["1002"],
    }
    assert len(service.compiled_concepts_by_regex) == 2
def test_references_are_updated_after_concept_modification(self): def test_references_are_updated_after_concept_modification(self):
sheerka, context, one, twenty_one = self.init_test().with_concepts( sheerka, context, one, twenty_one = self.init_test().with_concepts(
"onz", "onz",
@@ -602,7 +712,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == NoModificationFound(foo, {"name": "foo", "body": "a body"}) assert res.body.body == NoModificationFound(foo, {"name": "foo", "body": "a body"})
def test_i_cannot_remove_meta_attributes(self): def test_i_cannot_modify_and_remove_meta_attributes(self):
sheerka, context, foo = self.init_concepts(Concept("foo")) sheerka, context, foo = self.init_concepts(Concept("foo"))
res = sheerka.modify_concept(context, foo, to_remove={"meta": {"any_value": "foo"}}) res = sheerka.modify_concept(context, foo, to_remove={"meta": {"any_value": "foo"}})
@@ -611,7 +721,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == CannotRemoveMeta({"any_value": "foo"}) assert res.body.body == CannotRemoveMeta({"any_value": "foo"})
def test_i_cannot_remove_props_that_does_not_exists(self): def test_i_cannot_modify_and_remove_props_that_does_not_exists(self):
sheerka, context, foo = self.init_concepts(Concept("foo")) sheerka, context, foo = self.init_concepts(Concept("foo"))
res = sheerka.modify_concept(context, foo, to_remove={"props": {"any_value": "foo"}}) res = sheerka.modify_concept(context, foo, to_remove={"props": {"any_value": "foo"}})
@@ -620,7 +730,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == UnknownAttribute("any_value") assert res.body.body == UnknownAttribute("any_value")
def test_i_cannot_remove_props_value_that_does_not_exists(self): def test_i_cannot_modify_and_remove_props_value_that_does_not_exists(self):
# Need to returns an error, otherwise, we will save a concept that is not modified # Need to returns an error, otherwise, we will save a concept that is not modified
sheerka, context, foo = self.init_concepts(Concept("foo", props={"a": {"value"}})) sheerka, context, foo = self.init_concepts(Concept("foo", props={"a": {"value"}}))
@@ -630,7 +740,7 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert res.body.body == ValueNotFound("a", "dummy") assert res.body.body == ValueNotFound("a", "dummy")
def test_i_cannot_remove_variable_that_does_not_exists(self): def test_i_cannot_modify_and_remove_variable_that_does_not_exists(self):
sheerka, context, foo = self.init_concepts(Concept("foo").def_var("a")) sheerka, context, foo = self.init_concepts(Concept("foo").def_var("a"))
res = sheerka.modify_concept(context, foo, to_remove={"variables": ["b"]}) res = sheerka.modify_concept(context, foo, to_remove={"variables": ["b"]})
@@ -649,6 +759,30 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert not res.status assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(res.body, BuiltinConcepts.UNKNOWN_CONCEPT)
def test_i_cannot_modify_with_an_invalid_regex_expression(self):
    """
    A modification that uses an invalid regex must be rejected with an error
    The keyword entry, the regex entry and the compiled regexes must stay as they were
    """
    builder = self.init_test().with_concepts(
        Concept("foo", definition="'hello'|'hola'"),
        create_new=True)
    sheerka, context, foo = builder.unpack()
    service = sheerka.services[SheerkaConceptManager.NAME]

    def assert_initial_state():
        # same checks are done before and after the failed modification
        assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {
            "hello": ["1001"],
            "hola": ["1001"]}
        assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY) == {}
        assert len(service.compiled_concepts_by_regex) == 0

    # sanity
    assert_initial_state()

    invalid_definition = {"meta": {"definition": "r'[a-z+'"}}  # the character set is never closed
    res = sheerka.modify_concept(context, foo, invalid_definition)

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
    assert res.body.body.msg == 'unterminated character set'

    assert_initial_state()
def test_i_can_get_and_set_attribute(self): def test_i_can_get_and_set_attribute(self):
sheerka, context = self.init_concepts() sheerka, context = self.init_concepts()
foo = Concept("foo") foo = Concept("foo")
@@ -683,6 +817,8 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.get_by_name(one.name) == one assert sheerka.get_by_name(one.name) == one
assert sheerka.get_by_key(one.key) == one assert sheerka.get_by_key(one.key) == one
assert sheerka.get_by_hash(one.get_definition_hash()) == one assert sheerka.get_by_hash(one.get_definition_hash()) == one
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) != {}
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) != {}
res = sheerka.remove_concept(context, one) res = sheerka.remove_concept(context, one)
@@ -694,6 +830,35 @@ class TestSheerkaConceptManager(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(sheerka.get_by_key(one.key), BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(sheerka.get_by_key(one.key), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.isinstance(sheerka.get_by_hash(one.get_definition_hash()), BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(sheerka.get_by_hash(one.get_definition_hash()), BuiltinConcepts.UNKNOWN_CONCEPT)
assert sheerka.om.copy(SheerkaConceptManager.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
def test_i_can_remove_a_first_regex_concept(self):
    """
    Remove a concept whose bnf definition starts with a regex
    It must no longer be found by id, name, key or hash,
    and the regex entry and the compiled regexes must be emptied
    """
    builder = self.init_test().with_concepts(
        Concept("one", definition="r'[a-z]+'"),
        create_new=True)
    sheerka, context, one = builder.unpack()
    service = sheerka.services[SheerkaConceptManager.NAME]
    by_regex = SheerkaConceptManager.CONCEPTS_BY_REGEX_ENTRY
    unknown = BuiltinConcepts.UNKNOWN_CONCEPT

    # sanity check: the concept is known everywhere before the removal
    assert sheerka.get_by_id(one.id) == one
    assert sheerka.get_by_name(one.name) == one
    assert sheerka.get_by_key(one.key) == one
    assert sheerka.get_by_hash(one.get_definition_hash()) == one
    assert sheerka.om.copy(by_regex) != {}
    assert len(service.compiled_concepts_by_regex) != 0

    res = sheerka.remove_concept(context, one)

    assert res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.SUCCESS)

    assert sheerka.isinstance(sheerka.get_by_id(one.id), unknown)
    assert sheerka.isinstance(sheerka.get_by_name(one.name), unknown)
    assert sheerka.isinstance(sheerka.get_by_key(one.key), unknown)
    assert sheerka.isinstance(sheerka.get_by_hash(one.get_definition_hash()), unknown)
    assert sheerka.om.copy(by_regex) == {}
    assert len(service.compiled_concepts_by_regex) == 0
def test_i_cannot_remove_a_concept_that_does_not_exist(self): def test_i_cannot_remove_a_concept_that_does_not_exist(self):
sheerka, context = self.init_concepts() sheerka, context = self.init_concepts()
one = Concept("one", id="1001") one = Concept("one", id="1001")
+5 -5
View File
@@ -73,7 +73,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
assert loaded is not None assert loaded is not None
assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT)
assert loaded.body == ("key", "key_that_does_not_exist") assert loaded.body == {"key": "key_that_does_not_exist"}
assert loaded.get_metadata().is_evaluated assert loaded.get_metadata().is_evaluated
def test_i_cannot_get_when_id_is_not_found(self): def test_i_cannot_get_when_id_is_not_found(self):
@@ -83,7 +83,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
assert loaded is not None assert loaded is not None
assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT)
assert loaded.body == ("id", "id_that_does_not_exist") assert loaded.body == {"id": "id_that_does_not_exist"}
assert loaded.get_metadata().is_evaluated assert loaded.get_metadata().is_evaluated
def test_i_can_instantiate_a_builtin_concept_when_it_has_its_own_class(self): def test_i_can_instantiate_a_builtin_concept_when_it_has_its_own_class(self):
@@ -200,7 +200,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
new = sheerka.new("fake_concept") new = sheerka.new("fake_concept")
assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
assert new.body == ('key', 'fake_concept') assert new.body == {'key': 'fake_concept'}
def test_i_cannot_instantiate_with_invalid_id(self): def test_i_cannot_instantiate_with_invalid_id(self):
sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"), sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"),
@@ -210,7 +210,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
new = sheerka.new(("foo", "invalid_id")) new = sheerka.new(("foo", "invalid_id"))
assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
assert new.body == [('key', 'foo'), ('id', 'invalid_id')] assert new.body == {'key': 'foo', 'id': 'invalid_id'}
def test_i_cannot_instantiate_with_invalid_key(self): def test_i_cannot_instantiate_with_invalid_key(self):
sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"), sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"),
@@ -220,7 +220,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
new = sheerka.new(("invalid_key", "1001")) new = sheerka.new(("invalid_key", "1001"))
assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT) assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
assert new.body == [('key', 'invalid_key'), ('id', '1001')] assert new.body == {'key': 'invalid_key', 'id': '1001'}
def test_concept_id_is_irrelevant_when_only_one_concept(self): def test_concept_id_is_irrelevant_when_only_one_concept(self):
sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"), sheerka, context, *concepts = self.init_test().with_concepts(Concept("foo", body="foo1"),
+5
View File
@@ -1,4 +1,5 @@
import pytest import pytest
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
@@ -172,6 +173,7 @@ def test_i_can_parse_concept_token(text, expected):
assert tokens[0].type == TokenKind.CONCEPT assert tokens[0].type == TokenKind.CONCEPT
assert tokens[0].value == expected assert tokens[0].value == expected
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
("r:key:", ("key", None)), ("r:key:", ("key", None)),
("r:key|id:", ("key", "id")), ("r:key|id:", ("key", "id")),
@@ -197,3 +199,6 @@ def test_i_can_parse_regex_token(text, expected):
assert tokens[0].type == TokenKind.REGEX assert tokens[0].type == TokenKind.REGEX
assert tokens[0].value == expected assert tokens[0].value == expected
assert tokens[0].str_value == "r" + expected
assert tokens[0].repr_value == "r" + expected
assert tokens[0].strip_quote == expected[1:-1]
+1 -1
View File
@@ -58,7 +58,7 @@ def pr_ret_val(value, parser="parser", source=None):
def python_ret_val(source): def python_ret_val(source):
python_node = PythonNode(source, ast.parse(source, f"<source>", 'eval')) python_node = PythonNode(source.strip(), ast.parse(source.strip(), f"<source>", 'eval'))
return pr_ret_val(python_node, parser="Python", source=source) return pr_ret_val(python_node, parser="Python", source=source)
+54 -1
View File
@@ -4,12 +4,13 @@ import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaConceptManager import NoFirstTokenError
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer from core.tokenizer import Tokenizer
from evaluators.DefConceptEvaluator import DefConceptEvaluator from evaluators.DefConceptEvaluator import DefConceptEvaluator
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser
from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression, VariableExpression
from parsers.DefConceptParser import DefConceptNode, NameNode, DefConceptParser from parsers.DefConceptParser import DefConceptNode, NameNode, DefConceptParser
from parsers.PythonParser import PythonNode, PythonParser from parsers.PythonParser import PythonNode, PythonParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -277,3 +278,55 @@ class TestDefConceptEvaluator(TestUsingMemoryBasedSheerka):
assert evaluated.status assert evaluated.status
assert evaluated.body.body.key == "foo2 __var__0" assert evaluated.body.body.key == "foo2 __var__0"
assert evaluated.body.body.get_metadata().variables == [("x", None)] assert evaluated.body.body.get_metadata().variables == [("x", None)]
def test_i_can_eval_when_bnf_concept_with_regex(self):
    """
    Evaluate a 'def concept' whose bnf definition contains a regex
    Checks the name, key, definition and definition type of the created concept
    """
    context = self.get_context()
    text = "def concept hello a from bnf r'[a-z]+'=a 'hello'"
    parser = DefConceptParser()
    parsed = parser.parse(context, ParserInput(text))
    evaluated = DefConceptEvaluator().eval(context, parsed)

    assert evaluated.status
    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)

    new_concept = evaluated.body.body
    assert new_concept.get_metadata().name == "hello a"
    assert new_concept.get_metadata().key == "hello __var__0"
    assert new_concept.get_metadata().definition == "r'[a-z]+'=a 'hello'"
    assert new_concept.get_metadata().definition_type == "bnf"
def test_i_can_eval_when_bnf_concept_with_variable(self):
    """
    Evaluate a 'def concept' whose bnf definition contains a variable
    Checks the metadata of the created concept and its resulting bnf
    """
    context = self.get_context()
    text = "def concept hello x from bnf 'hello' x"
    parser = DefConceptParser()
    parsed = parser.parse(context, ParserInput(text))
    evaluated = DefConceptEvaluator().eval(context, parsed)

    assert evaluated.status
    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)

    new_concept = evaluated.body.body
    assert new_concept.get_metadata().name == "hello x"
    assert new_concept.get_metadata().key == "hello __var__0"
    assert new_concept.get_metadata().definition == "'hello' x"
    assert new_concept.get_metadata().definition_type == "bnf"
    assert new_concept.get_metadata().variables == [("x", None)]
    assert new_concept._bnf == Sequence(StrMatch("hello"), VariableExpression("x"))
def test_i_cannot_eval_bnf_concept_with_unknown_variable(self):
    """
    Evaluate a 'def concept' whose bnf definition uses names that are not defined
    The evaluation must fail with one UNKNOWN_CONCEPT per unknown name
    """
    context = self.get_context()
    text = "def concept name from bnf unknown foo"
    parser = DefConceptParser()
    parsed = parser.parse(context, ParserInput(text))
    evaluated = DefConceptEvaluator().eval(context, parsed)

    assert not evaluated.status
    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.ERROR)

    # expected errors, in the order they are compared: 'foo' first, then 'unknown'
    expected = [context.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body={"name": name})
                for name in ("foo", "unknown")]
    assert evaluated.body.body == expected
def test_i_cannot_eval_bnf_concept_with_only_variable(self):
    """
    Evaluate a 'def concept' whose bnf definition is made of a single variable
    The evaluation must fail with a NoFirstTokenError
    """
    sheerka, context = self.init_test().unpack()
    text = "def concept foo x from bnf x"
    parser = DefConceptParser()
    parsed = parser.parse(context, ParserInput(text))
    evaluated = DefConceptEvaluator().eval(context, parsed)

    assert not evaluated.status
    assert sheerka.isinstance(evaluated.body, BuiltinConcepts.ERROR)
    assert isinstance(evaluated.body.body, NoFirstTokenError)
+12
View File
@@ -1237,6 +1237,18 @@ as:
assert len(l) > 0 assert len(l) > 0
sheerka.save_execution_context = False sheerka.save_execution_context = False
def test_i_can_define_a_regex_concept_and_parse_it(self):
    """
    Define a concept from a regex bnf, then submit an input that matches the regex
    The input must be evaluated as an instance of the new concept
    """
    sheerka, _context = self.init_test().unpack()

    definition_results = sheerka.evaluate_user_input("def concept binary from bnf r'[01]+'")
    assert len(definition_results) == 1
    assert definition_results[0].status

    parsing_results = sheerka.evaluate_user_input("01001")
    assert len(parsing_results) == 1
    assert parsing_results[0].status
    assert sheerka.isinstance(parsing_results[0].body, "binary")
class TestSheerkaNonRegFile(TestUsingFileBasedSheerka): class TestSheerkaNonRegFile(TestUsingFileBasedSheerka):
def test_i_can_def_several_concepts(self): def test_i_can_def_several_concepts(self):
+34 -3
View File
@@ -1,6 +1,7 @@
import ast import ast
from dataclasses import dataclass from dataclasses import dataclass
from core.builtin_concepts import ReturnValueConcept
from core.builtin_helpers import CreateObjectIdentifiers from core.builtin_helpers import CreateObjectIdentifiers
from core.concept import CC, Concept, ConceptParts, DoNotResolve, CIO, CMV from core.concept import CC, Concept, ConceptParts, DoNotResolve, CIO, CMV
from core.tokenizer import Tokenizer, TokenKind, Token from core.tokenizer import Tokenizer, TokenKind, Token
@@ -256,13 +257,17 @@ def get_node(
if sub_expr == "')'": if sub_expr == "')'":
return ")" return ")"
if isinstance(sub_expr, ReturnValueConcept):
return sub_expr
if isinstance(sub_expr, (scnode, utnode, DoNotResolve)): if isinstance(sub_expr, (scnode, utnode, DoNotResolve)):
return sub_expr return sub_expr
if isinstance(sub_expr, CIO): if isinstance(sub_expr, CIO):
sub_expr.set_concept(concepts_map[sub_expr.concept_name]) sub_expr.set_concept(concepts_map[sub_expr.concept_name])
if sub_expr.source: source = sub_expr.source or sub_expr.concept_name
node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya) if source:
node = get_node(concepts_map, expression_as_tokens, source, sya=sya)
sub_expr.start = node.start sub_expr.start = node.start
sub_expr.end = node.end sub_expr.end = node.end
return sub_expr return sub_expr
@@ -366,7 +371,7 @@ def get_node(
return CN(concept_found, start, start + length - 1, source=sub_expr) return CN(concept_found, start, start + length - 1, source=sub_expr)
else: else:
# else an UnrecognizedTokensNode # else an UnrecognizedTokensNode
return utnode(start, start + length - 1, sub_expr) return UTN(sub_expr, start, start + length - 1)
def init_body(item, concept, value): def init_body(item, concept, value):
@@ -482,3 +487,29 @@ def get_rete_conditions(*conditions_as_string):
res.append(Condition(identifier, attribute, value)) res.append(Condition(identifier, attribute, value))
return AndConditions(res) return AndConditions(res)
def get_test_obj(test_obj, real_obj, to_compare_delegate=None):
    """
    From a production object (Concept, ConceptNode, ....)
    Create a test object (CNC, CC ...) that can be used to validate the unit tests

    Lists and dicts are walked item by item.
    Any other expected object is converted by its own 'to_compare' method when it has one,
    otherwise the production object is returned as is.

    :param test_obj: expected object, or list / dict of expected objects
    :param real_obj: production object, or container with the same layout as test_obj
    :param to_compare_delegate: not read by this function, kept so that existing calls stay valid
        NOTE(review): 'to_compare' always receives get_test_obj itself - confirm whether
        this parameter was meant to replace it
    :return: the converted object (a new list / dict when test_obj is a container)
    :raises Exception: when the containers do not have the same size,
        or when a key of test_obj is missing from real_obj
    """

    if isinstance(test_obj, list):
        if len(test_obj) != len(real_obj):
            raise Exception(f"Not the same size ! {test_obj=}, {real_obj=}")

        return [get_test_obj(t, r) for t, r in zip(test_obj, real_obj)]

    if isinstance(test_obj, dict):
        if len(test_obj) != len(real_obj):
            raise Exception(f"Not the same size ! {test_obj=}, {real_obj=}")

        converted = {}
        for key, expected in test_obj.items():
            # same size does not mean same keys: report the mismatch like the size check does,
            # instead of letting a bare KeyError escape
            try:
                actual = real_obj[key]
            except KeyError:
                raise Exception(f"Key not found ! {key=}, {test_obj=}, {real_obj=}") from None
            converted[key] = get_test_obj(expected, actual)
        return converted

    if not hasattr(test_obj, "to_compare"):
        return real_obj

    return test_obj.to_compare(real_obj, get_test_obj)
+445 -37
View File
@@ -1,3 +1,5 @@
import re
import pytest import pytest
import tests.parsers.parsers_utils import tests.parsers.parsers_utils
@@ -6,12 +8,14 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP
from core.global_symbols import NotInit from core.global_symbols import NotInit
from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager from core.sheerka.services.SheerkaConceptManager import SheerkaConceptManager
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BaseNodeParser import CNC, UTN, CN, NoMatchingTokenError, SCN
from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser, RegExMatch, \
BnfNodeFirstTokenVisitor, Match, RegExDef, VariableExpression
from tests.BaseTest import BaseTest from tests.BaseTest import BaseTest
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.evaluators.EvaluatorTestsUtils import python_ret_val
cmap = { cmap = {
"one": Concept("one"), "one": Concept("one"),
@@ -42,9 +46,14 @@ cmap = {
"three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"), "three_four": Concept("three_four", definition="three | four").def_var("three").def_var("four"),
"t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"), "t2": Concept("t2", definition="'twenty' three_four=unit").def_var("unit").def_var("three").def_var("four"),
# bnf with variable
"one thing": Concept("one x", definition="one x").def_var("x"),
"x shoe": Concept("x shoe", definition="x 'shoe'").def_var("x"),
# testing keywords # testing keywords
"def_only": Concept("def"), "def_only": Concept("def"),
"def number": Concept("def number", definition="def (one|two)=number"), "def number": Concept("def number", definition="def (one|two)=number"),
# sequence of keywords using bnf definition # sequence of keywords using bnf definition
# "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"), # "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
# "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"), # "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),
@@ -68,8 +77,8 @@ def u(parsing_expression, start, end, children=None):
if isinstance(parsing_expression, str): if isinstance(parsing_expression, str):
parsing_expression = StrMatch(parsing_expression) parsing_expression = StrMatch(parsing_expression)
if isinstance(parsing_expression, StrMatch): if isinstance(parsing_expression, Match):
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match) return TerminalNode(parsing_expression, start, end, parsing_expression.to_match, parsing_expression.to_match)
return NonTerminalNode(parsing_expression, start, end, [], children) return NonTerminalNode(parsing_expression, start, end, [], children)
@@ -105,7 +114,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sheerka.set_isa(context, cmap["one hundred"], cmap["number"]) sheerka.set_isa(context, cmap["one hundred"], cmap["number"])
sheerka.set_isa(context, cmap["hundreds"], cmap["number"]) sheerka.set_isa(context, cmap["hundreds"], cmap["number"])
# Pay attention. 'twenties (t1 and t2) are not set as number # Pay attention. 'twenties (t1 and t2) are not set as 'number'
thirties = cls.update_bnf(context, Concept("thirties", thirties = cls.update_bnf(context, Concept("thirties",
definition="thirty number", definition="thirty number",
@@ -158,7 +167,10 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
for i, pair in enumerate(my_concepts_map): for i, pair in enumerate(my_concepts_map):
my_concepts_map[pair] = updated[i] my_concepts_map[pair] = updated[i]
parser = BnfNodeParser(sheerka=sheerka) if init_from_sheerka else BnfNodeParser() if init_from_sheerka:
parser = BnfNodeParser(sheerka=sheerka)
else:
parser = BnfNodeParser().init_from_concepts(context, my_concepts_map.values())
return sheerka, context, parser return sheerka, context, parser
def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None): def validate_get_concepts_sequences(self, my_map, text, expected, multiple_result=False, post_init_concepts=None):
@@ -198,7 +210,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert len(bnf_parsers_helpers) == len(expected_array) assert len(bnf_parsers_helpers) == len(expected_array)
for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array): for parser_helper, expected_sequence in zip(bnf_parsers_helpers, expected_array):
assert parser_helper.sequence == expected_sequence to_compare = tests.parsers.parsers_utils.get_test_obj(expected_sequence, parser_helper.sequence)
# assert parser_helper.sequence == expected_sequence
assert to_compare == expected_sequence
if len(bnf_parsers_helpers) == 1: if len(bnf_parsers_helpers) == 1:
return bnf_parsers_helpers[0].sequence return bnf_parsers_helpers[0].sequence
@@ -221,7 +235,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
(StrMatch("3.14"), "3.14"), (StrMatch("3.14"), "3.14"),
(StrMatch("+"), "+"), (StrMatch("+"), "+"),
]) ])
def test_i_can_match_simple_bnf(self, expr, text): def test_i_can_match_str_bnf(self, expr, text):
my_map = { my_map = {
text: self.bnf_concept("foo", expr) text: self.bnf_concept("foo", expr)
} }
@@ -229,6 +243,57 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
sequence = self.validate_get_concepts_sequences(my_map, text, [text]) sequence = self.validate_get_concepts_sequences(my_map, text, [text])
assert sequence[0].underlying == u(expr, 0, 0) assert sequence[0].underlying == u(expr, 0, 0)
@pytest.mark.parametrize("expr, text, end", [
    (RegExMatch("bar"), "bar", 0),
    (RegExMatch("[a-z]+"), "xyz", 0),
    (RegExMatch("[a-z=]+"), "uvt=xyz=abc", 4),
])
def test_i_can_match_regex_bnf(self, expr, text, end):
    """
    A concept defined by a single regex is recognized
    Checks the start, the end and the parsing expression of the underlying node
    """
    concepts_by_text = {text: self.bnf_concept("foo", expr)}

    sequence = self.validate_get_concepts_sequences(concepts_by_text, text, [text])

    first_node = sequence[0]
    assert first_node.underlying.start == 0
    assert first_node.underlying.end == end
    assert first_node.underlying.parsing_expression == expr
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(StrMatch("foo"), RegExMatch("bar")), "foo bar", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z]+")), "foo xyz", 2),
    (Sequence(StrMatch("foo"), RegExMatch("[a-z=]+")), "foo uvt=xyz=abc", 6),
])
def test_i_can_match_sequence_str_regex(self, expr, text, end):
    """
    A sequence made of a string followed by a regex is recognized
    Only the boundaries of the underlying node are checked: its own children are reused
    """
    concepts_by_text = {text: self.bnf_concept("foo", expr)}

    sequence = self.validate_get_concepts_sequences(concepts_by_text, text, [text])

    first_node = sequence[0]
    assert first_node.underlying == u(expr, 0, end, first_node.underlying.children)
@pytest.mark.parametrize("expr, text, end", [
    (Sequence(RegExMatch("bar"), StrMatch("foo")), "bar foo", 2),
    (Sequence(RegExMatch("[a-z]+"), StrMatch("foo")), "xyz foo", 2),
    (Sequence(RegExMatch("[a-z=]+"), StrMatch("foo")), "uvt=xyz=abc foo", 6),
])
def test_i_can_match_sequence_regex_str(self, expr, text, end):
    """
    A sequence made of a regex followed by a string is recognized
    Only the boundaries of the underlying node are checked: its own children are reused
    """
    concepts_by_text = {text: self.bnf_concept("foo", expr)}

    sequence = self.validate_get_concepts_sequences(concepts_by_text, text, [text])

    first_node = sequence[0]
    assert first_node.underlying == u(expr, 0, end, first_node.underlying.children)
def test_i_can_match_sequence_str_regex_str(self):
    """
    A sequence made of a string, a regex and another string is recognized
    Only the boundaries of the underlying node are checked: its own children are reused
    """
    text = "foo uvt=xyz=abc baz"
    expr = Sequence(StrMatch("foo"), RegExMatch("[a-z=]+"), StrMatch("baz"))
    concepts_by_text = {text: self.bnf_concept("foo", expr)}

    sequence = self.validate_get_concepts_sequences(concepts_by_text, text, [text])

    first_node = sequence[0]
    assert first_node.underlying == u(expr, 0, 8, first_node.underlying.children)
def test_i_can_match_multiple_concepts_in_one_input(self): def test_i_can_match_multiple_concepts_in_one_input(self):
my_map = { my_map = {
"one": self.bnf_concept("one"), "one": self.bnf_concept("one"),
@@ -356,8 +421,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
self.validate_get_concepts_sequences(my_map, text, expected) self.validate_get_concepts_sequences(my_map, text, expected)
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
# ("twenty one", [CNC("foo", source="twenty one")]), ("twenty one", [CNC("foo", source="twenty one")]),
# ("twenty three", []), # three does not exist ("twenty three", []), # three does not exist
("twenty four", []), # four exists but should not be seen ("twenty four", []), # four exists but should not be seen
]) ])
def test_i_can_mix_sequence_and_ordered_2(self, text, expected): def test_i_can_mix_sequence_and_ordered_2(self, text, expected):
@@ -388,7 +453,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("three", []), ("three", []),
]) ])
def test_i_can_parse_unordered_choice(self, text, expected): def test_i_can_match_unordered_choice(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", UnOrderedChoice( "foo": self.bnf_concept("foo", UnOrderedChoice(
StrMatch("one"), StrMatch("one"),
@@ -402,7 +467,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("", []), ("", []),
("two", []), ("two", []),
]) ])
def test_i_can_parse_optional(self, text, expected): def test_i_can_match_optional(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", Optional(StrMatch("one"))) "foo": self.bnf_concept("foo", Optional(StrMatch("one")))
} }
@@ -413,7 +478,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("twenty one", [CNC("foo", source="twenty one")]), ("twenty one", [CNC("foo", source="twenty one")]),
("one", [CNC("foo", source="one")]), ("one", [CNC("foo", source="one")]),
]) ])
def test_i_can_parse_sequence_starting_with_optional(self, text, expected): def test_i_can_match_sequence_starting_with_optional(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", "foo": self.bnf_concept("foo",
Sequence( Sequence(
@@ -427,7 +492,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two three", [CNC("foo", source="one two three")]), ("one two three", [CNC("foo", source="one two three")]),
("one two", [CNC("foo", source="one two")]), ("one two", [CNC("foo", source="one two")]),
]) ])
def test_i_can_parse_sequence_ending_with_optional(self, text, expected): def test_i_can_match_sequence_ending_with_optional(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", "foo": self.bnf_concept("foo",
Sequence( Sequence(
@@ -442,7 +507,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two three", [CNC("foo", source="one two three")]), ("one two three", [CNC("foo", source="one two three")]),
("one three", [CNC("foo", source="one three")]), ("one three", [CNC("foo", source="one three")]),
]) ])
def test_i_can_parse_sequence_with_optional_in_between(self, text, expected): def test_i_can_match_sequence_with_optional_in_between(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", "foo": self.bnf_concept("foo",
Sequence( Sequence(
@@ -459,7 +524,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one", [CNC("foo", source="one")]), ("one", [CNC("foo", source="one")]),
("one one", [CNC("foo", source="one one")]), ("one one", [CNC("foo", source="one one")]),
]) ])
def test_i_can_parse_zero_or_more(self, text, expected): def test_i_can_match_zero_or_more(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"))) "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one")))
} }
@@ -471,7 +536,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two", [CNC("foo", source="one two")]), ("one two", [CNC("foo", source="one two")]),
("one one two", [CNC("foo", source="one one two")]), ("one one two", [CNC("foo", source="one one two")]),
]) ])
def test_i_can_parse_sequence_and_zero_or_more(self, text, expected): def test_i_can_match_sequence_and_zero_or_more(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", "foo": self.bnf_concept("foo",
Sequence( Sequence(
@@ -485,7 +550,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
("one, one , one", [CNC("foo", source="one, one , one")]), ("one, one , one", [CNC("foo", source="one, one , one")]),
]) ])
def test_i_can_parse_zero_or_more_with_separator(self, text, expected): def test_i_can_match_zero_or_more_with_separator(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=",")) "foo": self.bnf_concept("foo", ZeroOrMore(StrMatch("one"), sep=","))
} }
@@ -508,7 +573,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one", [CNC("foo", source="one")]), ("one", [CNC("foo", source="one")]),
("one one one", [CNC("foo", source="one one one")]), ("one one one", [CNC("foo", source="one one one")]),
]) ])
def test_i_can_parse_one_or_more(self, text, expected): def test_i_can_match_one_or_more(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))), "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"))),
} }
@@ -520,7 +585,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("one two", [CNC("foo", source="one two")]), ("one two", [CNC("foo", source="one two")]),
("one one two", [CNC("foo", source="one one two")]), ("one one two", [CNC("foo", source="one one two")]),
]) ])
def test_i_can_parse_sequence_one_and_or_more(self, text, expected): def test_i_can_match_sequence_one_and_or_more(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", "foo": self.bnf_concept("foo",
Sequence( Sequence(
@@ -534,7 +599,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
("one, one , one", [CNC("foo", source="one, one , one")]), ("one, one , one", [CNC("foo", source="one, one , one")]),
]) ])
def test_i_can_parse_one_or_more_with_separator(self, text, expected): def test_i_can_match_one_or_more_with_separator(self, text, expected):
my_map = { my_map = {
"foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=",")) "foo": self.bnf_concept("foo", OneOrMore(StrMatch("one"), sep=","))
} }
@@ -763,7 +828,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")} assert concept_bar.get_compiled()["foo"][1].get_compiled() == {ConceptParts.BODY: DoNotResolve("two")}
assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")} assert concept_bar.get_compiled()["foo"][2].get_compiled() == {ConceptParts.BODY: DoNotResolve("three")}
def test_i_can_parse_concept_reference_that_is_not_in_grammar(self): def test_i_can_match_concept_reference_that_is_not_in_grammar(self):
my_map = { my_map = {
"one": Concept("one"), "one": Concept("one"),
"two": Concept("two"), "two": Concept("two"),
@@ -817,6 +882,234 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert concept_foo.get_compiled() == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]), assert concept_foo.get_compiled() == {'number': CC(my_map["number"], body=my_map["one"], one=my_map["one"]),
ConceptParts.BODY: DoNotResolve(value='twenty one')} ConceptParts.BODY: DoNotResolve(value='twenty one')}
@pytest.mark.parametrize("expr, expected", [
    # python
    ("one 'car'", [CNC("foo", source="one 'car'", x=python_ret_val("'car'"))]),
    # simple concept
    ("one bar", [CNC("foo", source="one bar", x=CC("bar"))]),
    # long concept
    ("one super car", [CNC("foo", source="one super car", x=CC("super car"))]),
    # bnf
    ("one shoe", [CNC("foo", source="one shoe", x=CC("thing", source="shoe", body=DoNotResolve("shoe")))]),
])
def test_i_can_match_variable_when_ending_with_one_variable(self, expr, expected):
    """The grammar ``'one' x`` binds ``x`` to whatever follows the literal 'one'."""
    grammar = Sequence(StrMatch("one"), VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "thing": Concept("thing", definition="'shoe'|'skirt'"),
        "super car": Concept("super car"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_ending_with_one_variable_and_sya(self):
    """With an ``x plus y`` concept defined, the trailing variable yields two readings."""
    concepts = {
        "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }

    # First reading: the variable only captures 'bar', the rest is left outside 'foo'.
    short_reading = [CNC("foo", source="one bar", x=CC("bar")), UTN(" plus "), CN("baz")]
    # Second reading: the variable captures the whole 'bar plus baz' concept.
    long_reading = [
        CNC("foo", source="one bar plus baz", x=CC("plus", source="bar plus baz", x="bar", y="baz")),
    ]

    self.validate_get_concepts_sequences(concepts,
                                         "one bar plus baz",
                                         [short_reading, long_reading],
                                         multiple_result=True)
def test_i_can_match_variable_when_ending_with_one_variable_and_multiple_results(self):
    """Two concepts sharing the name 'pretty big' produce one result per concept."""
    concepts = {
        "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }
    text = "one pretty big"

    with_first_concept = [CNC("foo", source="one pretty big", x=CC("pretty big"))]
    with_second_concept = [CNC("foo", source="one pretty big", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [with_first_concept, with_second_concept],
                                         multiple_result=True)
def test_i_can_match_variable_when_ending_with_multiple_variables_and_multiple_results(self):
    """Two trailing variables: an ambiguous first variable yields one result per candidate."""
    grammar = Sequence(StrMatch("one"), VariableExpression("x"), VariableExpression("y"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "pretty": Concept("pretty", body="pretty"),
        "pretty2": Concept("pretty"),
        "big": Concept("big", body="big"),
    }
    text = "one pretty big"

    with_pretty = [CNC("foo", source="one pretty big", x=CC("pretty"), y=CC("big"))]
    with_pretty2 = [CNC("foo", source="one pretty big", x=CC("pretty2", source="pretty"), y=CC("big"))]

    self.validate_get_concepts_sequences(concepts, text, [with_pretty, with_pretty2], multiple_result=True)
@pytest.mark.parametrize("expr, expected", [
    # python
    ("'my' shoe", [CNC("foo", source="'my' shoe", x=python_ret_val("'my' "))]),
    # concept
    ("one shoe", [CNC("foo", source="one shoe", x=CC("one"))]),
    # long concept
    ("my little shoe", [CNC("foo", source="my little shoe", x=CC("my little"))]),
    ("black shoe", [CNC("foo", source="black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
])
def test_i_can_match_variable_when_starting_with_one_variable(self, expr, expected):
    """The grammar ``x 'shoe'`` binds ``x`` to whatever precedes the literal 'shoe'."""
    grammar = Sequence(VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "one": Concept("one"),
        "my little": Concept("my little"),
        "color": Concept("color", definition="'blue'|'black'"),
        "and": Concept("x and y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_starting_with_one_variable_and_sya(self):
    """With an ``x but y`` concept defined, the leading variable yields two readings."""
    concepts = {
        "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
        "tiny": Concept("tiny"),
        "beautiful": Concept("beautiful"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }
    text = "tiny but beautiful shoe"

    # Reading where the variable captures the whole 'tiny but beautiful' concept.
    wanted = [
        CNC("foo",
            source="tiny but beautiful shoe",
            x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful")),
    ]
    # Reading where only 'beautiful' is captured; it is returned as well.
    unwanted = [CN("tiny"), UTN(" but "), CNC("foo", source="beautiful shoe", x=CC("beautiful"))]

    self.validate_get_concepts_sequences(concepts, text, [unwanted, wanted], multiple_result=True)
def test_i_can_match_variable_when_starting_with_multiple_variables(self):
    """Three leading variables can bind a concept, python source and a composed concept."""
    grammar = Sequence(VariableExpression("x"),
                       VariableExpression("y"),
                       VariableExpression("z"),
                       StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "one": Concept("one"),
        "two": Concept("two"),
        "plus": Concept("x plus y").def_var("x").def_var("y"),
    }
    text = "one 'one' one plus two shoe"

    # Reading that does not involve 'foo'; it is returned alongside the wanted one.
    unwanted = [CN("one"), SCN(" 'one' "), ("one", 1), UTN(" plus "), CN("two")]
    wanted = [
        CNC("foo",
            source="one 'one' one plus two shoe",
            x=CC("one"),
            y=python_ret_val(" 'one' "),
            z=CC("plus", source="one plus two", x="one", y="two")),
    ]

    self.validate_get_concepts_sequences(concepts, text, [unwanted, wanted], multiple_result=True)
def test_i_can_match_variable_when_starting_with_one_variable_and_longer_str(self):
    """A leading variable followed by several literals is still bound correctly."""
    grammar = Sequence(VariableExpression("x"),
                       StrMatch("foo"),
                       StrMatch("bar"),
                       StrMatch("baz"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "one": Concept("one")
    }

    self.validate_get_concepts_sequences(concepts,
                                         "one foo bar baz",
                                         [CNC("foo", source="one foo bar baz", x=CC("one"))])
@pytest.mark.parametrize("expr, expected", [
    # python
    ("one 'pretty' shoe", [CNC("foo", source="one 'pretty' shoe", x=python_ret_val("'pretty' "))]),
    # concept
    ("one little shoe", [CNC("foo", source="one little shoe", x=CC("little"))]),
    # long concept
    ("one very big shoe", [CNC("foo", source="one very big shoe", x=CC("very big"))]),
    ("one black shoe",
     [CNC("foo", source="one black shoe", x=CC("color", source="black", body=DoNotResolve('black')))]),
    ("one tiny but beautiful shoe",
     [CNC("foo",
          source="one tiny but beautiful shoe",
          x=CC("but", source="tiny but beautiful", x="tiny", y="beautiful "))]),
])
def test_i_can_match_variable_in_between(self, expr, expected):
    """The grammar ``'one' x 'shoe'`` binds ``x`` to what lies between the two literals."""
    grammar = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "little": Concept("little"),
        "very big": Concept("very big"),
        "color": Concept("color", definition="'blue'|'black'"),
        "but": Concept("x but y").def_var("x").def_var("y"),
    }

    self.validate_get_concepts_sequences(concepts, expr, expected)
def test_i_can_match_variable_when_multiple_results_in_between(self):
    """An enclosed variable matching two same-named concepts gives one result per concept."""
    grammar = Sequence(StrMatch("one"), VariableExpression("x"), StrMatch("shoe"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "pretty big": Concept("pretty big", body="'pretty big'"),
        "pbig": Concept("pretty big"),
    }
    text = "one pretty big shoe"

    with_first_concept = [CNC("foo", source="one pretty big shoe", x=CC("pretty big"))]
    with_second_concept = [CNC("foo", source="one pretty big shoe", x=CC("pbig", source="pretty big"))]

    self.validate_get_concepts_sequences(concepts,
                                         text,
                                         [with_first_concept, with_second_concept],
                                         multiple_result=True)
def test_i_can_match_regex_and_variable(self):
    """A regex followed by a variable: the variable is bound to the concept after the regex."""
    grammar = Sequence(RegExMatch("[a-z]+"),
                       VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "shoe": Concept("shoe")
    }

    self.validate_get_concepts_sequences(concepts,
                                         "onyx shoe",
                                         [CNC("foo", source="onyx shoe", x=CC("shoe"))])
def test_i_can_match_variable_and_regex(self):
    """A variable followed by a regex: the variable is bound to the concept before the regex."""
    grammar = Sequence(VariableExpression("x"),
                       RegExMatch("[a-z]+"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "one": Concept("one")
    }

    self.validate_get_concepts_sequences(concepts,
                                         "one onyx",
                                         [CNC("foo", source="one onyx", x=CC("one"))])
def test_i_can_reuse_the_same_variable(self):
    """The variable ``x`` is used twice in the grammar but expected once in the result."""
    grammar = Sequence(VariableExpression("x"),
                       StrMatch("equals"),
                       VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    _sheerka, context, *updated = self.init_concepts(*concepts.values())

    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)

    # Both occurrences of x resolve to 'one': a single x entry is expected.
    text = "one equals one"
    expected = [CNC("foo", source="one equals one", x=CC("one"))]
    expected_sequence = compute_expected_array(concepts, text, expected)

    parser.reset_parser(context, ParserInput(text))
    helpers = parser.get_concepts_sequences(context)

    actual = tests.parsers.parsers_utils.get_test_obj(expected_sequence, helpers[0].sequence)
    assert actual == expected
def test_i_cannot_match_variable_when_variables_discrepancy(self):
    """When the two occurrences of ``x`` face different values, no sequence is produced."""
    grammar = Sequence(VariableExpression("x"),
                       StrMatch("equals"),
                       VariableExpression("x"))
    concepts = {
        "foo": self.bnf_concept("foo", grammar),
        "one": Concept("one"),
        "one_1": Concept("one", body="1"),
        "two": Concept("two"),
        "two_2": Concept("two", body="2"),
    }
    _sheerka, context, *updated = self.init_concepts(*concepts.values())

    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)
    parser.reset_parser(context, ParserInput("one equals two"))

    helpers = parser.get_concepts_sequences(context)

    assert helpers[0].sequence == []
@pytest.mark.parametrize("bar_expr, expected", [ @pytest.mark.parametrize("bar_expr, expected", [
(ConceptExpression("foo"), {}), (ConceptExpression("foo"), {}),
(OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}), (OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}),
@@ -833,7 +1126,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict # every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == expected
# get_parsing_expression() also returns CHICKEN_AND_EGG # get_parsing_expression() also returns CHICKEN_AND_EGG
@@ -858,7 +1150,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict # every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {} assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"]) parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -884,7 +1175,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
parser.sheerka = sheerka parser.sheerka = sheerka
# every obvious cyclic recursion are removed from concept_by_first_keyword dict # every obvious cyclic recursion are removed from concept_by_first_keyword dict
parser.init_from_concepts(context, my_map.values())
assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {} assert sheerka.om.copy(SheerkaConceptManager.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY) == {}
parsing_expression = parser.get_parsing_expression(context, my_map["foo"]) parsing_expression = parser.get_parsing_expression(context, my_map["foo"])
@@ -908,8 +1198,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"foo": self.bnf_concept("foo", expr), "foo": self.bnf_concept("foo", expr),
} }
sheerka, context, parser = self.init_parser(my_map, singleton=True) sheerka, context, parser = self.init_parser(my_map)
parser.init_from_concepts(context, my_map.values())
parser.context = context parser.context = context
parser.sheerka = sheerka parser.sheerka = sheerka
@@ -923,7 +1212,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"number": Concept("number"), "number": Concept("number"),
"twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number"))) "twenties": self.bnf_concept("twenties", Sequence(ConceptExpression("twenty"), ConceptExpression("number")))
} }
sheerka, context, parser = self.init_parser(my_map, singleton=True) sheerka, context, parser = self.init_parser(my_map)
parser.context = context parser.context = context
parser.sheerka = sheerka parser.sheerka = sheerka
sheerka.set_isa(context, sheerka.new("one"), my_map["number"]) sheerka.set_isa(context, sheerka.new("one"), my_map["number"])
@@ -1025,8 +1314,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
ConceptExpression(my_map["one"], rule_name="one")) ConceptExpression(my_map["one"], rule_name="one"))
@pytest.mark.parametrize("expr, text, expected", [ @pytest.mark.parametrize("expr, text, expected", [
# (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]), (ZeroOrMore(StrMatch("one"), sep=","), "one,", [CNC("foo", source="one"), UTN(",")]),
# (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]), (StrMatch("one"), "one two", [CNC("foo", source="one"), UTN(" two")]),
(StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]), (StrMatch("one"), "two one", [UTN("two "), CNC("foo", source="one")]),
]) ])
def test_i_can_recognize_unknown_concepts(self, expr, text, expected): def test_i_can_recognize_unknown_concepts(self, expr, text, expected):
@@ -1053,7 +1342,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"three": self.bnf_concept("three") "three": self.bnf_concept("three")
} }
sheerka, context, parser = self.init_parser(my_map, singleton=True) sheerka, context, parser = self.init_parser(my_map, singleton=True)
parser.init_from_concepts(context, my_map.values())
parser.reset_parser(context, ParserInput("one three")) parser.reset_parser(context, ParserInput("one three"))
sequences = parser.get_concepts_sequences(context) sequences = parser.get_concepts_sequences(context)
@@ -1067,6 +1355,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]), ("bar", True, [CNC("foo or bar", source="bar", bar="bar", body="bar")]),
("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]), ("one plus two", True, [CNC("plus", source="one plus two", one="one", two="two")]),
("twenty one", True, [CNC("t1", source="twenty one", unit="one")]), ("twenty one", True, [CNC("t1", source="twenty one", unit="one")]),
("one 'car'", True, [CNC("one thing", source="one 'car'", x=python_ret_val("'car'"), one="one")])
]) ])
def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected): def test_i_can_parse_simple_expressions(self, parser_input, expected_status, expected):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
@@ -1359,8 +1648,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
"expr": Concept("expr", definition="term ('+' term)*"), "expr": Concept("expr", definition="term ('+' term)*"),
} }
sheerka, context, parser = self.init_parser(my_map, singleton=True) sheerka, context, parser = self.init_parser(my_map)
parser.init_from_concepts(context, my_map.values())
text = "1 + 2 * 3" text = "1 + 2 * 3"
@@ -1396,8 +1684,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")), Sequence(ConceptExpression("term"), StrMatch("+"), ConceptExpression("expr")),
ConceptExpression("term"))), ConceptExpression("term"))),
} }
sheerka, context, parser = self.init_parser(my_map, singleton=True) sheerka, context, parser = self.init_parser(my_map)
parser.init_from_concepts(context, my_map.values())
text = "1 + 2 * 3" text = "1 + 2 * 3"
@@ -1437,8 +1724,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))), OrderedChoice(StrMatch("bar"), ConceptExpression("foo")))),
} }
sheerka, context, parser = self.init_parser(my_map, singleton=True) sheerka, context, parser = self.init_parser(my_map)
parser.init_from_concepts(context, my_map.values())
assert parser.parse(context, ParserInput("foo bar")).status assert parser.parse(context, ParserInput("foo bar")).status
assert parser.parse(context, ParserInput("foo foo foo bar")).status assert parser.parse(context, ParserInput("foo foo foo bar")).status
@@ -1475,6 +1761,128 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert res.status assert res.status
assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)]) assert res.value.value == compute_expected_array(cmap, text, [CN("thirties", source=text)])
def test_i_do_not_eat_unwanted_tokens_at_the_beginning_when_concept_with_variable(self):
    """For ``x 'shoe'`` on 'two one shoe', only 'one' is bound to x; 'two' stays apart."""
    concepts = {
        "foo": self.bnf_concept("foo", Sequence(VariableExpression("x"), StrMatch("shoe"))),
        "one": Concept("one"),
        "two": Concept("two"),
    }
    _sheerka, context, parser = self.init_parser(concepts)
    text = "two one shoe"

    res = parser.parse(context, ParserInput(text))

    assert res.status
    expected_nodes = [
        CN("two"),
        CNC("foo", source="one shoe", x=CC("one")),
    ]
    assert res.value.value == compute_expected_array(concepts, text, expected_nodes)
def test_i_do_not_eat_unwanted_tokens_at_the_end_when_concept_with_variable(self):
    """For ``'one' x`` on 'one bar baz', only 'bar' is bound to x; 'baz' stays apart."""
    concepts = {
        "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), VariableExpression("x"))),
        "bar": Concept("bar"),
        "baz": Concept("baz"),
    }
    _sheerka, context, parser = self.init_parser(concepts)
    text = "one bar baz"

    res = parser.parse(context, ParserInput(text))

    assert res.status
    expected_nodes = [
        CNC("foo", source="one bar", x=CC("bar")),
        CN("baz"),
    ]
    assert res.value.value == compute_expected_array(concepts, text, expected_nodes)
@pytest.mark.parametrize("parsing_expression, expected", [
    (RegExMatch("a"), [RegExDef("a")]),
    (OrderedChoice(StrMatch("first"), RegExMatch("a|b")), ["first", RegExDef("a|b")]),
    (OrderedChoice(RegExMatch("a|b"), StrMatch("first")), [RegExDef("a|b"), "first"]),
    (Sequence(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (Sequence(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (OneOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (OneOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
    (ZeroOrMore(StrMatch("a"), RegExMatch("a|b")), ["a"]),
    (ZeroOrMore(RegExMatch("a|b"), StrMatch("a")), [RegExDef("a|b")]),
])
def test_i_can_get_first_item(self, parsing_expression, expected):
    """The first-token visitor collects the expected first tokens of an expression."""
    visitor = BnfNodeFirstTokenVisitor(self.get_sheerka())

    visitor.visit(parsing_expression)

    assert visitor.first_tokens == expected
def test_i_cannot_parse_regex_when_no_next_matching_token_cannot_be_found(self):
    """Parsing 'abcdef' against the regex concept r'abcd' is rejected with a NoMatchingTokenError."""
    regex_concept = Concept("foo", definition="r'abcd'")
    sheerka, context, foo = self.init_test().with_concepts(regex_concept, create_new=True).unpack()
    parser = BnfNodeParser(sheerka=sheerka)

    res = parser.parse(context, ParserInput("abcdef"))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
    assert res.body.reason == [NoMatchingTokenError(4)]
@pytest.mark.parametrize("text", [
    "one",
    " one",
    "one ",
    " one "
])
def test_i_cannot_parse_empty_variable(self, text):
    """The literal 'one', with or without surrounding blanks, must be rejected.

    Every variant is expected to fail with a NOT_FOR_ME body.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    # Parse the parametrized input. The previous version always parsed the
    # hard-coded "one", so the whitespace variants were never exercised.
    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("bnf, text", [
    (Sequence(VariableExpression("x"), StrMatch("foo")), "one foo"),
    (Sequence(StrMatch("foo"), VariableExpression("x")), "foo one"),
    (Sequence(StrMatch("foo"), VariableExpression("x"), StrMatch("bar")), "foo one bar"),
])
def test_i_cannot_parse_variable_when_unrecognized_nodes(self, bnf, text):
    """Parsing must fail with NOT_FOR_ME for each parametrized grammar/text pair.

    Only the concept 'foo' is registered, so the word 'one' in the input
    does not correspond to any declared concept.
    """
    # Register the parametrized grammar. The previous version ignored `bnf`
    # and always used "x 'shoe'", which none of the inputs could match, so
    # the test passed without exercising the variable handling.
    sheerka, context, foo = self.init_test().with_concepts(
        self.bnf_concept("foo", bnf)
    ).unpack()

    parser = BnfNodeParser()
    parser.init_from_concepts(context, [foo])

    res = parser.parse(context, ParserInput(text))

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
@pytest.mark.parametrize("to_match, ignore_case, multiline, explicit_flags", [
    ("xxy", None, None, re.MULTILINE),
    ("xxy", True, True, re.MULTILINE),
    ("xxy", False, False, re.MULTILINE),
])
def test_i_can_serialize_reg_ex_def(self, to_match, ignore_case, multiline, explicit_flags):
    """A RegExDef survives a serialize / deserialize round trip unchanged."""
    original = RegExDef(to_match, ignore_case, multiline, explicit_flags)

    restored = RegExDef().deserialize(original.serialize())

    assert original == restored
def test_i_can_resolve_parsing_expression_for_variable_concept(self):
    """After resolution, a leading variable references the expression that follows it."""
    _sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    expression = Sequence(VariableExpression("x"), StrMatch("x"))

    resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set())

    variable = resolved.nodes[0]
    assert isinstance(variable, VariableExpression)
    # The variable's first node is the next element of the sequence.
    assert variable.nodes[0] == resolved.nodes[1]
def test_i_can_resolve_parsing_expression_when_ending_with_variable_concept(self):
    """After resolution, a trailing variable has no following expression attached."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    expression = Sequence(StrMatch("x"), VariableExpression("x"))

    resolved = parser.resolve_parsing_expression(context, expression, {}, set(), set())

    assert isinstance(resolved.nodes[1], VariableExpression)
    # Original check, kept as is: the leading literal carries no sub nodes.
    assert resolved.nodes[0].nodes == []
    # The variable is the last element of the sequence, so nothing follows it.
    # The previous version only inspected nodes[0] (the literal), which left
    # the variable itself unchecked.
    # NOTE(review): assumes a trailing variable keeps an empty `nodes` list - confirm.
    assert resolved.nodes[1].nodes == []
# @pytest.mark.parametrize("parser_input, expected", [ # @pytest.mark.parametrize("parser_input, expected", [
# ("one", [ # ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]), # (True, [CNC("bnf_one", source="one", one="one", body="one")]),
+49 -5
View File
@@ -1,4 +1,5 @@
import pytest import pytest
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
@@ -6,10 +7,9 @@ from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseNodeParser import cnode from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError
from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import BnfNodeParser from parsers.BnfNodeParser import BnfNodeParser, RegExMatch, VariableExpression
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \ from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \
OneOrMore, ConceptExpression OneOrMore, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -49,6 +49,7 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("1", StrMatch("1")), ("1", StrMatch("1")),
(" 1", StrMatch("1")), (" 1", StrMatch("1")),
(",", StrMatch(",")), (",", StrMatch(",")),
("r'str'", RegExMatch("str")),
("'foo'?", Optional(StrMatch("foo"))), ("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))), ("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))), ("'foo'+", OneOrMore(StrMatch("foo"))),
@@ -84,6 +85,19 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))), ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))), ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))), ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("r'str'=var", RegExMatch("str", rule_name="var")),
("r'foo'?=var", Optional(RegExMatch("foo"), rule_name="var")),
("(r'foo'?)=var", Optional(RegExMatch("foo"), rule_name="var")),
("r'foo'*=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'*)=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'+=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'+)=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'=var?", Optional(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)?", Optional(RegExMatch("foo", rule_name="var"))),
("r'foo'=var*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("r'foo'=var+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")), ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")), ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")), ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
@@ -118,6 +132,8 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("foo=f", c("foo", "f")), ("foo=f", c("foo", "f")),
("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))), ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
("def 'concept'", Sequence(c("def"), StrMatch("concept"))), ("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
("c:foo:", c("foo")),
("c:|1001:", c("foo")),
]) ])
def test_i_can_parse_regex_with_concept(self, expression, expected): def test_i_can_parse_regex_with_concept(self, expression, expected):
sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def") sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def")
@@ -131,6 +147,29 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
assert res.value.value == expected assert res.value.value == expected
assert res.value.source == expression assert res.value.source == expression
@pytest.mark.parametrize("expression, expected", [
    ("x", VariableExpression("x")),
    ("x bar", Sequence(VariableExpression("x"), c("bar"))),
    ("bar x", Sequence(c("bar"), VariableExpression("x"))),
    ("x 'and' bar", Sequence(VariableExpression("x"), StrMatch("and"), c("bar"))),
    ("x | bar", OrderedChoice(VariableExpression("x"), c("bar"))),
    ("x*", ZeroOrMore(VariableExpression("x"))),
    ("x+", OneOrMore(VariableExpression("x"))),
    ("'str' = x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
    ("'str''='x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
    ("foo=x", VariableExpression("x")),
])
def test_i_can_parse_regex_with_variable(self, expression, expected):
    """Identifiers that do not resolve to a concept are parsed as variables.

    Only ``bar`` is handed to ``init_parser``; every other identifier used in
    the parametrized expressions is expected to become a
    ``VariableExpression``.
    """
    sheerka, _context, regex_parser, _bar = self.init_parser("bar")
    # The expected tree is adjusted in place so that it can be compared
    # with what the parser produces.
    update_concepts_ids(sheerka, expected)

    parsed = regex_parser.parse(self.get_context(), expression)

    assert parsed.status
    outcome = parsed.value
    assert outcome.value == expected
    assert outcome.source == expression
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("foo", ConceptExpression("foo")), ("foo", ConceptExpression("foo")),
("foo=f", ConceptExpression("foo", rule_name="f")), ("foo=f", ConceptExpression("foo", rule_name="f")),
@@ -208,13 +247,18 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.CANNOT_RESOLVE_CONCEPT)
assert res.value.body == ('key', 'foo') assert res.value.body == ('key', 'foo')
@pytest.mark.parametrize("text, expected", [
    ("c:foo:", {'key': 'foo'}),
    ("c:|1001:", {'id': '1001'}),
    ("c:foo|1001:", {'key': 'foo', 'id': '1001'}),
])
def test_i_cannot_parse_when_unknown_concept(self, text, expected):
    """Concept tokens that name no known concept yield UNKNOWN_CONCEPT.

    The parser is initialised without any concept, so each ``c:...:`` token
    in the parametrized inputs refers to something that does not exist. The
    body of the returned error is expected to carry the key and/or id that
    was looked up.

    :param text: BNF expression made of a single concept token
    :param expected: expected body of the UNKNOWN_CONCEPT result
    """
    sheerka, context, regex_parser = self.init_parser()

    res = regex_parser.parse(self.get_context(), text)

    assert not res.status
    assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert res.value.body == expected
def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self): def test_concept_expression_are_correctly_created_when_isa_concept_is_detected(self):
sheerka, context, parser, one, two, number, twenties = self.init_parser( sheerka, context, parser, one, two, number, twenties = self.init_parser(
+95 -13
View File
@@ -2,6 +2,7 @@ import ast
from dataclasses import dataclass from dataclasses import dataclass
import pytest import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV
from core.global_symbols import NotInit from core.global_symbols import NotInit
@@ -9,13 +10,13 @@ from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, LexerError from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.BaseNodeParser import SCWC from parsers.BaseNodeParser import SCWC
from parsers.BaseParser import UnexpectedEofParsingError from parsers.BaseParser import UnexpectedEofParsingError
from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence
from parsers.BnfDefinitionParser import BnfDefinitionParser from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence, RegExMatch, OneOrMore, \
VariableExpression
from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode
from parsers.DefConceptParser import UnexpectedTokenParsingError, DefConceptNode from parsers.DefConceptParser import UnexpectedTokenParsingError, DefConceptNode
from parsers.FunctionParser import FunctionParser from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonParser, PythonNode from parsers.PythonParser import PythonParser, PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array from tests.parsers.parsers_utils import compute_expected_array
@@ -332,7 +333,7 @@ def concept add one to a as:
"def concept name from bnf ", "def concept name from bnf ",
"def concept name from bnf as True", "def concept name from bnf as True",
]) ])
def test_i_cannot_parse_empty_bnf_definition(self, text): def test_i_cannot_parse_empty_bnf_definition_when_no_definition(self, text):
sheerka, context, parser, *concepts = self.init_parser() sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text)) res = parser.parse(context, ParserInput(text))
error = res.body error = res.body
@@ -347,7 +348,8 @@ def concept add one to a as:
node = res.value.value node = res.value.value
definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string")) definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(BnfDefinitionParser(), "a_concept | 'a_string'", None, definition, definition) parser_result = ParserResultConcept(BnfDefinitionParser(), "a_concept | 'a_string'", None, definition,
definition)
expected = get_def_concept(name="name", body="__definition[0]", bnf_def=parser_result) expected = get_def_concept(name="name", body="__definition[0]", bnf_def=parser_result)
assert res.status assert res.status
@@ -356,6 +358,22 @@ def concept add one to a as:
assert isinstance(res.value, ParserResultConcept) assert isinstance(res.value, ParserResultConcept)
assert node == expected assert node == expected
def test_i_can_parse_def_concept_from_bnf_when_using_concept_token(self):
    """A ``c:name:`` concept token can be used inside a BNF definition.

    The token ``c:a_concept:`` is expected to produce the same
    ``ConceptExpression`` as a plain reference to ``a_concept``.
    """
    text = "def concept name from bnf c:a_concept: 'xxx'"
    sheerka, context, parser, a_concept = self.init_parser("a_concept")

    res = parser.parse(context, ParserInput(text))

    # Check the outcome before dereferencing res.value.value, so that a
    # parsing failure is reported as such and not as an unrelated error
    # raised while reading the (error) result.
    assert res.status
    assert res.who == parser.name
    assert res.value.source == text
    assert isinstance(res.value, ParserResultConcept)

    definition = Sequence(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("xxx"))
    parser_result = ParserResultConcept(BnfDefinitionParser(), "c:a_concept: 'xxx'", None, definition, definition)
    expected = get_def_concept(name="name", bnf_def=parser_result)
    assert res.value.value == expected
def test_i_can_parse_def_concept_where_bnf_references_itself(self): def test_i_can_parse_def_concept_where_bnf_references_itself(self):
text = "def concept name from bnf 'a' + name?" text = "def concept name from bnf 'a' + name?"
sheerka, context, parser, a_concept = self.init_parser("a_concept") sheerka, context, parser, a_concept = self.init_parser("a_concept")
@@ -495,15 +513,6 @@ from give me the date !
assert res.body.body[0].message == error_msg assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text assert res.body.body[0].text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
    """An identifier that matches no concept makes the BNF definition fail.

    NOTE(review): the enclosing hunk header (-495,15 +513,6) suggests this
    is the pre-change test deleted by the commit; the same input is expected
    to succeed in test_i_can_parse_bnf_definition_referencing_unknown_concept.
    Confirm before relying on it.
    """
    _sheerka, context, parser, *_concepts = self.init_parser()
    source = "def concept name from bnf unknown"

    outcome = parser.parse(context, ParserInput(source))

    assert not outcome.status
    assert context.sheerka.isinstance(outcome.value, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert outcome.value.body == ("key", "unknown")
def test_i_cannot_parse_bnf_definition_referencing_multiple_concepts_sharing_the_same_name(self): def test_i_cannot_parse_bnf_definition_referencing_multiple_concepts_sharing_the_same_name(self):
text = "def concept twenty one from bnf 'twenty' one" text = "def concept twenty one from bnf 'twenty' one"
sheerka, context, parser, *concepts = self.init_parser(Concept("one", body="1"), Concept("one", body="1.0")) sheerka, context, parser, *concepts = self.init_parser(Concept("one", body="1"), Concept("one", body="1.0"))
@@ -557,5 +566,78 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept) assert isinstance(res.value, ParserResultConcept)
assert node == expected assert node == expected
def test_i_can_parse_bnf_concept_with_regex(self):
    """A BNF definition made of a single raw-string regex is parsed as a RegExMatch."""
    _sheerka, context, parser, _number = self.init_parser("number")
    text = "def concept sha512 from bnf r'^[a-f0-9]{128}$'"

    result = parser.parse(context, ParserInput(text))

    assert result.status
    assert result.who == parser.name
    assert result.value.source == text
    assert isinstance(result.value, ParserResultConcept)

    # The same expression object is passed for the two trailing arguments
    # of the expected parser result.
    regex_expression = RegExMatch("^[a-f0-9]{128}$")
    bnf_result = ParserResultConcept(
        BnfDefinitionParser(),
        "r'^[a-f0-9]{128}$'",
        None,
        regex_expression,
        regex_expression,
    )
    assert result.value.value == get_def_concept(name="sha512", bnf_def=bnf_result)
def test_i_can_parse_bnf_concept_with_a_more_complicated_bnf(self):
    """Concept references, regexes, grouping and repetition can be mixed in one BNF."""
    _sheerka, context, parser, number = self.init_parser("number")
    text = "def concept foo from bnf number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"

    result = parser.parse(context, ParserInput(text))

    assert result.status
    assert result.who == parser.name
    assert result.value.source == text
    assert isinstance(result.value, ParserResultConcept)

    # One alternative per '|' separated part of the definition.
    number_alone = ConceptExpression(number, rule_name="number")
    hex_alone = RegExMatch("[a-f0-9]+")
    repeated_pair = OneOrMore(Sequence(ConceptExpression(number, rule_name="number"), RegExMatch("[a-f0-9]+")))
    choice = OrderedChoice(number_alone, hex_alone, repeated_pair)

    bnf_result = ParserResultConcept(
        BnfDefinitionParser(),
        "number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+",
        None,
        choice,
        choice,
    )
    assert result.value.value == get_def_concept(name="foo", bnf_def=bnf_result)
def test_i_can_parse_bnf_concept_definition_with_a_variable(self):
    """An identifier that is not a known concept becomes a variable of the BNF.

    ``number`` is given to ``init_parser`` while ``x`` is not, so the
    definition is expected to be a ConceptExpression followed by a
    VariableExpression. The ``where`` clause is expected to be kept as is.
    """
    sheerka, context, parser, number = self.init_parser("number")
    text = "def concept foo from bnf number x where x"

    res = parser.parse(context, ParserInput(text))

    # Check the outcome before dereferencing res.value.value, so that a
    # parsing failure is reported as such and not as an unrelated error
    # raised while reading the (error) result.
    assert res.status
    assert res.who == parser.name
    assert res.value.source == text
    assert isinstance(res.value, ParserResultConcept)

    definition = Sequence(ConceptExpression(number, rule_name="number"), VariableExpression("x"))
    parser_result = ParserResultConcept(BnfDefinitionParser(), "number x", None, definition, definition)
    expected = get_def_concept(name="foo", bnf_def=parser_result, where="x")
    assert res.value.value == expected
def test_i_can_parse_bnf_definition_referencing_unknown_concept(self):
    """An unknown identifier in a BNF definition is accepted as a variable.

    No concept is given to ``init_parser``, so ``unknown`` cannot be
    resolved; the definition is expected to be a single VariableExpression.
    """
    text = "def concept name from bnf unknown"
    sheerka, context, parser, *concepts = self.init_parser()

    res = parser.parse(context, ParserInput(text))

    # Check the outcome before dereferencing res.value.value, so that a
    # parsing failure is reported as such and not as an unrelated error
    # raised while reading the (error) result.
    assert res.status
    assert res.who == parser.name
    assert res.value.source == text
    assert isinstance(res.value, ParserResultConcept)

    definition = VariableExpression("unknown")
    parser_result = ParserResultConcept(BnfDefinitionParser(), "unknown", None, definition, definition)
    expected = get_def_concept(name="name", bnf_def=parser_result)
    assert res.value.value == expected
+109
View File
@@ -0,0 +1,109 @@
from core.concept import Concept, ConceptParts, CC
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SyaNodeParser import SyaNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import get_test_obj
class TestParsersUtils(TestUsingMemoryBasedSheerka):
    """Exercise ``get_test_obj`` with CNC and CC templates, alone and inside lists and dicts."""

    def _first_sya_node(self):
        """Return the first node a SyaNodeParser produces for "one plus two"."""
        _sheerka, context, one, two, plus = self.init_concepts(
            "one",
            "two",
            Concept("a plus b").def_var("a").def_var("b")
        )
        sya_parser = SyaNodeParser().init_from_concepts(context, [one, two, plus])
        return sya_parser.parse(context, ParserInput("one plus two")).body.body[0]

    def _first_bnf_node(self):
        """Return the first node a BnfNodeParser produces for "twenty one"."""
        _sheerka, context, one, two, twenties = self.init_concepts(
            "one",
            "two",
            Concept("twenties", definition="'twenty' (one | two)=unit").def_var("unit").def_var("one").def_var("two")
        )
        bnf_parser = BnfNodeParser().init_from_concepts(context, [one, two, twenties])
        return bnf_parser.parse(context, ParserInput("twenty one")).body.body[0]

    def test_i_can_get_test_obj_when_CNC_from_sya(self):
        cnode = self._first_sya_node()

        # Template giving start, end and source: all attributes are compared
        with_positions = get_test_obj(
            CNC(concept_key="key", start=0, end=1, source="", exclude_body=False),
            cnode,
        )
        assert isinstance(with_positions, CNC)
        assert with_positions == CNC(
            "__var__0 plus __var__1", 0, 4, "one plus two", False, **cnode.concept.get_compiled())

        # Template giving only the key: start, end and source are discarded
        key_only = get_test_obj(CNC(concept_key="key"), cnode)
        assert isinstance(key_only, CNC)
        assert key_only == CNC(
            "__var__0 plus __var__1", None, None, None, False, **cnode.concept.get_compiled())

    def test_i_can_get_test_obj_when_CNC_from_bnf(self):
        cnode = self._first_bnf_node()

        # Template giving start, end and source: all attributes are compared
        with_positions = get_test_obj(
            CNC(concept_key="key", start=0, end=1, source="", exclude_body=False),
            cnode,
        )
        assert isinstance(with_positions, CNC)
        assert with_positions == CNC("twenties", 0, 2, "twenty one", False, **cnode.concept.get_compiled())

        # Template asking to exclude the body
        without_body = get_test_obj(CNC(concept_key="key", exclude_body=True), cnode)
        expected_compiled = dict(cnode.concept.get_compiled().items())
        del expected_compiled[ConceptParts.BODY]
        assert isinstance(without_body, CNC)
        assert without_body == CNC("twenties", None, None, None, False, **expected_compiled)

    def test_i_can_get_test_obj_when_list(self):
        cnode = self._first_sya_node()

        converted = get_test_obj([CNC("key1"), CNC("key", 0, 1, "")], [cnode, cnode])

        assert len(converted) == 2
        first, second = converted[0], converted[1]
        assert isinstance(first, CNC)
        assert first == CNC("__var__0 plus __var__1", None, None, None, False, **cnode.concept.get_compiled())
        assert isinstance(second, CNC)
        assert second == CNC("__var__0 plus __var__1", 0, 4, "one plus two", False, **cnode.concept.get_compiled())

    def test_i_can_get_test_obj_when_dict(self):
        cnode = self._first_sya_node()

        templates = {"key1": CNC("key1"), "key2": CNC("key", 0, 1, "")}
        converted = get_test_obj(templates, {"key1": cnode, "key2": cnode})

        assert len(converted) == 2
        assert isinstance(converted["key1"], CNC)
        assert converted["key1"] == CNC(
            "__var__0 plus __var__1", None, None, None, False, **cnode.concept.get_compiled())
        assert isinstance(converted["key2"], CNC)
        assert converted["key2"] == CNC(
            "__var__0 plus __var__1", 0, 4, "one plus two", False, **cnode.concept.get_compiled())

    def test_i_can_get_test_obj_when_CC(self):
        cc = self._first_bnf_node().concept

        # Template giving the source: all attributes are compared
        with_source = get_test_obj(CC(concept="key", source="", exclude_body=False), cc)
        assert isinstance(with_source, CC)
        assert with_source == CC("twenties", "twenty one", False, **cc.get_compiled())

        # Template asking to exclude the body
        without_body = get_test_obj(CC(concept="key", exclude_body=True), cc)
        expected_compiled = dict(cc.get_compiled().items())
        del expected_compiled[ConceptParts.BODY]
        assert isinstance(without_body, CC)
        assert without_body == CC("twenties", "twenty one", True, **expected_compiled)
+2 -1
View File
@@ -2,7 +2,7 @@ import logging
import pytest import pytest
from core.concept import Concept from core.concept import Concept
from core.global_symbols import NotInit, NotFound, Removed from core.global_symbols import NotInit, NotFound, Removed, NoFirstToken
from core.tokenizer import Keywords from core.tokenizer import Keywords
from sheerkapickle import tags from sheerkapickle import tags
from sheerkapickle.SheerkaPickler import SheerkaPickler from sheerkapickle.SheerkaPickler import SheerkaPickler
@@ -68,6 +68,7 @@ class TestSheerkaPickler(TestUsingMemoryBasedSheerka):
(NotInit, {tags.CUSTOM: NotInit.value}), (NotInit, {tags.CUSTOM: NotInit.value}),
(NotFound, {tags.CUSTOM: NotFound.value}), (NotFound, {tags.CUSTOM: NotFound.value}),
(Removed, {tags.CUSTOM: Removed.value}), (Removed, {tags.CUSTOM: Removed.value}),
(NoFirstToken, {tags.CUSTOM: NoFirstToken.value}),
]) ])
def test_i_can_flatten_and_restore_custom_types(self, obj, expected): def test_i_can_flatten_and_restore_custom_types(self, obj, expected):
sheerka = self.get_sheerka() sheerka = self.get_sheerka()