Added BNF when adding a new concept + started logging filtering
@@ -190,3 +190,4 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud
         predicates.append(res)
 
     return predicates
+
core/concept.py
+8 -8

@@ -3,6 +3,7 @@ from dataclasses import dataclass
 from enum import Enum
 import logging
 
+import core.utils
 from core.tokenizer import Tokenizer, TokenKind
 
 log = logging.getLogger(__name__)
@@ -18,8 +19,7 @@ VARIABLE_PREFIX = "__var__"
 
 class ConceptParts(Enum):
     """
-    Helper class, Note quite sure that is it that useful
-    I guess, I was learning nums with Python...
+    Lists metadata that can contain some code
     """
     WHERE = "where"
     PRE = "pre"
@@ -85,6 +85,7 @@ class Concept:
         self.metadata = metadata
         self.props = {}  # list of Property for this concept
         self.cached_asts = {}  # cached ast for the where, pre, post and body parts
+        self.bnf = None
 
     def __repr__(self):
         return f"({self.metadata.id}){self.metadata.name}"
@@ -134,9 +135,9 @@ class Concept:
             return self
 
         if tokens is None:
-            tokens = iter(Tokenizer(self.metadata.name))
+            tokens = list(Tokenizer(self.metadata.name))
 
-        variables = list(self.props.keys())
+        variables = list(self.props.keys()) if len(core.utils.strip_tokens(tokens, True)) > 1 else []
 
         key = ""
         first = True
@@ -171,12 +172,11 @@ class Concept:
        :param codes:
        :return:
        """
-        possibles_codes = ConceptParts.get_parts()
        if codes is None:
            return
 
        for key in codes:
-            if key in possibles_codes:
-                self.cached_asts[ConceptParts(key)] = codes[key]
+            self.cached_asts[key] = codes[key]
 
        return self
@@ -231,7 +231,7 @@ class Concept:
         return self
 
     def set_prop(self, prop_name: str, prop_value=None):
-        self.props[prop_name] = Property(prop_name, prop_value)
+        self.props[prop_name] = Property(prop_name, prop_value)  # Python 3.7+ dicts keep insertion order
         return self
 
     def set_prop_by_index(self, index: int, prop_value):
core/sheerka.py
+85 -24

@@ -1,4 +1,6 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from functools import lru_cache
 
 from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
 from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
 from evaluators.BaseEvaluator import OneReturnValueEvaluator
@@ -10,8 +12,10 @@ import core.builtin_helpers
 import logging
 
 log = logging.getLogger(__name__)
+init_log = logging.getLogger(__name__ + ".init")
 
 concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
+CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
 
 
 class Sheerka(Concept):
@@ -19,22 +23,29 @@ class Sheerka(Concept):
     Main controller for the project
     """
 
-    CONCEPTS_ENTRY = "All_Concepts"
-    BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
-    USER_CONCEPTS_KEYS = "User_Concepts"
+    CONCEPTS_ENTRY = "All_Concepts"  # to store all the concepts
+    CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions"  # to store definitions (bnf) of concepts
+    BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"  # sequential key for builtin concepts
+    USER_CONCEPTS_KEYS = "User_Concepts"  # sequential key for user-defined concepts
 
-    def __init__(self, debug=False, skip_builtins_in_db=False):
+    def __init__(self, debug=False, skip_builtins_in_db=False, loggers=None):
         log.debug("Starting Sheerka.")
         super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
 
         # cache of the most used concepts
         # Note that these are only templates
         # They are used as a footprint for instantiation
+        # Except for source, when the concept is supposed to be unique
         self.concepts_cache = {}
 
-        # cache for builtin types.
-        # It allow instantiation of a builtin clas
-        self.builtin_cache = {}
+        #
+        # Cache for all concepts BNF
+        self.concepts_definitions = {}
+
+        #
+        # cache for concepts grammars
+        # a grammar can be seen as a resolved BNF
+        self.concepts_grammars = {}
 
         # a concept can be instantiated
         # ex: File is a concept, but File('foo.txt') is an instance
@@ -45,14 +56,16 @@ class Sheerka(Concept):
         # ex: hello => say('hello')
         self.rules = []
 
-        self.sdp = None
-        self.parsers = []
-        self.evaluators = []
+        self.sdp: SheerkaDataProvider = None  # SheerkaDataProvider
+        self.builtin_cache = {}  # cache for builtin concepts
+        self.parsers = {}  # cache for builtin parsers
+        self.evaluators = []  # cache for builtin evaluators
 
-        self.evaluators_prefix = None
-        self.parsers_prefix = None
+        self.evaluators_prefix: str = None
+        self.parsers_prefix: str = None
 
         self.debug = debug
+        self.loggers = loggers or []
         self.skip_builtins_in_db = skip_builtins_in_db
 
     def initialize(self, root_folder: str = None):
@@ -85,7 +98,7 @@ class Sheerka(Concept):
         Initializes the builtin concepts
         :return: None
         """
-        log.debug("Initializing builtin concepts")
+        init_log.debug("Initializing builtin concepts")
         builtins_classes = self.get_builtins_classes_as_dict()
 
         # this all initialization of the builtins seems to be little bit complicated
@@ -101,11 +114,11 @@ class Sheerka(Concept):
         if not self.skip_builtins_in_db:
             from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
             if from_db is None:
-                log.debug(f"'{concept.name}' concept is not found in db. Adding.")
+                init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
                 self.set_id_if_needed(concept, True)
                 self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
             else:
-                log.debug(f"Found concept '{from_db}' in db. Updating.")
+                init_log.debug(f"Found concept '{from_db}' in db. Updating.")
                 concept.update_from(from_db)
 
         self.add_in_cache(concept)
@@ -120,8 +133,8 @@ class Sheerka(Concept):
             if parser.__module__ == base_class.__module__:
                 continue
 
-            log.debug(f"Adding builtin parser '{parser.__name__}'")
-            self.parsers.append(parser)
+            init_log.debug(f"Adding builtin parser '{parser.__name__}'")
+            self.parsers[core.utils.get_full_qualified_name(parser)] = parser
 
     def initialize_builtin_evaluators(self):
         """
@@ -129,14 +142,26 @@ class Sheerka(Concept):
         :return:
         """
         for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
-            log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
+            init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
            self.evaluators.append(evaluator)
 
         for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
-            log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
+            init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
             self.evaluators.append(evaluator)
 
+    def logger_filter(self, record: logging.LogRecord):
+        if 'all' in self.loggers:
+            return True
+
+        ret = True
+        if 'init' not in self.loggers and record.name.endswith(".init"):
+            ret = False
+
+        return ret
+
     def init_logging(self):
+        handler = logging.StreamHandler()
+        handler.addFilter(self.logger_filter)
         if self.debug:
             log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
             log_level = logging.DEBUG
@@ -144,7 +169,7 @@ class Sheerka(Concept):
             log_format = "%(message)s"
             log_level = logging.INFO
 
-        logging.basicConfig(format=log_format, level=log_level)
+        logging.basicConfig(format=log_format, level=log_level, handlers=[handler])
 
     def eval(self, text: str):
         """
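As background on the mechanism used in `init_logging` above: `logging.Handler.addFilter` accepts any callable that takes a `LogRecord` and returns a truthy or falsy value. A minimal, self-contained sketch of the scheme this commit wires up (the `enabled` set is an illustrative stand-in for `Sheerka.loggers`; everything else is the standard library):

    import logging

    enabled = set()  # stand-in for Sheerka.loggers, filled from the CLI

    def logger_filter(record: logging.LogRecord) -> bool:
        if 'all' in enabled:  # keep everything when 'all' is requested
            return True
        # drop records from '*.init' loggers unless 'init' was requested
        return not (record.name.endswith(".init") and 'init' not in enabled)

    handler = logging.StreamHandler()
    handler.addFilter(logger_filter)
    logging.basicConfig(format="%(name)s [%(levelname)s] %(message)s",
                        level=logging.DEBUG, handlers=[handler])

    logging.getLogger("demo.init").debug("hidden unless 'init' or 'all' is enabled")
    logging.getLogger("demo").debug("always shown at DEBUG level")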
@@ -153,7 +178,9 @@ class Sheerka(Concept):
         :param text:
         :return:
         """
+        log.debug(f"Evaluating '{text}'.")
         evt_digest = self.sdp.save_event(Event(text))
+        log.debug(f"{evt_digest=}")
         exec_context = ExecutionContext(self.key, evt_digest, self)
 
         # Before parsing
@@ -183,7 +210,7 @@ class Sheerka(Concept):
         debug_text = "'" + text + "'" if isinstance(text, str) \
             else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
         log.debug(f"Parsing {debug_text}")
-        for parser in self.parsers:
+        for parser in self.parsers.values():
             p = parser()
             res = p.parse(context, text)
             if isinstance(res, list):
@@ -193,7 +220,7 @@ class Sheerka(Concept):
         return result
 
     def process(self, context, return_values, initial_concepts=None):
-        log.debug(f"Processing parsing result. context concept={initial_concepts}")
+        log.debug(f"{initial_concepts=}. Processing " + core.utils.pp(return_values))
 
         # return_values must be a list
         if not isinstance(return_values, list):
@@ -303,6 +330,8 @@ class Sheerka(Concept):
         """
 
         concept.init_key()
+        concepts_definitions = None
+        init_ret_value = None
 
         # checks for duplicate concepts
         if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
@@ -312,14 +341,33 @@ class Sheerka(Concept):
         # set id before saving in db
         self.set_id_if_needed(concept, False)
 
+        # add the BNF if known
+        if concept.bnf:
+            concepts_definitions = self.concepts_definitions.copy()
+            concepts_definitions[concept] = concept.bnf
+
+            # check if it's a valid BNF or whether it breaks the known rules
+            concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy())
+            sub_context = context.push(self.name, "Initializing concept definition")
+            sub_context.concepts_cache[concept.key] = concept  # the concept is not in the real cache yet
+            init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
+            if not init_ret_value.status:
+                return self.ret(self.create_new_concept.__name__, False, ErrorConcept(init_ret_value.value))
+
         # save the new context in sdp
         try:
             self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
+            if concepts_definitions is not None:
+                self.sdp.set(context.event_digest, self.CONCEPTS_DEFINITIONS_ENTRY, concepts_definitions, use_ref=True)
         except SheerkaDataProviderDuplicateKeyError as error:
             return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
 
-        # add in cache for quick further reference
+        # Updates the caches
         self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
+        if concepts_definitions is not None:
+            self.concepts_definitions = concepts_definitions
+        if init_ret_value is not None and init_ret_value.status:
+            self.concepts_grammars = init_ret_value.body
 
         # process the return in needed
         ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
@@ -514,6 +562,18 @@ class Sheerka(Concept):
 
         return (self.value(obj) for obj in objs)
 
+    def is_success(self, obj):
+        if isinstance(obj, bool):
+            return obj
+
+        if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
+            return obj.status
+
+        if self.isinstance(obj, BuiltinConcepts.ERROR):
+            return False
+
+        return False
+
     def isinstance(self, a, b):
         """
         return true if the concept a is an instance of the concept b
@@ -603,6 +663,7 @@ class ExecutionContext:
     sheerka: Sheerka  # sheerka
     desc: str = None  # human description of what is going on
     obj: Concept = None  # what is the subject of the execution context (if known)
+    concepts_cache: dict = field(default_factory=dict)
 
     def push(self, who, desc=None, obj=None):
         return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj)
core/tokenizer.py
+2 -21

@@ -80,6 +80,8 @@ class LexerError(Exception):
 class Keywords(Enum):
     DEF = "def"
     CONCEPT = "concept"
+    FROM = "from"
+    BNF = "bnf"
     AS = "as"
     WHERE = "where"
     PRE = "pre"
@@ -308,24 +310,3 @@ class Tokenizer:
                 1 if lines_count > 0 else start_column + len(result))
 
         return result, lines_count
-
-    def seek(self, words):
-        if self.i == self.text_len:
-            return 0
-
-        # init
-        offsets = {}
-        start_index = self.i
-
-        buffer = ""
-        while self.i < self.text_len:
-            c = self.text[self.i]
-
-            # skip white space
-            if c in (" ", "\t"):
-                self.i += 1
-                continue
-
-            for word in words:
-                if c == word[offset]:
-                    os
core/utils.py
+63 -5

@@ -3,6 +3,8 @@ import inspect
 import pkgutil
 import sys
 
+from core.tokenizer import TokenKind
+
 
 def sysarg_to_string(argv):
     """
@@ -72,11 +74,18 @@ def get_full_qualified_name(obj):
     :param obj:
     :return:
     """
-    module = obj.__class__.__module__
-    if module is None or module == str.__class__.__module__:
-        return obj.__class__.__name__  # Avoid reporting __builtin__
+    if obj.__class__ == type:
+        module = obj.__module__
+        if module is None or module == str.__class__.__module__:
+            return obj.__name__  # Avoid reporting __builtin__
+        else:
+            return module + '.' + obj.__name__
     else:
-        return module + '.' + obj.__class__.__name__
+        module = obj.__class__.__module__
+        if module is None or module == str.__class__.__module__:
+            return obj.__class__.__name__  # Avoid reporting __builtin__
+        else:
+            return module + '.' + obj.__class__.__name__
 
 
 def get_classes(module_name):
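A quick illustration of what the `get_full_qualified_name` change above fixes: when the argument is itself a class (`obj.__class__ is type`), the old code went through the instance path and reported the metaclass name instead of the class's own qualified name. A hedged sketch, with `mymodule` as an illustrative module name:

    class Foo:  # imagine this lives in a module named 'mymodule'
        pass

    # with the patched core.utils.get_full_qualified_name:
    get_full_qualified_name(Foo)    # -> 'mymodule.Foo'  (previously 'type')
    get_full_qualified_name(Foo())  # -> 'mymodule.Foo'  (unchanged)
    get_full_qualified_name(int)    # -> 'int', the builtins module is elided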
@@ -137,7 +146,7 @@ def remove_from_list(lst, to_remove_predicate):
 
 def product(a, b):
     """
-    Kind of cartesian product between list a and b
+    Kind of cartesian product between lists a and b
     knowing that a is also a list
 
     So it's a cartesian product between a list of list and a list
@@ -155,3 +164,52 @@ def product(a, b):
         res.append(items)
 
     return res
+
+
+def strip_quotes(text):
+    if not isinstance(text, str):
+        return text
+
+    if text == "":
+        return ""
+
+    if text[0] == "'" or text[0] == '"':
+        return text[1:-1]
+
+    return text
+
+
+def strip_tokens(tokens, strip_eof=False):
+    """
+    Remove the starting and trailing spaces and newlines
+    """
+    if tokens is None:
+        return None
+
+    start = 0
+    length = len(tokens)
+    while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
+        start += 1
+
+    if start == length:
+        return []
+
+    end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
+        if strip_eof \
+        else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
+
+    end = length - 1
+    while end > 0 and tokens[end].type in end_tokens:
+        end -= 1
+
+    return tokens[start: end + 1]
+
+
+def pp(items):
+    if not hasattr(items, "__iter__"):
+        return str(items)
+
+    if len(items) == 0:
+        return str(items)
+
+    return " \n" + " \n".join(str(item) for item in items)
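To sanity-check the new `strip_tokens` helper, here is a small run using stand-in `Token`/`TokenKind` shims (the real classes come from `core.tokenizer`, whose exact shape isn't shown in this diff; paste the `strip_tokens` definition from above to execute it):

    from dataclasses import dataclass
    from enum import Enum, auto

    class TokenKind(Enum):  # shim for core.tokenizer.TokenKind
        WHITESPACE = auto()
        NEWLINE = auto()
        IDENTIFIER = auto()
        EOF = auto()

    @dataclass
    class Token:  # shim for core.tokenizer.Token
        type: TokenKind
        value: str

    tokens = [Token(TokenKind.WHITESPACE, " "),
              Token(TokenKind.IDENTIFIER, "foo"),
              Token(TokenKind.NEWLINE, "\n"),
              Token(TokenKind.EOF, "")]

    assert [t.value for t in strip_tokens(tokens, strip_eof=True)] == ["foo"]
    # without strip_eof, the trailing EOF token shields the newline before it:
    assert len(strip_tokens(tokens)) == 3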
+104

@@ -494,3 +494,107 @@ it wrong. But the profiling shows that the time is lost in the under layers of t
 FS library.
 
 It's a shame !
+
+2019-12-01
+**********
+
+Using BNF to define a concept
+"""""""""""""""""""""""""""""
+
+I always knew that there would be several ways to define the body of a concept (the same
+goes for the 'pre', 'post' and 'where' parts). It can be defined as Python code,
+or something that is related to concepts. It can even be a new language that I will
+design. The important point is that, contrary to traditional development languages,
+Sheerka must remain extensible.
+
+The same goes for the definition of the name.
+
+The traditional form is:
+
+::
+
+    def concept foo bar baz as ...
+
+So the concept is defined by the sequence 'foo', then 'bar', then 'baz'. In this order.
+
+Another way is:
+
+::
+
+    def concept a plus b where a,b as ...
+
+In this form, a and b are supposed to be variables.
+It will be matched against :code:`one plus two`.
+
+The concept name is 'a plus b'. It is a quick way to declare a concept with variables,
+but if someone defines another concept
+
+::
+
+    def concept number1 plus number2 where number1,number2 as ...
+
+this will produce another concept (although with the same key). I guess that, at
+some point, Sheerka will be able to detect that the concepts are the same, but
+the name of the concept includes its variables, which may be annoying in some
+situations.
+
+Plus, it's not possible to define rule precedence in this way. For example, given
+
+::
+
+    def concept a plus b as ...
+    def concept a times b as ...
+
+how do you express that multiplication has a higher priority in, for example,
+:code:`one plus two times three`?
+
+The only right answer, at least to me, is to implement something that is inspired
+by the BNF definition of a grammar.
+
+So the definition of the concept will look like:
+
+::
+
+    def concept term as factor (('+' | '-') term)?
+    def concept factor as number (('*' | '/') factor)?
+    def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
+
+This form seems great, but in the definitions of term and factor, there is no more
+room for the real body, i.e. once the components are recognized, what do we do with them?
+
+So we can try:
+
+::
+
+    def concept factor (('+') factor)* as factor[0] + factor[i]
+    def concept number (('*') number)? as number[0] + number[i]
+    def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
+
+The body is defined, but the name of the concept is too complicated, e.g. :code:`factor (('+') factor)*`.
+It's quite impossible to reference a concept that is defined in this way.
+
+So my last proposal, which marries the two ideas, is to introduce the two keywords 'using' and 'bnf'.
+
+.. _bnf: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
+
+::
+
+    def concept term using bnf factor (('+' | '-') term)? as factor + (or -) term
+    def concept factor using bnf number (('*' | '/') factor)? as number * (or /) factor
+    def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
+
+In my implementation:
+
+* Terminals are between quotes
+* Sequences are separated by whitespace
+* '|' (vertical bar) is used for alternatives
+
+Like in regular expressions, you will also find:
+
+* '*' (star) to express zero or many
+* '+' (plus) to express one or many
+* '?' (question mark) to express zero or one
+
+For those who don't know what BNF stands for, please have a look at the bnf_
+Wikipedia page.
+
+I guess that I will need a complete chapter to explain how you retrieve what was parsed
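To make the precedence point of this entry concrete: with the term/factor grammar sketched above (and assuming 'plus' and 'times' map onto the '+' and '*' terminals), 'one plus two times three' brackets as follows, because term only reaches '+' after fully reducing a factor:

    one plus two times three
    = term
      = factor -> number 'one'              (1)
        '+'
        term
        = factor
          = number 'two'                    (2)
            '*' factor -> number 'three'    (3)

    i.e. one + (two * three) = 7: the multiplication is grouped,
    and therefore evaluated, first.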
evaluators/AddConceptEvaluator.py

@@ -1,7 +1,11 @@
-from core.builtin_concepts import ParserResultConcept, ReturnValueConcept
+from core.ast.nodes import python_to_concept
+from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
+from core.builtin_helpers import get_names
 from core.concept import Concept
 from evaluators.BaseEvaluator import OneReturnValueEvaluator
+from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
 from parsers.DefaultParser import DefConceptNode
+import functools
 import logging
 
 from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
@@ -9,6 +13,23 @@ from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
 log = logging.getLogger(__name__)
 
 
+class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
+    """
+    Gets the concepts referenced by a BNF
+    If a rule_name is given, it will also be considered as a potential property
+    """
+
+    def __init__(self):
+        self.names = set()
+
+    def visit_ConceptMatch(self, node):
+        self.names.add(node.rule_name or node.concept_name)
+
+    def visit_all(self, node):
+        if node.rule_name:
+            self.names.add(node.rule_name)
+
+
 class AddConceptEvaluator(OneReturnValueEvaluator):
     """
     Used to add a new concept
@@ -32,7 +53,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
         props_found = set()
 
         concept = Concept(def_concept_node.name)
-        for prop in ("where", "pre", "post", "body"):
+        for prop in ("definition", "where", "pre", "post", "body"):
             # put back the sources
             part_ret_val = getattr(def_concept_node, prop)
             if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status:
@@ -43,35 +64,63 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
             setattr(concept.metadata, prop, source)
 
             # try to find what can be a property
-            for p in self.get_props(part_ret_val):
+            concept_name = [part.value for part in def_concept_node.name.tokens]
+            for p in self.get_props(sheerka, part_ret_val, concept_name):
                 props_found.add(p)
 
-        # Auto discovered properties must be referenced in the name
-        # Note that with this method, the variables will be created in the order of appearance
+        # add props ordered by appearance when possible
         for token in def_concept_node.name.tokens:
             if token.value in props_found:
                 concept.set_prop(token.value, None)
 
+        # add the remaining properties
+        for p in props_found:
+            if p not in concept.props:
+                concept.set_prop(p, None)
+
         # finish initialisation
         concept.init_key(def_concept_node.name.tokens)
-        concept.add_codes(def_concept_node.get_codes())
+        concept.add_codes(def_concept_node.get_asts())
+        if sheerka.is_success(def_concept_node.definition):
+            concept.bnf = def_concept_node.definition.value.value
 
         ret = sheerka.create_new_concept(context, concept)
         return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value])
 
     @staticmethod
     def get_source(ret_value):
-        return ret_value.value.source if isinstance(ret_value.value, ParserResultConcept) \
-            else ret_value.value.name
+        return ret_value.value.source
 
     @staticmethod
-    def get_props(ret_value):
-        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode):
-            get_names_visitor = PythonGetNamesVisitor()
-            get_names_visitor.visit(ret_value.value.value.ast_)
-            return get_names_visitor.names
-
-        if isinstance(ret_value.value, Concept):
-            return list(ret_value.value.props.keys())
+    def get_props(sheerka, ret_value, concept_name):
+        """
+        Try to find out the variables
+        This function can only be a draft, as there may be tons of different situations
+        I guess that it can only be complete when we have access to Sheerka's memory
+        """
+
+        #
+        # Case of Python code
+        #
+        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode):
+            python_node = ret_value.value.value
+            as_concept_node = python_to_concept(python_node.ast_)
+            variables = get_names(sheerka, as_concept_node)
+            variables = filter(lambda x: x in concept_name, variables)
+            return list(variables)
+
+        #
+        # case of a concept
+        #
+        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, Concept):
+            return list(ret_value.value.value.props.keys())
+
+        #
+        # case of a BNF
+        #
+        if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression):
+            visitor = ConceptOrRuleNameVisitor()
+            visitor.visit(ret_value.value.value)
+            return sorted(list(visitor.names))
 
         return []
@@ -18,13 +18,12 @@ class ConceptEvaluator(OneReturnValueEvaluator):
 
     def matches(self, context, return_value):
         return return_value.status and \
-            return_value.who.startswith(BaseParser.PREFIX) and \
-            isinstance(return_value.value, Concept) and \
-            not isinstance(return_value.value, ParserResultConcept)  # because there are specific evaluators
+            isinstance(return_value.value, ParserResultConcept) and \
+            isinstance(return_value.value.value, Concept)
 
     def eval(self, context, return_value):
         sheerka = context.sheerka
-        concept = return_value.value
+        concept = return_value.value.value
 
         # pre condition should already be validated by the parser.
         # It's a mandatory condition for the concept before it can be recognized
@@ -2,6 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
 from evaluators.AddConceptEvaluator import AddConceptEvaluator
 from evaluators.BaseEvaluator import AllReturnValuesEvaluator
 from parsers.BaseParser import BaseParser
+from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
 
 
 class DuplicateConceptEvaluator(AllReturnValuesEvaluator):
@@ -26,7 +27,7 @@ class DuplicateConceptEvaluator(AllReturnValuesEvaluator):
             if ret.status:
                 parsing = True
         elif ret.who == sheerka.get_evaluator_name(AddConceptEvaluator.NAME):
-            if not ret.status and ret.value.body.args[0] == "Duplicate object.":
+            if not ret.status and isinstance(ret.value.body, SheerkaDataProviderDuplicateKeyError):
                 add_concept_in_error = True
                 self.already_defined = ret.value.body.obj
             else:
@@ -14,16 +14,19 @@ def usage():
 
 def main(argv):
     try:
-        opts, args = getopt.getopt(argv, "hd", ["help", "debug"])
+        opts, args = getopt.getopt(argv, "hdl:", ["help", "debug", "logger="])
         debug = False
+        loggers = set()
         for o, a in opts:
             if o in ('-h', "--help"):
                 usage()
                 return True
             if o in ('-d', "--debug"):
                 debug = True
+            if o in ('-l', '--logger'):
+                loggers.add(a)
 
-        sheerka = Sheerka(debug=debug)
+        sheerka = Sheerka(debug=debug, loggers=loggers)
         sheerka.initialize()
 
         _in = core.utils.sysarg_to_string(args)
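A usage sketch of the new option handling (`argv` below is illustrative; in the real CLI it comes from `sys.argv[1:]`, and the entry-point script itself isn't shown in this diff):

    import getopt

    argv = ["-d", "-l", "init", "--logger=all", "eval", "this"]
    opts, args = getopt.getopt(argv, "hdl:", ["help", "debug", "logger="])

    # getopt reports long options with their leading dashes, hence '--logger'
    loggers = {a for o, a in opts if o in ("-l", "--logger")}
    assert loggers == {"init", "all"}
    assert args == ["eval", "this"]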
parsers/ConceptLexerParser.py

@@ -10,6 +10,7 @@ from dataclasses import field, dataclass
 from collections import defaultdict
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import Concept
+from core.sheerka import ExecutionContext
 from core.tokenizer import TokenKind, Tokenizer, Token
 from parsers.BaseParser import BaseParser, Node, ErrorNode
 import core.utils
@@ -42,6 +43,11 @@ class LexerNode(Node):
 
 
 class ConceptNode(LexerNode):
+    """
+    Returned by the ConceptLexerParser
+    It represents a recognized concept
+    """
+
     def __init__(self, concept, start, end, tokens=None, source=None, children=None):
         super().__init__(start, end)
         self.concept = concept
@@ -67,6 +73,10 @@ class ConceptNode(LexerNode):
 
 
 class NonTerminalNode(LexerNode):
+    """
+    Returned by the ConceptLexerParser
+    """
+
     def __init__(self, parsing_expression, start, end, children=None):
         super().__init__(start, end)
         self.parsing_expression = parsing_expression
@@ -82,6 +92,10 @@ class NonTerminalNode(LexerNode):
 
 
 class TerminalNode(LexerNode):
+    """
+    Returned by the ConceptLexerParser
+    """
+
     def __init__(self, parsing_expression, start, end, value):
         super().__init__(start, end)
         self.parsing_expression = parsing_expression
@@ -97,6 +111,27 @@ class GrammarErrorNode(ErrorNode):
     message: str
 
 
+@dataclass()
+class UnexpectedTokenErrorNode(ErrorNode):
+    message: str
+    expected_tokens: list
+
+
+@dataclass()
+class UnexpectedEndOfFileError(ErrorNode):
+    pass
+
+
+@dataclass()
+class UnknownConceptNode(ErrorNode):
+    concept_key: str
+
+
+@dataclass()
+class TooManyConceptNode(ErrorNode):
+    concept_key: str
+
+
 class ParsingExpression:
     def __init__(self, *args, **kwargs):
         self.elements = args
@@ -108,6 +143,15 @@ class ParsingExpression:
 
         self.rule_name = kwargs.get('rule_name', '')
 
+    def __eq__(self, other):
+        if not isinstance(other, ParsingExpression):
+            return False
+
+        return self.rule_name == other.rule_name and self.elements == other.elements
+
+    def __hash__(self):
+        return hash((self.rule_name, self.elements))
+
     def parse(self, parser):
         return self._parse(parser)
 
@@ -133,6 +177,10 @@ class Sequence(ParsingExpression):
 
         return NonTerminalNode(self, init_pos, end_pos, children)
 
+    def __repr__(self):
+        to_str = ", ".join(repr(n) for n in self.elements)
+        return f"({to_str})"
+
 
 class OrderedChoice(ParsingExpression):
     """
@@ -152,6 +200,10 @@ class OrderedChoice(ParsingExpression):
 
         return None
 
+    def __repr__(self):
+        to_str = "| ".join(repr(n) for n in self.elements)
+        return f"({to_str})"
+
 
 class Optional(ParsingExpression):
     """
@@ -178,6 +230,46 @@ class Optional(ParsingExpression):
 
         return selected_node
 
+    def __repr__(self):
+        if len(self.elements) == 1:
+            return f"{self.elements[0]}?"
+        else:
+            to_str = ", ".join(repr(n) for n in self.elements)
+            return f"({to_str})?"
+
+
+class ZeroOrMore(ParsingExpression):
+    """
+    ZeroOrMore will try to match the parsing expression zero or more
+    times. It will never fail.
+    """
+
+    def _parse(self, parser):
+        raise NotImplementedError()
+
+    # Uncomment when _parse is implemented
+    # def __repr__(self):
+    #     to_str = ", ".join(repr(n) for n in self.elements)
+    #     return f"({to_str})*"
+
+
+class OneOrMore(ParsingExpression):
+    """
+    OneOrMore will try to match the parsing expression one or more times.
+    """
+
+    def _parse(self, parser):
+        raise NotImplementedError()
+
+
+class UnorderedGroup(ParsingExpression):
+    """
+    Will try to match all of the parsing expressions in any order.
+    """
+
+    def _parse(self, parser):
+        raise NotImplementedError()
+
 
 class Match(ParsingExpression):
     """
@@ -197,13 +289,22 @@ class StrMatch(Match):
     Matches a literal
     """
 
-    def __init__(self, to_match, rule_name="", root=False, ignore_case=None):
+    def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
         super(Match, self).__init__(rule_name=rule_name, root=root)
         self.to_match = to_match
         self.ignore_case = ignore_case
 
     def __repr__(self):
-        return f"StrMatch('{self.to_match}')"
+        return f"'{self.to_match}'"
+
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+
+        if not isinstance(other, StrMatch):
+            return False
+
+        return self.to_match == other.to_match and self.ignore_case == other.ignore_case
 
     def _parse(self, parser):
         token = parser.get_token()
@@ -218,6 +319,31 @@ class StrMatch(Match):
         return None
 
 
+class ConceptMatch(Match):
+    """
+    Will match a concept
+    It is used only for rule definitions
+
+    When the grammar is created, it is replaced by the actual concept
+    """
+
+    def __init__(self, concept_name):
+        super(Match, self).__init__()
+        self.concept_name = concept_name
+
+    def __repr__(self):
+        return f"{self.concept_name}"
+
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+
+        if not isinstance(other, ConceptMatch):
+            return False
+
+        return self.concept_name == other.concept_name
+
+
 class CrossRef:
     """
     During the creation of the model,
@@ -227,11 +353,20 @@ class CrossRef:
     def __init__(self, concept):
         self.concept = concept
 
+    def __repr__(self):
+        return f"ref({self.concept.key})"
+
+    def __eq__(self, other):
+        if not isinstance(other, CrossRef):
+            return False
+
+        return self.concept == other.concept
+
 
 class ConceptLexerParser(BaseParser):
-    def __init__(self):
+    def __init__(self, concepts_dict=None):
         super().__init__("ConceptLexer")
-        self.concepts_dict = {}
+        self.concepts_dict = concepts_dict or {}  # dict of concept, grammar
         self.ignore_case = True
 
         self.token = None
@@ -295,22 +430,28 @@ class ConceptLexerParser(BaseParser):
             self.pos -= 1
             self.token = self.tokens[self.pos]
 
-    def initialize(self, dict):
+    def initialize(self, context, grammars):
         """
         Adds a bunch of concepts, and how they can be recognized
-        :param dict: dictionary of concept; concept_definition
+        :param context: execution context
+        :param grammars: dictionary of concept, concept_definition
         :return:
         """
+        self.context = context
+        self.sheerka = context.sheerka
         nodes_to_resolve = []
         concepts_to_resolve = set()
 
         # ## Gets the grammars
-        for concept, concept_def in dict.items():
+        for concept, concept_def in grammars.items():
             concept.init_key()  # make sure that the key is initialized
             grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve)
             self.concepts_dict[concept] = grammar
 
+        if self.has_error:
+            return self.sheerka.ret(self.name, False, self.error_sink)
+
         # ## Removes concepts with infinite recursions
         concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
         for concept in concepts_to_remove:
@@ -320,7 +461,20 @@ class ConceptLexerParser(BaseParser):
         # ## Resolves cross references and remove grammar with unresolved references
         self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)
 
+        if self.has_error:
+            return self.sheerka.ret(self.name, False, self.error_sink)
+        else:
+            return self.sheerka.ret(self.name, True, self.concepts_dict)
+
     def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve):
+        def get_concept(concept_name):
+            if concept_name in self.context.concepts_cache:
+                return self.context.concepts_cache[concept_name]
+            return self.sheerka.get(concept_name)
+
+        # TODO
+        # inner_get_model must not modify the initial ParsingExpression
+        # A copy must be created
         def inner_get_model(expression):
             if isinstance(expression, Concept):
                 ret = CrossRef(expression)
@@ -332,6 +486,16 @@ class ConceptLexerParser(BaseParser):
                 ret = expression
                 if ret.ignore_case is None:
                     ret.ignore_case = self.ignore_case
+            elif isinstance(expression, ConceptMatch):
+                to_match = get_concept(expression.concept_name)
+                if hasattr(to_match, "__iter__"):
+                    ret = self.add_error(TooManyConceptNode(expression.concept_name), False)
+                elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
+                    ret = self.add_error(UnknownConceptNode(expression.concept_name), False)
+                else:
+                    ret = CrossRef(to_match)
+                    concepts_to_resolve.add(concept)
+                    nodes_to_resolve.append(ret)
             elif isinstance(expression, Sequence) or \
                     isinstance(expression, OrderedChoice) or \
                     isinstance(expression, Optional):
@@ -341,7 +505,7 @@ class ConceptLexerParser(BaseParser):
                 concepts_to_resolve.add(concept)
                 nodes_to_resolve.append(ret)
             else:
-                ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."))
+                ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
             return ret
 
         model = inner_get_model(concept_def)
@@ -493,3 +657,242 @@ class ConceptLexerParser(BaseParser):
|
|||||||
by_end_pos[result.end].append(result)
|
by_end_pos[result.end].append(result)
|
||||||
|
|
||||||
return by_end_pos[max(by_end_pos)]
|
return by_end_pos[max(by_end_pos)]
|
||||||
|
|
||||||
|
|
||||||
|
class RegexParser:
|
||||||
|
"""
|
||||||
|
Parser used to transform litteral into ParsingExpression
|
||||||
|
example :
|
||||||
|
a | b, c -> Sequence(OrderedChoice(a, b) ,c)
|
||||||
|
|
||||||
|
'|' (pipe) is used for OrderedChoice
|
||||||
|
',' (comma) is used for Sequence
|
||||||
|
'?' (question mark) is used for Optional
|
||||||
|
'*' (star) is used for ZeroOrMore
|
||||||
|
'+' (plus) is used for OneOrMore
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
self.has_error = False
|
||||||
|
self.error_sink = []
|
||||||
|
self.name = BaseParser.PREFIX + "RegexParser"
|
||||||
|
|
||||||
|
self.lexer_iter = None
|
||||||
|
self._current = None
|
||||||
|
self.after_current = None
|
||||||
|
self.nb_open_par = 0
|
||||||
|
self.context = None
|
||||||
|
self.source = ""
|
||||||
|
self.sheerka = None
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if not isinstance(other, RegexParser):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def reset_parser(self, context, text):
|
||||||
|
self.context = context
|
||||||
|
self.sheerka = context.sheerka
|
||||||
|
|
||||||
|
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
|
||||||
|
self._current = None
|
||||||
|
self.after_current = None
|
||||||
|
self.nb_open_par = 0
|
||||||
|
|
||||||
|
self.next_token()
|
||||||
|
self.eat_white_space()
|
||||||
|
|
||||||
|
def add_error(self, error, next_token=True):
|
||||||
|
self.has_error = True
|
||||||
|
self.error_sink.append(error)
|
||||||
|
if next_token:
|
||||||
|
self.next_token()
|
||||||
|
return error
|
||||||
|
|
||||||
|
def get_token(self) -> Token:
|
||||||
|
return self._current
|
||||||
|
|
||||||
|
def next_token(self, skip_whitespace=False):
|
||||||
|
if self._current and self._current.type == TokenKind.EOF:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._current = self.after_current or next(self.lexer_iter)
|
||||||
|
self.source += str(self._current.value)
|
||||||
|
self.after_current = None
|
||||||
|
|
||||||
|
if skip_whitespace:
|
||||||
|
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
|
||||||
|
self._current = next(self.lexer_iter)
|
||||||
|
self.source += str(self._current.value)
|
||||||
|
except StopIteration:
|
||||||
|
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
|
||||||
|
|
||||||
|
def next_after(self):
|
||||||
|
if self.after_current is not None:
|
||||||
|
return self.after_current
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.after_current = next(self.lexer_iter)
|
||||||
|
# self.source += str(self.after_current.value)
|
||||||
|
return self.after_current
|
||||||
|
except StopIteration:
|
||||||
|
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
|
||||||
|
return self.after_current
|
||||||
|
|
||||||
|
def eat_white_space(self):
|
||||||
|
if self.after_current is not None:
|
||||||
|
self._current = self.after_current
|
||||||
|
self.source += str(self._current.value)
|
||||||
|
self.after_current = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
|
||||||
|
self._current = next(self.lexer_iter)
|
||||||
|
self.source += str(self._current.value)
|
||||||
|
except StopIteration:
|
||||||
|
self._current = None
|
||||||
|
|
||||||
|
def maybe_sequence(self, first, second):
|
||||||
|
token = self.get_token()
|
||||||
|
return token.type == second or token.type == first and self.next_after().type == second
|
||||||
|
|
||||||
|
    def parse(self, context: ExecutionContext, text):
        self.reset_parser(context, text)
        tree = self.parse_choice()

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=self.source,
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree))

        return ret

    def parse_choice(self):
        sequence = self.parse_sequence()

        self.eat_white_space()
        token = self.get_token()
        if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
            return sequence

        elements = [sequence]
        while True:
            # maybe eat the vertical bar
            self.eat_white_space()
            token = self.get_token()
            if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)

            sequence = self.parse_sequence()
            elements.append(sequence)

        return OrderedChoice(*elements)

    def parse_sequence(self):
        expr_and_modifier = self.parse_expression_and_modifier()
        token = self.get_token()
        if token is None or token.type == TokenKind.EOF or \
                self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
            return expr_and_modifier

        elements = [expr_and_modifier]
        while True:
            # maybe eat the comma
            token = self.get_token()
            if token is None or token.type == TokenKind.EOF or \
                    self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                    self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
                break
            self.eat_white_space()

            sequence = self.parse_expression_and_modifier()
            elements.append(sequence)

        return Sequence(*elements)

    def parse_expression_and_modifier(self):
        expression = self.parse_expression()

        token = self.get_token()

        if token.type == TokenKind.QMARK:
            self.next_token()
            return Optional(expression)

        if token.type == TokenKind.STAR:
            self.next_token()
            return ZeroOrMore(expression)

        if token.type == TokenKind.PLUS:
            self.next_token()
            return OneOrMore(expression)

        return expression

    def parse_expression(self):
        token = self.get_token()
        if token.type == TokenKind.EOF:
            self.add_error(UnexpectedEndOfFileError(), False)
        if token.type == TokenKind.LPAR:
            self.nb_open_par += 1
            self.next_token()
            expression = self.parse_choice()
            token = self.get_token()
            if token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
                return expression
            else:
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
                return expression

        if token.type == TokenKind.IDENTIFIER:
            self.next_token()
            return ConceptMatch(token.value)
            # concept = self.sheerka.get(str(token.value))
            # if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
            #     self.add_error(CannotResolveConceptNode(str(token.value)))
            #     self.next_token()
            #     return None
            # else:
            #     self.next_token()
            #     return concept

        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
        return ret

class ParsingExpressionVisitor:
    """
    visit ParsingExpression
    """

    def visit(self, parsing_expression):
        name = parsing_expression.__class__.__name__

        method = 'visit_' + name
        visitor = getattr(self, method, self.generic_visit)
        return visitor(parsing_expression)

    def generic_visit(self, parsing_expression):
        if hasattr(self, "visit_all"):
            self.visit_all(parsing_expression)

        for node in parsing_expression.elements:
            if isinstance(node, Concept):
                self.visit(ConceptMatch(node.key or node.name))
            elif isinstance(node, str):
                self.visit(StrMatch(node))
            else:
                self.visit(node)
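ParsingExpressionVisitor resolves 'visit_' plus the node's class name at runtime and falls back to generic_visit, which walks elements and normalizes Concept and str children into ConceptMatch / StrMatch. A self-contained mini-version with stub nodes (the tests further down in this commit define a similar ConceptVisitor against the real classes):

    # Stub node types; the real ConceptMatch / Sequence are the classes above.
    class ConceptMatch:
        def __init__(self, concept_name):
            self.concept_name = concept_name
            self.elements = []

    class Sequence:
        def __init__(self, *elements):
            self.elements = list(elements)

    class MiniVisitor:
        # same 'visit_' + class-name dispatch as ParsingExpressionVisitor
        def visit(self, node):
            method = 'visit_' + node.__class__.__name__
            return getattr(self, method, self.generic_visit)(node)

        def generic_visit(self, node):
            for child in node.elements:
                self.visit(child)

    class ConceptNameCollector(MiniVisitor):
        def __init__(self):
            self.concepts = set()

        def visit_ConceptMatch(self, node):
            # leaf: record the referenced concept, nothing to recurse into
            self.concepts.add(node.concept_name)

    collector = ConceptNameCollector()
    collector.visit(Sequence(ConceptMatch("mult"), ConceptMatch("add")))
    assert collector.concepts == {"mult", "add"}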
+70
-22
@@ -1,11 +1,14 @@
-from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
+from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
 from core.concept import ConceptParts
 import core.builtin_helpers
+import core.utils
 from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
 from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
 from dataclasses import dataclass, field
 import logging
+
+from parsers.ConceptLexerParser import RegexParser
+
 log = logging.getLogger(__name__)


@@ -180,20 +183,22 @@ class NameNode(DefaultParserNode):

 @dataclass()
 class DefConceptNode(DefaultParserNode):

     name: NameNode = NotInitializedNode()
     where: ReturnValueConcept = NotInitializedNode()
     pre: ReturnValueConcept = NotInitializedNode()
     post: ReturnValueConcept = NotInitializedNode()
     body: ReturnValueConcept = NotInitializedNode()
+    definition: ReturnValueConcept = NotInitializedNode()

-    def get_codes(self):
-        codes = {}
+    def get_asts(self):
+        asts = {}
         for part_key in ConceptParts:
             prop_value = getattr(self, part_key.value)
-            if hasattr(prop_value, "ast_"):
-                codes[part_key] = prop_value.ast_
-        return codes
+            if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body, ParserResultConcept) \
+                    and hasattr(prop_value.body.body, "ast_"):
+                asts[part_key] = prop_value.body.body.ast_
+        return asts


 class DefaultParser(BaseParser):
@@ -322,20 +327,44 @@ class DefaultParser(BaseParser):

         # init
         log.debug("It may be a definition of a concept")
-        concept_special_tokens = [def_token]
-        concept_found = DefConceptNode(concept_special_tokens)
+        keywords_tokens = [def_token]
+        concept_found = DefConceptNode(keywords_tokens)

         # the definition of a concept consists of several parts
         # Keywords.CONCEPT to get the name of the concept
+        # Keywords.FROM [Keywords.BNF] to get the definition of the concept
         # Keywords.AS to get the body
         # Keywords.WHERE to get the conditions to recognize for the variables
         # Keywords.PRE to know if the conditions to evaluate the concept
         # Keywords.POST to apply or verify once the concept is executed
-        def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
+        #
+        # Regroup the tokens by parts
+        first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)
+
+        # get the name
+        concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)
+
+        # get the definition
+        concept_found.definition = self.get_concept_definition(tokens_found_by_parts)
+
+        # get the ASTs for the remaining parts
+        asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
+        concept_found.where = asts_found_by_parts[Keywords.WHERE]
+        concept_found.pre = asts_found_by_parts[Keywords.PRE]
+        concept_found.post = asts_found_by_parts[Keywords.POST]
+        concept_found.body = asts_found_by_parts[Keywords.AS]
+
+        log.debug(f"Found DefConcept node '{concept_found}'")
+        return concept_found
+
+    def regroup_tokens_by_parts(self, keywords_tokens):
+
+        def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]

         # tokens found, when trying to recognize the parts
         tokens_found_by_parts = {
             Keywords.CONCEPT: [],
+            Keywords.FROM: None,
             Keywords.AS: None,
             Keywords.WHERE: None,
             Keywords.PRE: None,
@@ -348,7 +377,7 @@ class DefaultParser(BaseParser):
        # loop thru the tokens, and put them in the correct tokens_found_by_parts entry
         while token.type != TokenKind.EOF:
             if token.value in def_concept_parts:
-                concept_special_tokens.append(token)  # keep track of the keywords
+                keywords_tokens.append(token)  # keep track of the keywords
                 keyword = token.value
                 if tokens_found_by_parts[keyword]:
                     # a part is defined more than once
@@ -364,13 +393,15 @@ class DefaultParser(BaseParser):

             token = self.get_token()

-        # semantic checks
+        return first_token, tokens_found_by_parts

+    def get_concept_name(self, first_token, tokens_found_by_parts):
         name_first_token_index = 1
+        token = self.get_token()
         if first_token.value != Keywords.CONCEPT:
             self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
             name_first_token_index = 0

-        # Manage the name
         name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
         if len(name_tokens) == name_first_token_index:
             self.add_error(SyntaxErrorNode([], "Name is mandatory"))
@@ -381,8 +412,31 @@ class DefaultParser(BaseParser):
         if TokenKind.NEWLINE in [t.type for t in name_tokens]:
             self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))

-        concept_found.name = NameNode(name_tokens[name_first_token_index:])  # skip the first token
+        return NameNode(name_tokens[name_first_token_index:])  # skip the first token

+    def get_concept_definition(self, tokens_found_by_parts):
+        if tokens_found_by_parts[Keywords.FROM] is None:
+            return NotInitializedNode()
+
+        definition_tokens = tokens_found_by_parts[Keywords.FROM]
+        if definition_tokens[1].value != Keywords.BNF:
+            return NotInitializedNode()
+
+        tokens = core.utils.strip_tokens(definition_tokens[2:])
+        if len(tokens) == 0:
+            self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
+            return NotInitializedNode()
+
+        regex_parser = RegexParser()
+        new_context = self.context.push(self.name)
+        parsing_result = regex_parser.parse(new_context, tokens)
+        if not parsing_result.status:
+            self.add_error(parsing_result.value)
+            return NotInitializedNode()
+
+        return parsing_result
+
+    def get_concept_parts(self, tokens_found_by_parts):
         asts_found_by_parts = {
             Keywords.AS: NotInitializedNode(),
             Keywords.WHERE: NotInitializedNode(),
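The FROM handling above is easiest to see on a concrete token stream. Below is a runnable toy version of the regrouping that regroup_tokens_by_parts performs, using plain strings where the real code uses Keywords members and Token objects. The keyword spellings are an assumption (this diff only shows the enum member names), and the toy skips the duplicate-part error the real method reports:

    # Toy regrouping: each keyword opens a bucket; following tokens land in it.
    def regroup(tokens, parts):
        found = {p: None for p in parts}
        found["concept"] = []        # the name bucket always exists
        current = "concept"
        for tok in tokens:
            if tok in parts and found.get(tok) is None:
                current = tok
                found[tok] = []
            found[current].append(tok)
        return found

    parts = ["concept", "from", "as", "where", "pre", "post"]
    tokens = "concept add from bnf mult add as print".split()
    grouped = regroup(tokens, parts)
    # get_concept_definition() checks bucket[1] == 'bnf', strips the two
    # keyword tokens, and hands the rest ('mult add') to RegexParser.
    assert grouped["from"] == ["from", "bnf", "mult", "add"]
    assert grouped["where"] is None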
@@ -391,7 +445,7 @@ class DefaultParser(BaseParser):
         }

         for keyword in tokens_found_by_parts:
-            if keyword == Keywords.CONCEPT:
+            if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
                 continue  # already done

             log.debug("Processing part '" + keyword.name + "'")
@@ -418,13 +472,7 @@ class DefaultParser(BaseParser):

             asts_found_by_parts[keyword] = parsing_result

-        concept_found.where = asts_found_by_parts[Keywords.WHERE]
-        concept_found.pre = asts_found_by_parts[Keywords.PRE]
-        concept_found.post = asts_found_by_parts[Keywords.POST]
-        concept_found.body = asts_found_by_parts[Keywords.AS]
-
-        log.debug(f"Found DefConcept node '{concept_found}'")
-        return concept_found
+        return asts_found_by_parts

     # def parse_expression(self):
     #     return self.parse_addition()
@@ -20,7 +20,11 @@ class EmptyStringParser(BaseParser):
                 isinstance(text, list) and text == [] or \
                 text is None:
             log.debug(f"Recognized '{text}' as BuiltinConcepts.NOP.")
-            return sheerka.ret(self.name, True, sheerka.new(BuiltinConcepts.NOP))
+            return sheerka.ret(self.name, True, sheerka.new(
+                BuiltinConcepts.PARSER_RESULT,
+                parser=self,
+                source="",
+                body=sheerka.new(BuiltinConcepts.NOP)))

         log.debug(f"Failed to recognize '{text}'")
         return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
@@ -47,7 +47,15 @@ class ExactConceptParser(BaseParser):
                 if token.startswith(VARIABLE_PREFIX):
                     index = int(token[len(VARIABLE_PREFIX):])
                     concept.set_prop_by_index(index, words[i])
-                res.append(ReturnValueConcept(self.name, True, concept))
+                res.append(ReturnValueConcept(
+                    self.name,
+                    True,
+                    context.sheerka.new(
+                        BuiltinConcepts.PARSER_RESULT,
+                        parser=self,
+                        source=text if isinstance(text, str) else self.get_text_from_tokens(text),
+                        body=concept,
+                        try_parsed=concept)))
                 log.debug(f"Recognized '{text}' as '{concept}'")
                 recognized = True

@@ -9,6 +9,9 @@
 - E : events
 - O : object (with history management)
 - P : pickle
+- S : state
+- C : concept
+- D : concept definitions

 ## How concepts are serialized ?
 - get the id of the concept
@@ -10,7 +10,7 @@ from sdp.sheerkaSerializer import Serializer, SerializerContext
 import logging

 log = logging.getLogger(__name__)
+init_log = logging.getLogger(__name__ + ".init")

 def json_default_converter(o):
     """
@@ -278,7 +278,7 @@ class SheerkaDataProvider:
     REF_PREFIX = "##REF##:"

     def __init__(self, root=None):
-        log.debug("Initializing sdp.")
+        init_log.debug("Initializing sdp.")

         self.io = SheerkaDataProviderIO.get(root)
         self.first_time = self.io.first_time
@@ -312,6 +312,20 @@ class SheerkaDataProvider:
             else obj.get_digest() if hasattr(obj, "get_digest") \
             else None

+    @staticmethod
+    def get_obj_origin(obj):
+        """
+        Get the digest used to save obj if set
+        """
+        if isinstance(obj, dict) and Serializer.ORIGIN in obj:
+            return obj[Serializer.ORIGIN]
+
+        if hasattr(obj, Serializer.ORIGIN):
+            return getattr(obj, Serializer.ORIGIN)
+
+        return None
+
     @staticmethod
     def get_stream_digest(stream):
         sha256_hash = hashlib.sha256()
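get_obj_origin now accepts both plain objects and dictionaries, which matters once DictionarySerializer (added further down) round-trips dicts. A runnable restatement of the lookup order, with a local ORIGIN constant mirroring the new Serializer.ORIGIN value:

    ORIGIN = "##origin##"  # mirrors Serializer.ORIGIN after this commit

    def get_obj_origin(obj):
        # same lookup order as the method above: dict entry first, then attribute
        if isinstance(obj, dict) and ORIGIN in obj:
            return obj[ORIGIN]
        if hasattr(obj, ORIGIN):
            return getattr(obj, ORIGIN)
        return None

    class _Stub:
        pass

    stub = _Stub()
    setattr(stub, ORIGIN, "def456")
    assert get_obj_origin({ORIGIN: "abc123"}) == "abc123"
    assert get_obj_origin(stub) == "def456"
    assert get_obj_origin("just a string") is None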
@@ -460,10 +474,10 @@ class SheerkaDataProvider:
         obj_key = self.get_obj_key(obj) or key

         if isinstance(state.data[entry][key], list):
-            if not hasattr(obj, Serializer.ORIGIN):
+            obj_origin = self.get_obj_origin(obj)
+            if obj_origin is None:
                 raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj))

-            obj_origin = getattr(obj, Serializer.ORIGIN)
             state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed)

         else:
@@ -674,7 +688,9 @@ class SheerkaDataProvider:
         obj = self.serializer.deserialize(f, SerializerContext(origin=digest))

         # set the origin of the object
-        if not isinstance(obj, str):
+        if isinstance(obj, dict):
+            obj[Serializer.ORIGIN] = digest
+        elif not isinstance(obj, str):
             setattr(obj, Serializer.ORIGIN, digest)
         return obj

@@ -46,9 +46,9 @@ class SheerkaDataProviderIO:


 class SheerkaDataProviderFileIO(SheerkaDataProviderIO):
-    log = logging.getLogger("FileIO")

     def __init__(self, root):
+        self.log = logging.getLogger(self.__class__.__name__ + ".init")
         root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \
             if root is None \
             else path.abspath(root)
@@ -180,10 +180,13 @@ def on_close(dictionary_io, file_path, stream):
     :param stream:
     :return:
     """
+
     def decorator(func):
         def wrapper(*args, **kwargs):
             stream.seek(0)
             dictionary_io.cache[file_path] = stream.read()
             func(*args, **kwargs)
+
         return wrapper
+
     return decorator
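For reference, a self-contained sketch of how the on_close decorator behaves: whatever callback it wraps, the full stream content is flushed into dictionary_io.cache before the callback runs. The FakeDictionaryIO stub and the "mem://some/file" path are inventions for the sketch:

    import io

    class FakeDictionaryIO:
        def __init__(self):
            self.cache = {}

    dictionary_io = FakeDictionaryIO()
    stream = io.StringIO("serialized payload")

    def on_close(dictionary_io, file_path, stream):
        # same shape as the decorator above: cache the stream content,
        # then run the wrapped function
        def decorator(func):
            def wrapper(*args, **kwargs):
                stream.seek(0)
                dictionary_io.cache[file_path] = stream.read()
                func(*args, **kwargs)
            return wrapper
        return decorator

    @on_close(dictionary_io, "mem://some/file", stream)
    def finish():
        pass

    finish()
    assert dictionary_io.cache["mem://some/file"] == "serialized payload"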
@@ -12,6 +12,7 @@ import core.utils
 from core.concept import Concept

 log = logging.getLogger(__name__)
+init_log = logging.getLogger(__name__ + ".init")


 def json_default_converter(o):
@@ -40,17 +41,18 @@ class Serializer:
     USERNAME = "user_name"  # key to store user that as committed the snapshot
     MODIFICATION_DATE = "modification_date"  #
     PARENTS = "parents"
-    ORIGIN = "origin"
+    ORIGIN = "##origin##"
     HISTORY = "##history##"

     def __init__(self):
-        log.debug("Initializing serializers")
+        init_log.debug("Initializing serializers")
         self._cache = []

         # add builtin serializers
         self.register(EventSerializer())
         self.register(StateSerializer())
         self.register(ConceptSerializer())
+        self.register(DictionarySerializer())

     def register(self, serializer):
         """
@@ -58,7 +60,7 @@ class Serializer:
         :param serializer:
         :return:
         """
-        log.debug(f"Adding serializer {serializer}")
+        init_log.debug(f"Adding serializer {serializer}")
         self._cache.append(serializer)

     def serialize(self, obj, context):
@@ -212,8 +214,11 @@ class PickleSerializer(BaseSerializer):

 class StateSerializer(PickleSerializer):
     def __init__(self, ):
-        PickleSerializer.__init__(self, lambda obj: core.utils.get_full_qualified_name(
-            obj) == "sdp.sheerkaDataProvider.State", "S", 1)
+        PickleSerializer.__init__(
+            self,
+            lambda obj: core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State",
+            "S",
+            1)


 class ConceptSerializer(ObjectSerializer):
@@ -223,6 +228,15 @@ class ConceptSerializer(ObjectSerializer):
     def matches(self, obj):
         return isinstance(obj, Concept)


+class DictionarySerializer(PickleSerializer):
+    def __init__(self, ):
+        PickleSerializer.__init__(
+            self,
+            lambda obj: isinstance(obj, dict),
+            "D",
+            1)
+
 #
 # class SheerkaSerializer(ObjectSerializer):
 #     def __init__(self):
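How a serializer gets picked from the registry is not visible in this hunk; a plausible sketch is a first-match scan over the registered serializers' matches() predicates (an assumption — the real Serializer.serialize may differ). The "D" letter mirrors the new "D : concept definitions" entry in the doc above:

    # Toy registry: register() appends, pick() returns the first matching serializer.
    class TinyRegistry:
        def __init__(self):
            self._cache = []

        def register(self, serializer):
            self._cache.append(serializer)

        def pick(self, obj):
            for serializer in self._cache:
                if serializer.matches(obj):
                    return serializer
            return None

    class DictMatcher:
        letter = "D"  # stands in for DictionarySerializer's type letter

        def matches(self, obj):
            return isinstance(obj, dict)

    registry = TinyRegistry()
    registry.register(DictMatcher())
    assert registry.pick({"a": 1}).letter == "D"
    assert registry.pick("not a dict") is None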
@@ -0,0 +1,181 @@
+import ast
+
+import pytest
+
+from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
+from core.concept import VARIABLE_PREFIX, ConceptParts, Concept
+from core.sheerka import Sheerka, ExecutionContext
+from core.tokenizer import Tokenizer
+from evaluators.AddConceptEvaluator import AddConceptEvaluator
+from parsers.BaseParser import BaseParser
+from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch
+from parsers.DefaultParser import DefConceptNode, NameNode
+from parsers.ExactConceptParser import ExactConceptParser
+from parsers.PythonParser import PythonNode, PythonParser
+
+
+def get_context():
+    sheerka = Sheerka(skip_builtins_in_db=True)
+    sheerka.initialize("mem://")
+    return ExecutionContext("test", "xxx", sheerka)
+
+
+def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
+    concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
+
+    if body:
+        concept.body = get_concept_part(body)
+    if where:
+        concept.where = get_concept_part(where)
+    if pre:
+        concept.pre = get_concept_part(pre)
+    if post:
+        concept.post = get_concept_part(post)
+    if definition:
+        concept.definition = definition
+
+    return ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=concept))
+
+
+def get_concept_part(part):
+    if isinstance(part, str):
+        node = PythonNode(part, ast.parse(part, mode="eval"))
+        return ReturnValueConcept(
+            who="Parsers:DefaultParser",
+            status=True,
+            value=ParserResultConcept(
+                source=part,
+                parser=PythonParser(),
+                value=node))
+
+    if isinstance(part, PythonNode):
+        return ReturnValueConcept(
+            who="Parsers:DefaultParser",
+            status=True,
+            value=ParserResultConcept(
+                source=part.source,
+                parser=PythonParser(),
+                value=part))
+
+    if isinstance(part, ReturnValueConcept):
+        return part
+
+
+def get_concept_definition(source, parsing_expression):
+    return ReturnValueConcept(
+        who="Parsers:RegexParser",
+        status=True,
+        value=ParserResultConcept(
+            source=source,
+            parser=RegexParser(),
+            value=parsing_expression
+        )
+    )
+
+
+@pytest.mark.parametrize("ret_val, expected", [
+    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=DefConceptNode([]))), True),
+    (ReturnValueConcept(BaseParser.PREFIX + "some_name", False, ParserResultConcept(value=DefConceptNode([]))), False),
+    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a ParserResultConcept"), False),
+    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
+])
+def test_i_can_match(ret_val, expected):
+    context = get_context()
+    assert AddConceptEvaluator().matches(context, ret_val) == expected
+
+
+def test_that_the_source_is_correctly_set():
+    context = get_context()
+    def_concept_return_value = get_concept(
+        name="hello a",
+        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
+        where="isinstance(a, str )",
+        pre="a is not None",
+        body="print('hello' + a)")
+
+    evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
+
+    assert evaluated.status
+    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
+
+    created_concept = evaluated.body.body
+    assert created_concept.metadata.name == "hello a"
+    assert created_concept.metadata.where == "isinstance(a, str )"
+    assert created_concept.metadata.pre == "a is not None"
+    assert created_concept.metadata.post is None
+    assert created_concept.metadata.body == "print('hello' + a)"
+    assert created_concept.metadata.definition == "hello a"
+
+
+def test_that_the_ast_is_correctly_initialized():
+    context = get_context()
+    def_concept_return_value = get_concept(
+        name="hello a",
+        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
+        where="isinstance(a, str )",
+        pre="a is not None",
+        body="print('hello' + a)")
+
+    evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
+
+    assert evaluated.status
+    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
+
+    created_concept = evaluated.body.body
+
+    assert ConceptParts.WHERE in created_concept.cached_asts
+    assert ConceptParts.PRE in created_concept.cached_asts
+    assert ConceptParts.BODY in created_concept.cached_asts
+    assert ConceptParts.POST not in created_concept.cached_asts
+
+
+def test_that_the_new_concept_is_correctly_saved():
+    context = get_context()
+    def_concept_return_value = get_concept(
+        name="hello a",
+        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
+        where="isinstance(a, str )",
+        pre="a is not None",
+        body="print('hello' + a)")
+
+    from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
+    assert context.sheerka.isinstance(from_db, BuiltinConcepts.UNKNOWN_CONCEPT)
+
+    AddConceptEvaluator().eval(context, def_concept_return_value)
+    context.sheerka.concepts_cache = {}  # reset cache
+    from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
+
+    assert from_db.metadata.key == f"hello {VARIABLE_PREFIX}0"
+    assert from_db.metadata.name == "hello a"
+    assert from_db.metadata.where == "isinstance(a, str )"
+    assert from_db.metadata.pre == "a is not None"
+    assert from_db.metadata.post is None
+    assert from_db.metadata.body == "print('hello' + a)"
+    assert from_db.metadata.definition == "hello a"
+    assert len(from_db.props) == 1
+    assert "a" in from_db.props
+
+    assert from_db.cached_asts == {}  # ast is not saved in db
+
+
+def test_i_can_get_props_from_python_node():
+    ret_val = get_concept_part("isinstance(a, str)")
+    context = get_context()
+
+    assert AddConceptEvaluator.get_props(context.sheerka, ret_val, ["a"]) == ["a"]
+
+
+def test_i_can_get_props_from_another_concept():
+    concept = Concept("hello").set_prop("a").set_prop("b")
+    ret_val = ReturnValueConcept(who="some_parser",
+                                 status=True,
+                                 value=ParserResultConcept(value=concept))
+
+    assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["a", "b"]
+
+
+def test_i_can_get_props_from_definition():
+    parsing_expression = Sequence(ConceptMatch('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptMatch("add"))))
+    ret_val = get_concept_definition("mult (('+'|'-') add)?", parsing_expression)
+
+    assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["add", "mult"]
@@ -0,0 +1,7 @@
+import pytest
+
+from core.tokenizer import Tokenizer, Token, TokenKind
+from parsers.BaseParser import BaseParser
+
+
+
@@ -5,6 +5,7 @@ from core.concept import Concept
 from core.sheerka import Sheerka, ExecutionContext
 from evaluators.ConceptEvaluator import ConceptEvaluator
 from parsers.BaseParser import BaseParser
+from parsers.ExactConceptParser import ExactConceptParser


 def get_context():
@@ -13,12 +14,21 @@ def get_context():
     return ExecutionContext("test", "xxx", sheerka)


+def get_return_value(concept, source=None):
+    return ReturnValueConcept(
+        "some_name",
+        True,
+        ParserResultConcept(parser=ExactConceptParser(),
+                            source=source or concept.name,
+                            value=concept,
+                            try_parsed=concept))
+
+
 @pytest.mark.parametrize("ret_val, expected", [
-    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, Concept()), True),
-    (ReturnValueConcept(BaseParser.PREFIX + "some_name", False, Concept()), False),
-    (ReturnValueConcept("Not a parser", True, Concept()), False),
-    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a concept"), False),
-    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
+    (ReturnValueConcept("some_name", True, ParserResultConcept(value=Concept())), True),
+    (ReturnValueConcept("some_name", False, ParserResultConcept(value=Concept())), False),
+    (ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept")), False),
+    (ReturnValueConcept("some_name", True, Concept()), False),
 ])
 def test_i_can_match(ret_val, expected):
     context = get_context()
@@ -30,7 +40,7 @@ def test_concept_is_returned_when_no_body():
     concept = Concept(name="one").init_key()

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
+    item = get_return_value(concept)
     result = evaluator.eval(context, item)

     assert result.who == evaluator.name
@@ -44,7 +54,7 @@ def test_body_is_evaluated_when_python_body():
     concept = Concept(name="one", body="1").init_key()

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
+    item = get_return_value(concept)
     result = evaluator.eval(context, item)

     assert result.who == evaluator.name
@@ -60,7 +70,7 @@ def test_body_is_evaluated_when_concept_body():
     concept_un = Concept(name="un", body="one").init_key()

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
+    item = get_return_value(concept_un)
     result = evaluator.eval(context, item)

     assert result.who == evaluator.name
@@ -80,7 +90,7 @@ def test_body_is_evaluated_when_concept_body_with_a_body():
     concept_un = Concept(name="un", body="one").init_key()

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
+    item = get_return_value(concept_un)
     result = evaluator.eval(context, item)

     assert result.who == evaluator.name
@@ -97,7 +107,7 @@ def test_i_can_evaluate_longer_chains():
     concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
+    item = get_return_value(concept_d)
     result = evaluator.eval(context, item)

     assert result.status
@@ -112,7 +122,7 @@ def test_i_can_evaluate_longer_chains_2():
     concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
+    item = get_return_value(concept_d)
     result = evaluator.eval(context, item)

     assert result.status
@@ -133,7 +143,7 @@ def test_i_can_recognize_concept_properties():
                                             .set_prop("b", "two").init_key())

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
     result = evaluator.eval(context, item)

     assert result.status
@@ -156,7 +166,7 @@ def test_i_can_recognize_concept_properties_with_body():
                                             .set_prop("b", "two").init_key())

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
     result = evaluator.eval(context, item)

     assert result.status
@@ -174,7 +184,7 @@ def test_i_can_recognize_concept_properties_with_body_when_concept_has_a_body():
                                             .set_prop("b", "two").init_key())

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
     result = evaluator.eval(context, item)

     assert result.status
@@ -189,7 +199,7 @@ def test_i_cannot_recognize_a_concept_if_one_of_the_prop_is_unknown():
                                             .set_prop("b", "two").init_key())

     evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
     result = evaluator.eval(context, item)

     assert not result.status
@@ -2,8 +2,18 @@ import pytest
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import Concept
 from core.sheerka import Sheerka, ExecutionContext
+from core.tokenizer import Tokenizer, TokenKind
 from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
-    CrossRef
+    CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \
+    ParsingExpressionVisitor
+
+
+class ConceptVisitor(ParsingExpressionVisitor):
+    def __init__(self):
+        self.concepts = set()
+
+    def visit_ConceptMatch(self, node):
+        self.concepts.add(node.concept_name)
+
+
 @pytest.mark.parametrize("match, text", [
@@ -23,7 +33,7 @@ def test_i_can_match_simple_tokens(match, text):
     foo = Concept(name="foo")
     concepts = {foo: text}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, text)

@@ -38,7 +48,7 @@ def test_i_can_match_multiple_concepts_in_one_input():
     two = Concept(name="two")
     concepts = {one: "one", two: "two"}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two one")

@@ -69,7 +79,7 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
     two = Concept(name="two")
     concepts = {one: "one", two: "two"}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two three")
     assert not res.status
@@ -86,7 +96,7 @@ def test_i_can_match_sequence():
     foo = Concept(name="foo")
     concepts = {foo: Sequence("one", "two", "three")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two three")

@@ -100,7 +110,7 @@ def test_wrong_sequence_is_not_matched():
     foo = Concept(name="foo")
     concepts = {foo: Sequence("one", "two", "three")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two three one")

@@ -116,7 +126,7 @@ def test_i_cannot_match_sequence_if_end_of_file():
     foo = Concept(name="foo")
     concepts = {foo: Sequence("one", "two", "three")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two")
     assert not res.status
@@ -133,7 +143,7 @@ def test_i_always_choose_the_longest_match():
     concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}

     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two three")

@@ -149,7 +159,7 @@ def test_i_can_match_several_sequences():
     concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}

     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two three one two")

@@ -166,7 +176,7 @@ def test_i_can_match_ordered_choice():
     foo = Concept(name="foo")
     concepts = {foo: OrderedChoice("one", "two")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res1 = parser.parse(context, "one")
     assert res1.status
@@ -189,7 +199,7 @@ def test_i_cannot_match_ordered_choice_with_empty_alternative():
     foo = Concept(name="foo")
     concepts = {foo: Sequence(OrderedChoice("one", ""), "two")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "ok")  # because token[0] is not "one" and not "" (it is 'two')
     assert not res.status
@@ -201,7 +211,7 @@ def test_i_can_mix_sequences_and_ordered_choices():

     concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res1 = parser.parse(context, "twenty one ok")
     assert res1.status
@@ -225,7 +235,7 @@ def test_i_can_mix_ordered_choices_and_sequences():

     concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "twenty thirty")
     assert res.status
@@ -240,7 +250,7 @@ def test_i_cannot_parse_empty_optional():

     concepts = {foo: Optional("one")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "")
     assert not res.status
@@ -253,7 +263,7 @@ def test_i_can_parse_optional():

     concepts = {foo: Optional("one")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one")
     assert res.status
@@ -266,7 +276,7 @@ def test_i_can_parse_sequence_starting_with_optional():

     concepts = {foo: Sequence(Optional("twenty"), "one")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "twenty one")
     assert res.status
@@ -283,7 +293,7 @@ def test_i_can_parse_sequence_ending_with_optional():

     concepts = {foo: Sequence("one", "two", Optional("three"))}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two three")
     assert res.status
@@ -300,7 +310,7 @@ def test_i_can_parse_sequence_with_optional_in_between():

     concepts = {foo: Sequence("one", Optional("two"), "three")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two three")
     assert res.status
@@ -312,19 +322,16 @@ def test_i_can_parse_sequence_with_optional_in_between():


 def test_i_can_use_reference():
-    # The problem here is when there are multiple match for the same input
-    # The parsing result is a list of all concepts found
-    # So it's already a list that represents a sequence, not a choice
-    # So I need to create a choice concept
-    # create the return value for every possible graph
-    # --> The latter seems to be the best as we don't defer the resolution of the problem to someone else
+    # when there are multiple matches for the same input
+    # Do I need to create a choice concept ?
+    # No, create a return value for every possible graph
     context = get_context()
     foo = Concept(name="foo")
     bar = Concept(name="bar")

     concepts = {foo: Sequence("one", "two"), bar: foo}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two")
     assert len(res) == 2
@@ -350,7 +357,7 @@ def test_i_can_use_context_reference_with_multiple_levels():

     concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two")
     assert len(res) == 3
@@ -375,7 +382,7 @@ def test_order_is_not_important_when_using_references():

     concepts = {bar: foo, foo: Sequence("one", "two")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "one two")
     assert len(res) == 2
@@ -390,7 +397,7 @@ def test_i_can_parse_when_reference():

     concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "twenty two")
     assert res.status
@@ -415,7 +422,7 @@ def test_i_can_detect_duplicates_when_reference():
         foo: OrderedChoice("twenty", "thirty")
     }
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)

     res = parser.parse(context, "twenty")
     assert len(res) == 2
@@ -437,7 +444,7 @@ def test_i_can_detect_infinite_recursion():
         foo: bar
     }
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(get_context(), concepts)

     assert bar not in parser.concepts_dict
     assert foo not in parser.concepts_dict
@@ -452,7 +459,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
         foo: OrderedChoice(bar, "foo")
     }
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(get_context(), concepts)

     assert foo not in parser.concepts_dict  # removed because of the infinite recursion
     assert bar not in parser.concepts_dict  # removed because of the infinite recursion
@@ -464,7 +471,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
         foo: OrderedChoice("foo", bar)
     }
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(context, concepts)
     assert foo in parser.concepts_dict
     assert bar in parser.concepts_dict

@@ -485,7 +492,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
         foo: Sequence("one", bar, "two")
     }
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(get_context(), concepts)

     assert foo not in parser.concepts_dict  # removed because of the infinite recursion
     assert bar not in parser.concepts_dict  # removed because of the infinite recursion
@@ -500,7 +507,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
         foo: Sequence("one", OrderedChoice(bar, "other"), "two")
     }
     parser = ConceptLexerParser()
-    parser.initialize(concepts)
+    parser.initialize(get_context(), concepts)

     assert foo not in parser.concepts_dict  # removed because of the infinite recursion
     assert bar not in parser.concepts_dict  # removed because of the infinite recursion
@@ -510,6 +517,140 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
|
|||||||
# TODO infinite recursion with optional
|
# TODO infinite recursion with optional
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
+@pytest.mark.parametrize("expression, expected", [
+    ("'str'", StrMatch("str")),
+    ("1", StrMatch("1")),
+    (" 1", StrMatch("1")),
+    (",", StrMatch(",")),
+    ("'foo'?", Optional(StrMatch("foo"))),
+    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
+    ("'foo'+", OneOrMore(StrMatch("foo"))),
+    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
+    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
+    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
+    ("1 2 | 3 4+", OrderedChoice(
+        Sequence(StrMatch("1"), StrMatch("2")),
+        Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
+    ("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
+    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
+    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
+    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
+    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
+    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
+    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
+    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
+    ("(1 )", StrMatch("1")),
+])
+def test_i_can_parse_regex(expression, expected):
+    parser = RegexParser()
+    res = parser.parse(get_context(), Tokenizer(expression))
+
+    assert not parser.has_error
+    assert res.status
+    assert res.value.value == expected
+    assert res.value.source == expression
+
+
+@pytest.mark.parametrize("expression, error", [
+    ("1 ", UnexpectedEndOfFileError()),
+    ("1|", UnexpectedEndOfFileError()),
+    ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
+])
+def test_i_can_detect_errors(expression, error):
+    parser = RegexParser()
+    res = parser.parse(get_context(), Tokenizer(expression))
+    ret_value = res.value.value
+    assert parser.has_error
+    assert not res.status
+    assert ret_value[0] == error
+
+
+def test_i_can_parse_regex_with_reference():
+    expression = "foo"
+    parser = RegexParser()
+    res = parser.parse(get_context(), Tokenizer(expression))
+
+    assert res.status
+    assert res.value.value == ConceptMatch("foo")
+    assert res.value.source == expression
+
+
+def test_i_can_parse_cross_ref_with_modifier():
+    expression = "foo*"
+    parser = RegexParser()
+    res = parser.parse(get_context(), Tokenizer(expression))
+
+    assert res.status
+    assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
+    assert res.value.source == expression
+
+
+def test_i_can_parse_sequence_with_cross_ref():
+    expression = "foo 'and' bar+"
+    parser = RegexParser()
+    res = parser.parse(get_context(), Tokenizer(expression))

+    assert res.status
+    assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
+    assert res.value.source == expression
+
+
+def test_i_can_parse_choice_with_cross_ref():
+    foo = Concept("foo")
+    bar = Concept("bar")
+    context = get_context()
+    context.sheerka.add_in_cache(foo)
+    context.sheerka.add_in_cache(bar)
+
+    expression = "foo | bar?"
+    parser = RegexParser()
+    res = parser.parse(context, Tokenizer(expression))
+
+    assert res.status
+    assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
+    assert res.value.source == expression
+
+
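+# Two-stage flow: RegexParser compiles a BNF string into a parsing
+# expression, then ConceptLexerParser.initialize() receives a
+# {concept: expression} mapping so that parse() can recognize text with it.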
+def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
+    foo = Concept(name="foo")
+    bar = Concept(name="bar")
+    context = get_context()
+    context.sheerka.add_in_cache(foo)
+    context.sheerka.add_in_cache(bar)
+
+    regex_parser = RegexParser()
+    foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
+    bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
+
+    concepts = {bar: bar_definition, foo: foo_definition}
+    concept_parser = ConceptLexerParser()
+    concept_parser.initialize(context, concepts)
+
+    res = concept_parser.parse(context, "twenty two")
+    assert res.status
+    assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
+
+    res = concept_parser.parse(context, "thirty one")
+    assert res.status
+    assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
+
+    res = concept_parser.parse(context, "twenty")
+    assert res.status
+    assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
+
+
+def test_i_can_visit_parsing_expression():
+    mult = Concept(name="mult")
+    add = Concept(name="add")
+
+    visitor = ConceptVisitor()
+    visitor.visit(Sequence(mult, Optional(Sequence("+", add))))
+
+    assert sorted(list(visitor.concepts)) == ["add", "mult"]
+
+

 #
 # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
 #     context = get_context()

@@ -2,12 +2,15 @@ import pytest
 import ast

 from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
+from core.concept import Concept
 from core.sheerka import Sheerka, ExecutionContext
+from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser
 from parsers.PythonParser import PythonParser, PythonNode
 from core.tokenizer import Keywords, Tokenizer
 from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
 from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode


 # def nop():
 #     return NopNode()
 #

@@ -52,7 +55,7 @@ from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
 # return left_as_string == right_as_string
 #

-def get_concept(name, where=None, pre=None, post=None, body=None):
+def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
     concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))

     if body:

@@ -63,6 +66,12 @@ def get_concept(name, where=None, pre=None, post=None, body=None):
         concept.pre = get_concept_part(pre)
     if post:
         concept.post = get_concept_part(post)
+    if definition:
+        concept.definition = ReturnValueConcept(
+            "Parsers:RegexParser",
+            True,
+            definition)
+
     return concept


@@ -324,3 +333,29 @@ def test_new_line_is_not_allowed_in_the_name():

     assert not res.status
     assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")]
+
+
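+# Syntax under test: def concept <name> from bnf <definition> as <body>,
+# where <definition> is compiled by RegexParser into a parsing expression.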
+def test_i_can_parse_def_concept_from_regex():
+    text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
+    parser = DefaultParser()
+    res = parser.parse(get_context(), text)
+    node = res.value.value
+    definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
+    parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition)
+    expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
+
+    assert res.status
+    assert res.who == parser.name
+    assert res.value.source == text
+    assert isinstance(res.value, ParserResultConcept)
+    assert node == expected
+
+
+def test_i_can_detect_empty_bnf_declaration():
+    text = "def concept name from bnf as __definition[0]"
+
+    parser = DefaultParser()
+    res = parser.parse(get_context(), text)
+
+    assert not res.status
+    assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration")

@@ -41,7 +41,7 @@ def test_i_can_recognize_a_simple_concept():

     assert len(results) == 1
     assert results[0].status
-    assert results[0].value == concept
+    assert results[0].value.value == concept


 def test_i_can_recognize_concepts_defined_several_times():

@@ -53,14 +53,14 @@ def test_i_can_recognize_concepts_defined_several_times():
     results = ExactConceptParser().parse(context, source)

     assert len(results) == 2
-    results = sorted(results, key=lambda x: x.value.name)  # because of the usage of sets
+    results = sorted(results, key=lambda x: x.value.value.name)  # because of the usage of sets

     assert results[0].status
-    assert results[0].value.name == "hello a"
-    assert results[0].value.props["a"].value == "world"
+    assert results[0].value.value.name == "hello a"
+    assert results[0].value.value.props["a"].value == "world"

     assert results[1].status
-    assert results[1].value.name == "hello world"
+    assert results[1].value.value.name == "hello world"


 def test_i_can_recognize_a_concept_with_variables():

@@ -72,9 +72,10 @@ def test_i_can_recognize_a_concept_with_variables():

     assert len(results) == 1
     assert results[0].status
-    assert results[0].value.key == concept.key
-    assert results[0].value.props["a"].value == "10"
-    assert results[0].value.props["b"].value == "5"
+    concept_found = results[0].value.value
+    assert concept_found.key == concept.key
+    assert concept_found.props["a"].value == "10"
+    assert concept_found.props["b"].value == "5"


 def test_i_can_recognize_a_concept_with_duplicate_variables():

@@ -86,9 +87,10 @@ def test_i_can_recognize_a_concept_with_duplicate_variables():

     assert len(results) == 1
     assert results[0].status
-    assert results[0].value.key == concept.key
-    assert results[0].value.props["a"].value == "10"
-    assert results[0].value.props["b"].value == "5"
+    concept_found = results[0].value.value
+    assert concept_found.key == concept.key
+    assert concept_found.props["a"].value == "10"
+    assert concept_found.props["b"].value == "5"


 def test_i_can_manage_unknown_concept():

@@ -121,7 +123,7 @@ def test_i_can_detect_concept_from_tokens():

     assert len(results) == 1
     assert results[0].status
-    assert results[0].value == concept
+    assert results[0].value.value == concept


 def get_context():

+20 -1
@@ -1,5 +1,7 @@
 import ast

+import pytest
+
 from core.ast.nodes import NodeParent, GenericNodeConcept
 import core.ast.nodes
 from core.ast.visitors import ConceptNodeVisitor, UnreferencedNamesVisitor

@@ -102,7 +104,7 @@ def my_function(a,b):
     assert sheerka.value(visitor.names[6]) == "a"


-def test_i_can_get_non_referenced_variables():
+def test_i_can_get_unreferenced_variables():
     source = """
 def my_function(a,b):
     for i in range(b):

@@ -126,6 +128,23 @@ my_function(x,y)
     assert "y" in values


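+# "Unreferenced" means names that are read without being bound in the
+# snippet, e.g. both 'a' and 'int' in "isinstance(a, int)".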
+@pytest.mark.parametrize("source, expected", [
+    ("a,b", ["a", "b"]),
+    ("isinstance(a, int)", ["a", "int"])
+
+])
+def test_i_can_get_unreferenced_variables_from_simple_expressions(source, expected):
+    sheerka = get_sheerka()
+
+    node = ast.parse(source)
+    concept_node = core.ast.nodes.python_to_concept(node)
+
+    visitor = UnreferencedNamesVisitor(sheerka)
+    visitor.visit(concept_node)
+
+    assert sorted(list(visitor.names)) == expected
+
+
 def test_i_can_compare_NodeParent_with_tuple():
     node_parent = NodeParent(GenericNodeConcept("For", None), "target")
     assert node_parent == ("For", "target")

@@ -20,6 +20,13 @@ def test_i_can_get_concept_key(name, variables, expected):
     assert concept.metadata.key == expected


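+# A single-token concept name keeps its literal key even when a prop with
+# the same name exists: the key stays "plus", not a variable placeholder.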
+def test_key_does_not_use_variable_when_definition_is_set():
+    concept = Concept("plus").set_prop('plus')
+
+    concept.init_key()
+    assert concept.metadata.key == "plus"
+
+
 def test_i_can_serialize():
     """
     Test concept.to_dict()

+38 -2
@@ -9,6 +9,8 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
 from core.concept import Concept, PROPERTIES_TO_SERIALIZE
 from core.sheerka import Sheerka, ExecutionContext
 from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
+from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
+    ConceptLexerParser
 from sdp.sheerkaDataProvider import SheerkaDataProvider

 tests_root = path.abspath("../build/tests")

@@ -422,7 +424,7 @@ as:
 def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept():
     """
     In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b')
-    are correctly detected, because of the concept 'a plus b' in its body
+    are correctly detected, thanks to the source code 'a plus b' in its body
     :return:
     """
     sheerka = get_sheerka()

@@ -558,7 +560,7 @@ def test_i_can_manage_concepts_with_the_same_key_when_values_are_the_same():
     assert res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME)


-def test_i_can_create_concepts_on_python_codes():
+def test_i_can_create_concepts_with_python_code_as_body():
     sheerka = get_sheerka()
     context = get_context(sheerka)


@@ -570,6 +572,40 @@ def test_i_can_create_concepts_on_python_codes():
     assert isinstance(res[0].value, list)


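+# End-to-end check of the bnf feature: evaluating "def concept ... from bnf"
+# stores the raw definition in metadata.definition, the compiled expression
+# in concept.bnf, and the referenced names as props.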
+def test_i_can_create_concept_with_bnf_definition():
+    sheerka = get_sheerka()
+    a = Concept("a")
+    sheerka.add_in_cache(a)
+    sheerka.concepts_grammars = ConceptLexerParser().initialize(
+        get_context(sheerka),
+        {a: OrderedChoice("one", "two")}).body
+
+    res = sheerka.eval("def concept plus from bnf a ('plus' plus)?")
+    assert len(res) == 1
+    assert res[0].status
+    assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT)
+
+    saved_concept = sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, "plus")
+    assert saved_concept.key == "plus"
+    assert saved_concept.metadata.definition == "a ('plus' plus)?"
+    assert "a" in saved_concept.props
+    assert "plus" in saved_concept.props
+
+    saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
+    expected_bnf = Sequence(
+        ConceptMatch("a"),
+        Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))),
+        rule_name="plus")
+    assert saved_definitions[saved_concept] == expected_bnf
+
+    new_concept = res[0].value.body
+    assert new_concept.metadata.name == "plus"
+    assert new_concept.metadata.definition == "a ('plus' plus)?"
+    assert new_concept.bnf == expected_bnf
+    assert "a" in new_concept.props
+    assert "plus" in new_concept.props
+
+
 def get_sheerka(root="mem://", skip_builtins_in_db=True):
     sheerka = Sheerka(skip_builtins_in_db)
     sheerka.initialize(root)

@@ -311,6 +311,18 @@ def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root):
     "mem://"
 ])
 def test_i_can_add_a_dict(root):
+    """
+    Adding a dictionary.
+    Note that there is no key when adding a dictionary.
+
+    If you add {'my_key': 'my_value'},
+    'my_key' is not considered to be the key of the entry,
+
+    because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'}
+    there would now be multiple keys.
+
+    So for dictionary entries, the key is not managed.
+    """
     sdp = SheerkaDataProvider(root)
     obj = {"my_key": "my_value"}


@@ -735,6 +747,7 @@ def test_i_can_set_using_reference(root):
     # sanity check, make sure that I can load back
     loaded = sdp.get(entry, key)
     assert loaded == ObjWithKey(2, "foo")
+    assert getattr(loaded, Serializer.ORIGIN) == "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268"


 @pytest.mark.parametrize("root", [

@@ -754,7 +767,35 @@ def test_i_can_add_reference_of_an_object_with_a_key(root):
     assert key == obj.key
     assert entry == "entry"
     assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
-    assert sdp.load_obj(digest) == obj
+
+    loaded = sdp.load_obj(digest)
+    assert loaded == obj
+    assert getattr(loaded, Serializer.ORIGIN) == digest


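+# With use_ref=True the entry stores only "REF_PREFIX + digest"; the object
+# itself is persisted separately and can be loaded back with load_obj(digest).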
+@pytest.mark.parametrize("root", [
+    ".sheerka",
+    "mem://"
+])
+def test_i_can_add_reference_a_dictionary(root):
+    sdp = SheerkaDataProvider(root)
+    obj = {"my_key": "value1"}
+
+    obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
+    sdp.serializer.register(obj_serializer)
+
+    entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
+    state = sdp.load_state(sdp.get_snapshot())
+    digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):]
+
+    assert key is None
+    assert entry == "entry"
+    assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}
+
+    loaded = sdp.load_obj(digest)
+    assert loaded["my_key"] == obj["my_key"]
+    assert loaded[Serializer.ORIGIN] == digest
+    assert len(loaded) == 2
+
+
 @pytest.mark.parametrize("root", [

@@ -1,6 +1,8 @@
 import core.utils
 import pytest

+from core.tokenizer import Token, TokenKind
+

 @pytest.mark.parametrize("lst, as_string", [
     (None, "",),

@@ -76,3 +78,63 @@ def test_i_can_get_sub_classes():
 def test_i_can_product(a, b, expected):
     res = core.utils.product(a, b)
     assert res == expected
+
+
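+# strip_tokens() removes leading and trailing WHITESPACE/NEWLINE tokens,
+# in any mix and order, and leaves everything in between untouched.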
+@pytest.mark.parametrize("input_as_list, expected_as_list", [
+    ([" "], []),
+    ([" ", "one"], ["one"]),
+    (["one", " "], ["one"]),
+    ([" ", "one", " "], ["one"]),
+
+    (["\n", "one"], ["one"]),
+    (["one", "\n"], ["one"]),
+    (["\n", "one", "\n"], ["one"]),
+
+    ([" ", "\n", "one"], ["one"]),
+    (["one", " ", "\n"], ["one"]),
+    ([" ", "\n", "one", " ", "\n"], ["one"]),
+
+    (["\n", " ", "one"], ["one"]),
+    (["one", "\n", " "], ["one"]),
+    (["\n", " ", "one", "\n", " "], ["one"]),
+
+    ([" ", "\n", " ", "one"], ["one"]),
+    (["one", " ", "\n", " "], ["one"]),
+    ([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),
+
+    (["\n", " ", "\n", "one"], ["one"]),
+    (["one", "\n", " ", "\n"], ["one"]),
+    (["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),
+])
+def test_i_can_strip(input_as_list, expected_as_list):
+    actual = core.utils.strip_tokens(get_tokens(input_as_list))
+    expected = get_tokens(expected_as_list)
+    assert actual == expected
+
+
+def test_by_default_eof_is_not_stripped():
+    actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", "<EOF>"]))
+    expected = get_tokens(["one", "two", " ", "\n", "<EOF>"])
+    assert actual == expected
+
+
+def test_i_can_strip_eof():
+    actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", "<EOF>"]), True)
+    expected = get_tokens(["one", "two"])
+    assert actual == expected
+
+
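+# Test helper: maps " ", "\n" and "<EOF>" markers to WHITESPACE, NEWLINE
+# and EOF tokens, and everything else to IDENTIFIER tokens.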
+def get_tokens(lst):
+    res = []
+    for e in lst:
+        if e == " ":
+            res.append(Token(TokenKind.WHITESPACE, " ", 0, 0, 0))
+        elif e == "\n":
+            res.append(Token(TokenKind.NEWLINE, "\n", 0, 0, 0))
+        elif e == "<EOF>":
+            res.append(Token(TokenKind.EOF, "\n", 0, 0, 0))
+        else:
+            res.append(Token(TokenKind.IDENTIFIER, e, 0, 0, 0))
+
+    return res