Added BNF when adding a new concept + started log filtering

2019-12-13 20:26:11 +01:00
parent 75c8793d53
commit c668cc46d2
29 changed files with 1487 additions and 190 deletions
+1
@@ -190,3 +190,4 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud
predicates.append(res) predicates.append(res)
return predicates return predicates
+8 -8
@@ -3,6 +3,7 @@ from dataclasses import dataclass
from enum import Enum from enum import Enum
import logging import logging
import core.utils
from core.tokenizer import Tokenizer, TokenKind from core.tokenizer import Tokenizer, TokenKind
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -18,8 +19,7 @@ VARIABLE_PREFIX = "__var__"
class ConceptParts(Enum): class ConceptParts(Enum):
""" """
Helper class, Note quite sure that is it that useful Lists metadata that can contain some code
I guess, I was learning nums with Python...
""" """
WHERE = "where" WHERE = "where"
PRE = "pre" PRE = "pre"
@@ -85,6 +85,7 @@ class Concept:
self.metadata = metadata self.metadata = metadata
self.props = {} # list of Property for this concept self.props = {} # list of Property for this concept
self.cached_asts = {} # cached ast for the where, pre, post and body parts self.cached_asts = {} # cached ast for the where, pre, post and body parts
self.bnf = None
def __repr__(self): def __repr__(self):
return f"({self.metadata.id}){self.metadata.name}" return f"({self.metadata.id}){self.metadata.name}"
@@ -134,9 +135,9 @@ class Concept:
return self return self
if tokens is None: if tokens is None:
tokens = iter(Tokenizer(self.metadata.name)) tokens = list(Tokenizer(self.metadata.name))
variables = list(self.props.keys()) variables = list(self.props.keys()) if len(core.utils.strip_tokens(tokens, True)) > 1 else []
key = "" key = ""
first = True first = True
@@ -171,12 +172,11 @@ class Concept:
:param codes: :param codes:
:return: :return:
""" """
possibles_codes = ConceptParts.get_parts()
if codes is None: if codes is None:
return return
for key in codes: for key in codes:
if key in possibles_codes: self.cached_asts[key] = codes[key]
self.cached_asts[ConceptParts(key)] = codes[key]
return self return self
@@ -231,7 +231,7 @@ class Concept:
return self return self
def set_prop(self, prop_name: str, prop_value=None): def set_prop(self, prop_name: str, prop_value=None):
self.props[prop_name] = Property(prop_name, prop_value) self.props[prop_name] = Property(prop_name, prop_value) # Python 3.7+ keeps insertion order in dictionaries
return self return self
def set_prop_by_index(self, index: int, prop_value): def set_prop_by_index(self, index: int, prop_value):
+85 -24
@@ -1,4 +1,6 @@
from dataclasses import dataclass from dataclasses import dataclass, field
from functools import lru_cache
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
from evaluators.BaseEvaluator import OneReturnValueEvaluator from evaluators.BaseEvaluator import OneReturnValueEvaluator
@@ -10,8 +12,10 @@ import core.builtin_helpers
import logging import logging
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
init_log = logging.getLogger(__name__ + ".init")
concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION] concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
class Sheerka(Concept): class Sheerka(Concept):
@@ -19,22 +23,29 @@ class Sheerka(Concept):
Main controller for the project Main controller for the project
""" """
CONCEPTS_ENTRY = "All_Concepts" CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
USER_CONCEPTS_KEYS = "User_Concepts" BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
def __init__(self, debug=False, skip_builtins_in_db=False): def __init__(self, debug=False, skip_builtins_in_db=False, loggers=None):
log.debug("Starting Sheerka.") log.debug("Starting Sheerka.")
super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA) super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
# cache of the most used concepts # cache of the most used concepts
# Note that these are only templates # Note that these are only templates
# They are used as a footprint for instantiation # They are used as a footprint for instantiation
# Except of course when the concept is supposed to be unique
self.concepts_cache = {} self.concepts_cache = {}
# cache for builtin types. #
# It allow instantiation of a builtin clas # Cache for all concepts BNF
self.builtin_cache = {} self.concepts_definitions = {}
#
# cache for concept grammars
# a grammar can be seen as a resolved BNF
self.concepts_grammars = {}
# a concept can be instantiated # a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance # ex: File is a concept, but File('foo.txt') is an instance
@@ -45,14 +56,16 @@ class Sheerka(Concept):
# ex: hello => say('hello') # ex: hello => say('hello')
self.rules = [] self.rules = []
self.sdp = None self.sdp: SheerkaDataProvider = None # SheerkaDataProvider
self.parsers = [] self.builtin_cache = {} # cache for builtin concepts
self.evaluators = [] self.parsers = {} # cache for builtin parsers
self.evaluators = [] # cache for builtin evaluators
self.evaluators_prefix = None self.evaluators_prefix: str = None
self.parsers_prefix = None self.parsers_prefix: str = None
self.debug = debug self.debug = debug
self.loggers = loggers or []
self.skip_builtins_in_db = skip_builtins_in_db self.skip_builtins_in_db = skip_builtins_in_db
def initialize(self, root_folder: str = None): def initialize(self, root_folder: str = None):
@@ -85,7 +98,7 @@ class Sheerka(Concept):
Initializes the builtin concepts Initializes the builtin concepts
:return: None :return: None
""" """
log.debug("Initializing builtin concepts") init_log.debug("Initializing builtin concepts")
builtins_classes = self.get_builtins_classes_as_dict() builtins_classes = self.get_builtins_classes_as_dict()
# this all initialization of the builtins seems to be little bit complicated # this all initialization of the builtins seems to be little bit complicated
@@ -101,11 +114,11 @@ class Sheerka(Concept):
if not self.skip_builtins_in_db: if not self.skip_builtins_in_db:
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key) from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
if from_db is None: if from_db is None:
log.debug(f"'{concept.name}' concept is not found in db. Adding.") init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
self.set_id_if_needed(concept, True) self.set_id_if_needed(concept, True)
self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True) self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
else: else:
log.debug(f"Found concept '{from_db}' in db. Updating.") init_log.debug(f"Found concept '{from_db}' in db. Updating.")
concept.update_from(from_db) concept.update_from(from_db)
self.add_in_cache(concept) self.add_in_cache(concept)
@@ -120,8 +133,8 @@ class Sheerka(Concept):
if parser.__module__ == base_class.__module__: if parser.__module__ == base_class.__module__:
continue continue
log.debug(f"Adding builtin parser '{parser.__name__}'") init_log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers.append(parser) self.parsers[core.utils.get_full_qualified_name(parser)] = parser
def initialize_builtin_evaluators(self): def initialize_builtin_evaluators(self):
""" """
@@ -129,14 +142,26 @@ class Sheerka(Concept):
:return: :return:
""" """
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"): for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator) self.evaluators.append(evaluator)
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"): for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator) self.evaluators.append(evaluator)
def logger_filter(self, record: logging.LogRecord):
if 'all' in self.loggers:
return True
ret = True
if 'init' not in self.loggers and record.name.endswith(".init"):
ret = False
return ret
def init_logging(self): def init_logging(self):
handler = logging.StreamHandler()
handler.addFilter(self.logger_filter)
if self.debug: if self.debug:
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
log_level = logging.DEBUG log_level = logging.DEBUG
@@ -144,7 +169,7 @@ class Sheerka(Concept):
log_format = "%(message)s" log_format = "%(message)s"
log_level = logging.INFO log_level = logging.INFO
logging.basicConfig(format=log_format, level=log_level) logging.basicConfig(format=log_format, level=log_level, handlers=[handler])
def eval(self, text: str): def eval(self, text: str):
""" """
@@ -153,7 +178,9 @@ class Sheerka(Concept):
:param text: :param text:
:return: :return:
""" """
log.debug(f"Evaluating '{text}'.")
evt_digest = self.sdp.save_event(Event(text)) evt_digest = self.sdp.save_event(Event(text))
log.debug(f"{evt_digest=}")
exec_context = ExecutionContext(self.key, evt_digest, self) exec_context = ExecutionContext(self.key, evt_digest, self)
# Before parsing # Before parsing
@@ -183,7 +210,7 @@ class Sheerka(Concept):
debug_text = "'" + text + "'" if isinstance(text, str) \ debug_text = "'" + text + "'" if isinstance(text, str) \
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens" else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
log.debug(f"Parsing {debug_text}") log.debug(f"Parsing {debug_text}")
for parser in self.parsers: for parser in self.parsers.values():
p = parser() p = parser()
res = p.parse(context, text) res = p.parse(context, text)
if isinstance(res, list): if isinstance(res, list):
@@ -193,7 +220,7 @@ class Sheerka(Concept):
return result return result
def process(self, context, return_values, initial_concepts=None): def process(self, context, return_values, initial_concepts=None):
log.debug(f"Processing parsing result. context concept={initial_concepts}") log.debug(f"{initial_concepts=}. Processing " + core.utils.pp(return_values))
# return_values must be a list # return_values must be a list
if not isinstance(return_values, list): if not isinstance(return_values, list):
@@ -303,6 +330,8 @@ class Sheerka(Concept):
""" """
concept.init_key() concept.init_key()
concepts_definitions = None
init_ret_value = None
# checks for duplicate concepts # checks for duplicate concepts
if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()): if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
@@ -312,14 +341,33 @@ class Sheerka(Concept):
# set id before saving in db # set id before saving in db
self.set_id_if_needed(concept, False) self.set_id_if_needed(concept, False)
# add the BNF if known
if concept.bnf:
concepts_definitions = self.concepts_definitions.copy()
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy())
sub_context = context.push(self.name, "Initializing concept definition")
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
if not init_ret_value.status:
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(init_ret_value.value))
# save the new context in sdp # save the new context in sdp
try: try:
self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True) self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
if concepts_definitions is not None:
self.sdp.set(context.event_digest, self.CONCEPTS_DEFINITIONS_ENTRY, concepts_definitions, use_ref=True)
except SheerkaDataProviderDuplicateKeyError as error: except SheerkaDataProviderDuplicateKeyError as error:
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0]) return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
# add in cache for quick further reference # Updates the caches
self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
if concepts_definitions is not None:
self.concepts_definitions = concepts_definitions
if init_ret_value is not None and init_ret_value.status:
self.concepts_grammars = init_ret_value.body
# process the return if needed # process the return if needed
ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
@@ -514,6 +562,18 @@ class Sheerka(Concept):
return (self.value(obj) for obj in objs) return (self.value(obj) for obj in objs)
def is_success(self, obj):
if isinstance(obj, bool):
return obj
if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
return obj.status
if self.isinstance(obj, BuiltinConcepts.ERROR):
return False
return False
def isinstance(self, a, b): def isinstance(self, a, b):
""" """
return true if the concept a is an instance of the concept b return true if the concept a is an instance of the concept b
@@ -603,6 +663,7 @@ class ExecutionContext:
sheerka: Sheerka # sheerka sheerka: Sheerka # sheerka
desc: str = None # human description of what is going on desc: str = None # human description of what is going on
obj: Concept = None # what is the subject of the execution context (if known) obj: Concept = None # what is the subject of the execution context (if known)
concepts_cache: dict = field(default_factory=dict)
def push(self, who, desc=None, obj=None): def push(self, who, desc=None, obj=None):
return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj) return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj)
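The log-filtering change above is plain stdlib logging: initialization chatter goes to "<module>.init" child loggers, and a callable filter attached to the StreamHandler drops those records unless they were requested. A minimal, self-contained sketch of the same pattern (logger names here are illustrative, not the project's):

::

import logging

def make_init_filter(enabled_loggers):
    # Mirrors the logger_filter added above: drop records emitted on
    # "*.init" loggers unless 'init' (or 'all') was requested.
    def logger_filter(record: logging.LogRecord) -> bool:
        if 'all' in enabled_loggers:
            return True
        return not (record.name.endswith(".init") and 'init' not in enabled_loggers)
    return logger_filter

handler = logging.StreamHandler()
handler.addFilter(make_init_filter(set()))  # as if no -l/--logger flag was given
logging.basicConfig(format="%(name)s [%(levelname)s] %(message)s",
                    level=logging.DEBUG, handlers=[handler])

logging.getLogger("core.sdp.init").debug("hidden: 'init' logging not enabled")
logging.getLogger("core.sdp").debug("shown: a regular debug record")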
+2 -21
@@ -80,6 +80,8 @@ class LexerError(Exception):
class Keywords(Enum): class Keywords(Enum):
DEF = "def" DEF = "def"
CONCEPT = "concept" CONCEPT = "concept"
FROM = "from"
BNF = "bnf"
AS = "as" AS = "as"
WHERE = "where" WHERE = "where"
PRE = "pre" PRE = "pre"
@@ -308,24 +310,3 @@ class Tokenizer:
1 if lines_count > 0 else start_column + len(result)) 1 if lines_count > 0 else start_column + len(result))
return result, lines_count return result, lines_count
def seek(self, words):
if self.i == self.text_len:
return 0
# init
offsets = {}
start_index = self.i
buffer = ""
while self.i < self.text_len:
c = self.text[self.i]
# skip white space
if c in (" ", "\t"):
self.i += 1
continue
for word in words:
if c == word[offset]:
os
+63 -5
@@ -3,6 +3,8 @@ import inspect
import pkgutil import pkgutil
import sys import sys
from core.tokenizer import TokenKind
def sysarg_to_string(argv): def sysarg_to_string(argv):
""" """
@@ -72,11 +74,18 @@ def get_full_qualified_name(obj):
:param obj: :param obj:
:return: :return:
""" """
module = obj.__class__.__module__ if obj.__class__ == type:
if module is None or module == str.__class__.__module__: module = obj.__module__
return obj.__class__.__name__ # Avoid reporting __builtin__ if module is None or module == str.__class__.__module__:
return obj.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__name__
else: else:
return module + '.' + obj.__class__.__name__ module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__class__.__name__
def get_classes(module_name): def get_classes(module_name):
@@ -137,7 +146,7 @@ def remove_from_list(lst, to_remove_predicate):
def product(a, b): def product(a, b):
""" """
Kind of cartesian product between list a and b Kind of cartesian product between lists a and b
knowing that a is also a list knowing that a is also a list
So it's a cartesian product between a list of list and a list So it's a cartesian product between a list of list and a list
@@ -155,3 +164,52 @@ def product(a, b):
res.append(items) res.append(items)
return res return res
def strip_quotes(text):
if not isinstance(text, str):
return text
if text == "":
return ""
if text[0] == "'" or text[0] == '"':
return text[1:-1]
return text
def strip_tokens(tokens, strip_eof=False):
"""
Remove the starting and trailing spaces and newline
"""
if tokens is None:
return None
start = 0
length = len(tokens)
while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
start += 1
if start == length:
return []
end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
if strip_eof \
else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
end = length - 1
while end > 0 and tokens[end].type in end_tokens:
end -= 1
return tokens[start: end + 1]
def pp(items):
if not hasattr(items, "__iter__"):
return str(items)
if len(items) == 0:
return str(items)
return " \n" + " \n".join(str(item) for item in items)
+105 -1
@@ -493,4 +493,108 @@ Even now that I am writing it, I just can't believe it. I must have implemented
it wrong. But the profiling shows that the time is lost in the under layers of the it wrong. But the profiling shows that the time is lost in the under layers of the
FS library. FS library.
It's a shame! It's a shame!
2019-12-01
**********
Using BNF to define a concept
"""""""""""""""""""""""""""""
I always knew that there would be several ways to define the body of a concept (the
same goes for the 'pre', 'post' and 'where' parts). It can be defined as Python code,
or as something that is related to concepts. It can even be a new language that I will
design. The important point is that, contrary to traditional programming languages,
Sheerka must remain extensible.
Same goes for the definition of the name.
The traditional form is:
::
def concept foo bar baz as ...
So the concept is defined by the sequence 'foo', then 'bar', then 'baz', in that order.
Another way is
::
def concept a plus b where a,b as ...
In this form, a and b are supposed to be variables.
It will be matched against :code:`one plus two`.
The concept name is 'a plus b'. It is a quick way to declare a concept with variables,
but if someone defines another concept
::
def concept number1 plus number2 where number1,number2 as ...
This will produce another concept (though with the same key). I guess that, at
some point, Sheerka will be able to detect that the concepts are the same, but
the name of the concept includes its variables, which may be annoying in some
situations.
Plus, it's not possible to define rule precedence in this way. For example,
::
def concept a plus b as ...
def concept a times b as ...
How do you express that multiplication has a higher priority in, for example,
:code:`one plus two times three`?
The only right answer, at least to me, is to implement something that is inspired
by the BNF definition of a grammar.
So the definition of the concept will look like
::
def concept term as factor (('+' | '-') term)?
def concept factor as number (('*' | '/') factor)?
def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
This form seems great, but in the definitions of term and factor, there is no more
room for the real body, i.e. once the components are recognized, what do we do with them?
So we can try
::
def concept factor (('+') factor)* as factor[0] + factor[i]
def concept number (('*') number)? as number[0] + number[i]
def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
The body is defined, but the name of the concept is too complicated, e.g. factor (('+') factor)*.
It's practically impossible to reference a concept that is defined in this way.
So my last proposal, which marries the two ideas, is to introduce the two keywords 'using' and 'bnf'.
.. _bnf: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
::
def concept term using bnf factor (('+' | '-') term)? as factor + (or -) term
def concept factor using bnf number (('*' | '/') factor)? as number * (or /) factor
def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
In my implementation:
* Terminals are between quotes
* Sequences are separated by whitespace
* '|' (vertical bar) is used for alternatives
Like in regular expressions, you will also find
* '*' (star) is used to express zero or many
* '+' (plus) to express one or many
* '?' (question mark) to express zero or one
For those who don't know what BNF stands for, please have a look at the bnf_
Wikipedia page.
I guess that I will need a complete chapter to explain how you retrieve what was parsed.
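As a sanity check on the notation above, here is roughly what the definition of 'term' should become once it is turned into a tree of the ParsingExpression classes added in this commit (a hand-built sketch of my reading of the new code, not actual parser output):

::

# factor (('+' | '-') term)? as a ParsingExpression tree
Sequence(
    ConceptMatch('factor'),
    Optional(
        Sequence(
            OrderedChoice(StrMatch('+'), StrMatch('-')),
            ConceptMatch('term'))))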
+64 -15
@@ -1,7 +1,11 @@
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept from core.ast.nodes import python_to_concept
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.builtin_helpers import get_names
from core.concept import Concept from core.concept import Concept
from evaluators.BaseEvaluator import OneReturnValueEvaluator from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode from parsers.DefaultParser import DefConceptNode
import functools
import logging import logging
from parsers.PythonParser import PythonGetNamesVisitor, PythonNode from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
@@ -9,6 +13,23 @@ from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
"""
Gets the concepts referenced by BNF
If a rule_name is given, it will also be considered as a potential property
"""
def __init__(self):
self.names = set()
def visit_ConceptMatch(self, node):
self.names.add(node.rule_name or node.concept_name)
def visit_all(self, node):
if node.rule_name:
self.names.add(node.rule_name)
class AddConceptEvaluator(OneReturnValueEvaluator): class AddConceptEvaluator(OneReturnValueEvaluator):
""" """
Used to add a new concept Used to add a new concept
@@ -32,7 +53,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
props_found = set() props_found = set()
concept = Concept(def_concept_node.name) concept = Concept(def_concept_node.name)
for prop in ("where", "pre", "post", "body"): for prop in ("definition", "where", "pre", "post", "body"):
# put back the sources # put back the sources
part_ret_val = getattr(def_concept_node, prop) part_ret_val = getattr(def_concept_node, prop)
if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status: if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status:
@@ -43,35 +64,63 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
setattr(concept.metadata, prop, source) setattr(concept.metadata, prop, source)
# try to find what can be a property # try to find what can be a property
for p in self.get_props(part_ret_val): concept_name = [part.value for part in def_concept_node.name.tokens]
for p in self.get_props(sheerka, part_ret_val, concept_name):
props_found.add(p) props_found.add(p)
# Auto discovered properties must be referenced in the name # add props ordered by appearance when possible
# Note that with this method, the variables will be created in the order of appearance
for token in def_concept_node.name.tokens: for token in def_concept_node.name.tokens:
if token.value in props_found: if token.value in props_found:
concept.set_prop(token.value, None) concept.set_prop(token.value, None)
# add the remaining properties
for p in props_found:
if p not in concept.props:
concept.set_prop(p, None)
# finish initialisation # finish initialisation
concept.init_key(def_concept_node.name.tokens) concept.init_key(def_concept_node.name.tokens)
concept.add_codes(def_concept_node.get_codes()) concept.add_codes(def_concept_node.get_asts())
if sheerka.is_success(def_concept_node.definition):
concept.bnf = def_concept_node.definition.value.value
ret = sheerka.create_new_concept(context, concept) ret = sheerka.create_new_concept(context, concept)
return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value]) return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value])
@staticmethod @staticmethod
def get_source(ret_value): def get_source(ret_value):
return ret_value.value.source if isinstance(ret_value.value, ParserResultConcept) \ return ret_value.value.source
else ret_value.value.name
@staticmethod @staticmethod
def get_props(ret_value): def get_props(sheerka, ret_value, concept_name):
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode): """
get_names_visitor = PythonGetNamesVisitor() Try to find out the variables
get_names_visitor.visit(ret_value.value.value.ast_) This function can only be a draft, as there may be tons of different situations
return get_names_visitor.names I guess that it can only be complete when we have access to Sheerka memory
"""
if isinstance(ret_value.value, Concept): #
return list(ret_value.value.props.keys()) # Case of python code
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode):
python_node = ret_value.value.value
as_concept_node = python_to_concept(python_node.ast_)
variables = get_names(sheerka, as_concept_node)
variables = filter(lambda x: x in concept_name, variables)
return list(variables)
#
# case of concept
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, Concept):
return list(ret_value.value.value.props.keys())
#
# case of BNF
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression):
visitor = ConceptOrRuleNameVisitor()
visitor.visit(ret_value.value.value)
return sorted(list(visitor.names))
return [] return []
+3 -4
@@ -18,13 +18,12 @@ class ConceptEvaluator(OneReturnValueEvaluator):
def matches(self, context, return_value): def matches(self, context, return_value):
return return_value.status and \ return return_value.status and \
return_value.who.startswith(BaseParser.PREFIX) and \ isinstance(return_value.value, ParserResultConcept) and \
isinstance(return_value.value, Concept) and \ isinstance(return_value.value.value, Concept)
not isinstance(return_value.value, ParserResultConcept) # because there are specific evaluators
def eval(self, context, return_value): def eval(self, context, return_value):
sheerka = context.sheerka sheerka = context.sheerka
concept = return_value.value concept = return_value.value.value
# pre condition should already be validated by the parser. # pre condition should already be validated by the parser.
# It's a mandatory condition for the concept before it can be recognized # It's a mandatory condition for the concept before it can be recognized
+2 -1
@@ -2,6 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
from evaluators.AddConceptEvaluator import AddConceptEvaluator from evaluators.AddConceptEvaluator import AddConceptEvaluator
from evaluators.BaseEvaluator import AllReturnValuesEvaluator from evaluators.BaseEvaluator import AllReturnValuesEvaluator
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
class DuplicateConceptEvaluator(AllReturnValuesEvaluator): class DuplicateConceptEvaluator(AllReturnValuesEvaluator):
@@ -26,7 +27,7 @@ class DuplicateConceptEvaluator(AllReturnValuesEvaluator):
if ret.status: if ret.status:
parsing = True parsing = True
elif ret.who == sheerka.get_evaluator_name(AddConceptEvaluator.NAME): elif ret.who == sheerka.get_evaluator_name(AddConceptEvaluator.NAME):
if not ret.status and ret.value.body.args[0] == "Duplicate object.": if not ret.status and isinstance(ret.value.body, SheerkaDataProviderDuplicateKeyError):
add_concept_in_error = True add_concept_in_error = True
self.already_defined = ret.value.body.obj self.already_defined = ret.value.body.obj
else: else:
+5 -2
@@ -14,16 +14,19 @@ def usage():
def main(argv): def main(argv):
try: try:
opts, args = getopt.getopt(argv, "hd", ["help", "debug"]) opts, args = getopt.getopt(argv, "hdl:", ["help", "debug", "logger="])
debug = False debug = False
loggers = set()
for o, a in opts: for o, a in opts:
if o in ('-h', "--help"): if o in ('-h', "--help"):
usage() usage()
return True return True
if o in ('-d', "--debug"): if o in ('-d', "--debug"):
debug = True debug = True
if o in ('-l', '--logger'):
loggers.add(a)
sheerka = Sheerka(debug=debug) sheerka = Sheerka(debug=debug, loggers=loggers)
sheerka.initialize() sheerka.initialize()
_in = core.utils.sysarg_to_string(args) _in = core.utils.sysarg_to_string(args)
+411 -8
@@ -10,6 +10,7 @@ from dataclasses import field, dataclass
from collections import defaultdict from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept from core.concept import Concept
from core.sheerka import ExecutionContext
from core.tokenizer import TokenKind, Tokenizer, Token from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils import core.utils
@@ -42,6 +43,11 @@ class LexerNode(Node):
class ConceptNode(LexerNode): class ConceptNode(LexerNode):
"""
Returned by the ConceptLexerParser
It represents a recognized concept
"""
def __init__(self, concept, start, end, tokens=None, source=None, children=None): def __init__(self, concept, start, end, tokens=None, source=None, children=None):
super().__init__(start, end) super().__init__(start, end)
self.concept = concept self.concept = concept
@@ -67,6 +73,10 @@ class ConceptNode(LexerNode):
class NonTerminalNode(LexerNode): class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
"""
def __init__(self, parsing_expression, start, end, children=None): def __init__(self, parsing_expression, start, end, children=None):
super().__init__(start, end) super().__init__(start, end)
self.parsing_expression = parsing_expression self.parsing_expression = parsing_expression
@@ -82,6 +92,10 @@ class NonTerminalNode(LexerNode):
class TerminalNode(LexerNode): class TerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
"""
def __init__(self, parsing_expression, start, end, value): def __init__(self, parsing_expression, start, end, value):
super().__init__(start, end) super().__init__(start, end)
self.parsing_expression = parsing_expression self.parsing_expression = parsing_expression
@@ -97,6 +111,27 @@ class GrammarErrorNode(ErrorNode):
message: str message: str
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
expected_tokens: list
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
pass
@dataclass()
class UnknownConceptNode(ErrorNode):
concept_key: str
@dataclass()
class TooManyConceptNode(ErrorNode):
concept_key: str
class ParsingExpression: class ParsingExpression:
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.elements = args self.elements = args
@@ -108,6 +143,15 @@ class ParsingExpression:
self.rule_name = kwargs.get('rule_name', '') self.rule_name = kwargs.get('rule_name', '')
def __eq__(self, other):
if not isinstance(other, ParsingExpression):
return False
return self.rule_name == other.rule_name and self.elements == other.elements
def __hash__(self):
return hash((self.rule_name, self.elements))
def parse(self, parser): def parse(self, parser):
return self._parse(parser) return self._parse(parser)
@@ -133,6 +177,10 @@ class Sequence(ParsingExpression):
return NonTerminalNode(self, init_pos, end_pos, children) return NonTerminalNode(self, init_pos, end_pos, children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})"
class OrderedChoice(ParsingExpression): class OrderedChoice(ParsingExpression):
""" """
@@ -152,6 +200,10 @@ class OrderedChoice(ParsingExpression):
return None return None
def __repr__(self):
to_str = "| ".join(repr(n) for n in self.elements)
return f"({to_str})"
class Optional(ParsingExpression): class Optional(ParsingExpression):
""" """
@@ -178,6 +230,46 @@ class Optional(ParsingExpression):
return selected_node return selected_node
def __repr__(self):
if len(self.elements) == 1:
return f"{self.elements[0]}?"
else:
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})?"
class ZeroOrMore(ParsingExpression):
"""
ZeroOrMore will try to match parser expression specified zero or more
times. It will never fail.
"""
def _parse(self, parser):
raise NotImplementedError()
# Uncomment when _parse is implemented
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return f"({to_str})*"
class OneOrMore(ParsingExpression):
"""
OneOrMore will try to match parser expression specified one or more times.
"""
def _parse(self, parser):
raise NotImplementedError()
class UnorderedGroup(ParsingExpression):
"""
Will try to match all of the parsing expressions in any order.
"""
def _parse(self, parser):
raise NotImplementedError()
class Match(ParsingExpression): class Match(ParsingExpression):
""" """
@@ -197,13 +289,22 @@ class StrMatch(Match):
Matches a literal Matches a literal
""" """
def __init__(self, to_match, rule_name="", root=False, ignore_case=None): def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
super(Match, self).__init__(rule_name=rule_name, root=root) super(Match, self).__init__(rule_name=rule_name, root=root)
self.to_match = to_match self.to_match = to_match
self.ignore_case = ignore_case self.ignore_case = ignore_case
def __repr__(self): def __repr__(self):
return f"StrMatch('{self.to_match}')" return f"'{self.to_match}'"
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, StrMatch):
return False
return self.to_match == other.to_match and self.ignore_case == other.ignore_case
def _parse(self, parser): def _parse(self, parser):
token = parser.get_token() token = parser.get_token()
@@ -218,6 +319,31 @@ class StrMatch(Match):
return None return None
class ConceptMatch(Match):
"""
Will match a concept
It is used only for rule definitions
When the grammar is created, it is replaced by the actual concept
"""
def __init__(self, concept_name):
super(Match, self).__init__()
self.concept_name = concept_name
def __repr__(self):
return f"{self.concept_name}"
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, ConceptMatch):
return False
return self.concept_name == other.concept_name
class CrossRef: class CrossRef:
""" """
During the creation of the model, During the creation of the model,
@@ -227,11 +353,20 @@ class CrossRef:
def __init__(self, concept): def __init__(self, concept):
self.concept = concept self.concept = concept
def __repr__(self):
return f"ref({self.concept.key})"
def __eq__(self, other):
if not isinstance(other, CrossRef):
return False
return self.concept == other.concept
class ConceptLexerParser(BaseParser): class ConceptLexerParser(BaseParser):
def __init__(self): def __init__(self, concepts_dict=None):
super().__init__("ConceptLexer") super().__init__("ConceptLexer")
self.concepts_dict = {} self.concepts_dict = concepts_dict or {} # dict of concept, grammar
self.ignore_case = True self.ignore_case = True
self.token = None self.token = None
@@ -295,22 +430,28 @@ class ConceptLexerParser(BaseParser):
self.pos -= 1 self.pos -= 1
self.token = self.tokens[self.pos] self.token = self.tokens[self.pos]
def initialize(self, dict): def initialize(self, context, grammars):
""" """
Adds a bunch of concepts, and how they can be recognized Adds a bunch of concepts, and how they can be recognized
:param dict: dictionary of concept; concept_definition :param context: execution context
:param grammars: dictionary of concept, concept_definition
:return: :return:
""" """
self.context = context
self.sheerka = context.sheerka
nodes_to_resolve = [] nodes_to_resolve = []
concepts_to_resolve = set() concepts_to_resolve = set()
# ## Gets the grammars # ## Gets the grammars
for concept, concept_def in dict.items(): for concept, concept_def in grammars.items():
concept.init_key() # make sure that the key is initialized concept.init_key() # make sure that the key is initialized
grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve) grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve)
self.concepts_dict[concept] = grammar self.concepts_dict[concept] = grammar
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
# ## Removes concepts with infinite recursions # ## Removes concepts with infinite recursions
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
for concept in concepts_to_remove: for concept in concepts_to_remove:
@@ -320,7 +461,20 @@ class ConceptLexerParser(BaseParser):
# ## Resolves cross references and remove grammar with unresolved references # ## Resolves cross references and remove grammar with unresolved references
self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve) self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
else:
return self.sheerka.ret(self.name, True, self.concepts_dict)
def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve): def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve):
def get_concept(concept_name):
if concept_name in self.context.concepts_cache:
return self.context.concepts_cache[concept_name]
return self.sheerka.get(concept_name)
# TODO
# inner_get_model must not modify the initial ParsingExpression
# A copy must be created
def inner_get_model(expression): def inner_get_model(expression):
if isinstance(expression, Concept): if isinstance(expression, Concept):
ret = CrossRef(expression) ret = CrossRef(expression)
@@ -332,6 +486,16 @@ class ConceptLexerParser(BaseParser):
ret = expression ret = expression
if ret.ignore_case is None: if ret.ignore_case is None:
ret.ignore_case = self.ignore_case ret.ignore_case = self.ignore_case
elif isinstance(expression, ConceptMatch):
to_match = get_concept(expression.concept_name)
if hasattr(to_match, "__iter__"):
ret = self.add_error(TooManyConceptNode(expression.concept_name), False)
elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
ret = self.add_error(UnknownConceptNode(expression.concept_name), False)
else:
ret = CrossRef(to_match)
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
elif isinstance(expression, Sequence) or \ elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \ isinstance(expression, OrderedChoice) or \
isinstance(expression, Optional): isinstance(expression, Optional):
@@ -341,7 +505,7 @@ class ConceptLexerParser(BaseParser):
concepts_to_resolve.add(concept) concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret) nodes_to_resolve.append(ret)
else: else:
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'.")) ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
return ret return ret
model = inner_get_model(concept_def) model = inner_get_model(concept_def)
@@ -493,3 +657,242 @@ class ConceptLexerParser(BaseParser):
by_end_pos[result.end].append(result) by_end_pos[result.end].append(result)
return by_end_pos[max(by_end_pos)] return by_end_pos[max(by_end_pos)]
class RegexParser:
"""
Parser used to transform a literal expression into a ParsingExpression
example:
a | b, c -> Sequence(OrderedChoice(a, b), c)
'|' (pipe) is used for OrderedChoice
',' (comma) is used for Sequence
'?' (question mark) is used for Optional
'*' (star) is used for ZeroOrMore
'+' (plus) is used for OneOrMore
"""
def __init__(self):
self.has_error = False
self.error_sink = []
self.name = BaseParser.PREFIX + "RegexParser"
self.lexer_iter = None
self._current = None
self.after_current = None
self.nb_open_par = 0
self.context = None
self.source = ""
self.sheerka = None
def __eq__(self, other):
if not isinstance(other, RegexParser):
return False
return True
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
self._current = None
self.after_current = None
self.nb_open_par = 0
self.next_token()
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=False):
if self._current and self._current.type == TokenKind.EOF:
return
try:
self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value)
self.after_current = None
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
def next_after(self):
if self.after_current is not None:
return self.after_current
try:
self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value)
return self.after_current
except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
return self.after_current
def eat_white_space(self):
if self.after_current is not None:
self._current = self.after_current
self.source += str(self._current.value)
self.after_current = None
try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = None
def maybe_sequence(self, first, second):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text)
tree = self.parse_choice()
ret = self.sheerka.ret(
self.name,
not self.has_error,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=self.source,
body=self.error_sink if self.has_error else tree,
try_parsed=tree))
return ret
def parse_choice(self):
sequence = self.parse_sequence()
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
while True:
# maybe eat the vertical bar
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
def parse_sequence(self):
expr_and_modifier = self.parse_expression_and_modifier()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
# maybe eat the comma
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_white_space()
sequence = self.parse_expression_and_modifier()
elements.append(sequence)
return Sequence(*elements)
def parse_expression_and_modifier(self):
expression = self.parse_expression()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return expression
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEndOfFileError(), False)
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
return expression
if token.type == TokenKind.IDENTIFIER:
self.next_token()
return ConceptMatch(token.value)
# concept = self.sheerka.get(str(token.value))
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# self.add_error(CannotResolveConceptNode(str(token.value)))
# self.next_token()
# return None
# else:
# self.next_token()
# return concept
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
class ParsingExpressionVisitor:
"""
visit ParsingExpression
"""
def visit(self, parsing_expression):
name = parsing_expression.__class__.__name__
method = 'visit_' + name
visitor = getattr(self, method, self.generic_visit)
return visitor(parsing_expression)
def generic_visit(self, parsing_expression):
if hasattr(self, "visit_all"):
self.visit_all(parsing_expression)
for node in parsing_expression.elements:
if isinstance(node, Concept):
self.visit(ConceptMatch(node.key or node.name))
elif isinstance(node, str):
self.visit(StrMatch(node))
else:
self.visit(node)
+70 -22
@@ -1,11 +1,14 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts from core.concept import ConceptParts
import core.builtin_helpers import core.builtin_helpers
import core.utils
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field from dataclasses import dataclass, field
import logging import logging
from parsers.ConceptLexerParser import RegexParser
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -180,20 +183,22 @@ class NameNode(DefaultParserNode):
@dataclass() @dataclass()
class DefConceptNode(DefaultParserNode): class DefConceptNode(DefaultParserNode):
name: NameNode = NotInitializedNode() name: NameNode = NotInitializedNode()
where: ReturnValueConcept = NotInitializedNode() where: ReturnValueConcept = NotInitializedNode()
pre: ReturnValueConcept = NotInitializedNode() pre: ReturnValueConcept = NotInitializedNode()
post: ReturnValueConcept = NotInitializedNode() post: ReturnValueConcept = NotInitializedNode()
body: ReturnValueConcept = NotInitializedNode() body: ReturnValueConcept = NotInitializedNode()
definition: ReturnValueConcept = NotInitializedNode()
def get_codes(self): def get_asts(self):
codes = {} asts = {}
for part_key in ConceptParts: for part_key in ConceptParts:
prop_value = getattr(self, part_key.value) prop_value = getattr(self, part_key.value)
if hasattr(prop_value, "ast_"): if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body,
codes[part_key] = prop_value.ast_ ParserResultConcept) and hasattr(
return codes prop_value.body.body, "ast_"):
asts[part_key] = prop_value.body.body.ast_
return asts
class DefaultParser(BaseParser): class DefaultParser(BaseParser):
@@ -322,20 +327,44 @@ class DefaultParser(BaseParser):
# init # init
log.debug("It may be a definition of a concept") log.debug("It may be a definition of a concept")
concept_special_tokens = [def_token] keywords_tokens = [def_token]
concept_found = DefConceptNode(concept_special_tokens) concept_found = DefConceptNode(keywords_tokens)
# the definition of a concept consists of several parts # the definition of a concept consists of several parts
# Keywords.CONCEPT to get the name of the concept # Keywords.CONCEPT to get the name of the concept
# Keywords.FROM [Keywords.REGEX] to get the definition of the concept
# Keywords.AS to get the body # Keywords.AS to get the body
# Keywords.WHERE to get the conditions to recognize for the variables # Keywords.WHERE to get the conditions to recognize for the variables
# Keywords.PRE to know if the conditions to evaluate the concept # Keywords.PRE to know if the conditions to evaluate the concept
# Keywords.POST to apply or verify once the concept is executed # Keywords.POST to apply or verify once the concept is executed
def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST] #
# Regroup the tokens by parts
first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)
# get the name
concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)
# get the definition
concept_found.definition = self.get_concept_definition(tokens_found_by_parts)
# get the ASTs for the remaining parts
asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
concept_found.where = asts_found_by_parts[Keywords.WHERE]
concept_found.pre = asts_found_by_parts[Keywords.PRE]
concept_found.post = asts_found_by_parts[Keywords.POST]
concept_found.body = asts_found_by_parts[Keywords.AS]
log.debug(f"Found DefConcept node '{concept_found}'")
return concept_found
def regroup_tokens_by_parts(self, keywords_tokens):
def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
# tokens found, when trying to recognize the parts # tokens found, when trying to recognize the parts
tokens_found_by_parts = { tokens_found_by_parts = {
Keywords.CONCEPT: [], Keywords.CONCEPT: [],
Keywords.FROM: None,
Keywords.AS: None, Keywords.AS: None,
Keywords.WHERE: None, Keywords.WHERE: None,
Keywords.PRE: None, Keywords.PRE: None,
@@ -348,7 +377,7 @@ class DefaultParser(BaseParser):
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry # loop thru the tokens, and put them in the correct tokens_found_by_parts entry
while token.type != TokenKind.EOF: while token.type != TokenKind.EOF:
if token.value in def_concept_parts: if token.value in def_concept_parts:
concept_special_tokens.append(token) # keep track of the keywords keywords_tokens.append(token) # keep track of the keywords
keyword = token.value keyword = token.value
if tokens_found_by_parts[keyword]: if tokens_found_by_parts[keyword]:
# a part is defined more than once # a part is defined more than once
@@ -364,13 +393,15 @@ class DefaultParser(BaseParser):
token = self.get_token() token = self.get_token()
# semantic checks return first_token, tokens_found_by_parts
def get_concept_name(self, first_token, tokens_found_by_parts):
name_first_token_index = 1 name_first_token_index = 1
token = self.get_token()
if first_token.value != Keywords.CONCEPT: if first_token.value != Keywords.CONCEPT:
self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT])) self.add_error(UnexpectedTokenErrorNode([first_token], "Syntax error.", [Keywords.CONCEPT]))
name_first_token_index = 0 name_first_token_index = 0
# Manage the name
name_tokens = tokens_found_by_parts[Keywords.CONCEPT] name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
if len(name_tokens) == name_first_token_index: if len(name_tokens) == name_first_token_index:
self.add_error(SyntaxErrorNode([], "Name is mandatory")) self.add_error(SyntaxErrorNode([], "Name is mandatory"))
@@ -381,8 +412,31 @@ class DefaultParser(BaseParser):
if TokenKind.NEWLINE in [t.type for t in name_tokens]: if TokenKind.NEWLINE in [t.type for t in name_tokens]:
self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name.")) self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))
concept_found.name = NameNode(name_tokens[name_first_token_index:]) # skip the first token return NameNode(name_tokens[name_first_token_index:]) # skip the first token
def get_concept_definition(self, tokens_found_by_parts):
if tokens_found_by_parts[Keywords.FROM] is None:
return NotInitializedNode()
definition_tokens = tokens_found_by_parts[Keywords.FROM]
if definition_tokens[1].value != Keywords.BNF:
return NotInitializedNode()
tokens = core.utils.strip_tokens(definition_tokens[2:])
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return NotInitializedNode()
regex_parser = RegexParser()
new_context = self.context.push(self.name)
parsing_result = regex_parser.parse(new_context, tokens)
if not parsing_result.status:
self.add_error(parsing_result.value)
return NotInitializedNode()
return parsing_result
def get_concept_parts(self, tokens_found_by_parts):
asts_found_by_parts = { asts_found_by_parts = {
Keywords.AS: NotInitializedNode(), Keywords.AS: NotInitializedNode(),
Keywords.WHERE: NotInitializedNode(), Keywords.WHERE: NotInitializedNode(),
@@ -391,7 +445,7 @@ class DefaultParser(BaseParser):
} }
for keyword in tokens_found_by_parts: for keyword in tokens_found_by_parts:
if keyword == Keywords.CONCEPT: if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
continue # already done continue # already done
log.debug("Processing part '" + keyword.name + "'") log.debug("Processing part '" + keyword.name + "'")
@@ -418,13 +472,7 @@ class DefaultParser(BaseParser):
asts_found_by_parts[keyword] = parsing_result asts_found_by_parts[keyword] = parsing_result
concept_found.where = asts_found_by_parts[Keywords.WHERE] return asts_found_by_parts
concept_found.pre = asts_found_by_parts[Keywords.PRE]
concept_found.post = asts_found_by_parts[Keywords.POST]
concept_found.body = asts_found_by_parts[Keywords.AS]
log.debug(f"Found DefConcept node '{concept_found}'")
return concept_found
# def parse_expression(self): # def parse_expression(self):
# return self.parse_addition() # return self.parse_addition()
+5 -1
@@ -20,7 +20,11 @@ class EmptyStringParser(BaseParser):
isinstance(text, list) and text == [] or \ isinstance(text, list) and text == [] or \
text is None: text is None:
log.debug(f"Recognized '{text}' as BuiltinConcepts.NOP.") log.debug(f"Recognized '{text}' as BuiltinConcepts.NOP.")
return sheerka.ret(self.name, True, sheerka.new(BuiltinConcepts.NOP)) return sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source="",
body=sheerka.new(BuiltinConcepts.NOP)))
log.debug(f"Failed to recognize '{text}'") log.debug(f"Failed to recognize '{text}'")
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME)) return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
+9 -1
@@ -47,7 +47,15 @@ class ExactConceptParser(BaseParser):
            if token.startswith(VARIABLE_PREFIX):
                index = int(token[len(VARIABLE_PREFIX):])
                concept.set_prop_by_index(index, words[i])
-        res.append(ReturnValueConcept(self.name, True, concept))
+        res.append(ReturnValueConcept(
+            self.name,
+            True,
+            context.sheerka.new(
+                BuiltinConcepts.PARSER_RESULT,
+                parser=self,
+                source=text if isinstance(text, str) else self.get_text_from_tokens(text),
+                body=concept,
+                try_parsed=concept)))
        log.debug(f"Recognized '{text}' as '{concept}'")
        recognized = True
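Since parser results are now wrapped in a PARSER_RESULT concept, callers unwrap one extra level; a minimal sketch of the consuming side, using the accessors exercised by the updated tests further down:

    # Sketch (names taken from the updated ExactConceptParser tests below;
    # assumes a context in which a concept "hello world" is registered):
    results = ExactConceptParser().parse(context, "hello world")
    parser_result = results[0].value       # a ParserResultConcept
    concept_found = parser_result.value    # the recognized Concept itself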
+3
View File
@@ -9,6 +9,9 @@
- E : events
- O : object (with history management)
- P : pickle
- S : state
- C : concept
- D : concept definitions

## How are concepts serialized?
- get the id of the concept
+21 -5
View File
@@ -10,7 +10,7 @@ from sdp.sheerkaSerializer import Serializer, SerializerContext
import logging
log = logging.getLogger(__name__)
+init_log = logging.getLogger(__name__ + ".init")


def json_default_converter(o):
    """
@@ -278,7 +278,7 @@ class SheerkaDataProvider:
    REF_PREFIX = "##REF##:"

    def __init__(self, root=None):
-        log.debug("Initializing sdp.")
+        init_log.debug("Initializing sdp.")
        self.io = SheerkaDataProviderIO.get(root)
        self.first_time = self.io.first_time
@@ -312,6 +312,20 @@ class SheerkaDataProvider:
            else obj.get_digest() if hasattr(obj, "get_digest") \
            else None

+    @staticmethod
+    def get_obj_origin(obj):
+        """
+        Get the digest used to save obj, if set
+        """
+        if isinstance(obj, dict) and Serializer.ORIGIN in obj:
+            return obj[Serializer.ORIGIN]
+        if hasattr(obj, Serializer.ORIGIN):
+            return getattr(obj, Serializer.ORIGIN)
+        return None

    @staticmethod
    def get_stream_digest(stream):
        sha256_hash = hashlib.sha256()
@@ -460,10 +474,10 @@ class SheerkaDataProvider:
        obj_key = self.get_obj_key(obj) or key
        if isinstance(state.data[entry][key], list):
-            if not hasattr(obj, Serializer.ORIGIN):
+            obj_origin = self.get_obj_origin(obj)
+            if obj_origin is None:
                raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj))
-            obj_origin = getattr(obj, Serializer.ORIGIN)
            state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed)
        else:
@@ -674,7 +688,9 @@ class SheerkaDataProvider:
            obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
            # set the origin of the object
-            if not isinstance(obj, str):
+            if isinstance(obj, dict):
+                obj[Serializer.ORIGIN] = digest
+            elif not isinstance(obj, str):
                setattr(obj, Serializer.ORIGIN, digest)
            return obj
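The dict branch exists because plain dictionaries cannot carry attributes, so their origin is stored as an entry instead; a minimal sketch of the two cases get_obj_origin now unifies (the digest value is made up for illustration):

    # Sketch; "abc123" is a placeholder digest, not a real value from the repo:
    obj = {"my_key": "value1"}
    obj[Serializer.ORIGIN] = "abc123"          # dicts keep the origin as an entry...
    assert SheerkaDataProvider.get_obj_origin(obj) == "abc123"

    class Holder: pass
    h = Holder()
    setattr(h, Serializer.ORIGIN, "abc123")    # ...other objects keep it as an attribute
    assert SheerkaDataProvider.get_obj_origin(h) == "abc123"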
+4 -1
View File
@@ -46,9 +46,9 @@ class SheerkaDataProviderIO:
class SheerkaDataProviderFileIO(SheerkaDataProviderIO):
-    log = logging.getLogger("FileIO")

    def __init__(self, root):
+        self.log = logging.getLogger(self.__class__.__name__ + ".init")
        root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \
            if root is None \
            else path.abspath(root)
@@ -180,10 +180,13 @@ def on_close(dictionary_io, file_path, stream):
    :param stream:
    :return:
    """
    def decorator(func):
        def wrapper(*args, **kwargs):
            stream.seek(0)
            dictionary_io.cache[file_path] = stream.read()
            func(*args, **kwargs)
        return wrapper
    return decorator
+19 -5
View File
@@ -12,6 +12,7 @@ import core.utils
from core.concept import Concept

import logging
log = logging.getLogger(__name__)
+init_log = logging.getLogger(__name__ + ".init")


def json_default_converter(o):
@@ -40,17 +41,18 @@ class Serializer:
    USERNAME = "user_name"  # key to store the user that has committed the snapshot
    MODIFICATION_DATE = "modification_date"  #
    PARENTS = "parents"
-    ORIGIN = "origin"
+    ORIGIN = "##origin##"
    HISTORY = "##history##"

    def __init__(self):
-        log.debug("Initializing serializers")
+        init_log.debug("Initializing serializers")
        self._cache = []
        # add builtin serializers
        self.register(EventSerializer())
        self.register(StateSerializer())
        self.register(ConceptSerializer())
+        self.register(DictionarySerializer())

    def register(self, serializer):
        """
@@ -58,7 +60,7 @@ class Serializer:
        :param serializer:
        :return:
        """
-        log.debug(f"Adding serializer {serializer}")
+        init_log.debug(f"Adding serializer {serializer}")
        self._cache.append(serializer)

    def serialize(self, obj, context):
@@ -212,8 +214,11 @@ class PickleSerializer(BaseSerializer):
class StateSerializer(PickleSerializer):
    def __init__(self, ):
-        PickleSerializer.__init__(self, lambda obj: core.utils.get_full_qualified_name(
-            obj) == "sdp.sheerkaDataProvider.State", "S", 1)
+        PickleSerializer.__init__(
+            self,
+            lambda obj: core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State",
+            "S",
+            1)


class ConceptSerializer(ObjectSerializer):
@@ -223,6 +228,15 @@ class ConceptSerializer(ObjectSerializer):
    def matches(self, obj):
        return isinstance(obj, Concept)


+class DictionarySerializer(PickleSerializer):
+    def __init__(self, ):
+        PickleSerializer.__init__(
+            self,
+            lambda obj: isinstance(obj, dict),
+            "D",
+            1)

#
# class SheerkaSerializer(ObjectSerializer):
#     def __init__(self):
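With DictionarySerializer registered, each serializer family is keyed by a one-letter stream prefix; a summary of the mapping as this commit leaves it (letters taken from the serialization doc above, the rest from the registrations in this file):

    # One-letter stream prefixes after this change (illustrative summary):
    #   E -> events             O -> objects (with history management)
    #   P -> pickle             S -> state   (StateSerializer)
    #   C -> concept            (ConceptSerializer)
    #   D -> dictionaries, e.g. the concept-definitions map (DictionarySerializer)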
+181
View File
@@ -0,0 +1,181 @@
import ast
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, ConceptParts, Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer
from evaluators.AddConceptEvaluator import AddConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.ExactConceptParser import ExactConceptParser
from parsers.PythonParser import PythonNode, PythonParser
def get_context():
sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://")
return ExecutionContext("test", "xxx", sheerka)
def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
if body:
concept.body = get_concept_part(body)
if where:
concept.where = get_concept_part(where)
if pre:
concept.pre = get_concept_part(pre)
if post:
concept.post = get_concept_part(post)
if definition:
concept.definition = definition
return ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=concept))
def get_concept_part(part):
if isinstance(part, str):
node = PythonNode(part, ast.parse(part, mode="eval"))
return ReturnValueConcept(
who="Parsers:DefaultParser",
status=True,
value=ParserResultConcept(
source=part,
parser=PythonParser(),
value=node))
if isinstance(part, PythonNode):
return ReturnValueConcept(
who="Parsers:DefaultParser",
status=True,
value=ParserResultConcept(
source=part.source,
parser=PythonParser(),
value=part))
if isinstance(part, ReturnValueConcept):
return part
def get_concept_definition(source, parsing_expression):
return ReturnValueConcept(
who="Parsers:RegexParser",
status=True,
value=ParserResultConcept(
source=source,
parser=RegexParser(),
value=parsing_expression
)
)
@pytest.mark.parametrize("ret_val, expected", [
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=DefConceptNode([]))), True),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", False, ParserResultConcept(value=DefConceptNode([]))), False),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a ParserResultConcept"), False),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
])
def test_i_can_match(ret_val, expected):
context = get_context()
assert AddConceptEvaluator().matches(context, ret_val) == expected
def test_that_the_source_is_correctly_set():
context = get_context()
def_concept_return_value = get_concept(
name="hello a",
definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
where="isinstance(a, str )",
pre="a is not None",
body="print('hello' + a)")
evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
assert evaluated.status
assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
created_concept = evaluated.body.body
assert created_concept.metadata.name == "hello a"
assert created_concept.metadata.where == "isinstance(a, str )"
assert created_concept.metadata.pre == "a is not None"
assert created_concept.metadata.post is None
assert created_concept.metadata.body == "print('hello' + a)"
assert created_concept.metadata.definition == "hello a"
def test_that_the_ast_is_correctly_initialized():
context = get_context()
def_concept_return_value = get_concept(
name="hello a",
definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
where="isinstance(a, str )",
pre="a is not None",
body="print('hello' + a)")
evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
assert evaluated.status
assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
created_concept = evaluated.body.body
assert ConceptParts.WHERE in created_concept.cached_asts
assert ConceptParts.PRE in created_concept.cached_asts
assert ConceptParts.BODY in created_concept.cached_asts
assert ConceptParts.POST not in created_concept.cached_asts
def test_that_the_new_concept_is_correctly_saved():
context = get_context()
def_concept_return_value = get_concept(
name="hello a",
definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
where="isinstance(a, str )",
pre="a is not None",
body="print('hello' + a)")
from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
assert context.sheerka.isinstance(from_db, BuiltinConcepts.UNKNOWN_CONCEPT)
AddConceptEvaluator().eval(context, def_concept_return_value)
context.sheerka.concepts_cache = {} # reset cache
from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
assert from_db.metadata.key == f"hello {VARIABLE_PREFIX}0"
assert from_db.metadata.name == "hello a"
assert from_db.metadata.where == "isinstance(a, str )"
assert from_db.metadata.pre == "a is not None"
assert from_db.metadata.post is None
assert from_db.metadata.body == "print('hello' + a)"
assert from_db.metadata.definition == "hello a"
assert len(from_db.props) == 1
assert "a" in from_db.props
assert from_db.cached_asts == {} # ast is not saved in db
def test_i_can_get_props_from_python_node():
ret_val = get_concept_part("isinstance(a, str)")
context = get_context()
assert AddConceptEvaluator.get_props(context.sheerka, ret_val, ["a"]) == ["a"]
def test_i_can_get_props_from_another_concept():
concept = Concept("hello").set_prop("a").set_prop("b")
ret_val = ReturnValueConcept(who="some_parser",
status=True,
value=ParserResultConcept(value=concept))
assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["a", "b"]
def test_i_can_get_props_from_definition():
parsing_expression = Sequence(ConceptMatch('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptMatch("add"))))
ret_val = get_concept_definition("mult (('+'|'-') add)?", parsing_expression)
assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["add", "mult"]
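Piecing the assertions together, the evaluator result nests as follows; this is a reading of the tests above, not a separately documented API:

    # Shape of the result, as exercised by the tests in this file:
    evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
    assert evaluated.status                # a ReturnValueConcept
    new_concept = evaluated.body.body      # NEW_CONCEPT wrapper -> created Concept
    assert new_concept.metadata.name == "hello a"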
+7
View File
@@ -0,0 +1,7 @@
import pytest
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseParser import BaseParser
+25 -15
View File
@@ -5,6 +5,7 @@ from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptEvaluator import ConceptEvaluator
from parsers.BaseParser import BaseParser
+from parsers.ExactConceptParser import ExactConceptParser


def get_context():
@@ -13,12 +14,21 @@ def get_context():
    return ExecutionContext("test", "xxx", sheerka)


+def get_return_value(concept, source=None):
+    return ReturnValueConcept(
+        "some_name",
+        True,
+        ParserResultConcept(parser=ExactConceptParser(),
+                            source=source or concept.name,
+                            value=concept,
+                            try_parsed=concept))


@pytest.mark.parametrize("ret_val, expected", [
-    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, Concept()), True),
-    (ReturnValueConcept(BaseParser.PREFIX + "some_name", False, Concept()), False),
-    (ReturnValueConcept("Not a parser", True, Concept()), False),
-    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a concept"), False),
+    (ReturnValueConcept("some_name", True, ParserResultConcept(value=Concept())), True),
+    (ReturnValueConcept("some_name", False, ParserResultConcept(value=Concept())), False),
+    (ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept")), False),
+    (ReturnValueConcept("some_name", True, Concept()), False),
+    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
])
def test_i_can_match(ret_val, expected):
    context = get_context()
@@ -30,7 +40,7 @@ def test_concept_is_returned_when_no_body():
    concept = Concept(name="one").init_key()
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
+    item = get_return_value(concept)
    result = evaluator.eval(context, item)

    assert result.who == evaluator.name
@@ -44,7 +54,7 @@ def test_body_is_evaluated_when_python_body():
    concept = Concept(name="one", body="1").init_key()
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
+    item = get_return_value(concept)
    result = evaluator.eval(context, item)

    assert result.who == evaluator.name
@@ -60,7 +70,7 @@ def test_body_is_evaluated_when_concept_body():
    concept_un = Concept(name="un", body="one").init_key()
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
+    item = get_return_value(concept_un)
    result = evaluator.eval(context, item)

    assert result.who == evaluator.name
@@ -80,7 +90,7 @@ def test_body_is_evaluated_when_concept_body_with_a_body():
    concept_un = Concept(name="un", body="one").init_key()
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
+    item = get_return_value(concept_un)
    result = evaluator.eval(context, item)

    assert result.who == evaluator.name
@@ -97,7 +107,7 @@ def test_i_can_evaluate_longer_chains():
    concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
+    item = get_return_value(concept_d)
    result = evaluator.eval(context, item)

    assert result.status
@@ -112,7 +122,7 @@ def test_i_can_evaluate_longer_chains_2():
    concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
+    item = get_return_value(concept_d)
    result = evaluator.eval(context, item)

    assert result.status
@@ -133,7 +143,7 @@ def test_i_can_recognize_concept_properties():
        .set_prop("b", "two").init_key())
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
    result = evaluator.eval(context, item)

    assert result.status
@@ -156,7 +166,7 @@ def test_i_can_recognize_concept_properties_with_body():
        .set_prop("b", "two").init_key())
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
    result = evaluator.eval(context, item)

    assert result.status
@@ -174,7 +184,7 @@ def test_i_can_recognize_concept_properties_with_body_when_concept_has_a_body():
        .set_prop("b", "two").init_key())
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
    result = evaluator.eval(context, item)

    assert result.status
@@ -189,7 +199,7 @@ def test_i_cannot_recognize_a_concept_if_one_of_the_prop_is_unknown():
        .set_prop("b", "two").init_key())
    evaluator = ConceptEvaluator()
-    item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
+    item = get_return_value(concept_plus)
    result = evaluator.eval(context, item)

    assert not result.status
+175 -34
View File
@@ -2,8 +2,18 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
+from core.tokenizer import Tokenizer, TokenKind
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
-    CrossRef
+    CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \
+    ParsingExpressionVisitor


class ConceptVisitor(ParsingExpressionVisitor):
    def __init__(self):
        self.concepts = set()

    def visit_ConceptMatch(self, node):
        self.concepts.add(node.concept_name)
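ParsingExpressionVisitor presumably dispatches in the ast.NodeVisitor style; that is an assumption based on the visit_ConceptMatch naming, since the base class itself is not shown in this diff. A sketch of that convention:

    # Sketch of the dispatch convention ConceptVisitor relies on (an assumption
    # mirroring ast.NodeVisitor; the real base class ships with ConceptLexerParser):
    class VisitorSketch:
        def visit(self, node):
            method = getattr(self, "visit_" + type(node).__name__, self.generic_visit)
            return method(node)

        def generic_visit(self, node):
            for child in getattr(node, "nodes", []):  # child attribute name assumed
                self.visit(child)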
@pytest.mark.parametrize("match, text", [ @pytest.mark.parametrize("match, text", [
@@ -23,7 +33,7 @@ def test_i_can_match_simple_tokens(match, text):
foo = Concept(name="foo") foo = Concept(name="foo")
concepts = {foo: text} concepts = {foo: text}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, text) res = parser.parse(context, text)
@@ -38,7 +48,7 @@ def test_i_can_match_multiple_concepts_in_one_input():
two = Concept(name="two") two = Concept(name="two")
concepts = {one: "one", two: "two"} concepts = {one: "one", two: "two"}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two one") res = parser.parse(context, "one two one")
@@ -69,7 +79,7 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
two = Concept(name="two") two = Concept(name="two")
concepts = {one: "one", two: "two"} concepts = {one: "one", two: "two"}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
assert not res.status assert not res.status
@@ -86,7 +96,7 @@ def test_i_can_match_sequence():
foo = Concept(name="foo") foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")} concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
@@ -100,7 +110,7 @@ def test_wrong_sequence_is_not_matched():
foo = Concept(name="foo") foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")} concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two three one") res = parser.parse(context, "one two three one")
@@ -116,7 +126,7 @@ def test_i_cannot_match_sequence_if_end_of_file():
foo = Concept(name="foo") foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")} concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert not res.status assert not res.status
@@ -133,7 +143,7 @@ def test_i_always_choose_the_longest_match():
concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
@@ -149,7 +159,7 @@ def test_i_can_match_several_sequences():
concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two three one two") res = parser.parse(context, "one two three one two")
@@ -166,7 +176,7 @@ def test_i_can_match_ordered_choice():
foo = Concept(name="foo") foo = Concept(name="foo")
concepts = {foo: OrderedChoice("one", "two")} concepts = {foo: OrderedChoice("one", "two")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res1 = parser.parse(context, "one") res1 = parser.parse(context, "one")
assert res1.status assert res1.status
@@ -189,7 +199,7 @@ def test_i_cannot_match_ordered_choice_with_empty_alternative():
foo = Concept(name="foo") foo = Concept(name="foo")
concepts = {foo: Sequence(OrderedChoice("one", ""), "two")} concepts = {foo: Sequence(OrderedChoice("one", ""), "two")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "ok") # because token[0] is not "one" and not "" (it is 'two') res = parser.parse(context, "ok") # because token[0] is not "one" and not "" (it is 'two')
assert not res.status assert not res.status
@@ -201,7 +211,7 @@ def test_i_can_mix_sequences_and_ordered_choices():
concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res1 = parser.parse(context, "twenty one ok") res1 = parser.parse(context, "twenty one ok")
assert res1.status assert res1.status
@@ -225,7 +235,7 @@ def test_i_can_mix_ordered_choices_and_sequences():
concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "twenty thirty") res = parser.parse(context, "twenty thirty")
assert res.status assert res.status
@@ -240,7 +250,7 @@ def test_i_cannot_parse_empty_optional():
concepts = {foo: Optional("one")} concepts = {foo: Optional("one")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "") res = parser.parse(context, "")
assert not res.status assert not res.status
@@ -253,7 +263,7 @@ def test_i_can_parse_optional():
concepts = {foo: Optional("one")} concepts = {foo: Optional("one")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one") res = parser.parse(context, "one")
assert res.status assert res.status
@@ -266,7 +276,7 @@ def test_i_can_parse_sequence_starting_with_optional():
concepts = {foo: Sequence(Optional("twenty"), "one")} concepts = {foo: Sequence(Optional("twenty"), "one")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "twenty one") res = parser.parse(context, "twenty one")
assert res.status assert res.status
@@ -283,7 +293,7 @@ def test_i_can_parse_sequence_ending_with_optional():
concepts = {foo: Sequence("one", "two", Optional("three"))} concepts = {foo: Sequence("one", "two", Optional("three"))}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
assert res.status assert res.status
@@ -300,7 +310,7 @@ def test_i_can_parse_sequence_with_optional_in_between():
concepts = {foo: Sequence("one", Optional("two"), "three")} concepts = {foo: Sequence("one", Optional("two"), "three")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
assert res.status assert res.status
@@ -312,19 +322,16 @@ def test_i_can_parse_sequence_with_optional_in_between():
def test_i_can_use_reference(): def test_i_can_use_reference():
# The problem here is when there are multiple match for the same input # when there are multiple matches for the same input
# The parsing result is a list of all concepts found # Do I need to create a choice concept ?
# So it's already a list that represents a sequence, not a choice # No, create a return value for every possible graph
# So I need to create a choice concept
# create the return value for every possible graph
# --> The latter seems to be the best as we don't defer the resolution of the problem to someone else
context = get_context() context = get_context()
foo = Concept(name="foo") foo = Concept(name="foo")
bar = Concept(name="bar") bar = Concept(name="bar")
concepts = {foo: Sequence("one", "two"), bar: foo} concepts = {foo: Sequence("one", "two"), bar: foo}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert len(res) == 2 assert len(res) == 2
@@ -350,7 +357,7 @@ def test_i_can_use_context_reference_with_multiple_levels():
concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar} concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert len(res) == 3 assert len(res) == 3
@@ -375,7 +382,7 @@ def test_order_is_not_important_when_using_references():
concepts = {bar: foo, foo: Sequence("one", "two")} concepts = {bar: foo, foo: Sequence("one", "two")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert len(res) == 2 assert len(res) == 2
@@ -390,7 +397,7 @@ def test_i_can_parse_when_reference():
concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "twenty two") res = parser.parse(context, "twenty two")
assert res.status assert res.status
@@ -415,7 +422,7 @@ def test_i_can_detect_duplicates_when_reference():
foo: OrderedChoice("twenty", "thirty") foo: OrderedChoice("twenty", "thirty")
} }
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
res = parser.parse(context, "twenty") res = parser.parse(context, "twenty")
assert len(res) == 2 assert len(res) == 2
@@ -437,7 +444,7 @@ def test_i_can_detect_infinite_recursion():
foo: bar foo: bar
} }
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(get_context(), concepts)
assert bar not in parser.concepts_dict assert bar not in parser.concepts_dict
assert foo not in parser.concepts_dict assert foo not in parser.concepts_dict
@@ -452,7 +459,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
foo: OrderedChoice(bar, "foo") foo: OrderedChoice(bar, "foo")
} }
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -464,7 +471,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
foo: OrderedChoice("foo", bar) foo: OrderedChoice("foo", bar)
} }
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(context, concepts)
assert foo in parser.concepts_dict assert foo in parser.concepts_dict
assert bar in parser.concepts_dict assert bar in parser.concepts_dict
@@ -485,7 +492,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
foo: Sequence("one", bar, "two") foo: Sequence("one", bar, "two")
} }
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -500,7 +507,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
foo: Sequence("one", OrderedChoice(bar, "other"), "two") foo: Sequence("one", OrderedChoice(bar, "other"), "two")
} }
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(concepts) parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -510,6 +517,140 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
# TODO infinite recursion with optional # TODO infinite recursion with optional
pass pass
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
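Read as a grammar, the parametrized cases above pin down the precedence of the bnf expression syntax; the following is a reconstruction inferred from the test table, not lifted from the repo:

    # expression := sequence ('|' sequence)*     -> OrderedChoice binds loosest
    # sequence   := term+                        -> juxtaposition builds a Sequence
    # term       := atom ('?' | '*' | '+')?      -> modifier must touch its atom
    # atom       := STRING | IDENT | PUNCT | '(' expression ')'
    # e.g. "1 *" (with a space) is Sequence(StrMatch("1"), StrMatch("*")),
    # so the ?/*/+ modifiers only apply when adjacent to the preceding atom.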
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
])
def test_i_can_detect_errors(expression, error):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
expression = "foo"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ConceptMatch("foo")
assert res.value.source == expression
def test_i_can_parse_cross_ref_with_modifier():
expression = "foo*"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
assert res.value.source == expression
def test_i_can_parse_sequence_with_cross_ref():
expression = "foo 'and' bar+"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_parse_choice_with_cross_ref():
foo = Concept("foo")
bar = Concept("bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
expression = "foo | bar?"
parser = RegexParser()
res = parser.parse(context, Tokenizer(expression))
assert res.status
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = RegexParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
def test_i_can_visit_parsing_expression():
mult = Concept(name="mult")
add = Concept(name="add")
visitor = ConceptVisitor()
visitor.visit(Sequence(mult, Optional(Sequence("+", add))))
assert sorted(list(visitor.concepts)) == ["add", "mult"]
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
#     context = get_context()
+36 -1
View File
@@ -2,12 +2,15 @@ import pytest
import ast

from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
+from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
+from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser
from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode

# def nop():
#     return NopNode()
#
@@ -52,7 +55,7 @@ from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
#     return left_as_string == right_as_string
#

-def get_concept(name, where=None, pre=None, post=None, body=None):
+def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
    concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
    if body:
@@ -63,6 +66,12 @@ def get_concept(name, where=None, pre=None, post=None, body=None):
        concept.pre = get_concept_part(pre)
    if post:
        concept.post = get_concept_part(post)
+    if definition:
+        concept.definition = ReturnValueConcept(
+            "Parsers:RegexParser",
+            True,
+            definition)
    return concept
@@ -324,3 +333,29 @@ def test_new_line_is_not_allowed_in_the_name():
    assert not res.status
    assert return_value.value == [SyntaxErrorNode([], "Newlines are not allowed in name.")]
def test_i_can_parse_def_concept_from_regex():
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
parser = DefaultParser()
res = parser.parse(get_context(), text)
node = res.value.value
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition)
expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_detect_empty_bnf_declaration():
text = "def concept name from bnf as __definition[0]"
parser = DefaultParser()
res = parser.parse(get_context(), text)
assert not res.status
assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration")
+14 -12
View File
@@ -41,7 +41,7 @@ def test_i_can_recognize_a_simple_concept():
    assert len(results) == 1
    assert results[0].status
-    assert results[0].value == concept
+    assert results[0].value.value == concept


def test_i_can_recognize_concepts_defined_several_times():
@@ -53,14 +53,14 @@ def test_i_can_recognize_concepts_defined_several_times():
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 2

-    results = sorted(results, key=lambda x: x.value.name)  # because of the usage of sets
+    results = sorted(results, key=lambda x: x.value.value.name)  # because of the usage of sets
    assert results[0].status
-    assert results[0].value.name == "hello a"
-    assert results[0].value.props["a"].value == "world"
+    assert results[0].value.value.name == "hello a"
+    assert results[0].value.value.props["a"].value == "world"

    assert results[1].status
-    assert results[1].value.name == "hello world"
+    assert results[1].value.value.name == "hello world"


def test_i_can_recognize_a_concept_with_variables():
@@ -72,9 +72,10 @@ def test_i_can_recognize_a_concept_with_variables():
    assert len(results) == 1
    assert results[0].status
-    assert results[0].value.key == concept.key
-    assert results[0].value.props["a"].value == "10"
-    assert results[0].value.props["b"].value == "5"
+    concept_found = results[0].value.value
+    assert concept_found.key == concept.key
+    assert concept_found.props["a"].value == "10"
+    assert concept_found.props["b"].value == "5"


def test_i_can_recognize_a_concept_with_duplicate_variables():
@@ -86,9 +87,10 @@ def test_i_can_recognize_a_concept_with_duplicate_variables():
    assert len(results) == 1
    assert results[0].status
-    assert results[0].value.key == concept.key
-    assert results[0].value.props["a"].value == "10"
-    assert results[0].value.props["b"].value == "5"
+    concept_found = results[0].value.value
+    assert concept_found.key == concept.key
+    assert concept_found.props["a"].value == "10"
+    assert concept_found.props["b"].value == "5"


def test_i_can_manage_unknown_concept():
@@ -121,7 +123,7 @@ def test_i_can_detect_concept_from_tokens():
    assert len(results) == 1
    assert results[0].status
-    assert results[0].value == concept
+    assert results[0].value.value == concept


def get_context():
+20 -1
View File
@@ -1,5 +1,7 @@
import ast

+import pytest
from core.ast.nodes import NodeParent, GenericNodeConcept
import core.ast.nodes
from core.ast.visitors import ConceptNodeVisitor, UnreferencedNamesVisitor
@@ -102,7 +104,7 @@ def my_function(a,b):
    assert sheerka.value(visitor.names[6]) == "a"


-def test_i_can_get_non_referenced_variables():
+def test_i_can_get_unreferenced_variables():
    source = """
def my_function(a,b):
    for i in range(b):
@@ -126,6 +128,23 @@ my_function(x,y)
    assert "y" in values
@pytest.mark.parametrize("source, expected", [
("a,b", ["a", "b"]),
("isinstance(a, int)", ["a", "int"])
])
def test_i_can_get_unreferenced_variables_from_simple_expressions(source, expected):
sheerka = get_sheerka()
node = ast.parse(source)
concept_node = core.ast.nodes.python_to_concept(node)
visitor = UnreferencedNamesVisitor(sheerka)
visitor.visit(concept_node)
assert sorted(list(visitor.names)) == expected
def test_i_can_compare_NodeParent_with_tuple():
    node_parent = NodeParent(GenericNodeConcept("For", None), "target")
    assert node_parent == ("For", "target")
+7
View File
@@ -20,6 +20,13 @@ def test_i_can_get_concept_key(name, variables, expected):
    assert concept.metadata.key == expected
def test_key_does_not_use_variable_when_definition_is_set():
concept = Concept("plus").set_prop('plus')
concept.init_key()
assert concept.metadata.key == "plus"
def test_i_can_serialize():
    """
    Test concept.to_dict()
+38 -2
View File
@@ -9,6 +9,8 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept, PROPERTIES_TO_SERIALIZE
from core.sheerka import Sheerka, ExecutionContext
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
+from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
+    ConceptLexerParser
from sdp.sheerkaDataProvider import SheerkaDataProvider

tests_root = path.abspath("../build/tests")
@@ -422,7 +424,7 @@ as:
def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept():
    """
    In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b')
-    are correctly detected, because of the concept 'a plus b' in its body
+    are correctly detected, thanks to the source code 'a plus b' in its body
    :return:
    """
    sheerka = get_sheerka()
@@ -558,7 +560,7 @@ def test_i_can_manage_concepts_with_the_same_key_when_values_are_the_same():
    assert res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME)


-def test_i_can_create_concepts_on_python_codes():
+def test_i_can_create_concepts_with_python_code_as_body():
    sheerka = get_sheerka()
    context = get_context(sheerka)
@@ -570,6 +572,40 @@ def test_i_can_create_concepts_on_python_codes():
    assert isinstance(res[0].value, list)
def test_i_can_create_concept_with_bnf_definition():
sheerka = get_sheerka()
a = Concept("a")
sheerka.add_in_cache(a)
sheerka.concepts_grammars = ConceptLexerParser().initialize(
get_context(sheerka),
{a: OrderedChoice("one", "two")}).body
res = sheerka.eval("def concept plus from bnf a ('plus' plus)?")
assert len(res) == 1
assert res[0].status
assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT)
saved_concept = sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, "plus")
assert saved_concept.key == "plus"
assert saved_concept.metadata.definition == "a ('plus' plus)?"
assert "a" in saved_concept.props
assert "plus" in saved_concept.props
saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
expected_bnf = Sequence(
ConceptMatch("a"),
Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))),
rule_name="plus")
assert saved_definitions[saved_concept] == expected_bnf
new_concept = res[0].value.body
assert new_concept.metadata.name == "plus"
assert new_concept.metadata.definition == "a ('plus' plus)?"
assert new_concept.bnf == expected_bnf
assert "a" in new_concept.props
assert "plus" in new_concept.props
def get_sheerka(root="mem://", skip_builtins_in_db=True): def get_sheerka(root="mem://", skip_builtins_in_db=True):
sheerka = Sheerka(skip_builtins_in_db) sheerka = Sheerka(skip_builtins_in_db)
sheerka.initialize(root) sheerka.initialize(root)
+42 -1
View File
@@ -311,6 +311,18 @@ def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root):
"mem://" "mem://"
]) ])
def test_i_can_add_a_dict(root): def test_i_can_add_a_dict(root):
"""
Adding a dictionary.
Note that there is no key when adding a dictionary
If you add {'my_key': 'my_value'}
'my_key is not considered as the key of the entry'
Because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'}
There are now multiple keys.
So for dictionary entries, the key is not managed
"""
    sdp = SheerkaDataProvider(root)
    obj = {"my_key": "my_value"}
@@ -735,6 +747,7 @@ def test_i_can_set_using_reference(root):
    # sanity check, make sure that I can load back
    loaded = sdp.get(entry, key)
    assert loaded == ObjWithKey(2, "foo")
+    assert getattr(loaded, Serializer.ORIGIN) == "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268"
@pytest.mark.parametrize("root", [ @pytest.mark.parametrize("root", [
@@ -754,7 +767,35 @@ def test_i_can_add_reference_of_an_object_with_a_key(root):
    assert key == obj.key
    assert entry == "entry"
    assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
-    assert sdp.load_obj(digest) == obj
+
+    loaded = sdp.load_obj(digest)
+    assert loaded == obj
+    assert getattr(loaded, Serializer.ORIGIN) == digest
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
])
def test_i_can_add_reference_a_dictionary(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key": "value1"}
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
state = sdp.load_state(sdp.get_snapshot())
digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):]
assert key is None
assert entry == "entry"
assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}
loaded = sdp.load_obj(digest)
assert loaded["my_key"] == obj["my_key"]
assert loaded[Serializer.ORIGIN] == digest
assert len(loaded) == 2
@pytest.mark.parametrize("root", [ @pytest.mark.parametrize("root", [
+62
View File
@@ -1,6 +1,8 @@
import core.utils
import pytest

+from core.tokenizer import Token, TokenKind


@pytest.mark.parametrize("lst, as_string", [
    (None, "",),
@@ -76,3 +78,63 @@ def test_i_can_get_sub_classes():
def test_i_can_product(a, b, expected):
    res = core.utils.product(a, b)
    assert res == expected
@pytest.mark.parametrize("input_as_list, expected_as_list", [
([" "], []),
([" ", "one"], ["one"]),
(["one", " "], ["one"]),
([" ", "one", " "], ["one"]),
(["\n", "one"], ["one"]),
(["one", "\n"], ["one"]),
(["\n", "one", "\n"], ["one"]),
([" ", "\n", "one"], ["one"]),
(["one", " ", "\n"], ["one"]),
([" ", "\n", "one", " ", "\n"], ["one"]),
(["\n", " ", "one"], ["one"]),
(["one", "\n", " "], ["one"]),
(["\n", " ", "one", "\n", " "], ["one"]),
([" ", "\n", " ", "one"], ["one"]),
(["one", " ", "\n", " "], ["one"]),
([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),
(["\n", " ", "\n", "one"], ["one"]),
(["one", "\n", " ", "\n"], ["one"]),
(["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),
])
def test_i_can_strip(input_as_list, expected_as_list):
actual = core.utils.strip_tokens(get_tokens(input_as_list))
expected = get_tokens(expected_as_list)
assert actual == expected
def test_by_default_eof_is_not_stripped():
actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", "<EOF>"]))
expected = get_tokens(["one", "two", " ", "\n", "<EOF>"])
assert actual == expected
def test_i_can_strip_eof():
actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", "<EOF>"]), True)
expected = get_tokens(["one", "two"])
assert actual == expected
def get_tokens(lst):
res = []
for e in lst:
if e == " ":
res.append(Token(TokenKind.WHITESPACE, " ", 0, 0, 0))
elif e == "\n":
res.append(Token(TokenKind.NEWLINE, "\n", 0, 0, 0))
elif e == "<EOF>":
res.append(Token(TokenKind.EOF, "\n", 0, 0, 0))
else:
res.append(Token(TokenKind.IDENTIFIER, e, 0, 0, 0))
return res