Added bnf when adding a new concept + Started logging filtering

This commit is contained in:
2019-12-13 20:26:11 +01:00
parent 75c8793d53
commit c668cc46d2
29 changed files with 1487 additions and 190 deletions
+1
View File
@@ -190,3 +190,4 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud
predicates.append(res)
return predicates
+8 -8
View File
@@ -3,6 +3,7 @@ from dataclasses import dataclass
from enum import Enum
import logging
import core.utils
from core.tokenizer import Tokenizer, TokenKind
log = logging.getLogger(__name__)
@@ -18,8 +19,7 @@ VARIABLE_PREFIX = "__var__"
class ConceptParts(Enum):
"""
Helper class, Note quite sure that is it that useful
I guess, I was learning nums with Python...
Lists metadata that can contains some code
"""
WHERE = "where"
PRE = "pre"
@@ -85,6 +85,7 @@ class Concept:
self.metadata = metadata
self.props = {} # list of Property for this concept
self.cached_asts = {} # cached ast for the where, pre, post and body parts
self.bnf = None
def __repr__(self):
return f"({self.metadata.id}){self.metadata.name}"
@@ -134,9 +135,9 @@ class Concept:
return self
if tokens is None:
tokens = iter(Tokenizer(self.metadata.name))
tokens = list(Tokenizer(self.metadata.name))
variables = list(self.props.keys())
variables = list(self.props.keys()) if len(core.utils.strip_tokens(tokens, True)) > 1 else []
key = ""
first = True
@@ -171,12 +172,11 @@ class Concept:
:param codes:
:return:
"""
possibles_codes = ConceptParts.get_parts()
if codes is None:
return
for key in codes:
if key in possibles_codes:
self.cached_asts[ConceptParts(key)] = codes[key]
self.cached_asts[key] = codes[key]
return self
@@ -231,7 +231,7 @@ class Concept:
return self
def set_prop(self, prop_name: str, prop_value=None):
self.props[prop_name] = Property(prop_name, prop_value)
self.props[prop_name] = Property(prop_name, prop_value) # Python 3.x order is kept in dictionaries
return self
def set_prop_by_index(self, index: int, prop_value):
+85 -24
View File
@@ -1,4 +1,6 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from functools import lru_cache
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
from evaluators.BaseEvaluator import OneReturnValueEvaluator
@@ -10,8 +12,10 @@ import core.builtin_helpers
import logging
log = logging.getLogger(__name__)
init_log = logging.getLogger(__name__ + ".init")
concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
class Sheerka(Concept):
@@ -19,22 +23,29 @@ class Sheerka(Concept):
Main controller for the project
"""
CONCEPTS_ENTRY = "All_Concepts"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
USER_CONCEPTS_KEYS = "User_Concepts"
CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
def __init__(self, debug=False, skip_builtins_in_db=False):
def __init__(self, debug=False, skip_builtins_in_db=False, loggers=None):
log.debug("Starting Sheerka.")
super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA)
# cache of the most used concepts
# Note that these are only templates
# They are used as a footprint for instantiation
# Except of source when the concept is supposed to be unique
self.concepts_cache = {}
# cache for builtin types.
# It allow instantiation of a builtin clas
self.builtin_cache = {}
#
# Cache for all concepts BNF
self.concepts_definitions = {}
#
# cache for concepts grammars
# a grammar can be seen as a resolved BNF
self.concepts_grammars = {}
# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
@@ -45,14 +56,16 @@ class Sheerka(Concept):
# ex: hello => say('hello')
self.rules = []
self.sdp = None
self.parsers = []
self.evaluators = []
self.sdp: SheerkaDataProvider = None # SheerkaDataProvider
self.builtin_cache = {} # cache for builtin concepts
self.parsers = {} # cache for builtin parsers
self.evaluators = [] # cache for builtin evaluators
self.evaluators_prefix = None
self.parsers_prefix = None
self.evaluators_prefix: str = None
self.parsers_prefix: str = None
self.debug = debug
self.loggers = loggers or []
self.skip_builtins_in_db = skip_builtins_in_db
def initialize(self, root_folder: str = None):
@@ -85,7 +98,7 @@ class Sheerka(Concept):
Initializes the builtin concepts
:return: None
"""
log.debug("Initializing builtin concepts")
init_log.debug("Initializing builtin concepts")
builtins_classes = self.get_builtins_classes_as_dict()
# this all initialization of the builtins seems to be little bit complicated
@@ -101,11 +114,11 @@ class Sheerka(Concept):
if not self.skip_builtins_in_db:
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key)
if from_db is None:
log.debug(f"'{concept.name}' concept is not found in db. Adding.")
init_log.debug(f"'{concept.name}' concept is not found in db. Adding.")
self.set_id_if_needed(concept, True)
self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True)
else:
log.debug(f"Found concept '{from_db}' in db. Updating.")
init_log.debug(f"Found concept '{from_db}' in db. Updating.")
concept.update_from(from_db)
self.add_in_cache(concept)
@@ -120,8 +133,8 @@ class Sheerka(Concept):
if parser.__module__ == base_class.__module__:
continue
log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers.append(parser)
init_log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
def initialize_builtin_evaluators(self):
"""
@@ -129,14 +142,26 @@ class Sheerka(Concept):
:return:
"""
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"):
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"):
log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
def logger_filter(self, record: logging.LogRecord):
    """Logging filter hook: decide whether *record* is emitted.

    Everything is kept when 'all' is in self.loggers.  Otherwise a
    record coming from a ``.init`` sub-logger is dropped unless 'init'
    was explicitly enabled.
    """
    if 'all' in self.loggers:
        return True
    keep = True
    if record.name.endswith(".init") and 'init' not in self.loggers:
        keep = False
    return keep
def init_logging(self):
handler = logging.StreamHandler()
handler.addFilter(self.logger_filter)
if self.debug:
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
log_level = logging.DEBUG
@@ -144,7 +169,7 @@ class Sheerka(Concept):
log_format = "%(message)s"
log_level = logging.INFO
logging.basicConfig(format=log_format, level=log_level)
logging.basicConfig(format=log_format, level=log_level, handlers=[handler])
def eval(self, text: str):
"""
@@ -153,7 +178,9 @@ class Sheerka(Concept):
:param text:
:return:
"""
log.debug(f"Evaluating '{text}'.")
evt_digest = self.sdp.save_event(Event(text))
log.debug(f"{evt_digest=}")
exec_context = ExecutionContext(self.key, evt_digest, self)
# Before parsing
@@ -183,7 +210,7 @@ class Sheerka(Concept):
debug_text = "'" + text + "'" if isinstance(text, str) \
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
log.debug(f"Parsing {debug_text}")
for parser in self.parsers:
for parser in self.parsers.values():
p = parser()
res = p.parse(context, text)
if isinstance(res, list):
@@ -193,7 +220,7 @@ class Sheerka(Concept):
return result
def process(self, context, return_values, initial_concepts=None):
log.debug(f"Processing parsing result. context concept={initial_concepts}")
log.debug(f"{initial_concepts=}. Processing " + core.utils.pp(return_values))
# return_values must be a list
if not isinstance(return_values, list):
@@ -303,6 +330,8 @@ class Sheerka(Concept):
"""
concept.init_key()
concepts_definitions = None
init_ret_value = None
# checks for duplicate concepts
if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()):
@@ -312,14 +341,33 @@ class Sheerka(Concept):
# set id before saving in db
self.set_id_if_needed(concept, False)
# add the BNF if known
if concept.bnf:
concepts_definitions = self.concepts_definitions.copy()
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy())
sub_context = context.push(self.name, "Initializing concept definition")
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
if not init_ret_value.status:
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(init_ret_value.value))
# save the new context in sdp
try:
self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True)
if concepts_definitions is not None:
self.sdp.set(context.event_digest, self.CONCEPTS_DEFINITIONS_ENTRY, concepts_definitions, use_ref=True)
except SheerkaDataProviderDuplicateKeyError as error:
return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0])
# add in cache for quick further reference
# Updates the caches
self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
if concepts_definitions is not None:
self.concepts_definitions = concepts_definitions
if init_ret_value is not None and init_ret_value.status:
self.concepts_grammars = init_ret_value.body
# process the return in needed
ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
@@ -514,6 +562,18 @@ class Sheerka(Concept):
return (self.value(obj) for obj in objs)
def is_success(self, obj):
if isinstance(obj, bool):
return obj
if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
return obj.status
if self.isinstance(obj, BuiltinConcepts.ERROR):
return False
return False
def isinstance(self, a, b):
"""
return true if the concept a is an instance of the concept b
@@ -603,6 +663,7 @@ class ExecutionContext:
sheerka: Sheerka # sheerka
desc: str = None # human description of what is going on
obj: Concept = None # what is the subject of the execution context (if known)
concepts_cache: dict = field(default_factory=dict)
def push(self, who, desc=None, obj=None):
return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj)
+2 -21
View File
@@ -80,6 +80,8 @@ class LexerError(Exception):
class Keywords(Enum):
DEF = "def"
CONCEPT = "concept"
FROM = "from"
BNF = "bnf"
AS = "as"
WHERE = "where"
PRE = "pre"
@@ -308,24 +310,3 @@ class Tokenizer:
1 if lines_count > 0 else start_column + len(result))
return result, lines_count
def seek(self, words):
if self.i == self.text_len:
return 0
# init
offsets = {}
start_index = self.i
buffer = ""
while self.i < self.text_len:
c = self.text[self.i]
# skip white space
if c in (" ", "\t"):
self.i += 1
continue
for word in words:
if c == word[offset]:
os
+63 -5
View File
@@ -3,6 +3,8 @@ import inspect
import pkgutil
import sys
from core.tokenizer import TokenKind
def sysarg_to_string(argv):
"""
@@ -72,11 +74,18 @@ def get_full_qualified_name(obj):
:param obj:
:return:
"""
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
if obj.__class__ == type:
module = obj.__module__
if module is None or module == str.__class__.__module__:
return obj.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__name__
else:
return module + '.' + obj.__class__.__name__
module = obj.__class__.__module__
if module is None or module == str.__class__.__module__:
return obj.__class__.__name__ # Avoid reporting __builtin__
else:
return module + '.' + obj.__class__.__name__
def get_classes(module_name):
@@ -137,7 +146,7 @@ def remove_from_list(lst, to_remove_predicate):
def product(a, b):
"""
Kind of cartesian product between list a and b
Kind of cartesian product between lists a and b
knowing that a is also a list
So it's a cartesian product between a list of list and a list
@@ -155,3 +164,52 @@ def product(a, b):
res.append(items)
return res
def strip_quotes(text):
    """Remove one pair of matching surrounding quotes from *text*.

    Quotes are stripped only when the string both starts AND ends with
    the same quote character (single or double).  The original version
    looked at the first character only, so a malformed input such as
    ``"'abc"`` was silently truncated to ``"ab"``; such inputs are now
    returned unchanged.

    :param text: value to clean; non-str values pass through untouched
    :return: the unquoted string, or the input unchanged
    """
    if not isinstance(text, str):
        return text
    if len(text) >= 2 and text[0] in ("'", '"') and text[-1] == text[0]:
        return text[1:-1]
    return text
def strip_tokens(tokens, strip_eof=False):
    """
    Remove the starting and trailing spaces and newlines from a token list.

    :param tokens: list of Token objects, or None
    :param strip_eof: when True, a trailing EOF token is stripped as well
    :return: the trimmed sub-list, [] when nothing remains, None for None input
    """
    if tokens is None:
        return None
    start = 0
    length = len(tokens)
    while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
        start += 1
    if start == length:
        return []
    end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
        if strip_eof \
        else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
    end = length - 1
    # Bound the scan with `end >= start` rather than `end > 0`: the old
    # guard left a lone strippable token at index 0 in place (e.g. a
    # single EOF token with strip_eof=True returned [EOF] instead of []).
    while end >= start and tokens[end].type in end_tokens:
        end -= 1
    return tokens[start: end + 1]
def pp(items):
    """Pretty-print *items* for log output.

    Scalars and empty collections are rendered with str(); a non-empty
    iterable is rendered one element per line, each preceded by
    a " \\n" separator so the listing starts on its own line.
    """
    is_iterable = hasattr(items, "__iter__")
    if not is_iterable or len(items) == 0:
        return str(items)
    rendered = [str(element) for element in items]
    return " \n" + " \n".join(rendered)
+104
View File
@@ -494,3 +494,107 @@ it wrong. But the profiling shows that the time is lost in the under layers of t
FS library.
It's a shame !
2019-12-01
**********
Using BNF to define concept
"""""""""""""""""""""""""""""
I always knew that there will be several ways to define the body of a concept (same
goes for the 'pre', 'post' and 'where' parts). It can be defined as Python code,
or something that is related to concepts. It can even be a new language that I will
design. The important point, is that contrarily to traditional development languages,
Sheerka must remain extensible.
Same goes for the definition of the name.
The traditional form is:
::
def concept foo bar baz as ...
So the concept is defined by the sequence 'foo', then 'bar' then 'baz'. In this order.
Another way is
::
def concept a plus b where a,b as ...
In this form, a and b are supposed to be variables.
It will be matched against :code:`one plus two`.
The concept name is 'a plus b'. It is a quick way to declare a concept with variable,
but if someone defines another concept
::
def concept number1 plus number2 where number1,number2 as ...
This will produce another concept (with the same key, though). I guess that, at
some point, Sheerka will be able to detect that the concepts are the same, but
the name of the concept includes its variables. Which may be annoying in some
situations.
Plus, it's not possible to define rules precedences in this way. For example,
::
def concept a plus b as ...
def concept a times b as ...
How do you express that multiplications have a higher priority in, for example,
:code:`one plus two times three` ?
The only right answer, at least to me, is to implement something that is inspired
by the BNF definition of a grammar.
So the definition of the concept will look like
::
def concept term as factor (('+' | '-') term)?
def concept factor as number (('*' | '/') factor)?
def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
This form seems great, but in the definition of term and factor, there is no more
room for the real body. ie once the components are recognized, what do we do with them ?
So we can try
::
def concept factor (('+') factor)* as factor[0] + factor[i]
def concept number (('*') number)? as number[0] + number[i]
def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
The body is defined, but the name of the concept is too complicated, ex: factor (('+') factor)*
It's quite impossible to reference a concept that is defined in this way.
So my last proposal, which marries the two ideas, is to introduce the two keywords 'using' and 'bnf'
.. _bnf : https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
::
def concept term using bnf factor (('+' | '-') term)? as factor + (or -) term
def concept factor using bnf number (('*' | '/') factor)? as number * (or /) factor
def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
In my implementation:
* Terminals are between quotes
* Sequences are separated by whitespaces
* '|' (vertical bar) is used for alternatives
Like in regular expressions, you will also find
* '*' (star) is used to express zero or many
* '+' (plus) to express one or many
* '?' (question mark) to express zero or one
For those who don't know what BNF stands for, please have a look at the bnf_
wikipedia page.
I guess that I will need a complete chapter to explain how you retrieve what was parsed
+64 -15
View File
@@ -1,7 +1,11 @@
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept
from core.ast.nodes import python_to_concept
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.builtin_helpers import get_names
from core.concept import Concept
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode
import functools
import logging
from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
@@ -9,6 +13,23 @@ from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
log = logging.getLogger(__name__)
class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
    """
    Collects the names referenced by a BNF parsing expression.

    For a ConceptMatch node, the rule name (when given) takes precedence
    over the concept name, since it is the name a property would be
    created under; for every other node only an explicit rule name is
    collected.
    """

    def __init__(self):
        self.names = set()

    def visit_ConceptMatch(self, node):
        # prefer the explicit rule name, fall back to the concept name
        name = node.rule_name if node.rule_name else node.concept_name
        self.names.add(name)

    def visit_all(self, node):
        if node.rule_name:
            self.names.add(node.rule_name)
class AddConceptEvaluator(OneReturnValueEvaluator):
"""
Used to add a new concept
@@ -32,7 +53,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
props_found = set()
concept = Concept(def_concept_node.name)
for prop in ("where", "pre", "post", "body"):
for prop in ("definition", "where", "pre", "post", "body"):
# put back the sources
part_ret_val = getattr(def_concept_node, prop)
if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status:
@@ -43,35 +64,63 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
setattr(concept.metadata, prop, source)
# try to find what can be a property
for p in self.get_props(part_ret_val):
concept_name = [part.value for part in def_concept_node.name.tokens]
for p in self.get_props(sheerka, part_ret_val, concept_name):
props_found.add(p)
# Auto discovered properties must be referenced in the name
# Note that with this method, the variables will be created in the order of appearance
# add props order by appearance when possible
for token in def_concept_node.name.tokens:
if token.value in props_found:
concept.set_prop(token.value, None)
# add the remaining properties
for p in props_found:
if p not in concept.props:
concept.set_prop(p, None)
# finish initialisation
concept.init_key(def_concept_node.name.tokens)
concept.add_codes(def_concept_node.get_codes())
concept.add_codes(def_concept_node.get_asts())
if sheerka.is_success(def_concept_node.definition):
concept.bnf = def_concept_node.definition.value.value
ret = sheerka.create_new_concept(context, concept)
return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value])
@staticmethod
def get_source(ret_value):
return ret_value.value.source if isinstance(ret_value.value, ParserResultConcept) \
else ret_value.value.name
return ret_value.value.source
@staticmethod
def get_props(ret_value):
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode):
get_names_visitor = PythonGetNamesVisitor()
get_names_visitor.visit(ret_value.value.value.ast_)
return get_names_visitor.names
def get_props(sheerka, ret_value, concept_name):
"""
Try to find out the variables
This function can only be a draft, as there may be tons of different situations
I guess that it can only be complete when will we have access to Sheerka memory
"""
if isinstance(ret_value.value, Concept):
return list(ret_value.value.props.keys())
#
# Case of python code
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode):
python_node = ret_value.value.value
as_concept_node = python_to_concept(python_node.ast_)
variables = get_names(sheerka, as_concept_node)
variables = filter(lambda x: x in concept_name, variables)
return list(variables)
#
# case of concept
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, Concept):
return list(ret_value.value.value.props.keys())
#
# case of BNF
#
if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression):
visitor = ConceptOrRuleNameVisitor()
visitor.visit(ret_value.value.value)
return sorted(list(visitor.names))
return []
+3 -4
View File
@@ -18,13 +18,12 @@ class ConceptEvaluator(OneReturnValueEvaluator):
def matches(self, context, return_value):
return return_value.status and \
return_value.who.startswith(BaseParser.PREFIX) and \
isinstance(return_value.value, Concept) and \
not isinstance(return_value.value, ParserResultConcept) # because there are specific evaluators
isinstance(return_value.value, ParserResultConcept) and \
isinstance(return_value.value.value, Concept)
def eval(self, context, return_value):
sheerka = context.sheerka
concept = return_value.value
concept = return_value.value.value
# pre condition should already be validated by the parser.
# It's a mandatory condition for the concept before it can be recognized
+2 -1
View File
@@ -2,6 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
from evaluators.AddConceptEvaluator import AddConceptEvaluator
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
from parsers.BaseParser import BaseParser
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError
class DuplicateConceptEvaluator(AllReturnValuesEvaluator):
@@ -26,7 +27,7 @@ class DuplicateConceptEvaluator(AllReturnValuesEvaluator):
if ret.status:
parsing = True
elif ret.who == sheerka.get_evaluator_name(AddConceptEvaluator.NAME):
if not ret.status and ret.value.body.args[0] == "Duplicate object.":
if not ret.status and isinstance(ret.value.body, SheerkaDataProviderDuplicateKeyError):
add_concept_in_error = True
self.already_defined = ret.value.body.obj
else:
+5 -2
View File
@@ -14,16 +14,19 @@ def usage():
def main(argv):
try:
opts, args = getopt.getopt(argv, "hd", ["help", "debug"])
opts, args = getopt.getopt(argv, "hdl:", ["help", "debug", "logger="])
debug = False
loggers = set()
for o, a in opts:
if o in ('-h', "--help"):
usage()
return True
if o in ('-d', "--debug"):
debug = True
if o in ('-l', '-logger'):
loggers.add(a)
sheerka = Sheerka(debug=debug)
sheerka = Sheerka(debug=debug, loggers=loggers)
sheerka.initialize()
_in = core.utils.sysarg_to_string(args)
+411 -8
View File
@@ -10,6 +10,7 @@ from dataclasses import field, dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import ExecutionContext
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils
@@ -42,6 +43,11 @@ class LexerNode(Node):
class ConceptNode(LexerNode):
"""
Returned by the ConceptLexerParser
It represents a recognized concept
"""
def __init__(self, concept, start, end, tokens=None, source=None, children=None):
super().__init__(start, end)
self.concept = concept
@@ -67,6 +73,10 @@ class ConceptNode(LexerNode):
class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
"""
def __init__(self, parsing_expression, start, end, children=None):
super().__init__(start, end)
self.parsing_expression = parsing_expression
@@ -82,6 +92,10 @@ class NonTerminalNode(LexerNode):
class TerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
"""
def __init__(self, parsing_expression, start, end, value):
super().__init__(start, end)
self.parsing_expression = parsing_expression
@@ -97,6 +111,27 @@ class GrammarErrorNode(ErrorNode):
message: str
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
    """Parser error: a token appeared where a different kind was expected."""
    message: str
    # token kinds/values that would have been accepted at this position
    expected_tokens: list


@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
    """Parser error: the input ended while more tokens were still required."""
    pass


@dataclass()
class UnknownConceptNode(ErrorNode):
    """Grammar error: a BNF rule references a concept that cannot be resolved."""
    concept_key: str


@dataclass()
class TooManyConceptNode(ErrorNode):
    """Grammar error: a concept reference resolved to more than one concept."""
    concept_key: str
class ParsingExpression:
def __init__(self, *args, **kwargs):
self.elements = args
@@ -108,6 +143,15 @@ class ParsingExpression:
self.rule_name = kwargs.get('rule_name', '')
def __eq__(self, other):
if not isinstance(other, ParsingExpression):
return False
return self.rule_name == other.rule_name and self.elements == other.elements
def __hash__(self):
return hash((self.rule_name, self.elements))
def parse(self, parser):
return self._parse(parser)
@@ -133,6 +177,10 @@ class Sequence(ParsingExpression):
return NonTerminalNode(self, init_pos, end_pos, children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})"
class OrderedChoice(ParsingExpression):
"""
@@ -152,6 +200,10 @@ class OrderedChoice(ParsingExpression):
return None
def __repr__(self):
to_str = "| ".join(repr(n) for n in self.elements)
return f"({to_str})"
class Optional(ParsingExpression):
"""
@@ -178,6 +230,46 @@ class Optional(ParsingExpression):
return selected_node
def __repr__(self):
if len(self.elements) == 1:
return f"{self.elements[0]}?"
else:
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})?"
class ZeroOrMore(ParsingExpression):
    """
    ZeroOrMore will try to match parser expression specified zero or more
    times. It will never fail.
    """

    def _parse(self, parser):
        # repetition is not implemented yet in this parser
        raise NotImplementedError()

    # Uncomment when _parse is implemented
    # def __repr__(self):
    #     to_str = ", ".join(repr(n) for n in self.elements)
    #     return f"({to_str})*"
class OneOrMore(ParsingExpression):
    """
    OneOrMore will try to match parser expression specified one or more times.
    """

    def _parse(self, parser):
        # repetition is not implemented yet in this parser
        raise NotImplementedError()
class UnorderedGroup(ParsingExpression):
    """
    Will try to match all of the parsing expression in any order.
    """

    def _parse(self, parser):
        # unordered matching is not implemented yet in this parser
        raise NotImplementedError()
class Match(ParsingExpression):
"""
@@ -197,13 +289,22 @@ class StrMatch(Match):
Matches a literal
"""
def __init__(self, to_match, rule_name="", root=False, ignore_case=None):
def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
super(Match, self).__init__(rule_name=rule_name, root=root)
self.to_match = to_match
self.ignore_case = ignore_case
def __repr__(self):
return f"StrMatch('{self.to_match}')"
return f"'{self.to_match}'"
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, StrMatch):
return False
return self.to_match == other.to_match and self.ignore_case == other.ignore_case
def _parse(self, parser):
token = parser.get_token()
@@ -218,6 +319,31 @@ class StrMatch(Match):
return None
class ConceptMatch(Match):
    """
    Will match a concept.
    It is used only for rule definition.
    When the grammar is created, it is replaced by the actual concept
    (a CrossRef, resolved later by ConceptLexerParser.get_model).
    """

    def __init__(self, concept_name):
        # super(Match, self) deliberately skips Match.__init__ and runs the
        # next class in the MRO (ParsingExpression) directly
        super(Match, self).__init__()
        self.concept_name = concept_name

    def __repr__(self):
        return f"{self.concept_name}"

    # NOTE(review): defining __eq__ without __hash__ sets __hash__ to None,
    # so ConceptMatch instances are unhashable even though the base
    # ParsingExpression defines __hash__ — confirm no set/dict-key usage.
    def __eq__(self, other):
        if not super().__eq__(other):
            return False
        if not isinstance(other, ConceptMatch):
            return False
        return self.concept_name == other.concept_name
class CrossRef:
"""
During the creation of the model,
@@ -227,11 +353,20 @@ class CrossRef:
def __init__(self, concept):
self.concept = concept
def __repr__(self):
return f"ref({self.concept.key})"
def __eq__(self, other):
if not isinstance(other, CrossRef):
return False
return self.concept == other.concept
class ConceptLexerParser(BaseParser):
def __init__(self):
def __init__(self, concepts_dict=None):
super().__init__("ConceptLexer")
self.concepts_dict = {}
self.concepts_dict = concepts_dict or {} # dict of concept, grammar
self.ignore_case = True
self.token = None
@@ -295,22 +430,28 @@ class ConceptLexerParser(BaseParser):
self.pos -= 1
self.token = self.tokens[self.pos]
def initialize(self, dict):
def initialize(self, context, grammars):
"""
Adds a bunch of concepts, and how they can be recognized
:param dict: dictionary of concept; concept_definition
:param context: execution context
:param grammars: dictionary of concept, concept_definition
:return:
"""
self.context = context
self.sheerka = context.sheerka
nodes_to_resolve = []
concepts_to_resolve = set()
# ## Gets the grammars
for concept, concept_def in dict.items():
for concept, concept_def in grammars.items():
concept.init_key() # make sure that the key is initialized
grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve)
self.concepts_dict[concept] = grammar
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
# ## Removes concepts with infinite recursions
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
for concept in concepts_to_remove:
@@ -320,7 +461,20 @@ class ConceptLexerParser(BaseParser):
# ## Resolves cross references and remove grammar with unresolved references
self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)
if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink)
else:
return self.sheerka.ret(self.name, True, self.concepts_dict)
def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve):
def get_concept(concept_name):
if concept_name in self.context.concepts_cache:
return self.context.concepts_cache[concept_name]
return self.sheerka.get(concept_name)
# TODO
# inner_get_model must not modify the initial ParsingExpression
# A copy must be created
def inner_get_model(expression):
if isinstance(expression, Concept):
ret = CrossRef(expression)
@@ -332,6 +486,16 @@ class ConceptLexerParser(BaseParser):
ret = expression
if ret.ignore_case is None:
ret.ignore_case = self.ignore_case
elif isinstance(expression, ConceptMatch):
to_match = get_concept(expression.concept_name)
if hasattr(to_match, "__iter__"):
ret = self.add_error(TooManyConceptNode(expression.concept_name), False)
elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
ret = self.add_error(UnknownConceptNode(expression.concept_name), False)
else:
ret = CrossRef(to_match)
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \
isinstance(expression, Optional):
@@ -341,7 +505,7 @@ class ConceptLexerParser(BaseParser):
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
else:
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."))
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
return ret
model = inner_get_model(concept_def)
@@ -493,3 +657,242 @@ class ConceptLexerParser(BaseParser):
by_end_pos[result.end].append(result)
return by_end_pos[max(by_end_pos)]
class RegexParser:
"""
Parser used to transform litteral into ParsingExpression
example :
a | b, c -> Sequence(OrderedChoice(a, b) ,c)
'|' (pipe) is used for OrderedChoice
',' (comma) is used for Sequence
'?' (question mark) is used for Optional
'*' (star) is used for ZeroOrMore
'+' (plus) is used for OneOrMore
"""
def __init__(self):
    """Create an idle parser; reset_parser() must be called before parsing."""
    self.has_error = False  # set as soon as add_error() records something
    self.error_sink = []  # accumulated error nodes
    self.name = BaseParser.PREFIX + "RegexParser"
    self.lexer_iter = None  # token iterator, bound by reset_parser()
    self._current = None  # current token (use get_token())
    self.after_current = None  # one-token lookahead buffer
    self.nb_open_par = 0  # parenthesis nesting depth
    self.context = None  # ExecutionContext, bound by reset_parser()
    self.source = ""  # raw text of every token consumed so far
    self.sheerka = None  # back-reference to Sheerka, bound by reset_parser()
def __eq__(self, other):
    """Any two RegexParser instances compare equal; anything else does not."""
    return isinstance(other, RegexParser)
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
self._current = None
self.after_current = None
self.nb_open_par = 0
self.next_token()
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=False):
if self._current and self._current.type == TokenKind.EOF:
return
try:
self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value)
self.after_current = None
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
def next_after(self):
if self.after_current is not None:
return self.after_current
try:
self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value)
return self.after_current
except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
return self.after_current
def eat_white_space(self):
if self.after_current is not None:
self._current = self.after_current
self.source += str(self._current.value)
self.after_current = None
try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = None
def maybe_sequence(self, first, second):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text)
tree = self.parse_choice()
ret = self.sheerka.ret(
self.name,
not self.has_error,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=self.source,
body=self.error_sink if self.has_error else tree,
try_parsed=tree))
return ret
def parse_choice(self):
sequence = self.parse_sequence()
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
while True:
# maybe eat the vertical bar
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
def parse_sequence(self):
expr_and_modifier = self.parse_expression_and_modifier()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
# maybe eat the comma
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_white_space()
sequence = self.parse_expression_and_modifier()
elements.append(sequence)
return Sequence(*elements)
def parse_expression_and_modifier(self):
expression = self.parse_expression()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return expression
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEndOfFileError(), False)
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
return expression
if token.type == TokenKind.IDENTIFIER:
self.next_token()
return ConceptMatch(token.value)
# concept = self.sheerka.get(str(token.value))
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# self.add_error(CannotResolveConceptNode(str(token.value)))
# self.next_token()
# return None
# else:
# self.next_token()
# return concept
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
class ParsingExpressionVisitor:
    """
    Walks a ParsingExpression tree, dispatching each node to a
    ``visit_<ClassName>`` method when the subclass defines one, and to
    :meth:`generic_visit` otherwise.
    """
    def visit(self, parsing_expression):
        """Dispatch *parsing_expression* to its type-specific handler."""
        handler_name = 'visit_' + type(parsing_expression).__name__
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(parsing_expression)
    def generic_visit(self, parsing_expression):
        """Fallback handler: notify ``visit_all`` (when defined) then recurse into children."""
        if hasattr(self, "visit_all"):
            self.visit_all(parsing_expression)
        for child in parsing_expression.elements:
            if isinstance(child, Concept):
                # concepts are visited through their ConceptMatch wrapper
                self.visit(ConceptMatch(child.key or child.name))
            elif isinstance(child, str):
                # bare strings stand for literal matches
                self.visit(StrMatch(child))
            else:
                self.visit(child)
+70 -22
View File
@@ -1,11 +1,14 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts
import core.builtin_helpers
import core.utils
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
import logging
from parsers.ConceptLexerParser import RegexParser
log = logging.getLogger(__name__)
@@ -180,20 +183,22 @@ class NameNode(DefaultParserNode):
@dataclass()
class DefConceptNode(DefaultParserNode):
name: NameNode = NotInitializedNode()
where: ReturnValueConcept = NotInitializedNode()
pre: ReturnValueConcept = NotInitializedNode()
post: ReturnValueConcept = NotInitializedNode()
body: ReturnValueConcept = NotInitializedNode()
definition: ReturnValueConcept = NotInitializedNode()
def get_codes(self):
codes = {}
def get_asts(self):
asts = {}
for part_key in ConceptParts:
prop_value = getattr(self, part_key.value)
if hasattr(prop_value, "ast_"):
codes[part_key] = prop_value.ast_
return codes
if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body,
ParserResultConcept) and hasattr(
prop_value.body.body, "ast_"):
asts[part_key] = prop_value.body.body.ast_
return asts
class DefaultParser(BaseParser):
@@ -322,20 +327,44 @@ class DefaultParser(BaseParser):
# init
log.debug("It may be a definition of a concept")
concept_special_tokens = [def_token]
concept_found = DefConceptNode(concept_special_tokens)
keywords_tokens = [def_token]
concept_found = DefConceptNode(keywords_tokens)
# the definition of a concept consists of several parts
# Keywords.CONCEPT to get the name of the concept
# Keywords.FROM [Keywords.REGEX] to get the definition of the concept
# Keywords.AS to get the body
# Keywords.WHERE to get the conditions to recognize for the variables
# Keywords.PRE to know if the conditions to evaluate the concept
# Keywords.POST to apply or verify once the concept is executed
def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
#
# Regroup the tokens by parts
first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)
# get the name
concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)
# get the definition
concept_found.definition = self.get_concept_definition(tokens_found_by_parts)
# get the ASTs for the remaining parts
asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
concept_found.where = asts_found_by_parts[Keywords.WHERE]
concept_found.pre = asts_found_by_parts[Keywords.PRE]
concept_found.post = asts_found_by_parts[Keywords.POST]
concept_found.body = asts_found_by_parts[Keywords.AS]
log.debug(f"Found DefConcept node '{concept_found}'")
return concept_found
def regroup_tokens_by_parts(self, keywords_tokens):
def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
# tokens found, when trying to recognize the parts
tokens_found_by_parts = {
Keywords.CONCEPT: [],
Keywords.FROM: None,
Keywords.AS: None,
Keywords.WHERE: None,
Keywords.PRE: None,
@@ -348,7 +377,7 @@ class DefaultParser(BaseParser):
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry
while token.type != TokenKind.EOF:
if token.value in def_concept_parts:
concept_special_tokens.append(token) # keep track of the keywords
keywords_tokens.append(token) # keep track of the keywords
keyword = token.value
if tokens_found_by_parts[keyword]:
# a part is defined more than once
@@ -364,13 +393,15 @@ class DefaultParser(BaseParser):
token = self.get_token()
# semantic checks
return first_token, tokens_found_by_parts
def get_concept_name(self, first_token, tokens_found_by_parts):
name_first_token_index = 1
token = self.get_token()
if first_token.value != Keywords.CONCEPT:
self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
name_first_token_index = 0
# Manage the name
name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
if len(name_tokens) == name_first_token_index:
self.add_error(SyntaxErrorNode([], "Name is mandatory"))
@@ -381,8 +412,31 @@ class DefaultParser(BaseParser):
if TokenKind.NEWLINE in [t.type for t in name_tokens]:
self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))
concept_found.name = NameNode(name_tokens[name_first_token_index:]) # skip the first token
return NameNode(name_tokens[name_first_token_index:]) # skip the first token
def get_concept_definition(self, tokens_found_by_parts):
if tokens_found_by_parts[Keywords.FROM] is None:
return NotInitializedNode()
definition_tokens = tokens_found_by_parts[Keywords.FROM]
if definition_tokens[1].value != Keywords.BNF:
return NotInitializedNode()
tokens = core.utils.strip_tokens(definition_tokens[2:])
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return NotInitializedNode()
regex_parser = RegexParser()
new_context = self.context.push(self.name)
parsing_result = regex_parser.parse(new_context, tokens)
if not parsing_result.status:
self.add_error(parsing_result.value)
return NotInitializedNode()
return parsing_result
def get_concept_parts(self, tokens_found_by_parts):
asts_found_by_parts = {
Keywords.AS: NotInitializedNode(),
Keywords.WHERE: NotInitializedNode(),
@@ -391,7 +445,7 @@ class DefaultParser(BaseParser):
}
for keyword in tokens_found_by_parts:
if keyword == Keywords.CONCEPT:
if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
continue # already done
log.debug("Processing part '" + keyword.name + "'")
@@ -418,13 +472,7 @@ class DefaultParser(BaseParser):
asts_found_by_parts[keyword] = parsing_result
concept_found.where = asts_found_by_parts[Keywords.WHERE]
concept_found.pre = asts_found_by_parts[Keywords.PRE]
concept_found.post = asts_found_by_parts[Keywords.POST]
concept_found.body = asts_found_by_parts[Keywords.AS]
log.debug(f"Found DefConcept node '{concept_found}'")
return concept_found
return asts_found_by_parts
# def parse_expression(self):
# return self.parse_addition()
+5 -1
View File
@@ -20,7 +20,11 @@ class EmptyStringParser(BaseParser):
isinstance(text, list) and text == [] or \
text is None:
log.debug(f"Recognized '{text}' as BuiltinConcepts.NOP.")
return sheerka.ret(self.name, True, sheerka.new(BuiltinConcepts.NOP))
return sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source="",
body=sheerka.new(BuiltinConcepts.NOP)))
log.debug(f"Failed to recognize '{text}'")
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
+9 -1
View File
@@ -47,7 +47,15 @@ class ExactConceptParser(BaseParser):
if token.startswith(VARIABLE_PREFIX):
index = int(token[len(VARIABLE_PREFIX):])
concept.set_prop_by_index(index, words[i])
res.append(ReturnValueConcept(self.name, True, concept))
res.append(ReturnValueConcept(
self.name,
True,
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text if isinstance(text, str) else self.get_text_from_tokens(text),
body=concept,
try_parsed=concept)))
log.debug(f"Recognized '{text}' as '{concept}'")
recognized = True
+3
View File
@@ -9,6 +9,9 @@
- E : events
- O : object (with history management)
- P : pickle
- S : state
- C : concept
- D : concept definitions
## How concepts are serialized ?
- get the id of the concept
+21 -5
View File
@@ -10,7 +10,7 @@ from sdp.sheerkaSerializer import Serializer, SerializerContext
import logging
log = logging.getLogger(__name__)
init_log = logging.getLogger(__name__ + ".init")
def json_default_converter(o):
"""
@@ -278,7 +278,7 @@ class SheerkaDataProvider:
REF_PREFIX = "##REF##:"
def __init__(self, root=None):
log.debug("Initializing sdp.")
init_log.debug("Initializing sdp.")
self.io = SheerkaDataProviderIO.get(root)
self.first_time = self.io.first_time
@@ -312,6 +312,20 @@ class SheerkaDataProvider:
else obj.get_digest() if hasattr(obj, "get_digest") \
else None
@staticmethod
def get_obj_origin(obj):
"""
Get the digest used to save obj if set
"""
if isinstance(obj, dict) and Serializer.ORIGIN in obj:
return obj[Serializer.ORIGIN]
if hasattr(obj, Serializer.ORIGIN):
return getattr(obj, Serializer.ORIGIN)
return None
@staticmethod
def get_stream_digest(stream):
sha256_hash = hashlib.sha256()
@@ -460,10 +474,10 @@ class SheerkaDataProvider:
obj_key = self.get_obj_key(obj) or key
if isinstance(state.data[entry][key], list):
if not hasattr(obj, Serializer.ORIGIN):
obj_origin = self.get_obj_origin(obj)
if obj_origin is None:
raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj))
obj_origin = getattr(obj, Serializer.ORIGIN)
state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed)
else:
@@ -674,7 +688,9 @@ class SheerkaDataProvider:
obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
# set the origin of the object
if not isinstance(obj, str):
if isinstance(obj, dict):
obj[Serializer.ORIGIN] = digest
elif not isinstance(obj, str):
setattr(obj, Serializer.ORIGIN, digest)
return obj
+4 -1
View File
@@ -46,9 +46,9 @@ class SheerkaDataProviderIO:
class SheerkaDataProviderFileIO(SheerkaDataProviderIO):
log = logging.getLogger("FileIO")
def __init__(self, root):
self.log = logging.getLogger(self.__class__.__name__ + ".init")
root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \
if root is None \
else path.abspath(root)
@@ -180,10 +180,13 @@ def on_close(dictionary_io, file_path, stream):
:param stream:
:return:
"""
def decorator(func):
def wrapper(*args, **kwargs):
stream.seek(0)
dictionary_io.cache[file_path] = stream.read()
func(*args, **kwargs)
return wrapper
return decorator
+19 -5
View File
@@ -12,6 +12,7 @@ import core.utils
from core.concept import Concept
log = logging.getLogger(__name__)
init_log = logging.getLogger(__name__ + ".init")
def json_default_converter(o):
@@ -40,17 +41,18 @@ class Serializer:
USERNAME = "user_name" # key to store user that as committed the snapshot
MODIFICATION_DATE = "modification_date" #
PARENTS = "parents"
ORIGIN = "origin"
ORIGIN = "##origin##"
HISTORY = "##history##"
def __init__(self):
log.debug("Initializing serializers")
init_log.debug("Initializing serializers")
self._cache = []
# add builtin serializers
self.register(EventSerializer())
self.register(StateSerializer())
self.register(ConceptSerializer())
self.register(DictionarySerializer())
def register(self, serializer):
"""
@@ -58,7 +60,7 @@ class Serializer:
:param serializer:
:return:
"""
log.debug(f"Adding serializer {serializer}")
init_log.debug(f"Adding serializer {serializer}")
self._cache.append(serializer)
def serialize(self, obj, context):
@@ -212,8 +214,11 @@ class PickleSerializer(BaseSerializer):
class StateSerializer(PickleSerializer):
def __init__(self, ):
PickleSerializer.__init__(self, lambda obj: core.utils.get_full_qualified_name(
obj) == "sdp.sheerkaDataProvider.State", "S", 1)
PickleSerializer.__init__(
self,
lambda obj: core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State",
"S",
1)
class ConceptSerializer(ObjectSerializer):
@@ -223,6 +228,15 @@ class ConceptSerializer(ObjectSerializer):
def matches(self, obj):
return isinstance(obj, Concept)
class DictionarySerializer(PickleSerializer):
def __init__(self, ):
PickleSerializer.__init__(
self,
lambda obj: isinstance(obj, dict),
"D",
1)
#
# class SheerkaSerializer(ObjectSerializer):
# def __init__(self):
+181
View File
@@ -0,0 +1,181 @@
import ast
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, ConceptParts, Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer
from evaluators.AddConceptEvaluator import AddConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.ExactConceptParser import ExactConceptParser
from parsers.PythonParser import PythonNode, PythonParser
def get_context():
    """Build a fresh ExecutionContext backed by an in-memory Sheerka instance."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", "xxx", engine)
def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
    """Build a DefConceptNode wrapped the way AddConceptEvaluator expects it.

    Each textual part (where/pre/post/body) is normalized through
    get_concept_part(); *definition* is attached as-is.
    """
    concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
    for attribute, raw_part in (("body", body), ("where", where), ("pre", pre), ("post", post)):
        if raw_part:
            setattr(concept, attribute, get_concept_part(raw_part))
    if definition:
        concept.definition = definition
    return ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=concept))
def get_concept_part(part):
    """Normalize *part* into a ReturnValueConcept wrapping a ParserResultConcept.

    Accepts raw Python source (str), an already-built PythonNode, or a
    ready-made ReturnValueConcept (returned untouched).  Any other type
    yields None.
    """
    if isinstance(part, str):
        node, src = PythonNode(part, ast.parse(part, mode="eval")), part
    elif isinstance(part, PythonNode):
        node, src = part, part.source
    elif isinstance(part, ReturnValueConcept):
        return part
    else:
        return None
    return ReturnValueConcept(
        who="Parsers:DefaultParser",
        status=True,
        value=ParserResultConcept(
            source=src,
            parser=PythonParser(),
            value=node))
def get_concept_definition(source, parsing_expression):
    """Wrap a ready-made ParsingExpression as a successful RegexParser result."""
    parser_result = ParserResultConcept(
        source=source,
        parser=RegexParser(),
        value=parsing_expression)
    return ReturnValueConcept(who="Parsers:RegexParser", status=True, value=parser_result)
@pytest.mark.parametrize("ret_val, expected", [
    # successful parser result carrying a DefConceptNode -> matched
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=DefConceptNode([]))), True),
    # failed status -> not matched
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", False, ParserResultConcept(value=DefConceptNode([]))), False),
    # value is not a ParserResultConcept -> not matched
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a ParserResultConcept"), False),
    # ParserResultConcept without a DefConceptNode value -> not matched
    (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
])
def test_i_can_match(ret_val, expected):
    """AddConceptEvaluator only matches successful parser results holding a DefConceptNode."""
    context = get_context()
    assert AddConceptEvaluator().matches(context, ret_val) == expected
def test_that_the_source_is_correctly_set():
    """The metadata of the created concept mirrors the textual source of each supplied part."""
    context = get_context()
    def_concept_return_value = get_concept(
        name="hello a",
        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
        where="isinstance(a, str )",
        pre="a is not None",
        body="print('hello' + a)")
    evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
    assert evaluated.status
    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
    created_concept = evaluated.body.body
    assert created_concept.metadata.name == "hello a"
    assert created_concept.metadata.where == "isinstance(a, str )"
    assert created_concept.metadata.pre == "a is not None"
    # no 'post' part was supplied, so it must stay unset
    assert created_concept.metadata.post is None
    assert created_concept.metadata.body == "print('hello' + a)"
    assert created_concept.metadata.definition == "hello a"
def test_that_the_ast_is_correctly_initialized():
    """An AST is cached for every supplied part, and only for those parts."""
    context = get_context()
    def_concept_return_value = get_concept(
        name="hello a",
        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
        where="isinstance(a, str )",
        pre="a is not None",
        body="print('hello' + a)")
    evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
    assert evaluated.status
    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
    created_concept = evaluated.body.body
    assert ConceptParts.WHERE in created_concept.cached_asts
    assert ConceptParts.PRE in created_concept.cached_asts
    assert ConceptParts.BODY in created_concept.cached_asts
    # no 'post' part was given, so no AST must be cached for it
    assert ConceptParts.POST not in created_concept.cached_asts
def test_that_the_new_concept_is_correctly_saved():
    """Once evaluated, the concept can be reloaded from the database under its key."""
    context = get_context()
    def_concept_return_value = get_concept(
        name="hello a",
        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
        where="isinstance(a, str )",
        pre="a is not None",
        body="print('hello' + a)")
    # the concept must be unknown before the evaluation
    from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
    assert context.sheerka.isinstance(from_db, BuiltinConcepts.UNKNOWN_CONCEPT)
    AddConceptEvaluator().eval(context, def_concept_return_value)
    context.sheerka.concepts_cache = {}  # reset the cache to force a reload from the database
    from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0")
    assert from_db.metadata.key == f"hello {VARIABLE_PREFIX}0"
    assert from_db.metadata.name == "hello a"
    assert from_db.metadata.where == "isinstance(a, str )"
    assert from_db.metadata.pre == "a is not None"
    assert from_db.metadata.post is None
    assert from_db.metadata.body == "print('hello' + a)"
    assert from_db.metadata.definition == "hello a"
    # the variable 'a' of the name becomes the single property of the concept
    assert len(from_db.props) == 1
    assert "a" in from_db.props
    assert from_db.cached_asts == {}  # the ast is not saved in the db
def test_i_can_get_props_from_python_node():
    """get_props keeps the known variables found inside a Python expression part."""
    ret_val = get_concept_part("isinstance(a, str)")
    context = get_context()
    assert AddConceptEvaluator.get_props(context.sheerka, ret_val, ["a"]) == ["a"]
def test_i_can_get_props_from_another_concept():
    """When the parsed value is an existing Concept, its own properties are returned."""
    concept = Concept("hello").set_prop("a").set_prop("b")
    ret_val = ReturnValueConcept(who="some_parser",
                                 status=True,
                                 value=ParserResultConcept(value=concept))
    assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["a", "b"]
def test_i_can_get_props_from_definition():
    """Props are derived from the ConceptMatch nodes found in a BNF definition.

    NOTE(review): the source string mentions ('+'|'-') and '?' while the
    expression tree uses only '+' and ZeroOrMore — confirm the mismatch is
    harmless for this test, which only inspects the expression tree.
    """
    parsing_expression = Sequence(ConceptMatch('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptMatch("add"))))
    ret_val = get_concept_definition("mult (('+'|'-') add)?", parsing_expression)
    assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["add", "mult"]
+7
View File
@@ -0,0 +1,7 @@
import pytest
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseParser import BaseParser
+25 -15
View File
@@ -5,6 +5,7 @@ from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptEvaluator import ConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.ExactConceptParser import ExactConceptParser
def get_context():
@@ -13,12 +14,21 @@ def get_context():
return ExecutionContext("test", "xxx", sheerka)
def get_return_value(concept, source=None):
    """Wrap *concept* in a successful ReturnValueConcept, as ExactConceptParser would produce."""
    parser_result = ParserResultConcept(
        parser=ExactConceptParser(),
        source=source or concept.name,
        value=concept,
        try_parsed=concept)
    return ReturnValueConcept("some_name", True, parser_result)
@pytest.mark.parametrize("ret_val, expected", [
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, Concept()), True),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", False, Concept()), False),
(ReturnValueConcept("Not a parser", True, Concept()), False),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a concept"), False),
(ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=Concept())), True),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=Concept())), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept")), False),
(ReturnValueConcept("some_name", True, Concept()), False),
])
def test_i_can_match(ret_val, expected):
context = get_context()
@@ -30,7 +40,7 @@ def test_concept_is_returned_when_no_body():
concept = Concept(name="one").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
item = get_return_value(concept)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -44,7 +54,7 @@ def test_body_is_evaluated_when_python_body():
concept = Concept(name="one", body="1").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept)
item = get_return_value(concept)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -60,7 +70,7 @@ def test_body_is_evaluated_when_concept_body():
concept_un = Concept(name="un", body="one").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
item = get_return_value(concept_un)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -80,7 +90,7 @@ def test_body_is_evaluated_when_concept_body_with_a_body():
concept_un = Concept(name="un", body="one").init_key()
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un)
item = get_return_value(concept_un)
result = evaluator.eval(context, item)
assert result.who == evaluator.name
@@ -97,7 +107,7 @@ def test_i_can_evaluate_longer_chains():
concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
item = get_return_value(concept_d)
result = evaluator.eval(context, item)
assert result.status
@@ -112,7 +122,7 @@ def test_i_can_evaluate_longer_chains_2():
concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d)
item = get_return_value(concept_d)
result = evaluator.eval(context, item)
assert result.status
@@ -133,7 +143,7 @@ def test_i_can_recognize_concept_properties():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert result.status
@@ -156,7 +166,7 @@ def test_i_can_recognize_concept_properties_with_body():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert result.status
@@ -174,7 +184,7 @@ def test_i_can_recognize_concept_properties_with_body_when_concept_has_a_body():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert result.status
@@ -189,7 +199,7 @@ def test_i_cannot_recognize_a_concept_if_one_of_the_prop_is_unknown():
.set_prop("b", "two").init_key())
evaluator = ConceptEvaluator()
item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus)
item = get_return_value(concept_plus)
result = evaluator.eval(context, item)
assert not result.status
+175 -34
View File
@@ -2,8 +2,18 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
CrossRef
CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \
ParsingExpressionVisitor
class ConceptVisitor(ParsingExpressionVisitor):
    """Test helper: collects the name of every ConceptMatch node visited."""
    def __init__(self):
        self.concepts = set()  # concept names gathered during the visit
    def visit_ConceptMatch(self, node):
        self.concepts.add(node.concept_name)
@pytest.mark.parametrize("match, text", [
@@ -23,7 +33,7 @@ def test_i_can_match_simple_tokens(match, text):
foo = Concept(name="foo")
concepts = {foo: text}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, text)
@@ -38,7 +48,7 @@ def test_i_can_match_multiple_concepts_in_one_input():
two = Concept(name="two")
concepts = {one: "one", two: "two"}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two one")
@@ -69,7 +79,7 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
two = Concept(name="two")
concepts = {one: "one", two: "two"}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
assert not res.status
@@ -86,7 +96,7 @@ def test_i_can_match_sequence():
foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
@@ -100,7 +110,7 @@ def test_wrong_sequence_is_not_matched():
foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three one")
@@ -116,7 +126,7 @@ def test_i_cannot_match_sequence_if_end_of_file():
foo = Concept(name="foo")
concepts = {foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert not res.status
@@ -133,7 +143,7 @@ def test_i_always_choose_the_longest_match():
concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
@@ -149,7 +159,7 @@ def test_i_can_match_several_sequences():
concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three one two")
@@ -166,7 +176,7 @@ def test_i_can_match_ordered_choice():
foo = Concept(name="foo")
concepts = {foo: OrderedChoice("one", "two")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res1 = parser.parse(context, "one")
assert res1.status
@@ -189,7 +199,7 @@ def test_i_cannot_match_ordered_choice_with_empty_alternative():
foo = Concept(name="foo")
concepts = {foo: Sequence(OrderedChoice("one", ""), "two")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "ok") # because token[0] is not "one" and not "" (it is 'two')
assert not res.status
@@ -201,7 +211,7 @@ def test_i_can_mix_sequences_and_ordered_choices():
concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res1 = parser.parse(context, "twenty one ok")
assert res1.status
@@ -225,7 +235,7 @@ def test_i_can_mix_ordered_choices_and_sequences():
concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty thirty")
assert res.status
@@ -240,7 +250,7 @@ def test_i_cannot_parse_empty_optional():
concepts = {foo: Optional("one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "")
assert not res.status
@@ -253,7 +263,7 @@ def test_i_can_parse_optional():
concepts = {foo: Optional("one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one")
assert res.status
@@ -266,7 +276,7 @@ def test_i_can_parse_sequence_starting_with_optional():
concepts = {foo: Sequence(Optional("twenty"), "one")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty one")
assert res.status
@@ -283,7 +293,7 @@ def test_i_can_parse_sequence_ending_with_optional():
concepts = {foo: Sequence("one", "two", Optional("three"))}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
assert res.status
@@ -300,7 +310,7 @@ def test_i_can_parse_sequence_with_optional_in_between():
concepts = {foo: Sequence("one", Optional("two"), "three")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two three")
assert res.status
@@ -312,19 +322,16 @@ def test_i_can_parse_sequence_with_optional_in_between():
def test_i_can_use_reference():
# The problem here is when there are multiple match for the same input
# The parsing result is a list of all concepts found
# So it's already a list that represents a sequence, not a choice
# So I need to create a choice concept
# create the return value for every possible graph
# --> The latter seems to be the best as we don't defer the resolution of the problem to someone else
# when there are multiple matches for the same input
# Do I need to create a choice concept ?
# No, create a return value for every possible graph
context = get_context()
foo = Concept(name="foo")
bar = Concept(name="bar")
concepts = {foo: Sequence("one", "two"), bar: foo}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert len(res) == 2
@@ -350,7 +357,7 @@ def test_i_can_use_context_reference_with_multiple_levels():
concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert len(res) == 3
@@ -375,7 +382,7 @@ def test_order_is_not_important_when_using_references():
concepts = {bar: foo, foo: Sequence("one", "two")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "one two")
assert len(res) == 2
@@ -390,7 +397,7 @@ def test_i_can_parse_when_reference():
concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty two")
assert res.status
@@ -415,7 +422,7 @@ def test_i_can_detect_duplicates_when_reference():
foo: OrderedChoice("twenty", "thirty")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
res = parser.parse(context, "twenty")
assert len(res) == 2
@@ -437,7 +444,7 @@ def test_i_can_detect_infinite_recursion():
foo: bar
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert bar not in parser.concepts_dict
assert foo not in parser.concepts_dict
@@ -452,7 +459,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
foo: OrderedChoice(bar, "foo")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -464,7 +471,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
foo: OrderedChoice("foo", bar)
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(context, concepts)
assert foo in parser.concepts_dict
assert bar in parser.concepts_dict
@@ -485,7 +492,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
foo: Sequence("one", bar, "two")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -500,7 +507,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
foo: Sequence("one", OrderedChoice(bar, "other"), "two")
}
parser = ConceptLexerParser()
parser.initialize(concepts)
parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion
@@ -510,6 +517,140 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
# TODO infinite recursion with optional
pass
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
])
def test_i_can_detect_errors(expression, error):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
expression = "foo"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ConceptMatch("foo")
assert res.value.source == expression
def test_i_can_parse_cross_ref_with_modifier():
expression = "foo*"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
assert res.value.source == expression
def test_i_can_parse_sequence_with_cross_ref():
expression = "foo 'and' bar+"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_parse_choice_with_cross_ref():
foo = Concept("foo")
bar = Concept("bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
expression = "foo | bar?"
parser = RegexParser()
res = parser.parse(context, Tokenizer(expression))
assert res.status
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = RegexParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
def test_i_can_visit_parsing_expression():
mult = Concept(name="mult")
add = Concept(name="add")
visitor = ConceptVisitor()
visitor.visit(Sequence(mult, Optional(Sequence("+", add))))
assert sorted(list(visitor.concepts)) == ["add", "mult"]
#
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
# context = get_context()
+36 -1
View File
@@ -2,12 +2,15 @@ import pytest
import ast
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser
from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
# def nop():
# return NopNode()
#
@@ -52,7 +55,7 @@ from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
# return left_as_string == right_as_string
#
def get_concept(name, where=None, pre=None, post=None, body=None):
def get_concept(name, where=None, pre=None, post=None, body=None, definition=None):
concept = DefConceptNode([], name=NameNode(list(Tokenizer(name))))
if body:
@@ -63,6 +66,12 @@ def get_concept(name, where=None, pre=None, post=None, body=None):
concept.pre = get_concept_part(pre)
if post:
concept.post = get_concept_part(post)
if definition:
concept.definition = ReturnValueConcept(
"Parsers:RegexParser",
True,
definition)
return concept
@@ -324,3 +333,29 @@ def test_new_line_is_not_allowed_in_the_name():
assert not res.status
assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")]
def test_i_can_parse_def_concept_from_regex():
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
parser = DefaultParser()
res = parser.parse(get_context(), text)
node = res.value.value
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition)
expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_detect_empty_bnf_declaration():
text = "def concept name from bnf as __definition[0]"
parser = DefaultParser()
res = parser.parse(get_context(), text)
assert not res.status
assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration")
+14 -12
View File
@@ -41,7 +41,7 @@ def test_i_can_recognize_a_simple_concept():
assert len(results) == 1
assert results[0].status
assert results[0].value == concept
assert results[0].value.value == concept
def test_i_can_recognize_concepts_defined_several_times():
@@ -53,14 +53,14 @@ def test_i_can_recognize_concepts_defined_several_times():
results = ExactConceptParser().parse(context, source)
assert len(results) == 2
results = sorted(results, key=lambda x: x.value.name) # because of the usage of sets
results = sorted(results, key=lambda x: x.value.value.name) # because of the usage of sets
assert results[0].status
assert results[0].value.name == "hello a"
assert results[0].value.props["a"].value == "world"
assert results[0].value.value.name == "hello a"
assert results[0].value.value.props["a"].value == "world"
assert results[1].status
assert results[1].value.name == "hello world"
assert results[1].value.value.name == "hello world"
def test_i_can_recognize_a_concept_with_variables():
@@ -72,9 +72,10 @@ def test_i_can_recognize_a_concept_with_variables():
assert len(results) == 1
assert results[0].status
assert results[0].value.key == concept.key
assert results[0].value.props["a"].value == "10"
assert results[0].value.props["b"].value == "5"
concept_found = results[0].value.value
assert concept_found.key == concept.key
assert concept_found.props["a"].value == "10"
assert concept_found.props["b"].value == "5"
def test_i_can_recognize_a_concept_with_duplicate_variables():
@@ -86,9 +87,10 @@ def test_i_can_recognize_a_concept_with_duplicate_variables():
assert len(results) == 1
assert results[0].status
assert results[0].value.key == concept.key
assert results[0].value.props["a"].value == "10"
assert results[0].value.props["b"].value == "5"
concept_found = results[0].value.value
assert concept_found.key == concept.key
assert concept_found.props["a"].value == "10"
assert concept_found.props["b"].value == "5"
def test_i_can_manage_unknown_concept():
@@ -121,7 +123,7 @@ def test_i_can_detect_concept_from_tokens():
assert len(results) == 1
assert results[0].status
assert results[0].value == concept
assert results[0].value.value == concept
def get_context():
+20 -1
View File
@@ -1,5 +1,7 @@
import ast
import pytest
from core.ast.nodes import NodeParent, GenericNodeConcept
import core.ast.nodes
from core.ast.visitors import ConceptNodeVisitor, UnreferencedNamesVisitor
@@ -102,7 +104,7 @@ def my_function(a,b):
assert sheerka.value(visitor.names[6]) == "a"
def test_i_can_get_non_referenced_variables():
def test_i_can_get_unreferenced_variables():
source = """
def my_function(a,b):
for i in range(b):
@@ -126,6 +128,23 @@ my_function(x,y)
assert "y" in values
@pytest.mark.parametrize("source, expected", [
("a,b", ["a", "b"]),
("isinstance(a, int)", ["a", "int"])
])
def test_i_can_get_unreferenced_variables_from_simple_expressions(source, expected):
sheerka = get_sheerka()
node = ast.parse(source)
concept_node = core.ast.nodes.python_to_concept(node)
visitor = UnreferencedNamesVisitor(sheerka)
visitor.visit(concept_node)
assert sorted(list(visitor.names)) == expected
def test_i_can_compare_NodeParent_with_tuple():
node_parent = NodeParent(GenericNodeConcept("For", None), "target")
assert node_parent == ("For", "target")
+7
View File
@@ -20,6 +20,13 @@ def test_i_can_get_concept_key(name, variables, expected):
assert concept.metadata.key == expected
def test_key_does_not_use_variable_when_definition_is_set():
concept = Concept("plus").set_prop('plus')
concept.init_key()
assert concept.metadata.key == "plus"
def test_i_can_serialize():
"""
Test concept.to_dict()
+38 -2
View File
@@ -9,6 +9,8 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept, PROPERTIES_TO_SERIALIZE
from core.sheerka import Sheerka, ExecutionContext
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
ConceptLexerParser
from sdp.sheerkaDataProvider import SheerkaDataProvider
tests_root = path.abspath("../build/tests")
@@ -422,7 +424,7 @@ as:
def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept():
"""
In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b')
are correctly detected, because of the concept 'a plus b' in its body
are correctly detected, thanks to the source code 'a plus b' in its body
:return:
"""
sheerka = get_sheerka()
@@ -558,7 +560,7 @@ def test_i_can_manage_concepts_with_the_same_key_when_values_are_the_same():
assert res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME)
def test_i_can_create_concepts_on_python_codes():
def test_i_can_create_concepts_with_python_code_as_body():
sheerka = get_sheerka()
context = get_context(sheerka)
@@ -570,6 +572,40 @@ def test_i_can_create_concepts_on_python_codes():
assert isinstance(res[0].value, list)
def test_i_can_create_concept_with_bnf_definition():
sheerka = get_sheerka()
a = Concept("a")
sheerka.add_in_cache(a)
sheerka.concepts_grammars = ConceptLexerParser().initialize(
get_context(sheerka),
{a: OrderedChoice("one", "two")}).body
res = sheerka.eval("def concept plus from bnf a ('plus' plus)?")
assert len(res) == 1
assert res[0].status
assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT)
saved_concept = sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, "plus")
assert saved_concept.key == "plus"
assert saved_concept.metadata.definition == "a ('plus' plus)?"
assert "a" in saved_concept.props
assert "plus" in saved_concept.props
saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
expected_bnf = Sequence(
ConceptMatch("a"),
Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))),
rule_name="plus")
assert saved_definitions[saved_concept] == expected_bnf
new_concept = res[0].value.body
assert new_concept.metadata.name == "plus"
assert new_concept.metadata.definition == "a ('plus' plus)?"
assert new_concept.bnf == expected_bnf
assert "a" in new_concept.props
assert "plus" in new_concept.props
def get_sheerka(root="mem://", skip_builtins_in_db=True):
sheerka = Sheerka(skip_builtins_in_db)
sheerka.initialize(root)
+42 -1
View File
@@ -311,6 +311,18 @@ def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root):
"mem://"
])
def test_i_can_add_a_dict(root):
"""
Adding a dictionary.
Note that there is no key when adding a dictionary
If you add {'my_key': 'my_value'}
'my_key is not considered as the key of the entry'
Because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'}
There are now multiple keys.
So for dictionary entries, the key is not managed
"""
sdp = SheerkaDataProvider(root)
obj = {"my_key": "my_value"}
@@ -735,6 +747,7 @@ def test_i_can_set_using_reference(root):
# sanity check, make sure that I can load back
loaded = sdp.get(entry, key)
assert loaded == ObjWithKey(2, "foo")
assert getattr(loaded, Serializer.ORIGIN) == "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268"
@pytest.mark.parametrize("root", [
@@ -754,7 +767,35 @@ def test_i_can_add_reference_of_an_object_with_a_key(root):
assert key == obj.key
assert entry == "entry"
assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}}
assert sdp.load_obj(digest) == obj
loaded = sdp.load_obj(digest)
assert loaded == obj
assert getattr(loaded, Serializer.ORIGIN) == digest
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
])
def test_i_can_add_reference_a_dictionary(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key": "value1"}
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
state = sdp.load_state(sdp.get_snapshot())
digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):]
assert key is None
assert entry == "entry"
assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}
loaded = sdp.load_obj(digest)
assert loaded["my_key"] == obj["my_key"]
assert loaded[Serializer.ORIGIN] == digest
assert len(loaded) == 2
@pytest.mark.parametrize("root", [
+62
View File
@@ -1,6 +1,8 @@
import core.utils
import pytest
from core.tokenizer import Token, TokenKind
@pytest.mark.parametrize("lst, as_string", [
(None, "",),
@@ -76,3 +78,63 @@ def test_i_can_get_sub_classes():
def test_i_can_product(a, b, expected):
res = core.utils.product(a, b)
assert res == expected
@pytest.mark.parametrize("input_as_list, expected_as_list", [
([" "], []),
([" ", "one"], ["one"]),
(["one", " "], ["one"]),
([" ", "one", " "], ["one"]),
(["\n", "one"], ["one"]),
(["one", "\n"], ["one"]),
(["\n", "one", "\n"], ["one"]),
([" ", "\n", "one"], ["one"]),
(["one", " ", "\n"], ["one"]),
([" ", "\n", "one", " ", "\n"], ["one"]),
(["\n", " ", "one"], ["one"]),
(["one", "\n", " "], ["one"]),
(["\n", " ", "one", "\n", " "], ["one"]),
([" ", "\n", " ", "one"], ["one"]),
(["one", " ", "\n", " "], ["one"]),
([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),
(["\n", " ", "\n", "one"], ["one"]),
(["one", "\n", " ", "\n"], ["one"]),
(["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),
])
def test_i_can_strip(input_as_list, expected_as_list):
actual = core.utils.strip_tokens(get_tokens(input_as_list))
expected = get_tokens(expected_as_list)
assert actual == expected
def test_by_default_eof_is_not_stripped():
actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", "<EOF>"]))
expected = get_tokens(["one", "two", " ", "\n", "<EOF>"])
assert actual == expected
def test_i_can_strip_eof():
actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", "<EOF>"]), True)
expected = get_tokens(["one", "two"])
assert actual == expected
def get_tokens(lst):
res = []
for e in lst:
if e == " ":
res.append(Token(TokenKind.WHITESPACE, " ", 0, 0, 0))
elif e == "\n":
res.append(Token(TokenKind.NEWLINE, "\n", 0, 0, 0))
elif e == "<EOF>":
res.append(Token(TokenKind.EOF, "\n", 0, 0, 0))
else:
res.append(Token(TokenKind.IDENTIFIER, e, 0, 0, 0))
return res