diff --git a/core/builtin_helpers.py b/core/builtin_helpers.py index 8f47202..77be5c8 100644 --- a/core/builtin_helpers.py +++ b/core/builtin_helpers.py @@ -190,3 +190,4 @@ def _extract_predicates(sheerka, node, variables_to_include, variables_to_exclud predicates.append(res) return predicates + diff --git a/core/concept.py b/core/concept.py index dc96abb..c4a85f9 100644 --- a/core/concept.py +++ b/core/concept.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from enum import Enum import logging +import core.utils from core.tokenizer import Tokenizer, TokenKind log = logging.getLogger(__name__) @@ -18,8 +19,7 @@ VARIABLE_PREFIX = "__var__" class ConceptParts(Enum): """ - Helper class, Note quite sure that is it that useful - I guess, I was learning nums with Python... + Lists metadata that can contains some code """ WHERE = "where" PRE = "pre" @@ -85,6 +85,7 @@ class Concept: self.metadata = metadata self.props = {} # list of Property for this concept self.cached_asts = {} # cached ast for the where, pre, post and body parts + self.bnf = None def __repr__(self): return f"({self.metadata.id}){self.metadata.name}" @@ -134,9 +135,9 @@ class Concept: return self if tokens is None: - tokens = iter(Tokenizer(self.metadata.name)) + tokens = list(Tokenizer(self.metadata.name)) - variables = list(self.props.keys()) + variables = list(self.props.keys()) if len(core.utils.strip_tokens(tokens, True)) > 1 else [] key = "" first = True @@ -171,12 +172,11 @@ class Concept: :param codes: :return: """ - possibles_codes = ConceptParts.get_parts() if codes is None: return + for key in codes: - if key in possibles_codes: - self.cached_asts[ConceptParts(key)] = codes[key] + self.cached_asts[key] = codes[key] return self @@ -231,7 +231,7 @@ class Concept: return self def set_prop(self, prop_name: str, prop_value=None): - self.props[prop_name] = Property(prop_name, prop_value) + self.props[prop_name] = Property(prop_name, prop_value) # Python 3.x order is kept in dictionaries return 
self def set_prop_by_index(self, index: int, prop_value): diff --git a/core/sheerka.py b/core/sheerka.py index 8b9c6c1..0753222 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -1,4 +1,6 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field +from functools import lru_cache + from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST from evaluators.BaseEvaluator import OneReturnValueEvaluator @@ -10,8 +12,10 @@ import core.builtin_helpers import logging log = logging.getLogger(__name__) +init_log = logging.getLogger(__name__ + ".init") concept_evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION] +CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser" class Sheerka(Concept): @@ -19,22 +23,29 @@ class Sheerka(Concept): Main controller for the project """ - CONCEPTS_ENTRY = "All_Concepts" - BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" - USER_CONCEPTS_KEYS = "User_Concepts" + CONCEPTS_ENTRY = "All_Concepts" # to store all the concepts + CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts + BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts + USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts - def __init__(self, debug=False, skip_builtins_in_db=False): + def __init__(self, debug=False, skip_builtins_in_db=False, loggers=None): log.debug("Starting Sheerka.") super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA) # cache of the most used concepts # Note that these are only templates # They are used as a footprint for instantiation + # Except of source when the concept is supposed to be unique self.concepts_cache = {} - # cache for builtin types. 
- # It allow instantiation of a builtin clas - self.builtin_cache = {} + # + # Cache for all concepts BNF + self.concepts_definitions = {} + + # + # cache for concepts grammars + # a grammar can be seen as a resolved BNF + self.concepts_grammars = {} # a concept can be instantiated # ex: File is a concept, but File('foo.txt') is an instance @@ -45,14 +56,16 @@ class Sheerka(Concept): # ex: hello => say('hello') self.rules = [] - self.sdp = None - self.parsers = [] - self.evaluators = [] + self.sdp: SheerkaDataProvider = None # SheerkaDataProvider + self.builtin_cache = {} # cache for builtin concepts + self.parsers = {} # cache for builtin parsers + self.evaluators = [] # cache for builtin evaluators - self.evaluators_prefix = None - self.parsers_prefix = None + self.evaluators_prefix: str = None + self.parsers_prefix: str = None self.debug = debug + self.loggers = loggers or [] self.skip_builtins_in_db = skip_builtins_in_db def initialize(self, root_folder: str = None): @@ -85,7 +98,7 @@ class Sheerka(Concept): Initializes the builtin concepts :return: None """ - log.debug("Initializing builtin concepts") + init_log.debug("Initializing builtin concepts") builtins_classes = self.get_builtins_classes_as_dict() # this all initialization of the builtins seems to be little bit complicated @@ -101,11 +114,11 @@ class Sheerka(Concept): if not self.skip_builtins_in_db: from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.metadata.key) if from_db is None: - log.debug(f"'{concept.name}' concept is not found in db. Adding.") + init_log.debug(f"'{concept.name}' concept is not found in db. Adding.") self.set_id_if_needed(concept, True) self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True) else: - log.debug(f"Found concept '{from_db}' in db. Updating.") + init_log.debug(f"Found concept '{from_db}' in db. 
Updating.") concept.update_from(from_db) self.add_in_cache(concept) @@ -120,8 +133,8 @@ class Sheerka(Concept): if parser.__module__ == base_class.__module__: continue - log.debug(f"Adding builtin parser '{parser.__name__}'") - self.parsers.append(parser) + init_log.debug(f"Adding builtin parser '{parser.__name__}'") + self.parsers[core.utils.get_full_qualified_name(parser)] = parser def initialize_builtin_evaluators(self): """ @@ -129,14 +142,26 @@ class Sheerka(Concept): :return: """ for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.OneReturnValueEvaluator"): - log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") + init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") self.evaluators.append(evaluator) for evaluator in core.utils.get_sub_classes("evaluators", "evaluators.BaseEvaluator.AllReturnValuesEvaluator"): - log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") + init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") self.evaluators.append(evaluator) + def logger_filter(self, record: logging.LogRecord): + if 'all' in self.loggers: + return True + + ret = True + if 'init' not in self.loggers and record.name.endswith(".init"): + ret = False + + return ret + def init_logging(self): + handler = logging.StreamHandler() + handler.addFilter(self.logger_filter) if self.debug: log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" log_level = logging.DEBUG @@ -144,7 +169,7 @@ class Sheerka(Concept): log_format = "%(message)s" log_level = logging.INFO - logging.basicConfig(format=log_format, level=log_level) + logging.basicConfig(format=log_format, level=log_level, handlers=[handler]) def eval(self, text: str): """ @@ -153,7 +178,9 @@ class Sheerka(Concept): :param text: :return: """ + log.debug(f"Evaluating '{text}'.") evt_digest = self.sdp.save_event(Event(text)) + log.debug(f"{evt_digest=}") exec_context = ExecutionContext(self.key, evt_digest, self) # Before parsing @@ -183,7 
+210,7 @@ class Sheerka(Concept): debug_text = "'" + text + "'" if isinstance(text, str) \ else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens" log.debug(f"Parsing {debug_text}") - for parser in self.parsers: + for parser in self.parsers.values(): p = parser() res = p.parse(context, text) if isinstance(res, list): @@ -193,7 +220,7 @@ class Sheerka(Concept): return result def process(self, context, return_values, initial_concepts=None): - log.debug(f"Processing parsing result. context concept={initial_concepts}") + log.debug(f"{initial_concepts=}. Processing " + core.utils.pp(return_values)) # return_values must be a list if not isinstance(return_values, list): @@ -303,6 +330,8 @@ class Sheerka(Concept): """ concept.init_key() + concepts_definitions = None + init_ret_value = None # checks for duplicate concepts if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()): @@ -312,14 +341,33 @@ class Sheerka(Concept): # set id before saving in db self.set_id_if_needed(concept, False) + # add the BNF if known + if concept.bnf: + concepts_definitions = self.concepts_definitions.copy() + concepts_definitions[concept] = concept.bnf + + # check if it's a valid BNF or whether it breaks the known rules + concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy()) + sub_context = context.push(self.name, "Initializing concept definition") + sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet + init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions) + if not init_ret_value.status: + return self.ret(self.create_new_concept.__name__, False, ErrorConcept(init_ret_value.value)) + # save the new context in sdp try: self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True) + if concepts_definitions is not None: + self.sdp.set(context.event_digest, self.CONCEPTS_DEFINITIONS_ENTRY, concepts_definitions, use_ref=True) except 
SheerkaDataProviderDuplicateKeyError as error: return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0]) - # add in cache for quick further reference + # Updates the caches self.concepts_cache[concept.key] = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) + if concepts_definitions is not None: + self.concepts_definitions = concepts_definitions + if init_ret_value is not None and init_ret_value.status: + self.concepts_grammars = init_ret_value.body # process the return in needed ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) @@ -514,6 +562,18 @@ class Sheerka(Concept): return (self.value(obj) for obj in objs) + def is_success(self, obj): + if isinstance(obj, bool): + return obj + + if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE): + return obj.status + + if self.isinstance(obj, BuiltinConcepts.ERROR): + return False + + return False + def isinstance(self, a, b): """ return true if the concept a is an instance of the concept b @@ -603,6 +663,7 @@ class ExecutionContext: sheerka: Sheerka # sheerka desc: str = None # human description of what is going on obj: Concept = None # what is the subject of the execution context (if known) + concepts_cache: dict = field(default_factory=dict) def push(self, who, desc=None, obj=None): return ExecutionContext(who, self.event_digest, self.sheerka, desc=desc, obj=obj) diff --git a/core/tokenizer.py b/core/tokenizer.py index 44aee50..921851e 100644 --- a/core/tokenizer.py +++ b/core/tokenizer.py @@ -80,6 +80,8 @@ class LexerError(Exception): class Keywords(Enum): DEF = "def" CONCEPT = "concept" + FROM = "from" + BNF = "bnf" AS = "as" WHERE = "where" PRE = "pre" @@ -308,24 +310,3 @@ class Tokenizer: 1 if lines_count > 0 else start_column + len(result)) return result, lines_count - - def seek(self, words): - if self.i == self.text_len: - return 0 - - # init - offsets = {} - start_index = self.i - - buffer = "" - while self.i < 
self.text_len: - c = self.text[self.i] - - # skip white space - if c in (" ", "\t"): - self.i += 1 - continue - - for word in words: - if c == word[offset]: - os diff --git a/core/utils.py b/core/utils.py index 6e777eb..3da36f4 100644 --- a/core/utils.py +++ b/core/utils.py @@ -3,6 +3,8 @@ import inspect import pkgutil import sys +from core.tokenizer import TokenKind + def sysarg_to_string(argv): """ @@ -72,11 +74,18 @@ def get_full_qualified_name(obj): :param obj: :return: """ - module = obj.__class__.__module__ - if module is None or module == str.__class__.__module__: - return obj.__class__.__name__ # Avoid reporting __builtin__ + if obj.__class__ == type: + module = obj.__module__ + if module is None or module == str.__class__.__module__: + return obj.__name__ # Avoid reporting __builtin__ + else: + return module + '.' + obj.__name__ else: - return module + '.' + obj.__class__.__name__ + module = obj.__class__.__module__ + if module is None or module == str.__class__.__module__: + return obj.__class__.__name__ # Avoid reporting __builtin__ + else: + return module + '.' 
+ obj.__class__.__name__ def get_classes(module_name): @@ -137,7 +146,7 @@ def remove_from_list(lst, to_remove_predicate): def product(a, b): """ - Kind of cartesian product between list a and b + Kind of cartesian product between lists a and b knowing that a is also a list So it's a cartesian product between a list of list and a list @@ -155,3 +164,52 @@ def product(a, b): res.append(items) return res + + +def strip_quotes(text): + if not isinstance(text, str): + return text + + if text == "": + return "" + + if text[0] == "'" or text[0] == '"': + return text[1:-1] + + return text + + +def strip_tokens(tokens, strip_eof=False): + """ + Remove the starting and trailing spaces and newline + """ + if tokens is None: + return None + + start = 0 + length = len(tokens) + while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE): + start += 1 + + if start == length: + return [] + + end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \ + if strip_eof \ + else (TokenKind.WHITESPACE, TokenKind.NEWLINE) + + end = length - 1 + while end > 0 and tokens[end].type in end_tokens: + end -= 1 + + return tokens[start: end + 1] + + +def pp(items): + if not hasattr(items, "__iter__"): + return str(items) + + if len(items) == 0: + return str(items) + + return " \n" + " \n".join(str(item) for item in items) diff --git a/docs/blog.rst b/docs/blog.rst index 3f328bd..387e581 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -493,4 +493,108 @@ Even now that I am writing it, I just can't believe it. I must I have implemente it wrong. But the profiling shows that the time is lost in the under layers of the FS library. -It's a shame ! \ No newline at end of file +It's a shame ! + +2019-12-01 +********** + +Using BNF to define concept +""""""""""""""""""""""""""""" + +I always knew that there will be several ways to define the body of a concept (same +goes for the 'pre', 'post' and 'where' parts). 
It can be defined as Python code,
+or something that is related to concepts. It can even be a new language that I will
+design. The important point is that, contrary to traditional development languages,
+Sheerka must remain extensible.
+
+Same goes for the definition of the name.
+
+The traditional form is:
+
+::
+
+    def concept foo bar baz as ...
+
+So the concept is defined by the sequence 'foo', then 'bar' then 'baz'. In this order.
+
+Another way is
+
+::
+    def concept a plus b where a,b as ...
+
+In this form, a and b are supposed to be variables.
+It will be matched against :code:`one plus two`.
+
+The concept name is 'a plus b'. It is a quick way to declare a concept with variables,
+but if someone defines another concept
+
+::
+
+    def concept number1 plus number2 where number1,number2 as ...
+
+This will produce another concept (with the same key though). I guess that, at
+some point, Sheerka will be able to detect that the concepts are the same, but
+the name of the concept includes its variables. Which may be annoying in some
+situations.
+
+Plus, it's not possible to define rule precedence in this way. For example,
+
+::
+
+    def concept a plus b as ...
+    def concept a times b as ...
+
+How do you express that multiplications have a higher priority in, for example,
+:code:`one plus two times three` ?
+
+The only right answer, at least to me, is to implement something that is inspired
+by the BNF definition of a grammar.
+
+So the definition of the concept will look like
+
+::
+
+    def concept term as factor (('+' | '-') term)?
+    def concept factor as number (('*' | '/') factor)?
+    def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
+
+This form seems great, but in the definition of term and factor, there is no more
+room for the real body, i.e. once the components are recognized, what do we do with them ?
+
+So we can try
+
+::
+
+    def concept factor (('+') factor)* as factor[0] + factor[i]
+    def concept number (('*') number)? as number[0] + number[i]
+    def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
+
+The body is defined, but the name of the concept is too complicated, e.g. factor (('+') factor)*
+It's quite impossible to reference a concept that is defined in this way.
+
+So my last proposal, which marries the two ideas, is to introduce the two keywords 'using' and 'bnf'
+
+.. _bnf: https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form
+
+::
+
+    def concept term using bnf factor (('+' | '-') term)? as factor + (or -) term
+    def concept factor using bnf number (('*' | '/') factor)? as number * (or /) factor
+    def number where number in ['one', 'two', 'three'] as match(body, 'one', 1, 'two', 2, 'three', 3)
+
+In my implementation:
+
+* Terminals are between quotes
+* Sequences are separated by whitespace
+* '|' (vertical bar) is used for alternatives
+
+Like in regular expressions, you will also find
+
+* '*' (star) is used to express zero or many
+* '+' (plus) to express one or many
+* '?' (question mark) to express zero or one
+
+For those who don't know what BNF stands for, please have a look at the bnf_
+Wikipedia page.
+ +I guess that I will need a complete chapter to explain how you retrieve what was parsed \ No newline at end of file diff --git a/evaluators/AddConceptEvaluator.py b/evaluators/AddConceptEvaluator.py index 63f43b6..c9014d1 100644 --- a/evaluators/AddConceptEvaluator.py +++ b/evaluators/AddConceptEvaluator.py @@ -1,7 +1,11 @@ -from core.builtin_concepts import ParserResultConcept, ReturnValueConcept +from core.ast.nodes import python_to_concept +from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts +from core.builtin_helpers import get_names from core.concept import Concept from evaluators.BaseEvaluator import OneReturnValueEvaluator +from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor from parsers.DefaultParser import DefConceptNode +import functools import logging from parsers.PythonParser import PythonGetNamesVisitor, PythonNode @@ -9,6 +13,23 @@ from parsers.PythonParser import PythonGetNamesVisitor, PythonNode log = logging.getLogger(__name__) +class ConceptOrRuleNameVisitor(ParsingExpressionVisitor): + """ + Gets the concepts referenced by BNF + If a rule_name is given, it will also be considered as a potential property + """ + + def __init__(self): + self.names = set() + + def visit_ConceptMatch(self, node): + self.names.add(node.rule_name or node.concept_name) + + def visit_all(self, node): + if node.rule_name: + self.names.add(node.rule_name) + + class AddConceptEvaluator(OneReturnValueEvaluator): """ Used to add a new concept @@ -32,7 +53,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator): props_found = set() concept = Concept(def_concept_node.name) - for prop in ("where", "pre", "post", "body"): + for prop in ("definition", "where", "pre", "post", "body"): # put back the sources part_ret_val = getattr(def_concept_node, prop) if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status: @@ -43,35 +64,63 @@ class AddConceptEvaluator(OneReturnValueEvaluator): 
setattr(concept.metadata, prop, source) # try to find what can be a property - for p in self.get_props(part_ret_val): + concept_name = [part.value for part in def_concept_node.name.tokens] + for p in self.get_props(sheerka, part_ret_val, concept_name): props_found.add(p) - # Auto discovered properties must be referenced in the name - # Note that with this method, the variables will be created in the order of appearance + # add props order by appearance when possible for token in def_concept_node.name.tokens: if token.value in props_found: concept.set_prop(token.value, None) + # add the remaining properties + for p in props_found: + if p not in concept.props: + concept.set_prop(p, None) + # finish initialisation concept.init_key(def_concept_node.name.tokens) - concept.add_codes(def_concept_node.get_codes()) + concept.add_codes(def_concept_node.get_asts()) + if sheerka.is_success(def_concept_node.definition): + concept.bnf = def_concept_node.definition.value.value ret = sheerka.create_new_concept(context, concept) return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value]) @staticmethod def get_source(ret_value): - return ret_value.value.source if isinstance(ret_value.value, ParserResultConcept) \ - else ret_value.value.name + return ret_value.value.source @staticmethod - def get_props(ret_value): - if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode): - get_names_visitor = PythonGetNamesVisitor() - get_names_visitor.visit(ret_value.value.value.ast_) - return get_names_visitor.names + def get_props(sheerka, ret_value, concept_name): + """ + Try to find out the variables + This function can only be a draft, as there may be tons of different situations + I guess that it can only be complete when will we have access to Sheerka memory + """ - if isinstance(ret_value.value, Concept): - return list(ret_value.value.props.keys()) + # + # Case of python code + # + if isinstance(ret_value.value, 
ParserResultConcept) and isinstance(ret_value.value.value, PythonNode): + python_node = ret_value.value.value + as_concept_node = python_to_concept(python_node.ast_) + variables = get_names(sheerka, as_concept_node) + variables = filter(lambda x: x in concept_name, variables) + return list(variables) + + # + # case of concept + # + if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, Concept): + return list(ret_value.value.value.props.keys()) + + # + # case of BNF + # + if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, ParsingExpression): + visitor = ConceptOrRuleNameVisitor() + visitor.visit(ret_value.value.value) + return sorted(list(visitor.names)) return [] diff --git a/evaluators/ConceptEvaluator.py b/evaluators/ConceptEvaluator.py index 75dabba..b4371ca 100644 --- a/evaluators/ConceptEvaluator.py +++ b/evaluators/ConceptEvaluator.py @@ -18,13 +18,12 @@ class ConceptEvaluator(OneReturnValueEvaluator): def matches(self, context, return_value): return return_value.status and \ - return_value.who.startswith(BaseParser.PREFIX) and \ - isinstance(return_value.value, Concept) and \ - not isinstance(return_value.value, ParserResultConcept) # because there are specific evaluators + isinstance(return_value.value, ParserResultConcept) and \ + isinstance(return_value.value.value, Concept) def eval(self, context, return_value): sheerka = context.sheerka - concept = return_value.value + concept = return_value.value.value # pre condition should already be validated by the parser. 
# It's a mandatory condition for the concept before it can be recognized diff --git a/evaluators/DuplicateConceptEvaluator.py b/evaluators/DuplicateConceptEvaluator.py index 554cbd1..5dced5a 100644 --- a/evaluators/DuplicateConceptEvaluator.py +++ b/evaluators/DuplicateConceptEvaluator.py @@ -2,6 +2,7 @@ from core.builtin_concepts import BuiltinConcepts from evaluators.AddConceptEvaluator import AddConceptEvaluator from evaluators.BaseEvaluator import AllReturnValuesEvaluator from parsers.BaseParser import BaseParser +from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError class DuplicateConceptEvaluator(AllReturnValuesEvaluator): @@ -26,7 +27,7 @@ class DuplicateConceptEvaluator(AllReturnValuesEvaluator): if ret.status: parsing = True elif ret.who == sheerka.get_evaluator_name(AddConceptEvaluator.NAME): - if not ret.status and ret.value.body.args[0] == "Duplicate object.": + if not ret.status and isinstance(ret.value.body, SheerkaDataProviderDuplicateKeyError): add_concept_in_error = True self.already_defined = ret.value.body.obj else: diff --git a/main.py b/main.py index 36fad63..3023e9d 100644 --- a/main.py +++ b/main.py @@ -14,16 +14,19 @@ def usage(): def main(argv): try: - opts, args = getopt.getopt(argv, "hd", ["help", "debug"]) + opts, args = getopt.getopt(argv, "hdl:", ["help", "debug", "logger="]) debug = False + loggers = set() for o, a in opts: if o in ('-h', "--help"): usage() return True if o in ('-d', "--debug"): debug = True + if o in ('-l', '-logger'): + loggers.add(a) - sheerka = Sheerka(debug=debug) + sheerka = Sheerka(debug=debug, loggers=loggers) sheerka.initialize() _in = core.utils.sysarg_to_string(args) diff --git a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py index 437cfe4..cc5442f 100644 --- a/parsers/ConceptLexerParser.py +++ b/parsers/ConceptLexerParser.py @@ -10,6 +10,7 @@ from dataclasses import field, dataclass from collections import defaultdict from core.builtin_concepts import BuiltinConcepts from 
core.concept import Concept +from core.sheerka import ExecutionContext from core.tokenizer import TokenKind, Tokenizer, Token from parsers.BaseParser import BaseParser, Node, ErrorNode import core.utils @@ -42,6 +43,11 @@ class LexerNode(Node): class ConceptNode(LexerNode): + """ + Returned by the ConceptLexerParser + It represents a recognized concept + """ + def __init__(self, concept, start, end, tokens=None, source=None, children=None): super().__init__(start, end) self.concept = concept @@ -67,6 +73,10 @@ class ConceptNode(LexerNode): class NonTerminalNode(LexerNode): + """ + Returned by the ConceptLexerParser + """ + def __init__(self, parsing_expression, start, end, children=None): super().__init__(start, end) self.parsing_expression = parsing_expression @@ -82,6 +92,10 @@ class NonTerminalNode(LexerNode): class TerminalNode(LexerNode): + """ + Returned by the ConceptLexerParser + """ + def __init__(self, parsing_expression, start, end, value): super().__init__(start, end) self.parsing_expression = parsing_expression @@ -97,6 +111,27 @@ class GrammarErrorNode(ErrorNode): message: str +@dataclass() +class UnexpectedTokenErrorNode(ErrorNode): + message: str + expected_tokens: list + + +@dataclass() +class UnexpectedEndOfFileError(ErrorNode): + pass + + +@dataclass() +class UnknownConceptNode(ErrorNode): + concept_key: str + + +@dataclass() +class TooManyConceptNode(ErrorNode): + concept_key: str + + class ParsingExpression: def __init__(self, *args, **kwargs): self.elements = args @@ -108,6 +143,15 @@ class ParsingExpression: self.rule_name = kwargs.get('rule_name', '') + def __eq__(self, other): + if not isinstance(other, ParsingExpression): + return False + + return self.rule_name == other.rule_name and self.elements == other.elements + + def __hash__(self): + return hash((self.rule_name, self.elements)) + def parse(self, parser): return self._parse(parser) @@ -133,6 +177,10 @@ class Sequence(ParsingExpression): return NonTerminalNode(self, init_pos, 
end_pos, children) + def __repr__(self): + to_str = ", ".join(repr(n) for n in self.elements) + return f"({to_str})" + class OrderedChoice(ParsingExpression): """ @@ -152,6 +200,10 @@ class OrderedChoice(ParsingExpression): return None + def __repr__(self): + to_str = "| ".join(repr(n) for n in self.elements) + return f"({to_str})" + class Optional(ParsingExpression): """ @@ -178,6 +230,46 @@ class Optional(ParsingExpression): return selected_node + def __repr__(self): + if len(self.elements) == 1: + return f"{self.elements[0]}?" + else: + to_str = ", ".join(repr(n) for n in self.elements) + return f"({to_str})?" + + +class ZeroOrMore(ParsingExpression): + """ + ZeroOrMore will try to match parser expression specified zero or more + times. It will never fail. + """ + + def _parse(self, parser): + raise NotImplementedError() + + # Uncomment when _parse is implemented + # def __repr__(self): + # to_str = ", ".join(repr(n) for n in self.elements) + # return f"({to_str})*" + + +class OneOrMore(ParsingExpression): + """ + OneOrMore will try to match parser expression specified one or more times. + """ + + def _parse(self, parser): + raise NotImplementedError() + + +class UnorderedGroup(ParsingExpression): + """ + Will try to match all of the parsing expression in any order. 
+ """ + + def _parse(self, parser): + raise NotImplementedError() + class Match(ParsingExpression): """ @@ -197,13 +289,22 @@ class StrMatch(Match): Matches a literal """ - def __init__(self, to_match, rule_name="", root=False, ignore_case=None): + def __init__(self, to_match, rule_name="", root=False, ignore_case=True): super(Match, self).__init__(rule_name=rule_name, root=root) self.to_match = to_match self.ignore_case = ignore_case def __repr__(self): - return f"StrMatch('{self.to_match}')" + return f"'{self.to_match}'" + + def __eq__(self, other): + if not super().__eq__(other): + return False + + if not isinstance(other, StrMatch): + return False + + return self.to_match == other.to_match and self.ignore_case == other.ignore_case def _parse(self, parser): token = parser.get_token() @@ -218,6 +319,31 @@ class StrMatch(Match): return None +class ConceptMatch(Match): + """ + Will match a concept + It used only for rule definition + + When the grammar is created, it is replaced by the actual concept + """ + + def __init__(self, concept_name): + super(Match, self).__init__() + self.concept_name = concept_name + + def __repr__(self): + return f"{self.concept_name}" + + def __eq__(self, other): + if not super().__eq__(other): + return False + + if not isinstance(other, ConceptMatch): + return False + + return self.concept_name == other.concept_name + + class CrossRef: """ During the creation of the model, @@ -227,11 +353,20 @@ class CrossRef: def __init__(self, concept): self.concept = concept + def __repr__(self): + return f"ref({self.concept.key})" + + def __eq__(self, other): + if not isinstance(other, CrossRef): + return False + + return self.concept == other.concept + class ConceptLexerParser(BaseParser): - def __init__(self): + def __init__(self, concepts_dict=None): super().__init__("ConceptLexer") - self.concepts_dict = {} + self.concepts_dict = concepts_dict or {} # dict of concept, grammar self.ignore_case = True self.token = None @@ -295,22 +430,28 @@ 
class ConceptLexerParser(BaseParser): self.pos -= 1 self.token = self.tokens[self.pos] - def initialize(self, dict): + def initialize(self, context, grammars): """ Adds a bunch of concepts, and how they can be recognized - :param dict: dictionary of concept; concept_definition + :param context: execution context + :param grammars: dictionary of concept, concept_definition :return: """ + self.context = context + self.sheerka = context.sheerka nodes_to_resolve = [] concepts_to_resolve = set() # ## Gets the grammars - for concept, concept_def in dict.items(): + for concept, concept_def in grammars.items(): concept.init_key() # make sure that the key is initialized grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve) self.concepts_dict[concept] = grammar + if self.has_error: + return self.sheerka.ret(self.name, False, self.error_sink) + # ## Removes concepts with infinite recursions concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) for concept in concepts_to_remove: @@ -320,7 +461,20 @@ class ConceptLexerParser(BaseParser): # ## Resolves cross references and remove grammar with unresolved references self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve) + if self.has_error: + return self.sheerka.ret(self.name, False, self.error_sink) + else: + return self.sheerka.ret(self.name, True, self.concepts_dict) + def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve): + def get_concept(concept_name): + if concept_name in self.context.concepts_cache: + return self.context.concepts_cache[concept_name] + return self.sheerka.get(concept_name) + + # TODO + # inner_get_model must not modify the initial ParsingExpression + # A copy must be created def inner_get_model(expression): if isinstance(expression, Concept): ret = CrossRef(expression) @@ -332,6 +486,16 @@ class ConceptLexerParser(BaseParser): ret = expression if ret.ignore_case is None: ret.ignore_case = self.ignore_case + elif 
isinstance(expression, ConceptMatch): + to_match = get_concept(expression.concept_name) + if hasattr(to_match, "__iter__"): + ret = self.add_error(TooManyConceptNode(expression.concept_name), False) + elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): + ret = self.add_error(UnknownConceptNode(expression.concept_name), False) + else: + ret = CrossRef(to_match) + concepts_to_resolve.add(concept) + nodes_to_resolve.append(ret) elif isinstance(expression, Sequence) or \ isinstance(expression, OrderedChoice) or \ isinstance(expression, Optional): @@ -341,7 +505,7 @@ concepts_to_resolve.add(concept) nodes_to_resolve.append(ret) else: - ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'.")) + ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) return ret model = inner_get_model(concept_def) @@ -493,3 +657,242 @@ class ConceptLexerParser(BaseParser): by_end_pos[result.end].append(result) return by_end_pos[max(by_end_pos)] + + +class RegexParser: + """ + Parser used to transform a literal into a ParsingExpression + example : + a | b, c -> Sequence(OrderedChoice(a, b) ,c) + + '|' (pipe) is used for OrderedChoice + ',' (comma) is used for Sequence + '?' 
(question mark) is used for Optional + '*' (star) is used for ZeroOrMore + '+' (plus) is used for OneOrMore + + """ + + def __init__(self): + self.has_error = False + self.error_sink = [] + self.name = BaseParser.PREFIX + "RegexParser" + + self.lexer_iter = None + self._current = None + self.after_current = None + self.nb_open_par = 0 + self.context = None + self.source = "" + self.sheerka = None + + def __eq__(self, other): + if not isinstance(other, RegexParser): + return False + + return True + + def reset_parser(self, context, text): + self.context = context + self.sheerka = context.sheerka + + self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text) + self._current = None + self.after_current = None + self.nb_open_par = 0 + + self.next_token() + self.eat_white_space() + + def add_error(self, error, next_token=True): + self.has_error = True + self.error_sink.append(error) + if next_token: + self.next_token() + return error + + def get_token(self) -> Token: + return self._current + + def next_token(self, skip_whitespace=False): + if self._current and self._current.type == TokenKind.EOF: + return + + try: + self._current = self.after_current or next(self.lexer_iter) + self.source += str(self._current.value) + self.after_current = None + + if skip_whitespace: + while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: + self._current = next(self.lexer_iter) + self.source += str(self._current.value) + except StopIteration: + self._current = Token(TokenKind.EOF, "", -1, -1, -1) + + def next_after(self): + if self.after_current is not None: + return self.after_current + + try: + self.after_current = next(self.lexer_iter) + # self.source += str(self.after_current.value) + return self.after_current + except StopIteration: + self.after_current = Token(TokenKind.EOF, "", -1, -1, -1) + return self.after_current + + def eat_white_space(self): + if self.after_current is not None: + self._current = 
self.after_current + self.source += str(self._current.value) + self.after_current = None + + try: + while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: + self._current = next(self.lexer_iter) + self.source += str(self._current.value) + except StopIteration: + self._current = None + + def maybe_sequence(self, first, second): + token = self.get_token() + return token.type == second or token.type == first and self.next_after().type == second + + def parse(self, context: ExecutionContext, text): + self.reset_parser(context, text) + tree = self.parse_choice() + + ret = self.sheerka.ret( + self.name, + not self.has_error, + self.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=self.source, + body=self.error_sink if self.has_error else tree, + try_parsed=tree)) + + return ret + + def parse_choice(self): + sequence = self.parse_sequence() + + self.eat_white_space() + token = self.get_token() + if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: + return sequence + + elements = [sequence] + while True: + # maybe eat the vertical bar + self.eat_white_space() + token = self.get_token() + if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: + break + self.next_token(skip_whitespace=True) + + sequence = self.parse_sequence() + elements.append(sequence) + + return OrderedChoice(*elements) + + def parse_sequence(self): + expr_and_modifier = self.parse_expression_and_modifier() + token = self.get_token() + if token is None or token.type == TokenKind.EOF or \ + self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ + self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR): + return expr_and_modifier + + elements = [expr_and_modifier] + while True: + # maybe eat the comma + token = self.get_token() + if token is None or token.type == TokenKind.EOF or \ + self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ + 
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR): + break + self.eat_white_space() + + sequence = self.parse_expression_and_modifier() + elements.append(sequence) + + return Sequence(*elements) + + def parse_expression_and_modifier(self): + expression = self.parse_expression() + + token = self.get_token() + + if token.type == TokenKind.QMARK: + self.next_token() + return Optional(expression) + + if token.type == TokenKind.STAR: + self.next_token() + return ZeroOrMore(expression) + + if token.type == TokenKind.PLUS: + self.next_token() + return OneOrMore(expression) + + return expression + + def parse_expression(self): + token = self.get_token() + if token.type == TokenKind.EOF: + self.add_error(UnexpectedEndOfFileError(), False) + if token.type == TokenKind.LPAR: + self.nb_open_par += 1 + self.next_token() + expression = self.parse_choice() + token = self.get_token() + if token.type == TokenKind.RPAR: + self.nb_open_par -= 1 + self.next_token() + return expression + else: + self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR])) + return expression + + if token.type == TokenKind.IDENTIFIER: + self.next_token() + return ConceptMatch(token.value) + # concept = self.sheerka.get(str(token.value)) + # if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): + # self.add_error(CannotResolveConceptNode(str(token.value))) + # self.next_token() + # return None + # else: + # self.next_token() + # return concept + + ret = StrMatch(core.utils.strip_quotes(token.value)) + self.next_token() + return ret + + +class ParsingExpressionVisitor: + """ + visit ParsingExpression + """ + + def visit(self, parsing_expression): + name = parsing_expression.__class__.__name__ + + method = 'visit_' + name + visitor = getattr(self, method, self.generic_visit) + return visitor(parsing_expression) + + def generic_visit(self, parsing_expression): + if hasattr(self, "visit_all"): + 
self.visit_all(parsing_expression) + + for node in parsing_expression.elements: + if isinstance(node, Concept): + self.visit(ConceptMatch(node.key or node.name)) + elif isinstance(node, str): + self.visit(StrMatch(node)) + else: + self.visit(node) diff --git a/parsers/DefaultParser.py b/parsers/DefaultParser.py index 6bd1f8c..1ca9a75 100644 --- a/parsers/DefaultParser.py +++ b/parsers/DefaultParser.py @@ -1,11 +1,14 @@ -from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept from core.concept import ConceptParts import core.builtin_helpers +import core.utils from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode from core.tokenizer import Tokenizer, TokenKind, Token, Keywords from dataclasses import dataclass, field import logging +from parsers.ConceptLexerParser import RegexParser + log = logging.getLogger(__name__) @@ -180,20 +183,22 @@ class NameNode(DefaultParserNode): @dataclass() class DefConceptNode(DefaultParserNode): - name: NameNode = NotInitializedNode() where: ReturnValueConcept = NotInitializedNode() pre: ReturnValueConcept = NotInitializedNode() post: ReturnValueConcept = NotInitializedNode() body: ReturnValueConcept = NotInitializedNode() + definition: ReturnValueConcept = NotInitializedNode() - def get_codes(self): - codes = {} + def get_asts(self): + asts = {} for part_key in ConceptParts: prop_value = getattr(self, part_key.value) - if hasattr(prop_value, "ast_"): - codes[part_key] = prop_value.ast_ - return codes + if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body, + ParserResultConcept) and hasattr( + prop_value.body.body, "ast_"): + asts[part_key] = prop_value.body.body.ast_ + return asts class DefaultParser(BaseParser): @@ -322,20 +327,44 @@ class DefaultParser(BaseParser): # init log.debug("It may be a definition of a concept") - concept_special_tokens = [def_token] - 
concept_found = DefConceptNode(concept_special_tokens) + keywords_tokens = [def_token] + concept_found = DefConceptNode(keywords_tokens) # the definition of a concept consists of several parts # Keywords.CONCEPT to get the name of the concept + # Keywords.FROM [Keywords.REGEX] to get the definition of the concept # Keywords.AS to get the body # Keywords.WHERE to get the conditions to recognize for the variables # Keywords.PRE to know if the conditions to evaluate the concept # Keywords.POST to apply or verify once the concept is executed - def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST] + # + # Regroup the tokens by parts + first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens) + + # get the name + concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts) + + # get the definition + concept_found.definition = self.get_concept_definition(tokens_found_by_parts) + + # get the ASTs for the remaining parts + asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts) + concept_found.where = asts_found_by_parts[Keywords.WHERE] + concept_found.pre = asts_found_by_parts[Keywords.PRE] + concept_found.post = asts_found_by_parts[Keywords.POST] + concept_found.body = asts_found_by_parts[Keywords.AS] + + log.debug(f"Found DefConcept node '{concept_found}'") + return concept_found + + def regroup_tokens_by_parts(self, keywords_tokens): + + def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST] # tokens found, when trying to recognize the parts tokens_found_by_parts = { Keywords.CONCEPT: [], + Keywords.FROM: None, Keywords.AS: None, Keywords.WHERE: None, Keywords.PRE: None, @@ -348,7 +377,7 @@ class DefaultParser(BaseParser): # loop thru the tokens, and put them in the correct tokens_found_by_parts entry while token.type != TokenKind.EOF: if token.value in def_concept_parts: - concept_special_tokens.append(token) # keep 
track of the keywords + keywords_tokens.append(token) # keep track of the keywords keyword = token.value if tokens_found_by_parts[keyword]: # a part is defined more than once @@ -364,13 +393,15 @@ class DefaultParser(BaseParser): token = self.get_token() - # semantic checks + return first_token, tokens_found_by_parts + + def get_concept_name(self, first_token, tokens_found_by_parts): name_first_token_index = 1 + token = self.get_token() if first_token.value != Keywords.CONCEPT: self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT])) name_first_token_index = 0 - # Manage the name name_tokens = tokens_found_by_parts[Keywords.CONCEPT] if len(name_tokens) == name_first_token_index: self.add_error(SyntaxErrorNode([], "Name is mandatory")) @@ -381,8 +412,31 @@ class DefaultParser(BaseParser): if TokenKind.NEWLINE in [t.type for t in name_tokens]: self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name.")) - concept_found.name = NameNode(name_tokens[name_first_token_index:]) # skip the first token + return NameNode(name_tokens[name_first_token_index:]) # skip the first token + def get_concept_definition(self, tokens_found_by_parts): + if tokens_found_by_parts[Keywords.FROM] is None: + return NotInitializedNode() + + definition_tokens = tokens_found_by_parts[Keywords.FROM] + if definition_tokens[1].value != Keywords.BNF: + return NotInitializedNode() + + tokens = core.utils.strip_tokens(definition_tokens[2:]) + if len(tokens) == 0: + self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False) + return NotInitializedNode() + + regex_parser = RegexParser() + new_context = self.context.push(self.name) + parsing_result = regex_parser.parse(new_context, tokens) + if not parsing_result.status: + self.add_error(parsing_result.value) + return NotInitializedNode() + + return parsing_result + + def get_concept_parts(self, tokens_found_by_parts): asts_found_by_parts = { 
Keywords.AS: NotInitializedNode(), Keywords.WHERE: NotInitializedNode(), @@ -391,7 +445,7 @@ class DefaultParser(BaseParser): } for keyword in tokens_found_by_parts: - if keyword == Keywords.CONCEPT: + if keyword == Keywords.CONCEPT or keyword == Keywords.FROM: continue # already done log.debug("Processing part '" + keyword.name + "'") @@ -418,13 +472,7 @@ class DefaultParser(BaseParser): asts_found_by_parts[keyword] = parsing_result - concept_found.where = asts_found_by_parts[Keywords.WHERE] - concept_found.pre = asts_found_by_parts[Keywords.PRE] - concept_found.post = asts_found_by_parts[Keywords.POST] - concept_found.body = asts_found_by_parts[Keywords.AS] - - log.debug(f"Found DefConcept node '{concept_found}'") - return concept_found + return asts_found_by_parts # def parse_expression(self): # return self.parse_addition() diff --git a/parsers/EmptyStringParser.py b/parsers/EmptyStringParser.py index 4113eec..a2d9fac 100644 --- a/parsers/EmptyStringParser.py +++ b/parsers/EmptyStringParser.py @@ -20,7 +20,11 @@ class EmptyStringParser(BaseParser): isinstance(text, list) and text == [] or \ text is None: log.debug(f"Recognized '{text}' as BuiltinConcepts.NOP.") - return sheerka.ret(self.name, True, sheerka.new(BuiltinConcepts.NOP)) + return sheerka.ret(self.name, True, sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source="", + body=sheerka.new(BuiltinConcepts.NOP))) log.debug(f"Failed to recognize '{text}'") return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME)) diff --git a/parsers/ExactConceptParser.py b/parsers/ExactConceptParser.py index bfe45b6..a84c0ab 100644 --- a/parsers/ExactConceptParser.py +++ b/parsers/ExactConceptParser.py @@ -47,7 +47,15 @@ class ExactConceptParser(BaseParser): if token.startswith(VARIABLE_PREFIX): index = int(token[len(VARIABLE_PREFIX):]) concept.set_prop_by_index(index, words[i]) - res.append(ReturnValueConcept(self.name, True, concept)) + res.append(ReturnValueConcept( + self.name, + True, 
+ context.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=text if isinstance(text, str) else self.get_text_from_tokens(text), + body=concept, + try_parsed=concept))) log.debug(f"Recognized '{text}' as '{concept}'") recognized = True diff --git a/sdp/readme.md b/sdp/readme.md index c7190c5..9a461e0 100644 --- a/sdp/readme.md +++ b/sdp/readme.md @@ -9,6 +9,9 @@ - E : events - O : object (with history management) - P : pickle +- S : state +- C : concept +- D : concept definitions ## How concepts are serialized ? - get the id of the concept diff --git a/sdp/sheerkaDataProvider.py b/sdp/sheerkaDataProvider.py index 2ebd9fa..200d500 100644 --- a/sdp/sheerkaDataProvider.py +++ b/sdp/sheerkaDataProvider.py @@ -10,7 +10,7 @@ from sdp.sheerkaSerializer import Serializer, SerializerContext import logging log = logging.getLogger(__name__) - +init_log = logging.getLogger(__name__ + ".init") def json_default_converter(o): """ @@ -278,7 +278,7 @@ class SheerkaDataProvider: REF_PREFIX = "##REF##:" def __init__(self, root=None): - log.debug("Initializing sdp.") + init_log.debug("Initializing sdp.") self.io = SheerkaDataProviderIO.get(root) self.first_time = self.io.first_time @@ -312,6 +312,20 @@ class SheerkaDataProvider: else obj.get_digest() if hasattr(obj, "get_digest") \ else None + @staticmethod + def get_obj_origin(obj): + """ + Get the digest used to save obj if set + """ + if isinstance(obj, dict) and Serializer.ORIGIN in obj: + return obj[Serializer.ORIGIN] + + if hasattr(obj, Serializer.ORIGIN): + return getattr(obj, Serializer.ORIGIN) + + return None + + @staticmethod def get_stream_digest(stream): sha256_hash = hashlib.sha256() @@ -460,10 +474,10 @@ class SheerkaDataProvider: obj_key = self.get_obj_key(obj) or key if isinstance(state.data[entry][key], list): - if not hasattr(obj, Serializer.ORIGIN): + obj_origin = self.get_obj_origin(obj) + if obj_origin is None: raise (SheerkaDataProviderError(f"Multiple entries under '{entry}.{key}'", obj)) - 
obj_origin = getattr(obj, Serializer.ORIGIN) state.modify_in_list(entry, key, obj, obj_key, obj_origin, self.load_ref_if_needed, self.save_ref_if_needed) else: @@ -674,7 +688,9 @@ class SheerkaDataProvider: obj = self.serializer.deserialize(f, SerializerContext(origin=digest)) # set the origin of the object - if not isinstance(obj, str): + if isinstance(obj, dict): + obj[Serializer.ORIGIN] = digest + elif not isinstance(obj, str): setattr(obj, Serializer.ORIGIN, digest) return obj diff --git a/sdp/sheerkaDataProviderIO.py b/sdp/sheerkaDataProviderIO.py index 70b49c5..d4d9360 100644 --- a/sdp/sheerkaDataProviderIO.py +++ b/sdp/sheerkaDataProviderIO.py @@ -46,9 +46,9 @@ class SheerkaDataProviderIO: class SheerkaDataProviderFileIO(SheerkaDataProviderIO): - log = logging.getLogger("FileIO") def __init__(self, root): + self.log = logging.getLogger(self.__class__.__name__ + ".init") root = path.abspath(path.join(path.expanduser("~"), ".sheerka")) \ if root is None \ else path.abspath(root) @@ -180,10 +180,13 @@ def on_close(dictionary_io, file_path, stream): :param stream: :return: """ + def decorator(func): def wrapper(*args, **kwargs): stream.seek(0) dictionary_io.cache[file_path] = stream.read() func(*args, **kwargs) + return wrapper + return decorator diff --git a/sdp/sheerkaSerializer.py b/sdp/sheerkaSerializer.py index 8dce556..fef47d0 100644 --- a/sdp/sheerkaSerializer.py +++ b/sdp/sheerkaSerializer.py @@ -12,6 +12,7 @@ import core.utils from core.concept import Concept log = logging.getLogger(__name__) +init_log = logging.getLogger(__name__ + ".init") def json_default_converter(o): @@ -40,17 +41,18 @@ class Serializer: USERNAME = "user_name" # key to store user that as committed the snapshot MODIFICATION_DATE = "modification_date" # PARENTS = "parents" - ORIGIN = "origin" + ORIGIN = "##origin##" HISTORY = "##history##" def __init__(self): - log.debug("Initializing serializers") + init_log.debug("Initializing serializers") self._cache = [] # add builtin 
serializers self.register(EventSerializer()) self.register(StateSerializer()) self.register(ConceptSerializer()) + self.register(DictionarySerializer()) def register(self, serializer): """ @@ -58,7 +60,7 @@ class Serializer: :param serializer: :return: """ - log.debug(f"Adding serializer {serializer}") + init_log.debug(f"Adding serializer {serializer}") self._cache.append(serializer) def serialize(self, obj, context): @@ -212,8 +214,11 @@ class PickleSerializer(BaseSerializer): class StateSerializer(PickleSerializer): def __init__(self, ): - PickleSerializer.__init__(self, lambda obj: core.utils.get_full_qualified_name( - obj) == "sdp.sheerkaDataProvider.State", "S", 1) + PickleSerializer.__init__( + self, + lambda obj: core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State", + "S", + 1) class ConceptSerializer(ObjectSerializer): @@ -223,6 +228,15 @@ class ConceptSerializer(ObjectSerializer): def matches(self, obj): return isinstance(obj, Concept) + +class DictionarySerializer(PickleSerializer): + def __init__(self, ): + PickleSerializer.__init__( + self, + lambda obj: isinstance(obj, dict), + "D", + 1) + # # class SheerkaSerializer(ObjectSerializer): # def __init__(self): diff --git a/tests/test_AddConceptEvaluator.py b/tests/test_AddConceptEvaluator.py new file mode 100644 index 0000000..18ec7f0 --- /dev/null +++ b/tests/test_AddConceptEvaluator.py @@ -0,0 +1,181 @@ +import ast + +import pytest + +from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts +from core.concept import VARIABLE_PREFIX, ConceptParts, Concept +from core.sheerka import Sheerka, ExecutionContext +from core.tokenizer import Tokenizer +from evaluators.AddConceptEvaluator import AddConceptEvaluator +from parsers.BaseParser import BaseParser +from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch +from parsers.DefaultParser import DefConceptNode, NameNode +from parsers.ExactConceptParser import 
ExactConceptParser +from parsers.PythonParser import PythonNode, PythonParser + + +def get_context(): + sheerka = Sheerka(skip_builtins_in_db=True) + sheerka.initialize("mem://") + return ExecutionContext("test", "xxx", sheerka) + + +def get_concept(name, where=None, pre=None, post=None, body=None, definition=None): + concept = DefConceptNode([], name=NameNode(list(Tokenizer(name)))) + + if body: + concept.body = get_concept_part(body) + if where: + concept.where = get_concept_part(where) + if pre: + concept.pre = get_concept_part(pre) + if post: + concept.post = get_concept_part(post) + if definition: + concept.definition = definition + + return ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=concept)) + + +def get_concept_part(part): + if isinstance(part, str): + node = PythonNode(part, ast.parse(part, mode="eval")) + return ReturnValueConcept( + who="Parsers:DefaultParser", + status=True, + value=ParserResultConcept( + source=part, + parser=PythonParser(), + value=node)) + + if isinstance(part, PythonNode): + return ReturnValueConcept( + who="Parsers:DefaultParser", + status=True, + value=ParserResultConcept( + source=part.source, + parser=PythonParser(), + value=part)) + + if isinstance(part, ReturnValueConcept): + return part + + +def get_concept_definition(source, parsing_expression): + return ReturnValueConcept( + who="Parsers:RegexParser", + status=True, + value=ParserResultConcept( + source=source, + parser=RegexParser(), + value=parsing_expression + ) + ) + + +@pytest.mark.parametrize("ret_val, expected", [ + (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=DefConceptNode([]))), True), + (ReturnValueConcept(BaseParser.PREFIX + "some_name", False, ParserResultConcept(value=DefConceptNode([]))), False), + (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a ParserResultConcept"), False), + (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), 
False), +]) +def test_i_can_match(ret_val, expected): + context = get_context() + assert AddConceptEvaluator().matches(context, ret_val) == expected + + +def test_that_the_source_is_correctly_set(): + context = get_context() + def_concept_return_value = get_concept( + name="hello a", + definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))), + where="isinstance(a, str )", + pre="a is not None", + body="print('hello' + a)") + + evaluated = AddConceptEvaluator().eval(context, def_concept_return_value) + + assert evaluated.status + assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT) + + created_concept = evaluated.body.body + assert created_concept.metadata.name == "hello a" + assert created_concept.metadata.where == "isinstance(a, str )" + assert created_concept.metadata.pre == "a is not None" + assert created_concept.metadata.post is None + assert created_concept.metadata.body == "print('hello' + a)" + assert created_concept.metadata.definition == "hello a" + + +def test_that_the_ast_is_correctly_initialized(): + context = get_context() + def_concept_return_value = get_concept( + name="hello a", + definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))), + where="isinstance(a, str )", + pre="a is not None", + body="print('hello' + a)") + + evaluated = AddConceptEvaluator().eval(context, def_concept_return_value) + + assert evaluated.status + assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT) + + created_concept = evaluated.body.body + + assert ConceptParts.WHERE in created_concept.cached_asts + assert ConceptParts.PRE in created_concept.cached_asts + assert ConceptParts.BODY in created_concept.cached_asts + assert ConceptParts.POST not in created_concept.cached_asts + + +def test_that_the_new_concept_is_correctly_saved(): + context = get_context() + def_concept_return_value = get_concept( + name="hello a", + definition=get_concept_definition("hello 
a", Sequence(StrMatch("hello"), StrMatch("a"))), + where="isinstance(a, str )", + pre="a is not None", + body="print('hello' + a)") + + from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0") + assert context.sheerka.isinstance(from_db, BuiltinConcepts.UNKNOWN_CONCEPT) + + AddConceptEvaluator().eval(context, def_concept_return_value) + context.sheerka.concepts_cache = {} # reset cache + from_db = context.sheerka.get("hello " + VARIABLE_PREFIX + "0") + + assert from_db.metadata.key == f"hello {VARIABLE_PREFIX}0" + assert from_db.metadata.name == "hello a" + assert from_db.metadata.where == "isinstance(a, str )" + assert from_db.metadata.pre == "a is not None" + assert from_db.metadata.post is None + assert from_db.metadata.body == "print('hello' + a)" + assert from_db.metadata.definition == "hello a" + assert len(from_db.props) == 1 + assert "a" in from_db.props + + assert from_db.cached_asts == {} # ast is not saved in db + + +def test_i_can_get_props_from_python_node(): + ret_val = get_concept_part("isinstance(a, str)") + context = get_context() + + assert AddConceptEvaluator.get_props(context.sheerka, ret_val, ["a"]) == ["a"] + + +def test_i_can_get_props_from_another_concept(): + concept = Concept("hello").set_prop("a").set_prop("b") + ret_val = ReturnValueConcept(who="some_parser", + status=True, + value=ParserResultConcept(value=concept)) + + assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["a", "b"] + + +def test_i_can_get_props_from_definition(): + parsing_expression = Sequence(ConceptMatch('mult'), ZeroOrMore(Sequence(StrMatch("+"), ConceptMatch("add")))) + ret_val = get_concept_definition("mult (('+'|'-') add)?", parsing_expression) + + assert AddConceptEvaluator.get_props(get_context(), ret_val, []) == ["add", "mult"] \ No newline at end of file diff --git a/tests/test_BaseParser.py b/tests/test_BaseParser.py new file mode 100644 index 0000000..95d3902 --- /dev/null +++ b/tests/test_BaseParser.py @@ -0,0 +1,7 @@ +import pytest 
+ +from core.tokenizer import Tokenizer, Token, TokenKind +from parsers.BaseParser import BaseParser + + + diff --git a/tests/test_ConceptEvaluator.py b/tests/test_ConceptEvaluator.py index af273d6..0124a96 100644 --- a/tests/test_ConceptEvaluator.py +++ b/tests/test_ConceptEvaluator.py @@ -5,6 +5,7 @@ from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext from evaluators.ConceptEvaluator import ConceptEvaluator from parsers.BaseParser import BaseParser +from parsers.ExactConceptParser import ExactConceptParser def get_context(): @@ -13,12 +14,21 @@ def get_context(): return ExecutionContext("test", "xxx", sheerka) +def get_return_value(concept, source=None): + return ReturnValueConcept( + "some_name", + True, + ParserResultConcept(parser=ExactConceptParser(), + source=source or concept.name, + value=concept, + try_parsed=concept)) + + @pytest.mark.parametrize("ret_val, expected", [ - (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, Concept()), True), - (ReturnValueConcept(BaseParser.PREFIX + "some_name", False, Concept()), False), - (ReturnValueConcept("Not a parser", True, Concept()), False), - (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not a concept"), False), - (ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept()), False), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=Concept())), True), + (ReturnValueConcept("some_name", False, ParserResultConcept(value=Concept())), False), + (ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept")), False), + (ReturnValueConcept("some_name", True, Concept()), False), ]) def test_i_can_match(ret_val, expected): context = get_context() @@ -30,7 +40,7 @@ def test_concept_is_returned_when_no_body(): concept = Concept(name="one").init_key() evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept) + item = get_return_value(concept) result = 
evaluator.eval(context, item) assert result.who == evaluator.name @@ -44,7 +54,7 @@ def test_body_is_evaluated_when_python_body(): concept = Concept(name="one", body="1").init_key() evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept) + item = get_return_value(concept) result = evaluator.eval(context, item) assert result.who == evaluator.name @@ -60,7 +70,7 @@ def test_body_is_evaluated_when_concept_body(): concept_un = Concept(name="un", body="one").init_key() evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un) + item = get_return_value(concept_un) result = evaluator.eval(context, item) assert result.who == evaluator.name @@ -80,7 +90,7 @@ def test_body_is_evaluated_when_concept_body_with_a_body(): concept_un = Concept(name="un", body="one").init_key() evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_un) + item = get_return_value(concept_un) result = evaluator.eval(context, item) assert result.who == evaluator.name @@ -97,7 +107,7 @@ def test_i_can_evaluate_longer_chains(): concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key()) evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d) + item = get_return_value(concept_d) result = evaluator.eval(context, item) assert result.status @@ -112,7 +122,7 @@ def test_i_can_evaluate_longer_chains_2(): concept_d = context.sheerka.add_in_cache(Concept(name="d", body="c").init_key()) evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_d) + item = get_return_value(concept_d) result = evaluator.eval(context, item) assert result.status @@ -133,7 +143,7 @@ def test_i_can_recognize_concept_properties(): .set_prop("b", "two").init_key()) evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, 
concept_plus) + item = get_return_value(concept_plus) result = evaluator.eval(context, item) assert result.status @@ -156,7 +166,7 @@ def test_i_can_recognize_concept_properties_with_body(): .set_prop("b", "two").init_key()) evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus) + item = get_return_value(concept_plus) result = evaluator.eval(context, item) assert result.status @@ -174,7 +184,7 @@ def test_i_can_recognize_concept_properties_with_body_when_concept_has_a_body(): .set_prop("b", "two").init_key()) evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus) + item = get_return_value(concept_plus) result = evaluator.eval(context, item) assert result.status @@ -189,7 +199,7 @@ def test_i_cannot_recognize_a_concept_if_one_of_the_prop_is_unknown(): .set_prop("b", "two").init_key()) evaluator = ConceptEvaluator() - item = ReturnValueConcept(BaseParser.PREFIX + "some_name", True, concept_plus) + item = get_return_value(concept_plus) result = evaluator.eval(context, item) assert not result.status diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py index bb808c3..7a783c5 100644 --- a/tests/test_ConceptLexerParser.py +++ b/tests/test_ConceptLexerParser.py @@ -2,8 +2,18 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext +from core.tokenizer import Tokenizer, TokenKind from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ - CrossRef + CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \ + ParsingExpressionVisitor + + +class ConceptVisitor(ParsingExpressionVisitor): + def __init__(self): + self.concepts = set() + + def visit_ConceptMatch(self, node): + self.concepts.add(node.concept_name) 
@pytest.mark.parametrize("match, text", [ @@ -23,7 +33,7 @@ def test_i_can_match_simple_tokens(match, text): foo = Concept(name="foo") concepts = {foo: text} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, text) @@ -38,7 +48,7 @@ def test_i_can_match_multiple_concepts_in_one_input(): two = Concept(name="two") concepts = {one: "one", two: "two"} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two one") @@ -69,7 +79,7 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown(): two = Concept(name="two") concepts = {one: "one", two: "two"} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two three") assert not res.status @@ -86,7 +96,7 @@ def test_i_can_match_sequence(): foo = Concept(name="foo") concepts = {foo: Sequence("one", "two", "three")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two three") @@ -100,7 +110,7 @@ def test_wrong_sequence_is_not_matched(): foo = Concept(name="foo") concepts = {foo: Sequence("one", "two", "three")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two three one") @@ -116,7 +126,7 @@ def test_i_cannot_match_sequence_if_end_of_file(): foo = Concept(name="foo") concepts = {foo: Sequence("one", "two", "three")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two") assert not res.status @@ -133,7 +143,7 @@ def test_i_always_choose_the_longest_match(): concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = 
parser.parse(context, "one two three") @@ -149,7 +159,7 @@ def test_i_can_match_several_sequences(): concepts = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two three one two") @@ -166,7 +176,7 @@ def test_i_can_match_ordered_choice(): foo = Concept(name="foo") concepts = {foo: OrderedChoice("one", "two")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res1 = parser.parse(context, "one") assert res1.status @@ -189,7 +199,7 @@ def test_i_cannot_match_ordered_choice_with_empty_alternative(): foo = Concept(name="foo") concepts = {foo: Sequence(OrderedChoice("one", ""), "two")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "ok") # because token[0] is not "one" and not "" (it is 'two') assert not res.status @@ -201,7 +211,7 @@ def test_i_can_mix_sequences_and_ordered_choices(): concepts = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res1 = parser.parse(context, "twenty one ok") assert res1.status @@ -225,7 +235,7 @@ def test_i_can_mix_ordered_choices_and_sequences(): concepts = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "twenty thirty") assert res.status @@ -240,7 +250,7 @@ def test_i_cannot_parse_empty_optional(): concepts = {foo: Optional("one")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "") assert not res.status @@ -253,7 +263,7 @@ def test_i_can_parse_optional(): concepts = {foo: Optional("one")} parser = ConceptLexerParser() - 
parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one") assert res.status @@ -266,7 +276,7 @@ def test_i_can_parse_sequence_starting_with_optional(): concepts = {foo: Sequence(Optional("twenty"), "one")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "twenty one") assert res.status @@ -283,7 +293,7 @@ def test_i_can_parse_sequence_ending_with_optional(): concepts = {foo: Sequence("one", "two", Optional("three"))} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two three") assert res.status @@ -300,7 +310,7 @@ def test_i_can_parse_sequence_with_optional_in_between(): concepts = {foo: Sequence("one", Optional("two"), "three")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two three") assert res.status @@ -312,19 +322,16 @@ def test_i_can_parse_sequence_with_optional_in_between(): def test_i_can_use_reference(): - # The problem here is when there are multiple match for the same input - # The parsing result is a list of all concepts found - # So it's already a list that represents a sequence, not a choice - # So I need to create a choice concept - # create the return value for every possible graph - # --> The latter seems to be the best as we don't defer the resolution of the problem to someone else + # when there are multiple matches for the same input + # Do I need to create a choice concept ? 
+ # No, create a return value for every possible graph context = get_context() foo = Concept(name="foo") bar = Concept(name="bar") concepts = {foo: Sequence("one", "two"), bar: foo} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two") assert len(res) == 2 @@ -350,7 +357,7 @@ def test_i_can_use_context_reference_with_multiple_levels(): concepts = {foo: Sequence("one", "two"), bar: foo, baz: bar} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two") assert len(res) == 3 @@ -375,7 +382,7 @@ def test_order_is_not_important_when_using_references(): concepts = {bar: foo, foo: Sequence("one", "two")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "one two") assert len(res) == 2 @@ -390,7 +397,7 @@ def test_i_can_parse_when_reference(): concepts = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "twenty two") assert res.status @@ -415,7 +422,7 @@ def test_i_can_detect_duplicates_when_reference(): foo: OrderedChoice("twenty", "thirty") } parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) res = parser.parse(context, "twenty") assert len(res) == 2 @@ -437,7 +444,7 @@ def test_i_can_detect_infinite_recursion(): foo: bar } parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(get_context(), concepts) assert bar not in parser.concepts_dict assert foo not in parser.concepts_dict @@ -452,7 +459,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): foo: OrderedChoice(bar, "foo") } parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(get_context(), 
concepts) assert foo not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_dict # removed because of the infinite recursion @@ -464,7 +471,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): foo: OrderedChoice("foo", bar) } parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(context, concepts) assert foo in parser.concepts_dict assert bar in parser.concepts_dict @@ -485,7 +492,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence(): foo: Sequence("one", bar, "two") } parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(get_context(), concepts) assert foo not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_dict # removed because of the infinite recursion @@ -500,7 +507,7 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic foo: Sequence("one", OrderedChoice(bar, "other"), "two") } parser = ConceptLexerParser() - parser.initialize(concepts) + parser.initialize(get_context(), concepts) assert foo not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_dict # removed because of the infinite recursion @@ -510,6 +517,140 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional(): # TODO infinite recursion with optional pass + +@pytest.mark.parametrize("expression, expected", [ + ("'str'", StrMatch("str")), + ("1", StrMatch("1")), + (" 1", StrMatch("1")), + (",", StrMatch(",")), + ("'foo'?", Optional(StrMatch("foo"))), + ("'foo'*", ZeroOrMore(StrMatch("foo"))), + ("'foo'+", OneOrMore(StrMatch("foo"))), + ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), + ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), + ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))), + ("1 2 | 3 4+", OrderedChoice( + Sequence(StrMatch("1"), 
StrMatch("2")), + Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))), + ("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))), + ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))), + ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))), + ("1 *", Sequence(StrMatch("1"), StrMatch("*"))), + ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))), + ("1 +", Sequence(StrMatch("1"), StrMatch("+"))), + ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))), + ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))), + ("(1 )", StrMatch("1")), +]) +def test_i_can_parse_regex(expression, expected): + parser = RegexParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert not parser.has_error + assert res.status + assert res.value.value == expected + assert res.value.source == expression + + +@pytest.mark.parametrize("expression, error", [ + ("1 ", UnexpectedEndOfFileError()), + ("1|", UnexpectedEndOfFileError()), + ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])), +]) +def test_i_can_detect_errors(expression, error): + parser = RegexParser() + res = parser.parse(get_context(), Tokenizer(expression)) + ret_value = res.value.value + assert parser.has_error + assert not res.status + assert ret_value[0] == error + + +def test_i_can_parse_regex_with_reference(): + expression = "foo" + parser = RegexParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert res.status + assert res.value.value == ConceptMatch("foo") + assert res.value.source == expression + + +def test_i_can_parse_cross_ref_with_modifier(): + expression = "foo*" + parser = RegexParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert res.status + assert res.value.value == ZeroOrMore(ConceptMatch("foo")) + assert res.value.source == expression + + +def 
test_i_can_parse_sequence_with_cross_ref(): + expression = "foo 'and' bar+" + parser = RegexParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert res.status + assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar"))) + assert res.value.source == expression + + +def test_i_can_parse_choice_with_cross_ref(): + foo = Concept("foo") + bar = Concept("bar") + context = get_context() + context.sheerka.add_in_cache(foo) + context.sheerka.add_in_cache(bar) + + expression = "foo | bar?" + parser = RegexParser() + res = parser.parse(context, Tokenizer(expression)) + + assert res.status + assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar"))) + assert res.value.source == expression + + +def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(): + foo = Concept(name="foo") + bar = Concept(name="bar") + context = get_context() + context.sheerka.add_in_cache(foo) + context.sheerka.add_in_cache(bar) + + regex_parser = RegexParser() + foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value + bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value + + concepts = {bar: bar_definition, foo: foo_definition} + concept_parser = ConceptLexerParser() + concept_parser.initialize(context, concepts) + + res = concept_parser.parse(context, "twenty two") + assert res.status + assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")] + + res = concept_parser.parse(context, "thirty one") + assert res.status + assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")] + + res = concept_parser.parse(context, "twenty") + assert res.status + assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")] + + +def test_i_can_visit_parsing_expression(): + mult = Concept(name="mult") + add = Concept(name="add") + + visitor = ConceptVisitor() + visitor.visit(Sequence(mult, Optional(Sequence("+", add)))) + + assert 
sorted(list(visitor.concepts)) == ["add", "mult"] + + + # # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(): # context = get_context() diff --git a/tests/test_DefaultParser.py b/tests/test_DefaultParser.py index ac5b22d..75c8d88 100644 --- a/tests/test_DefaultParser.py +++ b/tests/test_DefaultParser.py @@ -2,12 +2,15 @@ import pytest import ast from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept +from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext +from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser from parsers.PythonParser import PythonParser, PythonNode from core.tokenizer import Keywords, Tokenizer from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode + # def nop(): # return NopNode() # @@ -52,7 +55,7 @@ from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode # return left_as_string == right_as_string # -def get_concept(name, where=None, pre=None, post=None, body=None): +def get_concept(name, where=None, pre=None, post=None, body=None, definition=None): concept = DefConceptNode([], name=NameNode(list(Tokenizer(name)))) if body: @@ -63,6 +66,12 @@ def get_concept(name, where=None, pre=None, post=None, body=None): concept.pre = get_concept_part(pre) if post: concept.post = get_concept_part(post) + if definition: + concept.definition = ReturnValueConcept( + "Parsers:RegexParser", + True, + definition) + return concept @@ -324,3 +333,29 @@ def test_new_line_is_not_allowed_in_the_name(): assert not res.status assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")] + + +def test_i_can_parse_def_concept_from_regex(): + text = "def concept name from bnf a_concept | 'a_string' as __definition[0]" + parser = DefaultParser() + res = parser.parse(get_context(), text) + node = res.value.value + 
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string")) + parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition) + expected = get_concept(name="name", body="__definition[0]", definition=parser_result) + + assert res.status + assert res.who == parser.name + assert res.value.source == text + assert isinstance(res.value, ParserResultConcept) + assert node == expected + + +def test_i_can_detect_empty_bnf_declaration(): + text = "def concept name from bnf as __definition[0]" + + parser = DefaultParser() + res = parser.parse(get_context(), text) + + assert not res.status + assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration") diff --git a/tests/test_ExactConceptParser.py b/tests/test_ExactConceptParser.py index 320c58a..19ac4bd 100644 --- a/tests/test_ExactConceptParser.py +++ b/tests/test_ExactConceptParser.py @@ -41,7 +41,7 @@ def test_i_can_recognize_a_simple_concept(): assert len(results) == 1 assert results[0].status - assert results[0].value == concept + assert results[0].value.value == concept def test_i_can_recognize_concepts_defined_several_times(): @@ -53,14 +53,14 @@ def test_i_can_recognize_concepts_defined_several_times(): results = ExactConceptParser().parse(context, source) assert len(results) == 2 - results = sorted(results, key=lambda x: x.value.name) # because of the usage of sets + results = sorted(results, key=lambda x: x.value.value.name) # because of the usage of sets assert results[0].status - assert results[0].value.name == "hello a" - assert results[0].value.props["a"].value == "world" + assert results[0].value.value.name == "hello a" + assert results[0].value.value.props["a"].value == "world" assert results[1].status - assert results[1].value.name == "hello world" + assert results[1].value.value.name == "hello world" def test_i_can_recognize_a_concept_with_variables(): @@ -72,9 +72,10 @@ def test_i_can_recognize_a_concept_with_variables(): assert len(results) == 1 
assert results[0].status - assert results[0].value.key == concept.key - assert results[0].value.props["a"].value == "10" - assert results[0].value.props["b"].value == "5" + concept_found = results[0].value.value + assert concept_found.key == concept.key + assert concept_found.props["a"].value == "10" + assert concept_found.props["b"].value == "5" def test_i_can_recognize_a_concept_with_duplicate_variables(): @@ -86,9 +87,10 @@ def test_i_can_recognize_a_concept_with_duplicate_variables(): assert len(results) == 1 assert results[0].status - assert results[0].value.key == concept.key - assert results[0].value.props["a"].value == "10" - assert results[0].value.props["b"].value == "5" + concept_found = results[0].value.value + assert concept_found.key == concept.key + assert concept_found.props["a"].value == "10" + assert concept_found.props["b"].value == "5" def test_i_can_manage_unknown_concept(): @@ -121,7 +123,7 @@ def test_i_can_detect_concept_from_tokens(): assert len(results) == 1 assert results[0].status - assert results[0].value == concept + assert results[0].value.value == concept def get_context(): diff --git a/tests/test_ast.py b/tests/test_ast.py index ef6ff57..53ce5d5 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -1,5 +1,7 @@ import ast +import pytest + from core.ast.nodes import NodeParent, GenericNodeConcept import core.ast.nodes from core.ast.visitors import ConceptNodeVisitor, UnreferencedNamesVisitor @@ -102,7 +104,7 @@ def my_function(a,b): assert sheerka.value(visitor.names[6]) == "a" -def test_i_can_get_non_referenced_variables(): +def test_i_can_get_unreferenced_variables(): source = """ def my_function(a,b): for i in range(b): @@ -126,6 +128,23 @@ my_function(x,y) assert "y" in values +@pytest.mark.parametrize("source, expected", [ + ("a,b", ["a", "b"]), + ("isinstance(a, int)", ["a", "int"]) + +]) +def test_i_can_get_unreferenced_variables_from_simple_expressions(source, expected): + sheerka = get_sheerka() + + node = 
ast.parse(source) + concept_node = core.ast.nodes.python_to_concept(node) + + visitor = UnreferencedNamesVisitor(sheerka) + visitor.visit(concept_node) + + assert sorted(list(visitor.names)) == expected + + def test_i_can_compare_NodeParent_with_tuple(): node_parent = NodeParent(GenericNodeConcept("For", None), "target") assert node_parent == ("For", "target") diff --git a/tests/test_concept.py b/tests/test_concept.py index 8d31fb2..ec5bfe6 100644 --- a/tests/test_concept.py +++ b/tests/test_concept.py @@ -20,6 +20,13 @@ def test_i_can_get_concept_key(name, variables, expected): assert concept.metadata.key == expected +def test_key_does_not_use_variable_when_definition_is_set(): + concept = Concept("plus").set_prop('plus') + + concept.init_key() + assert concept.metadata.key == "plus" + + def test_i_can_serialize(): """ Test concept.to_dict() diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index 9074117..5df5488 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -9,6 +9,8 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.concept import Concept, PROPERTIES_TO_SERIALIZE from core.sheerka import Sheerka, ExecutionContext from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator +from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \ + ConceptLexerParser from sdp.sheerkaDataProvider import SheerkaDataProvider tests_root = path.abspath("../build/tests") @@ -422,7 +424,7 @@ as: def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept(): """ In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b') - are correctly detected, because of the concept 'a plus b' in its body + are correctly detected, thanks to the source code 'a plus b' in its body :return: """ sheerka = get_sheerka() @@ -558,7 +560,7 @@ def test_i_can_manage_concepts_with_the_same_key_when_values_are_the_same(): assert 
res[0].who == sheerka.get_evaluator_name(MultipleSameSuccessEvaluator.NAME) -def test_i_can_create_concepts_on_python_codes(): +def test_i_can_create_concepts_with_python_code_as_body(): sheerka = get_sheerka() context = get_context(sheerka) @@ -570,6 +572,40 @@ def test_i_can_create_concepts_on_python_codes(): assert isinstance(res[0].value, list) +def test_i_can_create_concept_with_bnf_definition(): + sheerka = get_sheerka() + a = Concept("a") + sheerka.add_in_cache(a) + sheerka.concepts_grammars = ConceptLexerParser().initialize( + get_context(sheerka), + {a: OrderedChoice("one", "two")}).body + + res = sheerka.eval("def concept plus from bnf a ('plus' plus)?") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT) + + saved_concept = sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, "plus") + assert saved_concept.key == "plus" + assert saved_concept.metadata.definition == "a ('plus' plus)?" + assert "a" in saved_concept.props + assert "plus" in saved_concept.props + + saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY) + expected_bnf = Sequence( + ConceptMatch("a"), + Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))), + rule_name="plus") + assert saved_definitions[saved_concept] == expected_bnf + + new_concept = res[0].value.body + assert new_concept.metadata.name == "plus" + assert new_concept.metadata.definition == "a ('plus' plus)?" 
+ assert new_concept.bnf == expected_bnf + assert "a" in new_concept.props + assert "plus" in new_concept.props + + def get_sheerka(root="mem://", skip_builtins_in_db=True): sheerka = Sheerka(skip_builtins_in_db) sheerka.initialize(root) diff --git a/tests/test_sheerkaDataProvider.py b/tests/test_sheerkaDataProvider.py index 57a47a2..08c1b9e 100644 --- a/tests/test_sheerkaDataProvider.py +++ b/tests/test_sheerkaDataProvider.py @@ -311,6 +311,18 @@ def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root): "mem://" ]) def test_i_can_add_a_dict(root): + """ + Adding a dictionary. + Note that there is no key when adding a dictionary + + If you add {'my_key': 'my_value'} + 'my_key is not considered as the key of the entry' + + Because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'} + There are now multiple keys. + + So for dictionary entries, the key is not managed + """ sdp = SheerkaDataProvider(root) obj = {"my_key": "my_value"} @@ -735,6 +747,7 @@ def test_i_can_set_using_reference(root): # sanity check, make sure that I can load back loaded = sdp.get(entry, key) assert loaded == ObjWithKey(2, "foo") + assert getattr(loaded, Serializer.ORIGIN) == "95b5cbab545dded0b90b57a3d15a157b9a559fb586ee2f8d6ccbc6d2491f1268" @pytest.mark.parametrize("root", [ @@ -754,7 +767,35 @@ def test_i_can_add_reference_of_an_object_with_a_key(root): assert key == obj.key assert entry == "entry" assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}} - assert sdp.load_obj(digest) == obj + + loaded = sdp.load_obj(digest) + assert loaded == obj + assert getattr(loaded, Serializer.ORIGIN) == digest + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_can_add_reference_a_dictionary(root): + sdp = SheerkaDataProvider(root) + obj = {"my_key": "value1"} + + obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj)) + sdp.serializer.register(obj_serializer) + + entry, key = sdp.add(evt_digest, 
"entry", obj, use_ref=True) + state = sdp.load_state(sdp.get_snapshot()) + digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):] + + assert key is None + assert entry == "entry" + assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"} + + loaded = sdp.load_obj(digest) + assert loaded["my_key"] == obj["my_key"] + assert loaded[Serializer.ORIGIN] == digest + assert len(loaded) == 2 @pytest.mark.parametrize("root", [ diff --git a/tests/test_utils.py b/tests/test_utils.py index 88ae4c5..5d00b30 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,8 @@ import core.utils import pytest +from core.tokenizer import Token, TokenKind + @pytest.mark.parametrize("lst, as_string", [ (None, "",), @@ -76,3 +78,63 @@ def test_i_can_get_sub_classes(): def test_i_can_product(a, b, expected): res = core.utils.product(a, b) assert res == expected + + +@pytest.mark.parametrize("input_as_list, expected_as_list", [ + ([" "], []), + ([" ", "one"], ["one"]), + (["one", " "], ["one"]), + ([" ", "one", " "], ["one"]), + + (["\n", "one"], ["one"]), + (["one", "\n"], ["one"]), + (["\n", "one", "\n"], ["one"]), + + ([" ", "\n", "one"], ["one"]), + (["one", " ", "\n"], ["one"]), + ([" ", "\n", "one", " ", "\n"], ["one"]), + + (["\n", " ", "one"], ["one"]), + (["one", "\n", " "], ["one"]), + (["\n", " ", "one", "\n", " "], ["one"]), + + ([" ", "\n", " ", "one"], ["one"]), + (["one", " ", "\n", " "], ["one"]), + ([" ", "\n", " ", "one", " ", "\n", " "], ["one"]), + + (["\n", " ", "\n", "one"], ["one"]), + (["one", "\n", " ", "\n"], ["one"]), + (["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]), + +]) +def test_i_can_strip(input_as_list, expected_as_list): + actual = core.utils.strip_tokens(get_tokens(input_as_list)) + expected = get_tokens(expected_as_list) + assert actual == expected + + +def test_by_default_eof_is_not_stripped(): + actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", ""])) + expected = get_tokens(["one", "two", 
" ", "\n", ""]) + assert actual == expected + + +def test_i_can_strip_eof(): + actual = core.utils.strip_tokens(get_tokens(["one", "two", " ", "\n", ""]), True) + expected = get_tokens(["one", "two"]) + assert actual == expected + + +def get_tokens(lst): + res = [] + for e in lst: + if e == " ": + res.append(Token(TokenKind.WHITESPACE, " ", 0, 0, 0)) + elif e == "\n": + res.append(Token(TokenKind.NEWLINE, "\n", 0, 0, 0)) + elif e == "": + res.append(Token(TokenKind.EOF, "\n", 0, 0, 0)) + else: + res.append(Token(TokenKind.IDENTIFIER, e, 0, 0, 0)) + + return res