From 9e10e77737a6250d4277805d3d5008cf5ae9b281 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Thu, 14 Nov 2019 22:04:38 +0100 Subject: [PATCH] Refactored to allow ConceptEvaluator --- core/builtin_concepts.py | 207 +++++++++ core/concept.py | 115 ++--- core/sheerka.py | 406 +++++++++++------- core/tokenizer.py | 36 +- core/utils.py | 50 ++- docs/Concept_properties.md | 56 +++ docs/blog.rst | 16 +- docs/syntax_v3.md | 19 +- evaluators/AddConceptEvaluator.py | 67 ++- evaluators/BaseEvaluator.py | 28 +- evaluators/ConceptEvaluator.py | 35 ++ evaluators/DefaultEvaluator.py | 39 -- evaluators/DuplicateConceptEvaluator.py | 41 ++ evaluators/ParsersEvaluator.py | 42 ++ evaluators/PythonEvaluator.py | 30 +- main.py | 41 +- parsers/BaseParser.py | 26 +- parsers/DefaultParser.py | 533 ++++++++++++++---------- parsers/ExactConceptParser.py | 24 +- parsers/PythonParser.py | 57 ++- sdp/sheerkaDataProvider.py | 34 +- sdp/sheerkaSerializer.py | 5 + tests/test_DefaultParser.py | 350 ++++++++++++++++ tests/test_ExactConceptParser.py | 68 +-- tests/test_PythonParser.py | 77 ++++ tests/test_defautparser.py | 346 --------------- tests/test_sheerka.py | 459 +++++++++++++++++--- tests/test_sheerkaDataProvider.py | 75 +++- tests/test_tokenizer.py | 119 ++++++ tests/test_utils.py | 12 +- 30 files changed, 2406 insertions(+), 1007 deletions(-) create mode 100644 core/builtin_concepts.py create mode 100644 docs/Concept_properties.md create mode 100644 evaluators/ConceptEvaluator.py delete mode 100644 evaluators/DefaultEvaluator.py create mode 100644 evaluators/DuplicateConceptEvaluator.py create mode 100644 evaluators/ParsersEvaluator.py create mode 100644 tests/test_DefaultParser.py create mode 100644 tests/test_PythonParser.py delete mode 100644 tests/test_defautparser.py create mode 100644 tests/test_tokenizer.py diff --git a/core/builtin_concepts.py b/core/builtin_concepts.py new file mode 100644 index 0000000..21e83f7 --- /dev/null +++ b/core/builtin_concepts.py @@ -0,0 +1,207 @@ +from enum import Enum + +from core.concept import Concept + + +class BuiltinConcepts(Enum): + """ + List of builtin concepts that do no need any specific implementation + """ + SHEERKA = 1 + SUCCESS = 2 + ERROR = 3 + UNKNOWN_CONCEPT = 4 # the request concept is not recognized + RETURN_VALUE = 5 # a value is returned + CONCEPT_TOO_LONG = 6 # concept cannot be processed by exactConcept parser + NEW_CONCEPT = 7 # when a new concept is added + UNKNOWN_PROPERTY = 8 # when requesting for a unknown property + PARSER_RESULT = 9 + TOO_MANY_SUCCESS = 10 # when expecting a limited number of successful return value + TOO_MANY_ERRORS = 11 # when expecting a limited number of successful return value + NOT_FOR_ME = 12 # a parser recognize that the entry is not meant for it + IS_EMPTY = 13 # when a set is empty + INVALID_RETURN_VALUE = 14 # the return value of an evaluator is not correct + BEFORE_PARSING = 15 # activated before evaluation by the parsers + PARSING = 16 # activated during the parsing. It contains the text to parse + AFTER_PARSING = 17 # activated when the parsing process seems to be finished + CONCEPT_ALREADY_DEFINED = 18 # when you try to add the same concept twice + + +""" +Some concepts have a specific implementation +It's mainly to a have proper __repr__ implementation, or redefine the is_unique attribut +""" + + +class SuccessConcept(Concept): + def __init__(self): + super().__init__(BuiltinConcepts.SUCCESS, True, True, BuiltinConcepts.SUCCESS) + + +class ErrorConcept(Concept): + def __init__(self, error=None): + super().__init__(BuiltinConcepts.ERROR, True, False, BuiltinConcepts.ERROR, body=error) + + def __repr__(self): + return f"({self.id}){self.name}: {self.body}" + + +class ReturnValueConcept(Concept): + """ + This class represents the result of a data flow processing + It's the main input for the evaluators + """ + + def __init__(self, who=None, status=None, value=None, message=None, parents=None): + super().__init__(BuiltinConcepts.RETURN_VALUE, True, False, BuiltinConcepts.RETURN_VALUE) + self.set_prop("who", who) + self.set_prop("status", status) + self.body = value + self.set_prop("message", message) + self.set_prop("parents", parents) + + @property + def who(self): + return self.props["who"].value + + @who.setter + def who(self, value): + self.set_prop("who", value) + + @property + def status(self): + return self.props["status"].value + + @status.setter + def status(self, value): + self.set_prop("status", value) + + @property + def value(self): + return self.body + + @value.setter + def value(self, value): + self.body = value + + @property + def message(self): + return self.props["message"].value + + @message.setter + def message(self, value): + self.set_prop("message", value) + + @property + def parents(self): + return self.props["parents"].value + + @parents.setter + def parents(self, value): + self.set_prop("parents", value) + + def __repr__(self): + return f"ReturnValue(who={self.who}, status={self.status}, value={self.value}, message={self.message})" + + def __eq__(self, other): + if not isinstance(other, ReturnValueConcept): + return False + + return self.who == other.who and \ + self.status == other.status and \ + self.value == other.value and \ + self.message == other.message + + def __hash__(self): + return hash((self.who, self.status, self.value)) + + +class UnknownPropertyConcept(Concept): + """ + This error is raised when, during sheerka.new(), an unknown property is asked + """ + + def __init__(self, property_name=None, concept=None): + super().__init__(BuiltinConcepts.UNKNOWN_PROPERTY, True, False, BuiltinConcepts.UNKNOWN_PROPERTY) + self.set_prop("concept", concept) + self.body = property_name + + def __repr__(self): + return f"UnknownProperty(property={self.property_name}, concept={self.concept})" + + @property + def concept(self): + return self.props["concept"].value + + @property + def property_name(self): + return self.body + + +class ParserResultConcept(Concept): + """ + Result of a parsing + """ + + def __init__(self, parser=None, source=None, value=None, try_parsed=None): + super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT) + self.set_prop("parser", parser) + self.set_prop("source", source) + self.set_prop("try_parsed", try_parsed) # in case of error, what was found before the error + self.body = value + + def __repr__(self): + return f"ParserResult({self.body})" + + def __eq__(self, other): + if not isinstance(other, ParserResultConcept): + return False + + return self.source == other.source and \ + self.parser == other.parser and \ + self.body == other.body and \ + self.try_parsed == other.try_parsed + + @property + def value(self): + return self.body + + @property + def try_parsed(self): + return self.props["try_parsed"].value + + @property + def source(self): + return self.props["source"].value + + @property + def parser(self): + return self.props["parser"].value + + +class InvalidReturnValueConcept(Concept): + """ + Error returned when an evaluator is not correctly coded + The accepted return value are + ReturnValueConcept, list of ReturnValueConcept or None + """ + + def __init__(self, return_value=None, evaluator=None): + super().__init__(BuiltinConcepts.INVALID_RETURN_VALUE, True, False, BuiltinConcepts.INVALID_RETURN_VALUE) + self.set_prop("evaluator", evaluator) + self.body = return_value + + +class BeforeParsingConcept(Concept): + def __init__(self): + super().__init__(BuiltinConcepts.BEFORE_PARSING, True, True, BuiltinConcepts.BEFORE_PARSING) + + +class ParsingConcept(Concept): + def __init__(self): + super().__init__(BuiltinConcepts.PARSING, True, True, BuiltinConcepts.PARSING) + + +class AfterParsingConcept(Concept): + def __init__(self): + super().__init__(BuiltinConcepts.AFTER_PARSING, True, True, BuiltinConcepts.AFTER_PARSING) diff --git a/core/concept.py b/core/concept.py index f4c498b..515a639 100644 --- a/core/concept.py +++ b/core/concept.py @@ -8,6 +8,10 @@ log = logging.getLogger(__name__) class ConceptParts(Enum): + """ + Helper class, Note quite sure that is it that useful + I guess, I was learning nums with Python... + """ WHERE = "where" PRE = "pre" POST = "post" @@ -20,21 +24,36 @@ class Concept: A concept is a the base object of our universe Everything is a concept """ - props_to_serialize = ("id", "is_builtin", "name", "where", "pre", "post", "body", "desc") + props_to_serialize = ("id", "is_builtin", "key", "name", "where", "pre", "post", "body", "desc", "obj") + props_for_digest = ("is_builtin", "key", "name", "where", "pre", "post", "body", "desc") + concept_parts = set(item.value for item in ConceptParts) PROPERTY_PREFIX = "__var__" - def __init__(self, name=None, is_builtin=False, where=None, pre=None, post=None, body=None, desc=None, key=None): - self.name = name + def __init__(self, name=None, + is_builtin=False, + is_unique=False, + key=None, + where=None, + pre=None, + post=None, + body=None, + desc=None, + obj=None): + + self.name = str(name) if name else None self.is_builtin = is_builtin + self.is_unique = is_unique + self.key = str(key) if key else None # name od the concept, where prop are replaced. to ease search + self.where = where # condition to recognize variables in name self.pre = pre # list of pre conditions before calling the main function self.post = post # list of post conditions after calling the main function self.body = body # main method, can also be the value of the concept self.desc = desc - self.id = None - self.key = key + self.id = None # unique identifier for a concept. The id will never be modified + self.obj = obj # main of principal property of the concept self.props = {} # list of Property for this concept self.functions = {} # list of helper functions @@ -46,11 +65,19 @@ class Concept: def __eq__(self, other): if not isinstance(other, Concept): return False - return self.name == other.name and \ - self.where == other.where and \ - self.pre == other.pre and \ - self.post == other.post and \ - self.body == other.body + + # check the attributes + for prop in self.props_to_serialize: + if getattr(self, prop) != getattr(other, prop): + print(prop) + return False + + # check the props (Concept variables) + for var_name, p in self.props.items(): + if p != other.props[var_name]: + return False + + return True def __hash__(self): return hash(self.name) @@ -68,7 +95,7 @@ class Concept: :return: """ if self.key is not None: - return self.key + return self if tokens is None: tokens = iter(Tokenizer(self.name)) @@ -100,10 +127,11 @@ class Concept: So the values are kept in cache. For concepts loaded from sdp, these ASTs must be created again + TODO : Seems to be a service method. Can be put somewhere else :param codes: :return: """ - possibles_codes = set(item.value for item in ConceptParts) + possibles_codes = self.concept_parts if codes is None: return for key in codes: @@ -117,14 +145,17 @@ class Concept: Returns the digest of the event :return: hexa form of the sha256 """ - return hashlib.sha256(f"Concept:{self.name}{self.pre}{self.post}{self.body}".encode("utf-8")).hexdigest() + return hashlib.sha256(f"Concept:{self.to_dict(self.props_for_digest)}".encode("utf-8")).hexdigest() - def to_dict(self): + def to_dict(self, props_to_use=None): """ Returns a dict representing 'self' :return: """ - props_as_dict = dict((prop, getattr(self, prop)) for prop in self.props_to_serialize) + + props_to_use = props_to_use or self.props_to_serialize + + props_as_dict = dict((prop, getattr(self, prop)) for prop in props_to_use) props_as_dict["props"] = [(p, self.props[p].value) for p in self.props] return props_as_dict @@ -150,51 +181,30 @@ class Concept: :param other: :return: """ - for prop in self.props_to_serialize: - setattr(self, prop, getattr(other, prop)) + if other is None: + return self + + self.from_dict(other.to_dict()) + # for prop in self.props_to_serialize: + # setattr(self, prop, getattr(other, prop)) return self - def set_prop(self, prop_name, prop_value): + def set_prop(self, prop_name, prop_value=None): self.props[prop_name] = Property(prop_name, prop_value) + return self def set_prop_by_index(self, index, prop_value): prop_name = list(self.props.keys())[index] self.props[prop_name] = Property(prop_name, prop_value) - -class ErrorConcept(Concept): - NAME = "Error" - - def __init__(self, where=None, pre=None, post=None, body=None, desc=None): - Concept.__init__(self, self.NAME, is_builtin=True, where=where, pre=pre, post=post, body=body, desc=desc) - self.key = self.NAME - - def __repr__(self): - return f"({self.id}){self.name}: {self.body}" - - -class TooManySuccessConcept(Concept): - NAME = "Too many successful items" - - def __init__(self, items=None): - super().__init__(self.NAME, body=items) - self.key = self.NAME - - -class ReturnValueConcept(Concept): - NAME = "Return Value" - - def __init__(self, return_value=None): - super().__init__(self.NAME, body=return_value) - self.key = self.NAME - - def __repr__(self): - return f"({self.id}){self.name}: {self.body}" + return self class Property: """ - Defines a behaviour of Concept + Defines the variables of a concept + It as its specific class, because from experience, + property management is more complex than a key/value pair """ def __init__(self, name, value): @@ -203,3 +213,12 @@ class Property: def __repr__(self): return f"{self.name}={self.value}" + + def __eq__(self, other): + if not isinstance(other, Property): + return False + + return self.name == other.name and self.value == other.value + + def __hash__(self): + return hash((self.name, self.value)) diff --git a/core/sheerka.py b/core/sheerka.py index c72233a..17f2e64 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -1,9 +1,9 @@ from dataclasses import dataclass - -from core.concept import Concept, ErrorConcept, Property, TooManySuccessConcept, ReturnValueConcept -from parsers.PythonParser import PythonGetNamesVisitor, PythonNode +from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept +from core.concept import Concept, ConceptParts +from evaluators.BaseEvaluator import OneReturnValueEvaluator +from parsers.BaseParser import BaseParser from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError -from parsers.DefaultParser import DefConceptNode, DefaultParser import core.utils import logging @@ -11,60 +11,28 @@ import logging log = logging.getLogger(__name__) -class Singleton(type): - _instances = {} - - def __call__(cls, *args, **kwargs): - if cls not in cls._instances: - cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) - return cls._instances[cls] - - -@dataclass -class ReturnValue: - """ - Class that handle the return of a concept - To avoid using the try/except pattern for each and every call - To give context (ie return message) even when the call is successful - """ - who: object - status: bool - value: Concept - message: str = None - - -@dataclass -class ExecutionContext: - """ - To keep track of the execution of a request - """ - sheerka: object - event_digest: str - - class Sheerka(Concept): """ Main controller for the project """ - NAME = "Sheerka" - UNKNOWN_CONCEPT_NAME = "Unknown Concept" - SUCCESS_CONCEPT_NAME = "Success" - CONCEPT_TOO_LONG_CONCEPT_NAME = "Concept too long" - CONCEPTS_ENTRY = "All_Concepts" BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" USER_CONCEPTS_KEYS = "User_Concepts" def __init__(self, debug=False): log.debug("Starting Sheerka.") - super().__init__(Sheerka.NAME) + super().__init__(BuiltinConcepts.SHEERKA, True, True, BuiltinConcepts.SHEERKA) # cache of the most used concepts # Note that these are only templates # They are used as a footprint for instantiation self.concepts_cache = {} + # cache for builtin types. + # It allow instantiation of a builtin clas + self.builtin_cache = {} + # a concept can be instantiated # ex: File is a concept, but File('foo.txt') is an instance # TODO: manage contexts @@ -78,7 +46,6 @@ class Sheerka(Concept): self.parsers = [] self.evaluators = [] - self.key = self.NAME self.debug = debug def initialize(self, root_folder=None): @@ -86,7 +53,6 @@ class Sheerka(Concept): Starting Sheerka Loads the current configuration Notes that when it's the first time, it also create the needed working folders - :param debug: :param root_folder: root configuration folder :return: ReturnValue(Success or Error) """ @@ -94,22 +60,27 @@ class Sheerka(Concept): try: self.init_logging() self.sdp = SheerkaDataProvider(root_folder) - self.parsers.append(core.utils.get_class("parsers.DefaultParser.DefaultParser")) - self.parsers.append(core.utils.get_class("parsers.PythonParser.PythonParser")) - #self.parsers.append(core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")) - - self.evaluators.append(core.utils.get_object("evaluators.DefaultEvaluator.DefaultEvaluator")) - self.evaluators.append(core.utils.get_object("evaluators.AddConceptEvaluator.AddConceptEvaluator")) - self.evaluators.append(core.utils.get_object("evaluators.PythonEvaluator.PythonEvaluator")) if self.sdp.first_time: self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000) - self.create_builtin_concepts() - except IOError as e: - return ReturnValue(self, False, self.get(ErrorConcept.NAME), e) + self.initialize_builtin_concepts() - return ReturnValue(self, True, self.get(Sheerka.SUCCESS_CONCEPT_NAME)) + self.parsers.append(core.utils.get_class("parsers.DefaultParser.DefaultParser")) + self.parsers.append(core.utils.get_class("parsers.PythonParser.PythonParser")) + self.parsers.append(core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")) + + self.evaluators.append(core.utils.new_object("evaluators.ParsersEvaluator.ParsersEvaluator")) + self.evaluators.append(core.utils.new_object("evaluators.AddConceptEvaluator.AddConceptEvaluator")) + self.evaluators.append(core.utils.new_object("evaluators.PythonEvaluator.PythonEvaluator")) + self.evaluators.append(core.utils.new_object("evaluators.ConceptEvaluator.ConceptEvaluator")) + self.evaluators.append( + core.utils.new_object("evaluators.DuplicateConceptEvaluator.DuplicateConceptEvaluator")) + + except IOError as e: + return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e) + + return ReturnValueConcept(self, True, self) def set_id_if_needed(self, obj, is_builtin): """ @@ -123,34 +94,35 @@ class Sheerka(Concept): obj.id = self.sdp.get_next_key(self.BUILTIN_CONCEPTS_KEYS if is_builtin else self.USER_CONCEPTS_KEYS) log.debug(f"Setting id '{obj.id}' to concept '{obj.name}'.") - def create_builtin_concepts(self): + def initialize_builtin_concepts(self): """ Initializes the builtin concepts :return: None """ log.debug("Initializing builtin concepts") - builtins = [ - self, - Concept(Sheerka.UNKNOWN_CONCEPT_NAME, key=Sheerka.UNKNOWN_CONCEPT_NAME), - Concept(Sheerka.SUCCESS_CONCEPT_NAME, key=Sheerka.SUCCESS_CONCEPT_NAME), - Concept(Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME, key=Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME), - ErrorConcept(), - TooManySuccessConcept(), - ReturnValueConcept(), - ] + builtins_classes = self.get_builtins_classes_as_dict() - for concept in builtins: - self.add_in_cache(concept) + # this all initialization of the builtins seems to be little bit complicated + # why do we need to update it from DB ? + for key in BuiltinConcepts: + concept = self if key == BuiltinConcepts.SHEERKA \ + else builtins_classes[str(key)]() if str(key) in builtins_classes \ + else Concept(key, True, False, key) + + if not concept.is_unique and str(key) in builtins_classes: + self.builtin_cache[key] = builtins_classes[str(key)] from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) if from_db is None: - log.debug(f"'{concept.name}' concept is not found. Adding.") + log.debug(f"'{concept.name}' concept is not found in db. Adding.") self.set_id_if_needed(concept, True) self.sdp.add("init", self.CONCEPTS_ENTRY, concept, use_ref=True) else: - log.debug(f"Found concept '{from_db}'. Updating.") + log.debug(f"Found concept '{from_db}' in db. Updating.") concept.update_from(from_db) + self.add_in_cache(concept) + def init_logging(self): if self.debug: log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" @@ -163,37 +135,75 @@ class Sheerka(Concept): def eval(self, text): evt_digest = self.sdp.save_event(Event(text)) - exec_context = ExecutionContext(self, evt_digest) - return_values = self.try_parse(exec_context, text) - return_values = self.try_eval(exec_context, return_values) + exec_context = ExecutionContext(self.key, evt_digest, self) - # return_values = [] - # for parser_name, status, node in result: - # if not status: - # return_values.append(ReturnValue(False, ErrorConcept(body=node))) - # elif status and isinstance(node, DefConceptNode): - # return_values.append(self.add_concept(exec_context, node)) - # else: - # return_values.append(ReturnValue(True, node)) + before_parsing = self.ret(self.eval.__name__, True, self.new(BuiltinConcepts.BEFORE_PARSING)) + return_values = self.process(exec_context, [], [before_parsing]) + return_values = core.utils.remove_from_list(return_values, [before_parsing]) + + parsing_results = self.parse(exec_context, text) + return_values.extend(parsing_results) + processing_parsing = self.ret(self.eval.__name__, True, self.new(BuiltinConcepts.PARSING)) + return_values = self.process(exec_context, return_values, [processing_parsing]) + return_values = core.utils.remove_from_list(return_values, [processing_parsing]) + + after_parsing = self.ret(self.eval.__name__, True, self.new(BuiltinConcepts.AFTER_PARSING)) + return_values = self.process(exec_context, return_values, [after_parsing]) + return_values = core.utils.remove_from_list(return_values, [after_parsing]) return return_values - def try_parse(self, context, text): + def expect_one(self, context, items): + + if not isinstance(items, list): + items = [items] + + if len(items) == 0: + return self.ret(context.who, False, self.new(BuiltinConcepts.IS_EMPTY, obj=items)) + + successful_results = [item for item in items if item.status] + number_of_successful = len(successful_results) + total_items = len(items) + + # remove errors when a winner is found + if number_of_successful == 1: + # log.debug(f"1 / {total_items} good item found.") + return successful_results[0] + + # too many winners, which one to choose ? + if number_of_successful > 1: + log.debug(f"{number_of_successful} / {total_items} good items. Too many success") + return self.ret(context.who, False, self.new(BuiltinConcepts.TOO_MANY_SUCCESS, obj=successful_results)) + + # only errors, i cannot help you + log.debug(f"{total_items} items. Only errors") + return self.ret(context.who, False, self.new(BuiltinConcepts.TOO_MANY_ERRORS, obj=items)) + + def parse(self, context, text): result = [] - log.debug(f"Parsing '{text}'") + if log.isEnabledFor(logging.DEBUG): + debug_text = "'" + text + "'" if isinstance(text, str) \ + else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens" + log.debug(f"Parsing {debug_text}") for parser in self.parsers: p = parser() - # try: - # tree = p.parse() - # result.append((p.name, tree)) - # except Exception as e: - # result.append((p.name, e)) - tree = p.parse(context, text) - result.append(ReturnValue(p.name, not p.has_error, p.error_sink if p.has_error else tree)) + res = p.parse(context, text) + if isinstance(res, list): + result.extend(res) + else: + result.append(res) return result - def try_eval(self, context, items): + def process(self, context, return_values, contextual_concepts=None): log.debug("Evaluating parsing result.") + + # init + if not isinstance(return_values, list): + return_values = [return_values] + + if contextual_concepts: + return_values.extend(contextual_concepts) + # group the evaluators by priority and sort them # The first one to be applied will be the one with the highest priority grouped_evaluators = {} @@ -201,60 +211,102 @@ class Sheerka(Concept): grouped_evaluators.setdefault(item.priority, []).append(item) sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True) - for priority in sorted_priorities: - log.debug("Processing priority " + str(priority)) - for item in items: - log.debug(item) - original_items = items[:] - evaluated_items = [] - for evaluator in grouped_evaluators[priority]: - if evaluator.matches(context, original_items): - result = evaluator.eval(context, original_items) - if isinstance(result, list): - evaluated_items.extend(result) + # process + while True: + simple_digest = return_values[:] # set(id(r) for r in return_values) + + for priority in sorted_priorities: + # log.debug("Processing priority " + str(priority)) + # for item in return_values: + # log.debug(item) + original_items = return_values[:] + evaluated_items = [] + to_delete = [] + for evaluator in grouped_evaluators[priority]: + + # process evaluators that work on return value + if isinstance(evaluator, OneReturnValueEvaluator): + for item in original_items: + if evaluator.matches(context, item): + result = evaluator.eval(context, item) + if result is None: + continue + elif isinstance(result, list): + evaluated_items.extend(result) + to_delete.append(item) + elif isinstance(result, ReturnValueConcept): + evaluated_items.append(result) + to_delete.append(item) + else: + error = self.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result, + evaluator=evaluator) + evaluated_items.append(self.ret("sheerka.process", False, error, parents=[item])) + to_delete.append(item) + # process evaluators that work on all return values else: - evaluated_items.append(result) + if evaluator.matches(context, original_items): + results = evaluator.eval(context, original_items) + if not isinstance(results, list): + results = [results] + for result in results: + evaluated_items.append(result) + to_delete.extend(result.parents) - # what was computed by this group will be the input of the following group - items = evaluated_items if len(evaluated_items) > 0 else original_items + return_values = evaluated_items + return_values.extend([item for item in original_items if item not in to_delete]) - return items + # have we done something ? + to_compare = return_values[:] # set(id(r) for r in return_values) + if simple_digest == to_compare: + break - def add_concept(self, exec_context, def_concept_node: DefConceptNode): + return return_values + + def create_new_concept(self, context, concept): """ Adds a new concept to the system - :param exec_context: - :param def_concept_node: DefConceptNode + :param context: + :param concept: DefConceptNode :return: digest of the new concept """ - # validate the node - get_names_visitor = PythonGetNamesVisitor() + concept.init_key() - concept = Concept(def_concept_node.name) - for prop in ("where", "pre", "post", "body"): - # put back the sources - concept_part_node = getattr(def_concept_node, prop) - if isinstance(concept_part_node, PythonNode): - get_names_visitor.visit(concept_part_node.ast) - source = concept_part_node.source if hasattr(concept_part_node, "source") else "" - setattr(concept, prop, source) + # checks for duplicate concepts + if self.sdp.exists(self.CONCEPTS_ENTRY, concept.key, concept.get_digest()): + error = SheerkaDataProviderDuplicateKeyError(self.CONCEPTS_ENTRY + "." + concept.key, concept) + return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0]) - # try to find variables (eg props) - # Note that with this method, the variables will be created in the order of appearance - for token in def_concept_node.tokens["name"]: - if token.value in get_names_visitor.names: - concept.set_prop(token.value, None) - - concept.init_key(def_concept_node.tokens["name"]) - concept.add_codes(def_concept_node.get_codes()) + # set id before saving in db self.set_id_if_needed(concept, False) + # save the new context in sdp try: - self.sdp.add(exec_context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True) + self.sdp.add(context.event_digest, self.CONCEPTS_ENTRY, concept, use_ref=True) except SheerkaDataProviderDuplicateKeyError as error: - return ReturnValue(self.add_concept.__name__, False, ErrorConcept(body=error), error.args[0]) - return ReturnValue(self.add_concept.__name__, True, concept) + return self.ret(self.create_new_concept.__name__, False, ErrorConcept(error), error.args[0]) + + # add in cache for quick further reference + self.concepts_cache[concept.key] = concept + + # process the return in needed + ret = self.ret(self.create_new_concept.__name__, True, self.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) + return ret + + def add_codes_to_concept(self, context, concept): + """ + Updates the codes of the newly created concept + Basically, it runs the parsers on all parts + :param concept: + :param context: + :return: + """ + for part_key in ConceptParts: + source = getattr(concept, part_key.value) + if source is None or source == "": + continue + ret_val = self.expect_one(context, self.parse(context, source)) + concept.codes[part_key] = ret_val def add_in_cache(self, concept): """ @@ -268,36 +320,85 @@ class Sheerka(Concept): def get(self, concept_key): """ Tries to find a concept - TODO: how to manage single vs multiple instances + What is return must be used a template for another concept. + You must not modify the returned concept :param concept_key: :return: """ + if isinstance(concept_key, BuiltinConcepts): + concept_key = str(concept_key) + # first search in cache if concept_key in self.concepts_cache: return self.concepts_cache[concept_key] - return self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key) or \ - self.new(self.UNKNOWN_CONCEPT_NAME, body=concept_key) + # else look in sdp + from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key) + if from_db is not None: + return from_db - def new(self, concept, **kwargs): + # else return new Unknown concept + # Note that I don't call the new() method, as it use get() -> cyclic call + unknown_concept = Concept() + template = self.concepts_cache[str(BuiltinConcepts.UNKNOWN_CONCEPT)] + unknown_concept.update_from(template) + unknown_concept.body = concept_key + return unknown_concept + + def new(self, concept_key, **kwargs): """ Returns an instance of a new concept - TODO: Checks if the concept is supposed to be unique (ex Sheerka, or the number 'one' for example) - :param concept: + When the concept is supposed to be unique, returns the same instance + :param concept_key: :param kwargs: :return: """ + template = self.get(concept_key) - if isinstance(concept, str): - concept = self.get(concept) + # manage concept not found + if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \ + concept_key != BuiltinConcepts.UNKNOWN_CONCEPT: + return template + # manage singleton + if template.is_unique: + return template + + # otherwise, create another instance + concept = self.builtin_cache[concept_key]() if concept_key in self.builtin_cache else Concept() + concept.update_from(template) + + # update the properties for k, v in kwargs.items(): - if hasattr(concept, k): + if k in concept.props: + concept.set_prop(k, v) + elif hasattr(concept, k): setattr(concept, k, v) + else: + return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=k, concept=concept) + # TODO : add the concept to the list of known concepts (self.instances) return concept + def ret(self, who, status, value, message=None, parents=None): + """ + Creates and returns a ReturnValue concept + :param who: + :param status: + :param value: + :param message: + :param parents: + :return: + """ + return self.new( + BuiltinConcepts.RETURN_VALUE, + who=who, + status=status, + value=value, + message=message, + parents=parents) + def isinstance(self, a, b): """ return true if the concept a is an instance of the concept b @@ -306,15 +407,40 @@ class Sheerka(Concept): :return: """ - if not isinstance(a, Concept): - raise SyntaxError("The first parameter of isinstance MUST be a concept") + if isinstance(a, BuiltinConcepts): # common KSI error ;-) + raise SyntaxError("Remember that the first parameter of isinstance MUST be a concept") - b_key = b if isinstance(b, str) else b.key + if not isinstance(a, Concept): + return False + + b_key = b.key if isinstance(b, Concept) else str(b) # TODO : manage when a is the list of all possible b # for example, if a is a color, it will be found the entry 'All_Colors' return a.key == b_key + @staticmethod + def get_builtins_classes_as_dict(): + res = {} + for c in core.utils.get_classes("core.builtin_concepts"): + if issubclass(c, Concept) and c != Concept: + res[c().key] = c + + return res + @staticmethod def test(): return "I have access to Sheerka !" + + +@dataclass +class ExecutionContext: + """ + To keep track of the execution of a request + """ + who: object + event_digest: str + sheerka: Sheerka + + def push(self, who): + return ExecutionContext(who, self.event_digest, self.sheerka) diff --git a/core/tokenizer.py b/core/tokenizer.py index cb72dd6..83072ad 100644 --- a/core/tokenizer.py +++ b/core/tokenizer.py @@ -32,19 +32,17 @@ class TokenKind(Enum): AMPER = "amper" EQUALS = "=" AT = "at" - BACK_QUOTE = "bquote" # ` - BACK_SLASH = "bslash" # \ - CARAT = "carat" # ^ - DOLLAR = "dollar" # $ - EMARK = "emark" # ! - GREATER = "greater" # > - LESS = "less" # < - HASH = "HASH" # # - TILDE = "tilde" # ~ - UNDERSCORE = "underscore" # _ - DEGREE = "degree" # ° - - + BACK_QUOTE = "bquote" # ` + BACK_SLASH = "bslash" # \ + CARAT = "carat" # ^ + DOLLAR = "dollar" # $ + EMARK = "emark" # ! + GREATER = "greater" # > + LESS = "less" # < + HASH = "HASH" # # + TILDE = "tilde" # ~ + UNDERSCORE = "underscore" # _ + DEGREE = "degree" # ° @dataclass() @@ -55,6 +53,18 @@ class Token: line: int column: int + def __repr__(self): + if type == TokenKind.IDENTIFIER: + value = "ident:" + str(self.value) + elif type == TokenKind.WHITESPACE: + value = " " + elif type == TokenKind.NEWLINE: + value = r"\n" + else: + value = self.value + + return f"Token({value})" + @dataclass() class LexerError(Exception): diff --git a/core/utils.py b/core/utils.py index aa129ec..26f9c25 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,3 +1,7 @@ +import inspect +import sys + + def sysarg_to_string(argv): """ Transform a list of strings into a single string @@ -16,24 +20,38 @@ def sysarg_to_string(argv): result += '"' + s + '"' if " " in s else s first = False + if result[0] in ('"', "'"): + result = result[1:-1] # strip quotes return result -def get_class(kls): +def get_class(qname): """ - Loads a class from its string full qualified name - :param kls: + Loads a class from its full qualified name + :param qname: :return: """ - parts = kls.split('.') + parts = qname.split('.') module = ".".join(parts[:-1]) m = __import__(module) for comp in parts[1:]: m = getattr(m, comp) return m +def get_module(qname): + """ + Loads a module from its full qualified name + :param qname: + :return: + """ + parts = qname.split('.') + m = __import__(qname) + for comp in parts[1:]: + m = getattr(m, comp) + return m -def get_object(kls, *args, **kwargs): + +def new_object(kls, *args, **kwargs): """ New instance of an object :param kls: @@ -56,3 +74,25 @@ def get_full_qualified_name(obj): return obj.__class__.__name__ # Avoid reporting __builtin__ else: return module + '.' + obj.__class__.__name__ + + +def get_classes(module_name): + mod = get_module(module_name) + for name in dir(mod): + obj = getattr(mod, name) + if inspect.isclass(obj): + yield obj + + +def remove_from_list(lst, to_remove): + """ + Removes elements from a list if they exist + :param lst: + :param to_remove: + :return: + """ + for item in to_remove: + if item in lst: + lst.remove(item) + + return lst diff --git a/docs/Concept_properties.md b/docs/Concept_properties.md new file mode 100644 index 0000000..940a338 --- /dev/null +++ b/docs/Concept_properties.md @@ -0,0 +1,56 @@ +class Concept: + * name + + human reading name of the concept. It is how the concept will be referenced + * is_builtin + + Simple flag to make the difference between user defined concept and builtins + + * is_unique + + To manage singleton. There should be only one instance of the concept if True + + * key + + It is the name of the concept, where all variables, or the obj are replaced by + __var__x (x is the number of the variable). It's use to quickly find concepts + with variables + + * id + + The key is the unique identifier for the class of concept. The concept may be + modified along its life, but its key will never change + + * where + + preconditions for the variables of the concept. Body will be executed only + if the where conditions on the properties are True + + * pre + + Execution context to verified before executing the context + + * post + + Post conditions to verify, once the context is executed + + * obj + + main or principal subject of the concept + + * body + + Code or value of the concept + + * desc + + explanation of the concept + + * digest + + SHA 256 of all properties. It allows + + * props + + other properties of the concept (obj is the first one) + \ No newline at end of file diff --git a/docs/blog.rst b/docs/blog.rst index 8fc41ba..8ecc310 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -382,4 +382,18 @@ so I can type 1 + 1 sheerka.test() -I will now work on how to call an already defined concept. \ No newline at end of file +I will now work on how to call an already defined concept. + + +2019-11-11 +********** + +Maintaining the blog +"""""""""""""""""""" +It's not very easy to maintain this blog. Every time I have some time to work on **Sheerka**, +I must choose between expressing my ideas in this blog and coding. + +I have plenty of ideas that I would like to express, sometimes just to put the idea down, +but I lack of time. It would be great if I can find a tool that will allow me to just to +dictate my words. I know that there are plenty out there, I need to spend some time to test +them and choose one. \ No newline at end of file diff --git a/docs/syntax_v3.md b/docs/syntax_v3.md index 7d62626..3dfbcba 100644 --- a/docs/syntax_v3.md +++ b/docs/syntax_v3.md @@ -1,6 +1,10 @@ +In this example, I try to think of how I can deal with errors ``` > "hello -> unfinished quote " + +create a concept to recognize the error + > def concept unfinished quote q ... where: ...... q in ('"', '"') @@ -9,9 +13,16 @@ ... input = sheerka.last_input +Create the rule to manage the error + > when unfinished quote q as c: -... add rule as: -...... if q in sheerka.input: +... add rule r as: +...... if q is last of sheerka.input: ......... sheerka.resume(c, c.input + input) -......... remove rule -``` \ No newline at end of file +......... remove r + +def concept a is last of b as: + where b is list + as b[-1] == a +``` + diff --git a/evaluators/AddConceptEvaluator.py b/evaluators/AddConceptEvaluator.py index f3a91e2..955d48d 100644 --- a/evaluators/AddConceptEvaluator.py +++ b/evaluators/AddConceptEvaluator.py @@ -1,11 +1,15 @@ -from evaluators.BaseEvaluator import BaseEvaluator +from core.builtin_concepts import ParserResultConcept, ReturnValueConcept +from core.concept import Concept +from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.DefaultParser import DefConceptNode import logging +from parsers.PythonParser import PythonGetNamesVisitor, PythonNode + log = logging.getLogger(__name__) -class AddConceptEvaluator(BaseEvaluator): +class AddConceptEvaluator(OneReturnValueEvaluator): """ Used to add a new concept """ @@ -13,11 +17,60 @@ class AddConceptEvaluator(BaseEvaluator): def __init__(self): super().__init__("Add new Concept", 50) - def matches(self, context, items): - return len(items) == 1 and items[0].status and isinstance(items[0].value, DefConceptNode) + def matches(self, context, return_value): + return return_value.status and \ + isinstance(return_value.value, ParserResultConcept) and \ + isinstance(return_value.value.value, DefConceptNode) - def eval(self, context, items): + def eval(self, context, return_value): log.debug("Adding a new concept") - node = items[0].value + def_concept_node = return_value.value.value sheerka = context.sheerka - return sheerka.add_concept(context, node) + + # validate the node + props_found = set() + + concept = Concept(def_concept_node.name) + for prop in ("where", "pre", "post", "body"): + # put back the sources + part_ret_val = getattr(def_concept_node, prop) + if not isinstance(part_ret_val, ReturnValueConcept) or not part_ret_val.status: + continue # not quite sure that it's possible + + # update the parts + source = self.get_source(part_ret_val) + setattr(concept, prop, source) + + # try to find what can be a property + for p in self.get_props(part_ret_val): + props_found.add(p) + + # Auto discovered properties must be referenced in the name + # Note that with this method, the variables will be created in the order of appearance + for token in def_concept_node.name.tokens: + if token.value in props_found: + concept.set_prop(token.value, None) + + # finish initialisation + concept.init_key(def_concept_node.name.tokens) + concept.add_codes(def_concept_node.get_codes()) + + ret = sheerka.create_new_concept(context, concept) + return sheerka.ret(self.name, ret.status, ret.value, parents=[return_value]) + + @staticmethod + def get_source(ret_value): + return ret_value.value.source if isinstance(ret_value.value, ParserResultConcept) \ + else ret_value.value.name + + @staticmethod + def get_props(ret_value): + if isinstance(ret_value.value, ParserResultConcept) and isinstance(ret_value.value.value, PythonNode): + get_names_visitor = PythonGetNamesVisitor() + get_names_visitor.visit(ret_value.value.value.ast_) + return get_names_visitor.names + + if isinstance(ret_value.value, Concept): + return list(ret_value.value.props.keys()) + + return [] diff --git a/evaluators/BaseEvaluator.py b/evaluators/BaseEvaluator.py index a83d26d..ae40ce4 100644 --- a/evaluators/BaseEvaluator.py +++ b/evaluators/BaseEvaluator.py @@ -1,14 +1,34 @@ class BaseEvaluator: """ - base class to evaluate concepts or nodes + Base class to evaluate ReturnValues """ + PREFIX = "Evaluators:" + def __init__(self, name, priority: int): - self.name = name + self.name = self.PREFIX + name self.priority = priority - def matches(self, context, items): + +class OneReturnValueEvaluator(BaseEvaluator): + """ + Evaluate one specific return value + """ + + def matches(self, context, return_value): pass - def eval(self, context, items): + def eval(self, context, return_value): + pass + + +class AllReturnValuesEvaluator(BaseEvaluator): + """ + Evaluates the groups of ReturnValues + """ + + def matches(self, context, return_values): + pass + + def eval(self, context, return_values): pass diff --git a/evaluators/ConceptEvaluator.py b/evaluators/ConceptEvaluator.py new file mode 100644 index 0000000..054ddaf --- /dev/null +++ b/evaluators/ConceptEvaluator.py @@ -0,0 +1,35 @@ +from core.concept import Concept, ConceptParts +from evaluators.BaseEvaluator import OneReturnValueEvaluator +import logging + +log = logging.getLogger(__name__) + + +class ConceptEvaluator(OneReturnValueEvaluator): + def __init__(self): + super().__init__("Concept Evaluator", 50) + + def matches(self, context, return_value): + return return_value.status and \ + return_value.who == "Parsers:ConceptParser" and \ + isinstance(return_value.value, Concept) + + def eval(self, context, return_value): + sheerka = context.sheerka + concept = return_value.value + + # pre condition should already be validated by the parser. + # It's a mandatory condition for the concept before it can be recognized + + if len(concept.codes) == 0: + sheerka.add_codes_to_concept(context, concept) + + # TODO; check pre + # if pre is not true, return Concept with a false value + + body = concept.codes[ConceptParts.BODY] + if body is None: + return None # nothing to do + + return sheerka.ret(self.name, True, body.value, parents=[return_value]) + diff --git a/evaluators/DefaultEvaluator.py b/evaluators/DefaultEvaluator.py deleted file mode 100644 index d70a11b..0000000 --- a/evaluators/DefaultEvaluator.py +++ /dev/null @@ -1,39 +0,0 @@ -from core.concept import TooManySuccessConcept -from core.sheerka import ReturnValue -from evaluators.BaseEvaluator import BaseEvaluator -import logging - -log = logging.getLogger(__name__) - - -class DefaultEvaluator(BaseEvaluator): - """ - Used to filter the responses of the parsers - """ - - def __init__(self): - super().__init__("Default Evaluator", 90) - - def matches(self, context, items): - return True - - def eval(self, context, items): - successful_results = [item for item in items if item.status] - number_of_successful = len(successful_results) - total_items = len(items) - - # remove errors when a winner is found - if number_of_successful == 1: - log.debug(f"1 / {total_items} good item found.") - return successful_results - - # too many winners, which one to choose ? - if number_of_successful > 1: - log.debug(f"{number_of_successful} / {total_items} good items. Too many success") - return ReturnValue(self.name, - False, - context.sheerka.new(TooManySuccessConcept.NAME, body=items)) - - # only errors, i cannot help you - log.debug(f"{total_items} items. Only errors") - return items diff --git a/evaluators/DuplicateConceptEvaluator.py b/evaluators/DuplicateConceptEvaluator.py new file mode 100644 index 0000000..8922359 --- /dev/null +++ b/evaluators/DuplicateConceptEvaluator.py @@ -0,0 +1,41 @@ +from core.builtin_concepts import BuiltinConcepts +from evaluators.BaseEvaluator import AllReturnValuesEvaluator +from parsers.BaseParser import BaseParser + + +class DuplicateConceptEvaluator(AllReturnValuesEvaluator): + """ + Use to recognize when we tried to add the same concept twice + """ + + def __init__(self): + super().__init__("Duplicate Concept Evaluator", 10) + self.already_defined = None + + def matches(self, context, return_values): + sheerka = context.sheerka + parsing = False + add_concept_in_error = False + only_parsers = True + + for ret in return_values: + if sheerka.isinstance(ret.value, BuiltinConcepts.PARSING): + if ret.status: + parsing = True + elif ret.who == "Evaluators:Add new Concept": + if not ret.status and ret.value.body.args[0] == "Duplicate object.": + add_concept_in_error = True + self.already_defined = ret.value.body.obj + else: + if not ret.who.startswith(BaseParser.PREFIX): + only_parsers = False + + return parsing and add_concept_in_error and only_parsers + + def eval(self, context, return_values): + sheerka = context.sheerka + return sheerka.ret( + self.name, + False, + sheerka.new(BuiltinConcepts.CONCEPT_ALREADY_DEFINED, obj=self.already_defined), + parents=return_values) diff --git a/evaluators/ParsersEvaluator.py b/evaluators/ParsersEvaluator.py new file mode 100644 index 0000000..3b2e6c2 --- /dev/null +++ b/evaluators/ParsersEvaluator.py @@ -0,0 +1,42 @@ +from core.builtin_concepts import BuiltinConcepts +from core.concept import Concept +from evaluators.BaseEvaluator import AllReturnValuesEvaluator +import logging + +from parsers.BaseParser import BaseParser + +log = logging.getLogger(__name__) + + +class ParsersEvaluator(AllReturnValuesEvaluator): + """ + Used to filter the responses + It has a low priority to let other evaluators try to resolve the errors + """ + + def __init__(self): + super().__init__("Parsers Evaluator", 10) + self.successful_return_value = None + + def matches(self, context, return_values): + sheerka = context.sheerka + after_parsing = False + nb_successful_evaluators = 0 + only_parsers = True + for ret in return_values: + if sheerka.isinstance(ret.value, BuiltinConcepts.AFTER_PARSING): + if ret.status: + after_parsing = True + elif ret.who.startswith(self.PREFIX): + if ret.status: + nb_successful_evaluators += 1 + self.successful_return_value = ret + else: + if not ret.who.startswith(BaseParser.PREFIX): + only_parsers = False + + return after_parsing and nb_successful_evaluators == 1 and only_parsers + + def eval(self, context, return_values): + sheerka = context.sheerka + return sheerka.ret(self.name, True, self.successful_return_value.value, parents=return_values) diff --git a/evaluators/PythonEvaluator.py b/evaluators/PythonEvaluator.py index 000e7ae..327ae73 100644 --- a/evaluators/PythonEvaluator.py +++ b/evaluators/PythonEvaluator.py @@ -1,32 +1,32 @@ -from core.concept import ReturnValueConcept, ErrorConcept -from evaluators.BaseEvaluator import BaseEvaluator +from core.builtin_concepts import BuiltinConcepts, ParserResultConcept +from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.PythonParser import PythonNode import ast -from core.sheerka import ReturnValue, Sheerka import logging log = logging.getLogger(__name__) -class PythonEvaluator(BaseEvaluator): +class PythonEvaluator(OneReturnValueEvaluator): def __init__(self): super().__init__("Python Evaluator", 50) - def matches(self, context, items): - return len(items) == 1 and isinstance(items[0].value, PythonNode) + def matches(self, context, return_value): + return return_value.status and \ + isinstance(return_value.value, ParserResultConcept) and \ + isinstance(return_value.value.value, PythonNode) - def eval(self, context, items): + def eval(self, context, return_value): sheerka = context.sheerka - node = items[0].value - if isinstance(node.ast, ast.Expression): + node = return_value.value.value + if isinstance(node.ast_, ast.Expression): try: - log.debug("Evaluating python expression") - compiled = compile(node.ast, "", "eval") + log.debug(f"Evaluating python node {node}") + compiled = compile(node.ast_, "", "eval") evaluated = eval(compiled, {}, {"sheerka": context.sheerka}) - concept = sheerka.new(ReturnValueConcept.NAME, body=evaluated) - return ReturnValue(self.name, True, concept) + return sheerka.ret(self.name, True, evaluated, parents=[return_value]) except Exception as error: - error = sheerka.new(ErrorConcept.NAME, body=error) - return ReturnValue(self.name, False, error) + error = sheerka.new(BuiltinConcepts.ERROR, body=error) + return sheerka.ret(self.name, False, error, parents=[return_value]) else: raise NotImplementedError() diff --git a/main.py b/main.py index 4dc9e86..36fad63 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,10 @@ import getopt import sys -from core.sheerka import Sheerka import logging +from core.sheerka import Sheerka +import core.utils + def usage(): print("Sheerka v0.1\n") @@ -10,40 +12,6 @@ def usage(): print(sys.argv[0] + "[-hd] command ") -def sysarg_to_string(argv): - """ - Transform a list of strings into a single string - Add quotes if needed - :return: - """ - if argv is None or not argv: - return "" - - result = "" - first = True - for s in argv: - if not first: - result += " " - - result += '"' + s + '"' if " " in s else s - first = False - - if result[0] in ('"', "'"): - result = result[1:-1] # strip quotes - return result - - -def init_logging(debug): - if debug: - log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" - log_level = logging.DEBUG - else: - log_format = "%(message)s" - log_level = logging.INFO - - logging.basicConfig(format=log_format, level=log_level) - - def main(argv): try: opts, args = getopt.getopt(argv, "hd", ["help", "debug"]) @@ -55,11 +23,10 @@ def main(argv): if o in ('-d', "--debug"): debug = True - # init_logging(debug) sheerka = Sheerka(debug=debug) sheerka.initialize() - _in = sysarg_to_string(args) + _in = core.utils.sysarg_to_string(args) result = sheerka.eval(_in) for res in result: diff --git a/parsers/BaseParser.py b/parsers/BaseParser.py index 397184b..399a7af 100644 --- a/parsers/BaseParser.py +++ b/parsers/BaseParser.py @@ -15,17 +15,34 @@ class NopNode(Node): return "nop" +class NotInitializedNode(Node): + pass + + def __repr__(self): + return "**N/A**" + + @dataclass() class ErrorNode(Node): pass class BaseParser: + PREFIX = "Parsers:" + def __init__(self, name): - self.name = name + self.name = self.PREFIX + name self.has_error = False self.error_sink = [] + def __eq__(self, other): + if not isinstance(other, self.__class__): + return False + return self.name == other.name + + def __hash__(self): + return hash(self.name) + def parse(self, context, text): pass @@ -38,3 +55,10 @@ class BaseParser: value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value res += value return res + + @staticmethod + def log_result(log, text, ret): + if ret.status: + log.debug(f"Recognized '{text}' as {ret.value}") + else: + log.debug(f"Failed to recognize '{text}'") diff --git a/parsers/DefaultParser.py b/parsers/DefaultParser.py index fccf6c9..81b7343 100644 --- a/parsers/DefaultParser.py +++ b/parsers/DefaultParser.py @@ -1,4 +1,6 @@ -from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept +from core.concept import ConceptParts +from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode from core.tokenizer import Tokenizer, TokenKind, Token, Keywords from dataclasses import dataclass, field import logging @@ -8,16 +10,10 @@ log = logging.getLogger(__name__) @dataclass() class DefaultParserNode(Node): - tokens: list = field(compare=False) - - def is_same(self, other): - if type(self) != type(other): - return False - - if hasattr(self, "value") and self.value != other.value: - return False - - return True + """ + Base node for all default parser nodes + """ + tokens: list = field(compare=False, repr=False) @dataclass() @@ -30,8 +26,8 @@ class UnexpectedTokenErrorNode(DefaultParserErrorNode): message: str expected_tokens: list - def __post_init__(self): - log.debug("-> UnexpectedTokenErrorNode: " + self.message) + # def __post_init__(self): + # log.debug("-> UnexpectedTokenErrorNode: " + self.message) @dataclass() @@ -41,8 +37,8 @@ class SyntaxErrorNode(DefaultParserErrorNode): """ message: str - def __post_init__(self): - log.debug("-> SyntaxErrorNode: " + self.message) + # def __post_init__(self): + # log.debug("-> SyntaxErrorNode: " + self.message) @dataclass() @@ -52,117 +48,209 @@ class CannotHandleErrorNode(DefaultParserErrorNode): """ text: str - def __post_init__(self): - log.debug("-> CannotHandleErrorNode: " + self.text) + # def __post_init__(self): + # log.debug("-> CannotHandleErrorNode: " + self.text) + + # + # @dataclass() + # class NumberNode(DefaultParserNode): + # value: object + # + # def __repr__(self): + # return str(self.value) + # + # + # @dataclass() + # class StringNode(DefaultParserNode): + # value: str + # quote: str + # + # def is_same(self, other): + # if not super(StringNode, self).is_same(other): + # return False + # return self.quote == other.quote + # + # def __repr__(self): + # return self.quote + self.value + self.quote + # + # + # @dataclass() + # class VariableNode(DefaultParserNode): + # value: str + # + # def __repr__(self): + # return self.value + # + # + # @dataclass() + # class TrueNode(DefaultParserNode): + # pass + # + # def __repr__(self): + # return "true" + # + # + # @dataclass() + # class FalseNode(DefaultParserNode): + # pass + # + # def __repr__(self): + # return "false" + # + # + # @dataclass() + # class NullNode(DefaultParserNode): + # pass + # + # def __repr__(self): + # return "null" + # + # + # @dataclass() + # class BinaryNode(DefaultParserNode): + # operator: TokenKind + # left: Node + # right: Node + # + # def is_same(self, other): + # if not super(BinaryNode, self).is_same(other): + # return False + # if self.operator != other.operator: + # return False + # if not self.left.is_same(other.left): + # return False + # return self.right.is_same(other.right) + # + # def __repr__(self): + # return f"({self.left} {self.operator} {self.right})" + # + + @staticmethod + def get_concept_key(tokens, variables=None): + key = "" + first = True + for token in tokens: + if token.type == TokenKind.EOF: + break + if token.type == TokenKind.WHITESPACE: + continue + if not first: + key += " " + if variables is not None and token.value in variables: + key += "__var__" + str(variables.index(token.value)) + else: + key += token.value[1:-1] if token.type == TokenKind.STRING else token.value + first = False + + return key + + +@dataclass() +class NameNode(DefaultParserNode): + + def get_name(self): + name = "" + first = True + for token in self.tokens: + if token.type == TokenKind.EOF: + break + if token.type == TokenKind.WHITESPACE: + continue + if not first: + name += " " + + name += token.value[1:-1] if token.type == TokenKind.STRING else token.value + first = False + + return name + + def __repr__(self): + return self.get_name() + + def __eq__(self, other): + if not isinstance(other, NameNode): + return False + + return self.get_name() == other.get_name() + + def __hash__(self): + return hash(self.get_name()) @dataclass() class DefConceptNode(DefaultParserNode): - name: str - where: Node = None - pre: Node = None - post: Node = None - body: Node = NopNode + + name: NameNode = NotInitializedNode() + where: ReturnValueConcept = NotInitializedNode() + pre: ReturnValueConcept = NotInitializedNode() + post: ReturnValueConcept = NotInitializedNode() + body: ReturnValueConcept = NotInitializedNode() def get_codes(self): codes = {} - for prop in ["where", "pre", "post", "body"]: - prop_value = getattr(self, prop) - if hasattr(prop_value, "ast"): - codes[prop] = prop_value.ast + for part_key in ConceptParts: + prop_value = getattr(self, part_key.value) + if hasattr(prop_value, "ast_"): + codes[part_key] = prop_value.ast_ return codes -@dataclass() -class NumberNode(DefaultParserNode): - value: object - - def __repr__(self): - return str(self.value) - - -@dataclass() -class StringNode(DefaultParserNode): - value: str - quote: str - - def is_same(self, other): - if not super(StringNode, self).is_same(other): - return False - return self.quote == other.quote - - def __repr__(self): - return self.quote + self.value + self.quote - - -@dataclass() -class VariableNode(DefaultParserNode): - value: str - - def __repr__(self): - return self.value - - -@dataclass() -class TrueNode(DefaultParserNode): - pass - - def __repr__(self): - return "true" - - -@dataclass() -class FalseNode(DefaultParserNode): - pass - - def __repr__(self): - return "false" - - -@dataclass() -class NullNode(DefaultParserNode): - pass - - def __repr__(self): - return "null" - - -@dataclass() -class BinaryNode(DefaultParserNode): - operator: TokenKind - left: Node - right: Node - - def is_same(self, other): - if not super(BinaryNode, self).is_same(other): - return False - if self.operator != other.operator: - return False - if not self.left.is_same(other.left): - return False - return self.right.is_same(other.right) - - def __repr__(self): - return f"({self.left} {self.operator} {self.right})" - - class DefaultParser(BaseParser): """ Parse sheerka specific grammar (like def concept) """ - def __init__(self, sub_parser=None): + + def __init__(self): BaseParser.__init__(self, "DefaultParser") - self.sub_parser = sub_parser self.lexer_iter = None self._current = None self.context = None self.text = None + self.sheerka = None + + @staticmethod + def fix_indentation(tokens): + """ + In the following example + def concept add one to a as: + def func(x): + return x+1 + func(a) + indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error + :param tokens: + :return: + """ + if tokens[0].type != TokenKind.COLON: + return tokens + + if len(tokens) < 3: + return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE]) + + if tokens[1].type != TokenKind.NEWLINE: + return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE]) + + if tokens[2].type != TokenKind.WHITESPACE: + return SyntaxErrorNode([tokens[2]], "Indentation not found.") + indent_size = len(tokens[2].value) + + # now fix the other indentations + i = 3 + while i < len(tokens) - 1: + if tokens[i].type == TokenKind.NEWLINE: + if tokens[i + 1].type != TokenKind.WHITESPACE: + return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE]) + + if len(tokens[i + 1].value) < indent_size: + return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.") + + tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size) + i += 1 + + return tokens[3:] def reset_parser(self, context, text): self.context = context - # hack before implementing all the sub parsers - if context: - self.sub_parser = context.sheerka.parsers[1] + self.sheerka = context.sheerka self.text = text self.lexer_iter = iter(Tokenizer(text)) @@ -190,167 +278,152 @@ class DefaultParser(BaseParser): self._current = None return - @staticmethod - def get_concept_key(tokens, variables=None): - key = "" - first = True - for token in tokens: - if token.type == TokenKind.EOF: - break - if token.type == TokenKind.WHITESPACE: - continue - if not first: - key += " " - if variables is not None and token.value in variables: - key += "__var__" + str(variables.index(token.value)) - else: - key += token.value[1:-1] if token.type == TokenKind.STRING else token.value - first = False - - return key - - @staticmethod - def fix_indentation(tokens): - """ - In the following example - def concept add one to a as: - def func(x): - return x+1 - func(a) - indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error - :param tokens: - :return: - """ - if tokens[1].type != TokenKind.COLON: - return tokens[1:] - - if len(tokens) < 3: - return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE]) - - if tokens[2].type != TokenKind.NEWLINE: - return UnexpectedTokenErrorNode([tokens[2]], "Unexpected token after colon", [TokenKind.NEWLINE]) - - if tokens[3].type != TokenKind.WHITESPACE: - return SyntaxErrorNode([tokens[3]], "Indentation not found") - indent_size = len(tokens[3].value) - - # now fix the other indentations - i = 4 - while i < len(tokens) - 1: - if tokens[i].type == TokenKind.NEWLINE: - if tokens[i + 1].type != TokenKind.WHITESPACE: - return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE]) - - if len(tokens[i + 1].value) < indent_size: - return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.") - - tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size) - i += 1 - - return tokens[4:] - def parse(self, context, text): + # default parser can only manage string text + if not isinstance(text, str): + log.debug(f"Failed to recognize '{text}'") + return context.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text)) + self.reset_parser(context, text) - return self.parse_statement() + tree = self.parse_statement() + + # If a error is found it must be sent to error_sink + # tree must contain what was recognized + + ret = self.sheerka.ret( + self.name, + not self.has_error, + self.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=text, + body=self.error_sink if self.has_error else tree, + try_parsed=tree)) + + self.log_result(log, text, ret) + return ret def parse_statement(self): token = self.get_token() if token.value == Keywords.DEF: self.next_token() - return self.parse_def_concept() + return self.parse_def_concept(token) else: return self.add_error(CannotHandleErrorNode([], self.text)) - def parse_def_concept(self): + def parse_def_concept(self, def_token): """ def concept name [where xxx] [pre xxx] [post xxx] [as xxx] """ - def_concept_parts = [Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST] + # init + log.debug("It may be a definition of a concept") + concept_special_tokens = [def_token] + concept_found = DefConceptNode(concept_special_tokens) - tokens_found = {} # Node token is supposed to be a list, but here, it will be a dict + # the definition of a concept consists of several parts + # Keywords.CONCEPT to get the name of the concept + # Keywords.AS to get the body + # Keywords.WHERE to get the conditions to recognize for the variables + # Keywords.PRE to know if the conditions to evaluate the concept + # Keywords.POST to apply or verify once the concept is executed + def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST] - token = self.get_token() - if token.value != Keywords.CONCEPT: - return self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT])) - - self.next_token() - token = self.get_token() - - if token.value in (Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST): - return self.add_error(UnexpectedTokenErrorNode([token], "Concept name is missing.", [""])) - - name_as_tokens = [] - while token.type != TokenKind.EOF and token.value not in def_concept_parts: - name_as_tokens.append(token) - self.next_token() - token = self.get_token() - name = self.get_concept_key(name_as_tokens) - tokens_found["name"] = name_as_tokens - - # try to parse as, where, pre and post declarations - tokens = { + # tokens found, when trying to recognize the parts + tokens_found_by_parts = { + Keywords.CONCEPT: [], Keywords.AS: None, Keywords.WHERE: None, Keywords.PRE: None, Keywords.POST: None, } - current_part = None + current_part = Keywords.CONCEPT + token = self.get_token() + first_token = token + + # loop thru the tokens, and put them in the correct tokens_found_by_parts entry while token.type != TokenKind.EOF: if token.value in def_concept_parts: + concept_special_tokens.append(token) # keep track of the keywords keyword = token.value - if tokens[keyword]: - return self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations.")) - tokens[keyword] = [token] # first element of the list is the keyword + if tokens_found_by_parts[keyword]: + # a part is defined more than once + self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations.")) + tokens_found_by_parts[current_part].append(token) # adds the token again + else: + tokens_found_by_parts[keyword] = [token] current_part = keyword self.next_token() else: - if current_part is None: - return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", def_concept_parts)) - else: - tokens[current_part].append(token) - self.next_token(False) + tokens_found_by_parts[current_part].append(token) + self.next_token(False) token = self.get_token() - for t in tokens: - tokens_found[t.value] = tokens[t] - asts = { - Keywords.AS: NopNode(), - Keywords.WHERE: NopNode(), - Keywords.PRE: NopNode(), - Keywords.POST: NopNode(), + # semantic checks + name_first_token_index = 1 + if first_token.value != Keywords.CONCEPT: + self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT])) + name_first_token_index = 0 + + # Manage the name + name_tokens = tokens_found_by_parts[Keywords.CONCEPT] + if len(name_tokens) == name_first_token_index: + self.add_error(SyntaxErrorNode([], "Name is mandatory")) + + if name_tokens[-1].type == TokenKind.NEWLINE: + name_tokens = name_tokens[:-1] # strip trailing newlines + + if TokenKind.NEWLINE in [t.type for t in name_tokens]: + self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name.")) + + concept_found.name = NameNode(name_tokens[name_first_token_index:]) # skip the first token + + asts_found_by_parts = { + Keywords.AS: NotInitializedNode(), + Keywords.WHERE: NotInitializedNode(), + Keywords.PRE: NotInitializedNode(), + Keywords.POST: NotInitializedNode(), } - # check for empty declarations - for keyword in tokens: - current_tokens = tokens[keyword] - if current_tokens is not None: - if len(current_tokens) == 0: # only one element means empty decl - return self.add_error(SyntaxErrorNode([current_tokens[0]], "Empty declaration"), False) - else: - current_tokens = self.fix_indentation(current_tokens) - if isinstance(current_tokens, ErrorNode): - self.add_error(current_tokens) - continue + for keyword in tokens_found_by_parts: + if keyword == Keywords.CONCEPT: + continue # already done - # start = current_tokens[0].index - # end = current_tokens[-1].index + len(current_tokens[-1].value) - sub_parser = self.sub_parser(source=keyword.value) - sub_tree = sub_parser.parse(self.context, current_tokens) - if isinstance(sub_tree, ErrorNode): - self.add_error(sub_tree, False) - asts[keyword] = sub_tree + log.debug("Processing part '" + keyword.name + "'") - def_concept_node = DefConceptNode(tokens_found, # dict instead of list is wanted. - name, - asts[Keywords.WHERE], - asts[Keywords.PRE], - asts[Keywords.POST], - asts[Keywords.AS]) + tokens = tokens_found_by_parts[keyword] + if tokens is None: + continue # nothing to do - log.debug(f"Found DefConcept node '{def_concept_node}'") - return def_concept_node + if len(tokens) == 1: # check for empty declarations + self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False) + continue + + tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations + if isinstance(tokens, ErrorNode): + self.add_error(tokens) + continue + + # ask the other parsers if they recognize the tokens + new_context = self.context.push(self) + parsing_result = self.sheerka.expect_one(new_context, self.sheerka.parse(new_context, tokens)) + if not parsing_result.status: + self.add_error(parsing_result.value) + continue + + asts_found_by_parts[keyword] = parsing_result + + concept_found.where = asts_found_by_parts[Keywords.WHERE] + concept_found.pre = asts_found_by_parts[Keywords.PRE] + concept_found.post = asts_found_by_parts[Keywords.POST] + concept_found.body = asts_found_by_parts[Keywords.AS] + + log.debug(f"Found DefConcept node '{concept_found}'") + return concept_found # def parse_expression(self): # return self.parse_addition() diff --git a/parsers/ExactConceptParser.py b/parsers/ExactConceptParser.py index b7eb9f1..aa8b17f 100644 --- a/parsers/ExactConceptParser.py +++ b/parsers/ExactConceptParser.py @@ -1,7 +1,10 @@ -from core.sheerka import ReturnValue +from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts from parsers.BaseParser import BaseParser from core.tokenizer import Tokenizer, Keywords, TokenKind from core.concept import Concept +import logging + +log = logging.getLogger(__name__) class ExactConceptParser(BaseParser): @@ -15,11 +18,17 @@ class ExactConceptParser(BaseParser): BaseParser.__init__(self, "ConceptParser") def parse(self, context, text): + """ + text can be string, but text can also be an list of tokens + :param context: + :param text: + :return: + """ res = [] sheerka = context.sheerka words = self.get_words(text) if len(words) > self.MAX_WORDS_SIZE: - return ReturnValue(self.name, False, sheerka.new(sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME)) + return ReturnValueConcept(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, obj=text)) recognized = False for combination in self.combinations(words): @@ -30,24 +39,27 @@ class ExactConceptParser(BaseParser): # That will depend on the context # Let's return a new one for now and see if it works concept = sheerka.new(concept_key) - if not sheerka.isinstance(concept, sheerka.UNKNOWN_CONCEPT_NAME): + if not sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): # update the properties if needed for i, token in enumerate(combination): if token.startswith(Concept.PROPERTY_PREFIX): index = int(token[len(Concept.PROPERTY_PREFIX):]) concept.set_prop_by_index(index, words[i]) - res.append(ReturnValue(self.name, True, concept)) + res.append(ReturnValueConcept(self.name, True, concept)) + log.debug(f"Recognized '{text}' as '{concept}'") recognized = True if recognized: return res - return ReturnValue(self.name, False, sheerka.new(sheerka.UNKNOWN_CONCEPT_NAME, body=text)) + log.debug(f"Failed to recognize {words}") + return ReturnValueConcept(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, obj=text)) @staticmethod def get_words(text): + tokens = iter(Tokenizer(text)) if isinstance(text, str) else text res = [] - for t in iter(Tokenizer(text)): + for t in tokens: if t.type == TokenKind.EOF: break if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE: diff --git a/parsers/PythonParser.py b/parsers/PythonParser.py index 60b6c80..465cb2f 100644 --- a/parsers/PythonParser.py +++ b/parsers/PythonParser.py @@ -1,3 +1,4 @@ +from core.builtin_concepts import BuiltinConcepts from parsers.BaseParser import BaseParser, Node, ErrorNode from dataclasses import dataclass import ast @@ -12,23 +13,50 @@ class PythonErrorNode(ErrorNode): source: str exception: Exception - def __post_init__(self): - log.debug("-> PythonErrorNode: " + str(self.exception)) + # def __post_init__(self): + # log.debug("-> PythonErrorNode: " + str(self.exception)) + @dataclass() class PythonNode(Node): source: str - ast: ast.AST + ast_: ast.AST + + # def __repr__(self): + # return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")" def __repr__(self): - return "PythonNode(" + ast.dump(self.ast) + ")" - # return "PythonNode(" + self.source + ")" + ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module" + return "PythonNode(" + ast_type + "='" + self.source + "')" + + def __eq__(self, other): + if not isinstance(other, PythonNode): + return False + + if self.source != other.source: + return False + + self_dump = self.get_dump(self.ast_) + other_dump = self.get_dump(other.ast_) + + return self_dump == other_dump + + def __hash__(self): + return hash((self.source, self.ast_.hash)) + + @staticmethod + def get_dump(ast_): + dump = ast.dump(ast_) + for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]: + dump = dump.replace(to_remove, "") + return dump class PythonParser(BaseParser): """ Parse Python scripts """ + def __init__(self, source=""): BaseParser.__init__(self, "PythonParser") @@ -38,6 +66,8 @@ class PythonParser(BaseParser): text = text if isinstance(text, str) else self.get_text_from_tokens(text) text = text.strip() + sheerka = context.sheerka + # first, try to parse an expression res, tree, error = self.try_parse_expression(text) if not res: @@ -47,10 +77,19 @@ class PythonParser(BaseParser): self.has_error = True error_node = PythonErrorNode(text, error) self.error_sink.append(error_node) - return error_node - log.debug("Recognized python code.") - return PythonNode(text, tree) + ret = sheerka.ret( + self.name, + not self.has_error, + sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=text, + body=self.error_sink if self.has_error else PythonNode(text, tree), + try_parsed=None)) + + self.log_result(log, text, ret) + return ret def try_parse_expression(self, text): try: @@ -91,6 +130,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor): """ This visitor will find all the name declared in the ast """ + def __init__(self): self.names = set() log.debug("Searching for names.") @@ -98,4 +138,3 @@ class PythonGetNamesVisitor(ast.NodeVisitor): def visit_Name(self, node): log.debug(f"Found name : {node.id}") self.names.add(node.id) - diff --git a/sdp/sheerkaDataProvider.py b/sdp/sheerkaDataProvider.py index bb6d00b..24ef914 100644 --- a/sdp/sheerkaDataProvider.py +++ b/sdp/sheerkaDataProvider.py @@ -133,7 +133,7 @@ class State: for item in items: item_digest = SheerkaDataProvider.get_obj_digest(item) if item_digest == digest: - raise SheerkaDataProviderDuplicateKeyError("duplicate key", key, obj.obj) + raise SheerkaDataProviderDuplicateKeyError(key, obj.obj) def update(self, entry, obj: ObjToUpdate, append=True): """ @@ -258,8 +258,8 @@ class SheerkaDataProviderError(Exception): class SheerkaDataProviderDuplicateKeyError(Exception): - def __init__(self, message, key, obj): - Exception.__init__(self, message) + def __init__(self, key, obj): + Exception.__init__(self, "Duplicate object.") self.key = key self.obj = obj @@ -371,7 +371,6 @@ class SheerkaDataProvider: state.parents = [] if snapshot is None else [snapshot] state.events = [event_digest] state.date = datetime.now() - log.debug(state.data) if use_ref: obj.set_digest(self.save_obj(obj.obj)) @@ -579,15 +578,36 @@ class SheerkaDataProvider: return self.load_ref_if_needed(state.data[entry] if key is None else state.data[entry][key])[0] - def exists(self, entry): + def exists(self, entry, key=None, digest=None): """ Returns true if the entry is defined + :param digest: + :param key: :param entry: :return: """ snapshot = self.get_snapshot() state = self.load_state(snapshot) - return entry in state.data + exist = entry in state.data + + if not exist or key is None: + return exist + + items = state.data[entry] + exist = key in items + if not exist or digest is None: + return exist + + items = items[key] + if not isinstance(items, list): + items = [items] + + for item in items: + item_digest = SheerkaDataProvider.get_obj_digest(item) + if item_digest == digest: + return True + + return False def save_event(self, event: Event): """ @@ -657,7 +677,7 @@ class SheerkaDataProvider: with open(target_path, "wb") as f: f.write(stream.read()) - log.debug(f"...digest is {digest}.") + log.debug(f"...digest={digest}.") return digest def load_obj(self, digest): diff --git a/sdp/sheerkaSerializer.py b/sdp/sheerkaSerializer.py index 9c9acf9..8dce556 100644 --- a/sdp/sheerkaSerializer.py +++ b/sdp/sheerkaSerializer.py @@ -5,6 +5,8 @@ import struct import io from dataclasses import dataclass import logging +from enum import Enum + import core.utils from core.concept import Concept @@ -23,6 +25,9 @@ def json_default_converter(o): if isinstance(o, (datetime.date, datetime.datetime)): return o.isoformat() + if isinstance(o, Enum): + return o.key + @dataclass() class SerializerContext: diff --git a/tests/test_DefaultParser.py b/tests/test_DefaultParser.py new file mode 100644 index 0000000..21997d8 --- /dev/null +++ b/tests/test_DefaultParser.py @@ -0,0 +1,350 @@ +import os +import shutil + +import pytest +from os import path +import ast + +from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept +from core.sheerka import Sheerka, ExecutionContext +from parsers.BaseParser import BaseParser +from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode +from core.tokenizer import Keywords, Tokenizer +from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode +# from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode +from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode, NopNode + +tests_root = path.abspath("../build/tests") +root_folder = "init_folder" + + +@pytest.fixture(autouse=True) +def init_test(): + if path.exists(tests_root): + shutil.rmtree(tests_root) + + if not path.exists(tests_root): + os.makedirs(tests_root) + current_pwd = os.getcwd() + os.chdir(tests_root) + + yield None + os.chdir(current_pwd) + + +# def nop(): +# return NopNode() +# +# +# def n(number): +# return NumberNode([], number) +# +# +# def s(string, quote="'"): +# return StringNode([], string, quote) +# +# +# def v(name): +# return VariableNode([], name) +# +# +# def t(): +# return TrueNode([]) +# +# +# def f(): +# return FalseNode([]) +# +# +# def null(): +# return NullNode([]) +# +# +# def b(operator, left, right): +# return BinaryNode([], operator, left, right) + +# +# def compare_ast(left, right): +# left_as_string = ast.dump(left) +# left_as_string = left_as_string.replace(", ctx=Load()", "") +# left_as_string = left_as_string.replace(", kind=None", "") +# +# right_as_string = right if isinstance(right, str) else ast.dump(right) +# right_as_string = right_as_string.replace(", ctx=Load()", "") +# right_as_string = right_as_string.replace(", kind=None", "") +# +# return left_as_string == right_as_string +# + +def get_concept(name, where=None, pre=None, post=None, body=None): + concept = DefConceptNode([], name=NameNode(list(Tokenizer(name)))) + + if body: + concept.body = get_concept_part(body) + if where: + concept.where = get_concept_part(where) + if pre: + concept.pre = get_concept_part(pre) + if post: + concept.post = get_concept_part(post) + return concept + + +def get_context(): + sheerka = Sheerka() + sheerka.initialize(root_folder) + return ExecutionContext("test", "xxx", sheerka) + + +def get_concept_part(part): + if isinstance(part, str): + node = PythonNode(part, ast.parse(part, mode="eval")) + return ReturnValueConcept( + who="Parsers:PythonParser", + status=True, + value=ParserResultConcept( + source=part, + parser=PythonParser(), + value=node)) + + if isinstance(part, PythonNode): + return ReturnValueConcept( + who="Parsers:PythonParser", + status=True, + value=ParserResultConcept( + source=part.source, + parser=PythonParser(), + value=part)) + + if isinstance(part, ReturnValueConcept): + return part + + # @pytest.mark.parametrize("text, expected", [ + # ("1", n(1)), + # ("+1", n(1)), + # ("-1", n(-1)), + # ("'foo'", s("foo")), + # ("identifier", v("identifier")), + # ("true", t()), + # ("false", f()), + # ("null", null()), + # ("1 * 2", b(TokenKind.STAR, n(1), n(2))), + # ("1 * 2/3", b(TokenKind.STAR, n(1), b(TokenKind.SLASH, n(2), n(3)))), + # ("1 + 2", b(TokenKind.PLUS, n(1), n(2))), + # ("1 + 2 - 3", b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))), + # ("1 + 2-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))), + # ("1 + 2 +-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))), + # ("1 + 2 * 3", b(TokenKind.PLUS, n(1), b(TokenKind.STAR, n(2), n(3)))), + # ("1 * 2 + 3", b(TokenKind.PLUS, b(TokenKind.STAR, n(1), n(2)), n(3))), + # ("(1 + 2) * 3", b(TokenKind.STAR, b(TokenKind.PLUS, n(1), n(2)), n(3))), + # ("1 * (2 + 3)", b(TokenKind.STAR, n(1), b(TokenKind.PLUS, n(2), n(3)))), + # ]) + # def test_i_can_parse_simple_expression(text, expected): + # parser = DefaultParser(text, None) + # ast = parser.parse() + # assert ast.is_same(expected) + # + # + # @pytest.mark.parametrize("text, token_found, expected_tokens", [ + # ("1+", TokenKind.EOF, + # [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, 'true', 'false', 'null', TokenKind.LPAR]), + # ("(1+1", TokenKind.EOF, [TokenKind.RPAR]) + # ]) + # def test_i_can_detect_unexpected_end_of_code(text, token_found, expected_tokens): + # parser = DefaultParser(text, None) + # parser.parse() + # + # assert parser.has_error + # assert parser.error_sink[0].tokens[0].type == token_found + # assert parser.error_sink[0].expected_tokens == expected_tokens + + +@pytest.mark.parametrize("text, expected", [ + ("def concept hello", get_concept(name="hello")), + ("def concept hello ", get_concept(name="hello")), + ("def concept a + b", get_concept(name="a + b")), + ("def concept a+b", get_concept(name="a + b")), + ("def concept 'a+b'+c", get_concept(name="'a+b' + c")), + ("def concept 'as if'", get_concept(name="'as if'")), + ("def concept 'as' if", get_concept(name="'as if'")), + ("def concept hello as 'hello'", get_concept(name="hello", body="'hello'")), + ("def concept hello as 1", get_concept(name="hello", body="1")), + ("def concept hello as 1 + 1", get_concept(name="hello", body="1 + 1")), +]) +def test_i_can_parse_def_concept(text, expected): + parser = DefaultParser() + res = parser.parse(get_context(), text) + node = res.value.value + + assert res.status + assert res.who == parser.name + assert res.value.source == text + assert isinstance(res.value, ParserResultConcept) + assert node == expected + + +def test_i_can_parse_complex_def_concept_statement(): + text = """def concept a plus b +where a,b +pre isinstance(a, int) and isinstance(b, float) +post isinstance(res, int) +as res = a + b +""" + parser = DefaultParser() + res = parser.parse(get_context(), text) + return_value = res.value + expected_concept = get_concept( + name="a plus b", + where="a,b", + pre="isinstance(a, int) and isinstance(b, float)", + post="isinstance(res, int)", + body=PythonNode("res = a + b", ast.parse("res = a + b", mode="exec")) + ) + + assert res.status + assert isinstance(return_value, ParserResultConcept) + assert return_value.value == expected_concept + + +def test_i_can_have_mutilines_declarations(): + text = """ +def concept add one to a as +def func(x): + return x+1 +func(a) + """ + + expected_concept = get_concept( + name="add one to a ", + body=PythonNode( + "def func(x):\n return x+1\nfunc(a)", + ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec")) + ) + + parser = DefaultParser() + res = parser.parse(get_context(), text) + return_value = res.value + + assert res.status + assert isinstance(return_value, ParserResultConcept) + assert return_value.value == expected_concept + + +def test_i_can_use_colon_to_use_indentation(): + text = """ +def concept add one to a as: + def func(x): + return x+1 + func(a) +""" + + expected_concept = get_concept( + name="add one to a ", + body=PythonNode( + "def func(x):\n return x+1\nfunc(a)", + ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec")) + ) + + parser = DefaultParser() + res = parser.parse(get_context(), text) + return_value = res.value + + assert res.status + assert isinstance(return_value, ParserResultConcept) + assert return_value.value == expected_concept + + +def test_indentation_is_mandatory_after_a_colon(): + text = """ +def concept add one to a as: +def func(x): + return x+1 +func(a) +""" + + parser = DefaultParser() + res = parser.parse(get_context(), text) + return_value = res.value + + assert not res.status + assert isinstance(return_value, ParserResultConcept) + assert isinstance(return_value.value[0], SyntaxErrorNode) + assert return_value.value[0].message == "Indentation not found." + + +def test_indentation_is_not_allowed_if_the_colon_is_missing(): + text = """ +def concept add one to a as + def func(x): + return x+1 + func(a) + """ + context = get_context() + sheerka = context.sheerka + + parser = DefaultParser() + res = parser.parse(context, text) + return_value = res.value + + assert not res.status + assert isinstance(return_value, ParserResultConcept) + assert sheerka.isinstance(return_value.value[0], BuiltinConcepts.TOO_MANY_ERRORS) + + +def test_name_is_mandatory(): + text = "def concept as 'hello'" + + parser = DefaultParser() + res = parser.parse(get_context(), text) + return_value = res.value + + assert not res.status + assert isinstance(return_value, ParserResultConcept) + assert isinstance(return_value.value[0], SyntaxErrorNode) + assert return_value.value[0].message == "Name is mandatory" + + +def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(): + text = "def hello as a where b pre c post d" + + expected_concept = get_concept(name="hello", body="a", where="b", pre="c", post="d") + parser = DefaultParser() + res = parser.parse(get_context(), text) + return_value = res.value + + assert not res.status + assert isinstance(return_value, ParserResultConcept) + assert isinstance(return_value.value[0], UnexpectedTokenErrorNode) + assert return_value.value[0].message == "Syntax error." + assert return_value.value[0].expected_tokens == [Keywords.CONCEPT] + assert return_value.try_parsed == expected_concept + + +@pytest.mark.parametrize("text", [ + "def concept hello where 1+", + "def concept hello pre 1+", + "def concept hello post 1+", + "def concept hello as 1+" +]) +def test_i_can_detect_error_in_declaration(text): + context = get_context() + sheerka = context.sheerka + + parser = DefaultParser() + res = parser.parse(context, text) + return_value = res.value + + assert not res.status + assert isinstance(return_value, ParserResultConcept) + assert sheerka.isinstance(return_value.value[0], BuiltinConcepts.TOO_MANY_ERRORS) + + +def test_new_line_is_not_allowed_in_the_name(): + text = "def concept hello \n my friend as 'hello'" + + parser = DefaultParser() + res = parser.parse(get_context(), text) + return_value = res.value + + assert not res.status + assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")] diff --git a/tests/test_ExactConceptParser.py b/tests/test_ExactConceptParser.py index cd4f24f..443ee38 100644 --- a/tests/test_ExactConceptParser.py +++ b/tests/test_ExactConceptParser.py @@ -3,8 +3,10 @@ from os import path import shutil import os +from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from core.concept import Concept, Property from core.sheerka import Sheerka, ExecutionContext +from core.tokenizer import Tokenizer from parsers.DefaultParser import DefaultParser from parsers.ExactConceptParser import ExactConceptParser @@ -54,29 +56,28 @@ def test_i_can_compute_combinations_with_duplicates(): def test_i_can_recognize_a_simple_concept(): - sheerka = get_sheerka() + context = get_context() concept = get_concept("hello world", []) - sheerka.add_in_cache(concept) + context.sheerka.add_in_cache(concept) + source = "hello world" - context = ExecutionContext(sheerka, "xxxx") results = ExactConceptParser().parse(context, source) assert len(results) == 1 assert results[0].status - assert results[0].value.key == concept.key + assert results[0].value == concept def test_i_can_recognize_concepts_defined_several_times(): - sheerka = get_sheerka() - sheerka.add_in_cache(get_concept("hello world", [])) - sheerka.add_in_cache(get_concept("hello a", ["a"])) + context = get_context() + context.sheerka.add_in_cache(get_concept("hello world", [])) + context.sheerka.add_in_cache(get_concept("hello a", ["a"])) source = "hello world" - context = ExecutionContext(sheerka, "xxxx") results = ExactConceptParser().parse(context, source) assert len(results) == 2 - results = sorted(results, key=lambda x: x.value.name) # because of the usage of sets + results = sorted(results, key=lambda x: x.value.name) # because of the usage of sets assert results[0].status assert results[0].value.name == "hello a" @@ -87,11 +88,10 @@ def test_i_can_recognize_concepts_defined_several_times(): def test_i_can_recognize_a_concept_with_variables(): - sheerka = get_sheerka() + context = get_context() concept = get_concept("a + b", ["a", "b"]) - sheerka.concepts_cache[concept.key] = concept + context.sheerka.add_in_cache(concept) source = "10 + 5" - context = ExecutionContext(sheerka, "xxxx") results = ExactConceptParser().parse(context, source) assert len(results) == 1 @@ -102,11 +102,10 @@ def test_i_can_recognize_a_concept_with_variables(): def test_i_can_recognize_a_concept_with_duplicate_variables(): - sheerka = get_sheerka() + context = get_context() concept = get_concept("a + b + a", ["a", "b"]) - sheerka.concepts_cache[concept.key] = concept + context.sheerka.concepts_cache[concept.key] = concept source = "10 + 5 + 10" - context = ExecutionContext(sheerka, "xxxx") results = ExactConceptParser().parse(context, source) assert len(results) == 1 @@ -117,23 +116,43 @@ def test_i_can_recognize_a_concept_with_duplicate_variables(): def test_i_can_manage_unknown_concept(): - sheerka = get_sheerka() + context = get_context() source = "def concept hello world" # this is not a concept by itself - context = ExecutionContext(sheerka, "xxxx") res = ExactConceptParser().parse(context, source) assert not res.status - assert sheerka.isinstance(res.value, Sheerka.UNKNOWN_CONCEPT_NAME) + assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.obj == "def concept hello world" def test_i_can_detect_concepts_too_long(): - sheerka = get_sheerka() + context = get_context() source = "a very very long concept that cannot be an unique one" - context = ExecutionContext(sheerka, "xxxx") res = ExactConceptParser().parse(context, source) assert not res.status - assert sheerka.isinstance(res.value, Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME) + assert context.sheerka.isinstance(res.value, BuiltinConcepts.CONCEPT_TOO_LONG) + assert res.value.obj == "a very very long concept that cannot be an unique one" + + +def test_i_can_detect_concept_from_tokens(): + context = get_context() + concept = get_concept("hello world", []) + context.sheerka.add_in_cache(concept) + + source = "hello world" + results = ExactConceptParser().parse(context, list(Tokenizer(source))) + + assert len(results) == 1 + assert results[0].status + assert results[0].value == concept + + +def get_context(): + sheerka = Sheerka() + sheerka.initialize(root_folder) + + return ExecutionContext("sheerka", "xxxx", sheerka) def get_concept(name, variables): @@ -143,10 +162,3 @@ def get_concept(name, variables): c.props[v] = Property(v, None) c.init_key() return c - - -def get_sheerka(): - sheerka = Sheerka() - sheerka.initialize(root_folder) - - return sheerka diff --git a/tests/test_PythonParser.py b/tests/test_PythonParser.py new file mode 100644 index 0000000..a016c5d --- /dev/null +++ b/tests/test_PythonParser.py @@ -0,0 +1,77 @@ +import ast +import os +import shutil +from os import path + +import pytest + +from core.builtin_concepts import ParserResultConcept +from core.sheerka import Sheerka, ExecutionContext +from core.tokenizer import Tokenizer +from parsers.BaseParser import BaseParser +from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode + +tests_root = path.abspath("../build/tests") +root_folder = "init_folder" + + +@pytest.fixture(autouse=True) +def init_test(): + if path.exists(tests_root): + shutil.rmtree(tests_root) + + if not path.exists(tests_root): + os.makedirs(tests_root) + current_pwd = os.getcwd() + os.chdir(tests_root) + + yield None + os.chdir(current_pwd) + + +def get_context(): + sheerka = Sheerka() + sheerka.initialize(root_folder) + return ExecutionContext("test", "xxx", sheerka) + + +@pytest.mark.parametrize("text, expected", [ + ("1+1", PythonNode("1+1", ast.parse("1+1", mode="eval"))), + ("a=10", PythonNode("a=10", ast.parse("a=10", mode="exec"))), +]) +def test_i_can_parse_a_simple_expression(text, expected): + parser = PythonParser() + res = parser.parse(get_context(), text) + + assert res.status + assert res.who == parser.name + assert isinstance(res.value, ParserResultConcept) + assert res.value.value == expected + + +@pytest.mark.parametrize("text, expected", [ + ("1+1", PythonNode("1+1", ast.parse("1+1", mode="eval"))), + ("a=10", PythonNode("a=10", ast.parse("a=10", mode="exec"))), +]) +def test_i_can_parse_from_tokens(text, expected): + parser = PythonParser() + tokens = list(Tokenizer(text)) + res = parser.parse(get_context(), tokens) + + assert res.status + assert res.who == parser.name + assert isinstance(res.value, ParserResultConcept) + assert res.value.value == expected + + +def test_i_can_detect_error(): + text = "1+" + + parser = PythonParser() + res = parser.parse(get_context(), text) + + assert not res.status + assert res.who == parser.name + assert isinstance(res.value, ParserResultConcept) + assert isinstance(res.value.value[0], PythonErrorNode) + assert isinstance(res.value.value[0].exception, SyntaxError) diff --git a/tests/test_defautparser.py b/tests/test_defautparser.py deleted file mode 100644 index 97bd948..0000000 --- a/tests/test_defautparser.py +++ /dev/null @@ -1,346 +0,0 @@ -import pytest - -from parsers.ExactConceptParser import ExactConceptParser -from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode -from core.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError -from parsers.DefaultParser import DefaultParser -from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode -from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode, NopNode -import ast - - -def nop(): - return NopNode() - - -def n(number): - return NumberNode([], number) - - -def s(string, quote="'"): - return StringNode([], string, quote) - - -def v(name): - return VariableNode([], name) - - -def t(): - return TrueNode([]) - - -def f(): - return FalseNode([]) - - -def null(): - return NullNode([]) - - -def b(operator, left, right): - return BinaryNode([], operator, left, right) - - -def compare_ast(left, right): - left_as_string = ast.dump(left) - left_as_string = left_as_string.replace(", ctx=Load()", "") - left_as_string = left_as_string.replace(", kind=None", "") - - right_as_string = right if isinstance(right, str) else ast.dump(right) - right_as_string = right_as_string.replace(", ctx=Load()", "") - right_as_string = right_as_string.replace(", kind=None", "") - - return left_as_string == right_as_string - - -def test_i_can_tokenize(): - source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&" - tokens = list(Tokenizer(source)) - assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) - assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) - assert tokens[2] == Token(TokenKind.MINUS, "-", 2, 1, 3) - assert tokens[3] == Token(TokenKind.SLASH, "/", 3, 1, 4) - assert tokens[4] == Token(TokenKind.LBRACE, "{", 4, 1, 5) - assert tokens[5] == Token(TokenKind.RBRACE, "}", 5, 1, 6) - assert tokens[6] == Token(TokenKind.LBRACKET, "[", 6, 1, 7) - assert tokens[7] == Token(TokenKind.RBRACKET, "]", 7, 1, 8) - assert tokens[8] == Token(TokenKind.LPAR, "(", 8, 1, 9) - assert tokens[9] == Token(TokenKind.RPAR, ")", 9, 1, 10) - assert tokens[10] == Token(TokenKind.WHITESPACE, " ", 10, 1, 11) - assert tokens[11] == Token(TokenKind.COMMA, ",", 14, 1, 15) - assert tokens[12] == Token(TokenKind.SEMICOLON, ";", 15, 1, 16) - assert tokens[13] == Token(TokenKind.COLON, ":", 16, 1, 17) - assert tokens[14] == Token(TokenKind.DOT, ".", 17, 1, 18) - assert tokens[15] == Token(TokenKind.QMARK, "?", 18, 1, 19) - assert tokens[16] == Token(TokenKind.NEWLINE, "\n", 19, 1, 20) - assert tokens[17] == Token(TokenKind.NEWLINE, "\n\r", 20, 2, 1) - assert tokens[18] == Token(TokenKind.NEWLINE, "\r", 22, 3, 1) - assert tokens[19] == Token(TokenKind.NEWLINE, "\r\n", 23, 4, 1) - assert tokens[20] == Token(TokenKind.IDENTIFIER, "identifier_0", 25, 5, 1) - assert tokens[21] == Token(TokenKind.WHITESPACE, "\t \t", 37, 5, 13) - assert tokens[22] == Token(TokenKind.NUMBER, "10.15", 41, 5, 17) - assert tokens[23] == Token(TokenKind.WHITESPACE, " ", 46, 5, 22) - assert tokens[24] == Token(TokenKind.NUMBER, "10", 47, 5, 23) - assert tokens[25] == Token(TokenKind.WHITESPACE, " ", 49, 5, 25) - assert tokens[26] == Token(TokenKind.STRING, "'string\n'", 50, 5, 26) - assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1) - assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2) - assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18) - assert tokens[30] == Token(TokenKind.VBAR, '|', 77, 6, 19) - assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20) - - -@pytest.mark.parametrize("text, expected", [ - ("_ident", True), - ("ident", True), - ("ident123", True), - ("ident_123", True), - ("ident-like-this", True), - ("àèùéû", True), - ("011254", False), - ("0abcd", False), - ("-abcd", False) -]) -def test_i_can_tokenize_identifiers(text, expected): - tokens = list(Tokenizer(text)) - comparison = tokens[0].type == TokenKind.IDENTIFIER - assert comparison == expected - - -@pytest.mark.parametrize("text, error_text, index, line, column", [ - ("'string", "'string", 7, 1, 8), - ('"string', '"string', 7, 1, 8), - ('"a" + "string', '"string', 13, 1, 14), - ('"a"\n\n"string', '"string', 12, 3, 8), -]) -def test_i_can_detect_unfinished_strings(text, error_text, index, line, column): - with pytest.raises(LexerError) as e: - list(Tokenizer(text)) - assert e.value.text == error_text - assert e.value.index == index - assert e.value.line == line - assert e.value.column == column - - -@pytest.mark.parametrize("text, expected_text, expected_newlines", [ - ("'foo'", "'foo'", 0), - ('"foo"', '"foo"', 0), - ("'foo\rbar'", "'foo\rbar'", 1), - ("'foo\nbar'", "'foo\nbar'", 1), - ("'foo\n\rbar'", "'foo\n\rbar'", 1), - ("'foo\r\nbar'", "'foo\r\nbar'", 1), - ("'foo\r\rbar'", "'foo\r\rbar'", 2), - ("'foo\n\nbar'", "'foo\n\nbar'", 2), - ("'foo\r\n\n\rbar'", "'foo\r\n\n\rbar'", 2), - ("'\rfoo\rbar\r'", "'\rfoo\rbar\r'", 3), - ("'\nfoo\nbar\n'", "'\nfoo\nbar\n'", 3), - ("'\n\rfoo\r\n'", "'\n\rfoo\r\n'", 2), - (r"'foo\'bar'", r"'foo\'bar'", 0), - (r'"foo\"bar"', r'"foo\"bar"', 0), - ('"foo"bar"', '"foo"', 0), - ("'foo'bar'", "'foo'", 0), -]) -def test_i_can_parse_strings(text, expected_text, expected_newlines): - lexer = Tokenizer(text) - text_found, nb_of_newlines = lexer.eat_string(0, 1, 1) - - assert nb_of_newlines == expected_newlines - assert text_found == expected_text - - -@pytest.mark.parametrize("text", [ - "1", "3.1415", "0.5", "01", "-5", "-5.10" -]) -def test_i_can_parse_numbers(text): - tokens = list(Tokenizer(text)) - assert tokens[0].type == TokenKind.NUMBER - assert tokens[0].value == text - - -@pytest.mark.parametrize("text, expected", [ - ("def", Keywords.DEF), - ("concept", Keywords.CONCEPT), - ("as", Keywords.AS), - ("pre", Keywords.PRE), - ("post", Keywords.POST) -]) -def test_i_can_recognize_keywords(text, expected): - tokens = list(Tokenizer(text)) - assert tokens[0].type == TokenKind.KEYWORD - assert tokens[0].value == expected - - -# @pytest.mark.parametrize("text, expected", [ -# ("1", n(1)), -# ("+1", n(1)), -# ("-1", n(-1)), -# ("'foo'", s("foo")), -# ("identifier", v("identifier")), -# ("true", t()), -# ("false", f()), -# ("null", null()), -# ("1 * 2", b(TokenKind.STAR, n(1), n(2))), -# ("1 * 2/3", b(TokenKind.STAR, n(1), b(TokenKind.SLASH, n(2), n(3)))), -# ("1 + 2", b(TokenKind.PLUS, n(1), n(2))), -# ("1 + 2 - 3", b(TokenKind.PLUS, n(1), b(TokenKind.MINUS, n(2), n(3)))), -# ("1 + 2-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))), -# ("1 + 2 +-3", b(TokenKind.PLUS, n(1), b(TokenKind.PLUS, n(2), n(-3)))), -# ("1 + 2 * 3", b(TokenKind.PLUS, n(1), b(TokenKind.STAR, n(2), n(3)))), -# ("1 * 2 + 3", b(TokenKind.PLUS, b(TokenKind.STAR, n(1), n(2)), n(3))), -# ("(1 + 2) * 3", b(TokenKind.STAR, b(TokenKind.PLUS, n(1), n(2)), n(3))), -# ("1 * (2 + 3)", b(TokenKind.STAR, n(1), b(TokenKind.PLUS, n(2), n(3)))), -# ]) -# def test_i_can_parse_simple_expression(text, expected): -# parser = DefaultParser(text, None) -# ast = parser.parse() -# assert ast.is_same(expected) -# -# -# @pytest.mark.parametrize("text, token_found, expected_tokens", [ -# ("1+", TokenKind.EOF, -# [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, 'true', 'false', 'null', TokenKind.LPAR]), -# ("(1+1", TokenKind.EOF, [TokenKind.RPAR]) -# ]) -# def test_i_can_detect_unexpected_end_of_code(text, token_found, expected_tokens): -# parser = DefaultParser(text, None) -# parser.parse() -# -# assert parser.has_error -# assert parser.error_sink[0].tokens[0].type == token_found -# assert parser.error_sink[0].expected_tokens == expected_tokens - - -@pytest.mark.parametrize("text, expected_name, expected_expr", [ - ("def concept hello", "hello", nop()), - ("def concept hello ", "hello", nop()), - ("def concept a+b", "a + b", nop()), - ("def concept 'a+b'", "a+b", nop()), - ("def concept 'a+b'+c", "a+b + c", nop()), - ("def concept 'as if'", "as if", nop()), - ("def concept 'as' if", "as if", nop()), - ("def concept hello as 'hello'", "hello", ast.Expression(body=ast.Str(s='hello'))), - ("def concept hello as 1", "hello", ast.Expression(body=ast.Num(n=1))), - ("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))), -]) -def test_i_can_parse_def_concept(text, expected_name, expected_expr): - parser = DefaultParser(PythonParser) - tree = parser.parse(None, text) - assert isinstance(tree, DefConceptNode) - assert tree.name == expected_name - if isinstance(tree.body, PythonNode): - assert compare_ast(tree.body.ast, expected_expr) - else: - assert tree.body == expected_expr - - -def test_i_can_parse_complex_def_concept_statement(): - text = """def concept a plus b - where a,b - pre isinstance(a, int) and isinstance(b, float) - post isinstance(res, int) - as res = a + b - """ - parser = DefaultParser(PythonParser) - tree = parser.parse(None, text) - assert not parser.has_error - assert isinstance(tree, DefConceptNode) - assert tree.name == "a plus b" - assert tree.where.source == "a,b" - assert isinstance(tree.where.ast, ast.Expression) - assert tree.pre.source == "isinstance(a, int) and isinstance(b, float)" - assert isinstance(tree.pre.ast, ast.Expression) - assert tree.post.source == "isinstance(res, int)" - assert isinstance(tree.post.ast, ast.Expression) - assert tree.body.source == "res = a + b" - assert isinstance(tree.body.ast, ast.Module) - - -def test_i_can_use_colon_to_declare_indentation(): - text = """ -def concept add one to a as: - def func(x): - return x+1 - func(a) - """ - parser = DefaultParser(PythonParser) - tree = parser.parse(None, text) - assert not parser.has_error - assert isinstance(tree, DefConceptNode) - - -def test_i_can_use_colon_to_declare_indentation2(): - text = """ -def concept add one to a as: - def func(x): - return x+1 - """ - parser = DefaultParser(PythonParser) - tree = parser.parse(None, text) - assert not parser.has_error - assert isinstance(tree, DefConceptNode) - - -def test_without_colon_i_get_an_indent_error(): - text = """ -def concept add one to a as - def func(x): - return x+1 - func(a) - """ - parser = DefaultParser(PythonParser) - tree = parser.parse(None, text) - assert parser.has_error - assert isinstance(tree, DefConceptNode) - assert isinstance(parser.error_sink[0].exception, IndentationError) - - -def test_i_can_detect_error(): - """ - In this test, func(b) is not correctly indented while colon is specified after the 'as' keyword - """ - - text = """ -def concept add one to a as: - def func(x): - return x+1 - func(a) -func(b) - """ - parser = DefaultParser(PythonParser) - tree = parser.parse(None, text) - assert parser.has_error - assert isinstance(tree, DefConceptNode) - assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode) - # check that the error is caused by 'func(b)' - assert parser.error_sink[0].tokens[0].line == 6 - assert parser.error_sink[0].tokens[0].column == 1 - - -@pytest.mark.parametrize("text, token_found, expected_tokens", [ - ("def hello as 'hello'", "hello", [Keywords.CONCEPT]), - ("def concept as", Keywords.AS, [""]), -]) -def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens): - parser = DefaultParser(PythonParser) - parser.parse(None, text) - - assert parser.has_error - assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode) - assert parser.error_sink[0].tokens[0].value == token_found - assert parser.error_sink[0].expected_tokens == expected_tokens - - -@pytest.mark.parametrize("text", [ - "def concept hello where 1+", - "def concept hello pre 1+", - "def concept hello post 1+", - "def concept hello as 1+" -]) -def test_i_can_detect_error_in_declaration(text): - parser = DefaultParser(PythonParser) - parser.parse(None, text) - assert parser.has_error - assert isinstance(parser.error_sink[0], PythonErrorNode) - - - diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index 688bafe..9e75963 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -5,11 +5,13 @@ import os from os import path import shutil -from core.concept import Concept, ConceptParts, ReturnValueConcept +from core import utils +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept +from core.concept import Concept, ConceptParts from core.sheerka import Sheerka, ExecutionContext -from parsers.DefaultParser import DefConceptNode, DefaultParser +from parsers.DefaultParser import DefaultParser from parsers.PythonParser import PythonParser -from sdp.sheerkaDataProvider import SheerkaDataProvider +from sdp.sheerkaDataProvider import SheerkaDataProvider, SheerkaDataProviderDuplicateKeyError tests_root = path.abspath("../build/tests") root_folder = "init_folder" @@ -36,68 +38,55 @@ def test_root_folder_is_created_after_initialization(): assert os.path.exists(root_folder), "init folder should be created" -def test_lists_of_concepts_is_initialized(): +def test_i_can_list_builtin_concepts(): sheerka = get_sheerka() - assert len(sheerka.concepts_cache) > 1 + builtins = list(sheerka.get_builtins_classes_as_dict()) + + assert str(BuiltinConcepts.ERROR) in builtins + assert str(BuiltinConcepts.RETURN_VALUE) in builtins -def get_concept(): - text = """ - def concept a+b - where isinstance(a, int) and isinstance(b, int) - pre isinstance(a, int) and isinstance(b, int) - post isinstance(res, int) - as: - def func(x,y): - return x+y - func(a,b) - """ - parser = DefaultParser(PythonParser) - return parser.parse(None, text) +def test_builtin_concepts_are_initialized(): + sheerka = get_sheerka() + assert len(sheerka.concepts_cache) == len(BuiltinConcepts) + for concept_name in BuiltinConcepts: + assert str(concept_name) in sheerka.concepts_cache + assert sheerka.sdp.get_safe(sheerka.CONCEPTS_ENTRY, str(concept_name)) is not None + + for key, concept_class in sheerka.get_builtins_classes_as_dict().items(): + assert isinstance(sheerka.concepts_cache[key], concept_class) + + +def test_builtin_concepts_can_be_updated(): + sheerka = get_sheerka() + loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) + loaded_sheerka.desc = "I have a description" + sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka) + + sheerka = get_sheerka() + loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) + + assert loaded_sheerka.desc == "I have a description" def test_i_can_add_a_concept(): sheerka = get_sheerka() - concept = get_concept() - res = sheerka.add_concept(ExecutionContext(sheerka, "xxx"), concept) - concept_found = res.value + concept = get_default_concept() + + res = sheerka.create_new_concept(get_context(sheerka), concept) assert res.status - assert concept_found == Concept( - name="a + b", - where="isinstance(a, int) and isinstance(b, int)", - pre="isinstance(a, int) and isinstance(b, int)", - post="isinstance(res, int)", - body="def func(x,y):\n return x+y\nfunc(a,b)") - assert isinstance(concept_found.codes[ConceptParts.WHERE], ast.Expression) - assert isinstance(concept_found.codes[ConceptParts.PRE], ast.Expression) - assert isinstance(concept_found.codes[ConceptParts.POST], ast.Expression) - assert isinstance(concept_found.codes[ConceptParts.BODY], ast.Module) + assert sheerka.isinstance(res.value, BuiltinConcepts.NEW_CONCEPT) - all_props = list(concept_found.props.keys()) - assert all_props == ["a", "b"] + concept_found = res.value.body + for prop in Concept.props_to_serialize: + assert getattr(concept_found, prop) == getattr(concept, prop) assert concept_found.key == "__var__0 + __var__1" assert concept_found.id == "1001" - assert path.exists(sheerka.sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder, - "4f249487410db35d8bcbcf4521acb3dd8354978804cd99bbc4de17a323b2f237")) - - -@pytest.mark.parametrize("text, expected", [ - ("1 + 1", 2), - ("sheerka.test()", 'I have access to Sheerka !') -]) -def test_i_can_eval_simple_python_expressions(text, expected): - sheerka = Sheerka(debug=True) - sheerka.initialize(root_folder) - - res = sheerka.eval(text) - - assert len(res) == 1 - assert res[0].status - assert res[0].value.body == expected - assert sheerka.isinstance(res[0].value, ReturnValueConcept()) + assert concept.key in sheerka.concepts_cache + assert path.exists(sheerka.sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_found.get_digest())) def test_i_cannot_add_the_same_concept_twice(): @@ -105,30 +94,382 @@ def test_i_cannot_add_the_same_concept_twice(): Checks that duplicated concepts are managed by sheerka, not by sheerka.sdp :return: """ - pass + sheerka = get_sheerka() + concept = get_default_concept() + + sheerka.create_new_concept(get_context(sheerka), concept) + res = sheerka.create_new_concept(get_context(sheerka), concept) + + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR) + assert res.value.body.args[0] == "Duplicate object." -def test_i_can_get_a_concept(): +def test_i_can_get_a_builtin_concept_by_their_enum_or_the_string(): """ Checks that a concept can be found its name even when there are variables in the name (ex 'hello + a' or 'a + b' ) :return: """ - pass + sheerka = get_sheerka() + for key in sheerka.get_builtins_classes_as_dict(): + assert sheerka.get(key) is not None + assert sheerka.get(str(key)) is not None + + +def test_i_can_get_new_concept(): + sheerka = get_sheerka() + concept = get_default_concept() + + sheerka.create_new_concept(get_context(sheerka), concept) + + from_cache = sheerka.get(concept.key) + assert from_cache is not None + assert from_cache.key == concept.key + assert from_cache == concept + + +def test_i_first_look_in_local_cache(): + sheerka = get_sheerka() + concept = get_default_concept() + + sheerka.create_new_concept(get_context(sheerka), concept) + sheerka.concepts_cache[concept.key].pre = "I have modified the concept in cache" + + from_cache = sheerka.get(concept.key) + assert from_cache is not None + assert from_cache.key == concept.key + assert from_cache.pre == "I have modified the concept in cache" + + +def test_i_can_get_a_known_concept_when_not_in_cache(): + """ + When not in cache, uses sdp + :return: + """ + sheerka = get_sheerka() + concept = get_default_concept() + sheerka.create_new_concept(get_context(sheerka), concept) + + sheerka.concepts_cache = {} # reset the cache + loaded = sheerka.get(concept.key) + + assert loaded is not None + assert loaded == concept + + +def test_unknown_concept_is_return_when_the_concept_is_not_found(): + sheerka = get_sheerka() + + loaded = sheerka.get("fake_key") + + assert loaded is not None + assert sheerka.isinstance(loaded, BuiltinConcepts.UNKNOWN_CONCEPT) + assert loaded.body == "fake_key" + + +def test_i_can_instantiate_a_builtin_concept_when_it_has_its_own_class(): + sheerka = get_sheerka() + ret = sheerka.new(BuiltinConcepts.RETURN_VALUE, who="who", status="status", value="value", message="message") + + assert isinstance(ret, ReturnValueConcept) + assert ret.key == str(BuiltinConcepts.RETURN_VALUE) + assert ret.who == "who" + assert ret.status == "status" + assert ret.value == "value" + assert ret.message == "message" + + +def test_i_can_instantiate_a_builtin_concept_when_no_specific_class(): + sheerka = get_sheerka() + ret = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body="fake_concept") + + assert isinstance(ret, Concept) + assert ret.key == str(BuiltinConcepts.UNKNOWN_CONCEPT) + assert ret.body == "fake_concept" def test_i_can_instantiate_a_concept(): + sheerka = get_sheerka() + concept = get_default_concept() + sheerka.create_new_concept(get_context(sheerka), concept) + + new = sheerka.new(concept.key, a=10, b="value") + + assert sheerka.isinstance(new, concept) + for prop in Concept.props_to_serialize: + assert getattr(new, prop) == getattr(concept, prop) + + assert new.props["a"].value == 10 + assert new.props["b"].value == "value" + + +def test_instances_are_different_when_asking_for_new(): + sheerka = get_sheerka() + concept = get_default_concept() + sheerka.create_new_concept(get_context(sheerka), concept) + + new1 = sheerka.new(concept.key, a=10, b="value") + new2 = sheerka.new(concept.key, a=10, b="value") + + assert new1 == new2 + assert id(new1) != id(new2) + + +def test_i_get_the_same_instance_when_is_unique_is_true(): + sheerka = get_sheerka() + concept = get_unique_concept() + sheerka.create_new_concept(get_context(sheerka), concept) + + new1 = sheerka.new(concept.key, a=10, b="value") + new2 = sheerka.new(concept.key, a=10, b="value") + + assert new1 == new2 + assert id(new1) == id(new2) + + +def test_i_cannot_instantiate_an_unknown_concept(): + sheerka = get_sheerka() + + new = sheerka.new("fake_concept") + + assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT) + assert new.body == "fake_concept" + + +def test_i_cannot_instantiate_when_properties_are_not_recognized(): + sheerka = get_sheerka() + concept = get_default_concept() + sheerka.create_new_concept(get_context(sheerka), concept) + + new = sheerka.new(concept.key, a=10, c="value") + + assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_PROPERTY) + assert new.property_name == "c" + assert sheerka.isinstance(new.concept, concept) + + +def test_i_can_use_expect_one_when_empty(): + sheerka = get_sheerka() + + res = sheerka.expect_one(get_context(sheerka), []) + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.IS_EMPTY) + + +def test_i_can_use_expect_one_when_too_many_success(): + sheerka = get_sheerka() + + items = [ + ReturnValueConcept("who", True, None), + ReturnValueConcept("who", True, None), + ] + res = sheerka.expect_one(get_context(sheerka), items) + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.TOO_MANY_SUCCESS) + assert res.value.obj == items + + +def test_i_can_use_expect_when_only_errors_1(): + sheerka = get_sheerka() + + items = [ + ReturnValueConcept("who", False, None), + ] + res = sheerka.expect_one(get_context(sheerka), items) + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.TOO_MANY_ERRORS) + assert res.value.obj == items + + +def test_i_can_use_expect_when_only_errors_2(): + sheerka = get_sheerka() + + items = [ + ReturnValueConcept("who", False, None), + ReturnValueConcept("who", False, None), + ] + res = sheerka.expect_one(get_context(sheerka), items) + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.TOO_MANY_ERRORS) + assert res.value.obj == items + + +def test_i_can_use_expect_one_when_one_success_1(): + sheerka = get_sheerka() + + items = [ + ReturnValueConcept("who", True, None), + ] + res = sheerka.expect_one(get_context(sheerka), items) + assert res.status + assert res == items[0] + + +def test_i_can_use_expect_one_when_one_success_2(): + sheerka = get_sheerka() + + items = [ + ReturnValueConcept("who", False, None), + ReturnValueConcept("who", True, None), + ReturnValueConcept("who", False, None), + ] + res = sheerka.expect_one(get_context(sheerka), items) + assert res.status + assert res == items[1] + + +def test_i_can_use_expect_one_when_not_a_list_true(): + sheerka = get_sheerka() + + res = sheerka.expect_one(get_context(sheerka), ReturnValueConcept("who", True, None)) + assert res.status + assert res == ReturnValueConcept("who", True, None) + + +def test_i_can_use_expect_one_when_not_a_list_false(): + sheerka = get_sheerka() + + res = sheerka.expect_one(get_context(sheerka), ReturnValueConcept("who", False, None)) + + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.TOO_MANY_ERRORS) + assert res.value.obj == [ReturnValueConcept("who", False, None)] + + +@pytest.mark.parametrize("text, expected", [ + ("1 + 1", 2), + ("sheerka.test()", 'I have access to Sheerka !') +]) +def test_i_can_eval_simple_python_expressions(text, expected): + sheerka = get_sheerka() + + res = sheerka.eval(text) + + assert len(res) == 1 + assert res[0].status + assert res[0].value == expected + + +def test_i_can_eval_simple_concept(): + sheerka = get_sheerka() + concept = Concept(name="one", body="1").init_key() + sheerka.add_in_cache(concept) + + text = "one" + res = sheerka.eval(text) + assert len(res) == 1 + assert res[0].status + assert res[0].value == 1 + + +def test_i_can_eval_def_concept_request(): + text = """ +def concept a + b +where isinstance(a, int) and isinstance(b, int) +pre isinstance(a, int) and isinstance(b, int) +post isinstance(res, int) +as: + def func(x,y): + return x+y + func(a,b) """ - Test the new() functionnality - make sure that some Concept are singleton (ex Sheerka, True, False) - otherwise, make sure that new() returns a **new** instance + + expected = get_default_concept() + expected.id = "1001" + expected.desc = None + expected.init_key() + + sheerka = get_sheerka() + res = sheerka.eval(text) + + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT) + + concept_saved = res[0].value.body + + for prop in Concept.props_to_serialize: + assert getattr(concept_saved, prop) == getattr(expected, prop) + + assert concept_saved.key in sheerka.concepts_cache + assert path.exists(sheerka.sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) + + +def test_i_can_eval_def_concept_part_when_one_part_is_a_ref_of_another_concept(): + """ + In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b') + are correctly detected, because of the concept 'a plus b' in its body :return: """ - pass + sheerka = get_sheerka() + + # concept 'a plus b' is known + concept_a_plus_b = Concept(name="a plus b").set_prop("a").set_prop("b").init_key() + sheerka.add_in_cache(concept_a_plus_b) + + res = sheerka.eval("def concept a xx b as a plus b") + expected = Concept(name="a xx b", body="a plus b").set_prop("a").set_prop("b").init_key() + expected.id = "1001" + + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, BuiltinConcepts.NEW_CONCEPT) + + concept_saved = res[0].value.body + + for prop in Concept.props_to_serialize: + assert getattr(concept_saved, prop) == getattr(expected, prop) + + assert concept_saved.key in sheerka.concepts_cache + assert path.exists(sheerka.sdp.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_digest())) + + +def test_i_cannot_eval_the_same_def_concept_twice(): + text = """ +def concept a + b +where isinstance(a, int) and isinstance(b, int) +pre isinstance(a, int) and isinstance(b, int) +post isinstance(res, int) +as: + def func(x,y): + return x+y + func(a,b) + """ + + sheerka = get_sheerka() + sheerka.eval(text) + res = sheerka.eval(text) + + assert len(res) == 1 + assert not res[0].status + assert sheerka.isinstance(res[0].value, BuiltinConcepts.CONCEPT_ALREADY_DEFINED) def get_sheerka(): sheerka = Sheerka() sheerka.initialize(root_folder) - return sheerka \ No newline at end of file + return sheerka + + +def get_context(sheerka): + return ExecutionContext("test", "xxx", sheerka) + + +def get_default_concept(): + concept = Concept( + name="a + b", + where="isinstance(a, int) and isinstance(b, int)", + pre="isinstance(a, int) and isinstance(b, int)", + post="isinstance(res, int)", + body="def func(x,y):\n return x+y\nfunc(a,b)", + desc="specific description") + concept.set_prop("a", "value1") + concept.set_prop("b", "value2") + + return concept + + +def get_unique_concept(): + return Concept(name="unique", is_unique=True) diff --git a/tests/test_sheerkaDataProvider.py b/tests/test_sheerkaDataProvider.py index bc9815e..0f9186e 100644 --- a/tests/test_sheerkaDataProvider.py +++ b/tests/test_sheerkaDataProvider.py @@ -122,7 +122,7 @@ class ObjWithDigestNoKey: self.b == obj.b def __repr__(self): - return f"ObjNoKey({self.a}, {self.b})" + return f"ObjWithDigestNoKey({self.a}, {self.b})" def get_digest(self): return str(self.a) + str(self.b) @@ -142,7 +142,7 @@ class ObjWithDigestWithKey: self.b == obj.b def __repr__(self): - return f"ObjNoKey({self.a}, {self.b})" + return f"ObjWithDigestWithKey({self.a}, {self.b})" def get_key(self): return self.a @@ -466,7 +466,7 @@ def test_i_cannot_add_the_same_digest_twice_in_the_same_entry(): assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() assert error.value.key == "entry" - assert error.value.args[0] == "duplicate key" + assert error.value.args[0] == "Duplicate object." def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2(): @@ -483,7 +483,7 @@ def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2(): assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() assert error.value.key == "entry" - assert error.value.args[0] == "duplicate key" + assert error.value.args[0] == "Duplicate object." def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3(): @@ -499,7 +499,7 @@ def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3(): assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() assert error.value.key == "entry.a" - assert error.value.args[0] == "duplicate key" + assert error.value.args[0] == "Duplicate object." def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4(): @@ -516,7 +516,7 @@ def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4(): assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() assert error.value.key == "entry.a" - assert error.value.args[0] == "duplicate key" + assert error.value.args[0] == "Duplicate object." def test_i_can_get_and_set_key(): @@ -1198,7 +1198,7 @@ def test_i_can_remove_from_cache(): assert not sdp.in_cache(category, key) -def test_i_can_test_than_an_entry_exits(): +def test_i_can_test_than_an_entry_exists(): sdp = SheerkaDataProvider(".sheerka") assert not sdp.exists("entry") @@ -1206,6 +1206,67 @@ def test_i_can_test_than_an_entry_exits(): assert sdp.exists("entry") +def test_i_can_test_if_a_key_exists(): + sdp = SheerkaDataProvider(".sheerka") + obj = ObjWithDigestWithKey("key", "value") + + assert not sdp.exists("entry") + assert not sdp.exists("entry", obj.get_key()) + + sdp.add(evt_digest, "entry", obj) + assert not sdp.exists("entry", "wrong_key") + assert sdp.exists("entry", obj.get_key()) + + +def test_i_can_test_that_the_object_exists(): + sdp = SheerkaDataProvider(".sheerka") + obj = ObjWithDigestWithKey("key", "value") + + assert not sdp.exists("entry") + assert not sdp.exists("entry", obj.get_key()) + assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) + + # test for a single item under the key + sdp.add(evt_digest, "entry", obj) + assert not sdp.exists("entry", obj.get_key(), "wrong_digest") + assert sdp.exists("entry", obj.get_key(), obj.get_digest()) + + # test for a list item under the key + sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2")) + assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) + + sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3")) + assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) + + sdp.add(evt_digest, "entry2", obj) + assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) + + +def test_i_can_test_than_the_object_exists_when_using_references(): + sdp = SheerkaDataProvider(".sheerka") + sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) + obj = ObjWithDigestWithKey("key", "value") + + assert not sdp.exists("entry") + assert not sdp.exists("entry", obj.get_key()) + assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) + + # test for a single item under the key + sdp.add(evt_digest, "entry", obj, use_ref=True) + assert not sdp.exists("entry", obj.get_key(), "wrong_digest") + assert sdp.exists("entry", obj.get_key(), obj.get_digest()) + + # test for a list item under the key + sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2"), use_ref=True) + assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) + + sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3"), use_ref=True) + assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) + + sdp.add(evt_digest, "entry2", obj, use_ref=True) + assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) + + def test_i_can_save_and_load_object_ref_with_history(): sdp = SheerkaDataProvider(".sheerka") obj = ObjDumpJson("my_key", "value1") diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py new file mode 100644 index 0000000..eceaceb --- /dev/null +++ b/tests/test_tokenizer.py @@ -0,0 +1,119 @@ +import pytest +from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords + + +def test_i_can_tokenize(): + source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&" + tokens = list(Tokenizer(source)) + assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) + assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) + assert tokens[2] == Token(TokenKind.MINUS, "-", 2, 1, 3) + assert tokens[3] == Token(TokenKind.SLASH, "/", 3, 1, 4) + assert tokens[4] == Token(TokenKind.LBRACE, "{", 4, 1, 5) + assert tokens[5] == Token(TokenKind.RBRACE, "}", 5, 1, 6) + assert tokens[6] == Token(TokenKind.LBRACKET, "[", 6, 1, 7) + assert tokens[7] == Token(TokenKind.RBRACKET, "]", 7, 1, 8) + assert tokens[8] == Token(TokenKind.LPAR, "(", 8, 1, 9) + assert tokens[9] == Token(TokenKind.RPAR, ")", 9, 1, 10) + assert tokens[10] == Token(TokenKind.WHITESPACE, " ", 10, 1, 11) + assert tokens[11] == Token(TokenKind.COMMA, ",", 14, 1, 15) + assert tokens[12] == Token(TokenKind.SEMICOLON, ";", 15, 1, 16) + assert tokens[13] == Token(TokenKind.COLON, ":", 16, 1, 17) + assert tokens[14] == Token(TokenKind.DOT, ".", 17, 1, 18) + assert tokens[15] == Token(TokenKind.QMARK, "?", 18, 1, 19) + assert tokens[16] == Token(TokenKind.NEWLINE, "\n", 19, 1, 20) + assert tokens[17] == Token(TokenKind.NEWLINE, "\n\r", 20, 2, 1) + assert tokens[18] == Token(TokenKind.NEWLINE, "\r", 22, 3, 1) + assert tokens[19] == Token(TokenKind.NEWLINE, "\r\n", 23, 4, 1) + assert tokens[20] == Token(TokenKind.IDENTIFIER, "identifier_0", 25, 5, 1) + assert tokens[21] == Token(TokenKind.WHITESPACE, "\t \t", 37, 5, 13) + assert tokens[22] == Token(TokenKind.NUMBER, "10.15", 41, 5, 17) + assert tokens[23] == Token(TokenKind.WHITESPACE, " ", 46, 5, 22) + assert tokens[24] == Token(TokenKind.NUMBER, "10", 47, 5, 23) + assert tokens[25] == Token(TokenKind.WHITESPACE, " ", 49, 5, 25) + assert tokens[26] == Token(TokenKind.STRING, "'string\n'", 50, 5, 26) + assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1) + assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2) + assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18) + assert tokens[30] == Token(TokenKind.VBAR, '|', 77, 6, 19) + assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20) + + +@pytest.mark.parametrize("text, expected", [ + ("_ident", True), + ("ident", True), + ("ident123", True), + ("ident_123", True), + ("ident-like-this", True), + ("àèùéû", True), + ("011254", False), + ("0abcd", False), + ("-abcd", False) +]) +def test_i_can_tokenize_identifiers(text, expected): + tokens = list(Tokenizer(text)) + comparison = tokens[0].type == TokenKind.IDENTIFIER + assert comparison == expected + + +@pytest.mark.parametrize("text, error_text, index, line, column", [ + ("'string", "'string", 7, 1, 8), + ('"string', '"string', 7, 1, 8), + ('"a" + "string', '"string', 13, 1, 14), + ('"a"\n\n"string', '"string', 12, 3, 8), +]) +def test_i_can_detect_unfinished_strings(text, error_text, index, line, column): + with pytest.raises(LexerError) as e: + list(Tokenizer(text)) + assert e.value.text == error_text + assert e.value.index == index + assert e.value.line == line + assert e.value.column == column + + +@pytest.mark.parametrize("text, expected_text, expected_newlines", [ + ("'foo'", "'foo'", 0), + ('"foo"', '"foo"', 0), + ("'foo\rbar'", "'foo\rbar'", 1), + ("'foo\nbar'", "'foo\nbar'", 1), + ("'foo\n\rbar'", "'foo\n\rbar'", 1), + ("'foo\r\nbar'", "'foo\r\nbar'", 1), + ("'foo\r\rbar'", "'foo\r\rbar'", 2), + ("'foo\n\nbar'", "'foo\n\nbar'", 2), + ("'foo\r\n\n\rbar'", "'foo\r\n\n\rbar'", 2), + ("'\rfoo\rbar\r'", "'\rfoo\rbar\r'", 3), + ("'\nfoo\nbar\n'", "'\nfoo\nbar\n'", 3), + ("'\n\rfoo\r\n'", "'\n\rfoo\r\n'", 2), + (r"'foo\'bar'", r"'foo\'bar'", 0), + (r'"foo\"bar"', r'"foo\"bar"', 0), + ('"foo"bar"', '"foo"', 0), + ("'foo'bar'", "'foo'", 0), +]) +def test_i_can_parse_strings(text, expected_text, expected_newlines): + lexer = Tokenizer(text) + text_found, nb_of_newlines = lexer.eat_string(0, 1, 1) + + assert nb_of_newlines == expected_newlines + assert text_found == expected_text + + +@pytest.mark.parametrize("text", [ + "1", "3.1415", "0.5", "01", "-5", "-5.10" +]) +def test_i_can_parse_numbers(text): + tokens = list(Tokenizer(text)) + assert tokens[0].type == TokenKind.NUMBER + assert tokens[0].value == text + + +@pytest.mark.parametrize("text, expected", [ + ("def", Keywords.DEF), + ("concept", Keywords.CONCEPT), + ("as", Keywords.AS), + ("pre", Keywords.PRE), + ("post", Keywords.POST) +]) +def test_i_can_recognize_keywords(text, expected): + tokens = list(Tokenizer(text)) + assert tokens[0].type == TokenKind.KEYWORD + assert tokens[0].value == expected diff --git a/tests/test_utils.py b/tests/test_utils.py index 8415b98..fc67b1e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,7 +6,17 @@ import pytest (None, "",), ([], ""), (["hello", "world"], "hello world"), - (["hello world", "my friend"], '"hello world" "my friend"') + # (["hello world", "my friend"], '"hello world" "my friend"') ]) def test_i_can_create_string_from_a_list(lst, as_string): assert core.utils.sysarg_to_string(lst) == as_string + + +def test_i_can_get_classes(): + classes = list(core.utils.get_classes("core.builtin_concepts")) + error_concept = core.utils.get_class("core.builtin_concepts.ErrorConcept") + return_value_concept = core.utils.get_class("core.builtin_concepts.ReturnValueConcept") + + assert len(classes) > 2 + assert error_concept in classes + assert return_value_concept in classes