From 576ce777402f7a19338e0a395abddc0eedebdbad Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Sat, 9 Nov 2019 17:29:50 +0100 Subject: [PATCH] Added ExactConceptParser --- core/concept.py | 81 ++++++++++++- core/sheerka.py | 62 ++++++---- {parsers => core}/tokenizer.py | 22 ++++ evaluators/DefaultEvaluator.py | 4 +- parsers/BaseParser.py | 9 +- parsers/DefaultParser.py | 196 ++++++++++++++++--------------- parsers/ExactConceptParser.py | 107 +++++++++++++++++ parsers/PythonParser.py | 31 +++-- tests/test_ExactConceptParser.py | 152 ++++++++++++++++++++++++ tests/test_concept.py | 38 ++++++ tests/test_defautparser.py | 49 ++++---- tests/test_sheerka.py | 21 ++-- 12 files changed, 603 insertions(+), 169 deletions(-) rename {parsers => core}/tokenizer.py (93%) create mode 100644 parsers/ExactConceptParser.py create mode 100644 tests/test_ExactConceptParser.py create mode 100644 tests/test_concept.py diff --git a/core/concept.py b/core/concept.py index e46ff74..f4c498b 100644 --- a/core/concept.py +++ b/core/concept.py @@ -2,6 +2,8 @@ import hashlib from enum import Enum import logging +from core.tokenizer import Tokenizer, TokenKind + log = logging.getLogger(__name__) @@ -20,6 +22,8 @@ class Concept: """ props_to_serialize = ("id", "is_builtin", "name", "where", "pre", "post", "body", "desc") + PROPERTY_PREFIX = "__var__" + def __init__(self, name=None, is_builtin=False, where=None, pre=None, post=None, body=None, desc=None, key=None): self.name = name self.is_builtin = is_builtin @@ -31,7 +35,7 @@ class Concept: self.id = None self.key = key - self.props = [] # list of Property for this concept + self.props = {} # list of Property for this concept self.functions = {} # list of helper functions self.codes = {} # cached ast for the where, pre, post and body parts @@ -54,10 +58,48 @@ class Concept: def get_key(self): return self.key + def init_key(self, tokens=None): + """ + Create the key for this concept. + Must be called only when the concept if fully initialized + + The method is not called set_key to make sure that no other class set the key by mistake + :param tokens: + :return: + """ + if self.key is not None: + return self.key + + if tokens is None: + tokens = iter(Tokenizer(self.name)) + + variables = list(self.props.keys()) + + key = "" + first = True + for token in tokens: + if token.type == TokenKind.EOF: + break + if token.type == TokenKind.WHITESPACE: + continue + if not first: + key += " " + if variables is not None and token.value in variables: + key += self.PROPERTY_PREFIX + str(variables.index(token.value)) + else: + key += token.value[1:-1] if token.type == TokenKind.STRING else token.value + first = False + + self.key = key + return self + def add_codes(self, codes): """ - From a dict of <ConceptParts, AST> - fill the codes + Gets the ASTs for 'where', 'pre', 'post' and 'body' + There ASTs are know when the concept is freshly parsed. + So the values are kept in cache. 
+ + For concepts loaded from sdp, these ASTs must be created again :param codes: :return: """ @@ -68,6 +110,8 @@ class Concept: if key in possibles_codes: self.codes[ConceptParts(key)] = codes[key] + return self + def get_digest(self): """ Returns the digest of the event @@ -76,23 +120,47 @@ class Concept: return hashlib.sha256(f"Concept:{self.name}{self.pre}{self.post}{self.body}".encode("utf-8")).hexdigest() def to_dict(self): + """ + Returns a dict representing 'self' + :return: + """ props_as_dict = dict((prop, getattr(self, prop)) for prop in self.props_to_serialize) - props_as_dict["props"] = [(p.name, p.value) for p in self.props] + props_as_dict["props"] = [(p, self.props[p].value) for p in self.props] return props_as_dict def from_dict(self, as_dict): + """ + Initializes 'self' from a dict + :param as_dict: + :return: + """ for prop in self.props_to_serialize: if prop in as_dict: setattr(self, prop, as_dict[prop]) if "props" in as_dict: for n, v in as_dict["props"]: - self.props.append(Property(n, v)) + self.set_prop(n, v) return self def update_from(self, other): + """ + Update self using the properties of another concept + This method is to mimic the class to instance pattern + 'other' is the class, the template, and 'self' is a new instance + :param other: + :return: + """ for prop in self.props_to_serialize: setattr(self, prop, getattr(other, prop)) + return self + + def set_prop(self, prop_name, prop_value): + self.props[prop_name] = Property(prop_name, prop_value) + + def set_prop_by_index(self, index, prop_value): + prop_name = list(self.props.keys())[index] + self.props[prop_name] = Property(prop_name, prop_value) class ErrorConcept(Concept): NAME = "Error" @@ -132,3 +200,6 @@ class Property: def __init__(self, name, value): self.name = name self.value = value + + def __repr__(self): + return f"{self.name}={self.value}" diff --git a/core/sheerka.py b/core/sheerka.py index 3600d5b..c72233a 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -1,9 +1,9 @@ from dataclasses import dataclass from core.concept import Concept, ErrorConcept, Property, TooManySuccessConcept, ReturnValueConcept -from parsers.PythonParser import PythonParser, PythonGetNamesVisitor, PythonNode +from parsers.PythonParser import PythonGetNamesVisitor, PythonNode from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError -from parsers.DefaultParser import DefaultParser, DefConceptNode +from parsers.DefaultParser import DefConceptNode, DefaultParser import core.utils import logging @@ -50,6 +50,7 @@ class Sheerka(Concept): NAME = "Sheerka" UNKNOWN_CONCEPT_NAME = "Unknown Concept" SUCCESS_CONCEPT_NAME = "Success" + CONCEPT_TOO_LONG_CONCEPT_NAME = "Concept too long" CONCEPTS_ENTRY = "All_Concepts" BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" @@ -60,6 +61,8 @@ class Sheerka(Concept): super().__init__(Sheerka.NAME) # cache of the most used concepts + # Note that these are only templates + # They are used as a footprint for instantiation self.concepts_cache = {} # a concept can be instantiated @@ -91,8 +94,9 @@ class Sheerka(Concept): try: self.init_logging() self.sdp = SheerkaDataProvider(root_folder) - self.parsers.append(lambda text: DefaultParser(text, PythonParser)) - self.parsers.append(lambda text: PythonParser(text)) + self.parsers.append(core.utils.get_class("parsers.DefaultParser.DefaultParser")) + self.parsers.append(core.utils.get_class("parsers.PythonParser.PythonParser")) + 
#self.parsers.append(core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")) self.evaluators.append(core.utils.get_object("evaluators.DefaultEvaluator.DefaultEvaluator")) self.evaluators.append(core.utils.get_object("evaluators.AddConceptEvaluator.AddConceptEvaluator")) @@ -103,7 +107,7 @@ class Sheerka(Concept): self.create_builtin_concepts() except IOError as e: - return ReturnValue(self, False, self.get(Sheerka.ERROR_CONCEPT_NAME), e) + return ReturnValue(self, False, self.get(ErrorConcept.NAME), e) return ReturnValue(self, True, self.get(Sheerka.SUCCESS_CONCEPT_NAME)) @@ -129,12 +133,15 @@ class Sheerka(Concept): self, Concept(Sheerka.UNKNOWN_CONCEPT_NAME, key=Sheerka.UNKNOWN_CONCEPT_NAME), Concept(Sheerka.SUCCESS_CONCEPT_NAME, key=Sheerka.SUCCESS_CONCEPT_NAME), + Concept(Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME, key=Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME), ErrorConcept(), TooManySuccessConcept(), ReturnValueConcept(), ] for concept in builtins: + self.add_in_cache(concept) + from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) if from_db is None: log.debug(f"'{concept.name}' concept is not found. Adding.") @@ -143,7 +150,6 @@ class Sheerka(Concept): else: log.debug(f"Found concept '{from_db}'. Updating.") concept.update_from(from_db) - self.concepts_cache[concept.key] = concept def init_logging(self): if self.debug: @@ -158,7 +164,7 @@ class Sheerka(Concept): def eval(self, text): evt_digest = self.sdp.save_event(Event(text)) exec_context = ExecutionContext(self, evt_digest) - return_values = self.try_parse(text) + return_values = self.try_parse(exec_context, text) return_values = self.try_eval(exec_context, return_values) # return_values = [] @@ -172,17 +178,17 @@ class Sheerka(Concept): return return_values - def try_parse(self, text): + def try_parse(self, context, text): result = [] log.debug(f"Parsing '{text}'") for parser in self.parsers: - p = parser(text) + p = parser() # try: # tree = p.parse() # result.append((p.name, tree)) # except Exception as e: # result.append((p.name, e)) - tree = p.parse() + tree = p.parse(context, text) result.append(ReturnValue(p.name, not p.has_error, p.error_sink if p.has_error else tree)) return result @@ -235,11 +241,12 @@ class Sheerka(Concept): setattr(concept, prop, source) # try to find variables (eg props) + # Note that with this method, the variables will be created in the order of appearance for token in def_concept_node.tokens["name"]: if token.value in get_names_visitor.names: - concept.props.append(Property(token.value, None)) + concept.set_prop(token.value, None) - concept.key = DefaultParser.get_concept_name(def_concept_node.tokens["name"], [p.name for p in concept.props]) + concept.init_key(def_concept_node.tokens["name"]) concept.add_codes(def_concept_node.get_codes()) self.set_id_if_needed(concept, False) @@ -249,22 +256,34 @@ class Sheerka(Concept): return ReturnValue(self.add_concept.__name__, False, ErrorConcept(body=error), error.args[0]) return ReturnValue(self.add_concept.__name__, True, concept) - def get(self, concept_name): + def add_in_cache(self, concept): + """ + Adds a concept template in cache. 
+ The cache is used as a proxy before looking at sdp + :param concept: + :return: + """ + self.concepts_cache[concept.key] = concept + + def get(self, concept_key): """ Tries to find a concept - :param concept_name: + TODO: how to manage single vs multiple instances + :param concept_key: :return: """ # first search in cache - if concept_name in self.concepts_cache: - return self.concepts_cache[concept_name] + if concept_key in self.concepts_cache: + return self.concepts_cache[concept_key] - return self.sdp.get(self.CONCEPTS_ENTRY, concept_name) + return self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key) or \ + self.new(self.UNKNOWN_CONCEPT_NAME, body=concept_key) def new(self, concept, **kwargs): """ Returns an instance of a new concept + TODO: Checks if the concept is supposed to be unique (ex Sheerka, or the number 'one' for example) :param concept: :param kwargs: :return: @@ -287,11 +306,14 @@ class Sheerka(Concept): :return: """ - if not isinstance(a, Concept) or not isinstance(b, Concept): - return False + if not isinstance(a, Concept): + raise SyntaxError("The first parameter of isinstance MUST be a concept") + + b_key = b if isinstance(b, str) else b.key # TODO : manage when a is the list of all possible b - return a.key == b.key + # for example, if a is a color, it will be found the entry 'All_Colors' + return a.key == b_key @staticmethod def test(): diff --git a/parsers/tokenizer.py b/core/tokenizer.py similarity index 93% rename from parsers/tokenizer.py rename to core/tokenizer.py index b9473e4..cb72dd6 100644 --- a/parsers/tokenizer.py +++ b/core/tokenizer.py @@ -31,6 +31,20 @@ class TokenKind(Enum): VBAR = "vbar" AMPER = "amper" EQUALS = "=" + AT = "at" + BACK_QUOTE = "bquote" # ` + BACK_SLASH = "bslash" # \ + CARAT = "carat" # ^ + DOLLAR = "dollar" # $ + EMARK = "emark" # ! + GREATER = "greater" # > + LESS = "less" # < + HASH = "HASH" # # + TILDE = "tilde" # ~ + UNDERSCORE = "underscore" # _ + DEGREE = "degree" # ° + + @dataclass() @@ -159,6 +173,14 @@ class Tokenizer: yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column) self.i += 1 self.column += 1 + elif c == "|": + yield Token(TokenKind.VBAR, "|", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "&": + yield Token(TokenKind.AMPER, "&", self.i, self.line, self.column) + self.i += 1 + self.column += 1 elif c == "\n" or c == "\r": newline = self.eat_newline(self.i) yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column) diff --git a/evaluators/DefaultEvaluator.py b/evaluators/DefaultEvaluator.py index 2b38297..d70a11b 100644 --- a/evaluators/DefaultEvaluator.py +++ b/evaluators/DefaultEvaluator.py @@ -1,5 +1,5 @@ +from core.concept import TooManySuccessConcept from core.sheerka import ReturnValue -from core.sheerka import Sheerka from evaluators.BaseEvaluator import BaseEvaluator import logging @@ -32,7 +32,7 @@ class DefaultEvaluator(BaseEvaluator): log.debug(f"{number_of_successful} / {total_items} good items. Too many success") return ReturnValue(self.name, False, - context.sheerka.new(Sheerka.TOO_MANY_SUCCESS_CONCEPT_NAME, body=items)) + context.sheerka.new(TooManySuccessConcept.NAME, body=items)) # only errors, i cannot help you log.debug(f"{total_items} items. 
Only errors") diff --git a/parsers/BaseParser.py b/parsers/BaseParser.py index ca00c5c..397184b 100644 --- a/parsers/BaseParser.py +++ b/parsers/BaseParser.py @@ -1,5 +1,5 @@ -from dataclasses import dataclass, field -from parsers.tokenizer import TokenKind, Keywords +from dataclasses import dataclass +from core.tokenizer import TokenKind, Keywords @dataclass() @@ -21,13 +21,12 @@ class ErrorNode(Node): class BaseParser: - def __init__(self, name, text): + def __init__(self, name): self.name = name - self.text = text self.has_error = False self.error_sink = [] - def parse(self): + def parse(self, context, text): pass @staticmethod diff --git a/parsers/DefaultParser.py b/parsers/DefaultParser.py index f00c209..fccf6c9 100644 --- a/parsers/DefaultParser.py +++ b/parsers/DefaultParser.py @@ -1,5 +1,5 @@ from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode -from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords +from core.tokenizer import Tokenizer, TokenKind, Token, Keywords from dataclasses import dataclass, field import logging @@ -147,24 +147,29 @@ class BinaryNode(DefaultParserNode): class DefaultParser(BaseParser): - def __init__(self, text, sub_parser): - BaseParser.__init__(self, "DefaultParser", text) + """ + Parse sheerka specific grammar (like def concept) + """ + def __init__(self, sub_parser=None): + BaseParser.__init__(self, "DefaultParser") self.sub_parser = sub_parser - self.lexer = Tokenizer(text) + self.lexer_iter = None + self._current = None + self.context = None + self.text = None + + def reset_parser(self, context, text): + self.context = context + # hack before implementing all the sub parsers + if context: + self.sub_parser = context.sheerka.parsers[1] + + self.text = text self.lexer_iter = iter(Tokenizer(text)) self._current = None self.next_token() - def collect_tokens(self, *args): - result = [] - for item in args: - if isinstance(item, Node): - result.extend(item.tokens) - else: - result.append(item) - return result - def add_error(self, error, next_token=True): self.has_error = True self.error_sink.append(error) @@ -186,21 +191,23 @@ class DefaultParser(BaseParser): return @staticmethod - def get_concept_name(tokens, variables=None): - name = "" + def get_concept_key(tokens, variables=None): + key = "" first = True for token in tokens: if token.type == TokenKind.EOF: break + if token.type == TokenKind.WHITESPACE: + continue if not first: - name += " " + key += " " if variables is not None and token.value in variables: - name += "__var__" + str(variables.index(token.value)) + key += "__var__" + str(variables.index(token.value)) else: - name += token.value[1:-1] if token.type == TokenKind.STRING else token.value + key += token.value[1:-1] if token.type == TokenKind.STRING else token.value first = False - return name + return key @staticmethod def fix_indentation(tokens): @@ -242,7 +249,8 @@ class DefaultParser(BaseParser): return tokens[4:] - def parse(self): + def parse(self, context, text): + self.reset_parser(context, text) return self.parse_statement() def parse_statement(self): @@ -277,7 +285,7 @@ class DefaultParser(BaseParser): name_as_tokens.append(token) self.next_token() token = self.get_token() - name = self.get_concept_name(name_as_tokens) + name = self.get_concept_key(name_as_tokens) tokens_found["name"] = name_as_tokens # try to parse as, where, pre and post declarations @@ -328,8 +336,8 @@ class DefaultParser(BaseParser): # start = current_tokens[0].index # end = current_tokens[-1].index + len(current_tokens[-1].value) - 
sub_parser = self.sub_parser(current_tokens, source=keyword.value) - sub_tree = sub_parser.parse() + sub_parser = self.sub_parser(source=keyword.value) + sub_tree = sub_parser.parse(self.context, current_tokens) if isinstance(sub_tree, ErrorNode): self.add_error(sub_tree, False) asts[keyword] = sub_tree @@ -344,74 +352,74 @@ class DefaultParser(BaseParser): log.debug(f"Found DefConcept node '{def_concept_node}'") return def_concept_node - def parse_expression(self): - return self.parse_addition() - - def parse_addition(self): - left = self.parse_multiply() - token = self.get_token() - if token is None or token.type == TokenKind.EOF: - return left - - if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5 - right = self.parse_addition() - return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right) - - if token.type not in (TokenKind.PLUS, TokenKind.MINUS): - return left - - self.next_token() - right = self.parse_addition() - return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right) - - def parse_multiply(self): - left = self.parse_atom() - token = self.get_token() - if token is None or token.type == TokenKind.EOF: - return left - - if token.type not in (TokenKind.STAR, TokenKind.SLASH): - return left - - self.next_token() - right = self.parse_multiply() - return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right) - - def parse_atom(self): - token = self.get_token() - if token.type == TokenKind.NUMBER: - self.next_token() - return NumberNode([token], float(token.value) if '.' in token.value else int(token.value)) - elif token.type == TokenKind.STRING: - self.next_token() - return StringNode([token], token.value[1:-1], token.value[0]) - elif token.type == TokenKind.IDENTIFIER: - if token.value == "true": - self.next_token() - return TrueNode([token]) - elif token.value == "false": - self.next_token() - return FalseNode([token]) - elif token.value == "null": - self.next_token() - return NullNode([token]) - else: - self.next_token() - return VariableNode([token], token.value) - elif token.type == TokenKind.LPAR: - self.next_token() - exp = self.parse_expression() - token = self.get_token() - self.next_token() - - if token.type != TokenKind.RPAR: - error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR]) - self.add_error(error) - return error - - return exp - else: - error = UnexpectedTokenErrorNode([token], "Unexpected token", - [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false", - "null", TokenKind.LPAR]) - return self.add_error(error) + # def parse_expression(self): + # return self.parse_addition() + # + # def parse_addition(self): + # left = self.parse_multiply() + # token = self.get_token() + # if token is None or token.type == TokenKind.EOF: + # return left + # + # if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5 + # right = self.parse_addition() + # return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right) + # + # if token.type not in (TokenKind.PLUS, TokenKind.MINUS): + # return left + # + # self.next_token() + # right = self.parse_addition() + # return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right) + # + # def parse_multiply(self): + # left = self.parse_atom() + # token = self.get_token() + # if token is None or token.type == TokenKind.EOF: + # return left + # + # if token.type not in (TokenKind.STAR, TokenKind.SLASH): + # return left + # + # self.next_token() + # right = 
self.parse_multiply() + # return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right) + # + # def parse_atom(self): + # token = self.get_token() + # if token.type == TokenKind.NUMBER: + # self.next_token() + # return NumberNode([token], float(token.value) if '.' in token.value else int(token.value)) + # elif token.type == TokenKind.STRING: + # self.next_token() + # return StringNode([token], token.value[1:-1], token.value[0]) + # elif token.type == TokenKind.IDENTIFIER: + # if token.value == "true": + # self.next_token() + # return TrueNode([token]) + # elif token.value == "false": + # self.next_token() + # return FalseNode([token]) + # elif token.value == "null": + # self.next_token() + # return NullNode([token]) + # else: + # self.next_token() + # return VariableNode([token], token.value) + # elif token.type == TokenKind.LPAR: + # self.next_token() + # exp = self.parse_expression() + # token = self.get_token() + # self.next_token() + # + # if token.type != TokenKind.RPAR: + # error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR]) + # self.add_error(error) + # return error + # + # return exp + # else: + # error = UnexpectedTokenErrorNode([token], "Unexpected token", + # [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false", + # "null", TokenKind.LPAR]) + # return self.add_error(error) diff --git a/parsers/ExactConceptParser.py b/parsers/ExactConceptParser.py new file mode 100644 index 0000000..b7eb9f1 --- /dev/null +++ b/parsers/ExactConceptParser.py @@ -0,0 +1,107 @@ +from core.sheerka import ReturnValue +from parsers.BaseParser import BaseParser +from core.tokenizer import Tokenizer, Keywords, TokenKind +from core.concept import Concept + + +class ExactConceptParser(BaseParser): + """ + Tries to recognize a single concept + """ + + MAX_WORDS_SIZE = 10 + + def __init__(self): + BaseParser.__init__(self, "ConceptParser") + + def parse(self, context, text): + res = [] + sheerka = context.sheerka + words = self.get_words(text) + if len(words) > self.MAX_WORDS_SIZE: + return ReturnValue(self.name, False, sheerka.new(sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME)) + + recognized = False + for combination in self.combinations(words): + concept_key = " ".join(combination) + + # Very important question to think about later + # Must we return a new instance or the existing one + # That will depend on the context + # Let's return a new one for now and see if it works + concept = sheerka.new(concept_key) + if not sheerka.isinstance(concept, sheerka.UNKNOWN_CONCEPT_NAME): + # update the properties if needed + for i, token in enumerate(combination): + if token.startswith(Concept.PROPERTY_PREFIX): + index = int(token[len(Concept.PROPERTY_PREFIX):]) + concept.set_prop_by_index(index, words[i]) + res.append(ReturnValue(self.name, True, concept)) + recognized = True + + if recognized: + return res + + return ReturnValue(self.name, False, sheerka.new(sheerka.UNKNOWN_CONCEPT_NAME, body=text)) + + @staticmethod + def get_words(text): + res = [] + for t in iter(Tokenizer(text)): + if t.type == TokenKind.EOF: + break + if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE: + continue + res.append(t.value.value if isinstance(t.value, Keywords) else t.value) + return res + + def combinations(self, iterable): + # combinations('foo', 'bar', 'baz') --> + # ('foo', 'bar', 'baz'), + # ('__var__0', 'bar', 'baz'), + # ('foo', '__var__0', 'baz'), + # ('foo', 'bar', '__var__0'), + # ('__var__0', '__var__1', 'baz'), + # ('__var__0', 
'bar', '__var__1'), + # ('foo', '__var__0', '__var__1'), + # ('__var__0', '__var__1', '__var__2')] + + pool = tuple(iterable) + n = len(pool) + + res = set() + + for r in range(0, n + 1): + indices = list(range(r)) + res.add(self.get_tuple(pool, indices)) + while True: + for i in reversed(range(r)): + if indices[i] != i + n - r: + break + else: + break + indices[i] += 1 + for j in range(i + 1, r): + indices[j] = indices[j - 1] + 1 + res.add(self.get_tuple(pool, indices)) + + return res + + @staticmethod + def get_tuple(pool, indices): + res = [] + vars = {} + k = 0 + + # init vars + for i in indices: + value = pool[i] + if value not in vars: + vars[pool[i]] = f"{Concept.PROPERTY_PREFIX}{k}" + k += 1 + + # create tuple + for i in range(len(pool)): + value = pool[i] + res.append(vars[value] if value in vars else value) + return tuple(res) diff --git a/parsers/PythonParser.py b/parsers/PythonParser.py index c7cb4d7..60b6c80 100644 --- a/parsers/PythonParser.py +++ b/parsers/PythonParser.py @@ -26,36 +26,41 @@ class PythonNode(Node): class PythonParser(BaseParser): - def __init__(self, text, source=""): - text = text if isinstance(text, str) else self.get_text_from_tokens(text) - text = text.strip() - BaseParser.__init__(self, "PythonParser", text) + """ + Parse Python scripts + """ + def __init__(self, source=""): + + BaseParser.__init__(self, "PythonParser") self.source = source - def parse(self): + def parse(self, context, text): + text = text if isinstance(text, str) else self.get_text_from_tokens(text) + text = text.strip() + # first, try to parse an expression - res, tree, error = self.try_parse_expression() + res, tree, error = self.try_parse_expression(text) if not res: # then try to parse a statement - res, tree, error = self.try_parse_statement() + res, tree, error = self.try_parse_statement(text) if not res: self.has_error = True - error_node = PythonErrorNode(self.text, error) + error_node = PythonErrorNode(text, error) self.error_sink.append(error_node) return error_node log.debug("Recognized python code.") - return PythonNode(self.text, tree) + return PythonNode(text, tree) - def try_parse_expression(self): + def try_parse_expression(self, text): try: - return True, ast.parse(self.text, f"<{self.source}>", 'eval'), None + return True, ast.parse(text, f"<{self.source}>", 'eval'), None except Exception as error: return False, None, error - def try_parse_statement(self): + def try_parse_statement(self, text): try: - return True, ast.parse(self.text, f"<{self.source}>", 'exec'), None + return True, ast.parse(text, f"<{self.source}>", 'exec'), None except Exception as error: return False, None, error diff --git a/tests/test_ExactConceptParser.py b/tests/test_ExactConceptParser.py new file mode 100644 index 0000000..cd4f24f --- /dev/null +++ b/tests/test_ExactConceptParser.py @@ -0,0 +1,152 @@ +import pytest +from os import path +import shutil +import os + +from core.concept import Concept, Property +from core.sheerka import Sheerka, ExecutionContext +from parsers.DefaultParser import DefaultParser +from parsers.ExactConceptParser import ExactConceptParser + +tests_root = path.abspath("../build/tests") +root_folder = "init_folder" + + +@pytest.fixture(autouse=True) +def init_test(): + if path.exists(tests_root): + shutil.rmtree(tests_root) + + if not path.exists(tests_root): + os.makedirs(tests_root) + current_pwd = os.getcwd() + os.chdir(tests_root) + + yield None + + os.chdir(current_pwd) + + +def test_i_can_compute_combinations(): + parser = ExactConceptParser() + res = 
parser.combinations(["foo", "bar", "baz"]) + + assert res == {('foo', 'bar', 'baz'), + ('__var__0', 'bar', 'baz'), + ('foo', '__var__0', 'baz'), + ('foo', 'bar', '__var__0'), + ('__var__0', '__var__1', 'baz'), + ('__var__0', 'bar', '__var__1'), + ('foo', '__var__0', '__var__1'), + ('__var__0', '__var__1', '__var__2')} + + +def test_i_can_compute_combinations_with_duplicates(): + parser = ExactConceptParser() + res = parser.combinations(["foo", "bar", "foo"]) + + assert res == {('foo', 'bar', 'foo'), + ('__var__0', 'bar', '__var__0'), + ('foo', '__var__0', 'foo'), + ('__var__0', '__var__1', '__var__0'), + ('__var__1', '__var__0', '__var__1')} + # TODO: the last tuple is not possible, so the algo can be improved + + +def test_i_can_recognize_a_simple_concept(): + sheerka = get_sheerka() + concept = get_concept("hello world", []) + sheerka.add_in_cache(concept) + source = "hello world" + context = ExecutionContext(sheerka, "xxxx") + results = ExactConceptParser().parse(context, source) + + assert len(results) == 1 + assert results[0].status + assert results[0].value.key == concept.key + + +def test_i_can_recognize_concepts_defined_several_times(): + sheerka = get_sheerka() + sheerka.add_in_cache(get_concept("hello world", [])) + sheerka.add_in_cache(get_concept("hello a", ["a"])) + + source = "hello world" + context = ExecutionContext(sheerka, "xxxx") + results = ExactConceptParser().parse(context, source) + + assert len(results) == 2 + results = sorted(results, key=lambda x: x.value.name) # because of the usage of sets + + assert results[0].status + assert results[0].value.name == "hello a" + assert results[0].value.props["a"].value == "world" + + assert results[1].status + assert results[1].value.name == "hello world" + + +def test_i_can_recognize_a_concept_with_variables(): + sheerka = get_sheerka() + concept = get_concept("a + b", ["a", "b"]) + sheerka.concepts_cache[concept.key] = concept + source = "10 + 5" + context = ExecutionContext(sheerka, "xxxx") + results = ExactConceptParser().parse(context, source) + + assert len(results) == 1 + assert results[0].status + assert results[0].value.key == concept.key + assert results[0].value.props["a"].value == "10" + assert results[0].value.props["b"].value == "5" + + +def test_i_can_recognize_a_concept_with_duplicate_variables(): + sheerka = get_sheerka() + concept = get_concept("a + b + a", ["a", "b"]) + sheerka.concepts_cache[concept.key] = concept + source = "10 + 5 + 10" + context = ExecutionContext(sheerka, "xxxx") + results = ExactConceptParser().parse(context, source) + + assert len(results) == 1 + assert results[0].status + assert results[0].value.key == concept.key + assert results[0].value.props["a"].value == "10" + assert results[0].value.props["b"].value == "5" + + +def test_i_can_manage_unknown_concept(): + sheerka = get_sheerka() + source = "def concept hello world" # this is not a concept by itself + context = ExecutionContext(sheerka, "xxxx") + res = ExactConceptParser().parse(context, source) + + assert not res.status + assert sheerka.isinstance(res.value, Sheerka.UNKNOWN_CONCEPT_NAME) + + +def test_i_can_detect_concepts_too_long(): + sheerka = get_sheerka() + source = "a very very long concept that cannot be an unique one" + context = ExecutionContext(sheerka, "xxxx") + res = ExactConceptParser().parse(context, source) + + assert not res.status + assert sheerka.isinstance(res.value, Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME) + + +def get_concept(name, variables): + c = Concept(name=name) + if variables: + for v in variables: + 
c.props[v] = Property(v, None) + c.init_key() + return c + + +def get_sheerka(): + sheerka = Sheerka() + sheerka.initialize(root_folder) + + return sheerka diff --git a/tests/test_concept.py b/tests/test_concept.py new file mode 100644 index 0000000..a940289 --- /dev/null +++ b/tests/test_concept.py @@ -0,0 +1,38 @@ +import pytest + +from core.concept import Concept + + +@pytest.mark.parametrize("name, variables, expected", [ + ("my name is a", ["a"], "my name is __var__0"), + ("a b c d", ["b", "c"], "a __var__0 __var__1 d"), + ("a 'b c' d", ["b", "c"], "a b c d"), + ("a | b", ["a", "b"], "__var__0 | __var__1"), + ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"), + ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"), +]) +def test_i_can_get_concept_key(name, variables, expected): + concept = Concept(name) + for v in variables: + concept.set_prop(v, None) + + concept.init_key() + assert concept.key == expected + + +def test_i_can_serialize(): + """ + Test concept.to_dict() + :return: + """ + # TODO + pass + + +def test_i_can_deserialize(): + """ + Test concept.from_dict() + :return: + """ + # TODO + pass diff --git a/tests/test_defautparser.py b/tests/test_defautparser.py index 8a98023..97bd948 100644 --- a/tests/test_defautparser.py +++ b/tests/test_defautparser.py @@ -1,10 +1,11 @@ import pytest +from parsers.ExactConceptParser import ExactConceptParser from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode -from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError +from core.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError from parsers.DefaultParser import DefaultParser from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode -from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode +from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode, NopNode import ast @@ -39,6 +40,7 @@ def null(): def b(operator, left, right): return BinaryNode([], operator, left, right) + def compare_ast(left, right): left_as_string = ast.dump(left) left_as_string = left_as_string.replace(", ctx=Load()", "") @@ -51,9 +53,8 @@ def compare_ast(left, right): return left_as_string == right_as_string - def test_i_can_tokenize(): - source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=" + source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&" tokens = list(Tokenizer(source)) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) @@ -85,6 +86,8 @@ def test_i_can_tokenize(): assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1) assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2) assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18) + assert tokens[30] == Token(TokenKind.VBAR, '|', 77, 6, 19) + assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20) @pytest.mark.parametrize("text, expected", [ @@ -220,8 +223,8 @@ def test_i_can_recognize_keywords(text, expected): ("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))), ]) def test_i_can_parse_def_concept(text, expected_name, expected_expr): - parser = DefaultParser(text, PythonParser) - tree = parser.parse() + parser = DefaultParser(PythonParser) + tree = parser.parse(None, text) assert isinstance(tree, DefConceptNode) assert tree.name == expected_name if 
isinstance(tree.body, PythonNode): @@ -230,8 +233,6 @@ def test_i_can_parse_def_concept(text, expected_name, expected_expr): assert tree.body == expected_expr - - def test_i_can_parse_complex_def_concept_statement(): text = """def concept a plus b where a,b @@ -239,8 +240,8 @@ def test_i_can_parse_complex_def_concept_statement(): post isinstance(res, int) as res = a + b """ - parser = DefaultParser(text, PythonParser) - tree = parser.parse() + parser = DefaultParser(PythonParser) + tree = parser.parse(None, text) assert not parser.has_error assert isinstance(tree, DefConceptNode) assert tree.name == "a plus b" @@ -261,19 +262,20 @@ def concept add one to a as: return x+1 func(a) """ - parser = DefaultParser(text, PythonParser) - tree = parser.parse() + parser = DefaultParser(PythonParser) + tree = parser.parse(None, text) assert not parser.has_error assert isinstance(tree, DefConceptNode) + def test_i_can_use_colon_to_declare_indentation2(): text = """ def concept add one to a as: def func(x): return x+1 """ - parser = DefaultParser(text, PythonParser) - tree = parser.parse() + parser = DefaultParser(PythonParser) + tree = parser.parse(None, text) assert not parser.has_error assert isinstance(tree, DefConceptNode) @@ -285,8 +287,8 @@ def concept add one to a as return x+1 func(a) """ - parser = DefaultParser(text, PythonParser) - tree = parser.parse() + parser = DefaultParser(PythonParser) + tree = parser.parse(None, text) assert parser.has_error assert isinstance(tree, DefConceptNode) assert isinstance(parser.error_sink[0].exception, IndentationError) @@ -304,8 +306,8 @@ def concept add one to a as: func(a) func(b) """ - parser = DefaultParser(text, PythonParser) - tree = parser.parse() + parser = DefaultParser(PythonParser) + tree = parser.parse(None, text) assert parser.has_error assert isinstance(tree, DefConceptNode) assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode) @@ -319,8 +321,8 @@ func(b) ("def concept as", Keywords.AS, [""]), ]) def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens): - parser = DefaultParser(text, PythonParser) - parser.parse() + parser = DefaultParser(PythonParser) + parser.parse(None, text) assert parser.has_error assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode) @@ -335,7 +337,10 @@ def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, e "def concept hello as 1+" ]) def test_i_can_detect_error_in_declaration(text): - parser = DefaultParser(text, PythonParser) - parser.parse() + parser = DefaultParser(PythonParser) + parser.parse(None, text) assert parser.has_error assert isinstance(parser.error_sink[0], PythonErrorNode) + + + diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index 9227cec..688bafe 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -37,8 +37,7 @@ def test_root_folder_is_created_after_initialization(): def test_lists_of_concepts_is_initialized(): - sheerka = Sheerka() - sheerka.initialize(root_folder) + sheerka = get_sheerka() assert len(sheerka.concepts_cache) > 1 @@ -53,14 +52,13 @@ def get_concept(): return x+y func(a,b) """ - parser = DefaultParser(text, PythonParser) - return parser.parse() + parser = DefaultParser(PythonParser) + return parser.parse(None, text) def test_i_can_add_a_concept(): + sheerka = get_sheerka() concept = get_concept() - sheerka = Sheerka() - sheerka.initialize(root_folder) res = sheerka.add_concept(ExecutionContext(sheerka, "xxx"), concept) concept_found = res.value @@ -76,7 +74,7 @@ def 
test_i_can_add_a_concept(): assert isinstance(concept_found.codes[ConceptParts.POST], ast.Expression) assert isinstance(concept_found.codes[ConceptParts.BODY], ast.Module) - all_props = [p.name for p in concept_found.props] + all_props = list(concept_found.props.keys()) assert all_props == ["a", "b"] assert concept_found.key == "__var__0 + __var__1" @@ -123,7 +121,14 @@ def test_i_can_instantiate_a_concept(): """ Test the new() functionnality make sure that some Concept are singleton (ex Sheerka, True, False) - but some other need a new instance everytime + otherwise, make sure that new() returns a **new** instance :return: """ pass + + +def get_sheerka(): + sheerka = Sheerka() + sheerka.initialize(root_folder) + + return sheerka \ No newline at end of file
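
Note on the key normalization introduced by this patch: Concept.init_key and DefaultParser.get_concept_key both turn a concept name plus its declared properties into a canonical key in which each property occurrence becomes a numbered __var__N placeholder. The sketch below is a simplification for illustration only: the real code walks core.tokenizer.Tokenizer tokens (so whitespace tokens are skipped and quoted strings are unquoted), whereas here plain str.split() stands in for the tokenizer; make_key is a hypothetical helper name, not part of the patch.

PROPERTY_PREFIX = "__var__"

def make_key(name, variables):
    # variables: property names in declaration order (the "where a, b" clause)
    parts = []
    for word in name.split():
        if word in variables:
            # replace a property occurrence by its index in the declaration
            parts.append(PROPERTY_PREFIX + str(variables.index(word)))
        else:
            parts.append(word)
    return " ".join(parts)

# Mirrors the expectations in tests/test_concept.py and tests/test_sheerka.py
assert make_key("a plus b", ["a", "b"]) == "__var__0 plus __var__1"
assert make_key("a b a c", ["b", "a"]) == "__var__1 __var__0 __var__1 c"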
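
Note on ExactConceptParser: its combinations() method enumerates every way of abstracting the words of an utterance into __var__N slots (numbered by first appearance, with all occurrences of the same word sharing a slot), and parse() then looks each candidate key up in the concept cache. The snippet below is a self-contained re-statement of that enumeration using itertools, matching the behavior documented in the patch and its tests; candidate_keys is an illustrative name, not the method the patch adds.

from itertools import combinations

PROPERTY_PREFIX = "__var__"

def candidate_keys(words):
    """All ways of abstracting an utterance's words into __var__N slots."""
    result = set()
    for r in range(len(words) + 1):
        for picked in combinations(range(len(words)), r):
            slots = {}  # word value -> placeholder, numbered by first pick
            for i in picked:
                slots.setdefault(words[i], f"{PROPERTY_PREFIX}{len(slots)}")
            result.add(tuple(slots.get(w, w) for w in words))
    return result

# 8 candidates for three distinct words, as in test_i_can_compute_combinations:
# from ('foo', 'bar', 'baz') up to ('__var__0', '__var__1', '__var__2')
print(candidate_keys(["foo", "bar", "baz"]))
# duplicate words share a slot, as in test_i_can_compute_combinations_with_duplicates
print(candidate_keys(["foo", "bar", "foo"]))

For each candidate that resolves to something other than the Unknown Concept, parse() binds the original words back onto the matched template's properties via set_prop_by_index, which is why "10 + 5" against the "a + b" concept yields props a="10" and b="5" in the tests.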
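
Note on the reworked parser protocol: BaseParser subclasses no longer receive the text in __init__; Sheerka now registers parser classes (via core.utils.get_class), instantiates one per eval, and passes (context, text) to parse(). The toy classes below only illustrate that calling shape under stated assumptions; UpperCaseParser and the bare loop are invented for the example and the real ReturnValue/ExecutionContext plumbing from core.sheerka is omitted.

class BaseParser:
    def __init__(self, name):
        self.name = name
        self.has_error = False
        self.error_sink = []

    def parse(self, context, text):  # text now arrives per call, not at construction
        raise NotImplementedError

class UpperCaseParser(BaseParser):
    """Toy parser: 'recognizes' any text by upper-casing it."""
    def __init__(self):
        super().__init__("UpperCaseParser")

    def parse(self, context, text):
        return text.upper()

# Roughly how Sheerka.try_parse drives the registered parsers (simplified):
parsers = [UpperCaseParser]            # registered as classes, not instances
for parser_cls in parsers:
    p = parser_cls()                   # fresh instance for each eval
    tree = p.parse(None, "hello world")  # context omitted in this toy run
    print(p.name, p.has_error, tree)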