diff --git a/src/core/builtin_concepts.py b/src/core/builtin_concepts.py index 112b88a..162f458 100644 --- a/src/core/builtin_concepts.py +++ b/src/core/builtin_concepts.py @@ -349,9 +349,6 @@ class EnumerationConcept(Concept): self.set_value(ConceptParts.BODY, iteration) self.metadata.is_evaluated = True - # def __iter__(self): - # return iter(self.body) - class ListConcept(Concept): def __init__(self, items=None): @@ -362,21 +359,6 @@ class ListConcept(Concept): def append(self, obj): self.body.append(obj) - # def __len__(self): - # return len(self.body) - # - # def __getitem__(self, key): - # return self.body[key] - # - # def __setitem__(self, key, value): - # self.body[key] = value - # - # def __iter__(self): - # return iter(self.body) - # - # def __contains__(self, item): - # return item in self.body - class FilteredConcept(Concept): def __init__(self, filtered=None, iterable=None, predicate=None): @@ -450,5 +432,5 @@ class ExplanationConcept(Concept): self.set_value("command", command) # explain command parameters self.set_value("title", title) # a title to the explanation self.set_value("instructions", instructions) # instructions for SheerkaPrint - self.set_value(ConceptParts.BODY, execution_result) # list of results + self.set_value(ConceptParts.BODY, execution_result) # list of results self.metadata.is_evaluated = True diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index 68fd5c6..1b73b14 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -326,6 +326,7 @@ def ensure_evaluated(context, concept): return evaluated + def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers): """ Using parsers, try to recognize concepts from source diff --git a/src/core/concept.py b/src/core/concept.py index 5364855..dcc3a92 100644 --- a/src/core/concept.py +++ b/src/core/concept.py @@ -221,7 +221,7 @@ class Concept: Create the key for this concept. 
The method is not called 'set_key' to make sure that no other class sets the key by mistake
self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) - # if not init_ret_value.status: - # return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) - # resolved_concepts_by_first_keyword = init_ret_value.body - # - # return concepts_by_first_keyword, resolved_concepts_by_first_keyword diff --git a/src/core/sheerka/Services/SheerkaExecute.py b/src/core/sheerka/Services/SheerkaExecute.py index de8957e..35217c7 100644 --- a/src/core/sheerka/Services/SheerkaExecute.py +++ b/src/core/sheerka/Services/SheerkaExecute.py @@ -1,8 +1,9 @@ -from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept import core.utils +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept NO_MATCH = "** No Match **" + class SheerkaExecute: """ Manage the execution of a process flow @@ -58,7 +59,8 @@ class SheerkaExecute: # else "'" + BaseParser.get_text_from_tokens(to_parse) + "' as tokens" # execution_context.log(f"Parsing {debug_text}") - with execution_context.push(desc=f"Parsing using {parser.name}", logger=parser.verbose_log) as sub_context: + with execution_context.push(desc=f"Parsing using {parser.name}", + logger=parser.verbose_log) as sub_context: sub_context.add_inputs(to_parse=to_parse) res = parser.parse(sub_context, to_parse) if res is not None: @@ -86,7 +88,6 @@ class SheerkaExecute: stop_processing = True sub_context.add_values(return_values=res) - if stop_processing: break # Do not try the other priorities if a match is found diff --git a/src/core/sheerka/Services/SheerkaModifyConcept.py b/src/core/sheerka/Services/SheerkaModifyConcept.py index 21938a9..8ab8de0 100644 --- a/src/core/sheerka/Services/SheerkaModifyConcept.py +++ b/src/core/sheerka/Services/SheerkaModifyConcept.py @@ -35,7 +35,7 @@ class SheerkaModifyConcept: # TODO : update concept by first keyword # TODO : update resolved by first keyword - # TODO : update concets grammars + # TODO : update concepts grammars ret = 
self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept)) return ret diff --git a/src/core/sheerka/Services/SheerkaVariableManager.py b/src/core/sheerka/Services/SheerkaVariableManager.py index 558fc62..f7b3f6d 100644 --- a/src/core/sheerka/Services/SheerkaVariableManager.py +++ b/src/core/sheerka/Services/SheerkaVariableManager.py @@ -1,8 +1,6 @@ from dataclasses import dataclass from typing import List -from sdp.sheerkaSerializer import Serializer - @dataclass class Variable: diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index d0dafe2..41ca7f0 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -60,10 +60,6 @@ class Sheerka(Concept): self.bnp = None # reference to the BaseNodeParser class (to compute first keyword token) - # # Cache for concepts grammars - # # To be shared between BNFNode parsers instances - # self.concepts_grammars = {} - # a concept can be instantiated # ex: File is a concept, but File('foo.txt') is an instance # TODO: manage contexts @@ -303,27 +299,6 @@ class Sheerka(Concept): res = self.bnp.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword) self.cache_manager.put(self.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, False, res.body) - # sya = self.bnf.resolve_sya_associativity_and_precedence() - # self.cache_manager.put(self.RESOLVED_CONCEPTS_SYA_DEFINITION_ENTRY, sya) - # - # - # self.concepts_by_first_keyword, \ - # self.resolved_concepts_by_first_keyword = \ - # self.create_new_concept_handler.load_concepts_nodes_definitions(context) - - # self.concepts_by_first_keyword = self.sdp.get_safe( - # self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, - # load_origin=False) or {} - # - # self.sya_definitions = self.sdp.get_safe( - # self.CONCEPTS_SYA_DEFINITION_ENTRY, - # load_origin=False) or {} - # - # init_ret_value = self.bnp.resolve_concepts_by_first_keyword(self, self.concepts_by_first_keyword) - # if not init_ret_value.status: - # return 
self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value)) - # self.resolved_concepts_by_first_keyword = init_ret_value.body - def reset(self, cache_only=False): self.cache_manager.clear() self.cache_manager.cache_only = cache_only @@ -346,7 +321,6 @@ class Sheerka(Concept): with ExecutionContext(self.key, event, self, f"Evaluating '{text}'", self.log) as execution_context: user_input = self.ret(self.name, True, self.new(BuiltinConcepts.USER_INPUT, body=text, user_name=user_name)) reduce_requested = self.ret(self.name, True, self.new(BuiltinConcepts.REDUCE_REQUESTED)) - # execution_context.local_hints.add(BuiltinConcepts.EVAL_WHERE_REQUESTED) steps = [ BuiltinConcepts.BEFORE_PARSING, @@ -525,28 +499,6 @@ class Sheerka(Concept): return concept - # - # def get(self, concept_key, concept_id=None): - # """ - # Tries to find a concept - # What is return must be used a template for another concept. - # You must not modify the returned concept - # :param concept_key: key of the concept - # :param concept_id: when multiple concepts with the same key, use the id - # :return: - # """ - # - # by_key = self.get_by_key(concept_key) - # if self.is_known(by_key): - # return by_key - # - # # else return by name - # by_name = self.get_by_name(concept_key) - # if self.is_known(by_name): - # return by_name - # - # return by_key # return not found for key - def get_by_key(self, concept_key, concept_id=None): concept_key = str(concept_key) if isinstance(concept_key, BuiltinConcepts) else concept_key return self.internal_get("key", concept_key, self.CONCEPTS_BY_KEY_ENTRY, concept_id) diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index de3bd1f..181361d 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum @@ -48,6 +48,7 @@ class TokenKind(Enum): DEGREE = "degree" # ° WORD = "word" EQUALSEQUALS = "==" + VAR_DEF = "__var__" @dataclass() 
@@ -58,6 +59,8 @@ class Token: line: int column: int + _str_value: str = field(default=None, repr=False, compare=False, hash=None) + def __repr__(self): if self.type == TokenKind.IDENTIFIER: value = str(self.value) @@ -72,6 +75,23 @@ class Token: return f"Token({value})" + @property + def str_value(self): + if self._str_value: + return self._str_value + + if self.type == TokenKind.STRING: + self._str_value = self.value[1:-1] + elif self.type == TokenKind.KEYWORD: + self._str_value = self.value.value + else: + self._str_value = str(self.value) + return self._str_value + + @staticmethod + def is_whitespace(token): + return token and token.type == TokenKind.WHITESPACE + @dataclass() class LexerError(Exception): @@ -101,12 +121,13 @@ class Tokenizer: KEYWORDS = set(x.value for x in Keywords) - def __init__(self, text, parse_word=False): + def __init__(self, text, yield_eof=True, parse_word=False): self.text = text self.text_len = len(text) self.column = 1 self.line = 1 self.i = 0 + self.yield_eof = yield_eof self.parse_word = parse_word def __iter__(self): @@ -134,6 +155,7 @@ class Tokenizer: self.i += 1 self.column += 1 elif c == "_": + from core.concept import VARIABLE_PREFIX if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha(): identifier = self.eat_identifier(self.i) token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER @@ -141,6 +163,13 @@ class Tokenizer: yield Token(token_type, value, self.i, self.line, self.column) self.i += len(identifier) self.column += len(identifier) + elif self.i + 7 < self.text_len and \ + self.text[self.i: self.i + 7] == VARIABLE_PREFIX and \ + self.text[self.i + 7].isdigit(): + number = self.eat_number(self.i + 7) + yield Token(TokenKind.VAR_DEF, VARIABLE_PREFIX + number, self.i, self.line, self.column) + self.i += 7 + len(number) + self.column += 7 + len(number) else: yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column) self.i += 1 @@ -308,7 +337,8 @@ class Tokenizer: 
else: raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column) - yield Token(TokenKind.EOF, "", self.i, self.line, self.column) + if self.yield_eof: + yield Token(TokenKind.EOF, "", self.i, self.line, self.column) def eat_concept(self, start, line, column): key, id, buffer = None, None, "" diff --git a/src/parsers/AtomNodeParser.py b/src/parsers/AtomNodeParser.py index 3285fed..e1090db 100644 --- a/src/parsers/AtomNodeParser.py +++ b/src/parsers/AtomNodeParser.py @@ -91,7 +91,7 @@ class AtomConceptParserHelper: self.debug.append(token) - if self.expected_tokens[0] != BaseNodeParser.get_token_value(token): + if self.expected_tokens[0] != token.str_value: self.errors.append(UnexpectedTokenErrorNode( f"Found '{token}' while expecting '{self.expected_tokens[0]}'", token, @@ -119,7 +119,7 @@ class AtomConceptParserHelper: forked.eat_concept(concept, pos) concept_node = ConceptNode(concept, pos, pos) - expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1] + expected = [t.str_value for t in Tokenizer(concept.name)][1:-1] if not expected: # the concept is already matched diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index 4b46dc0..235a732 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -53,9 +53,6 @@ class UnrecognizedTokensNode(LexerNode): self.is_frozen = False self.parenthesis_count = 0 - def has_open_paren(self): - return self.parenthesis_count > 0 - def add_token(self, token, pos): if self.is_frozen: raise Exception("The node is frozen") @@ -78,6 +75,21 @@ class UnrecognizedTokensNode(LexerNode): return self + def pop(self, token_kind): + if self.is_frozen: + raise Exception("The node is frozen") + + if len(self.tokens) > 0 and self.tokens[-1].type == token_kind: + self.tokens.pop() + if len(self.tokens) == 0: + self.reset() + else: + self.end -= 1 + + + def has_open_paren(self): + return self.parenthesis_count > 0 + def not_whitespace(self): 
return not self.is_whitespace() @@ -90,6 +102,11 @@ class UnrecognizedTokensNode(LexerNode): def is_empty(self): return len(self.tokens) == 0 + def last_token_type(self): + if len(self.tokens) == 0: + return None + return self.tokens[-1].type + def __eq__(self, other): if isinstance(other, utnode): return self.start == other.start and \ @@ -676,15 +693,6 @@ class BaseNodeParser(BaseParser): return custom_concepts if custom else None - @staticmethod - def get_token_value(token): - if token.type == TokenKind.STRING: - return token.value[1:-1] - elif token.type == TokenKind.KEYWORD: - return token.value.value - else: - return token.value - @staticmethod def get_concepts_by_first_keyword(context, concepts, use_sheerka=False): """ diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index 5820a28..4c65245 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -1,15 +1,16 @@ from collections import namedtuple from dataclasses import dataclass, field +from operator import attrgetter from typing import List from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts -from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF +from core.concept import Concept, DEFINITION_TYPE_BNF from core.sheerka.ExecutionContext import ExecutionContext -from core.tokenizer import Token, TokenKind +from core.tokenizer import Token, TokenKind, Tokenizer from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \ SourceCodeWithConceptNode, BaseNodeParser -from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode +from parsers.BaseParser import ErrorNode PARSERS = ["BnfNode", "AtomNode", "Python"] @@ -88,10 +89,13 @@ class SyaConceptParserHelper: concept: Concept start: int # position of the token in the tokenizer (Caution, it is not token.index) end: int = field(default=-1, repr=False, compare=False, hash=None) - expected: List[str] = 
field(default_factory=list, repr=False, compare=False, hash=None) + expected: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) expected_parameters_before_first_token: int = field(default=0, repr=False, compare=False, hash=None) + last_token_before_first_token: Token = field(default=None, repr=False, compare=False, hash=None) potential_pos: int = field(default=-1, repr=False, compare=False, hash=None) parameters_list_at_init: list = field(default_factory=list, repr=False, compare=False, hash=None) + tokens: List[Token] = field(default_factory=list, repr=False, compare=False, hash=None) # tokens eaten + remember_whitespace: Token = field(default=None, repr=False, compare=False, hash=None) error: str = None def __post_init__(self): @@ -99,17 +103,20 @@ class SyaConceptParserHelper: if self.end == -1: self.end = self.start - first_keyword_found = False - for name in concept.key.split(): - if not name.startswith(VARIABLE_PREFIX) and not first_keyword_found: - first_keyword_found = True + first_keyword_found = None + for token in Tokenizer(concept.key, yield_eof=False): + if not first_keyword_found and token.type != TokenKind.WHITESPACE and token.type != TokenKind.VAR_DEF: + first_keyword_found = token if first_keyword_found: - self.expected.append(name) + self.expected.append(token) else: - self.expected_parameters_before_first_token += 1 + self.last_token_before_first_token = token + if token.type != TokenKind.WHITESPACE: + self.expected_parameters_before_first_token += 1 - self.eat_token() # remove the fist token + self.eat_token(first_keyword_found) # remove the first token + self.tokens.append(first_keyword_found) def is_matched(self): return len(self.expected) == 0 @@ -117,23 +124,38 @@ class SyaConceptParserHelper: def is_atom(self): return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0 - def is_expected(self, token): - if self.is_matched(): + def is_next(self, token): + if self.is_matched() or 
len(self.expected) == 0: return False - token_value = BaseNodeParser.get_token_value(token) + # True if the next token is the one that is expected + # Or if the next token is a whitespace and the expected one is the one after + # (whitespace are sometimes not mandatory) + return token.str_value == self.expected[0].str_value or \ + self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value + + def is_expected(self, token): + if self.is_matched() or token.type == TokenKind.WHITESPACE: + return False for expected in self.expected: - if not expected.startswith(VARIABLE_PREFIX) and expected == token_value: + if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value: return True return False def expected_parameters(self): - return sum(map(lambda e: e.startswith(VARIABLE_PREFIX), self.expected)) + return sum(map(lambda e: e.type == TokenKind.VAR_DEF, self.expected)) - def eat_token(self): - # No check, as it is used only after is_expected + def eat_token(self, until_token): + """ + eat until token 'until' + :param until_token: + :return: + """ + # No check, as it is used only after is_expected() or is_next() + while self.expected[0].str_value != until_token.str_value: + del self.expected[0] del self.expected[0] # return True is a whole sequence of keyword is eaten @@ -143,7 +165,10 @@ class SyaConceptParserHelper: if len(self.expected) == 0: return True - return self.expected[0].startswith(VARIABLE_PREFIX) + # also return True at the end of a name sequence + # ... 
# concepts that look like known ones, but are not (for debug purposes)
self.unrecognized_tokens.add_token(parser_helper.remember_whitespace, parser_helper.start - 1) + for i, token in enumerate(parser_helper.tokens): + self.unrecognized_tokens.add_token(token, parser_helper.start + i) + def get_errors(self): res = [] res.extend(self.errors) @@ -343,28 +378,28 @@ class InFixToPostFix: self.is_locked = False - def manage_parameters_when_new_concept(self, temp_concept_node): + def manage_parameters_when_new_concept(self, parser_helper): """ When a new concept is create, we need to check what to do with the parameters that were queued - :param temp_concept_node: new concept + :param parser_helper: new concept :return: """ - if len(self.parameters_list) < temp_concept_node.expected_parameters_before_first_token: + if len(self.parameters_list) < parser_helper.expected_parameters_before_first_token: # The new concept expect some prefix parameters, but there's not enough - temp_concept_node.error = "Not enough prefix parameters" + parser_helper.error = "Not enough prefix parameters" return - if len(self.parameters_list) > temp_concept_node.expected_parameters_before_first_token: + if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token: # There are more parameters than needed by the new concept # The others are either # - parameters for the previous concept (if any) # - concepts on their own # - syntax error # In all the cases, the only thing that matter is to pop what is expected by the new concept - for i in range(temp_concept_node.expected_parameters_before_first_token): + for i in range(parser_helper.expected_parameters_before_first_token): self.parameters_list.pop() - temp_concept_node.parameters_list_at_init.extend(self.parameters_list) + parser_helper.parameters_list_at_init.extend(self.parameters_list) return # len(self.parameters_list) == temp_concept_node.expected_parameters_before_first_token @@ -385,14 +420,18 @@ class InFixToPostFix: :return: """ + # manage parenthesis that didn't find any match + if 
# As I may lose memory again ;-)
# We are not parsing the concept we thought we were parsing.
self.manage_unrecognized() + # manage that some clones may have been forked + for forked in self.forked: + forked.handle_expected_token(token, pos) + else: + self.pop_stack_to_out() self.manage_parameters() - if current_concept.eat_token(): - while self.stack[-1] != current_concept and not self._is_lpar(current_concept): - self.pop_stack_to_out() + # maybe eat whitespace that was between and expected token + if current_concept.expected[0].type == TokenKind.WHITESPACE: + del current_concept.expected[0] - if self._is_lpar(self.stack[-1]): - self._add_error(ParenthesisMismatchErrorNode(self.stack[-1])) - return False - - # Manage concepts ending with long names - if self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].is_matched(): - self.pop_stack_to_out() + if current_concept.eat_token(token): + _pop_stack(current_concept) return True - # else: - # if token.type != TokenKind.WHITESPACE: - # # hack, because whitespaces are not correctly parsed in self.expected - # # KSI 2020/04/25 - # # I no longer understand why we are in a loop (the reverse one) - # # if we are parsing a concept and the expected token does not match - # # The whole class should be in error - # self._add_error(UnexpectedTokenErrorNode( - # f"Failed to parse '{current_concept.concept.concept}'", - # token, current_concept.expected)) - # return False - return False def eat_token(self, token, pos): @@ -692,10 +779,11 @@ class InFixToPostFix: return False - def eat_concept(self, sya_concept_def, pos): + def eat_concept(self, sya_concept_def, token, pos): """ a concept is found :param sya_concept_def: + :param token: :param pos: :return: """ @@ -704,37 +792,43 @@ class InFixToPostFix: return self.debug.append(sya_concept_def) - temp_concept_node = SyaConceptParserHelper(sya_concept_def, pos) + parser_helper = SyaConceptParserHelper(sya_concept_def, pos) + + if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE: + parser_helper.remember_whitespace = 
self.unrecognized_tokens.tokens[-1] + + if Token.is_whitespace(parser_helper.last_token_before_first_token): + self.unrecognized_tokens.pop(TokenKind.WHITESPACE) # First, try to recognize the tokens that are waiting self.manage_unrecognized() for forked in self.forked: # manage the fact that some clone may have been forked - forked.eat_concept(sya_concept_def, pos) + forked.eat_concept(sya_concept_def, token, pos) # then, check if this new concept is linked to the previous ones # ie, is the previous concept fully matched ? - if temp_concept_node.expected_parameters_before_first_token == 0: + if parser_helper.expected_parameters_before_first_token == 0: # => does not expect pending parameter (it's suffixed concept) while self._stack_isinstance(SyaConceptParserHelper) and self.stack[-1].potential_pos != -1: # => previous seems to have everything it needs in the parameter list self.pop_stack_to_out() - if temp_concept_node.is_atom(): - self._put_to_out(temp_concept_node.fix_concept()) + if parser_helper.is_atom(): + self._put_to_out(parser_helper.fix_concept()) else: # call shunting yard algorithm - while self.i_can_pop(temp_concept_node): + while self.i_can_pop(parser_helper): self.pop_stack_to_out() - if temp_concept_node.is_matched(): + if parser_helper.is_matched(): # case of a prefix concept which has found happiness with self.parameters_list # directly put it in out - self.manage_parameters_when_new_concept(temp_concept_node) - self._put_to_out(temp_concept_node.fix_concept()) + self.manage_parameters_when_new_concept(parser_helper) + self._put_to_out(parser_helper.fix_concept()) else: - self.stack.append(temp_concept_node) - self.manage_parameters_when_new_concept(temp_concept_node) + self.stack.append(parser_helper) + self.manage_parameters_when_new_concept(parser_helper) def eat_unrecognized(self, token, pos): """ @@ -762,18 +856,34 @@ class InFixToPostFix: if len(self.stack) == 0 and len(self.out) == 0: return # no need to pop the buffer, as no concept is 
found + while len(self.stack) > 0: + parser_helper = self.stack[-1] + + # validate parenthesis + if self._is_lpar(parser_helper) or self._is_rpar(parser_helper): + self._add_error(ParenthesisMismatchErrorNode(parser_helper)) + return None + + self.manage_unrecognized() + for forked in self.forked: + # manage that some clones may have been forked + forked.finalize() + + failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected)) + if failed_to_match > 0: + # didn't manage to read all tokens. + # Transform them into unrecognized + self._transform_to_unrecognized(parser_helper) + self.false_positives.append(parser_helper) + self.stack.pop() # discard the parser helper + else: + self.pop_stack_to_out() # process it + self.manage_unrecognized() for forked in self.forked: # manage that some clones may have been forked forked.finalize() - while len(self.stack) > 0: - if self._is_lpar(self.stack[-1]) or self._is_rpar(self.stack[-1]): - self._add_error(ParenthesisMismatchErrorNode(self.stack[-1])) - return None - - self.pop_stack_to_out() - def clone(self): clone = InFixToPostFix(self.context) clone.is_locked = self.is_locked @@ -975,7 +1085,7 @@ class SyaNodeParser(BaseNodeParser): try: if token.type in (TokenKind.LPAR, TokenKind.RPAR): - # little optim, no need to get the concept when parenthesis + # little optim, no need to lock, unlock or get the concept when parenthesis for infix_to_postfix in res: infix_to_postfix.eat_token(token, self.pos) continue @@ -992,7 +1102,7 @@ class SyaNodeParser(BaseNodeParser): if len(concepts) == 1: for infix_to_postfix in res: - infix_to_postfix.eat_concept(concepts[0], self.pos) + infix_to_postfix.eat_concept(concepts[0], token, self.pos) continue # make the cartesian product @@ -1001,7 +1111,7 @@ class SyaNodeParser(BaseNodeParser): for concept in concepts: clone = infix_to_postfix.clone() temp_res.append(clone) - clone.eat_concept(concept, self.pos) + clone.eat_concept(concept, token, self.pos) res = 
temp_res finally: @@ -1100,6 +1210,11 @@ class SyaNodeParser(BaseNodeParser): to_insert = item sequence.insert(0, to_insert) + if has_unrecognized: + # Manage some sick cases where missing parenthesis mess the order or the sequence + # example "foo bar(one plus two" + sequence.sort(key=attrgetter("start")) + ret.append( self.sheerka.ret( self.name, diff --git a/src/parsers/_BnfNodeParser_Old.py b/src/parsers/_BnfNodeParser_Old.py deleted file mode 100644 index bd8b1e3..0000000 --- a/src/parsers/_BnfNodeParser_Old.py +++ /dev/null @@ -1,912 +0,0 @@ -# ##################################################################################################### -# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio) -# # I don't directly use the project, but it helped me figure out -# # what to do. -# # Dejanović I., Milosavljević G., Vaderna R.: -# # Arpeggio: A flexible PEG parser for Python, -# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004 -# ##################################################################################################### -# from collections import namedtuple -# from dataclasses import dataclass -# from collections import defaultdict -# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept -# from core.concept import Concept, ConceptParts, DoNotResolve -# from core.tokenizer import TokenKind, Tokenizer, Token -# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode -# from parsers.BaseParser import BaseParser, ErrorNode -# import core.utils -# -# -# class NonTerminalNode(LexerNode): -# """ -# Returned by the BnfNodeParser -# """ -# -# def __init__(self, parsing_expression, start, end, tokens, children=None): -# super().__init__(start, end, tokens) -# self.parsing_expression = parsing_expression -# self.children = children -# -# def __repr__(self): -# name = self.parsing_expression.rule_name or 
self.parsing_expression.__class__.__name__ -# if len(self.children) > 0: -# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")" -# else: -# sub_names = "" -# return name + sub_names -# -# def __eq__(self, other): -# if not isinstance(other, NonTerminalNode): -# return False -# -# return self.parsing_expression == other.parsing_expression and \ -# self.start == other.start and \ -# self.end == other.end and \ -# self.children == other.children -# -# def __hash__(self): -# return hash((self.parsing_expression, self.start, self.end, self.children)) -# -# -# class TerminalNode(LexerNode): -# """ -# Returned by the BnfNodeParser -# """ -# -# def __init__(self, parsing_expression, start, end, value): -# super().__init__(start, end, source=value) -# self.parsing_expression = parsing_expression -# self.value = value -# -# def __repr__(self): -# name = self.parsing_expression.rule_name or "" -# return name + f"'{self.value}'" -# -# def __eq__(self, other): -# if not isinstance(other, TerminalNode): -# return False -# -# return self.parsing_expression == other.parsing_expression and \ -# self.start == other.start and \ -# self.end == other.end and \ -# self.value == other.value -# -# def __hash__(self): -# return hash((self.parsing_expression, self.start, self.end, self.value)) -# -# -# @dataclass() -# class UnknownConceptNode(ErrorNode): -# concept_key: str -# -# -# @dataclass() -# class TooManyConceptNode(ErrorNode): -# concept_key: str -# -# -# class ParsingExpression: -# def __init__(self, *args, **kwargs): -# self.elements = args -# -# nodes = kwargs.get('nodes', []) -# if not hasattr(nodes, '__iter__'): -# nodes = [nodes] -# self.nodes = nodes -# -# self.rule_name = kwargs.get('rule_name', '') -# -# def __eq__(self, other): -# if not isinstance(other, ParsingExpression): -# return False -# -# return self.rule_name == other.rule_name and self.elements == other.elements -# -# def __hash__(self): -# return hash((self.rule_name, self.elements)) -# -# 
def parse(self, parser): -# return self._parse(parser) -# -# def add_rule_name_if_needed(self, text): -# return text + "=" + self.rule_name if self.rule_name else text -# -# -# class ConceptExpression(ParsingExpression): -# """ -# Will match a concept -# It used only for rule definition -# -# When the grammar is created, it is replaced by the actual concept -# """ -# -# def __init__(self, concept, rule_name=""): -# super().__init__(rule_name=rule_name) -# self.concept = concept -# -# def __repr__(self): -# return self.add_rule_name_if_needed(f"{self.concept}") -# -# def __eq__(self, other): -# if not super().__eq__(other): -# return False -# -# if not isinstance(other, ConceptExpression): -# return False -# -# if isinstance(self.concept, Concept): -# return self.concept.name == other.concept.name -# -# # when it's only the name of the concept -# return self.concept == other.concept -# -# def __hash__(self): -# return hash((self.concept, self.rule_name)) -# -# @staticmethod -# def get_parsing_expression_from_name(name): -# tokens = Tokenizer(name) -# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] -# if len(nodes) == 1: -# return nodes[0] -# else: -# sequence = Sequence(nodes) -# sequence.nodes = nodes -# return sequence -# -# def _parse(self, parser): -# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept -# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): -# return None -# -# self.concept = to_match # Memoize -# -# if to_match not in parser.concepts_grammars: -# # Try to match the concept using its name -# expr = self.get_parsing_expression_from_name(to_match.name) -# node = expr.parse(parser) -# else: -# node = parser.concepts_grammars[to_match].parse(parser) -# -# if node is None: -# return None -# -# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) -# -# -# class ConceptGroupExpression(ConceptExpression): 
-# def _parse(self, parser): -# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept -# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): -# return None -# -# self.concept = to_match # Memoize -# -# if to_match not in parser.concepts_grammars: -# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept) -# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group] -# expr = OrderedChoice(nodes) -# expr.nodes = nodes -# node = expr.parse(parser) -# else: -# node = parser.concepts_grammars[to_match].parse(parser) -# -# if node is None: -# return None -# -# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) -# -# -# class Sequence(ParsingExpression): -# """ -# Will match sequence of parser expressions in exact order they are defined. -# """ -# -# def _parse(self, parser): -# init_pos = parser.pos -# end_pos = parser.pos -# -# children = [] -# for e in self.nodes: -# node = e.parse(parser) -# if node is None: -# return None -# else: -# if node.end != -1: # because returns -1 when no match -# children.append(node) -# end_pos = node.end -# -# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) -# -# def __repr__(self): -# to_str = ", ".join(repr(n) for n in self.elements) -# return self.add_rule_name_if_needed(f"({to_str})") -# -# -# class OrderedChoice(ParsingExpression): -# """ -# Will match one among multiple -# It will stop at the first match (so the order of definition is important) -# """ -# -# def _parse(self, parser): -# init_pos = parser.pos -# -# for e in self.nodes: -# node = e.parse(parser) -# if node: -# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node]) -# -# parser.seek(init_pos) # backtrack -# -# return None -# -# def __repr__(self): -# to_str = "| ".join(repr(n) for n in self.elements) -# return 
self.add_rule_name_if_needed(f"({to_str})") -# -# -# class Optional(ParsingExpression): -# """ -# Will match or not the elements -# if many matches, will choose longest one -# If you need order, use Optional(OrderedChoice) -# """ -# -# def _parse(self, parser): -# init_pos = parser.pos -# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found -# -# for e in self.nodes: -# node = e.parse(parser) -# if node: -# if node.end > selected_node.end: -# selected_node = NonTerminalNode( -# self, -# node.start, -# node.end, -# parser.tokens[node.start: node.end + 1], -# [node]) -# -# parser.seek(init_pos) # backtrack -# -# if selected_node.end != -1: -# parser.seek(selected_node.end) -# parser.next_token() # eat the tokens found -# -# return selected_node -# -# def __repr__(self): -# if len(self.elements) == 1: -# return f"{self.elements[0]}?" -# else: -# to_str = ", ".join(repr(n) for n in self.elements) -# return self.add_rule_name_if_needed(f"({to_str})?") -# -# -# class Repetition(ParsingExpression): -# """ -# Base class for all repetition-like parser expressions (?,*,+) -# Args: -# eolterm(bool): Flag that indicates that end of line should -# terminate repetition match. -# """ -# -# def __init__(self, *elements, **kwargs): -# super(Repetition, self).__init__(*elements, **kwargs) -# self.sep = kwargs.get('sep', None) -# -# -# class ZeroOrMore(Repetition): -# """ -# ZeroOrMore will try to match parser expression specified zero or more -# times. It will never fail. 
-# """ -# -# def _parse(self, parser): -# init_pos = parser.pos -# end_pos = -1 -# children = [] -# -# while True: -# current_pos = parser.pos -# -# # maybe eat the separator if needed -# if self.sep and children: -# sep_result = self.sep.parse(parser) -# if sep_result is None: -# parser.seek(current_pos) -# break -# -# # eat the ZeroOrMore -# node = self.nodes[0].parse(parser) -# if node is None: -# parser.seek(current_pos) -# break -# else: -# if node.end != -1: # because returns -1 when no match -# children.append(node) -# end_pos = node.end -# -# if len(children) == 0: -# return NonTerminalNode(self, init_pos, -1, [], []) -# -# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) -# -# def __repr__(self): -# to_str = ", ".join(repr(n) for n in self.elements) -# return self.add_rule_name_if_needed(f"({to_str})*") -# -# -# class OneOrMore(Repetition): -# """ -# OneOrMore will try to match parser expression specified one or more times. -# """ -# -# def _parse(self, parser): -# init_pos = parser.pos -# end_pos = -1 -# children = [] -# -# while True: -# current_pos = parser.pos -# -# # maybe eat the separator if needed -# if self.sep and children: -# sep_result = self.sep.parse(parser) -# if sep_result is None: -# parser.seek(current_pos) -# break -# -# # eat the ZeroOrMore -# node = self.nodes[0].parse(parser) -# if node is None: -# parser.seek(current_pos) -# break -# else: -# if node.end != -1: # because returns -1 when no match -# children.append(node) -# end_pos = node.end -# -# if len(children) == 0: # if nothing is found, it's an error -# return None -# -# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children) -# -# def __repr__(self): -# to_str = ", ".join(repr(n) for n in self.elements) -# return self.add_rule_name_if_needed(f"({to_str})+") -# -# -# class UnorderedGroup(Repetition): -# """ -# Will try to match all of the parsing expression in any order. 
-# """ -# -# def _parse(self, parser): -# raise NotImplementedError() -# -# # def __repr__(self): -# # to_str = ", ".join(repr(n) for n in self.elements) -# # return f"({to_str})#" -# -# -# class Match(ParsingExpression): -# """ -# Base class for all classes that will try to match something from the input. -# """ -# -# def __init__(self, rule_name, root=False): -# super(Match, self).__init__(rule_name=rule_name, root=root) -# -# def parse(self, parser): -# result = self._parse(parser) -# return result -# -# -# class StrMatch(Match): -# """ -# Matches a literal -# """ -# -# def __init__(self, to_match, rule_name="", ignore_case=True): -# super(Match, self).__init__(rule_name=rule_name) -# self.to_match = to_match -# self.ignore_case = ignore_case -# -# def __repr__(self): -# return self.add_rule_name_if_needed(f"'{self.to_match}'") -# -# def __eq__(self, other): -# if not super().__eq__(other): -# return False -# -# if not isinstance(other, StrMatch): -# return False -# -# return self.to_match == other.to_match and self.ignore_case == other.ignore_case -# -# def _parse(self, parser): -# token = parser.get_token() -# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \ -# else token.value == self.to_match -# -# if m: -# node = TerminalNode(self, parser.pos, parser.pos, token.value) -# parser.next_token() -# return node -# -# return None -# -# -# class BnfNodeParser(BaseParser): -# def __init__(self, **kwargs): -# super().__init__("BnfNode_old", 50) -# self.enabled = False -# if 'grammars' in kwargs: -# self.concepts_grammars = kwargs.get("grammars") -# elif 'sheerka' in kwargs: -# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars -# else: -# self.concepts_grammars = {} -# -# self.ignore_case = True -# -# self.token = None -# self.pos = -1 -# self.tokens = None -# -# self.context = None -# self.text = None -# self.sheerka = None -# -# def add_error(self, error, next_token=True): -# self.error_sink.append(error) -# if next_token: 
-# self.next_token() -# return error -# -# def reset_parser(self, context, text): -# self.context = context -# self.sheerka = context.sheerka -# self.text = text -# -# try: -# self.tokens = list(self.get_input_as_tokens(text)) -# except core.tokenizer.LexerError as e: -# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) -# return False -# -# self.token = None -# self.pos = -1 -# self.next_token(False) -# return True -# -# def get_token(self) -> Token: -# return self.token -# -# def next_token(self, skip_whitespace=True): -# if self.token and self.token.type == TokenKind.EOF: -# return False -# -# self.pos += 1 -# self.token = self.tokens[self.pos] -# -# if skip_whitespace: -# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE: -# self.pos += 1 -# self.token = self.tokens[self.pos] -# -# return self.token.type != TokenKind.EOF -# -# def seek(self, pos): -# self.pos = pos -# self.token = self.tokens[self.pos] -# return True -# -# def rewind(self, offset, skip_whitespace=True): -# self.pos += offset -# self.token = self.tokens[self.pos] -# -# if skip_whitespace: -# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE): -# self.pos -= 1 -# self.token = self.tokens[self.pos] -# -# def initialize(self, context, concepts_definitions): -# """ -# Adds a bunch of concepts, and how they can be recognized -# :param context: execution context -# :param concepts_definitions: dictionary of concept, concept_definition -# :return: -# """ -# -# self.context = context -# self.sheerka = context.sheerka -# concepts_to_resolve = set() -# -# for concept, concept_def in concepts_definitions.items(): -# # ## Gets the grammars -# context.log(f"Resolving grammar for '{concept}'", context.who) -# concept.init_key() # make sure that the key is initialized -# grammar = self.get_model(concept_def, concepts_to_resolve) -# self.concepts_grammars[concept] = grammar -# -# if self.has_error: -# 
return self.sheerka.ret(self.name, False, self.error_sink) -# -# # ## Removes concepts with infinite recursions -# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) -# for concept in concepts_to_remove: -# concepts_to_resolve.remove(concept) -# del self.concepts_grammars[concept] -# -# if self.has_error: -# return self.sheerka.ret(self.name, False, self.error_sink) -# else: -# return self.sheerka.ret(self.name, True, self.concepts_grammars) -# -# def get_concept(self, concept_name): -# if concept_name in self.context.concepts: -# return self.context.concepts[concept_name] -# return self.sheerka.get_by_key(concept_name) -# -# def get_model(self, concept_def, concepts_to_resolve): -# -# # TODO -# # inner_get_model must not modify the initial ParsingExpression -# # A copy must be created -# def inner_get_model(expression): -# if isinstance(expression, Concept): -# if self.sheerka.isaset(self.context, expression): -# ret = ConceptGroupExpression(expression, rule_name=expression.name) -# else: -# ret = ConceptExpression(expression, rule_name=expression.name) -# concepts_to_resolve.add(expression) -# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression -# if expression.rule_name is None or expression.rule_name == "": -# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \ -# else expression.concept -# if isinstance(expression.concept, str): -# concept = self.get_concept(expression.concept) -# if self.sheerka.is_known(concept): -# expression.concept = concept -# concepts_to_resolve.add(expression.concept) -# ret = expression -# elif isinstance(expression, str): -# ret = StrMatch(expression, ignore_case=self.ignore_case) -# elif isinstance(expression, StrMatch): -# ret = expression -# if ret.ignore_case is None: -# ret.ignore_case = self.ignore_case -# elif isinstance(expression, Sequence) or \ -# isinstance(expression, OrderedChoice) or \ -# isinstance(expression, ZeroOrMore) or 
\ -# isinstance(expression, OneOrMore) or \ -# isinstance(expression, Optional): -# ret = expression -# ret.nodes = [inner_get_model(e) for e in ret.elements] -# else: -# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) -# -# # Translate separator expression. -# if isinstance(expression, Repetition) and expression.sep: -# expression.sep = inner_get_model(expression.sep) -# -# return ret -# -# model = inner_get_model(concept_def) -# -# return model -# -# def detect_infinite_recursion(self, concepts_to_resolve): -# -# # infinite recursion matcher -# def _is_infinite_recursion(ref_concept, node): -# if isinstance(node, ConceptExpression): -# if node.concept == ref_concept: -# return True -# -# if isinstance(node.concept, str): -# to_match = self.get_concept(node.concept) -# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): -# return False -# else: -# to_match = node.concept -# -# if to_match not in self.concepts_grammars: -# return False -# -# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match]) -# -# if isinstance(node, OrderedChoice): -# return _is_infinite_recursion(ref_concept, node.nodes[0]) -# -# if isinstance(node, Sequence): -# for node in node.nodes: -# if _is_infinite_recursion(ref_concept, node): -# return True -# return False -# -# return False -# -# removed_concepts = [] -# for e in concepts_to_resolve: -# if isinstance(e, str): -# e = self.get_concept(e) -# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT): -# continue -# -# if e not in self.concepts_grammars: -# continue -# -# to_resolve = self.concepts_grammars[e] -# if _is_infinite_recursion(e, to_resolve): -# removed_concepts.append(e) -# return removed_concepts -# -# def parse(self, context, parser_input): -# if parser_input == "": -# return context.sheerka.ret( -# self.name, -# False, -# context.sheerka.new(BuiltinConcepts.IS_EMPTY) -# ) -# -# if not self.reset_parser(context, parser_input): -# 
return self.sheerka.ret( -# self.name, -# False, -# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) -# -# concepts_found = [[]] -# unrecognized_tokens = None -# has_unrecognized = False -# -# # actually list of list -# # The first dimension is the number of possibilities found -# # The second dimension is the number of concepts found, under one possibility -# # -# # Example 1 -# # concept foo : 'one' 'two' -# # concept bar : 'one' 'two' -# # input 'one two' -> will produce two possibilities (foo and bar). -# # -# # Example 2 -# # concept foo : 'one' -# # concept bar : 'two' -# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar) -# -# while True: -# init_pos = self.pos -# res = [] -# -# for concept, grammar in self.concepts_grammars.items(): -# self.seek(init_pos) -# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode -# if node is not None and node.end != -1: -# updated_concept = self.finalize_concept(context.sheerka, concept, node) -# concept_node = ConceptNode( -# updated_concept, -# node.start, -# node.end, -# self.tokens[node.start: node.end + 1], -# None, -# node) -# res.append(concept_node) -# -# if len(res) == 0: # not recognized -# self.seek(init_pos) -# if unrecognized_tokens: -# unrecognized_tokens.add_token(self.get_token(), init_pos) -# else: -# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()]) -# -# if not self.next_token(False): -# break -# -# else: # some concepts are recognized -# if unrecognized_tokens and unrecognized_tokens.not_whitespace(): -# unrecognized_tokens.fix_source() -# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) -# has_unrecognized = True -# unrecognized_tokens = None -# -# res = self.get_bests(res) # only keep the concepts that eat the more tokens -# concepts_found = core.utils.product(concepts_found, res) -# -# # loop -# self.seek(res[0].end) -# if not self.next_token(False): -# break -# -# # Fix the 
source for unrecognized tokens -# if unrecognized_tokens and unrecognized_tokens.not_whitespace(): -# unrecognized_tokens.fix_source() -# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens]) -# has_unrecognized = True -# -# # else -# # returns as many ReturnValue than choices found -# ret = [] -# for choice in concepts_found: -# ret.append( -# self.sheerka.ret( -# self.name, -# not has_unrecognized, -# self.sheerka.new( -# BuiltinConcepts.PARSER_RESULT, -# parser=self, -# source=parser_input, -# body=choice, -# try_parsed=choice))) -# -# if len(ret) == 1: -# self.log_result(context, parser_input, ret[0]) -# return ret[0] -# else: -# self.log_multiple_results(context, parser_input, ret) -# return ret -# -# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True): -# """ -# Updates the properties of the concept -# Goes in recursion if the property is a concept -# """ -# -# # this cache is to make sure that we return the same concept for the same ConceptExpression -# _underlying_value_cache = {} -# -# def _add_prop(_concept, prop_name, value): -# """ -# Adds a new entry, -# makes a list if the property already exists -# """ -# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None: -# # new entry -# _concept.compiled[prop_name] = value -# else: -# # make a list if there was a value -# previous_value = _concept.compiled[prop_name] -# if isinstance(previous_value, list): -# previous_value.append(value) -# else: -# new_value = [previous_value, value] -# _concept.compiled[prop_name] = new_value -# -# def _look_for_concept_match(_underlying): -# """ -# At some point, there is either an StrMatch or a ConceptMatch, -# that allowed the recognition. 
-# Look for the ConceptMatch, with recursion if needed -# """ -# if isinstance(_underlying.parsing_expression, ConceptExpression): -# return _underlying -# -# if not isinstance(_underlying, NonTerminalNode): -# return None -# -# if len(_underlying.children) != 1: -# return None -# -# return _look_for_concept_match(_underlying.children[0]) -# -# def _get_underlying_value(_underlying): -# concept_match_node = _look_for_concept_match(_underlying) -# if concept_match_node: -# # the value is a concept -# if id(concept_match_node) in _underlying_value_cache: -# result = _underlying_value_cache[id(concept_match_node)] -# else: -# ref_tpl = concept_match_node.parsing_expression.concept -# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body) -# _underlying_value_cache[id(concept_match_node)] = result -# else: -# # the value is a string -# result = DoNotResolve(_underlying.source) -# -# return result -# -# def _process_rule_name(_concept, _underlying): -# if _underlying.parsing_expression.rule_name: -# value = _get_underlying_value(_underlying) -# _add_prop(_concept, _underlying.parsing_expression.rule_name, value) -# _concept.metadata.need_validation = True -# -# if isinstance(_underlying, NonTerminalNode): -# for child in _underlying.children: -# _process_rule_name(_concept, child) -# -# key = (template.key, template.id) if template.id else template.key -# concept = sheerka.new(key) -# if init_empty_body and concept.metadata.body is None: -# value = _get_underlying_value(underlying) -# concept.compiled[ConceptParts.BODY] = value -# if underlying.parsing_expression.rule_name: -# _add_prop(concept, underlying.parsing_expression.rule_name, value) -# # KSI : Why don't we set concept.metadata.need_validation to True ? 
-# -# if isinstance(underlying, NonTerminalNode): -# for node in underlying.children: -# _process_rule_name(concept, node) -# -# return concept -# -# def encode_grammar(self, grammar): -# """ -# Transform the grammar into something that can easily can be serialized -# :param grammar: -# :return: -# """ -# -# def _encode(expression): -# if isinstance(expression, StrMatch): -# res = f"'{expression.to_match}'" -# -# elif isinstance(expression, ConceptExpression): -# res = core.utils.str_concept(expression.concept) -# -# elif isinstance(expression, Sequence): -# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")" -# -# elif isinstance(expression, OrderedChoice): -# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")" -# -# elif isinstance(expression, Optional): -# res = _encode(expression.nodes[0]) + "?" -# -# elif isinstance(expression, ZeroOrMore): -# res = _encode(expression.nodes[0]) + "*" -# -# elif isinstance(expression, OneOrMore): -# res = _encode(expression.nodes[0]) + "+" -# -# if expression.rule_name: -# res += "=" + expression.rule_name -# -# return res -# -# result = {} -# for k, v in grammar.items(): -# key = core.utils.str_concept(k) -# value = _encode(v) -# result[key] = value -# return result -# -# @staticmethod -# def get_bests(results): -# """ -# Returns the result that is the longest -# :param results: -# :return: -# """ -# by_end_pos = defaultdict(list) -# for result in results: -# by_end_pos[result.end].append(result) -# -# return by_end_pos[max(by_end_pos)] -# -# -# class ParsingExpressionVisitor: -# """ -# visit ParsingExpression -# """ -# -# def visit(self, parsing_expression): -# name = parsing_expression.__class__.__name__ -# -# method = 'visit_' + name -# visitor = getattr(self, method, self.generic_visit) -# return visitor(parsing_expression) -# -# def generic_visit(self, parsing_expression): -# if hasattr(self, "visit_all"): -# self.visit_all(parsing_expression) -# -# for node in parsing_expression.elements: -# if 
isinstance(node, Concept): -# self.visit(ConceptExpression(node.key or node.name)) -# elif isinstance(node, str): -# self.visit(StrMatch(node)) -# else: -# self.visit(node) diff --git a/src/parsers/_ConceptsWithConceptsParser.py b/src/parsers/_ConceptsWithConceptsParser.py deleted file mode 100644 index d0fe43e..0000000 --- a/src/parsers/_ConceptsWithConceptsParser.py +++ /dev/null @@ -1,108 +0,0 @@ -# # try to match something like -# # ConceptNode 'plus' ConceptNode -# # -# # Replaced by SyaNodeParser -# from core.builtin_concepts import BuiltinConcepts -# from core.tokenizer import TokenKind, Token -# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode -# from parsers.BaseParser import BaseParser -# from parsers.MultipleConceptsParser import MultipleConceptsParser -# from core.concept import VARIABLE_PREFIX -# -# multiple_concepts_parser = MultipleConceptsParser() -# -# -# class ConceptsWithConceptsParser(BaseParser): -# def __init__(self, **kwargs): -# super().__init__("ConceptsWithConcepts", 25) -# self.enabled = False -# -# @staticmethod -# def get_tokens(nodes): -# tokens = [] -# -# for node in nodes: -# if isinstance(node, ConceptNode): -# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column -# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column)) -# else: -# for token in node.tokens: -# if token.type == TokenKind.EOF: -# break -# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE): -# continue -# else: -# tokens.append(token) -# -# return tokens -# -# @staticmethod -# def get_key(nodes): -# key = "" -# index = 0 -# for node in nodes: -# if key: -# key += " " -# -# if isinstance(node, UnrecognizedTokensNode): -# key += node.source.strip() -# else: -# key += f"{VARIABLE_PREFIX}{index}" -# index += 1 -# -# return key -# -# def finalize_concept(self, context, concept, nodes): -# index = 0 -# for node in nodes: -# -# if isinstance(node, ConceptNode): -# prop_name = 
list(concept.props.keys())[index] -# concept.compiled[prop_name] = node.concept -# context.log( -# f"Setting property '{prop_name}='{node.concept}'.", -# self.name) -# index += 1 -# elif isinstance(node, SourceCodeNode): -# prop_name = list(concept.props.keys())[index] -# sheerka = context.sheerka -# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node) -# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)] -# context.log( -# f"Setting property '{prop_name}'='Python({node.source})'.", -# self.name) -# index += 1 -# -# return concept -# -# def parse(self, context, parser_input): -# sheerka = context.sheerka -# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser) -# if not nodes: -# return None -# -# concept_key = self.get_key(nodes) -# concept = sheerka.new(concept_key) -# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): -# return sheerka.ret( -# self.name, -# False, -# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body)) -# -# concepts = concept if hasattr(concept, "__iter__") else [concept] -# for concept in concepts: -# self.finalize_concept(context, concept, nodes) -# -# res = [] -# for concept in concepts: -# res.append(sheerka.ret( -# self.name, -# True, -# sheerka.new( -# BuiltinConcepts.PARSER_RESULT, -# parser=self, -# source=parser_input.source, -# body=concept, -# try_parsed=None))) -# -# return res[0] if len(res) == 1 else res diff --git a/src/parsers/_MultipleConceptsParser.py b/src/parsers/_MultipleConceptsParser.py deleted file mode 100644 index 7283d41..0000000 --- a/src/parsers/_MultipleConceptsParser.py +++ /dev/null @@ -1,163 +0,0 @@ -# # to be replaced by SyaNodeParser -# import ast -# -# from core.builtin_concepts import BuiltinConcepts -# from core.tokenizer import TokenKind -# from parsers.BaseNodeParser import SourceCodeNode -# from parsers.BaseParser import BaseParser -# from parsers.BnfNodeParser import BnfNodeParser, 
UnrecognizedTokensNode, ConceptNode -# import core.utils -# from parsers.PythonParser import PythonParser -# -# concept_lexer_parser = BnfNodeParser() -# -# -# class MultipleConceptsParser(BaseParser): -# """ -# Parser that will take the result of BnfNodeParser and -# try to resolve the unrecognized tokens token by token -# -# It is a success when it returns a list ConceptNode exclusively -# """ -# -# def __init__(self, **kwargs): -# BaseParser.__init__(self, "MultipleConcepts", 45) -# self.enabled = False -# -# @staticmethod -# def finalize(nodes_found, unrecognized_tokens): -# if not unrecognized_tokens: -# return nodes_found, unrecognized_tokens -# -# unrecognized_tokens.fix_source() -# if unrecognized_tokens.not_whitespace(): -# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens]) -# -# return nodes_found, None -# -# @staticmethod -# def create_or_add(unrecognized_tokens, token, index): -# if unrecognized_tokens: -# unrecognized_tokens.add_token(token, index) -# else: -# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token]) -# return unrecognized_tokens -# -# def parse(self, context, parser_input): -# sheerka = context.sheerka -# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser) -# if not nodes: -# return None -# -# nodes_found = [[]] -# concepts_only = True -# -# for node in nodes: -# if isinstance(node, UnrecognizedTokensNode): -# unrecognized_tokens = None -# i = 0 -# -# while i < len(node.tokens): -# -# token_index = node.start + i -# token = node.tokens[i] -# -# concepts_nodes = self.get_concepts_nodes(context, token_index, token) -# if concepts_nodes is not None: -# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) -# nodes_found = core.utils.product(nodes_found, concepts_nodes) -# i += 1 -# continue -# -# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:]) -# if source_code_node: -# nodes_found, unrecognized_tokens = 
self.finalize(nodes_found, unrecognized_tokens) -# nodes_found = core.utils.product(nodes_found, [source_code_node]) -# i += len(source_code_node.tokens) -# continue -# -# # not a concept nor some source code -# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index) -# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE) -# i += 1 -# -# # finish processing if needed -# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) -# -# else: -# nodes_found = core.utils.product(nodes_found, [node]) -# -# ret = [] -# for choice in nodes_found: -# ret.append( -# sheerka.ret( -# self.name, -# concepts_only, -# sheerka.new( -# BuiltinConcepts.PARSER_RESULT, -# parser=self, -# source=parser_input.source, -# body=choice, -# try_parsed=None)) -# ) -# -# if len(ret) == 1: -# self.log_result(context, parser_input.source, ret[0]) -# return ret[0] -# else: -# self.log_multiple_results(context, parser_input.source, ret) -# return ret -# -# @staticmethod -# def get_concepts_nodes(context, index, token): -# """ -# Tries to recognize a concept -# from the univers of all known concepts -# """ -# -# if token.type != TokenKind.IDENTIFIER: -# return None -# -# concept = context.new_concept(token.value) -# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept): -# concepts = concept if hasattr(concept, "__iter__") else [concept] -# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts] -# return concepts_nodes -# -# return None -# -# @staticmethod -# def get_source_code_node(context, index, tokens): -# """ -# Tries to recognize source code. 
-# For the time being, only Python is supported -# :param context: -# :param tokens: -# :param index: -# :return: -# """ -# -# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF): -# return None -# -# end_index = len(tokens) -# while end_index > 0: -# parser = PythonParser() -# tokens_to_parse = tokens[:end_index] -# res = parser.parse(context, tokens_to_parse) -# if res.status: -# # only expression are accepted -# ast_ = res.value.value.ast_ -# if not isinstance(ast_, ast.Expression): -# return None -# try: -# compiled = compile(ast_, "", "eval") -# eval(compiled, {}, {}) -# except Exception: -# return None -# -# source = BaseParser.get_text_from_tokens(tokens_to_parse) -# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source) -# end_index -= 1 -# -# return None diff --git a/tests/core/test_concept.py b/tests/core/test_concept.py index 1723d81..14a0acb 100644 --- a/tests/core/test_concept.py +++ b/tests/core/test_concept.py @@ -12,10 +12,10 @@ from core.concept import Concept, ConceptParts, DEFINITION_TYPE_DEF ("foo", ["foo"], "foo"), ("foo a", ["foo"], "__var__0 a"), ("foo a b", ["a"], "foo __var__0 b"), - ("'foo'", [], "foo"), + ("'foo'", [], "'foo'"), ("my name is a", ["a"], "my name is __var__0"), ("a b c d", ["b", "c"], "a __var__0 __var__1 d"), - ("a 'b c' d", ["b", "c"], "a b c d"), + ("a 'b c' d", ["b", "c"], "a 'b c' d"), ("a | b", ["a", "b"], "__var__0 | __var__1"), ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"), ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"), diff --git a/tests/core/test_tokenizer.py b/tests/core/test_tokenizer.py index 1e18aed..099a61e 100644 --- a/tests/core/test_tokenizer.py +++ b/tests/core/test_tokenizer.py @@ -4,7 +4,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords def test_i_can_tokenize(): source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:" - source += 
"$£€!_identifier°~_^\\`==#" + source += "$£€!_identifier°~_^\\`==#__var__10" tokens = list(Tokenizer(source)) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) @@ -54,8 +54,9 @@ def test_i_can_tokenize(): assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50) assert tokens[46] == Token(TokenKind.EQUALSEQUALS, '==', 109, 6, 51) assert tokens[47] == Token(TokenKind.HASH, '#', 111, 6, 53) + assert tokens[48] == Token(TokenKind.VAR_DEF, '__var__10', 112, 6, 54) - assert tokens[48] == Token(TokenKind.EOF, '', 112, 6, 54) + assert tokens[49] == Token(TokenKind.EOF, '', 121, 6, 63) @pytest.mark.parametrize("text, expected", [ @@ -88,6 +89,19 @@ def test_i_can_parse_word(text): assert tokens[1].index == len(text) +@pytest.mark.parametrize("text", [ + "__var__0", + "__var__1", + "__var__10", + "__var__999", +]) +def test_i_can_parse_var_def(text): + tokens = list(Tokenizer(text)) + assert len(tokens) == 2 + assert tokens[0].type == TokenKind.VAR_DEF + assert tokens[0].value == text + + @pytest.mark.parametrize("text, message, error_text, index, line, column", [ ("'string", "Missing Trailing quote", "'string", 7, 1, 8), ('"string', "Missing Trailing quote", '"string', 7, 1, 8), diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index 50d6901..312a064 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -36,9 +36,9 @@ def compute_debug_array(res): if token.type == TokenKind.WHITESPACE: continue else: - res_debug.append(token.value) + res_debug.append("T(" + token.value + ")") else: - res_debug.append(token.concept.name) + res_debug.append("C(" + token.concept.name + ")") to_compare.append(res_debug) return to_compare diff --git a/tests/parsers/test_AtomsParser.py b/tests/parsers/test_AtomsParser.py index e8d6aca..cb5709b 100644 --- a/tests/parsers/test_AtomsParser.py +++ b/tests/parsers/test_AtomsParser.py @@ -218,8 +218,8 @@ class 
TestAtomsParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("text, expected", [ ("hello foo bar", [ - (True, [CNC("hello1", source="hello foo ", a=" foo "), "bar"]), - (True, [CNC("hello2", source="hello foo ", b=" foo "), "bar"]), + (True, [CNC("hello1", source="hello foo ", a="foo "), "bar"]), + (True, [CNC("hello2", source="hello foo ", b="foo "), "bar"]), ]), ]) def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected): diff --git a/tests/parsers/test_BnfNodeParser_Old.py b/tests/parsers/test_BnfNodeParser_Old.py deleted file mode 100644 index d3855e5..0000000 --- a/tests/parsers/test_BnfNodeParser_Old.py +++ /dev/null @@ -1,1305 +0,0 @@ -# from ast import Str -# -# import pytest -# from core.builtin_concepts import BuiltinConcepts -# from core.concept import Concept, ConceptParts, DoNotResolve -# from core.tokenizer import Tokenizer, TokenKind, Token -# from parsers.BaseNodeParser import cnode, short_cnode -# from parsers.BnfParser import BnfParser -# from parsers.BnfNodeParser_Old import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ -# ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \ -# UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression -# -# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -# -# -# class ConceptVisitor(ParsingExpressionVisitor): -# def __init__(self): -# self.concepts = set() -# -# def visit_ConceptExpression(self, node): -# self.concepts.add(node.concept) -# -# -# def u(parsing_expression, start, end, children=None): -# """ -# u stands for underlying -# :param parsing_expression: -# :param start: -# :param end: -# :param children: -# :return: -# """ -# if isinstance(parsing_expression, str): -# parsing_expression = StrMatch(parsing_expression) -# -# if isinstance(parsing_expression, StrMatch): -# return TerminalNode(parsing_expression, start, end, parsing_expression.to_match) -# -# return 
NonTerminalNode(parsing_expression, start, end, [], children) -# -# -# def evaluated(concept): -# c = Concept(name=concept.name, body=concept.name) -# -# -# def t(text): -# if text.startswith("'") or text.startswith('"'): -# return Token(TokenKind.STRING, text, 0, 0, 0) -# -# if text.startswith(" "): -# return Token(TokenKind.WHITESPACE, text, 0, 0, 0) -# -# return Token(TokenKind.IDENTIFIER, text, 0, 0, 0) -# -# -# def get_expected(concept, text=None): -# c = Concept(name=concept.name) -# c.compiled[ConceptParts.BODY] = DoNotResolve(text or concept.name) -# c.init_key() -# c.metadata.id = concept.id -# return c -# -# -# def cbody(concept): -# """cbody stands for compiled body""" -# if not ConceptParts.BODY in concept.compiled: -# return None -# return concept.compiled[ConceptParts.BODY] -# -# -# def cprop(concept, prop_name): -# """cbody stands for compiled property""" -# return concept.compiled[prop_name] -# -# -# class TestBnfNodeParser(TestUsingMemoryBasedSheerka): -# -# def init(self, concepts, grammar): -# sheerka = self.get_sheerka(singleton=True) -# context = self.get_context(sheerka) -# for c in concepts: -# context.sheerka.add_in_cache(c) -# context.sheerka.set_id_if_needed(c, False) -# -# parser = BnfNodeParser() -# parser.initialize(context, grammar) -# -# return context, parser -# -# def execute(self, concepts, grammar, text): -# context, parser = self.init(concepts, grammar) -# -# res = parser.parse(context, text) -# wrapper = res.value -# return_value = res.value.value -# -# return context, res, wrapper, return_value -# -# -# @pytest.mark.parametrize("match, text", [ -# ("foo", "foo"), -# ("'foo'", "'foo'"), -# ("1", "1"), -# ("3.14", "3.14"), -# ("+", "+"), -# (StrMatch("foo"), "foo"), -# (StrMatch("'foo'"), "'foo'"), -# (StrMatch("1"), "1"), -# (StrMatch("3.14"), "3.14"), -# (StrMatch("+"), "+"), -# ]) -# def test_i_can_match_simple_tokens(self, match, text): -# foo = Concept(name="foo") -# grammar = {foo: match} -# -# context, res, wrapper, 
return_value = self.execute([foo], grammar, text) -# -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [ConceptNode(get_expected(foo, text), 0, 0, source=text, underlying=u(match, 0, 0))] -# -# -# def test_i_can_match_multiple_concepts_in_one_input(self): -# one = Concept(name="one") -# two = Concept(name="two") -# grammar = {one: "one", two: "two"} -# -# context, res, wrapper, return_value = self.execute([one, two], grammar, "one two one") -# -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [ -# ConceptNode(get_expected(one), 0, 0, source="one", underlying=u("one", 0, 0)), -# ConceptNode(get_expected(two), 2, 2, source="two", underlying=u("two", 2, 2)), -# ConceptNode(get_expected(one), 4, 4, source="one", underlying=u("one", 4, 4)), -# ] -# -# -# def test_i_can_match_sequence(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence("one", "two", "three")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one two three") -# -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [ -# ConceptNode( -# get_expected(foo, "one two three"), -# 0, -# 4, -# source="one two three", -# underlying=u(grammar[foo], 0, 4, [ -# u("one", 0, 0), -# u("two", 2, 2), -# u("three", 4, 4)]))] -# -# -# def test_i_always_choose_the_longest_match(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} -# -# context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three") -# -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [cnode("foo", 0, 4, "one two three")] -# -# def test_i_can_match_several_sequences(self): -# foo = Concept(name="foo") -# bar = 
Concept(name="bar") -# grammar = {bar: Sequence("one", "two"), foo: Sequence("one", "two", "three")} -# -# context, res, wrapper, return_value = self.execute([foo, bar], grammar, "one two three one two") -# -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [ -# cnode("foo", 0, 4, "one two three"), -# cnode("bar", 6, 8, "one two"), -# ] -# -# def test_i_can_match_ordered_choice(self): -# foo = Concept(name="foo") -# grammar = {foo: OrderedChoice("one", "two")} -# context, parser = self.init([foo], grammar) -# -# res1 = parser.parse(context, "one") -# assert res1.status -# assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) -# assert res1.value.body == [cnode("foo", 0, 0, "one")] -# assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)]) -# -# res2 = parser.parse(context, "two") -# assert res2.status -# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) -# assert res2.value.body == [cnode("foo", 0, 0, "two")] -# assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)]) -# -# res3 = parser.parse(context, "three") -# assert not res3.status -# assert context.sheerka.isinstance(res3.value, BuiltinConcepts.PARSER_RESULT) -# assert res3.value.value == [ -# UnrecognizedTokensNode(0, 0, [t("three")]) -# ] -# -# def test_i_cannot_match_ordered_choice_with_empty_alternative(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence(OrderedChoice("one", ""), "two")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "ok") -# -# assert not res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [ -# UnrecognizedTokensNode(0, 0, [t("ok")]) -# ] -# -# def test_i_can_mix_sequences_and_ordered_choices(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence(OrderedChoice("twenty", "thirty"), "one", "ok")} -# -# context, 
parser = self.init([foo], grammar) -# -# res1 = parser.parse(context, "twenty one ok") -# assert res1.status -# assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) -# assert res1.value.body == [ConceptNode(get_expected(foo, "twenty one ok"), 0, 4, source="twenty one ok", -# underlying=u(grammar[foo], 0, 4, [ -# u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]), -# u("one", 2, 2), -# u("ok", 4, 4)]))] -# -# res2 = parser.parse(context, "thirty one ok") -# assert res2.status -# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) -# assert res2.value.body == [ConceptNode(get_expected(foo, "thirty one ok"), 0, 4, source="thirty one ok", -# underlying=u(grammar[foo], 0, 4, [ -# u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]), -# u("one", 2, 2), -# u("ok", 4, 4)]))] -# -# res3 = parser.parse(context, "twenty one") -# assert not res3.status -# assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) -# assert res3.value.value == [ -# UnrecognizedTokensNode(0, 2, [t("twenty"), t(" "), t("one")]) -# ] -# -# def test_i_can_mix_ordered_choices_and_sequences(self): -# foo = Concept(name="foo") -# grammar = {foo: OrderedChoice(Sequence("twenty", "thirty"), "one")} -# -# context, parser = self.init([foo], grammar) -# -# res = parser.parse(context, "twenty thirty") -# assert res.status -# assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")] -# -# res = parser.parse(context, "one") -# assert res.status -# assert res.value.value == [cnode("foo", 0, 0, "one")] -# -# def test_i_cannot_parse_empty_optional(self): -# foo = Concept(name="foo") -# grammar = {foo: Optional("one")} -# context, parser = self.init([foo], grammar) -# -# res = parser.parse(context, "") -# return_value = res.value -# -# assert not res.status -# assert context.sheerka.isinstance(return_value, BuiltinConcepts.IS_EMPTY) -# -# def test_i_can_parse_optional(self): -# foo = Concept(name="foo") -# grammar = 
{foo: Optional("one")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one") -# -# assert res.status -# assert return_value == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", -# underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] -# -# def test_i_can_parse_sequence_starting_with_optional(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence(Optional("twenty"), "one")} -# context, parser = self.init([foo], grammar) -# -# res = parser.parse(context, "twenty one") -# assert res.status -# assert res.value.body == [ConceptNode( -# get_expected(foo, "twenty one"), 0, 2, -# source="twenty one", -# underlying=u(grammar[foo], 0, 2, -# [ -# u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]), -# u("one", 2, 2)] -# ))] -# -# res = parser.parse(context, "one") -# assert res.status -# assert res.value.body == [ConceptNode(get_expected(foo, "one"), 0, 0, source="one", -# underlying=u(grammar[foo], 0, 0, [u("one", 0, 0)]))] -# -# def test_i_can_parse_sequence_ending_with_optional(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence("one", "two", Optional("three"))} -# -# context, parser = self.init([foo], grammar) -# -# res = parser.parse(context, "one two three") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 4, "one two three")] -# -# res = parser.parse(context, "one two") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 2, "one two")] -# -# def test_i_can_parse_sequence_with_optional_in_between(self): -# foo = Concept(name="foo") -# -# grammar = {foo: Sequence("one", Optional("two"), "three")} -# -# context, parser = self.init([foo], grammar) -# -# res = parser.parse(context, "one two three") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 4, "one two three")] -# -# res = parser.parse(context, "one three") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 2, "one three")] -# -# def test_i_cannot_parse_wrong_input_with_optional(self): -# foo = 
Concept(name="foo") -# grammar = {foo: Optional("one")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "two") -# -# assert not res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [ -# UnrecognizedTokensNode(0, 0, [t("two")]) -# ] -# -# def test_i_can_use_reference(self): -# # when there are multiple matches for the same input -# # Do I need to create a choice concept ? -# # No, create a return value for every possible graph -# -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = {foo: Sequence("one", "two"), bar: foo} -# context, parser = self.init([foo, bar], grammar) -# res = parser.parse(context, "one two") -# -# assert len(res) == 2 -# -# assert res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [cnode("foo", 0, 2, "one two")] -# concept_found_1 = res[0].value.body[0].concept -# assert cbody(concept_found_1) == DoNotResolve("one two") -# -# assert res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [cnode("bar", 0, 2, "one two")] -# concept_found_2 = res[1].value.body[0].concept -# # the body and the prop['foo'] are the same concept 'foo' -# assert cbody(concept_found_2) == get_expected(foo, "one two") -# assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) -# -# def test_i_can_use_a_reference_with_a_body(self): -# """ -# Same test than before (test_i_can_use_reference()) -# but this time, the concept 'foo' already has a body. 
-# :return: -# """ -# -# foo = Concept(name="foo", body="'foo'") -# bar = Concept(name="bar") -# grammar = {foo: Sequence("one", "two"), bar: foo} -# context, parser = self.init([foo, bar], grammar) -# res = parser.parse(context, "one two") -# -# assert len(res) == 2 -# -# assert res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [cnode("foo", 0, 2, "one two")] -# concept_found_1 = res[0].value.body[0].concept -# assert concept_found_1.metadata.body == "'foo'" -# assert cbody(concept_found_1) is None -# -# assert res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [cnode("bar", 0, 2, "one two")] -# concept_found_2 = res[1].value.body[0].concept -# assert cbody(concept_found_2) == foo -# # the body and the prop['foo'] are the same concept 'foo' -# assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) -# -# def test_i_can_use_context_reference_with_multiple_levels(self): -# """ -# Same than previous one, but with reference of reference -# :return: -# """ -# -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# baz = Concept(name="baz") -# grammar = {foo: Sequence("one", "two"), bar: foo, baz: bar} -# context, parser = self.init([foo, bar, baz], grammar) -# -# res = parser.parse(context, "one two") -# assert len(res) == 3 -# -# assert res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [cnode("foo", 0, 2, "one two")] -# concept_found_1 = res[0].value.body[0].concept -# assert cbody(concept_found_1) == DoNotResolve("one two") -# -# assert res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [cnode("bar", 0, 2, "one two")] -# concept_found_2 = res[1].value.body[0].concept -# assert cbody(concept_found_2) == get_expected(foo, "one two") -# assert 
id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) -# -# assert res[2].status -# assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) -# assert res[2].value.body == [cnode("baz", 0, 2, "one two")] -# concept_found_3 = res[2].value.body[0].concept -# expected_foo = get_expected(foo, "one two") -# assert cbody(concept_found_3) == get_expected(bar, expected_foo) -# assert cprop(concept_found_3, "foo") == expected_foo -# assert id(cprop(concept_found_3, "bar")) == id(cbody(concept_found_3)) -# -# def test_order_is_not_important_when_using_references(self): -# """ -# Same test than test_i_can_use_reference(), -# but this time, 'bar' is declared before 'foo' -# So the order of the result is different -# :return: -# """ -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = {bar: foo, foo: Sequence("one", "two")} -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "one two") -# assert len(res) == 2 -# assert res[0].value.body == [cnode("bar", 0, 2, "one two")] -# assert res[1].value.body == [cnode("foo", 0, 2, "one two")] -# -# def test_i_can_parse_when_reference(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "twenty two") -# assert res.status -# assert res.value.body == [cnode("bar", 0, 2, "twenty two")] -# concept_found = res.value.body[0].concept -# assert cbody(concept_found) == DoNotResolve("twenty two") -# assert cprop(concept_found, "foo") == get_expected(foo, "twenty") -# -# res = parser.parse(context, "thirty one") -# assert res.status -# assert res.value.body == [cnode("bar", 0, 2, "thirty one")] -# concept_found = res.value.body[0].concept -# assert cbody(concept_found) == DoNotResolve("thirty one") -# assert cprop(concept_found, "foo") == get_expected(foo, "thirty") -# 
-# res = parser.parse(context, "twenty") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 0, "twenty")] -# concept_found = res.value.body[0].concept -# assert cbody(concept_found) == DoNotResolve("twenty") -# -# def test_i_can_parse_when_reference_has_a_body(self): -# foo = Concept(name="foo", body="'one'") -# bar = Concept(name="bar") -# grammar = {bar: Sequence(foo, OrderedChoice("one", "two")), foo: OrderedChoice("twenty", "thirty")} -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "twenty two") -# assert res.status -# assert res.value.body == [cnode("bar", 0, 2, "twenty two")] -# concept_found = res.value.body[0].concept -# assert cbody(concept_found) == DoNotResolve("twenty two") -# assert cprop(concept_found, "foo") == foo -# -# res = parser.parse(context, "twenty") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 0, "twenty")] -# concept_found = res.value.body[0].concept -# assert concept_found.metadata.body == "'one'" -# -# def test_i_can_parse_multiple_results(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = { -# bar: Sequence("one", "two"), -# foo: Sequence("one", OrderedChoice("two", "three")) -# } -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "one two") -# assert len(res) == 2 -# assert res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [cnode("bar", 0, 2, "one two")] -# concept_found_0 = res[0].value.body[0].concept -# assert cbody(concept_found_0) == DoNotResolve("one two") -# assert len(concept_found_0.props) == 0 -# -# assert res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [cnode("foo", 0, 2, "one two")] -# concept_found_1 = res[1].value.body[0].concept -# assert cbody(concept_found_1) == DoNotResolve("one two") -# assert len(concept_found_1.props) == 0 -# -# 
def test_i_can_parse_multiple_results_times_two(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = { -# bar: Sequence("one", "two"), -# foo: Sequence("one", OrderedChoice("two", "three")) -# } -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "one two one two") -# assert len(res) == 4 -# assert res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")] -# -# assert res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")] -# -# assert res[2].status -# assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) -# assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")] -# -# assert res[3].status -# assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT) -# assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")] -# -# def test_i_can_parse_multiple_results_when_reference(self): -# """ -# TODO : There should no be two answer, has the one with bar is totally useless -# Note that bar = Sequence(foo, OrderedChoice("one", "two")) does not match -# -# :return: -# """ -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = { -# bar: Sequence(foo, Optional(OrderedChoice("one", "two"))), -# foo: OrderedChoice("twenty", "thirty") -# } -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "twenty") -# assert len(res) == 2 -# assert res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [cnode("bar", 0, 0, "twenty")] -# -# assert res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert 
res[1].value.body == [cnode("foo", 0, 0, "twenty")] -# -# def test_i_can_parse_concept_reference_that_is_not_in_grammar(self): -# one = Concept(name="one") -# two = Concept(name="two") -# foo = Concept(name="foo") -# grammar = {foo: Sequence("twenty", OrderedChoice(one, two))} -# context, parser = self.init([one, two, foo], grammar) -# -# res = parser.parse(context, "twenty two") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 2, "twenty two")] -# concept_found = res.value.body[0].concept -# assert cbody(concept_found) == DoNotResolve("twenty two") -# assert cprop(concept_found, "two") == get_expected(two, "two") -# -# res = parser.parse(context, "twenty one") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 2, "twenty one")] -# -# def test_i_can_initialize_when_cyclic_reference(self): -# foo = Concept(name="foo") -# grammar = {foo: Optional("one", ConceptExpression("foo"))} -# context, parser = self.init([foo], grammar) -# -# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo")) -# -# def test_i_cannot_initialize_when_cyclic_reference_when_concept_is_under_construction_and_not_known(self): -# foo = Concept(name="foo").init_key() -# grammar = {foo: Optional("one", ConceptExpression("foo"))} -# -# context = self.get_context() -# parser = BnfNodeParser() -# parser.initialize(context, grammar) -# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo")) -# -# def test_i_can_initialize_when_cyclic_reference_when_concept_is_under_construction_and_known(self): -# foo = Concept(name="foo").init_key() -# grammar = {foo: Optional("one", ConceptExpression("foo"))} -# -# context = self.get_context() -# context.concepts["foo"] = foo -# parser = BnfNodeParser() -# parser.initialize(context, grammar) -# assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo")) -# -# def 
test_i_can_parse_concept_reference_that_is_group(self): -# """ -# if one is number, then number is a 'group' -# a group can be found under the sdp entry 'all_' -# """ -# -# context = self.get_context() -# one = Concept(name="one") -# two = Concept(name="two") -# number = Concept(name="number") -# foo = Concept(name="foo") -# for c in [one, two, number, foo]: -# context.sheerka.set_id_if_needed(c, False) -# context.sheerka.add_in_cache(c) -# -# context.sheerka.add_concept_to_set(context, one, number) -# context.sheerka.add_concept_to_set(context, two, number) -# -# grammar = {foo: Sequence("twenty", number)} -# -# parser = BnfNodeParser() -# parser.initialize(context, grammar) -# -# res = parser.parse(context, "twenty two") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 2, "twenty two")] -# concept_found = res.value.body[0].concept -# assert cbody(concept_found) == DoNotResolve("twenty two") -# assert cprop(concept_found, "two") == get_expected(two, "two") -# assert cprop(concept_found, "number") == get_expected(number, get_expected(two, "two")) -# -# res = parser.parse(context, "twenty one") -# assert res.status -# assert res.value.body == [cnode("foo", 0, 2, "twenty one")] -# concept_found = res.value.body[0].concept -# assert cbody(concept_found) == DoNotResolve("twenty one") -# assert cprop(concept_found, "one") == get_expected(one, "one") -# assert cprop(concept_found, "number") == get_expected(number, get_expected(one, "one")) -# -# def test_i_can_parse_zero_or_more(self): -# foo = Concept(name="foo") -# grammar = {foo: ZeroOrMore("one")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one one") -# -# assert res.status -# assert return_value == [cnode("foo", 0, 2, "one one")] -# assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]) -# -# concept_found = return_value[0].concept -# assert cbody(concept_found) == DoNotResolve("one one") -# -# def 
test_i_can_parse_sequence_and_zero_or_more(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence(ZeroOrMore("one"), "two")} -# context, parser = self.init([foo], grammar) -# -# res = parser.parse(context, "one one two") -# assert res.status -# assert res.value.value == [cnode("foo", 0, 4, "one one two")] -# -# res = parser.parse(context, "two") -# assert res.status -# assert res.value.value == [cnode("foo", 0, 0, "two")] -# -# def test_i_cannot_parse_zero_and_more_when_wrong_entry(self): -# # TEST WITH UNRECOGNIZED -# foo = Concept(name="foo") -# grammar = {foo: ZeroOrMore("one")} -# context, parser = self.init([foo], grammar) -# -# parser = BnfNodeParser() -# parser.initialize(context, grammar) -# -# res = parser.parse(context, "one two") -# assert not res.status -# assert res.value.value == [ -# cnode("foo", 0, 0, "one"), -# UnrecognizedTokensNode(1, 2, [t(" "), t("two")]) -# ] -# -# res = parser.parse(context, "two") -# assert not res.status -# assert res.value.value == [ -# UnrecognizedTokensNode(0, 0, [t("two")]) -# ] -# -# def test_i_can_parse_zero_and_more_with_separator(self): -# foo = Concept(name="foo") -# grammar = {foo: ZeroOrMore("one", sep=",")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one") -# -# assert res.status -# assert return_value == [cnode("foo", 0, 7, "one, one , one")] -# -# def test_that_zero_and_more_is_greedy(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = {foo: ZeroOrMore("one"), bar: "one"} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one one one") -# -# assert res.status -# assert return_value == [cnode("foo", 0, 4, "one one one")] -# -# ############## -# ## YOU STOPPED HERE -# -# # next one to do is below -# ############# -# -# -# def test_i_can_parse_one_and_more(self): -# foo = Concept(name="foo") -# grammar = {foo: OneOrMore("one")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one one") -# -# 
assert res.status -# assert return_value == [cnode("foo", 0, 2, "one one")] -# assert return_value[0].underlying == u(grammar[foo], 0, 2, [ -# u("one", 0, 0), -# u("one", 2, 2)]) -# -# -# def test_i_can_parse_sequence_and_one_or_more(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence(OneOrMore("one"), "two")} -# context, parser = self.init([foo], grammar) -# -# res = parser.parse(context, "one one two") -# assert res.status -# assert res.value.value == [cnode("foo", 0, 4, "one one two")] -# -# res = parser.parse(context, "two") -# assert not res.status -# assert res.value.value == [ -# UnrecognizedTokensNode(0, 0, [t("two")]) -# ] -# -# def test_i_can_parse_one_and_more_with_separator(self): -# foo = Concept(name="foo") -# grammar = {foo: OneOrMore("one", sep=",")} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one, one , one") -# -# assert res.status -# assert return_value == [cnode("foo", 0, 7, "one, one , one")] -# assert return_value[0].underlying == u(grammar[foo], 0, 7, [ -# u("one", 0, 0), -# u("one", 3, 3), -# u("one", 7, 7)]) -# -# def test_that_one_and_more_is_greedy(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = {foo: OneOrMore("one"), bar: "one"} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "one one one") -# -# assert res.status -# assert return_value == [cnode("foo", 0, 4, "one one one")] -# -# @pytest.mark.skip("Done in BaseNode") -# def test_i_can_detect_infinite_recursion(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# -# grammar = { -# bar: foo, -# foo: bar -# } -# parser = BnfNodeParser() -# parser.initialize(self.get_context(), grammar) -# -# assert bar not in parser.concepts_grammars -# assert foo not in parser.concepts_grammars -# -# @pytest.mark.skip("Done in BaseNode") -# def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = { -# 
bar: foo, -# foo: OrderedChoice(bar, "foo") -# } -# -# parser = BnfNodeParser() -# parser.initialize(self.get_context(), grammar) -# -# assert foo not in parser.concepts_grammars # removed because of the infinite recursion -# assert bar not in parser.concepts_grammars # removed because of the infinite recursion -# -# # the other way around is possible -# grammar = { -# bar: foo, -# foo: OrderedChoice("foo", bar) -# } -# context, parser = self.init([foo, bar], grammar) -# -# assert foo in parser.concepts_grammars -# assert bar in parser.concepts_grammars -# -# res = parser.parse(context, "foo") -# assert len(res) == 2 -# assert res[0].status -# assert res[0].value.body == [cnode("bar", 0, 0, "foo")] -# assert res[1].status -# assert res[1].value.body == [cnode("foo", 0, 0, "foo")] -# -# def test_i_can_detect_indirect_infinite_recursion_with_sequence(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# -# grammar = { -# bar: foo, -# foo: Sequence("one", bar, "two") -# } -# parser = BnfNodeParser() -# parser.initialize(self.get_context(), grammar) -# -# assert foo not in parser.concepts_grammars # removed because of the infinite recursion -# assert bar not in parser.concepts_grammars # removed because of the infinite recursion -# -# def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# -# grammar = { -# bar: foo, -# foo: Sequence("one", OrderedChoice(bar, "other"), "two") -# } -# parser = BnfNodeParser() -# parser.initialize(self.get_context(), grammar) -# -# assert foo not in parser.concepts_grammars # removed because of the infinite recursion -# assert bar not in parser.concepts_grammars # removed because of the infinite recursion -# -# def test_infinite_recursion_does_not_fail_if_a_concept_is_missing(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# -# grammar = { -# foo: bar -# } -# parser = BnfNodeParser() -# 
parser.initialize(self.get_context(), grammar) -# -# assert foo in parser.concepts_grammars -# -# def test_i_can_detect_indirect_infinite_recursion_with_optional(self): -# # TODO infinite recursion with optional -# pass -# -# def test_i_can_detect_indirect_infinite_recursion_with_zero_and_more(self): -# # TODO infinite recursion with optional -# pass -# -# def test_i_can_detect_indirect_infinite_recursion_with_one_and_more(self): -# # TODO infinite recursion with optional -# pass -# -# def test_i_can_visit_parsing_expression(self): -# mult = Concept(name="mult") -# add = Concept(name="add") -# -# visitor = ConceptVisitor() -# visitor.visit(Sequence(mult, Optional(Sequence("+", add)))) -# -# assert sorted(list(visitor.concepts)) == ["add", "mult"] -# -# def test_i_can_initialize_rule_names(self): -# context = self.get_context() -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# -# grammar = {foo: Sequence("one", "two"), bar: foo} -# parser = BnfNodeParser() -# ret = parser.initialize(context, grammar) -# return_value = ret.body -# -# assert return_value[foo].rule_name == "" -# assert return_value[bar].rule_name == "foo" -# -# @pytest.mark.parametrize("text, end_position", [ -# ("foo", 0), -# ("foo bar", 2), -# ("foo bar ", 3), -# (" foo bar ", 4) -# ]) -# def test_cannot_parser_unknown_concepts(self, text, end_position): -# context, res, wrapper, return_value = self.execute([], {}, text) -# tokens = list(Tokenizer(text))[:-1] -# -# assert not res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [UnrecognizedTokensNode(0, end_position, tokens)] -# -# def test_i_cannot_parse_when_part_of_the_input_is_unrecognized(self): -# one = Concept(name="one") -# two = Concept(name="two") -# grammar = {one: "one", two: "two"} -# -# context, res, wrapper, return_value = self.execute([one, two], grammar, "one two three") -# -# assert not res.status -# assert context.sheerka.isinstance(wrapper, 
BuiltinConcepts.PARSER_RESULT) -# assert return_value == [ -# ConceptNode(get_expected(one, "one"), 0, 0, source="one", underlying=u("one", 0, 0)), -# ConceptNode(get_expected(two, "two"), 2, 2, source="two", underlying=u("two", 2, 2)), -# UnrecognizedTokensNode(3, 4, [t(" "), t("three")]) -# ] -# -# # def test_i_cannot_parse_when_wrong_sequence(self): -# # foo = Concept(name="foo") -# # grammar = {foo: Sequence("one", "two", "three")} -# # -# # context, res, wrapper, return_value = self.execute([foo], grammar, "one two three one") -# # -# # assert not res.status -# # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# # assert return_value == [ -# # short_cnode("foo", "one two three"), -# # UnrecognizedTokensNode(5, 6, [t(" "), t("one")]) -# # ] -# -# # def test_i_cannot_parse_when_sequence_cannot_match_because_of_end_of_file(self): -# # foo = Concept(name="foo") -# # grammar = {foo: Sequence("one", "two", "three")} -# # -# # context, res, wrapper, return_value = self.execute([foo], grammar, "one two") -# # -# # assert not res.status -# # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# # assert return_value == [ -# # UnrecognizedTokensNode(0, 2, [t("one"), t(" "), t("two")]) -# # ] -# -# def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = { -# bar: Sequence("one", "two"), -# foo: Sequence("one", OrderedChoice("two", "three")) -# } -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "one two four five") -# -# assert len(res) == 2 -# assert not res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [ -# cnode("bar", 0, 2, "one two"), -# UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) -# ] -# -# assert not res[1].status -# assert context.sheerka.isinstance(res[1].value, 
BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [ -# cnode("foo", 0, 2, "one two"), -# UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) -# ] -# -# def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = { -# bar: Sequence("one", "two"), -# foo: Sequence("one", OrderedChoice("two", "three")) -# } -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "four five one two") -# -# assert len(res) == 2 -# assert not res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [ -# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), -# cnode("bar", 4, 6, "one two"), -# ] -# -# assert not res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [ -# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), -# cnode("foo", 4, 6, "one two"), -# ] -# -# def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = { -# bar: Sequence("one", "two"), -# foo: Sequence("one", OrderedChoice("two", "three")) -# } -# context, parser = self.init([foo, bar], grammar) -# -# res = parser.parse(context, "four five one two six seven") -# assert len(res) == 2 -# assert not res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [ -# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), -# cnode("bar", 4, 6, "one two"), -# UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), -# ] -# -# assert not res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [ -# UnrecognizedTokensNode(0, 3, [t("four"), t(" "), 
t("five"), t(" ")]), -# cnode("foo", 4, 6, "one two"), -# UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), -# ] -# -# def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(self): -# context = self.get_context() -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# baz = Concept(name="baz") -# grammar = { -# bar: Sequence("one", "two"), -# foo: Sequence("one", OrderedChoice("two", "three")), -# baz: StrMatch("six"), -# } -# context, parser = self.init([foo, bar, baz], grammar) -# -# res = parser.parse(context, "one two four five six") -# assert len(res) == 2 -# assert not res[0].status -# assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) -# assert res[0].value.body == [ -# cnode("bar", 0, 2, "one two"), -# UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), -# cnode("baz", 8, 8, "six"), -# ] -# -# assert not res[1].status -# assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) -# assert res[1].value.body == [ -# cnode("foo", 0, 2, "one two"), -# UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), -# cnode("baz", 8, 8, "six"), -# ] -# -# def test_i_can_get_the_inner_concept_when_possible(self): -# foo = Concept(name="foo") -# one = Concept(name="one") -# grammar = {foo: Sequence(Optional(ZeroOrMore(one)), ZeroOrMore("one"))} -# -# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one") -# -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [cnode("foo", 0, 0, "one")] -# concept_found = return_value[0].concept -# assert cbody(concept_found) == get_expected(one, "one") -# assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) -# -# def test_i_can_get_the_inner_concept_when_possible_with_rule_name(self): -# foo = Concept(name="foo") -# one = Concept(name="one") -# grammar = {foo: Sequence( -# 
Optional(ZeroOrMore(one, rule_name="zero"), rule_name="opt"), -# ZeroOrMore("one"), rule_name="seq")} -# -# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one") -# -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [cnode("foo", 0, 0, "one")] -# concept_found = return_value[0].concept -# assert cbody(concept_found) == get_expected(one, "one") -# assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) -# assert id(cprop(concept_found, "zero")) == id(cbody(concept_found)) -# assert id(cprop(concept_found, "opt")) == id(cbody(concept_found)) -# assert id(cprop(concept_found, "seq")) == id(cbody(concept_found)) -# -# def test_i_get_multiple_props_when_zero_or_more(self): -# foo = Concept(name="foo") -# one = Concept(name="one") -# grammar = {foo: ZeroOrMore(one)} -# -# context, res, wrapper, return_value = self.execute([foo, one], grammar, "one one one") -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [cnode("foo", 0, 4, "one one one")] -# concept_found = return_value[0].concept -# assert cbody(concept_found) == DoNotResolve("one one one") -# assert len(concept_found.compiled["one"]) == 3 -# assert cprop(concept_found, "one")[0] == get_expected(one) -# assert cprop(concept_found, "one")[1] == get_expected(one) -# assert cprop(concept_found, "one")[2] == get_expected(one) -# assert id(cprop(concept_found, "one")[0]) != id(cprop(concept_found, "one")[1]) -# assert id(cprop(concept_found, "one")[1]) != id(cprop(concept_found, "one")[2]) -# assert id(cprop(concept_found, "one")[2]) != id(cprop(concept_found, "one")[0]) -# -# def test_i_get_multiple_props_when_zero_or_more_and_different_values(self): -# foo = Concept(name="foo") -# one = Concept(name="one") -# grammar = {foo: ZeroOrMore(Sequence(one, "ok", rule_name="seq")), one: OrderedChoice("one", "un", "uno")} -# -# context, res, 
wrapper, return_value = self.execute([foo, one], grammar, "one ok un ok uno ok") -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert return_value == [short_cnode("foo", "one ok un ok uno ok")] -# concept_found = return_value[0].concept -# assert cprop(concept_found, "one")[0] == get_expected(one, "one") -# assert cprop(concept_found, "one")[1] == get_expected(one, "un") -# assert cprop(concept_found, "one")[2] == get_expected(one, "uno") -# assert cprop(concept_found, "seq")[0] == DoNotResolve("one ok") -# assert cprop(concept_found, "seq")[1] == DoNotResolve("un ok") -# assert cprop(concept_found, "seq")[2] == DoNotResolve("uno ok") -# -# @pytest.mark.parametrize("rule, expected", [ -# (StrMatch("string"), "'string'"), -# (StrMatch("string", rule_name="rule_name"), "'string'=rule_name"), -# (Sequence(StrMatch("foo"), StrMatch("bar")), "('foo' 'bar')"), -# (Sequence(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo' 'bar')=rule_name"), -# (OrderedChoice(StrMatch("foo"), StrMatch("bar")), "('foo'|'bar')"), -# (OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="rule_name"), "('foo'|'bar')=rule_name"), -# (Optional(StrMatch("foo")), "'foo'?"), -# (Optional(StrMatch("foo"), rule_name="rule_name"), "'foo'?=rule_name"), -# (ZeroOrMore(StrMatch("foo")), "'foo'*"), -# (ZeroOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'*=rule_name"), -# (OneOrMore(StrMatch("foo")), "'foo'+"), -# (OneOrMore(StrMatch("foo"), rule_name="rule_name"), "'foo'+=rule_name"), -# (Sequence( -# Optional(StrMatch("foo"), rule_name="a"), -# ZeroOrMore(StrMatch("bar"), rule_name="b"), -# OneOrMore(StrMatch("baz"), rule_name="c"), -# rule_name="d"), "('foo'?=a 'bar'*=b 'baz'+=c)=d"), -# (OrderedChoice( -# Optional(StrMatch("foo"), rule_name="a"), -# ZeroOrMore(StrMatch("bar"), rule_name="b"), -# OneOrMore(StrMatch("baz"), rule_name="c"), -# rule_name="d"), "('foo'?=a|'bar'*=b|'baz'+=c)=d"), -# (Sequence( -# 
OrderedChoice(StrMatch("foo"), StrMatch("bar"), rule_name="a"), -# OrderedChoice(StrMatch("x"), StrMatch("y"), rule_name="b"), -# rule_name="c"), "(('foo'|'bar')=a ('x'|'y')=b)=c") -# ]) -# def test_i_can_encode_grammar(self, rule, expected): -# foo = Concept(name="foo") -# grammar = {foo: rule} -# context, parser = self.init([foo], grammar) -# -# encoded = parser.encode_grammar(parser.concepts_grammars) -# assert encoded["c:foo|1001:"] == expected -# -# bnf_parser = BnfParser() -# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) -# assert parse_res.status -# assert parse_res.value.value == rule -# -# def test_i_can_encode_grammar_when_concept_simple(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# grammar = {foo: ConceptExpression(bar)} -# context, parser = self.init([foo, bar], grammar) -# -# encoded = parser.encode_grammar(parser.concepts_grammars) -# assert encoded["c:foo|1001:"] == "c:bar|1002:=bar" -# -# bnf_parser = BnfParser() -# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) -# assert parse_res.status -# assert parse_res.value.value == grammar[foo] -# -# def test_i_can_encode_grammar_when_concepts(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# baz = Concept(name="baz") -# grammar = {foo: Sequence( -# StrMatch("a"), -# OrderedChoice(ConceptExpression(bar), -# OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} -# context, parser = self.init([foo, bar, baz], grammar) -# -# encoded = parser.encode_grammar(parser.concepts_grammars) -# assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" -# -# bnf_parser = BnfParser() -# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) -# assert parse_res.status -# assert parse_res.value.value == grammar[foo] -# -# def test_i_can_encode_grammar_when_set_concepts(self): -# foo = Concept(name="foo") -# bar = Concept(name="bar") -# baz = Concept(name="baz") -# grammar = {foo: Sequence( -# StrMatch("a"), -# 
OrderedChoice(bar, -# OneOrMore(ConceptExpression(baz)), rule_name="oc"), rule_name="s")} -# context = self.get_context() -# for c in [foo, bar, baz]: -# context.sheerka.add_in_cache(c) -# context.sheerka.set_id_if_needed(c, False) -# context.sheerka.add_concept_to_set(context, baz, bar) -# -# parser = BnfNodeParser() -# parser.initialize(context, grammar) -# -# encoded = parser.encode_grammar(parser.concepts_grammars) -# assert encoded["c:foo|1001:"] == "('a' (c:bar|1002:=bar|c:baz|1003:=baz+)=oc)=s" -# -# bnf_parser = BnfParser() -# parse_res = bnf_parser.parse(context, encoded["c:foo|1001:"]) -# assert parse_res.status -# -# expected = Sequence( -# StrMatch("a"), -# OrderedChoice(ConceptGroupExpression(bar, rule_name="bar"), -# OneOrMore(ConceptExpression(baz, rule_name="baz")), rule_name="oc"), rule_name="s") -# assert parse_res.value.value == expected -# -# def test_i_concept_validation_is_not_set_when_no_variables(self): -# foo = Concept(name="foo") -# grammar = {foo: "foo"} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "foo") -# assert not return_value[0].concept.metadata.need_validation -# -# def test_i_concept_validation_is_set_when_unnamed_variables_are_found(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence("foo", OrderedChoice("a", "b"))} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "foo a") -# assert not return_value[0].concept.metadata.need_validation -# -# def test_i_concept_validation_is_set_when_named_variables_are_found(self): -# foo = Concept(name="foo") -# grammar = {foo: Sequence("foo", OrderedChoice("a", "b", rule_name="var"))} -# -# context, res, wrapper, return_value = self.execute([foo], grammar, "foo a") -# assert return_value[0].concept.metadata.need_validation -# -# -# # -# # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(self): -# # context = self.get_context() -# # add = Concept(name="add") -# # mult = Concept(name="mult") -# # atom = 
Concept(name="atom") -# # -# # grammar = { -# # add: Sequence(mult, Optional(Sequence(OrderedChoice('+', '-', rule_name="sign"), add))), -# # mult: Sequence(atom, Optional(Sequence(OrderedChoice('*', '/'), mult))), -# # atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')), -# # } -# # -# # parser = BnfNodeParser() -# # parser.register(grammar) -# # -# # # res = parser.parse(context, "1") -# # # assert len(res) == 3 # add, mult, atom -# # # -# # # res = parser.parse(context, "1 * 2") -# # # assert len(res) == 2 # add and mult -# # # -# # # res = parser.parse(context, "1 + 2") -# # # assert res.status -# # # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2")] -# # -# # res = parser.parse(context, "1 * 2 + 3") -# # assert res.status -# # assert return_value == [ConceptNode(add, 0, 4, source="1 + 2 + 3")] -# -# def test_i_can_register_concepts_with_the_same_name(self): -# # TODO : concepts are registered by name, -# # what when two concepts have the same name ? 
-# pass -# -# def test_i_can_parse_very_very_long_input(self): -# # TODO: In the current implementation, all the tokens are loaded in memory -# # It's clearly not the good approach -# pass diff --git a/tests/parsers/test_ConceptsWithConceptsParser.py b/tests/parsers/test_ConceptsWithConceptsParser.py deleted file mode 100644 index caaa378..0000000 --- a/tests/parsers/test_ConceptsWithConceptsParser.py +++ /dev/null @@ -1,193 +0,0 @@ -# import ast -# -# import pytest -# -# from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts -# from core.concept import Concept -# from core.tokenizer import Token, TokenKind, Tokenizer -# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode -# from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser -# from parsers.MultipleConceptsParser import MultipleConceptsParser -# from parsers.PythonParser import PythonNode -# -# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -# -# multiple_concepts_parser = MultipleConceptsParser() -# -# -# def ret_val(*args): -# result = [] -# index = 0 -# source = "" -# for item in args: -# if isinstance(item, Concept): -# tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)] -# result.append(ConceptNode(item, index, index, tokens, item.name)) -# index += 1 -# source += item.name -# elif isinstance(item, PythonNode): -# tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF -# result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source)) -# index += len(tokens) -# source += item.source -# else: -# tokens = list(Tokenizer(item))[:-1] # strip trailing EOF -# result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens)) -# index += len(tokens) -# source += item -# -# return ReturnValueConcept( -# "who", -# False, -# ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source)) -# -# -# class 
TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka): -# -# def init(self, concepts, inputs): -# context = self.get_context() -# for concept in concepts: -# context.sheerka.create_new_concept(context, concept) -# -# return context, ret_val(*inputs) -# -# def execute(self, concepts, inputs): -# context, input_return_values = self.init(concepts, inputs) -# -# parser = ConceptsWithConceptsParser() -# result = parser.parse(context, input_return_values.body) -# -# wrapper = result.body -# return_value = result.body.body -# -# return context, parser, result, wrapper, return_value -# -# @pytest.mark.parametrize("text, interested", [ -# ("not parser result", False), -# (ParserResultConcept(parser="not multiple_concepts_parser"), False), -# (ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True), -# ]) -# def test_not_interested(self, text, interested): -# context = self.get_context() -# -# res = ConceptsWithConceptsParser().parse(context, text) -# if interested: -# assert res is not None -# else: -# assert res is None -# -# def test_i_can_parse_composition_of_concepts(self): -# foo = Concept("foo") -# bar = Concept("bar") -# plus = Concept("a plus b").def_var("a").def_var("b") -# -# context, parser, result, wrapper, return_value = self.execute([foo, bar, plus], [foo, " plus ", bar]) -# -# assert result.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert result.who == wrapper.parser.name -# assert wrapper.source == "foo plus bar" -# assert context.sheerka.isinstance(return_value, plus) -# -# assert return_value.compiled["a"] == foo -# assert return_value.compiled["b"] == bar -# -# # sanity check, I can evaluate the result -# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) -# assert evaluated.key == return_value.key -# assert evaluated.get_prop("a") == foo.init_key() -# assert evaluated.get_prop("b") == bar.init_key() -# -# def 
test_i_can_parse_when_composition_of_source_code(self): -# plus = Concept("a plus b", body="a + b").def_var("a").def_var("b") -# left = PythonNode("1+1", ast.parse("1+1", mode="eval")) -# right = PythonNode("2+2", ast.parse("2+2", mode="eval")) -# context, parser, result, wrapper, return_value = self.execute([plus], [left, " plus ", right]) -# -# assert result.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert result.who == wrapper.parser.name -# assert wrapper.source == "1+1 plus 2+2" -# assert context.sheerka.isinstance(return_value, plus) -# -# left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left) -# right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right) -# assert return_value.compiled["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)] -# assert return_value.compiled["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)] -# -# # sanity check, I can evaluate the result -# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) -# assert evaluated.key == return_value.key -# assert evaluated.get_prop("a") == 2 -# assert evaluated.get_prop("b") == 4 -# assert evaluated.body == 6 -# -# def test_i_can_parse_when_mix_of_concept_and_code(self): -# plus = Concept("a plus b").def_var("a").def_var("b") -# code = PythonNode("1+1", ast.parse("1+1", mode="eval")) -# foo = Concept("foo") -# context, parser, result, wrapper, return_value = self.execute([plus, foo], [foo, " plus ", code]) -# -# assert result.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert result.who == wrapper.parser.name -# assert wrapper.source == "foo plus 1+1" -# assert context.sheerka.isinstance(return_value, plus) -# -# code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code) -# assert return_value.compiled["a"] == foo -# assert return_value.compiled["b"] == 
[ReturnValueConcept(parser.name, True, code_parser_result)] -# -# # sanity check, I can evaluate the result -# evaluated = context.sheerka.evaluate_concept(self.get_context(context.sheerka, True), return_value) -# assert evaluated.key == return_value.key -# assert evaluated.get_prop("a") == foo.init_key() -# assert evaluated.get_prop("b") == 2 -# -# def test_i_can_parse_when_multiple_concepts_are_recognized(self): -# foo = Concept("foo") -# bar = Concept("bar") -# plus_1 = Concept("a plus b", body="body1").def_var("a").def_var("b") -# plus_2 = Concept("a plus b", body="body2").def_var("a").def_var("b") -# -# context, input_return_values = self.init([foo, bar, plus_1, plus_2], [foo, " plus ", bar]) -# parser = ConceptsWithConceptsParser() -# result = parser.parse(context, input_return_values.body) -# -# assert len(result) == 2 -# -# res = result[0] -# wrapper = res.value -# return_value = res.value.value -# assert res.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert res.who == wrapper.parser.name -# assert wrapper.source == "foo plus bar" -# assert context.sheerka.isinstance(return_value, plus_1) -# assert return_value.compiled["a"] == foo -# assert return_value.compiled["b"] == bar -# -# res = result[1] -# wrapper = res.value -# return_value = res.value.value -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert res.who == wrapper.parser.name -# assert wrapper.source == "foo plus bar" -# assert context.sheerka.isinstance(return_value, plus_2) -# assert return_value.compiled["a"] == foo -# assert return_value.compiled["b"] == bar -# -# def test_i_cannot_parse_when_unknown_concept(self): -# foo = Concept("foo") -# bar = Concept("bar") -# -# context, input_return_values = self.init([foo, bar], [foo, " plus ", bar]) -# parser = ConceptsWithConceptsParser() -# result = parser.parse(context, input_return_values.body) -# wrapper = result.body -# return_value = result.body.body -# -# assert not 
result.status -# assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME) -# assert result.who == parser.name -# assert return_value == input_return_values.body.body diff --git a/tests/parsers/test_MultipleConceptsParser.py b/tests/parsers/test_MultipleConceptsParser.py deleted file mode 100644 index 90e4dfa..0000000 --- a/tests/parsers/test_MultipleConceptsParser.py +++ /dev/null @@ -1,216 +0,0 @@ -# import pytest -# -# from core.builtin_concepts import ParserResultConcept, BuiltinConcepts -# from core.concept import Concept -# from core.tokenizer import Tokenizer, TokenKind, Token -# from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode, ConceptNode -# from parsers.BnfNodeParser import BnfNodeParser, Sequence -# from parsers.MultipleConceptsParser import MultipleConceptsParser -# from parsers.PythonParser import PythonNode -# -# from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -# -# -# def get_return_value(context, grammar, expression): -# parser = BnfNodeParser() -# parser.initialize(context, grammar) -# -# ret_val = parser.parse(context, expression) -# assert not ret_val.status -# return ret_val -# -# -# class TestMultipleConceptsParser(TestUsingMemoryBasedSheerka): -# -# def init(self, concepts, grammar, expression): -# context = self.get_context() -# for c in concepts: -# context.sheerka.create_new_concept(context, c) -# return_value = get_return_value(context, grammar, expression) -# -# return context, return_value -# -# def test_not_interested_if_not_parser_result(self): -# context = self.get_context() -# text = "not parser result" -# -# res = MultipleConceptsParser().parse(context, text) -# assert res is None -# -# def test_not_interested_if_not_from_concept_lexer_parser(self): -# context = self.get_context() -# text = ParserResultConcept(parser="not concept lexer", value="some value") -# -# res = MultipleConceptsParser().parse(context, text) -# assert res is None -# -# def 
test_i_can_parse_exact_concepts(self): -# foo = Concept("foo", body="'foo'") -# bar = Concept("bar", body="'bar'") -# baz = Concept("baz", body="'baz'") -# grammar = {} -# context, return_value = self.init([foo, bar, baz], grammar, "bar foo baz") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# -# assert ret_val.status -# assert ret_val.who == parser.name -# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) -# assert ret_val.value.value == [ -# ConceptNode(bar, 0, 0, source="bar"), -# ConceptNode(foo, 2, 2, source="foo"), -# ConceptNode(baz, 4, 4, source="baz")] -# assert ret_val.value.source == "bar foo baz" -# -# def test_i_can_parse_when_ending_with_bnf(self): -# foo = Concept("foo", body="'foo'") -# bar = Concept("bar", body="'bar'") -# grammar = {foo: Sequence("foo1", "foo2", "foo3")} -# context, return_value = self.init([foo, bar], grammar, "bar foo1 foo2 foo3") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# -# assert ret_val.status -# assert ret_val.who == parser.name -# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) -# assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")] -# assert ret_val.value.source == "bar foo1 foo2 foo3" -# -# def test_i_can_parse_when_starting_with_bnf(self): -# foo = Concept("foo", body="'foo'") -# bar = Concept("bar", body="'bar'") -# grammar = {foo: Sequence("foo1", "foo2", "foo3")} -# context, return_value = self.init([foo, bar], grammar, "foo1 foo2 foo3 bar") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# -# assert ret_val.status -# assert ret_val.who == parser.name -# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) -# assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")] -# assert ret_val.value.source == 
"foo1 foo2 foo3 bar" -# -# def test_i_can_parse_when_concept_are_mixed(self): -# foo = Concept("foo") -# bar = Concept("bar") -# baz = Concept("baz") -# grammar = {foo: Sequence("foo1", "foo2", "foo3")} -# context, return_value = self.init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# -# assert ret_val.status -# assert ret_val.who == parser.name -# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) -# assert ret_val.value.value == [ -# cnode("baz", 0, 0, "baz"), -# cnode("foo", 2, 6, "foo1 foo2 foo3"), -# cnode("bar", 8, 8, "bar")] -# assert ret_val.value.source == "baz foo1 foo2 foo3 bar" -# -# def test_i_can_parse_when_multiple_concepts_are_matching(self): -# foo = Concept("foo") -# bar = Concept("bar", body="bar1") -# baz = Concept("bar", body="bar2") -# grammar = {foo: "foo"} -# context, return_value = self.init([foo, bar, baz], grammar, "foo bar") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# -# assert len(ret_val) == 2 -# assert ret_val[0].status -# assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] -# assert ret_val[0].value.source == "foo bar" -# assert ret_val[0].value.value[1].concept.metadata.body == "bar1" -# -# assert ret_val[1].status -# assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] -# assert ret_val[1].value.source == "foo bar" -# assert ret_val[1].value.value[1].concept.metadata.body == "bar2" -# -# def test_i_can_parse_when_source_code(self): -# foo = Concept("foo") -# grammar = {foo: "foo"} -# context, return_value = self.init([foo], grammar, "1 foo") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# wrapper = ret_val.value -# value = ret_val.value.value -# -# assert ret_val.status -# assert ret_val.who == parser.name -# assert 
context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) -# assert wrapper.source == "1 foo" -# assert value == [ -# scnode(0, 1, "1 "), -# cnode("foo", 2, 2, "foo")] -# -# def test_i_cannot_parse_when_unrecognized_token(self): -# twenty_two = Concept("twenty two") -# one = Concept("one") -# grammar = {twenty_two: Sequence("twenty", "two")} -# context, return_value = self.init([twenty_two, one], grammar, "twenty two + one") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# -# assert not ret_val.status -# assert ret_val.who == parser.name -# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) -# assert ret_val.value.value == [ -# cnode("twenty two", 0, 2, "twenty two"), -# utnode(3, 5, " + "), -# cnode("one", 6, 6, "one") -# ] -# assert ret_val.value.source == "twenty two + one" -# -# def test_i_cannot_parse_when_unknown_concepts(self): -# twenty_two = Concept("twenty two") -# one = Concept("one") -# grammar = {twenty_two: Sequence("twenty", "two")} -# context, return_value = self.init([twenty_two, one], grammar, "twenty two plus one") -# -# parser = MultipleConceptsParser() -# ret_val = parser.parse(context, return_value.body) -# -# assert not ret_val.status -# assert ret_val.who == parser.name -# assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) -# assert ret_val.value.value == [ -# cnode("twenty two", 0, 2, "twenty two"), -# utnode(3, 5, " plus "), -# cnode("one", 6, 6, "one") -# ] -# assert ret_val.value.source == "twenty two plus one" -# -# @pytest.mark.parametrize("text, expected_source, expected_end", [ -# ("True", "True", 0), -# ("1 == 1", "1 == 1", 4), -# ("1!xdf", "1", 0), -# ("1", "1", 0), -# ]) -# def test_i_can_get_source_code_node(self, text, expected_source, expected_end): -# tokens = list(Tokenizer(text))[:-1] # strip trailing EOF -# -# start_index = 5 # a random number different of zero -# res = 
MultipleConceptsParser().get_source_code_node(self.get_context(), start_index, tokens) -# -# assert isinstance(res, SourceCodeNode) -# assert isinstance(res.node, PythonNode) -# assert res.source == expected_source -# assert res.start == start_index -# assert res.end == start_index + expected_end -# -# def test_i_cannot_parse_null_text(self): -# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, []) -# assert res is None -# -# eof = Token(TokenKind.EOF, "", 0, 0, 0) -# res = MultipleConceptsParser().get_source_code_node(self.get_context(), 0, [eof]) -# assert res is None diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index fccce67..bf14bd7 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -31,6 +31,7 @@ cmap = { "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), "square": Concept("square(a)").def_var("a"), "foo bar": Concept("foo bar(a)").def_var("a"), + "long infixed": Concept("a long infixed b").def_var("a").def_var("b"), "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), } @@ -50,8 +51,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): TestSyaNodeParser.sheerka.force_sya_def(context, [ (cmap["plus"].id, 5, SyaAssociativity.Right), (cmap["mult"].id, 10, SyaAssociativity.Right), - (cmap["minus"].id, 10, SyaAssociativity.Right), - (cmap["square"].id, None, SyaAssociativity.No)]) + (cmap["minus"].id, 10, SyaAssociativity.Right)]) def init_parser(self, my_concepts_map=None, @@ -98,99 +98,92 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected_sequences", [ ("one plus two", [["one", "two", "plus"]]), - ("1 + 1 plus two", [["1 + 1 ", "two", "plus"]]), + ("1 + 1 plus two", [["1 + 1", "two", "plus"]]), ("one + two plus three", [ ["one", " + ", "two", "three", "plus"], - ["one + two ", "three", "plus"]]), + ["one + two", "three", "plus"]]), 
("twenty one plus two", [ ["twenty ", "one", "two", "plus"], [short_cnode("twenties", "twenty one"), "two", "plus"] ]), - ("x$!# plus two", [["x$!# ", "two", "plus"]]), + ("x$!# plus two", [["x$!#", "two", "plus"]]), - ("one plus 1 + 1", [["one", " 1 + 1", "plus"]]), - ("1 + 1 plus 2 + 2", [["1 + 1 ", " 2 + 2", "plus"]]), + ("one plus 1 + 1", [["one", "1 + 1", "plus"]]), + ("1 + 1 plus 2 + 2", [["1 + 1", "2 + 2", "plus"]]), ("one + two plus 1 + 1", [ - ["one", " + ", "two", " 1 + 1", "plus"], - ["one + two ", " 1 + 1", "plus"] + ["one", " + ", "two", "1 + 1", "plus"], + ["one + two", "1 + 1", "plus"] ]), ("twenty one plus 1 + 1", [ - ["twenty ", "one", " 1 + 1", "plus"], - [cnode("twenties", 0, 2, "twenty one"), " 1 + 1", "plus"] + ["twenty ", "one", "1 + 1", "plus"], + [cnode("twenties", 0, 2, "twenty one"), "1 + 1", "plus"] ]), - ("x$!# plus 1 + 1", [["x$!# ", " 1 + 1", "plus"]]), + ("x$!# plus 1 + 1", [["x$!#", "1 + 1", "plus"]]), ("one plus two + three", [ ["one", "two", "plus", " + ", "three"], - ["one", " two + three", "plus"], + ["one", "two + three", "plus"], ]), ("1 + 1 plus two + three", [ - ["1 + 1 ", "two", "plus", (" + ", 1), "three"], - ["1 + 1 ", " two + three", "plus"], + ["1 + 1", "two", "plus", (" + ", 1), "three"], + ["1 + 1", "two + three", "plus"], ]), ("one + two plus two + three", [ ["one", " + ", "two", ("two", 1), "plus", (" + ", 1), "three"], - ["one + two ", ("two", 1), "plus", (" + ", 1), "three"], - ["one", " + ", "two", " two + three", "plus"], - ["one + two ", " two + three", "plus"], + ["one + two", ("two", 1), "plus", (" + ", 1), "three"], + ["one", " + ", "two", "two + three", "plus"], + ["one + two", "two + three", "plus"], ]), ("twenty one plus two + three", [ ["twenty ", "one", "two", "plus", " + ", "three"], [cnode("twenties", 0, 2, "twenty one"), "two", "plus", " + ", "three"], - ["twenty ", "one", " two + three", "plus"], - [cnode("twenties", 0, 2, "twenty one"), " two + three", "plus"], + ["twenty ", "one", "two + three", 
"plus"], + [cnode("twenties", 0, 2, "twenty one"), "two + three", "plus"], ]), ("x$!# plus two + three", [ - ["x$!# ", "two", "plus", " + ", "three"], - ["x$!# ", " two + three", "plus"], + ["x$!#", "two", "plus", " + ", "three"], + ["x$!#", "two + three", "plus"], ]), ("one plus twenty two", [ - ["one", " twenty ", "plus", "two"], + ["one", "twenty ", "plus", "two"], ["one", cnode("twenties", 4, 6, "twenty two"), "plus"], ]), ("1 + 1 plus twenty one", [ - ["1 + 1 ", " twenty ", "plus", "one"], - ["1 + 1 ", cnode("twenties", 8, 10, "twenty one"), "plus"], + ["1 + 1", "twenty ", "plus", "one"], + ["1 + 1", cnode("twenties", 8, 10, "twenty one"), "plus"], ]), ("one + two plus twenty one", [ - ["one", " + ", "two", " twenty ", "plus", ("one", 1)], - ["one + two ", " twenty ", "plus", ("one", 1)], + ["one", " + ", "two", "twenty ", "plus", ("one", 1)], + ["one + two", "twenty ", "plus", ("one", 1)], ["one", " + ", "two", cnode("twenties", 8, 10, "twenty one"), "plus"], - ["one + two ", cnode("twenties", 8, 10, "twenty one"), "plus"], + ["one + two", cnode("twenties", 8, 10, "twenty one"), "plus"], ]), ("twenty one plus twenty two", [ - ["twenty ", "one", " twenty ", "plus", "two"], - [cnode("twenties", 0, 2, "twenty one"), " twenty ", "plus", "two"], + ["twenty ", "one", ("twenty ", 1), "plus", "two"], + [cnode("twenties", 0, 2, "twenty one"), ("twenty ", 1), "plus", "two"], ["twenty ", "one", cnode("twenties", 6, 8, "twenty two"), "plus"], [cnode("twenties", 0, 2, "twenty one"), cnode("twenties", 6, 8, "twenty two"), "plus"], ]), ("x$!# plus twenty two", [ - ["x$!# ", " twenty ", "plus", "two"], - ["x$!# ", cnode("twenties", 7, 9, "twenty two"), "plus"] + ["x$!#", "twenty ", "plus", "two"], + ["x$!#", cnode("twenties", 7, 9, "twenty two"), "plus"] ]), - ("one plus z$!#", [["one", " z$!#", "plus"]]), - ("1 + 1 plus z$!#", [["1 + 1 ", " z$!#", "plus"]]), + ("one plus z$!#", [["one", "z$!#", "plus"]]), + ("1 + 1 plus z$!#", [["1 + 1", "z$!#", "plus"]]), ("one + two plus 
z$!#", [ - ["one", " + ", "two", " z$!#", "plus"], - ["one + two ", " z$!#", "plus"], + ["one", " + ", "two", "z$!#", "plus"], + ["one + two", "z$!#", "plus"], ]), ("twenty one plus z$!#", [ - ["twenty ", "one", " z$!#", "plus"], - [cnode("twenties", 0, 2, "twenty one"), " z$!#", "plus"], + ["twenty ", "one", "z$!#", "plus"], + [cnode("twenties", 0, 2, "twenty one"), "z$!#", "plus"], ]), - ("x$!# plus z$!#", [["x$!# ", " z$!#", "plus"]]), + ("x$!# plus z$!#", [["x$!#", "z$!#", "plus"]]), ]) def test_i_can_post_fix_simple_infix_concepts(self, expression, expected_sequences): - # concepts_map = { - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -202,10 +195,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected_sequences", [ - ("one plus plus plus 1 + 1", [["one", " 1 + 1", "plus plus plus"]]), + ("one plus plus plus 1 + 1", [["one", "1 + 1", "plus plus plus"]]), ("x$!# another long name infix twenty two", [ - ["x$!# ", " twenty ", "another long name infix", "two"], - ["x$!# ", cnode("twenties", 13, 15, "twenty two"), "another long name infix"], + ["x$!#", "twenty ", "another long name infix", "two"], + ["x$!#", cnode("twenties", 13, 15, "twenty two"), "another long name infix"], ]), ]) def test_i_can_post_fix_infix_concepts_with_long_name(self, expression, expected_sequences): @@ -229,24 +222,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected_sequences", [ ("one prefixed", [["one", "prefixed"]]), - ("1 + 1 prefixed", [["1 + 1 ", "prefixed"]]), + ("1 + 1 prefixed", [["1 + 1", "prefixed"]]), ("one + two prefixed", [ ["one", " + ", "two", 
"prefixed"], - ["one + two ", "prefixed"], + ["one + two", "prefixed"], ]), ("twenty one prefixed", [ ["twenty ", "one", "prefixed"], [cnode("twenties", 0, 2, "twenty one"), "prefixed"], ]), - ("x$!# prefixed", [["x$!# ", "prefixed"]]), + ("x$!# prefixed", [["x$!#", "prefixed"]]), ]) def test_i_can_post_fix_simple_prefixed_concepts(self, expression, expected_sequences): - # concepts_map = { - # "prefixed": Concept("a prefixed").def_var("a"), - # "one": Concept("one"), - # "two": Concept("two"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -259,28 +246,28 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected_sequences", [ ("one prefixed prefixed", [["one", "prefixed prefixed"]]), - ("1 + 1 prefixed prefixed", [["1 + 1 ", "prefixed prefixed"]]), + ("1 + 1 prefixed prefixed", [["1 + 1", "prefixed prefixed"]]), ("one + two prefixed prefixed", [ ["one", " + ", "two", "prefixed prefixed"], - ["one + two ", "prefixed prefixed"], + ["one + two", "prefixed prefixed"], ]), ("twenty one prefixed prefixed", [ ["twenty ", "one", "prefixed prefixed"], [cnode("twenties", 0, 2, "twenty one"), "prefixed prefixed"], ]), - ("x$!# prefixed prefixed", [["x$!# ", "prefixed prefixed"]]), + ("x$!# prefixed prefixed", [["x$!#", "prefixed prefixed"]]), ("one long name prefixed", [["one", "long name prefixed"]]), - ("1 + 1 long name prefixed", [["1 + 1 ", "long name prefixed"]]), + ("1 + 1 long name prefixed", [["1 + 1", "long name prefixed"]]), ("one + two long name prefixed", [ ["one", " + ", "two", "long name prefixed"], - ["one + two ", "long name prefixed"], + ["one + two", "long name prefixed"], ]), ("twenty one long name prefixed", [ ["twenty ", "one", "long name prefixed"], [cnode("twenties", 0, 2, "twenty one"), "long name prefixed"], ]), - ("x$!# long name prefixed", [["x$!# ", "long 
name prefixed"]]), + ("x$!# long name prefixed", [["x$!#", "long name prefixed"]]), ]) def test_i_can_post_fix_prefixed_concepts_with_long_names(self, expression, expected_sequences): concepts_map = { @@ -302,24 +289,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected_sequences", [ ("suffixed one", [["one", "suffixed"]]), - ("suffixed 1 + 1", [[" 1 + 1", "suffixed"]]), + ("suffixed 1 + 1", [["1 + 1", "suffixed"]]), ("suffixed one + two", [ ["one", "suffixed", " + ", "two"], - [" one + two", "suffixed"], + ["one + two", "suffixed"], ]), ("suffixed twenty one", [ - [" twenty ", "suffixed", "one"], + ["twenty ", "suffixed", "one"], [cnode("twenties", 2, 4, "twenty one"), "suffixed"], ]), - ("suffixed x$!#", [[" x$!#", "suffixed"]]), + ("suffixed x$!#", [["x$!#", "suffixed"]]), ]) def test_i_can_post_fix_simple_suffixed_concepts(self, expression, expected_sequences): - # concepts_map = { - # "suffixed": Concept("suffixed a").def_var("a"), - # "one": Concept("one"), - # "two": Concept("two"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -351,26 +332,27 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected_sequences", [ ("one ? two : three", [["one", "two", "three", "?"]]), + ("one ? baz qux : two", [["one", "baz qux", "two", "?"]]), ("1+1 ? one + two : twenty one", [ - ["1+1 ", "one", " + ", "two"], # an error is detected - ["1+1 ", " one + two ", " twenty ", "?", ("one", 1)], - ["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "?"], + ["1+1", "one", " + ", "two"], # error is detected so the parsing has stopped + ["1+1", "one + two", "twenty ", "?", ("one", 1)], + ["1+1", "one + two", short_cnode("twenties", "twenty one"), "?"], ]), - ("x$!# ? y$!# : z$!#", [["x$!# ", " y$!# ", " z$!#", "?"]]), + ("x$!# ? 
y$!# : z$!#", [["x$!#", "y$!#", "z$!#", "?"]]), ("if one then two else three end", [["one", "two", "three", "if"]]), ("if 1+1 then x$!# else twenty one end", [ - [" 1+1 ", " x$!# ", " twenty ", "one"], # an error is detected - [" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if"], + ["1+1", "x$!#", "twenty ", "one"], # an error is detected + ["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if"], ]), ("if x$!# then one + two else z$!# end", [ - [" x$!# ", "one", " + ", "two"], # an error is detected - [" x$!# ", " one + two ", " z$!# ", "if"], + ["x$!#", "one", " + ", "two"], # error is detected so the parsing has stopped + ["x$!#", "one + two", "z$!#", "if"], ]), ]) def test_i_can_post_fix_ternary_concepts(self, expression, expected_sequences): """ - The purpose of this test is to validate concepts like + The purpose of this test is to validate concepts that have at least 3 parameters separated by tokens Example : var_0 token var_1 token var_2 @@ -381,14 +363,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): :return: """ - # concepts_map = { - # "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), - # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -402,15 +376,15 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected_sequences", [ ("one ? ? two : : three", [["one", "two", "three", "? ?"]]), ("1+1 ? ? one + two : : twenty one", [ - ["1+1 ", "one", " + ", "two"], # error - ["1+1 ", " one + two ", " twenty ", "? ?", ("one", 1)], - ["1+1 ", " one + two ", short_cnode("twenties", "twenty one"), "? 
?"], + ["1+1", "one", " + ", "two"], # error + ["1+1", "one + two", "twenty ", "? ?", ("one", 1)], + ["1+1", "one + two", short_cnode("twenties", "twenty one"), "? ?"], ]), ("if if one then then two else else three end end ", [["one", "two", "three", "if if"]]), ("if if 1+1 then then x$!# else else twenty one end end ", [ - [" 1+1 ", " x$!# ", " twenty ", "one"], # error - [" 1+1 ", " x$!# ", short_cnode("twenties", "twenty one"), "if if"]]), + ["1+1", "x$!#", "twenty ", "one"], # error + ["1+1", "x$!#", short_cnode("twenties", "twenty one"), "if if"]]), ]) def test_i_can_post_fix_ternary_concept_with_long_names(self, expression, expected_sequences): concepts_map = { @@ -433,8 +407,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected", [ ("foo bar baz", ["baz", "bar", "foo"]), - ("foo bar x$!#", [" x$!#", "bar", "foo"]), - ("foo bar 1 + 1", [" 1 + 1", "bar", "foo"]), + ("foo bar x$!#", ["x$!#", "bar", "foo"]), + ("foo bar 1 + 1", ["1 + 1", "bar", "foo"]), ]) def test_i_can_post_fix_suffixed_unary_composition(self, expression, expected): concepts_map = { @@ -452,8 +426,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected", [ ("baz bar foo", ["baz", "bar", "foo"]), - ("x$!# bar foo", ["x$!# ", "bar", "foo"]), - ("1 + 1 bar foo", ["1 + 1 ", "bar", "foo"]), + ("x$!# bar foo", ["x$!#", "bar", "foo"]), + ("1 + 1 bar foo", ["1 + 1", "bar", "foo"]), ]) def test_i_can_post_fix_prefixed_unary_composition(self, expression, expected): concepts_map = { @@ -480,17 +454,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("one mult (two plus three)", ["one", "two", "three", "plus", "mult"]), ]) def test_i_can_post_fix_binary_with_precedence(self, expression, expected): - # concepts_map = { - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "mult": Concept("a mult b").def_var("a").def_var("b"), - # "one": Concept("one"), - # "two": Concept("two"), - # 
"three": Concept("three"), - # } - # sya_def = { - # concepts_map["plus"]: (5, SyaAssociativity.Right), - # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus - # } sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -566,7 +529,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): } sya_def = { - concepts_map["plus"]: (None, SyaAssociativity.Left), + concepts_map["plus"]: (1, SyaAssociativity.Left), } sheerka, context, parser = self.init_parser(concepts_map, sya_def) @@ -580,14 +543,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res[0].out == expected_array @pytest.mark.parametrize("expression, expected", [ - ("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "two", "three", ("?", 1), "?"]), - ("x$!# ? y$!# : (z$!# ? two : three)", ["x$!# ", " y$!# ", "z$!# ", "two", "three", ("?", 1), "?"]), + ("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]), + ("x$!# ? y$!# : (z$!# ? two : three)", ["x$!#", "y$!#", "z$!#", "two", "three", ("?", 1), "?"]), - ("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]), - ("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!# ", " y$!# ", " z$!#", ("?", 1), "three", "?"]), + ("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]), + ("one ? (x$!# ? y$!# : z$!#) : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]), - ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]), - ("one ? two : (x$!# ? y$!# : z$!#)", ["one", "two", "x$!# ", " y$!# ", " z$!#", ("?", 1), "?"]), + ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]), + ("one ? two : (x$!# ? 
y$!# : z$!#)", ["one", "two", "x$!#", "y$!#", "z$!#", ("?", 1), "?"]), ]) def test_i_can_post_fix_right_associated_ternary(self, expression, expected): concepts_map = { @@ -607,14 +570,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res[0].out == expected_array @pytest.mark.parametrize("expression, expected", [ - ("x$!# ? y$!# : z$!# ? two : three", ["x$!# ", " y$!# ", " z$!# ", "?", "two", "three", ("?", 1)]), - ("(x$!# ? y$!# : z$!#) ? two : three", ["x$!# ", " y$!# ", " z$!#", "?", "two", "three", ("?", 1)]), + ("x$!# ? y$!# : z$!# ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]), + ("(x$!# ? y$!# : z$!#) ? two : three", ["x$!#", "y$!#", "z$!#", "?", "two", "three", ("?", 1)]), # the following one is not possible when Left association - # ("one ? x$!# ? y$!# : z$!# : three", ["one", " x$!# ", " y$!# ", " z$!# ", ("?", 1), "three", "?"]), + # ("one ? x$!# ? y$!# : z$!# : three", ["one", "x$!#", "y$!#", "z$!#", ("?", 1), "three", "?"]), - ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", " x$!# ", "?", " y$!# ", " z$!#", ("?", 1)]), - ("(one ? two : x$!#) ? y$!# : z$!#", ["one", "two", " x$!#", "?", " y$!# ", " z$!#", ("?", 1)]), + ("one ? two : x$!# ? y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]), + ("(one ? two : x$!#) ? 
y$!# : z$!#", ["one", "two", "x$!#", "?", "y$!#", "z$!#", ("?", 1)]), ]) def test_i_can_post_fix_left_associated_ternary(self, expression, expected): concepts_map = { @@ -644,7 +607,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): expression = "foo bar baz" res = parser.infix_to_postfix(context, expression) expected_sequences = [ - [UTN(" bar "), "foo", "baz"], + [UTN("bar "), "foo", "baz"], ["baz", "foo bar"] ] @@ -669,9 +632,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), - ("(one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), - ("( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), - ("( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]), + ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), + ("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), + ("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]), ("suffixed (suffixed one)", ["one", ("suffixed", 1), "suffixed"]), ("suffixed ( suffixed one) ", ["one", ("suffixed", 1), "suffixed"]), @@ -681,32 +644,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("one plus (two minus three)", ["one", "two", "three", "minus", "plus"]), ("one plus ( two minus three )", ["one", "two", "three", "minus", "plus"]), ("(one plus two) minus three", ["one", "two", "plus", "three", "minus"]), - ("( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]), + ("(( one plus two ) minus three )", ["one", "two", "plus", "three", "minus"]), - ("foo bar (one)", ["one", "foo bar"]), + ("foo bar(one)", ["one", "foo bar"]), ("foo bar ( one )", ["one", "foo bar"]), ]) def test_i_can_pos_fix_when_parenthesis(self, expression, expected): - # concepts_map = { - # "prefixed": Concept("a 
prefixed").def_var("a"), - # "suffixed": Concept("suffixed a").def_var("a"), - # "square": Concept("square(a)").def_var("a"), - # "foo bar": Concept("foo bar(a)").def_var("a"), - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "minus": Concept("a minus b").def_var("a").def_var("b"), - # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), - # "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # } - # - # sya_def = { - # concepts_map["square"]: (None, SyaAssociativity.No), - # concepts_map["plus"]: (10, SyaAssociativity.Right), - # concepts_map["minus"]: (10, SyaAssociativity.Right), - # } - sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -721,14 +664,14 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("function(one prefixed)", [[SCWC("function(", ")", "one", "prefixed")]]), ("function(if one then two else three end)", [[SCWC("function(", ")", "one", "two", "three", "if")]]), ("function(suffixed twenty two)", [ - [SCWC("function(", ")", " twenty ", "suffixed", "two")], + [SCWC("function(", ")", "twenty ", "suffixed", "two")], [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "suffixed")]]), ("function(twenty two prefixed)", [ [SCWC("function(", ")", "twenty ", "two", "prefixed")], [SCWC("function(", ")", short_cnode("twenties", "twenty two"), "prefixed")], ]), ("function(if one then twenty two else three end)", [ - ["')'", "one", " twenty ", "two"], # error + ["')'", "one", "twenty ", "two"], # error [SCWC("function(", ")", "one", short_cnode("twenties", "twenty two"), "three", "if")] ]), ("func1(func2(one two) three)", [ @@ -744,16 +687,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ]), ("f1(one plus two mult three) plus f2(suffixed x$!# prefixed)", [ [SCWC("f1(", ")", "one", "two", "three", "mult", "plus"), - SCWC(" f2(", (")", 1), " x$!# 
", "prefixed", "suffixed"), + SCWC("f2(", (")", 1), "x$!#", "prefixed", "suffixed"), ("plus", 1)] ]), # plus, suffixed, prefixed, ternary - ("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC(" func2(", (")", 1), "two"), "plus"]]), - ("suffixed function(one)", [[SCWC(" function(", ")", "one"), "suffixed"]]), + ("func1(one) plus func2(two)", [[SCWC("func1(", ")", "one"), SCWC("func2(", (")", 1), "two"), "plus"]]), + ("suffixed function(one)", [[SCWC("function(", ")", "one"), "suffixed"]]), ("function(one) prefixed", [[SCWC("function(", ")", "one"), "prefixed"]]), ("if f1(one) then f2(two) else f3(three) end", [ - [SCWC(" f1(", ")", "one"), SCWC(" f2(", (")", 1), "two"), SCWC(" f3(", (")", 2), "three"), "if"]]), + [SCWC("f1(", ")", "one"), SCWC("f2(", (")", 1), "two"), SCWC("f3(", (")", 2), "three"), "if"]]), # Sequence ("if one then two else three end function(x$!#)", [ @@ -762,21 +705,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]), ]) def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): - # concepts_map = { - # "prefixed": Concept("a prefixed").def_var("a"), - # "suffixed": Concept("suffixed a").def_var("a"), - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "mult": Concept("a mult b").def_var("a").def_var("b"), - # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } - # sya_def = { - # concepts_map["plus"]: (5, SyaAssociativity.Right), - # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus - # } sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -787,28 +715,22 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): 
assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected", [ - ("(", ("(", 0)), - ("one plus ( 1 + ", ("(", 4)), - ("one( 1 + ", ("(", 1)), - ("one ( 1 + ", ("(", 2)), - ("function( 1 + ", ("(", 1)), - ("function ( 1 + ", ("(", 2)), - ("one plus ) 1 + ", (")", 4)), - ("one ) 1 + ", (")", 2)), - ("function ) 1 + ", (")", 2)), - ("one ? ( : two", ("(", 4)), - ("one ? one plus ( : two", ("(", 8)), - ("one ? ) : two", (")", 4)), - ("one ? one plus ) : two", (")", 8)), + # ("(", ("(", 0)), + # ("one plus ( 1 + ", ("(", 4)), + # ("one( 1 + ", ("(", 1)), + # ("one ( 1 + ", ("(", 2)), + # ("function( 1 + ", ("(", 1)), + # ("function ( 1 + ", ("(", 2)), + # ("one plus ) 1 + ", (")", 4)), + # ("one ) 1 + ", (")", 2)), + # ("function ) 1 + ", (")", 2)), + # ("one ? ( : two", ("(", 4)), + # ("one ? one plus ( : two", ("(", 8)), + # ("one ? ) : two", (")", 4)), + # ("one ? one plus ) : two", (")", 8)), ("(one plus ( 1 + )", ("(", 0)), ]) def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected): - # concepts_map = { - # "one": Concept("one"), - # "two": Concept("two"), - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "?": Concept("a ? b : c").def_var("a").def_var("b").def_var("c"), - # } sheerka, context, parser = self.init_parser() res = parser.infix_to_postfix(context, expression) @@ -820,12 +742,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("one ? one two : three", ("?", ":")), ]) def test_i_can_detected_when_too_many_parameters(self, expression, expected): - # concepts_map = { - # "one": Concept("one"), - # "two": Concept("two"), - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "?": Concept("a ? 
b : c").def_var("a").def_var("b").def_var("c"), - # } sheerka, context, parser = self.init_parser(cmap, None) res = parser.infix_to_postfix(context, expression) @@ -850,27 +766,16 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("one infix two three infix four", ["one", "two", "infix", "three", "four", ("infix", 1)]), ("one infix two three prefixed", ["one", "two", "infix", "three", "prefixed"]), ("one infix two suffixed three", ["one", "two", "infix", "three", "suffixed"]), - ("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!# ", " y$!# ", " z$!#", "?"]), + ("one infix two x$!# ? y$!# : z$!#", ["one", "two", "infix", " x$!#", "y$!#", "z$!#", "?"]), ("one prefixed two infix three", ["one", "prefixed", "two", "three", "infix"]), ("one prefixed two prefixed", ["one", "prefixed", "two", ("prefixed", 1)]), ("one prefixed suffixed two", ["one", "prefixed", "two", "suffixed"]), - ("one prefixed x$!# ? y$!# : z$!#", ["one", "prefixed", " x$!# ", " y$!# ", " z$!#", "?"]), + ("one prefixed x$!# ? y$!# : z$!#", ["one", "prefixed", " x$!#", "y$!#", "z$!#", "?"]), ("(one infix two) (three prefixed)", ["one", "two", "infix", "three", "prefixed"]), ]) def test_i_can_post_fix_sequences(self, expression, expected): - # concepts_map = { - # "prefixed": Concept("a prefixed").def_var("a"), - # "suffixed": Concept("suffixed a").def_var("a"), - # "infix": Concept("a infix b").def_var("a").def_var("b"), - # "?": Concept("a ? 
b : c").def_var("a").def_var("b").def_var("c"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # "four": Concept("four"), - # } - sheerka, context, parser = self.init_parser(cmap, None) res = parser.infix_to_postfix(context, expression) @@ -886,23 +791,49 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): "plus equals": Concept("a plus equals b").def_var("a").def_var("b"), } - sheerka, context, parser = self.init_parser(concepts_map, None) + sya_def = { + concepts_map["plus"]: (1, SyaAssociativity.Right), + concepts_map["plus plus"]: (1, SyaAssociativity.Right), + concepts_map["plus equals"]: (1, SyaAssociativity.Right), + } + + sheerka, context, parser = self.init_parser(concepts_map, sya_def) expression = "a plus plus equals b" res = parser.infix_to_postfix(context, expression) expected_array = tests.parsers.parsers_utils.compute_debug_array(res) assert expected_array == [ - ["a", "a plus b", "a plus b", "equals", "b"], - ["a", "a plus b", "a plus plus", "equals", "b"], - ["a", "a plus b", "a plus equals b", "equals", "b"], - ["a", "a plus plus", "plus", "equals", "b"], - ["a", "a plus plus", "plus", "equals", "b"], - ["a", "a plus plus", "plus", "equals", "b"], - ["a", "a plus equals b", "a plus b", "equals", "b"], - ["a", "a plus equals b", "a plus plus", "equals", "b"], - ["a", "a plus equals b", "a plus equals b", "equals", "b"], + ["T(a)", "C(a plus b)", "C(a plus b)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus b)", "C(a plus plus)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus b)", "C(a plus equals b)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus plus)", "T(plus)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus equals b)", "C(a plus b)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus equals b)", "C(a plus plus)", "T(equals)", "T(b)"], + ["T(a)", "C(a plus equals b)", "C(a plus equals b)", "T(equals)", 
"T(b)"], ] + def test_non_reg(self): + concepts_map = { + "plus": Concept("a plus b").def_var("a").def_var("b"), + "complex infix": Concept("a complex infix b ").def_var("a").def_var("b"), + } + + sya_def = { + # concepts_map["plus"]: (1, SyaAssociativity.Right), + # concepts_map["plus plus"]: (1, SyaAssociativity.Right), + # concepts_map["plus equals"]: (1, SyaAssociativity.Right), + } + + sheerka, context, parser = self.init_parser(concepts_map, sya_def) + + expression = "a plus complex infix b" + res = parser.infix_to_postfix(context, expression) + + res = parser.parse(context, expression) + pass + def test_i_can_use_string_instead_of_identifier(self): concepts_map = { "ternary": Concept("a ? ? b '::' c").def_var("a").def_var("b").def_var("c"), @@ -945,13 +876,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): Not quite sure why this test is here :return: """ - # concepts_map = { - # "foo": Concept("foo a").def_var("a"), - # "one": Concept("one"), - # "two": Concept("two"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } - sheerka, context, parser = self.init_parser(cmap, None) + sheerka, context, parser = self.init_parser() expression = "suffixed twenties" res = parser.infix_to_postfix(context, expression) @@ -962,17 +887,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res[0].out == expected_array def test_i_can_parse_when_concept_atom_only(self): - # concepts_map = { - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "mult": Concept("a mult b").def_var("a").def_var("b"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # } - # sya_def = { - # concepts_map["plus"]: (5, SyaAssociativity.Right), - # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus - # } sheerka, context, parser = self.init_parser() text = "one plus two mult three" @@ -992,10 +906,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): 
assert expected_concept.compiled["b"].compiled["b"] == cmap["three"] def test_i_can_parse_when_python_code(self): - # concepts_map = { - # "foo": Concept("foo a").def_var("a") - # } - sheerka, context, parser = self.init_parser(cmap, None) + sheerka, context, parser = self.init_parser() text = "suffixed 1 + 1" res = parser.parse(context, text) @@ -1014,16 +925,10 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(return_value_a, BuiltinConcepts.RETURN_VALUE) assert return_value_a.status assert sheerka.isinstance(return_value_a.body, BuiltinConcepts.PARSER_RESULT) - assert return_value_a.body.source == " 1 + 1" + assert return_value_a.body.source == "1 + 1" assert isinstance(return_value_a.body.body, PythonNode) def test_i_can_parse_when_bnf_concept(self): - # concepts_map = { - # "foo": Concept("foo a").def_var("a"), - # "one": Concept("one"), - # "two": Concept("two"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } sheerka, context, parser = self.init_parser() text = "suffixed twenty one" @@ -1043,13 +948,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert expected_concept.compiled["a"].compiled["unit"] == cmap["one"] def test_i_can_parse_sequences(self): - # concepts_map = { - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "foo": Concept("foo a").def_var("a"), - # "one": Concept("one"), - # "two": Concept("two"), - # } - sheerka, context, parser = self.init_parser(cmap, None) + sheerka, context, parser = self.init_parser() text = "one plus 1 + 1 suffixed two" res = parser.parse(context, text) @@ -1081,27 +980,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("f1(one prefixed) plus f2(suffixed two)", True, [ CNC("plus", a=SCWC("f1(", ")", CNC("prefixed", a="one")), - b=SCWC(" f2(", (")", 1), CNC("suffixed", a="two"))) + b=SCWC("f2(", (")", 1), CNC("suffixed", a="two"))) ]), ("function(suffixed x$!#)", False, [ - SCWC("function(", ")", 
CNC("suffixed", 2, 7, a=" x$!#"))]), + SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]), ]) def test_i_can_parse_when_one_result(self, text, expected_status, expected_result): - # concepts_map = { - # "prefixed": Concept("a prefixed").def_var("a"), - # "suffixed": Concept("suffixed a").def_var("a"), - # "mult": Concept("a mult b").def_var("a").def_var("b"), - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } - # sya_def = { - # concepts_map["plus"]: (5, SyaAssociativity.Right), - # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus - # } sheerka, context, parser = self.init_parser() res = parser.parse(context, text) @@ -1113,41 +997,54 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert lexer_nodes == expected_array - # @pytest.mark.parametrize("text, list_of_expected", [ - # ("1 plus twenty one", [ - # (False, [CNC("plus", a=scnode(0, 0, "1"), b=UTN(" twenty ")), CN("one")]), - # (True, [CNC("plus", a=scnode(0, 0, "1"), b=CN("twenties", source="twenty one"))]) - # ]) - # ]) - # def test_i_can_parse_when_multiple_results(self, text, list_of_expected): - # concepts_map = { - # "prefixed": Concept("a prefixed").def_var("a"), - # "suffixed": Concept("suffixed a").def_var("a"), - # "mult": Concept("a mult b").def_var("a").def_var("b"), - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "if": Concept("if a then b else c end").def_var("a").def_var("b").def_var("c"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } - # sya_def = { - # 
concepts_map["plus"]: (5, SyaAssociativity.Right), - # concepts_map["mult"]: (10, SyaAssociativity.Right), # precedence greater than plus - # } - # sheerka, context, parser = self.init_parser(concepts_map, sya_def) - # - # list_of_res = parser.parse(context, text) - # assert len(list_of_res) == len(list_of_expected) - # - # for res, expected in zip(list_of_res, list_of_expected): - # wrapper = res.body - # lexer_nodes = res.body.body - # assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - # - # expected_array = compute_expected_array(concepts_map, text, expected[1]) - # assert res.status == expected[0] - # assert lexer_nodes == expected_array + @pytest.mark.parametrize("text", [ + "foo bar (one", + "foo bar one", + "foo one two", + "foo x$!# one", + ]) + def test_i_cannot_parse_when_concept_almost_found(self, text): + """ + We test that the parsed concept seems like a known one, but it was not. + The parser has to detected that the predication was incorrect + :return: + """ + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, text) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert res.body.body == text + + @pytest.mark.parametrize("text, expected_result", [ + ("one plus two foo bar baz", [CNC("plus", a="one", b="two"), UTN(" foo bar baz")]), + ("one plus two foo bar", [CNC("plus", a="one", b="two"), UTN(" foo bar")]), + ("foo bar one plus two", [UTN("foo bar "), CNC("plus", a="one", b="two")]), + ("foo bar (one plus two", [UTN("foo bar ("), CNC("plus", a="one", b="two")]), + ("one plus two a long other b", [CNC("plus", a="one", b="two"), UTN(" a long other b")]), + ("one plus two a long infixed", [CNC("plus", a="one", b="two"), UTN(" a long infixed")]), + ("one plus two a long", [CNC("plus", a="one", b="two"), UTN(" a long")]), + ("one ? a long infixed : two", [CNC("?", a="one", b=UTN("a long infixed"), c="two")]), + ("one ? 
a long infix : two", [CNC("?", a="one", b=UTN("a long infix"), c="two")]), + ]) + def test_i_cannot_parse_when_one_part_is_recognized_but_not_the_rest(self, text, expected_result): + """ + We test that the parsed concept seems like a known one, but it was not. + The parser has to detected that the predication was incorrect + :return: + """ + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, text) + + wrapper = res.body + lexer_nodes = res.body.body + + expected_array = compute_expected_array(cmap, text, expected_result) + assert not res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert lexer_nodes == expected_array @pytest.mark.parametrize("text, expected_concept, expected_unrecognized", [ ("x$!# prefixed", "prefixed", ["a"]), @@ -1157,12 +1054,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("x$!# infix z$!#", "infix", ["a", "b"]), ]) def test_i_cannot_parse_when_unrecognized(self, text, expected_concept, expected_unrecognized): - # concepts_map = { - # "suffixed": Concept("suffixed a").def_var("a"), - # "prefixed": Concept("a prefixed").def_var("a"), - # "infix": Concept("a infix b").def_var("a").def_var("b"), - # "one": Concept("one") - # } sheerka, context, parser = self.init_parser() res = parser.parse(context, text) @@ -1183,13 +1074,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ("one prefixed x$!#", [cnode("__var__0 prefixed", 0, 2, "one prefixed"), utnode(3, 7, " x$!#")]), ]) def test_i_cannot_parse_when_part_of_the_sequence_is_not_recognized(self, text, expected): - # concepts_map = { - # "suffixed": Concept("suffixed a").def_var("a"), - # "prefixed": Concept("a prefixed").def_var("a"), - # "infix": Concept("a infix b").def_var("a").def_var("b"), - # "one": Concept("one"), - # "two": Concept("two"), - # } sheerka, context, parser = self.init_parser() res = parser.parse(context, text) @@ -1203,7 +1087,7 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): 
@pytest.mark.parametrize("text", [ "one", "1 + 1", - "x$!# ", + "x$!#", "twenty one" "", "function(not an sya concept)", @@ -1214,13 +1098,6 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): Atoms concepts, source code or BNF concepts alone are discarded by the lexer :return: """ - # concepts_map = { - # "plus": Concept("a plus b").def_var("a").def_var("b"), - # "one": Concept("one"), - # "two": Concept("two"), - # "three": Concept("three"), - # "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), - # } sheerka, context, parser = self.init_parser() res = parser.parse(context, text) diff --git a/tests/sdp/test_sheerkaDataProvider_Old.py b/tests/sdp/test_sheerkaDataProvider_Old.py deleted file mode 100644 index 9e50bb1..0000000 --- a/tests/sdp/test_sheerkaDataProvider_Old.py +++ /dev/null @@ -1,2314 +0,0 @@ -# import hashlib -# import json -# import os -# import shutil -# from datetime import date, datetime -# from os import path -# -# import core.utils -# import pytest -# from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderError, \ -# SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderResult, SheerkaDataProviderRef -# from sdp.sheerkaSerializer import JsonSerializer, Serializer, PickleSerializer -# -# tests_root = path.abspath("../../build/tests") -# evt_digest = "3a571cb6034ef6fc8d7fe91948d0d29728eed74de02bac7968b0e9facca2c2d7" -# -# -# def read_json_file(sdp, file_name): -# with sdp.io.open(file_name, "r") as f: -# return json.load(f) -# -# -# class ObjWithKey: -# """ -# Object where the key can be resolved using get_key() -# Not suitable for Json dump as there is no to_dict() method -# """ -# -# def __init__(self, a, b): -# self.a = a -# self.b = b -# -# def __eq__(self, obj): -# return isinstance(obj, ObjWithKey) and \ -# self.a == obj.a and \ -# self.b == obj.b -# -# def __repr__(self): -# return f"ObjWithKey({self.a}, {self.b})" -# -# def get_key(self): -# return self.a -# -# -# 
class ObjSetKey: -# """ -# Object where the key can be be automatically set thanks to set_key() -# Not suitable for Json dump as there is no to_dict() method -# """ -# -# def __init__(self, value, key=None): -# self.value = value -# self.key = key -# -# def __eq__(self, obj): -# return isinstance(obj, ObjSetKey) and \ -# self.key == obj.key and \ -# self.value == obj.value -# -# def __repr__(self): -# return f"ObjSetKey({self.key}, {self.value})" -# -# def set_key(self, key): -# self.key = key -# -# -# class ObjNoKey: -# """ -# Object with no key, they won't be ordered -# Not suitable for Json dump as there is no to_dict() method -# """ -# -# def __init__(self, a, b): -# self.a = a -# self.b = b -# -# def __hash__(self): -# return hash((self.a, self.b)) -# -# def __eq__(self, obj): -# return isinstance(obj, ObjNoKey) and \ -# self.a == obj.a and \ -# self.b == obj.b -# -# def __repr__(self): -# return f"ObjNoKey({self.a}, {self.b})" -# -# -# class ObjDumpJson: -# """ -# Object where the key can be resolved using get_key() -# that can be used to dump as Json -# """ -# -# def __init__(self, key=None, value=None): -# self.key = key -# self.value = value -# -# def __eq__(self, obj): -# return isinstance(obj, ObjDumpJson) and \ -# self.key == obj.key and \ -# self.value == obj.value -# -# def __repr__(self): -# return f"ObjDumpJson({self.key}, {self.value})" -# -# def get_key(self): -# return self.key -# -# def get_digest(self): -# """ -# Returns the digest of the event -# :return: hexa form of the sha256 -# """ -# return hashlib.sha256(f"Concept:{self.key}{self.value}".encode("utf-8")).hexdigest() -# -# def to_dict(self): -# return self.__dict__ -# -# def from_dict(self, as_dict): -# self.value = as_dict["value"] -# self.key = as_dict["key"] -# -# -# class ObjDumpJsonNoDigest: -# """ -# Object where the key can be resolved using get_key() -# that can be used to dump as Json, -# But with no builtin digest computation -# """ -# -# def __init__(self, key=None, 
value=None): -# self.key = key -# self.value = value -# -# def __eq__(self, obj): -# return isinstance(obj, ObjDumpJsonNoDigest) and \ -# self.key == obj.key and \ -# self.value == obj.value -# -# def __repr__(self): -# return f"ObjDumpJsonNoDigest({self.key}, {self.value})" -# -# def get_key(self): -# return self.key -# -# def to_dict(self): -# return self.__dict__ -# -# def from_dict(self, as_dict): -# self.value = as_dict["value"] -# self.key = as_dict["key"] -# -# -# class ObjWithDigestNoKey: -# """ -# Object that can compute its digest. -# It can be used to test objects sharing the same entry (but that are different) -# Not suitable for Json dump as there is no to_dict() method -# """ -# -# def __init__(self, a, b): -# self.a = a -# self.b = b -# -# def __hash__(self): -# return hash((self.a, self.b)) -# -# def __eq__(self, obj): -# return isinstance(obj, ObjNoKey) and \ -# self.a == obj.a and \ -# self.b == obj.b -# -# def __repr__(self): -# return f"ObjWithDigestNoKey({self.a}, {self.b})" -# -# def get_digest(self): -# return str(self.a) + str(self.b) -# -# -# class ObjWithDigestWithKey: -# """ -# Object with a key that can compute its digest. 
-# It can be used to test objects sharing the same key (but that are different) -# Not suitable for Json dump as there is no to_dict() method -# """ -# -# def __init__(self, a, b): -# self.a = a -# self.b = b -# -# def __hash__(self): -# return hash((self.a, self.b)) -# -# def __eq__(self, obj): -# return isinstance(obj, ObjWithDigestWithKey) and \ -# self.a == obj.a and \ -# self.b == obj.b -# -# def __repr__(self): -# return f"ObjWithDigestWithKey({self.a}, {self.b})" -# -# def get_key(self): -# return self.a -# -# def get_digest(self): -# return str(self.a) + str(self.b) -# -# -# @pytest.fixture(autouse=True) -# def init_test(): -# if path.exists(tests_root): -# shutil.rmtree(tests_root) -# -# if not path.exists(tests_root): -# os.makedirs(tests_root) -# current_pwd = os.getcwd() -# os.chdir(tests_root) -# -# yield None -# -# os.chdir(current_pwd) -# -# -# @pytest.mark.parametrize("root, expected", [ -# (".sheerka", path.abspath(path.join(tests_root, ".sheerka"))), -# ("mem://", "") -# ]) -# def test_i_can_init_the_data_provider(root, expected): -# sdp = SheerkaDataProvider(root) -# -# assert sdp.io.root == expected -# assert sdp.io.exists(sdp.io.root) -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_save_and_load_an_event(root): -# sdp = SheerkaDataProvider(root) -# event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") -# -# evt_digest = sdp.save_event(event) -# evt = sdp.load_event(evt_digest) -# -# assert evt.version == 1 -# assert evt.date == datetime(year=2007, month=9, day=10) -# assert evt.user_id == "kodjo" -# assert evt.message == "hello world" -# assert evt.parents is None -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, evt_digest[0:24], evt_digest)) -# -# # I can get the last event -# evt = sdp.load_event() -# assert evt.message == "hello world" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def 
test_i_can_get_event_history(root): -# sdp = SheerkaDataProvider(root) -# event = Event("hello world", date=date(year=2007, month=9, day=10), user="kodjo") -# event2 = Event("hello world 2", date=date(year=2007, month=9, day=10), user="kodjo") -# -# evt_digest1 = sdp.save_event(event) -# evt_digest2 = sdp.save_event(event2) -# -# evt = sdp.load_event(evt_digest2) -# assert evt.version == 1 -# assert evt.date == datetime(year=2007, month=9, day=10) -# assert evt.user_id == "kodjo" -# assert evt.message == "hello world 2" -# assert evt.parents == [evt_digest1] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_load_events(root): -# sdp = SheerkaDataProvider(root) -# -# for i in range(15): -# sdp.save_event(Event(f"Hello {i}")) -# -# events = list(sdp.load_events(10)) # first ten -# assert len(events) == 10 -# assert events[0].message == "Hello 14" -# assert events[9].message == "Hello 5" -# -# events = list(sdp.load_events(10, 5)) # skip first 5, then take 10 -# assert len(events) == 10 -# assert events[0].message == "Hello 9" -# assert events[9].message == "Hello 0" -# -# events = list(sdp.load_events(20, 10)) # skip first 10, take 20,(but only 5 remaining) -# assert len(events) == 5 -# assert events[0].message == "Hello 4" -# assert events[4].message == "Hello 0" -# -# events = list(sdp.load_events(1, 20)) # skip first 20, take one -# assert len(events) == 0 -# -# events = list(sdp.load_events(0)) # all -# assert len(events) == 15 -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_load_events_when_no_event(root): -# sdp = SheerkaDataProvider(root) -# -# events = list(sdp.load_events(1)) -# assert len(events) == 0 -# -# events = list(sdp.load_events(1, 5)) -# assert len(events) == 0 -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_an_string(root): -# sdp = SheerkaDataProvider(root) -# obj = "foo => bar" -# -# result = 
sdp.add(evt_digest, "entry", obj) -# last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) -# state = sdp.load_state(last_commit) -# loaded = sdp.get(result.entry, result.key) -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key is None -# assert result.digest is None -# assert loaded == obj -# -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) -# -# assert state.date is not None -# assert state.parents == [] -# assert state.events == [evt_digest] -# assert state.data == {"entry": "foo => bar"} -# -# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_several_strings_if_allow_multiple_is_true(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", "foo") -# sdp.add(evt_digest, "entry", "foo") -# result = sdp.add(evt_digest, "entry", "bar") -# loaded = sdp.get(result.entry, result.key) -# -# assert result.obj == "bar" -# assert result.entry == "entry" -# assert result.key is None -# assert result.digest is None -# assert loaded == ["foo", "foo", "bar"] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_several_strings_if_allow_multiple_is_false(root): -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(IndexError) as index_error: -# sdp.add(evt_digest, "entry", "foo", False) -# sdp.add(evt_digest, "entry", "foo", False) -# assert index_error.value.args[0] == "entry" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_an_object_with_no_key(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjNoKey("a", "b") -# -# result = sdp.add(evt_digest, "entry", obj) -# last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) -# state 
= sdp.load_state(last_commit) -# loaded = sdp.get(result.entry, result.key) -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key is None -# assert result.digest is None -# -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) -# -# assert state.date is not None -# assert state.parents == [] -# assert state.events == [evt_digest] -# assert state.data == {"entry": ObjNoKey("a", "b")} -# -# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_several_obj_no_key_if_allow_multiple_is_true(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) -# sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) -# result = sdp.add(evt_digest, "entry", ObjNoKey("c", "d")) -# loaded = sdp.get(result.entry, result.key) -# -# assert result.obj == ObjNoKey("c", "d") -# assert result.entry == "entry" -# assert result.key is None -# assert result.digest is None -# assert loaded == [ObjNoKey("a", "b"), ObjNoKey("a", "b"), ObjNoKey("c", "d")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_several_obj_no_key_if_allow_multiple_is_false(root): -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(IndexError) as index_error: -# sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), False) -# sdp.add(evt_digest, "entry", ObjNoKey("c", "d"), False) -# assert index_error.value.args[0] == "entry" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_a_dict(root): -# """ -# Adding a dictionary. 
-# Note that there is no key when adding a dictionary -# -# If you add {'my_key': 'my_value'} -# 'my_key is not considered as the key of the entry' -# -# Because if you add {'my_key': 'my_value', 'my_key2': 'my_value2'} -# There are now multiple keys. -# -# So for dictionary entries, the key is not managed -# """ -# sdp = SheerkaDataProvider(root) -# obj = {"my_key": "my_value"} -# -# result = sdp.add(evt_digest, "entry", obj) -# last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) -# state = sdp.load_state(last_commit) -# loaded = sdp.get(result.entry, result.key) -# -# loaded_value = sdp.get(result.entry, "my_key") # we can retrieve by key -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key is None # we return None as dict may contains several entries -# assert result.digest is None -# -# assert loaded == obj -# assert loaded_value == "my_value" -# -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) -# -# assert state.date is not None -# assert state.parents == [] -# assert state.events == [evt_digest] -# assert state.data == {"entry": obj} -# -# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_multiple_entries_at_once_with_dict(root): -# sdp = SheerkaDataProvider(root) -# obj = {"my_key1": "value1", "my_key2": "value2"} -# -# result = sdp.add(evt_digest, "entry", obj) -# loaded = sdp.get(result.entry, result.key) -# loaded_value1 = sdp.get(result.entry, "my_key1") -# loaded_value2 = sdp.get(result.entry, "my_key2") -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key is None # we return None as dict may contains several entries -# assert result.digest is None -# -# assert loaded == {"my_key1": "value1", 
"my_key2": "value2"} -# assert loaded_value1 == "value1" -# assert loaded_value2 == "value2" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_same_key_with_dict_if_allow_multiple_is_true(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", {"my_key": "my_value"}) -# result = sdp.add(evt_digest, "entry", {"my_key": "my_value"}) -# loaded1 = sdp.get(result.entry, result.key) -# -# result = sdp.add(evt_digest, "entry", {"my_key": "my_value2"}) -# loaded2 = sdp.get(result.entry, result.key) -# -# assert result.entry == "entry" -# assert result.key is None -# assert loaded1 == {"my_key": ["my_value", "my_value"]} -# assert loaded2 == {"my_key": ["my_value", "my_value", "my_value2"]} -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_same_key_with_dict_if_allow_multiple_is_false(root): -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(IndexError) as index_error: -# sdp.add(evt_digest, "entry", {"my_key": "my_value"}, False) -# sdp.add(evt_digest, "entry", {"my_key": "my_value2"}, False) -# assert index_error.value.args[0] == "entry.my_key" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_object_with_different_key_if_allow_multiple_is_false(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", {"my_key": "a"}, False) -# sdp.add(evt_digest, "entry", {"my_key2": "b"}, False) -# -# assert sdp.get("entry", "my_key") == "a" -# assert sdp.get("entry", "my_key2") == "b" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_obj_with_key(root): -# sdp = SheerkaDataProvider(root) -# obj1 = ObjWithKey("key1", "b") -# obj2 = ObjSetKey("c", key="key2") -# -# result1 = sdp.add(evt_digest, "entry", obj1) # test when key is taken from obj.get_key() -# result2 = sdp.add(evt_digest, "entry2", obj2) # test when key is taken from obj.key -# 
last_commit = sdp.get_snapshot(SheerkaDataProvider.HeadFile) -# state = sdp.load_state(last_commit) -# -# loaded1 = sdp.get(result1.entry, result1.key) -# loaded2 = sdp.get(result2.entry, result2.key) -# -# assert result1.obj == obj1 -# assert result1.entry == "entry" -# assert result1.key == "key1" -# assert result1.digest is None -# -# assert result2.obj == obj2 -# assert result2.entry == "entry2" -# assert result2.key == "key2" -# assert result2.digest is None -# -# assert loaded1 == ObjWithKey("key1", "b") -# assert loaded2 == ObjSetKey("c", key="key2") -# -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, last_commit[0:24], last_commit)) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) -# -# assert state.date is not None -# assert len(state.parents) == 1 -# assert state.events == [evt_digest] -# assert state.data == {"entry": {"key1": obj1}, "entry2": {"key2": obj2}} -# -# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == last_commit -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_objects_with_same_key_if_allow_multiple_is_true(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b")) -# result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key")) -# loaded1 = sdp.get(result.entry, result.key) -# -# result = sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key")) -# sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key2")) # to prove that it does not melt everything -# loaded2 = sdp.get(result.entry, result.key) -# -# assert loaded1 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key")] -# assert loaded2 == [ObjWithKey("my_key", "b"), ObjSetKey("c", key="my_key"), ObjSetKey("c", key="my_key")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_object_with_same_key_if_allow_multiple_is_false(root): -# sdp = 
SheerkaDataProvider(root) -# -# with pytest.raises(IndexError) as index_error: -# sdp.add(evt_digest, "entry", ObjWithKey("my_key", "b"), False) -# sdp.add(evt_digest, "entry", ObjSetKey("c", key="my_key"), False) -# assert index_error.value.args[0] == "entry.my_key" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_obj_with_key_to_a_list(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", "foo") -# sdp.add(evt_digest, "entry", "bar") # entry is now a list -# sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) # this entry must no be taken as a object with a key -# -# loaded = sdp.get("entry") -# assert loaded == ["foo", "bar", ObjWithKey("a", "b")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_a_reference(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) -# obj1 = ObjWithDigestWithKey(1, "foo") -# result1 = sdp.add(evt_digest, "entry", obj1, use_ref=True) -# result3 = sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj1.b, obj1.get_digest())) -# -# # another object -# obj2 = ObjWithDigestWithKey(2, "bar") -# sdp.add(evt_digest, "entry", obj2, use_ref=True) -# sdp.add(evt_digest, "entry_by_ref", SheerkaDataProviderRef(obj2.b, obj2.get_digest())) -# -# assert result1.obj == obj1 -# assert result1.entry == "entry" -# assert result1.key == str(obj1.get_key()) -# assert result1.digest == obj1.get_digest() -# -# assert result3.obj == SheerkaDataProviderRef(obj1.b, obj1.get_digest()) -# assert result3.entry == "entry_by_ref" -# assert result3.key == "foo" -# assert result3.digest is None -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == { -# "entry": { -# "1": '##REF##:' + obj1.get_digest(), -# "2": '##REF##:' + obj2.get_digest(), -# }, -# "entry_by_ref": { -# "foo": SheerkaDataProviderRef(obj1.b, 
obj1.get_digest()), -# "bar": SheerkaDataProviderRef(obj2.b, obj2.get_digest()) -# }, -# } -# -# # make sure that I can load back -# loaded1 = sdp.get("entry_by_ref", "foo") -# assert loaded1 == ObjWithDigestWithKey(1, "foo") -# assert getattr(loaded1, Serializer.ORIGIN) == obj1.get_digest() -# -# loaded2 = sdp.get("entry_by_ref", "bar") -# assert loaded2 == ObjWithDigestWithKey(2, "bar") -# assert getattr(loaded2, Serializer.ORIGIN) == obj2.get_digest() -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_have_multiple_is_ref_to_the_same_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) -# ref_result1 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True) -# ref_result2 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True) -# -# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result1.digest)) -# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result2.digest)) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar'}, -# 'entry_ref': {'1': [SheerkaDataProviderRef("1", ref_result1.digest), -# SheerkaDataProviderRef("1", ref_result2.digest)]}, -# } -# -# loaded = sdp.get("entry_ref", "1") -# assert len(loaded) == 2 -# assert loaded[0] == ObjWithDigestWithKey(1, "foo") -# assert loaded[1] == ObjWithDigestWithKey(2, "bar") -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_obj_with_no_key_when_then_entry_has_keys(root): -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(SheerkaDataProviderError) as error: -# sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) -# sdp.add(evt_digest, "entry", "foo") -# -# assert error.value.obj == "foo" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) 
-# def test_i_can_add_string_using_auto_generated_key(root): -# sdp = SheerkaDataProvider(root) -# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) -# -# result1 = sdp.add_with_auto_key(evt_digest, "entry1", "foo") -# result2 = sdp.add_with_auto_key(evt_digest, "entry1", "bar") -# result3 = sdp.add_with_auto_key(evt_digest, "entry2", "baz") -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# -# assert sdp.io.exists(key_file) -# assert read_json_file(sdp, key_file) == {"entry1": 2, "entry2": 1} -# assert state.data == {"entry1": {"1": "foo", "2": "bar"}, "entry2": {"1": "baz"}} -# assert result1.obj == "foo" -# assert result2.obj == "bar" -# assert result3.obj == "baz" -# assert result1.entry == "entry1" -# assert result2.entry == "entry1" -# assert result3.entry == "entry2" -# assert result1.digest is None -# assert result2.digest is None -# assert result3.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry(root): -# """ -# If get_digest() is implemented, checks for duplicates -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: -# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) -# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) -# -# assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() -# assert error.value.key == "entry" -# assert error.value.args[0] == "Duplicate object." 
-# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry2(root): -# """ -# If get_digest() is implemented, checks for duplicates in list when no key -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: -# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) -# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "c")) -# sdp.add(evt_digest, "entry", ObjWithDigestNoKey("a", "b")) -# -# assert error.value.obj.get_digest() == ObjWithDigestNoKey("a", "b").get_digest() -# assert error.value.key == "entry" -# assert error.value.args[0] == "Duplicate object." -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry3(root): -# """ -# If get_digest() is implemented, checks for duplicates when the key is provided -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: -# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) -# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) -# -# assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() -# assert error.value.key == "entry.a" -# assert error.value.args[0] == "Duplicate object." 
-# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_add_the_same_digest_twice_in_the_same_entry4(root): -# """ -# If get_digest() is implemented, checks for duplicates in list when the key is provided -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(SheerkaDataProviderDuplicateKeyError) as error: -# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) -# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "c")) -# sdp.add(evt_digest, "entry", ObjWithDigestWithKey("a", "b")) -# -# assert error.value.obj.get_digest() == ObjWithDigestWithKey("a", "b").get_digest() -# assert error.value.key == "entry.a" -# assert error.value.args[0] == "Duplicate object." -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_get_and_set_key(root): -# sdp = SheerkaDataProvider(root) -# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) -# sdp.set_key("entry1", 1000) -# -# sdp.get_next_key("entry1") -# sdp.get_next_key("entry1") -# sdp.get_next_key("entry1") -# sdp.get_next_key("entry2") -# sdp.get_next_key("entry2") -# -# assert sdp.io.exists(key_file) -# assert read_json_file(sdp, key_file) == {"entry1": 1003, "entry2": 2} -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_object_using_auto_generated_key(root): -# sdp = SheerkaDataProvider(root) -# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) -# -# result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b")) -# result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjNoKey("a", "b")) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# -# assert sdp.io.exists(key_file) -# assert read_json_file(sdp, key_file) == {"entry1": 2} -# assert state.data == {"entry1": {"1": ObjNoKey("a", "b"), "2": ObjNoKey("a", "b")}} -# -# assert result1.obj == ObjNoKey("a", "b") -# assert result2.obj == 
ObjNoKey("a", "b") -# assert result1.entry == "entry1" -# assert result2.entry == "entry1" -# assert result1.key == "1" -# assert result2.key == "2" -# assert result1.digest is None -# assert result2.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_object_key_is_updated_when_possible_using_auto_generated_key(root): -# sdp = SheerkaDataProvider(root) -# key_file = path.join(sdp.io.root, SheerkaDataProvider.KeysFile) -# -# result1 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo")) -# result2 = sdp.add_with_auto_key(evt_digest, "entry1", ObjSetKey("foo")) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# -# assert sdp.io.exists(key_file) -# assert read_json_file(sdp, key_file) == {"entry1": 2} -# assert state.data == {"entry1": {"1": ObjSetKey("foo", "1"), "2": ObjSetKey("foo", "2")}} -# -# assert result1.obj == ObjSetKey("foo", "1") -# assert result2.obj == ObjSetKey("foo", "2") -# assert result1.entry == "entry1" -# assert result2.entry == "entry1" -# assert result1.key == "1" -# assert result2.key == "2" -# assert result1.digest is None -# assert result2.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_set_objects_with_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", ObjWithKey(1, "foo")) -# result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo")) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"2": ObjWithKey(2, "foo")}} -# assert result.entry == "entry" -# assert result.key == "2" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_set_objects_with_no_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", ObjNoKey(1, "foo")) -# result = sdp.set(evt_digest, "entry", ObjNoKey(2, "foo")) -# -# state = 
sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": ObjNoKey(2, "foo")} -# assert result.entry == "entry" -# assert result.key is None -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_set_from_list_to_dict(root): -# sdp = SheerkaDataProvider(root) -# sdp.set(evt_digest, "entry", [ObjNoKey(1, "foo"), ObjNoKey(2, "foo")]) -# result = sdp.set(evt_digest, "entry", {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"1": ObjNoKey(1, "foo"), "2": ObjNoKey(2, "foo")}} -# assert result.entry == "entry" -# assert result.key is None -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_set_using_reference(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) -# sdp.add(evt_digest, "entry", ObjWithKey(1, "foo")) -# result = sdp.set(evt_digest, "entry", ObjWithKey(2, "foo"), use_ref=True) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"2": '##REF##:43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9'}} -# -# assert result.obj == ObjWithKey(2, "foo") -# assert result.entry == "entry" -# assert result.key == "2" -# assert result.digest == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9" -# -# assert sdp.io.exists(sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, -# "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9")) -# -# # sanity check, make sure that I can load back -# loaded = sdp.get(result.entry, result.key) -# assert loaded == ObjWithKey(2, "foo") -# assert getattr(loaded, Serializer.ORIGIN) == "43f07065c7bad051cdd726bdfa4de7f8d754c31486c65ddb31d6b6548dec3db9" -# -# -# 
@pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_set_a_reference(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) -# obj = ObjWithDigestWithKey(1, "foo") -# sdp.add(evt_digest, "entry", obj, use_ref=True) -# sdp.set(evt_digest, "entry_by_value", {obj.b: obj.get_digest()}, is_ref=True) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == { -# "entry": {"1": '##REF##:' + obj.get_digest()}, -# "entry_by_value": {"foo": '##REF##:' + obj.get_digest()}, -# } -# -# # sanity check, make sure that I can load back -# loaded = sdp.get("entry_by_value", "foo") -# assert loaded == ObjWithDigestWithKey(1, "foo") -# assert getattr(loaded, Serializer.ORIGIN) == obj.get_digest() -# -# -# def test_i_cannot_set_using_use_ref_and_is_ref(): -# sdp = SheerkaDataProvider("mem://") -# -# with pytest.raises(SheerkaDataProviderError): -# sdp.set(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), use_ref=True, is_ref=True) -# -# -# def test_i_cannot_set_using_is_ref_if_obj_is_not_a_dictionary(): -# sdp = SheerkaDataProvider("mem://") -# -# with pytest.raises(SheerkaDataProviderError): -# sdp.set(evt_digest, "entry", ObjWithDigestWithKey("a", "b"), is_ref=True) -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_an_object_with_a_key_as_a_reference(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjDumpJson("my_key", "value1") -# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) -# sdp.serializer.register(obj_serializer) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# digest = state.data["entry"]["my_key"][len(SheerkaDataProvider.REF_PREFIX):] -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key == obj.key -# assert result.digest 
== obj.get_digest() -# assert digest == result.digest -# assert state.data == {'entry': {'my_key': f"{SheerkaDataProvider.REF_PREFIX}{digest}"}} -# -# loaded = sdp.load_obj(digest) -# assert loaded == obj -# assert getattr(loaded, Serializer.ORIGIN) == digest -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_a_dictionary_as_a_reference(root): -# sdp = SheerkaDataProvider(root) -# obj = {"my_key": "value1"} -# -# # No need to register a serializer for dictionaries -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# digest = state.data["entry"][len(SheerkaDataProvider.REF_PREFIX):] -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key is None # we return None as dict may contains several entries -# assert result.digest == "1790cae3f354ecb6b419faaa2ee2c374ff33efb8cddafda9960924036ac04c1f" # a digest is created -# assert digest == result.digest -# -# assert state.data == {'entry': f"{SheerkaDataProvider.REF_PREFIX}{digest}"} -# -# loaded = sdp.load_obj(digest) -# assert loaded["my_key"] == obj["my_key"] -# assert loaded[Serializer.ORIGIN] == digest -# assert len(loaded) == 2 -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_an_object_with_no_builtin_digest_as_a_reference(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjDumpJsonNoDigest("a", "b") -# -# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) -# sdp.serializer.register(obj_serializer) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# digest = state.data["entry"][obj.get_key()][len(SheerkaDataProvider.REF_PREFIX):] -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key == obj.get_key() -# assert result.digest is not None -# assert digest == 
result.digest -# -# assert state.data == {'entry': {obj.key: f"{SheerkaDataProvider.REF_PREFIX}{result.digest}"}} -# -# loaded = sdp.load_obj(digest) -# assert getattr(loaded, Serializer.ORIGIN) == digest -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_unique(root): -# sdp = SheerkaDataProvider(root) -# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo")) -# assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, False) -# -# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(1, "foo")) -# assert result == SheerkaDataProviderResult(ObjNoKey(1, "foo"), "entry", None, None, True) -# -# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar")) -# assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, False) -# -# result = sdp.add_unique(evt_digest, "entry", ObjNoKey(2, "bar")) -# assert result == SheerkaDataProviderResult(ObjNoKey(2, "bar"), "entry", None, None, True) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {ObjNoKey(1, "foo"), ObjNoKey(2, "bar")}} -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_keep_state_history(root): -# sdp = SheerkaDataProvider(root) -# -# event1 = Event("cmd add 'foo => bar'") -# event_digest1 = sdp.save_event(event1) -# obj1 = "foo => bar" -# sdp.add(event_digest1, "entry1", obj1) -# state_digest1 = sdp.get_snapshot(SheerkaDataProvider.HeadFile) -# -# event2 = Event("cmd add 'foo => baz'") -# event_digest2 = sdp.save_event(event2) -# obj2 = "foo => baz" -# sdp.add(event_digest2, "entry2", obj2) -# state_digest2 = sdp.get_snapshot(SheerkaDataProvider.HeadFile) -# -# state2 = sdp.load_state(state_digest2) -# -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, event_digest1[0:24], event_digest1)) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, 
state_digest1[0:24], state_digest1)) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.EventFolder, event_digest2[0:24], event_digest2)) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.StateFolder, state_digest2[0:24], state_digest2)) -# assert state2.date is not None -# assert state2.parents == [state_digest1] -# assert state2.events == [event_digest2] -# assert state2.data == {"entry1": "foo => bar", "entry2": "foo => baz"} -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_elements_when_there_is_nothing_to_list(root): -# sdp = SheerkaDataProvider(root) -# -# result = sdp.list("entry") -# -# assert list(result) == [] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_when_no_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, str))) -# -# sdp.add(evt_digest, "entry1", "foo") -# sdp.add(evt_digest, "entry1", "bar") -# sdp.add(evt_digest, "entry1", "baz", use_ref=True) -# sdp.add(evt_digest, "entry2", "xyz") -# -# result = sdp.list("entry1") -# -# assert list(result) == ["foo", "bar", "baz"] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_when_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey))) -# -# sdp.add(evt_digest, "entry1", {"1": "foo"}) -# sdp.add(evt_digest, "entry1", {"2": "bar"}) -# sdp.add(evt_digest, "entry1", ObjWithKey("3", "value"), use_ref=True) -# sdp.add(evt_digest, "entry2", {"4": "xxx"}) -# -# result = sdp.list("entry1") -# -# assert list(result) == ["foo", "bar", ObjWithKey("3", "value")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_when_one_element(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", "foo") -# sdp.add(evt_digest, "entry2", 
"baz") -# -# result = sdp.list("entry1") -# -# assert list(result) == ["foo"] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_when_multiple_entries_under_the_same_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", ObjWithKey("a", "b")) -# sdp.add(evt_digest, "entry", ObjWithKey("a", "c")) -# -# result = sdp.list("entry") -# assert list(result) == [[ObjWithKey("a", "b"), ObjWithKey("a", "c")]] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_when_multiple_entries_under_the_same_key_when_reference(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjWithKey))) -# -# sdp.add(evt_digest, "entry", ObjWithKey("a", "b"), use_ref=True) -# sdp.add(evt_digest, "entry", ObjWithKey("a", "c"), use_ref=True) -# -# result = sdp.list("entry") -# assert list(result) == [[ObjWithKey("a", "b"), ObjWithKey("a", "c")]] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_when_multiple_entries_under_the_same_entry(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", ObjNoKey("a", "b")) -# sdp.add(evt_digest, "entry", ObjNoKey("a", "c")) -# -# result = sdp.list("entry") -# assert list(result) == [ObjNoKey("a", "b"), ObjNoKey("a", "c")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_list_when_multiple_entries_under_the_same_entry_when_reference(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda obj: isinstance(obj, ObjNoKey))) -# -# sdp.add(evt_digest, "entry", ObjNoKey("a", "b"), use_ref=True) -# sdp.add(evt_digest, "entry", ObjNoKey("a", "c"), use_ref=True) -# -# result = sdp.list("entry") -# assert list(result) == [ObjNoKey("a", "b"), ObjNoKey("a", "c")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def 
test_i_can_filter_on_key_for_dict(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", {"1": "foo"}) -# sdp.add(evt_digest, "entry1", {"2": "bar"}) -# -# result = sdp.list("entry1", lambda k, o: k == "1") -# -# assert list(result) == ["foo"] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_filter_on_key_for_objects(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1")) -# sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2")) -# -# result = sdp.list("entry1", lambda k, o: k == "a1") -# -# assert list(result) == [ObjWithKey("a1", "b1")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_filter_on_attribute_for_dict(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", {"1": {"a": "a1", "b": "b1"}}) -# sdp.add(evt_digest, "entry1", {"2": {"a": "a2", "b": "b2"}}) -# -# result = sdp.list("entry1", lambda k, o: o["a"] == "a2") -# -# assert list(result) == [{"a": "a2", "b": "b2"}] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_filter_on_attribute_for_object(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", ObjWithKey("a1", "b1")) -# sdp.add(evt_digest, "entry1", ObjWithKey("a2", "b2")) -# -# result = sdp.list("entry1", lambda k, o: o.b == "b2") -# -# assert list(result) == [ObjWithKey("a2", "b2")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_filter_a_list(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", "foo") -# sdp.add(evt_digest, "entry1", "bar") -# -# result = sdp.list("entry1", lambda o: o == "bar") -# -# assert list(result) == ["bar"] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_filter_a_list_of_object(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", ObjNoKey("a1", "b1")) 
-# sdp.add(evt_digest, "entry1", ObjNoKey("a2", "b2")) -# -# result = sdp.list("entry1", lambda o: o.b == "b1") -# -# assert list(result) == [ObjNoKey("a1", "b1")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_remove_all_elements(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", "foo") -# sdp.add(evt_digest, "entry1", "bar") -# -# state_digest = sdp.remove(evt_digest, "entry1") -# result = sdp.list("entry1") -# -# assert sdp.io.read_text(path.join(sdp.io.root, SheerkaDataProvider.HeadFile)) == state_digest -# assert list(result) == [] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_remove_a_element(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", "foo") -# sdp.add(evt_digest, "entry1", "bar") -# -# sdp.remove(evt_digest, "entry1", lambda o: o == "foo") -# result = sdp.list("entry1") -# -# assert list(result) == ["bar"] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_remove_dict_by_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", {"1": ObjNoKey("a1", "b1")}) -# sdp.add(evt_digest, "entry1", {"2": ObjNoKey("a2", "b2")}) -# -# sdp.remove(evt_digest, "entry1", lambda k, o: k == "2") -# result = sdp.list("entry1") -# -# assert list(result) == [ObjNoKey("a1", "b1")] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_remove_when_only_one_element(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", "foo") -# -# sdp.remove(evt_digest, "entry1", lambda o: o == "foo") -# result = sdp.list("entry1") -# -# assert list(result) == [] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_remove_if_entry_does_not_exist(root): -# sdp = SheerkaDataProvider(root) -# with pytest.raises(IndexError) as e: -# sdp.remove(evt_digest, "entry", silent_remove=False) 
-# assert str(e.value) == "entry" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_no_exception_is_raise_when_remove_in_silent_mode(root): -# sdp = SheerkaDataProvider(root) -# sdp.remove(evt_digest, "entry", silent_remove=True) # default -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_modify_an_entry_without_a_key(root): -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(SheerkaDataProviderError) as error: -# sdp.modify(evt_digest, "entry", None, "baz") -# -# assert error.value.args[0] == "Key is mandatory." -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_dict_with_a_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", {"key1": "foo"}) -# sdp.add(evt_digest, "entry", {"key2": "bar"}) -# -# result = sdp.modify(evt_digest, "entry", "key1", "baz") -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"key1": "baz", "key2": "bar"}} -# assert result.obj == "baz" -# assert result.entry == "entry" -# assert result.key == "key1" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_an_object_with_a_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) -# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) -# -# result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key1", "baz")) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# -# assert state.data == {"entry": {"key1": ObjWithKey("key1", "baz"), "key2": ObjWithKey("key2", "bar")}} -# assert result.obj == ObjWithKey("key1", "baz") -# assert result.entry == "entry" -# assert result.key == "key1" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def 
test_i_can_modify_an_object_while_changing_the_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) -# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) -# -# result = sdp.modify(evt_digest, "entry", "key1", ObjWithKey("key3", "baz")) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"key2": ObjWithKey("key2", "bar"), "key3": ObjWithKey("key3", "baz")}} -# assert result.obj == ObjWithKey("key3", "baz") -# assert result.entry == "entry" -# assert result.key == "key3" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) -# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar")) -# -# result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key1", "bar")) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"key1": [ObjWithKey("key1", "foo"), ObjWithKey("key1", "bar")]}} -# assert result.obj == ObjWithKey("key1", "bar") -# assert result.entry == "entry" -# assert result.key == "key1" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_list(root): -# """ -# In this example, the item to modify is within a list, and its key has changed -# and in the new key, there is already a list -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11")) -# sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value12")) -# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) -# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) -# -# new_value = ObjDumpJson("key1", 
"value13") -# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) -# result = sdp.modify(evt_digest, "entry", "key2", new_value) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": { -# "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value12"), ObjDumpJson("key1", "value13")], -# "key2": [ObjDumpJson("key2", "value22")] -# }} -# assert result.obj == new_value -# assert result.entry == "entry" -# assert result.key == "key1" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_nothing(root): -# """ -# In this example, the item to modify is within a list, and its key has changed -# and in the new key, there is nothing (the new key does not exist) -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) -# -# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) -# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) -# -# new_value = ObjDumpJson("key1", "value13") -# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) -# result = sdp.modify(evt_digest, "entry", "key2", new_value) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": { -# "key1": ObjDumpJson("key1", "value13"), -# "key2": [ObjDumpJson("key2", "value22")] -# }} -# assert result.obj == new_value -# assert result.entry == "entry" -# assert result.key == "key1" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_an_object_while_changing_the_key_to_an_existing_when_one_item(root): -# """ -# In this example, the item to modify is within a list, and its key has changed -# and in the new key, there is only one 
element -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) -# -# sdp.add(evt_digest, "entry", ObjDumpJson("key1", "value11")) -# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value21")) -# sdp.add(evt_digest, "entry", ObjDumpJson("key2", "value22")) -# -# new_value = ObjDumpJson("key1", "value13") -# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key2", "value21").get_digest()) -# result = sdp.modify(evt_digest, "entry", "key2", new_value) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": { -# "key1": [ObjDumpJson("key1", "value11"), ObjDumpJson("key1", "value13")], -# "key2": [ObjDumpJson("key2", "value22")] -# }} -# assert result.obj == new_value -# assert result.entry == "entry" -# assert result.key == "key1" -# assert result.digest is None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_a_object_saved_by_ref(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithKey))) -# sdp.add(evt_digest, "entry", ObjWithKey("key1", "foo")) -# sdp.add(evt_digest, "entry", ObjWithKey("key2", "bar"), use_ref=True) -# -# result = sdp.modify(evt_digest, "entry", "key2", ObjWithKey("key2", "baz")) -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": { -# "key1": ObjWithKey("key1", "foo"), -# "key2": "##REF##:041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e"}} -# assert result.obj == ObjWithKey("key2", "baz") -# assert result.entry == "entry" -# assert result.key == "key2" -# assert result.digest == "041d3cca905b51bc2c66251e73e56b836aae7b9435ee3d7eb05d44bb67ff575e" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_an_object_saved_by_ref_in_a_list(root): -# sdp = SheerkaDataProvider(root) -# 
sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJsonNoDigest))) -# -# sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value11"), use_ref=True) -# sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key1", "value12"), use_ref=True) -# result = sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value21"), use_ref=True) -# sdp.add(evt_digest, "entry", ObjDumpJsonNoDigest("key2", "value22"), use_ref=True) -# -# new_value = ObjDumpJsonNoDigest("key1", "value13") -# setattr(new_value, Serializer.ORIGIN, result.digest) -# result = sdp.modify(evt_digest, "entry", "key2", new_value) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": { -# 'key1': ['##REF##:f80a0c0aceb1a7a3d238c0cff2d86d6bd3a62e0c1a65c5b505f43b10c4604bd8', -# '##REF##:239a8238d188c37afa10b1bcc312ca8a0e78f6e75d688ca65d08e16717ff68b0', -# '##REF##:9d0a2bf9d4081de0b14837ea46bc7a1cfb6b7562f7ae86255ea9bd0ac53a6437'], -# 'key2': ['##REF##:df8a38b07f469f2ff8001ea6a70f77f4f9ce85d69c530091fcaf4b380f1500d3'] -# }} -# assert result.obj == new_value -# assert result.entry == "entry" -# assert result.key == "key1" -# assert result.digest is not None -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_a_data_provider_ref(root): -# # first, create a valid entry -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) -# obj = ObjWithDigestWithKey("1", "foo") -# sdp.add(evt_digest, "entry", obj, use_ref=True) -# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef(obj.b, obj.get_digest())) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == { -# "entry": {"1": "##REF##:1foo"}, -# "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj.get_digest())}} -# -# # modify this entry -# obj_new = ObjWithDigestWithKey("1", "bar") -# sdp.modify(evt_digest, "entry", 
obj_new.a, obj_new) -# result = sdp.modify(evt_digest, "entry_ref", "foo", SheerkaDataProviderRef(obj.b, obj_new.get_digest())) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == { -# "entry": {"1": "##REF##:1bar"}, -# "entry_ref": {"foo": SheerkaDataProviderRef(obj.b, obj_new.get_digest())}} -# -# assert result.obj == SheerkaDataProviderRef(obj.b, obj_new.get_digest()) -# assert result.entry == "entry_ref" -# assert result.key == "foo" -# assert result.digest is None # digest is not set as what is saved (the digest) is not saved by ref -# -# # sanity check, I can load the modified entry -# loaded = sdp.get("entry_ref", "foo") -# assert loaded == ObjWithDigestWithKey("1", "bar") -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_is_ref_when_in_list(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) -# ref_result1 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(1, "foo"), use_ref=True) -# ref_result2 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(2, "bar"), use_ref=True) -# -# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result1.digest)) -# sdp.add(evt_digest, "entry_ref", SheerkaDataProviderRef("1", ref_result2.digest)) -# -# ref_result3 = sdp.add(evt_digest, "entry", ObjWithDigestWithKey(3, "baz"), use_ref=True) -# -# result = sdp.modify( -# evt_digest, -# "entry_ref", -# "1", -# SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest)) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {'entry': {'1': '##REF##:1foo', '2': '##REF##:2bar', '3': '##REF##:3baz'}, -# 'entry_ref': {'1': [ -# SheerkaDataProviderRef("1", ref_result1.digest), -# SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest)]}} -# -# loaded = sdp.get("entry_ref", "1") -# assert len(loaded) == 2 -# assert loaded[0] 
== ObjWithDigestWithKey(1, "foo") -# assert loaded[1] == ObjWithDigestWithKey(3, "baz") -# -# assert result.obj == SheerkaDataProviderRef("1", ref_result3.digest, ref_result2.digest) -# assert result.entry == "entry_ref" -# assert result.key == "1" -# assert result.digest is None # digest is not set as what is saved (the digest) is not saved by ref -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_modify_an_entry_that_does_not_exist(root): -# sdp = SheerkaDataProvider(root) -# -# with pytest.raises(IndexError) as e: -# sdp.modify(evt_digest, "entry", "key", "foo") -# -# assert str(e.value) == "entry" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_modify_a_key_that_does_not_exist(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", {"1": "foo"}) -# -# with pytest.raises(IndexError) as e: -# sdp.modify(evt_digest, "entry1", "2", "bar") -# assert str(e.value) == "entry1.2" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_modify_a_list_when_origin_is_unknown(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", ObjWithKey("key", "value1")) -# sdp.add(evt_digest, "entry", ObjWithKey("key", "value2")) # same they -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# -# with pytest.raises(SheerkaDataProviderError) as error: -# sdp.modify(evt_digest, "entry", "key", ObjWithKey("key", "value2")) -# -# assert error.value.obj == ObjWithKey("key", "value2") -# assert error.value.args[0] == "Multiple entries under 'entry.key'" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_a_list_when_the_origin_is_known(root): -# sdp = SheerkaDataProvider(root) -# -# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1")) -# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2")) # same they -# -# new_value = 
ObjDumpJson("key", "value3") -# setattr(new_value, Serializer.ORIGIN, ObjDumpJson("key", "value1").get_digest()) -# -# sdp.modify(evt_digest, "entry", "key", new_value) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"key": [ObjDumpJson("key", "value3"), ObjDumpJson("key", "value2")]}} -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_modify_a_list_when_the_origin_is_known_2(root): -# """ -# This time, we check that the origin is automatically set when the object was saved as a reference -# We also check that all objects are still persisted as reference -# :return: -# """ -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) -# -# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True) -# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True) # same they -# -# objs = sdp.get("entry", "key") # origin is automatically set to the loaded objects -# objs[0].value = "value3" -# -# sdp.modify(evt_digest, "entry", "key", objs[0]) -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": {"key": [ -# "##REF##:621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0", -# "##REF##:5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517"]}} -# -# # checks that all objects are (still) persisted -# sdp.io.exists( -# sdp.io.get_obj_path(sdp.ObjectsFolder, "621771a3af6a331e9abb3a63fb25e0cac4b13df0b292dfa30db6bd89031bfad0")) -# sdp.io.exists( -# sdp.io.get_obj_path(sdp.ObjectsFolder, "5fe085e8366d35c5f04a18b2d3dada376128b246e07c66de5872830b00f5f517")) -# sdp.io.exists( -# sdp.io.get_obj_path(sdp.ObjectsFolder, "1aac9e0d5c74c3bb989fd0f9def792bba36c5595d32f61be7cbb1a38dcf75327")) -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_get_the_entire_entry(root): -# sdp = 
SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", "foo") -# sdp.add(evt_digest, "entry1", "bar") -# -# result = sdp.get("entry1") -# result_safe = sdp.get_safe("entry1") -# -# assert result == ["foo", "bar"] -# assert result_safe == ["foo", "bar"] -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_get_an_entry_with_on_object(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", "foo") -# -# result = sdp.get("entry1") -# result_safe = sdp.get_safe("entry1") -# -# assert result == "foo" -# assert result_safe == "foo" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_get_an_entry_by_key(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", {"1": "foo"}) -# sdp.add(evt_digest, "entry1", {"2": "bar"}) -# -# result = sdp.get("entry1", "2") -# result_safe = sdp.get_safe("entry1", "2") -# -# assert result == "bar" -# assert result_safe == "bar" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_get_object_saved_by_reference(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjDumpJson("my_key", "value1") -# sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj))) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# loaded = sdp.get(result.entry, result.key) -# -# assert loaded == obj -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_get_objects_from_list_when_saved_by_reference(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjDumpJson))) -# -# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value1"), use_ref=True) -# sdp.add(evt_digest, "entry", ObjDumpJson("key", "value2"), use_ref=True) # same they -# -# objs = sdp.get("entry", "key") -# -# assert objs[0] == ObjDumpJson("key", "value1") -# assert objs[1] == ObjDumpJson("key", "value2") -# -# -# 
@pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_get_an_entry_that_does_not_exist(root): -# sdp = SheerkaDataProvider(root) -# -# assert sdp.get_safe("entry") is None -# with pytest.raises(IndexError) as e: -# sdp.get("entry") -# assert str(e.value) == "entry" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_cannot_get_a_key_that_does_not_exist(root): -# sdp = SheerkaDataProvider(root) -# sdp.add(evt_digest, "entry1", {"1": "foo"}) -# -# assert sdp.get_safe("entry1", "2") is None -# with pytest.raises(IndexError) as e: -# sdp.get("entry1", "2") -# assert str(e.value) == "entry1.2" -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_save_and_retrieve_cache(root): -# sdp = SheerkaDataProvider(root) -# txt = "foo bar baz foo bar baz foo bar baz" -# key = "key_to_use" -# category = "cache_category" -# -# assert not sdp.in_cache(category, key) -# digest = sdp.add_to_cache(category, key, txt) -# assert sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest)) -# assert sdp.in_cache(category, key) -# -# from_cache = sdp.load_from_cache(category, key) -# assert from_cache == txt -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_cache_is_not_updated_by_default(root): -# sdp = SheerkaDataProvider(root) -# txt = "foo bar baz foo bar baz foo bar baz" -# txt2 = "foo foo foo foo foo foo foo foo foo" -# key = "key_to_use" -# category = "cache_category" -# -# sdp.add_to_cache(category, key, txt) -# sdp.add_to_cache(category, key, txt2) -# -# from_cache = sdp.load_from_cache(category, key) -# assert from_cache == txt -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_update_cache(root): -# sdp = SheerkaDataProvider(root) -# txt = "foo bar baz foo bar baz foo bar baz" -# txt2 = "foo foo foo foo foo foo foo foo foo" -# key = "key_to_use" -# 
category = "cache_category" -# -# sdp.add_to_cache(category, key, txt) -# sdp.add_to_cache(category, key, txt2, update=True) -# -# from_cache = sdp.load_from_cache(category, key) -# assert from_cache == txt2 -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_remove_from_cache(root): -# sdp = SheerkaDataProvider(root) -# txt = "foo bar baz foo bar baz foo bar baz" -# key = "key_to_use" -# category = "cache_category" -# -# sdp.add_to_cache(category, key, txt) -# digest = sdp.remove_from_cache(category, key) -# assert not sdp.io.exists(path.join(sdp.io.root, SheerkaDataProvider.CacheFolder, digest[0:24], digest)) -# assert not sdp.in_cache(category, key) -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_test_than_an_entry_exists(root): -# sdp = SheerkaDataProvider(root) -# -# assert not sdp.exists("entry") -# sdp.add(evt_digest, "entry", "value") -# assert sdp.exists("entry") -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_test_if_a_key_exists(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjWithDigestWithKey("key", "value") -# -# assert not sdp.exists("entry") -# assert not sdp.exists("entry", obj.get_key()) -# -# sdp.add(evt_digest, "entry", obj) -# assert not sdp.exists("entry", "wrong_key") -# assert sdp.exists("entry", obj.get_key()) -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_test_that_the_object_exists(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjWithDigestWithKey("key", "value") -# -# assert not sdp.exists("entry") -# assert not sdp.exists("entry", obj.get_key()) -# assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) -# -# # test for a single item under the key -# sdp.add(evt_digest, "entry", obj) -# assert not sdp.exists("entry", obj.get_key(), "wrong_digest") -# assert sdp.exists("entry", obj.get_key(), obj.get_digest()) -# -# # test for a list item 
under the key -# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2")) -# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) -# -# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3")) -# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) -# -# sdp.add(evt_digest, "entry2", obj) -# assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_test_than_the_object_exists_when_using_references(root): -# sdp = SheerkaDataProvider(root) -# sdp.serializer.register(PickleSerializer(lambda o: isinstance(o, ObjWithDigestWithKey))) -# obj = ObjWithDigestWithKey("key", "value") -# -# assert not sdp.exists("entry") -# assert not sdp.exists("entry", obj.get_key()) -# assert not sdp.exists("entry", obj.get_key(), obj.get_digest()) -# -# # test for a single item under the key -# sdp.add(evt_digest, "entry", obj, use_ref=True) -# assert not sdp.exists("entry", obj.get_key(), "wrong_digest") -# assert sdp.exists("entry", obj.get_key(), obj.get_digest()) -# -# # test for a list item under the key -# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value2"), use_ref=True) -# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) -# -# sdp.add(evt_digest, "entry2", ObjWithDigestWithKey("key", "value3"), use_ref=True) -# assert not sdp.exists("entry2", obj.get_key(), obj.get_digest()) -# -# sdp.add(evt_digest, "entry2", obj, use_ref=True) -# assert sdp.exists("entry2", obj.get_key(), obj.get_digest()) -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_save_and_load_object_ref_with_history(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjDumpJson("my_key", "value1") -# sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj))) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# loaded = sdp.get(result.entry, result.key) -# history = 
getattr(loaded, Serializer.HISTORY) -# -# assert result.obj == obj -# assert result.entry == "entry" -# assert result.key == obj.key -# assert result.digest == obj.get_digest() -# -# assert loaded.key == obj.key -# assert loaded.value == obj.value -# -# assert history[Serializer.USERNAME] == "kodjo" -# assert history[Serializer.MODIFICATION_DATE] != "" -# assert history[Serializer.PARENTS] == [] -# -# assert sdp.io.exists(sdp.io.get_obj_path(sdp.ObjectsFolder, obj.get_digest())) -# -# # save a second type with no modification -# previous_modification_time = history[Serializer.MODIFICATION_DATE] -# previous_parents = history[Serializer.PARENTS] -# -# sdp.modify(evt_digest, "entry", result.key, loaded) -# loaded = sdp.get(result.entry, result.key) -# history = getattr(loaded, Serializer.HISTORY) -# -# assert history[Serializer.MODIFICATION_DATE] == previous_modification_time -# assert history[Serializer.PARENTS] == previous_parents -# -# # save again, but with a modification -# previous_digest = loaded.get_digest() -# loaded.value = "value2" -# -# sdp.modify(evt_digest, "entry", result.key, loaded) -# loaded2 = sdp.get(result.entry, result.key) -# history2 = getattr(loaded2, Serializer.HISTORY) -# -# assert loaded2.key == loaded.key -# assert loaded2.value == loaded.value -# -# assert history2[Serializer.USERNAME] == "kodjo" -# assert history2[Serializer.MODIFICATION_DATE] != "" -# assert history2[Serializer.PARENTS] == [previous_digest] -# -# state = sdp.load_state(sdp.get_snapshot(SheerkaDataProvider.HeadFile)) -# assert state.data == {"entry": { -# "my_key": '##REF##:e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256'}} -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_can_add_obj_with_same_key_and_get_them_back(root): -# sdp = SheerkaDataProvider(root) -# obj1 = ObjDumpJson("key", "value1") -# obj2 = ObjDumpJson("key", "value2") -# 
sdp.serializer.register(JsonSerializer(core.utils.get_full_qualified_name(obj1))) -# -# result = sdp.add(evt_digest, "entry", obj1, use_ref=True) -# sdp.add(evt_digest, "entry", obj2, use_ref=True) -# -# loaded = sdp.get(result.entry, result.key) -# -# assert len(loaded) == 2 -# assert loaded[0] == obj1 -# assert loaded[1] == obj2 -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_get_safe_dictionary_without_origin(root): -# sdp = SheerkaDataProvider(root) -# obj = {"my_key": "value1"} -# -# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) -# sdp.serializer.register(obj_serializer) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# from_db = sdp.get(result.entry, result.key) -# -# assert len(from_db) == 2 -# assert from_db["my_key"] == obj["my_key"] -# assert Serializer.ORIGIN in from_db -# -# from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False) -# assert len(from_db_no_origin) == 1 -# assert from_db_no_origin["my_key"] == obj["my_key"] -# assert Serializer.ORIGIN not in from_db_no_origin -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def test_i_get_dictionary_without_origin(root): -# sdp = SheerkaDataProvider(root) -# obj = {"my_key": "value1"} -# -# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) -# sdp.serializer.register(obj_serializer) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# from_db = sdp.get(result.entry, result.key) -# -# assert len(from_db) == 2 -# assert from_db["my_key"] == obj["my_key"] -# assert Serializer.ORIGIN in from_db -# -# from_db_no_origin = sdp.get(result.entry, result.key, load_origin=False) -# assert len(from_db_no_origin) == 1 -# assert from_db_no_origin["my_key"] == obj["my_key"] -# assert Serializer.ORIGIN not in from_db_no_origin -# -# -# @pytest.mark.parametrize("root", [ -# ".sheerka", -# "mem://" -# ]) -# def 
test_i_get_safe_object_without_origin(root): -# sdp = SheerkaDataProvider(root) -# obj = ObjDumpJson("my_key", "value1") -# -# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) -# sdp.serializer.register(obj_serializer) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# from_db = sdp.get(result.entry, result.key) -# -# assert from_db == obj -# assert hasattr(from_db, Serializer.ORIGIN) -# -# from_db_no_origin = sdp.get_safe(result.entry, result.key, load_origin=False) -# assert from_db_no_origin == obj -# assert not hasattr(from_db_no_origin, Serializer.ORIGIN) -# -# -# def test_i_can_get_ref(): -# sdp = SheerkaDataProvider("mem://") -# obj = ObjDumpJson("my_key", "value1") -# -# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(obj)) -# sdp.serializer.register(obj_serializer) -# -# result = sdp.add(evt_digest, "entry", obj, use_ref=True) -# -# ref = sdp.get_ref(result.entry, result.key) -# assert ref == "076f0df0f110c304982242a88088efacce71f361e49f065db75919a7f72c2821" -# -# -# def test_i_can_get_ref_when_list(): -# sdp = SheerkaDataProvider("mem://") -# -# obj_serializer = JsonSerializer(core.utils.get_full_qualified_name(ObjDumpJson)) -# sdp.serializer.register(obj_serializer) -# -# sdp.add(evt_digest, "entry", ObjDumpJson("my_key", "value1"), use_ref=True) -# result = sdp.add(evt_digest, "entry", ObjDumpJson("my_key", "value2"), use_ref=True) -# -# ref = sdp.get_ref(result.entry, result.key) -# assert ref == [ -# "076f0df0f110c304982242a88088efacce71f361e49f065db75919a7f72c2821", -# "e6bf5b56428cfce0f08c94f2c3625dc3b3a8180d7229eaa9f8aa967fb16e5256" -# ] -# -# -# def test_i_cannot_get_ref_if_the_saved_item_is_not_a_ref(): -# sdp = SheerkaDataProvider("mem://") -# obj = ObjDumpJson("my_key", "value1") -# result = sdp.add(evt_digest, "entry", obj, use_ref=False) -# -# with pytest.raises(SheerkaDataProviderError) as e: -# sdp.get_ref(result.entry, result.key) -# -# assert e.value.args[0] == "Not a reference" -# 
assert e.value.obj == f"{result.entry}.{result.key}" -# -# -# def test_i_cannot_get_ref_if_the_item_does_not_exist(): -# sdp = SheerkaDataProvider("mem://") -# with pytest.raises(IndexError): -# sdp.get_ref("fake", "fake")