From 37cd3ed75795a9ac702192d9867c92e7335c12c8 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Thu, 27 Aug 2020 18:54:28 +0200 Subject: [PATCH] Fixed some bugs --- _concepts.txt | 5 +- src/cache/BaseCache.py | 19 +- src/core/sheerka/services/SheerkaAdmin.py | 4 +- src/core/sheerka/services/SheerkaDump.py | 1 + src/core/utils.py | 30 +- src/evaluators/AddConceptEvaluator.py | 1 + src/evaluators/LexerNodeEvaluator.py | 3 +- src/evaluators/PythonEvaluator.py | 10 + src/parsers/BaseNodeParser.py | 87 ++++-- src/parsers/BaseParser.py | 34 +-- src/parsers/BnfNodeParser.py | 304 ++++++++++++++----- src/parsers/BnfParser.py | 19 +- src/parsers/PythonParser.py | 2 +- src/parsers/PythonWithConceptsParser.py | 15 +- src/parsers/ShortTermMemoryParser.py | 2 +- src/parsers/SyaNodeParser.py | 33 +- src/parsers/UnrecognizedNodeParser.py | 61 +++- tests/BaseTest.py | 7 + tests/cache/test_cache.py | 33 ++ tests/core/test_SheerkaSetsManager.py | 2 +- tests/evaluators/test_LexerNodeEvaluator.py | 4 +- tests/non_reg/test_sheerka_non_reg.py | 33 ++ tests/parsers/parsers_utils.py | 11 +- tests/parsers/test_BnfNodeParser.py | 18 +- tests/parsers/test_BnfParser.py | 7 +- tests/parsers/test_SyaNodeParser.py | 81 +++-- tests/parsers/test_UnrecognizedNodeParser.py | 48 ++- 27 files changed, 685 insertions(+), 189 deletions(-) diff --git a/_concepts.txt b/_concepts.txt index ebc753a..b664de3 100644 --- a/_concepts.txt +++ b/_concepts.txt @@ -81,7 +81,7 @@ def concept thousands from bnf number=n1 'thousand' 'and' number=n2 as n1 * 1000 last_created_concept() is number def concept history as history() def concept plus from a plus b as a + b -def concept minus from a plus b as a - b +def concept minus from a minus b as a - b def concept multiplied from a multiplied by b as a * b def concept divided from a divided by b as a * b set_is_greater_than(BuiltinConcepts.PRECEDENCE, multiplied, plus) @@ -90,8 +90,11 @@ set_is_greater_than(BuiltinConcepts.PRECEDENCE, multiplied, minus) set_is_greater_than(BuiltinConcepts.PRECEDENCE, divided, minus) def concept explain as get_results() | filter("id == 0") | recurse(2) def concept explain last as get_last_results() | filter("id == 0") | recurse(2) +def concept explain x as get_results() | filter(f"id == {x}") | recurse(3) where x +def concept explain x '--recurse' y as get_results() | filter(f"id == {x}") | recurse(y) where x,y set_isa(c:explain:, __COMMAND) set_isa(c:explain last:, __COMMAND) +set_isa(c:explain x:, __COMMAND) def concept precedence a > precedence b as set_is_greater_than(BuiltinConcepts.PRECEDENCE, a, b) set_isa(c:precedence a > precedence b:, __COMMAND) def concept x is a command as set_isa(x, __COMMAND) diff --git a/src/cache/BaseCache.py b/src/cache/BaseCache.py index 62c1280..254dcd4 100644 --- a/src/cache/BaseCache.py +++ b/src/cache/BaseCache.py @@ -1,5 +1,7 @@ from threading import RLock +MAX_INITIALIZED_KEY = 100 + class BaseCache: """ @@ -15,11 +17,15 @@ class BaseCache: self._extend_exists = extend_exists # search in remote self._lock = RLock() self._current_size = 0 - self._initialized_keys = set() + self._initialized_keys = set() # to keep the list of the keys already requested (using get()) self.to_add = set() self.to_remove = set() + # Explanation on _initialized_keys + # everytime you try to get an item, its key is added to _initialized_keys + # If the item is found, the entru is i + def __len__(self): """ Return the number of items in the cache @@ -78,7 +84,6 @@ class BaseCache: :return: """ with self._lock: - self._initialized_keys.add(key) return self._get(key) def inner_get(self, key): @@ -165,6 +170,9 @@ class BaseCache: self._initialized_keys.remove(key) except KeyError: pass + + self._current_size -= len(to_delete) + return len(to_delete) def clear(self): @@ -201,6 +209,7 @@ class BaseCache: for key in keys: if key not in self._initialized_keys and self._default: # to keep sync with the remote repo is needed + # first check self._initialized_keys to prevent infinite loop self.get(key) def _add_to_add(self, key): @@ -221,7 +230,12 @@ class BaseCache: try: value = self._cache[key] except KeyError: + if len(self._initialized_keys) == MAX_INITIALIZED_KEY: + self._initialized_keys.clear() if callable(self._default): + if key in self._initialized_keys: + return None + value = self._default(key) if value is not None: self._cache[key] = value @@ -233,6 +247,7 @@ class BaseCache: self._current_size += 1 else: value = self._default + self._initialized_keys.add(key) return value diff --git a/src/core/sheerka/services/SheerkaAdmin.py b/src/core/sheerka/services/SheerkaAdmin.py index 8663158..71ffb0b 100644 --- a/src/core/sheerka/services/SheerkaAdmin.py +++ b/src/core/sheerka/services/SheerkaAdmin.py @@ -5,7 +5,7 @@ from core.sheerka.services.sheerka_service import BaseService CONCEPTS_FILE = "_concepts_lite.txt" CONCEPTS_FILE_ALL_CONCEPTS = "_concepts.txt" - +CONCEPTS_FILE_TO_USE = CONCEPTS_FILE_ALL_CONCEPTS class SheerkaAdmin(BaseService): NAME = "Admin" @@ -38,7 +38,7 @@ class SheerkaAdmin(BaseService): return self.sheerka.cache_manager.caches[name].cache.copy() - def restore(self, concept_file=CONCEPTS_FILE): + def restore(self, concept_file=CONCEPTS_FILE_TO_USE): """ Restore the state with all previous valid concept definitions :return: diff --git a/src/core/sheerka/services/SheerkaDump.py b/src/core/sheerka/services/SheerkaDump.py index 7b565bc..3ffb02c 100644 --- a/src/core/sheerka/services/SheerkaDump.py +++ b/src/core/sheerka/services/SheerkaDump.py @@ -47,6 +47,7 @@ class SheerkaDump(BaseService): if not first: self.sheerka.log.info("") + self.sheerka.log.info(f"id : {c.id}") self.sheerka.log.info(f"name : {c.name}") self.sheerka.log.info(f"key : {c.key}") self.sheerka.log.info(f"definition : {c.metadata.definition}") diff --git a/src/core/utils.py b/src/core/utils.py index f1416cf..34bf093 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -5,8 +5,22 @@ import re from core.tokenizer import TokenKind +default_debug_name = "*default*" +debug_activated = set() + + +def my_debug(*args, check_started=None): + if check_started and default_debug_name not in debug_activated: + return + + if isinstance(check_started, str) and check_started not in debug_activated: + return + + if isinstance(check_started, list): + for debug_name in check_started: + if debug_name not in debug_activated: + return -def my_debug(*args): with open("debug.txt", "a") as f: for arg in args: if isinstance(arg, list): @@ -16,6 +30,20 @@ def my_debug(*args): f.write(f"{arg}\n") +def start_debug(msg=None, debug_name=default_debug_name): + debug_activated.add(debug_name) + if msg: + with open("debug.txt", "a") as f: + f.write(f"{msg}\n") + + +def stop_debug(msg=None, debug_name=default_debug_name): + if msg: + with open("debug.txt", "a") as f: + f.write(f"{msg}\n") + debug_activated.remove(debug_name) + + def sysarg_to_string(argv): """ Transform a list of strings into a single string diff --git a/src/evaluators/AddConceptEvaluator.py b/src/evaluators/AddConceptEvaluator.py index 1b65f5d..f3802a4 100644 --- a/src/evaluators/AddConceptEvaluator.py +++ b/src/evaluators/AddConceptEvaluator.py @@ -15,6 +15,7 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor): """ def __init__(self): + super().__init__() self.names = set() def visit_ConceptExpression(self, node): diff --git a/src/evaluators/LexerNodeEvaluator.py b/src/evaluators/LexerNodeEvaluator.py index 593d810..065ef6a 100644 --- a/src/evaluators/LexerNodeEvaluator.py +++ b/src/evaluators/LexerNodeEvaluator.py @@ -1,7 +1,6 @@ from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from evaluators.BaseEvaluator import OneReturnValueEvaluator -from parsers.BaseNodeParser import SourceCodeNode -from parsers.BnfNodeParser import ConceptNode +from parsers.BaseNodeParser import SourceCodeNode, ConceptNode from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode diff --git a/src/evaluators/PythonEvaluator.py b/src/evaluators/PythonEvaluator.py index 97378d6..98325a3 100644 --- a/src/evaluators/PythonEvaluator.py +++ b/src/evaluators/PythonEvaluator.py @@ -298,6 +298,12 @@ class PythonEvaluator(OneReturnValueEvaluator): @staticmethod def resolve_concept(context, concept_hint): + """ + Try to find a concept by its name, id or the pattern c:key|id: + :param context: + :param concept_hint: + :return: + """ if isinstance(concept_hint, Concept): return concept_hint @@ -310,6 +316,10 @@ class PythonEvaluator(OneReturnValueEvaluator): # So a concept was explicitly required, not its value # We mark the concept as already evaluated, so it's body will not be evaluated new_instance.metadata.is_evaluated = True + if len(concept.metadata.variables) > 0: + # In this situation, it means that we are dealing with the concept and not its instantiation + # So do not try to evaluate it + new_instance.metadata.is_evaluated = True return new_instance diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index c23a969..58877d0 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -272,6 +272,9 @@ class SourceCodeWithConceptNode(LexerNode): if id(self) == id(other): return True + if isinstance(other, SCWC): + return other == self + if not isinstance(other, SourceCodeWithConceptNode): return False @@ -315,6 +318,10 @@ class SourceCodeWithConceptNode(LexerNode): return self def pseudo_fix_source(self): + """ + pseudo because the code is not that clean ! + :return: + """ self.source = self.first.source for n in self.nodes: self.source += " " @@ -352,23 +359,6 @@ utnode = namedtuple("utnode", "start end source") scnode = namedtuple("scnode", "start end source") -@dataclass(init=False) -class SCWC: - """ - SourceNodeWithConcept tester class - It matches with a SourceNodeWithConcept - but it's easier to instantiate during the tests - """ - first: LexerNode - last: LexerNode - content: tuple - - def __init__(self, first, last, *args): - self.first = first - self.last = last - self.content = args - - class HelperWithPos: def __init__(self, start=None, end=None): self.start = start @@ -439,6 +429,69 @@ class SCN(HelperWithPos): return txt + ")" +class SCWC(HelperWithPos): + """ + SourceNodeWithConcept tester class + It matches with a SourceNodeWithConcept + but it's easier to instantiate during the tests + """ + + def __init__(self, first, last, *args): + super().__init__(None, None) + self.first = first + self.last = last + self.content = args + + def __eq__(self, other): + if id(self) == id(other): + return True + + if isinstance(other, SourceCodeWithConceptNode): + if self.first != other.first: + return False + + if self.last != other.last: + return False + + if len(self.content) != len(other.nodes): + return False + + for self_node, other_node in zip(self.content, other.nodes): + if self_node != other_node: + return False + + # at last + return True + + def __repr__(self): + txt = "SCWC(" + if self.start is not None: + txt += f"start={self.start}" + if self.end is not None: + txt += f", end={self.end}" + txt += f", source='{self.source}'" + return txt + ")" + + @property + def source(self): + """ + this code is a copy and paste from SourceCodeWithConceptNode.pseudo_fix_source + TODO: create a common function or whatever... + :return: + """ + source = self.first.source + for n in self.content: + source += " " + if hasattr(n, "source"): + source += n.source + elif hasattr(n, "concept"): + source += str(n.concept) + else: + source += " unknown" + source += self.last.source + return source + + class CN(HelperWithPos): """ ConceptNode tester class diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index 62d57f8..b68b9b5 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -175,38 +175,14 @@ class BaseParser: body=tree, try_parsed=try_parse) - def get_input_as_text(self, parser_input, custom_switcher=None, tracker=None): + @staticmethod + def get_input_as_lexer_nodes(parser_input, expected_parser=None): """ - Recreate back the source code from parser_input - :param parser_input: list of Tokens - :param custom_switcher: map of [TokenKind, overridden values] - :param tracker: keep track of the value overridden by custom_switcher + Extract the lexer node from the parser_input + :param parser_input: + :param expected_parser: returns the nodes if the parent parser is the expected one :return: """ - if isinstance(parser_input, list): - return self.get_text_from_tokens(parser_input, custom_switcher, tracker) - - if isinstance(parser_input, ParserResultConcept): - parser_input = parser_input.source - - if "c:" in parser_input: - return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher, tracker) - - return parser_input - - def get_input_as_tokens(self, parser_input, strip_eof=False): - if isinstance(parser_input, list): - return self.manage_eof(parser_input, strip_eof) - - if isinstance(parser_input, ParserResultConcept): - if parser_input.tokens: - return self.manage_eof(parser_input.tokens, strip_eof) - else: - return Tokenizer(parser_input.source) - - return Tokenizer(parser_input, yield_eof=not strip_eof) - - def get_input_as_lexer_nodes(self, parser_input, expected_parser=None): if not isinstance(parser_input, ParserResultConcept): return None diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index 244d78c..6f2ad97 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -10,14 +10,13 @@ from collections import defaultdict from dataclasses import dataclass from operator import attrgetter -import core.utils +import core.builtin_helpers from cache.Cache import Cache -from core import builtin_helpers from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts +from core.concept import DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts, Concept from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer, Token, TokenKind -from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode +from core.tokenizer import Tokenizer, TokenKind, Token +from parsers.BaseNodeParser import BaseNodeParser, GrammarErrorNode, UnrecognizedTokensNode, ConceptNode, LexerNode from parsers.BaseParser import BaseParser PARSERS = ["AtomNode", "SyaNode", "Python"] @@ -147,8 +146,16 @@ class MultiNode: class ParsingExpression: + log_sink = [] + + @classmethod + def reset_logs(cls): + cls.log_sink.clear() + def __init__(self, *args, **kwargs): self.elements = args + self.debug_enabled = False + self._has_unordered_choice = None nodes = kwargs.get('nodes', []) or [] if not hasattr(nodes, '__iter__'): @@ -178,11 +185,95 @@ class ParsingExpression: def parse(self, parser): # TODO : add memoization - return self._parse(parser) + + if self.debug_enabled: + self.debug(f">> {parser.pos:3d} : {self}") + + res = self._parse(parser) + return res def add_rule_name_if_needed(self, text): return text + "=" + self.rule_name if self.rule_name else text + def has_unordered_choice(self): + if self._has_unordered_choice is None: + visitor = HasUnorderedChoiceVisitor() + visitor.visit(self) + self._has_unordered_choice = visitor.value + + return self._has_unordered_choice + + def debug(self, msg): + self.log_sink.append((id(self), msg)) + + def get_debug(self): + if not self.debug_enabled: + return None + + # search for the first debug line for the current pexpression + id_self = id(self) + for i, line in enumerate(self.log_sink): + if line[0] == id_self: + break + else: + return "" + + n, debug = self.inner_get_debug(i, "") + self.log_sink.clear() + return debug + + def inner_get_debug(self, n, tab=""): + """ + + :param n: line number + :param tab: current indentation + :return: + """ + + if not self.debug: + return None + + id_self = id(self) + + def add_debug_for_current(_n, _debug): + if n >= len(self.log_sink): + return _n, _debug + + _l = self.log_sink[_n] + while _l[0] == id_self: + _debug += tab + _l[1] + "\n" + _n += 1 + if _n == len(self.log_sink): + return _n, _debug + _l = self.log_sink[_n] + return _n, _debug + + # if n >= len(self.log_sink): + # return n, None + # + # line = self.log_sink[n] + # + # if line[0] != id_self: + # # return n, f"{tab}>> No log for {self}\n" + # return n, None + + debug = "" + n, debug = add_debug_for_current(n, debug) + # while line[0] == id_self: + # debug += tab + line[1] + "\n" + # n += 1 + # if n == len(self.log_sink): + # return n, debug + # line = self.log_sink[n] + + for node in self.nodes: + n, node_debug = node.inner_get_debug(n, tab + " ") + if node_debug: + debug += node_debug + n, debug = add_debug_for_current(n, debug) + + return n, debug + class ConceptExpression(ParsingExpression): """ @@ -234,6 +325,10 @@ class ConceptExpression(ParsingExpression): parser_helper.parser.parser_input.tokens[node.start: node.end + 1], [node]) + @staticmethod + def get_recurse_id(parent_id, concept_id, rule_name): + return f"{parent_id}#{concept_id}({rule_name})" + class Sequence(ParsingExpression): """ @@ -277,6 +372,8 @@ class Sequence(ParsingExpression): parsing_contexts.extend(to_append) if len(parsing_contexts) == 0: + if self.debug_enabled: + self.debug(f"<< Failed matching {e}") return None to_append.clear() @@ -290,8 +387,12 @@ class Sequence(ParsingExpression): pcontext.fix_tokens(parser_helper) if len(parsing_contexts) == 1: + if self.debug_enabled: + self.debug(f"<< Found match '{parsing_contexts[0].node.source}'") return parsing_contexts[0].node + if self.debug_enabled: + self.debug(f"<< Found matches {[r.node.source for r in parsing_contexts]}") return MultiNode(parsing_contexts) def __repr__(self): @@ -537,8 +638,7 @@ class Match(ParsingExpression): super(Match, self).__init__(rule_name=rule_name, root=root) def parse(self, parser): - result = self._parse(parser) - return result + return self._parse(parser) class StrMatch(Match): @@ -573,14 +673,19 @@ class StrMatch(Match): def _parse(self, parser_helper): token = parser_helper.get_token() + m = token.str_value.lower() == self.to_match.lower() if self.ignore_case \ else token.strip_quote == self.to_match if m: + if self.debug_enabled: + self.debug(f"pos={parser_helper.pos}, token={token.str_value}, to_match={self.to_match} => Matched") node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value) parser_helper.next_token(self.skip_white_space) return node + if self.debug_enabled: + self.debug(f"pos={parser_helper.pos}, token={token.str_value}, to_match={self.to_match} => No Match") return None @@ -646,7 +751,6 @@ class StrMatch(Match): # parser.dprint("-- NoMatch at {}".format(c_pos)) # parser._nm_raise(self, c_pos, parser) - class ParsingExpressionVisitor: """ visit ParsingExpression @@ -654,9 +758,22 @@ class ParsingExpressionVisitor: STOP = "##_Stop_##" + def __init__(self, get_nodes=None, circular_ref_strategy=None): + self.get_nodes = get_nodes or (lambda pe: pe.elements) + + self.circular_ref_strategy = circular_ref_strategy + self.seen = set() if circular_ref_strategy else None + def visit(self, parsing_expression): name = parsing_expression.__class__.__name__ + if self.circular_ref_strategy: + if id(parsing_expression) in self.seen: + if self.circular_ref_strategy == "skip": + return + raise RecursionError(f"circular ref detected : {self}") + self.seen.add(id(parsing_expression)) + method = 'visit_' + name visitor = getattr(self, method, self.generic_visit) return visitor(parsing_expression) @@ -665,7 +782,7 @@ class ParsingExpressionVisitor: if hasattr(self, "visit_all"): self.visit_all(parsing_expression) - for node in parsing_expression.elements: + for node in self.get_nodes(parsing_expression): if isinstance(node, Concept): res = self.visit(ConceptExpression(node.key or node.name)) elif isinstance(node, str): @@ -679,6 +796,7 @@ class ParsingExpressionVisitor: class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor): def __init__(self, sheerka): + super().__init__() self.sheerka = sheerka self.first_tokens = None @@ -713,12 +831,29 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor): class BnfNodeConceptExpressionVisitor(ParsingExpressionVisitor): def __init__(self): + super().__init__() self.references = [] def visit_ConceptExpression(self, pe): self.references.append(pe.concept) +class HasUnorderedChoiceVisitor(ParsingExpressionVisitor): + def __init__(self): + super().__init__(lambda pe: pe.nodes, circular_ref_strategy="skip") + self.value = False + + def __repr__(self): + return f"HasUnorderedChoiceVisitor(={self.value})" + + def reset(self): + self.value = False + + def visit_UnOrderedChoice(self, parsing_expression): + self.value = True + return ParsingExpressionVisitor.STOP + + class BnfConceptParserHelper: def __init__(self, parser): self.parser = parser @@ -806,7 +941,6 @@ class BnfConceptParserHelper: if isinstance(node, MultiNode): # when multiple choices are found, use the longest result node = node.results[0].node - if node is not None and node.end != -1: self.sequence.append(self.create_concept_node(concept, node)) self.pos = node.end @@ -835,7 +969,7 @@ class BnfConceptParserHelper: self.unrecognized_tokens.fix_source() # try to recognize concepts - nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized( + nodes_sequences = core.builtin_helpers.get_lexer_nodes_from_unrecognized( self.parser.context, self.unrecognized_tokens, PARSERS) @@ -867,12 +1001,17 @@ class BnfConceptParserHelper: clone.debug = self.debug[:] self.errors = self.errors[:] clone.sequence = self.sequence[:] - clone.pos = self.pos + clone.unrecognized_tokens = self.unrecognized_tokens.clone() + clone.has_unrecognized = self.has_unrecognized + clone.bnf_parsed = self.bnf_parsed + + clone.pos = self.pos + return clone def finalize(self): - if self.bnf_parsed > 0: + if self.bnf_parsed: self.manage_unrecognized() for forked in self.forked: # manage that some clones may have been forked @@ -883,13 +1022,12 @@ class BnfConceptParserHelper: key = (template.key, template.id) if template.id else template.key concept = sheerka.new(key) concept = self.finalize_concept(sheerka, concept, underlying) - concept_node = ConceptNode( - concept, - underlying.start, - underlying.end, - self.parser.parser_input.tokens[underlying.start: underlying.end + 1], - None, - underlying) + concept_node = ConceptNode(concept, + underlying.start, + underlying.end, + self.parser.parser_input.tokens[underlying.start: underlying.end + 1], + None, + underlying) return concept_node def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True): @@ -1015,6 +1153,7 @@ class BnfNodeParser(BaseNodeParser): if 'sheerka' in kwargs: sheerka = kwargs.get("sheerka") self.concepts_grammars = sheerka.concepts_grammars + self.sheerka = sheerka else: self.concepts_grammars = Cache() @@ -1031,6 +1170,7 @@ class BnfNodeParser(BaseNodeParser): @staticmethod def get_valid(parsers_helpers): + valid_parser_helpers = [] for parser_helper in parsers_helpers: if not parser_helper.bnf_parsed or parser_helper.has_error(): @@ -1146,7 +1286,7 @@ class BnfNodeParser(BaseNodeParser): def fix_infinite_recursions(self, context, grammar, concept_id, parsing_expression): """ - Check the newly created parsing expresion + Check the newly created parsing expression Some infinite recursion can be resolved, simply by removing the pexpression that causes the loop Let's look for that :param context: @@ -1162,7 +1302,7 @@ class BnfNodeParser(BaseNodeParser): for node_id in path_: expression_ = expression_.nodes[0] if isinstance(expression_, ConceptExpression) else expression_ for i, node in [(i, n) for i, n in enumerate(expression_.nodes) if isinstance(n, ConceptExpression)]: - if node.recurse_id == node_id or node.concept.id == node_id: + if node_id in (node.recurse_id, node.concept.id): index_ = i parent_ = expression_ expression_ = node # take the child of the ConceptExpression found @@ -1220,17 +1360,22 @@ class BnfNodeParser(BaseNodeParser): in_recursion.extend(already_found) return True already_found.append(id_to_use) - return self.check_for_infinite_recursion( - parsing_expression.nodes[0], already_found, in_recursion, only_first) + return self.check_for_infinite_recursion(parsing_expression.nodes[0], + already_found, + in_recursion, + only_first) + + already_found_for_current_node = [] if isinstance(parsing_expression, Sequence): - # for sequence, we need to check all nodes + # for sequence, we need to check all nodes (unless, only first) if only_first: nodes = [] if len(parsing_expression.nodes) == 0 else [parsing_expression.nodes[0]] else: nodes = parsing_expression.nodes for node in nodes: - already_found_for_current_node = already_found.copy() + already_found_for_current_node.clear() + already_found_for_current_node.extend(already_found) if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, False): return True return False @@ -1239,7 +1384,8 @@ class BnfNodeParser(BaseNodeParser): # for ordered choice, if there is at least one node that does not resolved to a recursion # we are safe for node in parsing_expression.nodes: - already_found_for_current_node = already_found.copy() + already_found_for_current_node.clear() + already_found_for_current_node.extend(already_found) if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True): return True else: @@ -1248,7 +1394,8 @@ class BnfNodeParser(BaseNodeParser): if isinstance(parsing_expression, UnOrderedChoice): for node in parsing_expression.nodes: - already_found_for_current_node = already_found.copy() + already_found_for_current_node.clear() + already_found_for_current_node.extend(already_found.copy()) if self.check_for_infinite_recursion(node, already_found_for_current_node, in_recursion, True): return True return False @@ -1278,7 +1425,8 @@ class BnfNodeParser(BaseNodeParser): root_concept=concept, desc=desc) as sub_context: # get the parsing expression - ret = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_update) + to_skip = {concept.id} + ret = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_skip, to_update) # check and update parsing expression that are still under construction # Note that we only update the concept that will update concepts_grammars @@ -1289,15 +1437,10 @@ class BnfNodeParser(BaseNodeParser): if isinstance(node, UnderConstruction): pe.nodes[i] = grammar.get(node.concept_id) - # # check for infinite recursions. - # # and try to fix them when possible - # already_found = [concept.id] - # concepts_in_recursion = [] - # if self.check_for_infinite_recursion(ret, already_found, concepts_in_recursion): - # chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion) - # for concept_id in concepts_in_recursion: - # grammar[concept_id] = chicken_anf_egg - + # KSI 20200826 + # To be rewritten into get_infinite_recursions + # I have changed resolve_concept_parsing_expression() to directly avoid obvious circular references + # So it's no longer need to search and fix them concepts_in_recursion = self.fix_infinite_recursions(context, grammar, concept.id, ret) if concepts_in_recursion: chicken_anf_egg = context.sheerka.new(BuiltinConcepts.CHICKEN_AND_EGG, body=concepts_in_recursion) @@ -1307,54 +1450,67 @@ class BnfNodeParser(BaseNodeParser): # update, in case of infinite circular recursion ret = grammar[concept.id] - # finally, update concept grammar + # finally, update the list of the known pexpression (self.concepts_grammars) + # We do not add pexpressions that contain UnOrderedChoice because the choices always depend on the current + # concept. + # For example, the pexpression for 'twenties' found under the concept 'hundreds' won't be the same than + # the pexpression 'twenties' under the concept 'thousand' or even the pexpression 'twenties' without any + # context. for k, v in grammar.items(): - self.concepts_grammars.put(k, v) - - # not quite sure that it is a good idea. - # Why do we want to corrupt previous valid entries ? - if context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG): + if k == concept.id: self.concepts_grammars.put(k, v) + elif context.sheerka.isinstance(v, BuiltinConcepts.CHICKEN_AND_EGG): + # not quite sure that it is a good idea. + # Why do we want to corrupt previous valid entries ? + self.concepts_grammars.put(k, v) + else: + if not v.has_unordered_choice(): + self.concepts_grammars.put(k, v) sub_context.add_values(return_values=ret) return ret - def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_update): + def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_skip, to_update): """ :param context: :param concept: concept :param name: rule_name of the concept if exists :param grammar: already resolved parsing expressions - :param to_update: parsing expressions that contains unresovled parsing expression + :param to_skip: list of concepts to skip in order to avoid circular references (only for UnOrderedChoice pe) + :param to_update: parsing expressions that contains unresolved parsing expression :return: """ - if context.sheerka.isaset(context, concept) and hasattr(context, "obj"): - key_to_use = f"{concept.id}#{name}#{context.obj.id}" + sheerka = context.sheerka + + if sheerka.isaset(context, concept) and hasattr(context, "obj"): + key_to_use = ConceptExpression.get_recurse_id(context.obj.id, concept.id, name) else: key_to_use = concept.id - if key_to_use in self.concepts_grammars: # validated entry - return self.concepts_grammars.get(key_to_use) + if key_to_use in self.concepts_grammars: + # Use the global pexpression only if it does not contains UnOrderedChoice + pe = self.concepts_grammars.get(key_to_use) + if not pe.has_unordered_choice(): + return self.concepts_grammars.get(key_to_use) if key_to_use in grammar: # under construction entry return grammar.get(key_to_use) desc = f"Resolve concept parsing expression for '{concept}'. {key_to_use=}" with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context: - if not concept.bnf: # to save a function call. Not sure it worth it. + if not concept.bnf: # 'if' is done outside to save a function call. Not sure it worth it. BaseNodeParser.ensure_bnf(sub_context, concept, self.name) grammar[key_to_use] = UnderConstruction(concept.id) - sheerka = context.sheerka if concept.metadata.definition_type == DEFINITION_TYPE_BNF: expression = concept.bnf desc = f"Bnf concept detected. Resolving parsing expression '{expression}'" with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: ssc.add_inputs(expression=expression) - resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update) + resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update) ssc.add_values(return_values=resolved) elif sheerka.isaset(context, concept): @@ -1363,15 +1519,15 @@ class BnfNodeParser(BaseNodeParser): ssc.add_inputs(concept=concept) concepts_in_group = self.sheerka.get_set_elements(ssc, concept) - valid_concepts = [] - for c in concepts_in_group: - if c.id == context.obj.id: - continue - - if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip: - continue - - valid_concepts.append(c) + valid_concepts = [c for c in concepts_in_group if c.id not in to_skip] + # for c in concepts_in_group: + # if c.id == context.obj.id: + # continue + # + # if hasattr(context, "concepts_to_skip") and c.id in context.concepts_to_skip: + # continue + # + # valid_concepts.append(c) nodes = [] for c in valid_concepts: @@ -1381,6 +1537,7 @@ class BnfNodeParser(BaseNodeParser): resolved = self.resolve_parsing_expression(ssc, UnOrderedChoice(*nodes), grammar, + to_skip, to_update) ssc.add_values(concepts_in_group=concepts_in_group) ssc.add_values(return_values=resolved) @@ -1389,7 +1546,7 @@ class BnfNodeParser(BaseNodeParser): desc = f"Concept is a simple concept." with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: expression = self.get_expression_from_concept_name(concept.name) - resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update) + resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_skip, to_update) grammar[key_to_use] = resolved @@ -1400,7 +1557,7 @@ class BnfNodeParser(BaseNodeParser): sub_context.add_values(return_values=resolved) return resolved - def resolve_parsing_expression(self, context, expression, grammar, to_update): + def resolve_parsing_expression(self, context, expression, grammar, to_skip, to_update): if isinstance(expression, str): ret = StrMatch(expression, ignore_case=self.ignore_case) @@ -1416,12 +1573,14 @@ class BnfNodeParser(BaseNodeParser): unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept) return self.add_error(unknown_concept) - pe = self.resolve_concept_parsing_expression( - context, - concept, - expression.rule_name, - grammar, - to_update) + inner_to_skip = to_skip.copy() + inner_to_skip.add(concept.id) + pe = self.resolve_concept_parsing_expression(context, + concept, + expression.rule_name, + grammar, + inner_to_skip, + to_update) if not isinstance(pe, (ParsingExpression, UnderConstruction)): return pe # an error is detected, escalate it @@ -1447,7 +1606,7 @@ class BnfNodeParser(BaseNodeParser): ret = expression ret.nodes = [] for e in ret.elements: - pe = self.resolve_parsing_expression(context, e, grammar, to_update) + pe = self.resolve_parsing_expression(context, e, grammar, to_skip, to_update) if not isinstance(pe, (ParsingExpression, UnderConstruction)): return pe # an error is detected, escalate it if isinstance(pe, UnderConstruction): @@ -1462,6 +1621,7 @@ class BnfNodeParser(BaseNodeParser): expression.sep = self.resolve_parsing_expression(context, expression.sep, grammar, + to_skip, to_update) return ret diff --git a/src/parsers/BnfParser.py b/src/parsers/BnfParser.py index 556922b..f440f32 100644 --- a/src/parsers/BnfParser.py +++ b/src/parsers/BnfParser.py @@ -5,8 +5,8 @@ from core.builtin_concepts import BuiltinConcepts from core.sheerka.Sheerka import ExecutionContext from core.tokenizer import Tokenizer, Token, TokenKind, LexerError from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode -from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \ - StrMatch +from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \ + ConceptExpression, StrMatch @dataclass() @@ -295,14 +295,15 @@ class BnfParser(BaseParser): self.next_token() if BnfParser.is_expression_a_set(self.context, expression): - root_concept = self.context.search( - start_with_self=True, - predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF, - get_obj=lambda ec: ec.action_context, - stop=lambda ec: ec.action == BuiltinConcepts.INIT_BNF) + root_concept = self.context.search(start_with_self=True, + predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF, + get_obj=lambda ec: ec.action_context, + stop=lambda ec: ec.action == BuiltinConcepts.INIT_BNF) root_concept = list(root_concept) if root_concept and hasattr(root_concept[0], "id"): - expression.recurse_id = f"{expression.concept.id}#{expression.rule_name}#{root_concept[0].id}" + expression.recurse_id = expression.get_recurse_id(root_concept[0].id, + expression.concept.id, + expression.rule_name) return expression @@ -313,7 +314,7 @@ class BnfParser(BaseParser): @staticmethod def update_recurse_id(context, concept_id, expression): if BnfParser.is_expression_a_set(context, expression): - expression.recurse_id = f"{expression.concept.id}#{expression.rule_name}#{concept_id}" + expression.recurse_id = expression.get_recurse_id(concept_id, expression.concept.id, expression.rule_name) for element in expression.elements: BnfParser.update_recurse_id(context, concept_id, element) diff --git a/src/parsers/PythonParser.py b/src/parsers/PythonParser.py index 1e0b2b6..a00c8b2 100644 --- a/src/parsers/PythonParser.py +++ b/src/parsers/PythonParser.py @@ -6,8 +6,8 @@ import core.utils from core.builtin_concepts import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute from core.tokenizer import LexerError, TokenKind +from parsers.BaseNodeParser import ConceptNode from parsers.BaseParser import BaseParser, Node, ErrorNode -from parsers.BnfNodeParser import ConceptNode log = logging.getLogger(__name__) diff --git a/src/parsers/PythonWithConceptsParser.py b/src/parsers/PythonWithConceptsParser.py index b8ca142..57a1cff 100644 --- a/src/parsers/PythonWithConceptsParser.py +++ b/src/parsers/PythonWithConceptsParser.py @@ -1,7 +1,8 @@ from core.builtin_concepts import BuiltinConcepts from core.sheerka.services.SheerkaExecute import SheerkaExecute +from parsers.BaseNodeParser import SourceCodeWithConceptNode from parsers.BaseParser import BaseParser -from parsers.BnfNodeParser import ConceptNode +from parsers.BaseNodeParser import ConceptNode from parsers.PythonParser import PythonParser from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser @@ -21,6 +22,16 @@ class PythonWithConceptsParser(BaseParser): res += c if c.isalnum() else "0" return res + @staticmethod + def get_nodes(nodes): + for node in nodes: + if isinstance(node, SourceCodeWithConceptNode): + yield node.first + yield from node.nodes + yield node.last + else: + yield node + def parse(self, context, parser_input): sheerka = context.sheerka nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser) @@ -63,7 +74,7 @@ class PythonWithConceptsParser(BaseParser): identifiers[id(c)] = identifier return identifier - for node in nodes: + for node in self.get_nodes(nodes): if isinstance(node, ConceptNode): source += node.source if to_parse: diff --git a/src/parsers/ShortTermMemoryParser.py b/src/parsers/ShortTermMemoryParser.py index 314242b..1bcabc4 100644 --- a/src/parsers/ShortTermMemoryParser.py +++ b/src/parsers/ShortTermMemoryParser.py @@ -9,7 +9,7 @@ class ShortTermMemoryParser(BaseParser): """ def __init__(self, **kwargs): - super().__init__("shortTermMemory", 85) + super().__init__("ShortTermMemory", 85) def parse(self, context, parser_input): """ diff --git a/src/parsers/SyaNodeParser.py b/src/parsers/SyaNodeParser.py index 717cd44..2a37e9f 100644 --- a/src/parsers/SyaNodeParser.py +++ b/src/parsers/SyaNodeParser.py @@ -125,6 +125,11 @@ class SyaConceptParserHelper: return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0 def is_next(self, token): + """ + To match long named concepts + :param token: + :return: + """ if self.is_matched() or len(self.expected) == 0: return False @@ -294,8 +299,9 @@ class InFixToPostFix: else: self.out.append(item) - # put the item to the list of awaiting parameters - self.parameters_list.append(item) + # put the item to the list of awaiting parameters only if it's not the end of function marker + if item != ")": + self.parameters_list.append(item) if len(self._concepts()) > 0: # try to predict the final position of the current concept @@ -339,9 +345,18 @@ class InFixToPostFix: self.unrecognized_tokens.add_token(token, parser_helper.start + i) def get_errors(self): + def has_error(item): + if isinstance(item, SyaConceptParserHelper) and item.error: + return True + if isinstance(item, SourceCodeWithConceptNode): + for n in item.nodes: + if hasattr(n, "error") and n.error: + return True + return False + res = [] res.extend(self.errors) - res.extend([item for item in self.out if isinstance(item, SyaConceptParserHelper) and item.error]) + res.extend([item for item in self.out if has_error(item)]) return res def lock(self): @@ -367,8 +382,8 @@ class InFixToPostFix: if len(self.parameters_list) > parser_helper.expected_parameters_before_first_token: # There are more parameters than needed by the new concept - # The others are either - # - parameters for the previous concept (if any) + # These others parameters are either + # - parameters for the previous suffixed concept (if any) # - concepts on their own # - syntax error # In all the cases, the only thing that matter is to pop what is expected by the new concept @@ -461,7 +476,7 @@ class InFixToPostFix: """ The unrecognized ends with an lpar '(' It means that its a function like foo(something) - The problem is that we need to know if there are other conceps before the function + The problem is that we need to know if there are other concepts before the function ex : suffix one function(x) suffix and one are not / may not be part of the name of the function @@ -585,7 +600,7 @@ class InFixToPostFix: del (current_concept.expected[0]) else: # error - # We are not parsing the concept we tought we were parsing. + # We are not parsing the concept we thought we were parsing. # Transform the eaten tokens into unrecognized # and discard the current SyaConceptParserHelper # TODO: manage the pending LPAR, RPAR ? @@ -697,6 +712,10 @@ class InFixToPostFix: for to_out in parsing_res.to_out: instance._put_to_out(to_out) + # make sure to pop the current concept + if self._stack_isinstance(SyaConceptParserHelper): + self.pop_stack_to_out() + instance._put_to_out(")") # mark where the function should end instance.stack.append(parsing_res.function) instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized diff --git a/src/parsers/UnrecognizedNodeParser.py b/src/parsers/UnrecognizedNodeParser.py index 731d9bb..b1b0c0e 100644 --- a/src/parsers/UnrecognizedNodeParser.py +++ b/src/parsers/UnrecognizedNodeParser.py @@ -1,13 +1,13 @@ from dataclasses import dataclass +import core.utils from core.builtin_concepts import BuiltinConcepts +from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes from core.concept import Concept from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode from parsers.BaseParser import BaseParser, ErrorNode -from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes -import core.utils -PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"] +PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"] @dataclass() @@ -64,7 +64,18 @@ class UnrecognizedNodeParser(BaseParser): elif isinstance(node, SourceCodeNode): sequences_found = core.utils.product(sequences_found, [node]) - has_unrecognized = True # never trust source code not. I may be an invalid source code + has_unrecognized = True # to let PythonWithConceptParser validate the code + + elif isinstance(node, SourceCodeWithConceptNode): + for i, n in [(i, n) for i, n in enumerate(node.nodes) if isinstance(n, ConceptNode)]: + res = self.validate_concept_node(context, n) + if not res.status: + self.add_error(res.body) + break + else: + node.nodes[i] = res.body + sequences_found = core.utils.product(sequences_found, [node]) + has_unrecognized = True # to let PythonWithConceptParser validate the code else: # cannot happen as of today :-) raise NotImplementedError(f"Node is {type(node)}, which is not supported yet") @@ -104,19 +115,47 @@ class UnrecognizedNodeParser(BaseParser): :param concept: :return: """ - for name, value in concept.compiled.items(): - if isinstance(value, Concept): - _validate_concept(value) + for k, v in concept.compiled.items(): + if isinstance(v, Concept): + _validate_concept(v) - elif isinstance(value, UnrecognizedTokensNode): - res = parse_unrecognized(context, value.source, PARSERS) + elif isinstance(v, UnrecognizedTokensNode): + res = parse_unrecognized(context, v.source, PARSERS) res = only_successful(context, res) # only key successful parsers if res.status: - concept.compiled[name] = res.body.body + concept.compiled[k] = res.body.body else: - errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'")) + errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'")) + + def _get_source(compiled, var_name): + if var_name not in compiled: + return None + if not isinstance(compiled[var_name], list): + return None + if not len(compiled[var_name]) == 1: + return None + if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE): + return None + if not sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT): + return None + if compiled[var_name][0].body.name == "parsers.ShortTermMemory": + return None + + return compiled[var_name][0].body.source _validate_concept(concept_node.concept) + + # Special case where the values of the variables are the names of the variable + # example : Concept("a plus b").def_var("a").def_var("b") + # and the user has entered 'a plus b' + # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2') + # This means that 'a' and 'b' don't have any real value + for name, value in concept_node.concept.metadata.variables: + if not _get_source(concept_node.concept.compiled, name) == name: + break + else: + concept_node.concept.metadata.is_evaluated = True + if len(errors) > 0: return context.sheerka.ret(self.name, False, errors) else: diff --git a/tests/BaseTest.py b/tests/BaseTest.py index b34e282..f8591e3 100644 --- a/tests/BaseTest.py +++ b/tests/BaseTest.py @@ -80,6 +80,13 @@ class BaseTest: @staticmethod def get_concept_instance(sheerka, concept, **kwargs): + """ + Use to instantiate concept with default variables already set + :param sheerka: + :param concept: + :param kwargs: + :return: + """ instance = sheerka.new(concept.key if isinstance(concept, Concept) else concept) for i, var in enumerate(instance.metadata.variables): if var[0] in kwargs: diff --git a/tests/cache/test_cache.py b/tests/cache/test_cache.py index 06b4f33..ffdf266 100644 --- a/tests/cache/test_cache.py +++ b/tests/cache/test_cache.py @@ -1,4 +1,5 @@ import pytest +from cache.BaseCache import MAX_INITIALIZED_KEY from cache.Cache import Cache from cache.CacheManager import CacheManager from cache.DictionaryCache import DictionaryCache @@ -55,6 +56,14 @@ class TestCache(TestUsingMemoryBasedSheerka): assert cache.get("key") == "key_not_found" assert "key" in cache # default callable are put in cache + def test_i_dont_ask_the_remote_repository_twice(self): + nb_request = [] + + cache = Cache(default=lambda key: nb_request.append("requested")) + assert cache.get("key") is None + assert cache.get("key") is None + assert len(nb_request) == 1 + def test_i_can_put_and_retrieve_value_from_list_cache(self): cache = ListCache() @@ -532,3 +541,27 @@ class TestCache(TestUsingMemoryBasedSheerka): cache.delete("key") assert cache.get("value") is None assert cache.to_remove == {"key"} + + def test_initialized_key_is_removed_when_the_entry_is_found(self): + caches = [Cache(), ListCache(), ListIfNeededCache(), SetCache()] + + for cache in caches: + cache.put("key", "value") + cache.get("key") + + assert len(cache._initialized_keys) == 0 + + cache = IncCache() + cache.put("key", 10) + cache.get("key") + assert len(cache._initialized_keys) == 0 + + def test_initialized_keys_are_reset_when_max_length_is_reached(self): + cache = Cache() + for i in range(MAX_INITIALIZED_KEY): + cache.get(str(i)) + + assert len(cache._initialized_keys) == MAX_INITIALIZED_KEY + + cache.get(str(MAX_INITIALIZED_KEY + 1)) + assert len(cache._initialized_keys) == 1 diff --git a/tests/core/test_SheerkaSetsManager.py b/tests/core/test_SheerkaSetsManager.py index 544a6e8..1c6850b 100644 --- a/tests/core/test_SheerkaSetsManager.py +++ b/tests/core/test_SheerkaSetsManager.py @@ -273,7 +273,7 @@ class TestSheerkaSetsManager(TestUsingMemoryBasedSheerka): # update number sheerka.set_isa(context, sheerka.new("one"), number) - assert twenties.bnf.elements[1].recurse_id == "1002#number#1003" + assert twenties.bnf.elements[1].recurse_id == "1003#1002(number)" def test_concepts_in_group_cache_is_updated(self): sheerka, context, one, two, number = self.init_concepts("one", "two", "number") diff --git a/tests/evaluators/test_LexerNodeEvaluator.py b/tests/evaluators/test_LexerNodeEvaluator.py index 7850970..748b578 100644 --- a/tests/evaluators/test_LexerNodeEvaluator.py +++ b/tests/evaluators/test_LexerNodeEvaluator.py @@ -5,8 +5,8 @@ from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, Built from core.concept import Concept, ConceptParts, DoNotResolve from core.sheerka.services.SheerkaExecute import ParserInput from evaluators.LexerNodeEvaluator import LexerNodeEvaluator -from parsers.BaseNodeParser import SourceCodeNode -from parsers.BnfNodeParser import ConceptNode, BnfNodeParser, UnrecognizedTokensNode +from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode +from parsers.BnfNodeParser import BnfNodeParser from parsers.PythonParser import PythonNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index 3e5909b..dc9a7b2 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -1091,6 +1091,39 @@ as: assert res[0].status assert res[0].body + def test_i_can_evaluate_source_code_with_concept(self): + init = [ + "def concept the a ret a", + ] + + sheerka = self.init_scenario(init) + res = sheerka.evaluate_user_input("desc(the a)") + + assert len(res) == 1 + assert res[0].status + + def test_i_can_parse_concept_with_variables_using_short_name(self): + init = [ + "def concept foo from a foo b where a,b", + "def concept bar from bar a where a", + "def concept baz from a baz where a", + ] + + sheerka = self.init_scenario(init) + res = sheerka.evaluate_user_input("desc(foo)") + assert len(res) == 1 + assert res[0].status + + sheerka = self.init_scenario(init) + res = sheerka.evaluate_user_input("desc(bar)") + assert len(res) == 1 + assert res[0].status + + sheerka = self.init_scenario(init) + res = sheerka.evaluate_user_input("desc(baz)") + assert len(res) == 1 + assert res[0].status + class TestSheerkaNonRegFile(TestUsingFileBasedSheerka): def test_i_can_def_several_concepts(self): diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index 971926e..d59db84 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -87,10 +87,13 @@ def get_node( return sub_expr if isinstance(sub_expr, SCWC): - first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya) - last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya) - content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content] - return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source() + sub_expr.first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya) + sub_expr.last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya) + sub_expr.content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content] + sub_expr.fix_pos(sub_expr.first) + sub_expr.fix_pos(sub_expr.last) + return sub_expr + #return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source() if isinstance(sub_expr, SCN): node = get_node(concepts_map, expression_as_tokens, sub_expr.source, sya=sya) diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index 9621188..6f42d3d 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -3,8 +3,8 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYPE_BNF from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseNodeParser import CNC, UTN, CN -from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ - Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice +from parsers.BnfNodeParser import StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ + Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice, BnfNodeParser from parsers.BnfParser import BnfParser import tests.parsers.parsers_utils @@ -969,7 +969,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes - def test_i_can_get_parsing_expression_when_sequence_of_concept(self): + def test_i_can_get_parsing_expression_when_sequence_of_concepts(self): my_map = { "one": Concept("one"), "two_ones": self.bnf_concept("two_ones", Sequence(ConceptExpression("one"), ConceptExpression("one"))) @@ -1203,6 +1203,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): def test_i_can_parse_hundreds_like_expression(self): sheerka, context, parser = self.init_parser(init_from_sheerka=True) + sheerka.concepts_grammars.clear() text = "three hundred and thirty two" three = CC("three", body=DoNotResolve("three")) @@ -1226,7 +1227,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): thirties=thirty_two)) expected_array = compute_expected_array(cmap, text, [expected]) + res = parser.parse(context, ParserInput(text)) + parser_result = res.value concepts_nodes = res.value.value @@ -1400,6 +1403,15 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert parser.parse(context, ParserInput("foo foo foo bar")).status assert not parser.parse(context, ParserInput("foo baz")).status + def test_i_only_get_the_requested_parsing_expression(self): + sheerka, context, parser = self.init_parser(init_from_sheerka=True) + parser.context = context + parser.sheerka = sheerka + sheerka.concepts_grammars.clear() # to simulate restart + + parser.get_parsing_expression(context, sheerka.resolve("thirties")) + assert len(parser.concepts_grammars) == 9 # requested concept + concepts that do not contains UnorderedChoice + @pytest.mark.parametrize("name, expected", [ (None, []), ("", []), diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index 804d287..0ce235f 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -5,8 +5,9 @@ from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer, TokenKind, LexerError, Token from parsers.BaseNodeParser import cnode from parsers.BaseParser import UnexpectedTokenErrorNode -from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ - BnfNodeParser, ConceptExpression +from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, \ + OneOrMore, ConceptExpression +from parsers.BnfNodeParser import BnfNodeParser from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -235,4 +236,4 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): assert res.status pexpression = res.value.value assert pexpression == Sequence(StrMatch('twenty'), ConceptExpression(number, "n1")) - assert pexpression.elements[1].recurse_id == "1003#n1#1004" + assert pexpression.elements[1].recurse_id == "1004#1003(n1)" diff --git a/tests/parsers/test_SyaNodeParser.py b/tests/parsers/test_SyaNodeParser.py index 91d4713..9cbdd82 100644 --- a/tests/parsers/test_SyaNodeParser.py +++ b/tests/parsers/test_SyaNodeParser.py @@ -5,7 +5,7 @@ from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonMana from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer from parsers.BaseNodeParser import utnode, ConceptNode, cnode, short_cnode, UnrecognizedTokensNode, \ - SCWC, CNC, UTN + SCWC, CNC, UTN, SourceCodeWithConceptNode from parsers.PythonParser import PythonNode from parsers.SyaNodeParser import SyaNodeParser, SyaConceptParserHelper, SyaAssociativity, \ NoneAssociativeSequenceErrorNode, TooManyParametersFound @@ -56,10 +56,12 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): cmap["minus"].set_prop(BuiltinConcepts.ASSOCIATIVITY, "right") TestSyaNodeParser.sheerka.services[SheerkaComparisonManager.NAME].set_is_greater_than(context, BuiltinConcepts.PRECEDENCE, - cmap["mult"], cmap["plus"]) + cmap["mult"], + cmap["plus"]) TestSyaNodeParser.sheerka.services[SheerkaComparisonManager.NAME].set_is_greater_than(context, BuiltinConcepts.PRECEDENCE, - cmap["mult"], cmap["minus"]) + cmap["mult"], + cmap["minus"]) # TestSyaNodeParser.sheerka.force_sya_def(context, [ # (cmap["plus"].id, 5, SyaAssociativity.Right), @@ -716,6 +718,9 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): ["one", "two", "three", "if", SCWC(" function(", ")", "x$!#")]]), ("one prefixed function(two)", [["one", "prefixed", SCWC(" function(", ")", "two")]]), ("suffixed one function(two)", [["one", "suffixed", SCWC(" function(", ")", "two")]]), + ( + "func1(suffixed one func2(two))", + [[SCWC("func1(", (")", 1), "one", "suffixed", SCWC(" func2(", ")", "two"))]]), ]) def test_i_can_post_fix_when_parenthesis_and_unknown(self, expression, expected_sequences): sheerka, context, parser = self.init_parser() @@ -728,19 +733,19 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert res_i.out == expected_array @pytest.mark.parametrize("expression, expected", [ - # ("(", ("(", 0)), - # ("one plus ( 1 + ", ("(", 4)), - # ("one( 1 + ", ("(", 1)), - # ("one ( 1 + ", ("(", 2)), - # ("function( 1 + ", ("(", 1)), - # ("function ( 1 + ", ("(", 2)), - # ("one plus ) 1 + ", (")", 4)), - # ("one ) 1 + ", (")", 2)), - # ("function ) 1 + ", (")", 2)), - # ("one ? ( : two", ("(", 4)), - # ("one ? one plus ( : two", ("(", 8)), - # ("one ? ) : two", (")", 4)), - # ("one ? one plus ) : two", (")", 8)), + ("(", ("(", 0)), + ("one plus ( 1 + ", ("(", 4)), + ("one( 1 + ", ("(", 1)), + ("one ( 1 + ", ("(", 2)), + ("function( 1 + ", ("(", 1)), + ("function ( 1 + ", ("(", 2)), + ("one plus ) 1 + ", (")", 4)), + ("one ) 1 + ", (")", 2)), + ("function ) 1 + ", (")", 2)), + ("one ? ( : two", ("(", 4)), + ("one ? one plus ( : two", ("(", 8)), + ("one ? ) : two", (")", 4)), + ("one ? one plus ) : two", (")", 8)), ("(one plus ( 1 + )", ("(", 0)), ]) def test_i_can_detect_parenthesis_mismatch_error_when_post_fixing(self, expression, expected): @@ -797,6 +802,29 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert len(res) == 1 assert res[0].out == expected_array + def test_i_cannot_post_fix_using_concept_short_name(self): + concepts_map = { + "infixed": self.from_def_concept("infixed", "a infixed b", ["a", "b"]), + "suffixed": self.from_def_concept("suffixed", "suffixed a", ["a"]), + "prefixed": self.from_def_concept("prefixed", "a prefixed", ["a"]), + } + sheerka, context, parser = self.init_parser(concepts_map) + + res = parser.infix_to_postfix(context, ParserInput("desc(infixed)")) + assert len(res) == 1 + assert isinstance(res[0].out[0], SourceCodeWithConceptNode) + assert res[0].out[0].nodes[0].error == 'Not enough prefix parameters' + + res = parser.infix_to_postfix(context, ParserInput("desc(suffixed)")) + assert len(res) == 1 + assert isinstance(res[0].out[0], SourceCodeWithConceptNode) + assert res[0].out[0].nodes[0].error == 'Not enough suffix parameters' + + res = parser.infix_to_postfix(context, ParserInput("desc(prefixed)")) + assert len(res) == 1 + assert isinstance(res[0].out[0], SourceCodeWithConceptNode) + assert res[0].out[0].nodes[0].error == 'Not enough prefix parameters' + @pytest.mark.parametrize("expression", [ "one ? two : three", "one?two:three", @@ -1117,6 +1145,26 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert lexer_nodes == expected + def test_i_cannot_parse_function_using_short_name(self): + concepts_map = { + "infixed": self.from_def_concept("infixed", "a infixed b", ["a", "b"]), + "suffixed": self.from_def_concept("suffixed", "suffixed a", ["a"]), + "prefixed": self.from_def_concept("prefixed", "a prefixed", ["a"]), + } + sheerka, context, parser = self.init_parser(concepts_map) + + res = parser.parse(context, ParserInput("desc(infixed)")) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + + res = parser.parse(context, ParserInput("desc(suffixed)")) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + + res = parser.parse(context, ParserInput("desc(prefixed)")) + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + @pytest.mark.parametrize("text", [ "one", "1 + 1", @@ -1146,4 +1194,3 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka): assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) - diff --git a/tests/parsers/test_UnrecognizedNodeParser.py b/tests/parsers/test_UnrecognizedNodeParser.py index c17ae57..2a83606 100644 --- a/tests/parsers/test_UnrecognizedNodeParser.py +++ b/tests/parsers/test_UnrecognizedNodeParser.py @@ -2,7 +2,7 @@ from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from core.concept import Concept, CC from core.tokenizer import Tokenizer, TokenKind from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, scnode, cnode, \ - utnode, SyaAssociativity, CN, CNC, UTN + utnode, SyaAssociativity, CN, CNC, UTN, SourceCodeWithConceptNode, SCWC, SourceCodeNode from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -24,11 +24,20 @@ def get_input_nodes_from(my_concepts_map, full_expr, *args): concept = n.concept if hasattr(n, "concept") and n.concept else \ Concept().update_from(my_concepts_map[n.concept_key]) tokens = full_expr_as_tokens[n.start: n.end + 1] - if hasattr(node, "compiled"): + if hasattr(n, "compiled"): for k, v in n.compiled.items(): concept.compiled[k] = _get_real_node(v) return ConceptNode(concept, n.start, n.end, tokens) + if isinstance(n, SCWC): + n.first = _get_real_node(n.first) + n.last = _get_real_node(n.first) + n.content = tuple(_get_real_node(nn) for nn in n.content) + return SourceCodeWithConceptNode(n.first, n.last, list(n.content)) + + if isinstance(n, (UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SourceCodeWithConceptNode)): + return n + raise NotImplementedError() res = [] @@ -307,6 +316,41 @@ class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka): exclude_body=True) assert actual_nodes == expected_array + def test_i_can_parse_unrecognized_source_code_with_concept_node(self): + sheerka, context, parser = self.init_parser() + + expression = "desc(a plus b)" + source_code_concepts = SCWC("desc(", ")", CNC("plus", a=UTN("a"), b=UTN("b"))) + nodes = get_input_nodes_from(concepts_map, expression, source_code_concepts) + parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes) + + res = parser.parse(context, parser_input) + parser_result = res.body + actual_nodes = res.body.body + + assert not res.status # status is False to let PythonWithConceptParser validate the code + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert len(actual_nodes) == 1 + assert actual_nodes[0].nodes[0].concept.metadata.is_evaluated # 'a plus b' is recognized as concept definition + + def test_i_can_parse_unrecognized_source_code_with_concept_node_when_var_in_short_term_memory(self): + sheerka, context, parser = self.init_parser() + + expression = "desc(a plus b)" + source_code_concepts = SCWC("desc(", ")", CNC("plus", a=UTN("a"), b=UTN("b"))) + nodes = get_input_nodes_from(concepts_map, expression, source_code_concepts) + parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes) + + context.add_to_short_term_memory("a", 1) + res = parser.parse(context, parser_input) + parser_result = res.body + actual_nodes = res.body.body + + assert not res.status # status is False to let PythonWithConceptParser validate the code + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert len(actual_nodes) == 1 + assert not actual_nodes[0].nodes[0].concept.metadata.is_evaluated # 'a plus b' need to be evaluated + def test_i_can_parse_sequences(self): sheerka, context, parser = self.init_parser()