From f26c391d3fbb9f4a7cfe9fd6ef1ef542aa8799f9 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Thu, 2 Jul 2020 16:32:02 +0200 Subject: [PATCH] Fixed parsing of BNF concepts mixed with isaset concepts --- src/core/builtin_helpers.py | 7 +- src/core/concept.py | 14 +- .../sheerka/services/SheerkaSetsManager.py | 13 +- src/parsers/BaseNodeParser.py | 3 + src/parsers/BnfNodeParser.py | 338 +++++++++++++----- src/sheerkapickle/SheerkaPickler.py | 3 +- src/sheerkapickle/sheerka_handlers.py | 4 +- tests/core/test_ExecutionContext.py | 17 + tests/core/test_SheerkaEvaluateConcept.py | 4 +- tests/non_reg/test_sheerka_non_reg.py | 8 +- tests/parsers/test_BnfNodeParser.py | 111 ++++-- tests/parsers/test_BnfParser.py | 14 +- 12 files changed, 413 insertions(+), 123 deletions(-) diff --git a/src/core/builtin_helpers.py b/src/core/builtin_helpers.py index 32d18d2..8072749 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -5,7 +5,7 @@ import core.ast.nodes from core.ast.nodes import CallNodeConcept from core.ast.visitors import UnreferencedNamesVisitor from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept +from core.concept import Concept, NotInit from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode from parsers.BaseParser import BaseParser, ErrorNode @@ -324,6 +324,11 @@ def ensure_evaluated(context, concept): if concept.metadata.is_evaluated: return concept + # do not try to evaluate concept that are not fully initialized + for var in concept.metadata.variables: + if var[0] not in concept.values or concept.get_value(var[0]) == NotInit: + return concept + with context.push(BuiltinConcepts.EVALUATE_CONCEPT, concept, desc=f"Evaluating concept {concept}") as sub_context: sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED) evaluated = context.sheerka.evaluate_concept(sub_context, concept) diff --git a/src/core/concept.py b/src/core/concept.py index 4c39beb..c73c57b 100644 --- a/src/core/concept.py +++ b/src/core/concept.py @@ -22,6 +22,16 @@ DEFINITION_TYPE_BNF = "bnf" DEFINITION_TYPE_DEF = "def" +class NotInitialized: + value = "**NotInit**" + + def __repr__(self): + return self.value + + +NotInit = NotInitialized() + + class ConceptParts(Enum): """ Lists metadata that can contains some code @@ -51,7 +61,7 @@ class ConceptMetadata: desc: str # possible description for the concept id: str # unique identifier for a concept. The id will never be modified (but the key can) props: dict # hashmap of properties, values - variables: list # list of concept variables, with their default values + variables: list # list of concept variables(tuple), with their default values is_evaluated: bool = False # True is the concept is evaluated by sheerka.eval_concept() need_validation = False # True if the properties of the concept need to be validated full_serialization: bool = False # If True, the full object will be serialized, rather than just the diff @@ -183,7 +193,7 @@ class Concept: self.metadata.variables.append((var_name, default_value)) - self.set_value(var_name, None) # do not set the default value + self.set_value(var_name, NotInit) # do not set the default value # why not setting variables to the default values ? # Because it may not be the real values, as metadata.variables need to be evaluated diff --git a/src/core/sheerka/services/SheerkaSetsManager.py b/src/core/sheerka/services/SheerkaSetsManager.py index b12d7cc..a556cea 100644 --- a/src/core/sheerka/services/SheerkaSetsManager.py +++ b/src/core/sheerka/services/SheerkaSetsManager.py @@ -3,7 +3,7 @@ from cache.Cache import Cache from cache.SetCache import SetCache from core.ast.nodes import python_to_concept from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, ConceptParts, ensure_concept +from core.concept import Concept, ConceptParts, ensure_concept, DEFINITION_TYPE_BNF from core.sheerka.services.sheerka_service import BaseService GROUP_PREFIX = 'All_' @@ -145,10 +145,13 @@ class SheerkaSetsManager(BaseService): return concepts + # already in cache ? if res := self.concepts_in_set.get(concept.id): return res res = _get_set_elements(concept) + + # put in cache self.concepts_in_set.put(concept.id, res) return res @@ -196,6 +199,11 @@ class SheerkaSetsManager(BaseService): if not (isinstance(concept, Concept) and concept.id): return False + # KSI 29062020 + # To resolve infinite recursion between group concepts and BNF concepts + if concept.metadata.definition_type == DEFINITION_TYPE_BNF: + return False + # check if it has a group # TODO: use cache instead of directly requesting sdp if self.sets.get(concept.id): @@ -255,7 +263,8 @@ for x in xx__concepts__xx: for element_id in ids: concept = self.sheerka.get_by_id(element_id) if len(concept.metadata.variables) == 0: - # only evaluate + # The concepts are directly taken from Sheerka.get_by_id, so variable cannot be filled + # It's the reason why we only evaluate concept with no variable evaluated = self.sheerka.evaluate_concept(sub_context, concept) if context.sheerka.is_success(evaluated): result.append(evaluated) diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index 762b050..768bd9f 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -43,6 +43,9 @@ class LexerNode(Node): self.source = BaseParser.get_text_from_tokens(self.tokens) return self + def clone(self): + pass + class UnrecognizedTokensNode(LexerNode): def __init__(self, start, end, tokens): diff --git a/src/parsers/BnfNodeParser.py b/src/parsers/BnfNodeParser.py index 42a00be..a2e6444 100644 --- a/src/parsers/BnfNodeParser.py +++ b/src/parsers/BnfNodeParser.py @@ -8,6 +8,7 @@ ##################################################################################################### from collections import defaultdict from dataclasses import dataclass +from operator import attrgetter import core.utils from cache.Cache import Cache @@ -17,19 +18,51 @@ from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptPart from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer, Token, TokenKind from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode -from parsers.BaseParser import ErrorNode +from parsers.BaseParser import BaseParser PARSERS = ["AtomNode", "SyaNode", "Python"] +# def debug(obj): +# with open("debug.txt", "a") as f: +# f.write(f"{obj}\n") + +def debug(obj): + pass + + @dataclass -class ConceptParsingError(ErrorNode): - concept: Concept +class ParsingContext: + """ + Class used to allow backtracking when parsing UnOrderedChoice pexpression + It keeps the LexerNode parsed and the position of the parser right after the parsing + """ + node: LexerNode # node parsed + pos: int # position of the parser after the parsing + + def clone(self): + return ParsingContext(self.node.clone(), self.pos) + + def fix_tokens(self, parser_helper): + """ + When the nodes are fully created, make sure that their sources and tokens are correct + :param parser_helper: + :return: + """ + self.node.tokens = parser_helper.parser.parser_input.tokens[self.node.start: self.node.end + 1] + self.node.source = BaseParser.get_text_from_tokens(self.node.tokens) + + def __mul__(self, other): + res = [self] + for i in range(other - 1): + res.append(self.clone()) + return res class NonTerminalNode(LexerNode): """ - Returned by the BnfNodeParser + A LexerNode is the result of the parsing of a parsing expression (pexpression) + NonTerminalNode when parsing a pexpression which has children (Sequence, OrderedChoice, Optional, Repetition...) """ def __init__(self, parsing_expression, start, end, tokens, children=None): @@ -57,10 +90,15 @@ class NonTerminalNode(LexerNode): def __hash__(self): return hash((self.parsing_expression, self.start, self.end, self.children)) + def clone(self): + clone = NonTerminalNode(self.parsing_expression, self.start, self.end, self.tokens, self.children.copy()) + return clone + class TerminalNode(LexerNode): """ - Returned by the BnfNodeParser + A LexerNode is the result of the parsing of a parsing expression (pexpression) + TerminalNode for StrMatch """ def __init__(self, parsing_expression, start, end, value): @@ -84,6 +122,37 @@ class TerminalNode(LexerNode): def __hash__(self): return hash((self.parsing_expression, self.start, self.end, self.value)) + def clone(self): + clone = TerminalNode(self.parsing_expression, self.start, self.end, self.value) + return clone + + +class MultiNode: + """" + A LexerNode is the result of the parsing of a parsing expression (pexpression) + MultiNode is used by the UnorderedChoice parsing expression when multiple choices are found + """ + + def __init__(self, results): + self.results = results + + def __repr__(self): + text = "MultiNode(" + sources = [r.node.source for r in self.results] + text += f"{sources})" + return text + + def combine(self, parsing_expression): + for i in range(len(self.results)): + node = self.results[i].node + self.results[i].node = NonTerminalNode(parsing_expression, + node.start, + node.end, + node.tokens, + [node]) + + return self + class ParsingExpression: def __init__(self, *args, **kwargs): @@ -100,12 +169,24 @@ class ParsingExpression: if not isinstance(other, ParsingExpression): return False - return self.rule_name == other.rule_name and self.elements == other.elements + if self.rule_name != other.rule_name: + return False + + if len(self.elements) != len(other.elements): + return False + + for self_element, other_element in zip(self.elements, other.elements): + if self_element != other_element: + return False + + return True def __hash__(self): return hash((self.rule_name, self.elements)) def parse(self, parser): + debug(self) + # TODO : add memoization return self._parse(parser) def add_rule_name_if_needed(self, text): @@ -120,9 +201,10 @@ class ConceptExpression(ParsingExpression): When the grammar is created, it is replaced by the actual concept """ - def __init__(self, concept, rule_name=""): + def __init__(self, concept, rule_name="", recurse_id=None): super().__init__(rule_name=rule_name) self.concept = concept + self.recurse_id = recurse_id def __repr__(self): return self.add_rule_name_if_needed(f"{self.concept}") @@ -135,7 +217,7 @@ class ConceptExpression(ParsingExpression): return False if isinstance(self.concept, Concept): - return self.concept.name == other.concept.name + return self.concept.id == other.concept.id # when it's only the name of the concept return self.concept == other.concept @@ -147,6 +229,10 @@ class ConceptExpression(ParsingExpression): node = self.nodes[0].parse(parser_helper) if node is None: return None + + if isinstance(node, MultiNode): + return node.combine(self) + return NonTerminalNode(self, node.start, node.end, @@ -163,21 +249,55 @@ class Sequence(ParsingExpression): init_pos = parser_helper.pos end_pos = parser_helper.pos - children = [] + ntn = NonTerminalNode(self, + init_pos, + end_pos, + None, + []) + parsing_contexts = [ParsingContext(ntn, parser_helper.pos)] + to_remove = [] + to_append = [] for e in self.nodes: - node = e.parse(parser_helper) - if node is None: - return None - else: - if node.end != -1: # because returns -1 when no match - children.append(node) - end_pos = node.end - return NonTerminalNode(self, - init_pos, - end_pos, - parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1], - children) + for pcontext in parsing_contexts: + parser_helper.seek(pcontext.pos) + node = e.parse(parser_helper) + if node is None: + to_remove.append(pcontext) + elif isinstance(node, MultiNode): + clones = pcontext * len(node.results) # clones pcontext (but first item is pcontext) + to_append.extend(clones[1:]) + for clone, node_pcontext in zip(clones, node.results): + clone.pos = node_pcontext.pos + clone.node.children.append(node_pcontext.node) + clone.node.end = node_pcontext.node.end + else: + if node.end != -1: # because returns -1 when no match + pcontext.pos = parser_helper.pos + pcontext.node.children.append(node) + pcontext.node.end = node.end + + for pcontext in to_remove: + parsing_contexts.remove(pcontext) + parsing_contexts.extend(to_append) + + if len(parsing_contexts) == 0: + return None + + to_append.clear() + to_remove.clear() + + # reset tokenizer the following pexpression + parser_helper.seek(parsing_contexts[0].pos) + + # update nodes sources and tokens + for pcontext in parsing_contexts: + pcontext.fix_tokens(parser_helper) + + if len(parsing_contexts) == 1: + return parsing_contexts[0].node + + return MultiNode(parsing_contexts) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -188,6 +308,7 @@ class OrderedChoice(ParsingExpression): """ Will match the first one among multiple It will stop at the first match (so the order of definition is important) + TODO : implement MultiNode support """ def _parse(self, parser_helper): @@ -211,36 +332,42 @@ class OrderedChoice(ParsingExpression): return self.add_rule_name_if_needed(f"({to_str})") -class LongestChoice(ParsingExpression): +class UnOrderedChoice(ParsingExpression): """ - Will match the longest one among multiple + May match many nodes. It will return nodes sorted by length All elements will be tested, so the order is not important - The behaviour when multiple candidate is found is not defined yet + The behaviour when multiple candidates with same length are found is not defined yet """ def _parse(self, parser_helper): init_pos = parser_helper.pos - longest_node = None - end_pos = -1 + parsing_contexts = [] for e in self.nodes: node = e.parse(parser_helper) if node: - if longest_node is None or node.end > longest_node.end: - longest_node = node - end_pos = parser_helper.pos - + if isinstance(node, MultiNode): + node.combine(self) + parsing_contexts.extend(node.results) + else: + tn = NonTerminalNode(self, + init_pos, + node.end, + parser_helper.parser.parser_input.tokens[init_pos: node.end + 1], + [node]) + parsing_contexts.append(ParsingContext(tn, parser_helper.pos)) parser_helper.seek(init_pos) # backtrack - if longest_node is None: + if len(parsing_contexts) == 0: return None - parser_helper.seek(end_pos) - return NonTerminalNode(self, - init_pos, - longest_node.end, - parser_helper.parser.parser_input.tokens[init_pos: longest_node.end + 1], - [longest_node]) + parser_helper.seek(parsing_contexts[0].pos) + + if len(parsing_contexts) == 1: + return parsing_contexts[0].node + else: + parsing_contexts.sort(key=attrgetter("pos"), reverse=True) + return MultiNode(parsing_contexts) def __repr__(self): to_str = "# ".join(repr(n) for n in self.elements) @@ -252,6 +379,7 @@ class Optional(ParsingExpression): Will match or not the elements if many matches, will choose longest one If you need order, use Optional(OrderedChoice) + TODO : implement MultiNode support """ def _parse(self, parser_helper): @@ -302,6 +430,7 @@ class ZeroOrMore(Repetition): """ ZeroOrMore will try to match parser expression specified zero or more times. It will never fail. + TODO : implement MultiNode support """ def _parse(self, parser_helper): @@ -343,6 +472,7 @@ class ZeroOrMore(Repetition): class OneOrMore(Repetition): """ OneOrMore will try to match parser expression specified one or more times. + TODO : implement MultiNode support """ def _parse(self, parser_helper): @@ -450,6 +580,7 @@ class StrMatch(Match): parser_helper.next_token(self.skip_white_space) return node + debug(f"Failed to match {self}. {token=}") return None @@ -574,6 +705,11 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor): self.visit(node) return self.STOP + def visit_UnOrderedChoice(self, parsing_expression): + for node in parsing_expression.elements: + self.visit(node) + return self.STOP + class BnfConceptParserHelper: def __init__(self, parser): @@ -657,7 +793,13 @@ class BnfConceptParserHelper: self.token = self.parser.parser_input.tokens[self.pos] # parse + debug(f"parsing {parsing_expression} against '{self.parser.parser_input.text}'") node = parsing_expression.parse(self) + + if isinstance(node, MultiNode): + # when multiple choices are found, use the longest result + node = node.results[0].node + if node is not None and node.end != -1: self.sequence.append(self.create_concept_node(concept, node)) self.pos = node.end @@ -826,6 +968,24 @@ class BnfConceptParserHelper: return concept + def get_node_value(self, node): + """ + Try to evaluate the value of a given LexerNode (TerminalNode or NonTerminalNode) + :param node: + :return: + """ + + if isinstance(node, TerminalNode): + return node.value + + if isinstance(node.parsing_expression, ConceptExpression): + concept = node.parsing_expression.concept + finalized = self.finalize_concept(self.parser.sheerka, concept, node) + evaluated = core.builtin_helpers.ensure_evaluated(self.parser.context, finalized) + return evaluated.body + + return None + @dataclass class UnderConstruction: @@ -834,11 +994,11 @@ class UnderConstruction: @dataclass() class ToUpdate: - parent_id: int + instance_id: int parsing_expression: ParsingExpression def __hash__(self): - return hash(self.parent_id) + return hash(self.instance_id) class BnfNodeParser(BaseNodeParser): @@ -980,9 +1140,10 @@ class BnfNodeParser(BaseNodeParser): def check_for_infinite_recursion(self, parsing_expression, already_found, only_first=False): if isinstance(parsing_expression, ConceptExpression): - if parsing_expression.concept.id in already_found: + id_to_use = parsing_expression.recurse_id or parsing_expression.concept.id + if id_to_use in already_found: return True - already_found.add(parsing_expression.concept.id) + already_found.add(id_to_use) return self.check_for_infinite_recursion(parsing_expression.nodes[0], already_found, only_first) if isinstance(parsing_expression, Sequence): @@ -1010,7 +1171,7 @@ class BnfNodeParser(BaseNodeParser): return False return False - if isinstance(parsing_expression, LongestChoice): + if isinstance(parsing_expression, UnOrderedChoice): for node in parsing_expression.nodes: already_found_for_current_node = already_found.copy() if self.check_for_infinite_recursion(node, already_found_for_current_node, True): @@ -1018,11 +1179,6 @@ class BnfNodeParser(BaseNodeParser): return True return False - if isinstance(parsing_expression, UnderConstruction): - if parsing_expression.concept_id in already_found: - return True - already_found.add(parsing_expression.concept_id) - return False def get_parsing_expression(self, context, concept): @@ -1041,10 +1197,6 @@ class BnfNodeParser(BaseNodeParser): # concept that are not totally resolved, because they reference parsing expression under construction to_update = set() # the key is the instance id of the parsing expression - # during the parsing of concept, we will resolve other concepts - # keep the track of the concepts that can safely be added to self.concept_grammars - to_keep = {concept.id} - desc = f"Get parsing expression for concept {concept}" with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, @@ -1052,17 +1204,16 @@ class BnfNodeParser(BaseNodeParser): root_concept=concept, desc=desc) as sub_context: # get the parsing expression - ret = self.resolve_concept_parsing_expression(sub_context, concept, grammar, to_update, to_keep) + ret = self.resolve_concept_parsing_expression(sub_context, concept, None, grammar, to_update) # check and update parsing expression that are still under construction # Note that we only update the concept that will update concepts_grammars # because pe.node may be large for item in to_update: - if item.parent_id in to_keep: - pe = item.parsing_expression - for i, node in enumerate(pe.nodes): - if isinstance(node, UnderConstruction): - pe.nodes[i] = grammar.get(node.concept_id) + pe = item.parsing_expression + for i, node in enumerate(pe.nodes): + if isinstance(node, UnderConstruction): + pe.nodes[i] = grammar.get(node.concept_id) # check for infinite recursion. # We are adding a new concept. Does it create an infinite recursion ? @@ -1077,8 +1228,7 @@ class BnfNodeParser(BaseNodeParser): # finally, update concept grammar for k, v in grammar.items(): - if k in to_keep: - self.concepts_grammars.put(k, v) + self.concepts_grammars.put(k, v) # not quite sure that it is a good idea. # Why do we want to corrupt previous valid entries ? @@ -1089,19 +1239,33 @@ class BnfNodeParser(BaseNodeParser): return ret - def resolve_concept_parsing_expression(self, context, concept, grammar, to_update, to_keep): - if concept.id in self.concepts_grammars: # validated entry - return self.concepts_grammars.get(concept.id) + def resolve_concept_parsing_expression(self, context, concept, name, grammar, to_update): + """ - if concept.id in grammar: # under construction entry - return grammar.get(concept.id) + :param context: + :param concept: concept + :param name: rule_name of the concept if exists + :param grammar: already resolved parsing expressions + :param to_update: parsing expressions that contains unresovled parsing expression + :return: + """ + if context.sheerka.isaset(context, concept) and hasattr(context, "obj"): + key_to_use = f"{concept.id}#{name}#{context.obj.id}" + else: + key_to_use = concept.id - desc = f"Resolve concept parsing expression for '{concept}'" + if key_to_use in self.concepts_grammars: # validated entry + return self.concepts_grammars.get(key_to_use) + + if key_to_use in grammar: # under construction entry + return grammar.get(key_to_use) + + desc = f"Resolve concept parsing expression for '{concept}'. {key_to_use=}" with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context: if not concept.bnf: # to save a function call. Not sure it worth it. BaseNodeParser.ensure_bnf(sub_context, concept, self.name) - grammar[concept.id] = UnderConstruction(concept.id) + grammar[key_to_use] = UnderConstruction(concept.id) sheerka = context.sheerka if concept.metadata.definition_type == DEFINITION_TYPE_BNF: @@ -1109,7 +1273,7 @@ class BnfNodeParser(BaseNodeParser): desc = f"Bnf concept detected. Resolving parsing expression '{expression}'" with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: ssc.add_inputs(expression=expression) - resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep) + resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update) ssc.add_values(return_values=resolved) elif sheerka.isaset(context, concept): @@ -1120,32 +1284,30 @@ class BnfNodeParser(BaseNodeParser): valid_concepts = [] for c in concepts_in_group: - if c.id == context.root_concept.id: + if c.id == context.obj.id: continue - c_pe = self.resolve_concept_parsing_expression(context, c, grammar, to_update, to_keep) - if self.check_for_infinite_recursion(c_pe, {concept.id}, True): - continue + # c_pe = self.resolve_concept_parsing_expression(context, c, None, grammar, to_update, to_keep) + # if self.check_for_infinite_recursion(c_pe, {concept.id}, True): + # continue valid_concepts.append(c) - nodes = [ConceptExpression(c, rule_name=c.name) for c in valid_concepts] + nodes = [ConceptExpression(c, rule_name=c.name, recurse_id=key_to_use) for c in valid_concepts] resolved = self.resolve_parsing_expression(ssc, - LongestChoice(*nodes), + UnOrderedChoice(*nodes), grammar, - to_update, - to_keep) + to_update) ssc.add_values(concepts_in_group=concepts_in_group) ssc.add_values(return_values=resolved) else: desc = f"Concept is a simple concept." with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc: - to_keep.add(concept.id) expression = self.get_expression_from_concept_name(concept.name) - resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update, to_keep) + resolved = self.resolve_parsing_expression(ssc, expression, grammar, to_update) - grammar[concept.id] = resolved + grammar[key_to_use] = resolved if self.has_error: sub_context.add_values(errors=self.error_sink) @@ -1154,7 +1316,7 @@ class BnfNodeParser(BaseNodeParser): sub_context.add_values(return_values=resolved) return resolved - def resolve_parsing_expression(self, context, expression, grammar, to_update, to_keep): + def resolve_parsing_expression(self, context, expression, grammar, to_update): if isinstance(expression, str): ret = StrMatch(expression, ignore_case=self.ignore_case) @@ -1170,7 +1332,12 @@ class BnfNodeParser(BaseNodeParser): unknown_concept = self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=concept) return self.add_error(unknown_concept) - pe = self.resolve_concept_parsing_expression(context, concept, grammar, to_update, to_keep) + pe = self.resolve_concept_parsing_expression( + context, + concept, + expression.rule_name, + grammar, + to_update) if not isinstance(pe, (ParsingExpression, UnderConstruction)): return pe # an error is detected, escalate it @@ -1179,7 +1346,7 @@ class BnfNodeParser(BaseNodeParser): # return pe # we are looking for ourself, just return it if isinstance(pe, UnderConstruction): - to_update.add(ToUpdate(context.obj.id, expression)) + to_update.add(ToUpdate(id(expression), expression)) expression.nodes = [pe] expression.rule_name = expression.rule_name or concept.name @@ -1192,18 +1359,18 @@ class BnfNodeParser(BaseNodeParser): elif isinstance(expression, Sequence) or \ isinstance(expression, OrderedChoice) or \ - isinstance(expression, LongestChoice) or \ + isinstance(expression, UnOrderedChoice) or \ isinstance(expression, ZeroOrMore) or \ isinstance(expression, OneOrMore) or \ isinstance(expression, Optional): ret = expression ret.nodes = [] for e in ret.elements: - pe = self.resolve_parsing_expression(context, e, grammar, to_update, to_keep) + pe = self.resolve_parsing_expression(context, e, grammar, to_update) if not isinstance(pe, (ParsingExpression, UnderConstruction)): return pe # an error is detected, escalate it if isinstance(pe, UnderConstruction): - to_update.add(ToUpdate(context.obj.id, ret)) + to_update.add(ToUpdate(id(expression), ret)) ret.nodes.append(pe) else: @@ -1214,8 +1381,7 @@ class BnfNodeParser(BaseNodeParser): expression.sep = self.resolve_parsing_expression(context, expression.sep, grammar, - to_update, - to_keep) + to_update) return ret diff --git a/src/sheerkapickle/SheerkaPickler.py b/src/sheerkapickle/SheerkaPickler.py index 29104ea..9b75d89 100644 --- a/src/sheerkapickle/SheerkaPickler.py +++ b/src/sheerkapickle/SheerkaPickler.py @@ -2,7 +2,7 @@ import json from logging import Logger import core.utils -from core.concept import Concept +from core.concept import Concept, NotInitialized from core.sheerka.services.SheerkaExecute import ParserInput from sheerkapickle import utils, tags, handlers @@ -38,6 +38,7 @@ class SheerkaPickler: from evaluators.BaseEvaluator import BaseEvaluator self.to_reduce.append(ToReduce(lambda o: isinstance(o, (BaseParser, BaseEvaluator)), lambda o: o.name)) self.to_reduce.append(ToReduce(lambda o: isinstance(o, ParserInput), lambda o: o.as_text())) + self.to_reduce.append(ToReduce(lambda o: isinstance(o, NotInitialized), lambda o: None)) def flatten(self, obj): if utils.is_primitive(obj): diff --git a/src/sheerkapickle/sheerka_handlers.py b/src/sheerkapickle/sheerka_handlers.py index c02a022..fd7c5d9 100644 --- a/src/sheerkapickle/sheerka_handlers.py +++ b/src/sheerkapickle/sheerka_handlers.py @@ -4,7 +4,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput from evaluators.BaseEvaluator import BaseEvaluator from parsers.BaseParser import BaseParser from sheerkapickle.handlers import BaseHandler, registry -from core.concept import Concept, PROPERTIES_TO_SERIALIZE as CONCEPT_PROPERTIES_TO_SERIALIZE, ConceptParts +from core.concept import Concept, PROPERTIES_TO_SERIALIZE as CONCEPT_PROPERTIES_TO_SERIALIZE, ConceptParts, NotInit from core.sheerka.ExecutionContext import ExecutionContext, PROPERTIES_TO_SERIALIZE as CONTEXT_PROPERTIES_TO_SERIALIZE default_concept = Concept() @@ -67,7 +67,7 @@ class ConceptHandler(BaseHandler): # get properties for prop_name, prop_value in resolved_value: key_to_use = ConceptParts(prop_name[7:]) if isinstance(prop_name, str) and prop_name.startswith("cParts.") else prop_name - instance.set_value(key_to_use, prop_value) + instance.set_value(key_to_use, NotInit if prop_value is None else prop_value) else: raise Exception("Sanity check as it's not possible yet") diff --git a/tests/core/test_ExecutionContext.py b/tests/core/test_ExecutionContext.py index f917e53..7424d18 100644 --- a/tests/core/test_ExecutionContext.py +++ b/tests/core/test_ExecutionContext.py @@ -121,3 +121,20 @@ def test_i_can_search(): stop=lambda ec: ec.obj == "skip", start_with_self=True, get_obj=lambda ec: ec.obj)) == ["obj_abbb"] + + +def test_variables_are_passed_to_children_but_not_to_parents(): + a = ExecutionContext("foo", Event("event_1"), "fake_sheerka", BuiltinConcepts.NOP, None) + assert not hasattr(a, "var") + + b = a.push(BuiltinConcepts.NOP, None, var="foo") + assert b.var == "foo" + assert not hasattr(a, "var") + + c = b.push(BuiltinConcepts.NOP, None) + assert c.var == "foo" + + c.var = "bar" + assert c.var == "bar" + assert b.var != "bar" + assert not hasattr(a, "var") diff --git a/tests/core/test_SheerkaEvaluateConcept.py b/tests/core/test_SheerkaEvaluateConcept.py index 1f87533..53b8bc8 100644 --- a/tests/core/test_SheerkaEvaluateConcept.py +++ b/tests/core/test_SheerkaEvaluateConcept.py @@ -1,6 +1,6 @@ import pytest from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept -from core.concept import Concept, DoNotResolve, ConceptParts, Property, InfiniteRecursionResolved, CB +from core.concept import Concept, DoNotResolve, ConceptParts, Property, InfiniteRecursionResolved, CB, NotInit from parsers.PythonParser import PythonNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -65,7 +65,7 @@ class TestSheerkaEvaluateConcept(TestUsingMemoryBasedSheerka): assert len(evaluated.values) == 0 if expr is None else 1 @pytest.mark.parametrize("expr, expected", [ - (None, None), + (None, NotInit), ("", ""), ("1", 1), ("1+1", 2), diff --git a/tests/non_reg/test_sheerka_non_reg.py b/tests/non_reg/test_sheerka_non_reg.py index 5cc7140..8c4154f 100644 --- a/tests/non_reg/test_sheerka_non_reg.py +++ b/tests/non_reg/test_sheerka_non_reg.py @@ -1,6 +1,6 @@ import pytest from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept, PROPERTIES_TO_SERIALIZE, simplec, CMV +from core.concept import Concept, PROPERTIES_TO_SERIALIZE, simplec, CMV, NotInit from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator from evaluators.PythonEvaluator import PythonEvalError from parsers.BaseNodeParser import SyaAssociativity @@ -319,7 +319,7 @@ as: assert res[0].status concept_found = res[0].value assert sheerka.isinstance(concept_found, greetings) - assert concept_found.get_value("a") is None + assert concept_found.get_value("a") == NotInit assert not concept_found.metadata.need_validation @pytest.mark.parametrize("desc, definitions", [ @@ -898,8 +898,8 @@ as: res = sheerka.evaluate_user_input(expression) assert res[0].status assert res[0].body == CMV(plus, a="c:one:", b="c:two:") - assert res[0].body.a is None # concept is not evaluated - assert res[0].body.b is None # concept is not evaluated + assert res[0].body.a == NotInit # concept is not evaluated + assert res[0].body.b == NotInit # concept is not evaluated expression = "eval c:one: < c:two:" res = sheerka.evaluate_user_input(expression) diff --git a/tests/parsers/test_BnfNodeParser.py b/tests/parsers/test_BnfNodeParser.py index 00d8aba..cb63ef0 100644 --- a/tests/parsers/test_BnfNodeParser.py +++ b/tests/parsers/test_BnfNodeParser.py @@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts, DoNotResolve, CC, DEFINITION_TYP from core.sheerka.services.SheerkaExecute import ParserInput from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ - Optional, ZeroOrMore, OneOrMore, ConceptExpression, LongestChoice + Optional, ZeroOrMore, OneOrMore, ConceptExpression, UnOrderedChoice from parsers.BnfParser import BnfParser import tests.parsers.parsers_utils @@ -25,6 +25,8 @@ cmap = { "baz": Concept("baz"), "one hundred": Concept("one hundred", body="100"), "one_hundred": Concept("'one hundred'", body="100"), + "hundreds": Concept("hundreds", definition="number=n1 'hundred' 'and' number=n2", + where="n1 < 10 and n2 < 100", body="n1 * 100 + n2"), "bnf baz": Concept("bnf baz", definition="'baz'"), # this one should be chosen @@ -102,6 +104,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): sheerka.set_isa(context, sheerka.new("fifty"), sheerka.new("number")) sheerka.set_isa(context, sheerka.new("one hundred"), sheerka.new("number")) + # Pay attention. 'twenties (t1 and t2) are not set as number + thirties = cls.update_bnf(context, Concept("thirties", definition="thirty number", where="number < 10", @@ -367,9 +371,9 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): ("three", []), ]) - def test_i_can_parse_longest_choice(self, text, expected): + def test_i_can_parse_unordered_choice(self, text, expected): my_map = { - "foo": self.bnf_concept("foo", LongestChoice( + "foo": self.bnf_concept("foo", UnOrderedChoice( StrMatch("one"), Sequence(StrMatch("one"), StrMatch("two")))), } @@ -799,7 +803,8 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("bar_expr, expected", [ (ConceptExpression("foo"), {}), (OrderedChoice(ConceptExpression("foo"), StrMatch("one")), {'one': ['1002']}), - (Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}) + (Sequence(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}), + # (UnOrderedChoice(StrMatch("one"), ConceptExpression("foo"), StrMatch("two")), {'one': ['1001', '1002']}) ]) def test_i_can_detect_infinite_recursion(self, bar_expr, expected): my_map = { @@ -894,7 +899,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): number_nodes = parsing_expression.nodes[1].nodes assert len(number_nodes) == 1 - assert isinstance(number_nodes[0], LongestChoice) + assert isinstance(number_nodes[0], UnOrderedChoice) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes @@ -927,7 +932,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): number_nodes = parsing_expression.nodes[0].nodes assert len(number_nodes) == 1 - assert isinstance(number_nodes[0], LongestChoice) + assert isinstance(number_nodes[0], UnOrderedChoice) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["two"], rule_name="two") in number_nodes[0].nodes @@ -959,7 +964,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): number_nodes = parsing_expression.nodes[1].nodes assert len(number_nodes) == 1 - assert isinstance(number_nodes[0], LongestChoice) + assert isinstance(number_nodes[0], UnOrderedChoice) assert len(number_nodes[0].nodes) == len(number_nodes[0].elements) assert ConceptExpression(my_map["one"], rule_name="one") in number_nodes[0].nodes assert ConceptExpression(my_map["twenty"], rule_name="twenty") in number_nodes[0].nodes @@ -1117,11 +1122,16 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert concepts_nodes == expected_array - def test_i_can_parse_bnf_concept_mixed_with_isa_concepts_when_concept_starts_with_isa(self): + def test_i_can_parse_one_thousand(self): + """ + Test of simple number + 'thousand' + :return: + """ sheerka, context, parser = self.init_parser(init_from_sheerka=True) - one = CC("one", body=DoNotResolve("one")) - + sheerka.concepts_grammars.clear() # to simulate restart text = "one thousand" + + one = CC("one", body=DoNotResolve("one")) expected = CNC("thousands", source=text, number=CC("number", @@ -1138,27 +1148,51 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert concepts_nodes == expected_array + def test_i_can_parse_fifty_one_thousand(self): + """ + Test of complex number + 'thousand' (complex because the number is a BNF concept) + :return: + """ + sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka.concepts_grammars.clear() # to simulate restart text = "fifty one thousand" + + one = CC("one", body=DoNotResolve("one")) fifty_one = CC("fifties", source="fifty one", fifty="fifty", number=CC("number", source="one", body=one, one=one)) - expected = CNC("thousands", - source=text, - number=CC("number", - source="fifty one", - fifties=fifty_one, - body=fifty_one)) - expected_array = compute_expected_array(cmap, text, [expected]) + one_thousand = CC("thousands", + source="one thousand", + number=CC("number", source="one", body=one, one=one)) + + expected_thousand = CNC("thousands", + source=text, + number=CC("number", + source="fifty one", + fifties=fifty_one, + body=fifty_one)) + expected_fifties = CNC("fifties", + source=text, + fifty="fifty", + number=CC("number", + source="one thousand", + thousands=one_thousand, + body=one_thousand)) + expected_thousands = compute_expected_array(cmap, text, [expected_thousand]) + expected_fifties = compute_expected_array(cmap, text, [expected_fifties]) res = parser.parse(context, ParserInput(text)) - parser_result = res.value - concepts_nodes = res.value.value - assert res.status - assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) - assert concepts_nodes == expected_array + assert res[0].status + assert res[0].value.value == expected_thousands + + assert res[1].status + assert res[1].value.value == expected_fifties + + def test_i_can_parse_one_hundred_thousand(self): + sheerka, context, parser = self.init_parser(init_from_sheerka=True) + sheerka.concepts_grammars.clear() # to simulate restart text = "one hundred thousand" res = parser.parse(context, ParserInput(text)) @@ -1167,6 +1201,39 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka): assert res.status assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + def test_i_can_parse_hundreds_like_expression(self): + sheerka, context, parser = self.init_parser(init_from_sheerka=True) + + text = "three hundred and thirty two" + three = CC("three", body=DoNotResolve("three")) + two = CC("two", body=DoNotResolve("two")) + thirty_two = CC("thirties", + source="thirty two", + thirty="thirty", + number=CC("number", + source="two", + body=two, + two=two)) + expected = CNC("hundreds", + source=text, + n1=CC("number", + source="three", + body=three, + three=three), + n2=CC("number", + source="thirty two", + body=thirty_two, + thirties=thirty_two)) + + expected_array = compute_expected_array(cmap, text, [expected]) + res = parser.parse(context, ParserInput(text)) + parser_result = res.value + concepts_nodes = res.value.value + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert concepts_nodes == expected_array + def test_i_can_parse_bnf_concept_mixed_with_isa_after_restart(self): sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka.concepts_grammars.clear() # simulate restart diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index 389a968..4020030 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -22,6 +22,16 @@ def c(name, rule_name=None): return ConceptExpression(concept, rule_name=rule_name or name) +def update_concepts_ids(sheerka, parsing_expression): + if isinstance(parsing_expression, ConceptExpression): + if not parsing_expression.concept.id: + concept = sheerka.get_by_key(parsing_expression.concept.key) + parsing_expression.concept.metadata.id = concept.id + + for pe in parsing_expression.elements: + update_concepts_ids(sheerka, pe) + + eof_token = Token(TokenKind.EOF, "", 0, 0, 0) @@ -109,7 +119,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka): ("def 'concept'", Sequence(c("def"), StrMatch("concept"))), ]) def test_i_can_parse_regex_with_concept(self, expression, expected): - sheerka, context, parser, foo, bar, var, _def = self.init_parser("foo", "bar", "var", "def") + sheerka, context, parser, *concepts = self.init_parser("foo", "bar", "var", "def") + + update_concepts_ids(sheerka, expected) res = parser.parse(context, Tokenizer(expression))