From 8152f82c6be0341df594347a6a7ab42136428a42 Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Wed, 15 Jan 2020 18:38:29 +0100 Subject: [PATCH] Added simple form of concept composition --- core/sheerka.py | 109 ++++++---- core/sheerka_transform.py | 4 +- core/tokenizer.py | 58 +++++ docs/blog.rst | 2 +- evaluators/ConceptComposerEvaluator.py | 109 ---------- evaluators/ConceptNodeEvaluator.py | 68 ------ evaluators/LexerNodeEvaluator.py | 102 +++++++++ parsers/ConceptLexerParser.py | 90 ++++---- parsers/ConceptsWithConceptsParser.py | 110 ++++++++++ parsers/MultipleConceptsParser.py | 140 ++++++++---- parsers/PythonParser.py | 81 ++++++- parsers/PythonWithConceptsParser.py | 15 +- tests/test_BnfParser.py | 8 +- tests/test_ConceptComposerEvaluator.py | 128 ----------- tests/test_ConceptLexerParser.py | 126 +++++------ tests/test_ConceptsWithConceptsParser.py | 204 ++++++++++++++++++ ...valuator.py => test_LexerNodeEvaluator.py} | 54 ++++- tests/test_MultipleConceptsParser.py | 95 ++++++-- tests/test_PythonWithConceptsParser.py | 16 +- tests/test_sheerka_non_reg.py | 114 ++++++++-- tests/test_sheerka_transform.py | 1 + tests/test_tokenizer.py | 15 +- 22 files changed, 1105 insertions(+), 544 deletions(-) delete mode 100644 evaluators/ConceptComposerEvaluator.py delete mode 100644 evaluators/ConceptNodeEvaluator.py create mode 100644 evaluators/LexerNodeEvaluator.py create mode 100644 parsers/ConceptsWithConceptsParser.py delete mode 100644 tests/test_ConceptComposerEvaluator.py create mode 100644 tests/test_ConceptsWithConceptsParser.py rename tests/{test_ConceptNodeEvaluator.py => test_LexerNodeEvaluator.py} (61%) diff --git a/core/sheerka.py b/core/sheerka.py index a3d082d..055a7ba 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -258,6 +258,7 @@ class Sheerka(Concept): # execution_context.log(logger or self.log, f"Parsing {debug_text}") with execution_context.push(desc=f"Parsing using {parser.name}") as sub_context: + sub_context.add_inputs(to_parse=to_parse) res = parser.parse(sub_context, to_parse) if res is not None: if hasattr(res, "__iter__"): @@ -278,6 +279,7 @@ class Sheerka(Concept): to_process.append(res) if res.status: return_value_success_found = True + sub_context.add_values(return_values=res) if return_value_success_found: stop_processing = True @@ -347,51 +349,69 @@ class Sheerka(Concept): sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True) # process + iteration = 0 while True: - simple_digest = return_values[:] + with execution_context.push(desc=f"iteration #{iteration}", iteration=iteration) as iteration_context: + simple_digest = return_values[:] + iteration_context.add_inputs(return_values=simple_digest) - for priority in sorted_priorities: + for priority in sorted_priorities: - original_items = return_values[:] - evaluated_items = [] - to_delete = [] - for evaluator in grouped_evaluators[priority]: - evaluator = _preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy + original_items = return_values[:] + evaluated_items = [] + to_delete = [] + for evaluator in grouped_evaluators[priority]: + evaluator = _preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy - # process evaluators that work on return value - from evaluators.BaseEvaluator import OneReturnValueEvaluator - if isinstance(evaluator, OneReturnValueEvaluator): - for item in original_items: - if evaluator.matches(execution_context, item): - result = evaluator.eval(execution_context, item) - if result is None: - continue - elif 
isinstance(result, list): - evaluated_items.extend(result) - to_delete.append(item) - elif isinstance(result, ReturnValueConcept): - evaluated_items.append(result) - to_delete.append(item) + sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})" + with iteration_context.push(desc=sub_context_desc) as sub_context: + sub_context.add_inputs(return_values=original_items) + + # process evaluators that work on one simple return value at the time + from evaluators.BaseEvaluator import OneReturnValueEvaluator + if isinstance(evaluator, OneReturnValueEvaluator): + debug_result = [] + for item in original_items: + if evaluator.matches(sub_context, item): + result = evaluator.eval(sub_context, item) + if result is None: + debug_result.append({"input": item, "return_value": None}) + continue + + to_delete.append(item) + if isinstance(result, list): + evaluated_items.extend(result) + elif isinstance(result, ReturnValueConcept): + evaluated_items.append(result) + else: + error = self.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result, + evaluator=evaluator) + result = self.ret("sheerka.process", False, error, parents=[item]) + evaluated_items.append(result) + debug_result.append({"input": item, "return_value": result}) + else: + debug_result.append({"input": item, "return_value": "** No Match **"}) + sub_context.add_values(return_values=debug_result) + + # process evaluators that work on all return values + else: + if evaluator.matches(sub_context, original_items): + results = evaluator.eval(sub_context, original_items) + if results is None: + continue + if not isinstance(results, list): + results = [results] + for result in results: + evaluated_items.append(result) + to_delete.extend(result.parents) + sub_context.add_values(return_values=results) else: - error = self.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result, - evaluator=evaluator) - evaluated_items.append(self.ret("sheerka.process", False, error, parents=[item])) - to_delete.append(item) + sub_context.add_values(return_values="** No Match **") - # process evaluators that work on all return values - else: - if evaluator.matches(execution_context, original_items): - results = evaluator.eval(execution_context, original_items) - if results is None: - continue - if not isinstance(results, list): - results = [results] - for result in results: - evaluated_items.append(result) - to_delete.extend(result.parents) + return_values = evaluated_items + return_values.extend([item for item in original_items if item not in to_delete]) - return_values = evaluated_items - return_values.extend([item for item in original_items if item not in to_delete]) + iteration_context.add_values(return_values=return_values[:]) # have we done something ? 
     to_compare = return_values[:]
@@ -399,7 +419,7 @@ class Sheerka(Concept):
                 break

             # inc the iteration and continue
-            execution_context = execution_context.push(iteration=execution_context.iteration + 1)
+            iteration += 1

         # remove all evaluation context that are not reduced
         return_values = core.utils.remove_list_from_list(return_values, evaluation_return_values)
@@ -879,11 +899,11 @@ class Sheerka(Concept):

         return obj

-    def is_unknown(self, obj):
+    def is_known(self, obj):
         if not isinstance(obj, Concept):
             return True

-        return obj.key == BuiltinConcepts.UNKNOWN_CONCEPT
+        return obj.key != str(BuiltinConcepts.UNKNOWN_CONCEPT)

     def isinstance(self, a, b):
         """
@@ -1049,6 +1069,8 @@ class ExecutionContext:
         self.desc = desc  # human description of what is going on
         self.children = []
         self.preprocess = None
+
+        self.inputs = {}  # the parameters that were passed to the execution context
         self.values = {}  # what was produced by the execution context

         self.obj = kwargs.pop("obj", None)
@@ -1105,6 +1127,11 @@ class ExecutionContext:
         self.preprocess.add(preprocess)
         return self

+    def add_inputs(self, **kwargs):
+        for k, v in kwargs.items():
+            self.inputs[k] = v
+        return self
+
     def add_values(self, **kwargs):
         for k, v in kwargs.items():
             self.values[k] = v
diff --git a/core/sheerka_transform.py b/core/sheerka_transform.py
index d939811..adbfc51 100644
--- a/core/sheerka_transform.py
+++ b/core/sheerka_transform.py
@@ -48,7 +48,7 @@ class SheerkaTransform:
             self.ids[obj] = self.id_count

         if isinstance(obj, Concept):
-            return self.context_to_dict(obj)
+            return self.concept_to_dict(obj)
         elif isinstance(obj, ExecutionContext):
             return self.execution_context_to_dict(obj)
@@ -104,7 +104,7 @@ class SheerkaTransform:
         else:
             return obj

-    def context_to_dict(self, obj: Concept):
+    def concept_to_dict(self, obj: Concept):
         to_dict = {
             OBJ_TYPE_KEY: SheerkaTransformType.Concept,
             OBJ_ID_KEY: self.id_count,
diff --git a/core/tokenizer.py b/core/tokenizer.py
index e9c9b38..e609b25 100644
--- a/core/tokenizer.py
+++ b/core/tokenizer.py
@@ -37,6 +37,8 @@ class TokenKind(Enum):
     BACK_SLASH = "bslash"  # \
     CARAT = "carat"  # ^
     DOLLAR = "dollar"  # $
+    EURO = "euro"  # €
+    STERLING = "sterling"  # £
     EMARK = "emark"  # !
GREATER = "greater" # > LESS = "less" # < @@ -128,6 +130,18 @@ class Tokenizer: yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column) self.i += 1 self.column += 1 + elif c == "_": + if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha(): + identifier = self.eat_identifier(self.i) + token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER + value = Keywords(identifier) if identifier in self.KEYWORDS else identifier + yield Token(token_type, value, self.i, self.line, self.column) + self.i += len(identifier) + self.column += len(identifier) + else: + yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column) + self.i += 1 + self.column += 1 elif c == "/": yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column) self.i += 1 @@ -205,6 +219,46 @@ class Tokenizer: yield Token(TokenKind.GREATER, ">", self.i, self.line, self.column) self.i += 1 self.column += 1 + elif c == "!": + yield Token(TokenKind.EMARK, "!", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "`": + yield Token(TokenKind.BACK_QUOTE, "`", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "\\": + yield Token(TokenKind.BACK_SLASH, "\\", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "^": + yield Token(TokenKind.CARAT, "^", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "$": + yield Token(TokenKind.DOLLAR, "$", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "€": + yield Token(TokenKind.EURO, "€", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "£": + yield Token(TokenKind.STERLING, "£", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "#": + yield Token(TokenKind.HASH, "#", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "°": + yield Token(TokenKind.DEGREE, "°", self.i, self.line, self.column) + self.i += 1 + self.column += 1 + elif c == "~": + yield Token(TokenKind.TILDE, "~", self.i, self.line, self.column) + self.i += 1 + self.column += 1 elif c == "\n" or c == "\r": newline = self.eat_newline(self.i) yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column) @@ -234,6 +288,10 @@ class Tokenizer: self.i += len(string) self.column = 1 if newlines > 0 else self.column + len(string) self.line += newlines + elif c == "_": + yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column) + self.i += 1 + self.column += 1 else: raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column) diff --git a/docs/blog.rst b/docs/blog.rst index 20f24f2..d13a47b 100644 --- a/docs/blog.rst +++ b/docs/blog.rst @@ -675,7 +675,7 @@ when you input one two three four five -the list of :code:`[foo, bar]` will be returned by the parser (as return values) +the list of :code:`[foo, bar]` will be returned by the ConceptLexerParser (as return values) How does it works ? 
diff --git a/evaluators/ConceptComposerEvaluator.py b/evaluators/ConceptComposerEvaluator.py deleted file mode 100644 index bc6e37b..0000000 --- a/evaluators/ConceptComposerEvaluator.py +++ /dev/null @@ -1,109 +0,0 @@ -# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept -# from core.concept import Concept -# from core.tokenizer import TokenKind -# from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator -# from parsers.BaseParser import BaseParser -# from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, ConceptLexerParser -# import core.utils -# -# -# class ConceptComposerEvaluator(AllReturnValuesEvaluator): -# """ -# Try to reassemble parts of concepts from different evaluators -# """ -# -# NAME = "ConceptComposer" -# -# def __init__(self): -# super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 40) -# -# def matches(self, context, return_values): -# concept_lexer_parser_name = ConceptLexerParser().name -# -# for return_value in return_values: -# if return_value.who.startswith(BaseParser.PREFIX) and return_value.status: -# return False -# -# if return_value.who.startswith(BaseEvaluator.PREFIX): -# return False -# -# if return_value.who != concept_lexer_parser_name: -# continue -# -# if not isinstance(return_value.value, ParserResultConcept): -# return False -# -# if not ( -# isinstance(return_value.value.value, ConceptNode) or -# isinstance(return_value.value.value, UnrecognizedTokensNode) or -# ( -# hasattr(return_value.value.value, "__iter__") and -# len(return_value.value.value) > 0 and -# ( -# isinstance(return_value.value.value[0], ConceptNode) or -# isinstance(return_value.value.value[0], UnrecognizedTokensNode) -# ))): -# return False -# -# self.eaten = return_value -# return True -# -# return False -# -# def eval(self, context, return_value): -# sheerka = context.sheerka -# nodes = self.eaten.value.value -# temp_res = [] -# has_error = False -# concepts_only = True -# -# for node in nodes: -# if isinstance(node, UnrecognizedTokensNode): -# tokens = core.utils.strip_tokens(node.tokens, True) -# for token in tokens: -# if token.type == TokenKind.IDENTIFIER: -# concept = context.new_concept(token.value) -# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): -# has_error = True -# else: -# with context.push(self.name, desc=f"Evaluating '{concept}'") as sub_context: -# sub_context.log_new(self.verbose_log) -# concept = sheerka.evaluate_concept(sub_context, concept, self.verbose_log) -# sub_context.add_values(return_values=concept) -# temp_res.append(concept) -# -# else: -# temp_res.append(core.utils.strip_quotes(token.value)) -# concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE -# else: -# with context.push(self.name, desc=f"Evaluating '{node.concept}'") as sub_context: -# sub_context.log_new(self.verbose_log) -# concept = sheerka.evaluate_concept(sub_context, node.concept, self.verbose_log) -# sub_context.add_values(return_values=concept) -# temp_res.append(concept) -# -# if has_error: -# return sheerka.ret( -# self.name, -# False, -# temp_res, -# parents=[self.eaten]) -# -# if concepts_only: -# res = [] -# for r in temp_res: -# if isinstance(r, Concept): -# res.append(r) -# else: -# res = "" -# for r in temp_res: -# if isinstance(r, Concept): -# res += sheerka.value(r) -# else: -# res += r -# -# return sheerka.ret( -# self.name, -# True, -# res, -# parents=[self.eaten]) diff --git a/evaluators/ConceptNodeEvaluator.py b/evaluators/ConceptNodeEvaluator.py deleted file 
mode 100644 index 8d39128..0000000 --- a/evaluators/ConceptNodeEvaluator.py +++ /dev/null @@ -1,68 +0,0 @@ -from core.builtin_concepts import ParserResultConcept, BuiltinConcepts -from evaluators.BaseEvaluator import OneReturnValueEvaluator -from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode - - -class ConceptNodeEvaluator(OneReturnValueEvaluator): - """ - After a BNF is recognized, generates the concept or the list concepts - """ - - NAME = "ConceptNode" - - def __init__(self): - super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60) - - def matches(self, context, return_value): - if not return_value.status: - return False - - if not isinstance(return_value.value, ParserResultConcept): - return False - - return ( - isinstance(return_value.value.value, ConceptNode) or - isinstance(return_value.value.value, UnrecognizedTokensNode) or - ( - hasattr(return_value.value.value, "__iter__") and - len(return_value.value.value) > 0 and - ( - isinstance(return_value.value.value[0], ConceptNode) or - isinstance(return_value.value.value[0], UnrecognizedTokensNode) - ) - ) - ) - - def eval(self, context, return_value): - """ - From a concept node, creates a new concept - and makes sure that the properties are correctly set - """ - sheerka = context.sheerka - nodes = return_value.value.value - if not hasattr(nodes, "__iter__"): - nodes = [nodes] - - concepts = [] - error_found = False - source = "" - for node in nodes: - if isinstance(node, ConceptNode): - source += node.source if source == "" else (" " + node.source) - concepts.append(node.concept) - else: - error_found = True - - if len(concepts) == 1: - return sheerka.ret( - self.name, - not error_found, - context.sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=source, - body=concepts[0], - try_parsed=None), - parents=[return_value]) - - return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME), parents=[return_value]) diff --git a/evaluators/LexerNodeEvaluator.py b/evaluators/LexerNodeEvaluator.py new file mode 100644 index 0000000..15c1e41 --- /dev/null +++ b/evaluators/LexerNodeEvaluator.py @@ -0,0 +1,102 @@ +from core.builtin_concepts import ParserResultConcept, BuiltinConcepts +from evaluators.BaseEvaluator import OneReturnValueEvaluator +from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode +from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode + + +class LexerNodeEvaluator(OneReturnValueEvaluator): + """ + After a BNF is recognized, generates the concept or the list concepts + """ + + NAME = "LexerNode" + + def __init__(self): + super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60) + self.identifiers = {} # cache for already created identifier (the key is id(concept)) + self.identifiers_key = {} # number of identifiers with the same root (prefix) + + def matches(self, context, return_value): + if not return_value.status: + return False + + if not isinstance(return_value.value, ParserResultConcept): + return False + + value = return_value.value.value + if isinstance(value, (ConceptNode, SourceCodeNode)): + return True + + if hasattr(value, "__iter__"): + for node in value: + if not isinstance(node, (ConceptNode, SourceCodeNode)): + return False + return True + + return False + + def eval(self, context, return_value): + """ + From a concept node, creates a new concept + and makes sure that the properties are correctly set + """ + nodes = return_value.value.value + if not hasattr(nodes, "__iter__"): + nodes = 
[nodes] + + context.log(self.verbose_log, f"{nodes=}", self.name) + + for node in nodes: + if isinstance(node, SourceCodeNode): + ret = self.evaluate_python_code(context, nodes) + break + else: + ret = self.evaluate_concepts_only(context, nodes) + + ret.parents = [return_value] + return ret + + def evaluate_concepts_only(self, context, nodes): + concepts = [] + source = "" + sheerka = context.sheerka + + for node in nodes: + if isinstance(node, ConceptNode): + source += node.source if source == "" else (" " + node.source) + concepts.append(node.concept) + + if len(concepts) == 1: + return sheerka.ret( + self.name, + True, + context.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=source, + body=concepts[0], + try_parsed=None)) + + return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=nodes)) + + def evaluate_python_code(self, context, nodes): + sheerka = context.sheerka + + helper = LexerNodeParserHelperForPython() + result = helper.parse(context, nodes) + + if isinstance(result, PythonNode): + return sheerka.ret( + self.name, + True, + sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=result.source, + body=result, + try_parsed=None)) + else: + return sheerka.ret( + self.name, + False, + result.body) diff --git a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py index 5d5e9d1..5dc5f6a 100644 --- a/parsers/ConceptLexerParser.py +++ b/parsers/ConceptLexerParser.py @@ -6,7 +6,8 @@ # Arpeggio: A flexible PEG parser for Python, # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004 ##################################################################################################### -from dataclasses import field, dataclass +from collections import namedtuple +from dataclasses import dataclass from collections import defaultdict from core.builtin_concepts import BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve @@ -15,23 +16,6 @@ from parsers.BaseParser import BaseParser, Node, ErrorNode import core.utils -def flatten(iterable): - if iterable is None: - return [] - - result = [] - for e in iterable: - if e.parsing_expression.rule_name is not None and e.parsing_expression.rule_name != "": - if hasattr(e, "children"): - e.children = flatten(e.children) - result.append(e) - elif hasattr(e, "children"): - result.extend(flatten(e.children)) - else: - result.append(e) - return result - - @dataclass() class LexerNode(Node): start: int # starting index in the tokens list @@ -68,10 +52,10 @@ class UnrecognizedTokensNode(LexerNode): return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)) def __eq__(self, other): - if isinstance(other, tuple): - if len(other) != 3: - return False - return self.start == other[0] and self.end == other[1] and self.source == other[2] + if isinstance(other, utnode): + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source if not isinstance(other, UnrecognizedTokensNode): return False @@ -80,6 +64,9 @@ class UnrecognizedTokensNode(LexerNode): self.end == other.end and \ self.source == other.source + def __hash__(self): + return hash((self.start, self.end, self.source)) + def __repr__(self): return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')" @@ -99,17 +86,14 @@ class ConceptNode(LexerNode): self.source = BaseParser.get_text_from_tokens(self.tokens) def __eq__(self, other): - if isinstance(other, tuple): - if len(other) 
== 2: - return self.concept.key == other[0] and self.source == other[1] - else: - return self.concept.key == other[0] and \ - self.start == other[1] and \ - self.end == other[2] and \ - self.source == other[3] + if isinstance(other, cnode): + return self.concept.key == other.concept_key and \ + self.start == other.start and \ + self.end == other.end and \ + self.source == other.source - # if not super().__eq__(other): - # return False + if isinstance(other, short_cnode): + return self.concept.key == other.concept_key and self.source == other.source if not isinstance(other, ConceptNode): return False @@ -127,6 +111,42 @@ class ConceptNode(LexerNode): return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')" +class SourceCodeNode(LexerNode): + """ + Returned when some source code (like Python source code is recognized) + """ + + def __init__(self, node, start, end, tokens=None, source=None): + super().__init__(start, end, tokens, source) + self.node = node # The PythonNode (or whatever language node) that is found + + def __eq__(self, other): + if isinstance(other, scnode): + return self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + if not isinstance(other, SourceCodeNode): + return False + + return self.node == other.node and \ + self.start == other.start and \ + self.end == other.end and \ + self.source == other.source + + def __hash__(self): + return hash((self.start, self.end, self.source)) + + def __repr__(self): + return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')" + + +cnode = namedtuple("ConceptNode", "concept_key start end source") +short_cnode = namedtuple("ConceptNode", "concept_key source") +utnode = namedtuple("UnrecognizedTokensNode", "start end source") +scnode = namedtuple("SourceCodeNode", "start end source") + + class NonTerminalNode(LexerNode): """ Returned by the ConceptLexerParser @@ -146,9 +166,6 @@ class NonTerminalNode(LexerNode): return name + sub_names def __eq__(self, other): - # if not super().__eq__(other): - # return False - if not isinstance(other, NonTerminalNode): return False @@ -176,9 +193,6 @@ class TerminalNode(LexerNode): return name + f"'{self.value}'" def __eq__(self, other): - # if not super().__eq__(other): - # return False - if not isinstance(other, TerminalNode): return False diff --git a/parsers/ConceptsWithConceptsParser.py b/parsers/ConceptsWithConceptsParser.py new file mode 100644 index 0000000..9f35f47 --- /dev/null +++ b/parsers/ConceptsWithConceptsParser.py @@ -0,0 +1,110 @@ +from core.builtin_concepts import BuiltinConcepts +from core.tokenizer import TokenKind, Token +from parsers.BaseParser import BaseParser +from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode +from parsers.MultipleConceptsParser import MultipleConceptsParser +from core.concept import VARIABLE_PREFIX +import logging + +multiple_concepts_parser = MultipleConceptsParser() + + +class ConceptsWithConceptsParser(BaseParser): + def __init__(self, **kwargs): + super().__init__("ConceptsWithConcepts", 25) + + @staticmethod + def get_tokens(nodes): + tokens = [] + + for node in nodes: + if isinstance(node, ConceptNode): + index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column + tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column)) + else: + for token in node.tokens: + if token.type == TokenKind.EOF: + break + elif token.type in (TokenKind.NEWLINE, 
TokenKind.WHITESPACE): + continue + else: + tokens.append(token) + + return tokens + + @staticmethod + def get_key(nodes): + key = "" + index = 0 + for node in nodes: + if key: + key += " " + + if isinstance(node, UnrecognizedTokensNode): + key += node.source.strip() + else: + key += f"{VARIABLE_PREFIX}{index}" + index += 1 + + return key + + def finalize_concept(self, context, concept, nodes): + index = 0 + for node in nodes: + + if isinstance(node, ConceptNode): + prop_name = list(concept.props.keys())[index] + concept.cached_asts[prop_name] = node.concept + context.log( + self.verbose_log, + f"Setting property '{prop_name}='{node.concept}'.", + self.name) + index += 1 + elif isinstance(node, SourceCodeNode): + prop_name = list(concept.props.keys())[index] + sheerka = context.sheerka + value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node) + concept.cached_asts[prop_name] = [context.sheerka.ret(self.name, True, value)] + context.log( + self.verbose_log, + f"Setting property '{prop_name}'='Python({node.source})'.", + self.name) + index += 1 + + return concept + + def parse(self, context, text): + sheerka = context.sheerka + if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT): + return None + + if not text.parser == multiple_concepts_parser: + return None + + nodes = text.body + + concept_key = self.get_key(nodes) + concept = sheerka.new(concept_key) + if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): + return sheerka.ret( + self.name, + False, + sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body)) + + concepts = concept if hasattr(concept, "__iter__") else [concept] + for concept in concepts: + self.finalize_concept(context, concept, nodes) + + res = [] + for concept in concepts: + res.append(sheerka.ret( + self.name, + True, + sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=text.source, + body=concept, + try_parsed=None))) + + return res[0] if len(res) == 1 else res diff --git a/parsers/MultipleConceptsParser.py b/parsers/MultipleConceptsParser.py index 6c37ee2..5f4c47a 100644 --- a/parsers/MultipleConceptsParser.py +++ b/parsers/MultipleConceptsParser.py @@ -1,8 +1,11 @@ +import ast + from core.builtin_concepts import BuiltinConcepts from core.tokenizer import TokenKind from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode +from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode import core.utils +from parsers.PythonParser import PythonParser concept_lexer_parser = ConceptLexerParser() @@ -18,6 +21,25 @@ class MultipleConceptsParser(BaseParser): def __init__(self, **kwargs): BaseParser.__init__(self, "MultipleConcepts", 45) + @staticmethod + def finalize(nodes_found, unrecognized_tokens): + if not unrecognized_tokens: + return nodes_found, unrecognized_tokens + + unrecognized_tokens.fix_source() + if unrecognized_tokens.not_whitespace(): + nodes_found = core.utils.product(nodes_found, [unrecognized_tokens]) + + return nodes_found, None + + @staticmethod + def create_or_add(unrecognized_tokens, token, index): + if unrecognized_tokens: + unrecognized_tokens.add_token(token, index) + else: + unrecognized_tokens = UnrecognizedTokensNode(index, index, [token]) + return unrecognized_tokens + def parse(self, context, text): sheerka = context.sheerka if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT): @@ -29,50 +51,42 @@ class 
MultipleConceptsParser(BaseParser): sheerka = context.sheerka nodes = text.value nodes_found = [[]] - source = "" concepts_only = True for node in nodes: if isinstance(node, UnrecognizedTokensNode): unrecognized_tokens = None - for i, token in enumerate(node.tokens): - index = node.start + i + i = 0 - if token.type == TokenKind.IDENTIFIER: - # it may be a concept - concept = context.new_concept(token.value) - if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept): - # finish processing unrecognized_tokens - if unrecognized_tokens: - unrecognized_tokens.fix_source() - source += unrecognized_tokens.source - if unrecognized_tokens.not_whitespace(): - nodes_found = core.utils.product(nodes_found, [unrecognized_tokens]) - unrecognized_tokens = None + while i < len(node.tokens): - source += token.value - concepts = concept if hasattr(concept, "__iter__") else [concept] - concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts] - nodes_found = core.utils.product(nodes_found, concepts_nodes) - continue - else: - # it cannot be a concept - concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE + token_index = node.start + i + token = node.tokens[i] - if unrecognized_tokens: - unrecognized_tokens.add_token(token, index) - else: - unrecognized_tokens = UnrecognizedTokensNode(index, index, [token]) + concepts_nodes = self.get_concepts_nodes(context, token_index, token) + if concepts_nodes is not None: + nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) + nodes_found = core.utils.product(nodes_found, concepts_nodes) + i += 1 + continue - if unrecognized_tokens: - unrecognized_tokens.fix_source() - source += unrecognized_tokens.source - if unrecognized_tokens.not_whitespace(): - nodes_found = core.utils.product(nodes_found, [unrecognized_tokens]) + source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:]) + if source_code_node: + nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) + nodes_found = core.utils.product(nodes_found, [source_code_node]) + i += len(source_code_node.tokens) + continue + + # not a concept nor some source code + unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index) + concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE) + i += 1 + + # finish processing if needed + nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens) else: nodes_found = core.utils.product(nodes_found, [node]) - source += node.source ret = [] for choice in nodes_found: @@ -83,14 +97,68 @@ class MultipleConceptsParser(BaseParser): sheerka.new( BuiltinConcepts.PARSER_RESULT, parser=self, - source=source, + source=text.source, body=choice, try_parsed=None)) ) if len(ret) == 1: - self.log_result(context, source, ret[0]) + self.log_result(context, text.source, ret[0]) return ret[0] else: - self.log_multiple_results(context, source, ret) + self.log_multiple_results(context, text.source, ret) return ret + + @staticmethod + def get_concepts_nodes(context, index, token): + """ + Tries to recognize a concept + from the univers of all known concepts + """ + + if token.type != TokenKind.IDENTIFIER: + return None + + concept = context.new_concept(token.value) + if hasattr(concept, "__iter__") or context.sheerka.is_known(concept): + concepts = concept if hasattr(concept, "__iter__") else [concept] + concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts] + 
return concepts_nodes + + return None + + @staticmethod + def get_source_code_node(context, index, tokens): + """ + Tries to recognize source code. + For the time being, only Python is supported + :param context: + :param tokens: + :param index: + :return: + """ + + if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF): + return None + + end_index = len(tokens) + while end_index > 0: + parser = PythonParser() + tokens_to_parse = tokens[:end_index] + res = parser.parse(context, tokens_to_parse) + if res.status: + # only expression are accepted + ast_ = res.value.value.ast_ + if not isinstance(ast_, ast.Expression): + return None + try: + compiled = compile(ast_, "", "eval") + eval(compiled, {}, {}) + except Exception: + return None + + source = BaseParser.get_text_from_tokens(tokens_to_parse) + return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source) + end_index -= 1 + + return None diff --git a/parsers/PythonParser.py b/parsers/PythonParser.py index a3333c5..6788660 100644 --- a/parsers/PythonParser.py +++ b/parsers/PythonParser.py @@ -5,6 +5,8 @@ from dataclasses import dataclass, field import ast import logging +from parsers.ConceptLexerParser import ConceptNode + log = logging.getLogger(__name__) @@ -22,7 +24,7 @@ class PythonNode(Node): def __init__(self, source, ast_, concepts=None): self.source = source self.ast_ = ast_ - self.concepts = concepts or {} + self.concepts = concepts or {} # when concepts are recognized in the expression # def __repr__(self): # return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")" @@ -133,3 +135,80 @@ class PythonGetNamesVisitor(ast.NodeVisitor): def visit_Name(self, node): self.names.add(node.id) + +class LexerNodeParserHelperForPython: + """Helper class to parse mix of concepts and Python""" + + def __init__(self): + self.identifiers = {} # cache for already created identifier (the key is id(concept)) + self.identifiers_key = {} # number of identifiers with the same root (prefix) + + def _get_identifier(self, concept): + """ + Get an identifier for a concept. 
+ Make sure to return the same identifier if the same concept + Make sure to return a different identifier if same name but different concept + + Internal function because I don't want identifiers, identifiers_key and python_ids_mappings + to be instance variables + I would like to keep this parser as stateless as possible + :param concept: + :return: + """ + if id(concept) in self.identifiers: + return self.identifiers[id(concept)] + + identifier = "__C__" + self._sanitize(concept.key or concept.name) + if concept.id: + identifier += "__" + concept.id + + if identifier in self.identifiers_key: + self.identifiers_key[identifier] += 1 + identifier += f"_{self.identifiers_key[identifier]}" + else: + self.identifiers_key[identifier] = 0 + + identifier += "__C__" + + self.identifiers[id(concept)] = identifier + return identifier + + @staticmethod + def _sanitize(identifier): + res = "" + for c in identifier: + res += c if c.isalnum() else "0" + return res + + def parse(self, context, nodes): + source = "" + to_parse = "" + + concepts = {} # the key is the Python identifier + + for node in nodes: + if isinstance(node, ConceptNode): + source += node.source + if to_parse: + to_parse += " " + concept = node.concept + python_id = self._get_identifier(concept) + to_parse += python_id + concepts[python_id] = concept + else: + source += node.source + to_parse += node.source + + with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context: + sub_context.add_inputs(to_parse=to_parse) + python_parser = PythonParser() + result = python_parser.parse(sub_context, to_parse) + sub_context.add_values(return_values=result) + + if result.status: + python_node = result.body.body + python_node.source = source + python_node.concepts = concepts + return python_node + + return result.body # the error diff --git a/parsers/PythonWithConceptsParser.py b/parsers/PythonWithConceptsParser.py index 8e4b5ea..2d84781 100644 --- a/parsers/PythonWithConceptsParser.py +++ b/parsers/PythonWithConceptsParser.py @@ -37,6 +37,10 @@ class PythonWithConceptsParser(BaseParser): def _get_identifier(c): """ + Get an identifier for a concept. 
+ Make sure to return the same identifier if the same concept + Make sure to return a different identifier if same name but different concept + Internal function because I don't want identifiers, identifiers_key and python_ids_mappings to be instance variables I would like to keep this parser as stateless as possible @@ -99,14 +103,3 @@ class PythonWithConceptsParser(BaseParser): self.name, False, result.body) - - def concept_identifier(self, concept): - if id(concept) in self.identifiers: - return self.identifiers[id(concept)] - - identifier = "__C__" + (concept.key or concept.name) - if concept.id: - identifier += "__" + concept.id - identifier += "__C__" - - return identifier diff --git a/tests/test_BnfParser.py b/tests/test_BnfParser.py index 003d145..eeb1538 100644 --- a/tests/test_BnfParser.py +++ b/tests/test_BnfParser.py @@ -6,7 +6,7 @@ from core.tokenizer import Tokenizer, TokenKind, LexerError from parsers.BaseParser import UnexpectedTokenErrorNode from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ - ConceptLexerParser, ConceptNode, ConceptMatch + ConceptLexerParser, ConceptNode, ConceptMatch, cnode from sdp.sheerkaDataProvider import Event @@ -108,12 +108,12 @@ def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(): res = concept_parser.parse(context, "twenty two") assert res.status - assert res.value.body == [("bar", 0, 2, "twenty two")] + assert res.value.body == [cnode("bar", 0, 2, "twenty two")] res = concept_parser.parse(context, "thirty one") assert res.status - assert res.value.body == [("bar", 0, 2, "thirty one")] + assert res.value.body == [cnode("bar", 0, 2, "thirty one")] res = concept_parser.parse(context, "twenty") assert res.status - assert res.value.body == [("foo", 0, 0, "twenty")] + assert res.value.body == [cnode("foo", 0, 0, "twenty")] diff --git a/tests/test_ConceptComposerEvaluator.py b/tests/test_ConceptComposerEvaluator.py deleted file mode 100644 index fa2794c..0000000 --- a/tests/test_ConceptComposerEvaluator.py +++ /dev/null @@ -1,128 +0,0 @@ -# import pytest -# -# from core.builtin_concepts import ReturnValueConcept, ParserResultConcept -# from core.concept import Concept -# from core.sheerka import Sheerka, ExecutionContext -# from evaluators.BaseEvaluator import BaseEvaluator -# from evaluators.ConceptComposerEvaluator import ConceptComposerEvaluator -# from parsers.BaseParser import BaseParser -# from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence -# from sdp.sheerkaDataProvider import Event -# -# concept_lexer_name = ConceptLexerParser().name -# -# -# def get_context(): -# sheerka = Sheerka(skip_builtins_in_db=True) -# sheerka.initialize("mem://") -# return ExecutionContext("test", Event(), sheerka) -# -# -# def get_return_values(context, grammar, expression): -# parser = ConceptLexerParser() -# parser.initialize(context, grammar) -# -# ret_val = parser.parse(context, expression) -# assert not ret_val.status -# return [ret_val] -# -# -# def init(concepts, grammar, expression): -# context = get_context() -# for c in concepts: -# context.sheerka.add_in_cache(c) -# return_values = get_return_values(context, grammar, expression) -# -# return context, return_values -# -# -# @pytest.mark.parametrize("return_values, expected", [ -# ([ -# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), -# ReturnValueConcept(concept_lexer_name, False, 
ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), -# ReturnValueConcept("not a parser", True, "some value"), -# ], True), -# ([ -# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), -# ], True), -# ([ -# ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not in error"), -# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), -# ], False), -# ([ -# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), -# ReturnValueConcept(concept_lexer_name, True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), -# ], False), -# ([ -# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), -# ReturnValueConcept(concept_lexer_name, False, "some value"), -# ], False), -# ([ -# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"), -# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=["not a concept"])), -# ], False), -# ([ -# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", False, "evaluator in error"), -# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), -# ReturnValueConcept("not a parser", True, "some value"), -# ], False), -# ([ -# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", True, "evaluator"), -# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), -# ReturnValueConcept("not a parser", True, "some value"), -# ], False), -# ]) -# def test_i_can_match(return_values, expected): -# context = get_context() -# assert ConceptComposerEvaluator().matches(context, return_values) == expected -# -# -# def test_i_can_eval_simple_concepts(): -# foo = Concept("foo", body="'foo'") -# bar = Concept("bar", body="'bar'") -# grammar = {} -# context, return_values = init([foo, bar], grammar, "bar foo") -# -# composer = ConceptComposerEvaluator() -# assert composer.matches(context, return_values) -# -# ret_val = composer.eval(context, return_values) -# assert ret_val.status -# assert ret_val.who == composer.name -# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()] -# assert ret_val.value[0].metadata.is_evaluated -# assert ret_val.value[1].metadata.is_evaluated -# assert ret_val.parents == [return_values[0]] -# -# -# def test_i_can_eval_simple_concepts_when_some_are_bnf(): -# foo = Concept("foo", body="'foo'") -# bar = Concept("bar", body="'bar'") -# grammar = {foo: "foo"} -# context, return_values = init([foo, bar], grammar, "bar foo") -# -# composer = ConceptComposerEvaluator() -# assert composer.matches(context, return_values) -# -# ret_val = composer.eval(context, return_values) -# assert ret_val.status -# assert ret_val.who == composer.name -# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()] -# assert ret_val.value[0].metadata.is_evaluated -# assert ret_val.value[1].metadata.is_evaluated -# assert ret_val.parents == [return_values[0]] -# -# -# def test_i_can_eval_simple_concept_and_text(): -# foo = Concept("foo", body="'foo'") -# grammar = {} -# context, return_values = init([foo], grammar, "'bar' foo") -# -# composer = ConceptComposerEvaluator() -# assert composer.matches(context, return_values) -# -# ret_val = composer.eval(context, return_values) -# assert ret_val.status -# assert ret_val.who == composer.name -# assert ret_val.value == "bar foo" -# assert ret_val.parents == 
[return_values[0]] diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py index c579a64..eae1ab0 100644 --- a/tests/test_ConceptLexerParser.py +++ b/tests/test_ConceptLexerParser.py @@ -6,7 +6,7 @@ from core.sheerka import Sheerka, ExecutionContext from core.tokenizer import Tokenizer, TokenKind, Token from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \ - UnrecognizedTokensNode + UnrecognizedTokensNode, cnode, short_cnode from sdp.sheerkaDataProvider import Event @@ -163,7 +163,7 @@ def test_i_always_choose_the_longest_match(): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [("foo", 0, 4, "one two three")] + assert return_value == [cnode("foo", 0, 4, "one two three")] def test_i_can_match_several_sequences(): @@ -176,8 +176,8 @@ def test_i_can_match_several_sequences(): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert return_value == [ - ("foo", 0, 4, "one two three"), - ("bar", 6, 8, "one two"), + cnode("foo", 0, 4, "one two three"), + cnode("bar", 6, 8, "one two"), ] @@ -189,13 +189,13 @@ def test_i_can_match_ordered_choice(): res1 = parser.parse(context, "one") assert res1.status assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) - assert res1.value.body == [("foo", 0, 0, "one")] + assert res1.value.body == [cnode("foo", 0, 0, "one")] assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)]) res2 = parser.parse(context, "two") assert res2.status assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res2.value.body == [("foo", 0, 0, "two")] + assert res2.value.body == [cnode("foo", 0, 0, "two")] assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)]) res3 = parser.parse(context, "three") @@ -259,11 +259,11 @@ def test_i_can_mix_ordered_choices_and_sequences(): res = parser.parse(context, "twenty thirty") assert res.status - assert res.value.value == [("foo", 0, 2, "twenty thirty")] + assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")] res = parser.parse(context, "one") assert res.status - assert res.value.value == [("foo", 0, 0, "one")] + assert res.value.value == [cnode("foo", 0, 0, "one")] def test_i_cannot_parse_empty_optional(): @@ -319,11 +319,11 @@ def test_i_can_parse_sequence_ending_with_optional(): res = parser.parse(context, "one two three") assert res.status - assert res.value.body == [("foo", 0, 4, "one two three")] + assert res.value.body == [cnode("foo", 0, 4, "one two three")] res = parser.parse(context, "one two") assert res.status - assert res.value.body == [("foo", 0, 2, "one two")] + assert res.value.body == [cnode("foo", 0, 2, "one two")] def test_i_can_parse_sequence_with_optional_in_between(): @@ -335,11 +335,11 @@ def test_i_can_parse_sequence_with_optional_in_between(): res = parser.parse(context, "one two three") assert res.status - assert res.value.body == [("foo", 0, 4, "one two three")] + assert res.value.body == [cnode("foo", 0, 4, "one two three")] res = parser.parse(context, "one three") assert res.status - assert res.value.body == [("foo", 0, 2, "one three")] + assert res.value.body == [cnode("foo", 0, 2, "one three")] def test_i_cannot_parse_wrong_input_with_optional(): @@ -370,13 +370,13 @@ def test_i_can_use_reference(): 
assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [("foo", 0, 2, "one two")] + assert res[0].value.body == [cnode("foo", 0, 2, "one two")] concept_found_1 = res[0].value.body[0].concept assert cbody(concept_found_1) == DoNotResolve("one two") assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [("bar", 0, 2, "one two")] + assert res[1].value.body == [cnode("bar", 0, 2, "one two")] concept_found_2 = res[1].value.body[0].concept # the body and the prop['foo'] are the same concept 'foo' assert cbody(concept_found_2) == get_expected(foo, "one two") @@ -400,13 +400,13 @@ def test_i_can_use_a_reference_with_a_body(): assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [("foo", 0, 2, "one two")] + assert res[0].value.body == [cnode("foo", 0, 2, "one two")] concept_found_1 = res[0].value.body[0].concept assert concept_found_1.body == "'foo'" assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [("bar", 0, 2, "one two")] + assert res[1].value.body == [cnode("bar", 0, 2, "one two")] concept_found_2 = res[1].value.body[0].concept # the body and the prop['foo'] are the same concept 'foo' assert cbody(concept_found_2) == foo @@ -430,20 +430,20 @@ def test_i_can_use_context_reference_with_multiple_levels(): assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [("foo", 0, 2, "one two")] + assert res[0].value.body == [cnode("foo", 0, 2, "one two")] concept_found_1 = res[0].value.body[0].concept assert cbody(concept_found_1) == DoNotResolve("one two") assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [("bar", 0, 2, "one two")] + assert res[1].value.body == [cnode("bar", 0, 2, "one two")] concept_found_2 = res[1].value.body[0].concept assert cbody(concept_found_2) == get_expected(foo, "one two") assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) assert res[2].status assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) - assert res[2].value.body == [("baz", 0, 2, "one two")] + assert res[2].value.body == [cnode("baz", 0, 2, "one two")] concept_found_3 = res[2].value.body[0].concept expected_foo = get_expected(foo, "one two") assert cbody(concept_found_3) == get_expected(bar, expected_foo) @@ -465,8 +465,8 @@ def test_order_is_not_important_when_using_references(): res = parser.parse(context, "one two") assert len(res) == 2 - assert res[0].value.body == [("bar", 0, 2, "one two")] - assert res[1].value.body == [("foo", 0, 2, "one two")] + assert res[0].value.body == [cnode("bar", 0, 2, "one two")] + assert res[1].value.body == [cnode("foo", 0, 2, "one two")] def test_i_can_parse_when_reference(): @@ -477,21 +477,21 @@ def test_i_can_parse_when_reference(): res = parser.parse(context, "twenty two") assert res.status - assert res.value.body == [("bar", 0, 2, "twenty two")] + assert res.value.body == [cnode("bar", 0, 2, "twenty two")] concept_found = res.value.body[0].concept assert cbody(concept_found) == DoNotResolve("twenty two") assert cprop(concept_found, "foo") == get_expected(foo, "twenty") res = parser.parse(context, "thirty one") assert res.status - assert res.value.body == [("bar", 0, 2, "thirty 
one")] + assert res.value.body == [cnode("bar", 0, 2, "thirty one")] concept_found = res.value.body[0].concept assert cbody(concept_found) == DoNotResolve("thirty one") assert cprop(concept_found, "foo") == get_expected(foo, "thirty") res = parser.parse(context, "twenty") assert res.status - assert res.value.body == [("foo", 0, 0, "twenty")] + assert res.value.body == [cnode("foo", 0, 0, "twenty")] concept_found = res.value.body[0].concept assert cbody(concept_found) == DoNotResolve("twenty") @@ -504,14 +504,14 @@ def test_i_can_parse_when_reference_has_a_body(): res = parser.parse(context, "twenty two") assert res.status - assert res.value.body == [("bar", 0, 2, "twenty two")] + assert res.value.body == [cnode("bar", 0, 2, "twenty two")] concept_found = res.value.body[0].concept assert cbody(concept_found) == DoNotResolve("twenty two") assert cprop(concept_found, "foo") == foo res = parser.parse(context, "twenty") assert res.status - assert res.value.body == [("foo", 0, 0, "twenty")] + assert res.value.body == [cnode("foo", 0, 0, "twenty")] concept_found = res.value.body[0].concept assert concept_found.body == "'one'" @@ -529,14 +529,14 @@ def test_i_can_parse_multiple_results(): assert len(res) == 2 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [("bar", 0, 2, "one two")] + assert res[0].value.body == [cnode("bar", 0, 2, "one two")] concept_found_0 = res[0].value.body[0].concept assert cbody(concept_found_0) == DoNotResolve("one two") assert len(concept_found_0.props) == 0 assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [("foo", 0, 2, "one two")] + assert res[1].value.body == [cnode("foo", 0, 2, "one two")] concept_found_1 = res[1].value.body[0].concept assert cbody(concept_found_1) == DoNotResolve("one two") assert len(concept_found_1.props) == 0 @@ -555,19 +555,19 @@ def test_i_can_parse_multiple_results_times_two(): assert len(res) == 4 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [("bar", "one two"), ("bar", "one two")] + assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")] assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [("foo", "one two"), ("bar", "one two")] + assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")] assert res[2].status assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) - assert res[2].value.body == [("bar", "one two"), ("foo", "one two")] + assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")] assert res[3].status assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT) - assert res[3].value.body == [("foo", "one two"), ("foo", "one two")] + assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")] def test_i_can_parse_multiple_results_when_reference(): @@ -589,11 +589,11 @@ def test_i_can_parse_multiple_results_when_reference(): assert len(res) == 2 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [("bar", 0, 0, "twenty")] + assert res[0].value.body == [cnode("bar", 0, 0, "twenty")] assert res[1].status assert context.sheerka.isinstance(res[1].value, 
BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [("foo", 0, 0, "twenty")] + assert res[1].value.body == [cnode("foo", 0, 0, "twenty")] def test_i_can_parse_concept_reference_that_is_not_in_grammar(): @@ -608,14 +608,14 @@ def test_i_can_parse_concept_reference_that_is_not_in_grammar(): res = parser.parse(context, "twenty two") assert res.status - assert res.value.body == [("foo", 0, 2, "twenty two")] + assert res.value.body == [cnode("foo", 0, 2, "twenty two")] concept_found = res.value.body[0].concept assert cbody(concept_found) == DoNotResolve("twenty two") assert cprop(concept_found, "two") == get_expected(two, "two") res = parser.parse(context, "twenty one") assert res.status - assert res.value.body == [("foo", 0, 2, "twenty one")] + assert res.value.body == [cnode("foo", 0, 2, "twenty one")] def test_i_can_parse_zero_or_more(): @@ -625,7 +625,7 @@ def test_i_can_parse_zero_or_more(): context, res, wrapper, return_value = execute([foo], grammar, "one one") assert res.status - assert return_value == [("foo", 0, 2, "one one")] + assert return_value == [cnode("foo", 0, 2, "one one")] assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]) concept_found = return_value[0].concept @@ -639,11 +639,11 @@ def test_i_can_parse_sequence_and_zero_or_more(): res = parser.parse(context, "one one two") assert res.status - assert res.value.value == [("foo", 0, 4, "one one two")] + assert res.value.value == [cnode("foo", 0, 4, "one one two")] res = parser.parse(context, "two") assert res.status - assert res.value.value == [("foo", 0, 0, "two")] + assert res.value.value == [cnode("foo", 0, 0, "two")] def test_i_cannot_parse_zero_and_more_when_wrong_entry(): @@ -657,7 +657,7 @@ def test_i_cannot_parse_zero_and_more_when_wrong_entry(): res = parser.parse(context, "one two") assert not res.status assert res.value.value == [ - ("foo", 0, 0, "one"), + cnode("foo", 0, 0, "one"), UnrecognizedTokensNode(1, 2, [t(" "), t("two")]) ] @@ -675,7 +675,7 @@ def test_i_can_parse_zero_and_more_with_separator(): context, res, wrapper, return_value = execute([foo], grammar, "one, one , one") assert res.status - assert return_value == [("foo", 0, 7, "one, one , one")] + assert return_value == [cnode("foo", 0, 7, "one, one , one")] def test_that_zero_and_more_is_greedy(): @@ -686,7 +686,7 @@ def test_that_zero_and_more_is_greedy(): context, res, wrapper, return_value = execute([foo], grammar, "one one one") assert res.status - assert return_value == [("foo", 0, 4, "one one one")] + assert return_value == [cnode("foo", 0, 4, "one one one")] def test_i_can_parse_one_and_more(): @@ -696,7 +696,7 @@ def test_i_can_parse_one_and_more(): context, res, wrapper, return_value = execute([foo], grammar, "one one") assert res.status - assert return_value == [("foo", 0, 2, "one one")] + assert return_value == [cnode("foo", 0, 2, "one one")] assert return_value[0].underlying == u(grammar[foo], 0, 2, [ u("one", 0, 0), u("one", 2, 2)]) @@ -709,7 +709,7 @@ def test_i_can_parse_sequence_and_one_or_more(): res = parser.parse(context, "one one two") assert res.status - assert res.value.value == [("foo", 0, 4, "one one two")] + assert res.value.value == [cnode("foo", 0, 4, "one one two")] res = parser.parse(context, "two") assert not res.status @@ -725,7 +725,7 @@ def test_i_can_parse_one_and_more_with_separator(): context, res, wrapper, return_value = execute([foo], grammar, "one, one , one") assert res.status - assert return_value == [("foo", 0, 7, "one, one , one")] + assert return_value == 
[cnode("foo", 0, 7, "one, one , one")] assert return_value[0].underlying == u(grammar[foo], 0, 7, [ u("one", 0, 0), u("one", 3, 3), @@ -740,7 +740,7 @@ def test_that_one_and_more_is_greedy(): context, res, wrapper, return_value = execute([foo], grammar, "one one one") assert res.status - assert return_value == [("foo", 0, 4, "one one one")] + assert return_value == [cnode("foo", 0, 4, "one one one")] def test_i_can_detect_infinite_recursion(): @@ -785,9 +785,9 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): res = parser.parse(context, "foo") assert len(res) == 2 assert res[0].status - assert res[0].value.body == [("bar", 0, 0, "foo")] + assert res[0].value.body == [cnode("bar", 0, 0, "foo")] assert res[1].status - assert res[1].value.body == [("foo", 0, 0, "foo")] + assert res[1].value.body == [cnode("foo", 0, 0, "foo")] def test_i_can_detect_indirect_infinite_recursion_with_sequence(): @@ -912,7 +912,7 @@ def test_i_cannot_parse_when_wrong_sequence(): assert not res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert return_value == [ - ("foo", "one two three"), + short_cnode("foo", "one two three"), UnrecognizedTokensNode(5, 6, [t(" "), t("one")]) ] @@ -945,14 +945,14 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end(): assert not res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ - ("bar", 0, 2, "one two"), + cnode("bar", 0, 2, "one two"), UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) ] assert not res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ - ("foo", 0, 2, "one two"), + cnode("foo", 0, 2, "one two"), UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) ] @@ -973,14 +973,14 @@ def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens(): assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - ("bar", 4, 6, "one two"), + cnode("bar", 4, 6, "one two"), ] assert not res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - ("foo", 4, 6, "one two"), + cnode("foo", 4, 6, "one two"), ] @@ -999,7 +999,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(): assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - ("bar", 4, 6, "one two"), + cnode("bar", 4, 6, "one two"), UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), ] @@ -1007,7 +1007,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens(): assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), - ("foo", 4, 6, "one two"), + cnode("foo", 4, 6, "one two"), UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), ] @@ -1029,17 +1029,17 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle(): assert not res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert res[0].value.body == [ - ("bar", 0, 2, "one 
two"), + cnode("bar", 0, 2, "one two"), UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), - ("baz", 8, 8, "six"), + cnode("baz", 8, 8, "six"), ] assert not res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert res[1].value.body == [ - ("foo", 0, 2, "one two"), + cnode("foo", 0, 2, "one two"), UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), - ("baz", 8, 8, "six"), + cnode("baz", 8, 8, "six"), ] @@ -1052,7 +1052,7 @@ def test_i_can_get_the_inner_concept_when_possible(): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [("foo", 0, 0, "one")] + assert return_value == [cnode("foo", 0, 0, "one")] concept_found = return_value[0].concept assert cbody(concept_found) == get_expected(one, "one") assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) @@ -1069,7 +1069,7 @@ def test_i_can_get_the_inner_concept_when_possible_with_rule_name(): assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [("foo", 0, 0, "one")] + assert return_value == [cnode("foo", 0, 0, "one")] concept_found = return_value[0].concept assert cbody(concept_found) == get_expected(one, "one") assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) @@ -1086,7 +1086,7 @@ def test_i_get_multiple_props_when_zero_or_more(): context, res, wrapper, return_value = execute([foo, one], grammar, "one one one") assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [("foo", 0, 4, "one one one")] + assert return_value == [cnode("foo", 0, 4, "one one one")] concept_found = return_value[0].concept assert cbody(concept_found) == DoNotResolve("one one one") assert len(concept_found.cached_asts["one"]) == 3 @@ -1106,7 +1106,7 @@ def test_i_get_multiple_props_when_zero_or_more_and_different_values(): context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok") assert res.status assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) - assert return_value == [("foo", "one ok un ok uno ok")] + assert return_value == [short_cnode("foo", "one ok un ok uno ok")] concept_found = return_value[0].concept assert cprop(concept_found, "one")[0] == get_expected(one, "one") assert cprop(concept_found, "one")[1] == get_expected(one, "un") diff --git a/tests/test_ConceptsWithConceptsParser.py b/tests/test_ConceptsWithConceptsParser.py new file mode 100644 index 0000000..a362844 --- /dev/null +++ b/tests/test_ConceptsWithConceptsParser.py @@ -0,0 +1,204 @@ +import ast + +import pytest + +from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts +from core.concept import Concept +from core.sheerka import Sheerka, ExecutionContext +from core.tokenizer import Token, TokenKind, Tokenizer +from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode +from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser +from parsers.MultipleConceptsParser import MultipleConceptsParser +from parsers.PythonParser import PythonNode +from sdp.sheerkaDataProvider import Event + +multiple_concepts_parser = MultipleConceptsParser() + + +def get_context(): + sheerka = Sheerka(skip_builtins_in_db=True) + sheerka.initialize("mem://") + return ExecutionContext("test", Event(), sheerka) + + +def get_ret_from(*args): + result = [] + index = 0 + source = "" + 
for item in args: + if isinstance(item, Concept): + tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)] + result.append(ConceptNode(item, index, index, tokens, item.name)) + index += 1 + source += item.name + elif isinstance(item, PythonNode): + tokens = list(Tokenizer(item.source))[:-1] # strip trailing EOF + result.append(SourceCodeNode(item, index, index + len(tokens) - 1, tokens, item.source)) + index += len(tokens) + source += item.source + else: + tokens = list(Tokenizer(item))[:-1] # strip trailing EOF + result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens)) + index += len(tokens) + source += item + + return ReturnValueConcept( + "who", + False, + ParserResultConcept(parser=multiple_concepts_parser, value=result, source=source)) + + +def init(concepts, inputs): + context = get_context() + for concept in concepts: + context.sheerka.create_new_concept(context, concept) + + return context, get_ret_from(*inputs) + + +def execute(concepts, inputs): + context, input_return_values = init(concepts, inputs) + + parser = ConceptsWithConceptsParser() + result = parser.parse(context, input_return_values.body) + + wrapper = result.body + return_value = result.body.body + + return context, parser, result, wrapper, return_value + + +@pytest.mark.parametrize("text, interested", [ + ("not parser result", False), + (ParserResultConcept(parser="not multiple_concepts_parser"), False), + (ParserResultConcept(parser=multiple_concepts_parser, value=[]), True), +]) +def test_not_interested(text, interested): + context = get_context() + + res = ConceptsWithConceptsParser().parse(context, text) + if interested: + assert res is not None + else: + assert res is None + + +def test_i_can_parse_composition_of_concepts(): + foo = Concept("foo") + bar = Concept("bar") + plus = Concept("a plus b").set_prop("a").set_prop("b") + + context, parser, result, wrapper, return_value = execute([foo, bar, plus], [foo, " plus ", bar]) + + assert result.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert result.who == wrapper.parser.name + assert wrapper.source == "foo plus bar" + assert context.sheerka.isinstance(return_value, plus) + + assert return_value.cached_asts["a"] == foo + assert return_value.cached_asts["b"] == bar + + # sanity check, I can evaluate the result + evaluated = context.sheerka.evaluate_concept(context, return_value) + assert evaluated.key == return_value.key + assert evaluated.get_prop("a") == foo.init_key() + assert evaluated.get_prop("b") == bar.init_key() + + +def test_i_can_parse_when_composition_of_source_code(): + plus = Concept("a plus b", body="a + b").set_prop("a").set_prop("b") + left = PythonNode("1+1", ast.parse("1+1", mode="eval")) + right = PythonNode("2+2", ast.parse("2+2", mode="eval")) + context, parser, result, wrapper, return_value = execute([plus], [left, " plus ", right]) + + assert result.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert result.who == wrapper.parser.name + assert wrapper.source == "1+1 plus 2+2" + assert context.sheerka.isinstance(return_value, plus) + + left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left) + right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right) + assert return_value.cached_asts["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)] + assert return_value.cached_asts["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)] + + # sanity check, I can evaluate 
the result + evaluated = context.sheerka.evaluate_concept(context, return_value) + assert evaluated.key == return_value.key + assert evaluated.get_prop("a") == 2 + assert evaluated.get_prop("b") == 4 + assert evaluated.body == 6 + + +def test_i_can_parse_when_mix_of_concept_and_code(): + plus = Concept("a plus b").set_prop("a").set_prop("b") + code = PythonNode("1+1", ast.parse("1+1", mode="eval")) + foo = Concept("foo") + context, parser, result, wrapper, return_value = execute([plus, foo], [foo, " plus ", code]) + + assert result.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert result.who == wrapper.parser.name + assert wrapper.source == "foo plus 1+1" + assert context.sheerka.isinstance(return_value, plus) + + code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code) + assert return_value.cached_asts["a"] == foo + assert return_value.cached_asts["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)] + + # sanity check, I can evaluate the result + evaluated = context.sheerka.evaluate_concept(context, return_value) + assert evaluated.key == return_value.key + assert evaluated.get_prop("a") == foo.init_key() + assert evaluated.get_prop("b") == 2 + + +def test_i_can_parse_when_multiple_concepts_are_recognized(): + foo = Concept("foo") + bar = Concept("bar") + plus_1 = Concept("a plus b", body="body1").set_prop("a").set_prop("b") + plus_2 = Concept("a plus b", body="body2").set_prop("a").set_prop("b") + + context, input_return_values = init([foo, bar, plus_1, plus_2], [foo, " plus ", bar]) + parser = ConceptsWithConceptsParser() + result = parser.parse(context, input_return_values.body) + + assert len(result) == 2 + + res = result[0] + wrapper = res.value + return_value = res.value.value + assert res.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert res.who == wrapper.parser.name + assert wrapper.source == "foo plus bar" + assert context.sheerka.isinstance(return_value, plus_1) + assert return_value.cached_asts["a"] == foo + assert return_value.cached_asts["b"] == bar + + res = result[1] + wrapper = res.value + return_value = res.value.value + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert res.who == wrapper.parser.name + assert wrapper.source == "foo plus bar" + assert context.sheerka.isinstance(return_value, plus_2) + assert return_value.cached_asts["a"] == foo + assert return_value.cached_asts["b"] == bar + + +def test_i_cannot_parse_when_unknown_concept(): + foo = Concept("foo") + bar = Concept("bar") + + context, input_return_values = init([foo, bar], [foo, " plus ", bar]) + parser = ConceptsWithConceptsParser() + result = parser.parse(context, input_return_values.body) + wrapper = result.body + return_value = result.body.body + + assert not result.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.NOT_FOR_ME) + assert result.who == parser.name + assert return_value == input_return_values.body.body diff --git a/tests/test_ConceptNodeEvaluator.py b/tests/test_LexerNodeEvaluator.py similarity index 61% rename from tests/test_ConceptNodeEvaluator.py rename to tests/test_LexerNodeEvaluator.py index 3772345..98b6b70 100644 --- a/tests/test_ConceptNodeEvaluator.py +++ b/tests/test_LexerNodeEvaluator.py @@ -1,11 +1,13 @@ +import ast + import pytest from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.concept import Concept, ConceptParts, DoNotResolve from core.sheerka import 
Sheerka, ExecutionContext -from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator -from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence, TerminalNode, \ - StrMatch, Optional, OrderedChoice, ZeroOrMore, UnrecognizedTokensNode, ConceptMatch +from evaluators.LexerNodeEvaluator import LexerNodeEvaluator +from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, StrMatch, UnrecognizedTokensNode, SourceCodeNode +from parsers.PythonParser import PythonNode from sdp.sheerkaDataProvider import Event @@ -24,6 +26,18 @@ def from_parsing(context, grammar, expression): return ret_val +def from_fragments(*fragments): + nodes = [] + for fragment in fragments: + if isinstance(fragment, str): + node = PythonNode(fragment, ast.parse(fragment.strip(), mode="eval")) + nodes.append(SourceCodeNode(node, 0, 0, [], fragment)) + else: + nodes.append(ConceptNode(fragment, 0, 0, [], fragment.name)) + + return ReturnValueConcept("somme_name", True, ParserResultConcept(value=nodes)) + + def init(concept, grammar, text): context = get_context() if isinstance(concept, list): @@ -40,12 +54,14 @@ def init(concept, grammar, text): @pytest.mark.parametrize("ret_val, expected", [ (ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True), - (ReturnValueConcept("some_name", True, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), True), - (ReturnValueConcept("some_name", True, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), True), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=[SourceCodeNode(0, 0, [])])), True), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=SourceCodeNode(0, 0, []))), True), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), False), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), False), (ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), False), (ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), False), - (ReturnValueConcept("some_name", False, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), False), - (ReturnValueConcept("some_name", False, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), False), + (ReturnValueConcept("some_name", False, ParserResultConcept(value=[SourceCodeNode(0, 0, [])])), False), + (ReturnValueConcept("some_name", False, ParserResultConcept(value=SourceCodeNode(0, 0, []))), False), (ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")), False), (ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])), False), (ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]), False), @@ -53,7 +69,7 @@ def init(concept, grammar, text): ]) def test_i_can_match(ret_val, expected): context = get_context() - assert ConceptNodeEvaluator().matches(context, ret_val) == expected + assert LexerNodeEvaluator().matches(context, ret_val) == expected def test_concept_is_returned_when_only_one_in_the_list(): @@ -61,9 +77,9 @@ def test_concept_is_returned_when_only_one_in_the_list(): context = get_context() context.sheerka.add_in_cache(foo) - evaluator = ConceptNodeEvaluator() ret_val = from_parsing(context, {foo: StrMatch("foo")}, "foo") + 
evaluator = LexerNodeEvaluator() result = evaluator.eval(context, ret_val) wrapper = result.body return_value = result.body.body @@ -77,3 +93,23 @@ def test_concept_is_returned_when_only_one_in_the_list(): assert return_value.cached_asts[ConceptParts.BODY] == DoNotResolve("foo") assert result.parents == [ret_val] + +def test_concept_python_node_is_returned_when_source_code(): + context = get_context() + foo = Concept("foo") + ret_val = from_fragments(foo, " + 1") + + evaluator = LexerNodeEvaluator() + result = evaluator.eval(context, ret_val) + wrapper = result.body + return_value = result.body.body + + assert result.who == evaluator.name + assert result.status + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert wrapper.parser == evaluator + assert wrapper.source == "foo + 1" + + assert return_value == PythonNode('foo + 1', ast.parse("__C__foo__C__ + 1", mode="eval")) + assert return_value.concepts == {"__C__foo__C__": foo} + assert result.parents == [ret_val] diff --git a/tests/test_MultipleConceptsParser.py b/tests/test_MultipleConceptsParser.py index 16d215f..e288f3d 100644 --- a/tests/test_MultipleConceptsParser.py +++ b/tests/test_MultipleConceptsParser.py @@ -1,8 +1,12 @@ +import pytest + from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext -from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, UnrecognizedTokensNode +from core.tokenizer import Tokenizer, TokenKind, Token +from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, SourceCodeNode from parsers.MultipleConceptsParser import MultipleConceptsParser +from parsers.PythonParser import PythonNode from sdp.sheerkaDataProvider import Event @@ -78,7 +82,7 @@ def test_i_can_parse_when_ending_with_bnf(): assert ret_val.status assert ret_val.who == parser.name assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [("bar", 0, 0, "bar"), ("foo", 2, 6, "foo1 foo2 foo3")] + assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")] assert ret_val.value.source == "bar foo1 foo2 foo3" @@ -94,7 +98,7 @@ def test_i_can_parse_when_starting_with_bnf(): assert ret_val.status assert ret_val.who == parser.name assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) - assert ret_val.value.value == [("foo", 0, 4, "foo1 foo2 foo3"), ("bar", 6, 6, "bar")] + assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")] assert ret_val.value.source == "foo1 foo2 foo3 bar" @@ -112,13 +116,13 @@ def test_i_can_parse_when_concept_are_mixed(): assert ret_val.who == parser.name assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) assert ret_val.value.value == [ - ("baz", 0, 0, "baz"), - ("foo", 2, 6, "foo1 foo2 foo3"), - ("bar", 8, 8, "bar")] + cnode("baz", 0, 0, "baz"), + cnode("foo", 2, 6, "foo1 foo2 foo3"), + cnode("bar", 8, 8, "bar")] assert ret_val.value.source == "baz foo1 foo2 foo3 bar" -def test_i_can_parse_when_multiple_concept_are_matching(): +def test_i_can_parse_when_multiple_concepts_are_matching(): foo = Concept("foo") bar = Concept("bar", body="bar1") baz = Concept("bar", body="bar2") @@ -130,16 +134,35 @@ def test_i_can_parse_when_multiple_concept_are_matching(): assert len(ret_val) == 2 assert ret_val[0].status - assert ret_val[0].value.value == [("foo", 0, 
0, "foo"), ("bar", 2, 2, "bar")] + assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] assert ret_val[0].value.source == "foo bar" assert ret_val[0].value.value[1].concept.body == "bar1" assert ret_val[1].status - assert ret_val[1].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")] + assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")] assert ret_val[1].value.source == "foo bar" assert ret_val[1].value.value[1].concept.body == "bar2" +def test_i_can_parse_when_source_code(): + foo = Concept("foo") + grammar = {foo: "foo"} + context, return_value = init([foo], grammar, "1 foo") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + wrapper = ret_val.value + value = ret_val.value.value + + assert ret_val.status + assert ret_val.who == parser.name + assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) + assert wrapper.source == "1 foo" + assert value == [ + scnode(0, 1, "1 "), + cnode("foo", 2, 2, "foo")] + + def test_i_cannot_parse_when_unrecognized_token(): twenty_two = Concept("twenty two") one = Concept("one") @@ -153,8 +176,56 @@ def test_i_cannot_parse_when_unrecognized_token(): assert ret_val.who == parser.name assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) assert ret_val.value.value == [ - ("twenty two", 0, 2, "twenty two"), - (3, 5, " + "), - ("one", 6, 6, "one") + cnode("twenty two", 0, 2, "twenty two"), + utnode(3, 5, " + "), + cnode("one", 6, 6, "one") ] assert ret_val.value.source == "twenty two + one" + + +def test_i_cannot_parse_when_unknown_concepts(): + twenty_two = Concept("twenty two") + one = Concept("one") + grammar = {twenty_two: Sequence("twenty", "two")} + context, return_value = init([twenty_two, one], grammar, "twenty two plus one") + + parser = MultipleConceptsParser() + ret_val = parser.parse(context, return_value.body) + + assert not ret_val.status + assert ret_val.who == parser.name + assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) + assert ret_val.value.value == [ + cnode("twenty two", 0, 2, "twenty two"), + utnode(3, 5, " plus "), + cnode("one", 6, 6, "one") + ] + assert ret_val.value.source == "twenty two plus one" + + +@pytest.mark.parametrize("text, expected_source, expected_end", [ + ("True", "True", 0), + ("1 == 1", "1 == 1", 5), + ("1!xdf", "1", 0), + ("1", "1", 0), +]) +def test_i_can_get_source_code_node(text, expected_source, expected_end): + tokens = list(Tokenizer(text))[:-1] # strip trailing EOF + + start_index = 5 # a random number different of zero + res = MultipleConceptsParser().get_source_code_node(get_context(), start_index, tokens) + + assert isinstance(res, SourceCodeNode) + assert isinstance(res.node, PythonNode) + assert res.source == expected_source + assert res.start == start_index + assert res.end == start_index + expected_end + + +def test_i_cannot_parse_null_text(): + res = MultipleConceptsParser().get_source_code_node(get_context(), 0, []) + assert res is None + + eof = Token(TokenKind.EOF, "", 0, 0, 0) + res = MultipleConceptsParser().get_source_code_node(get_context(), 0, [eof]) + assert res is None diff --git a/tests/test_PythonWithConceptsParser.py b/tests/test_PythonWithConceptsParser.py index 2e1dff7..5422118 100644 --- a/tests/test_PythonWithConceptsParser.py +++ b/tests/test_PythonWithConceptsParser.py @@ -41,17 +41,19 @@ def to_str_ast(expression): return PythonNode.get_dump(ast.parse(expression, mode="eval")) 
-@pytest.mark.parametrize("text", [ - "not parser result", - ParserResultConcept(value="not a list"), - ParserResultConcept(value=[]), - ParserResultConcept(value=["not a Node"]), +@pytest.mark.parametrize("text, interested", [ + ("not parser result", False), + (ParserResultConcept(parser="not multiple_concepts_parser"), False), + (ParserResultConcept(parser=multiple_concepts_parser, value=[]), True), ]) -def test_not_interested(text): +def test_not_interested(text, interested): context = get_context() res = PythonWithConceptsParser().parse(context, text) - assert res is None + if interested: + assert res is not None + else: + assert res is None def test_i_can_parse_concepts_and_python(): diff --git a/tests/test_sheerka_non_reg.py b/tests/test_sheerka_non_reg.py index 393ae6e..92815d8 100644 --- a/tests/test_sheerka_non_reg.py +++ b/tests/test_sheerka_non_reg.py @@ -401,18 +401,6 @@ def test_i_can_eval_bnf_definitions_from_separate_instances(): assert res[0].value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one two").init_key()) -def test_i_can_eval_a_mix_with_bnf_and_python(): - sheerka = get_sheerka() - - sheerka.evaluate_user_input("def concept one as 1") - sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' one as 20 + one") - - res = sheerka.evaluate_user_input("twenty one + 1") - assert len(res) == 1 - assert res[0].status - assert res[0].body == 22 - - @pytest.mark.parametrize("desc, definitions", [ ("Simple form", [ "def concept one as 1", @@ -450,16 +438,116 @@ def test_i_can_mix_concept_with_python_to_define_numbers(desc, definitions): assert res[0].status assert res[0].body == 22 + res = sheerka.evaluate_user_input("twenty one + one") + assert len(res) == 1 + assert res[0].status + assert res[0].body == 22 + res = sheerka.evaluate_user_input("twenty one + twenty two") assert len(res) == 1 assert res[0].status assert res[0].body == 43 - res = sheerka.evaluate_user_input("twenty one + one") + res = sheerka.evaluate_user_input("1 + twenty one") assert len(res) == 1 assert res[0].status assert res[0].body == 22 + # res = sheerka.evaluate_user_input("1 + 1 + twenty one") + # assert len(res) == 1 + # assert res[0].status + # assert res[0].body == 23 + + +def test_i_can_mix_concept_of_concept(): + sheerka = get_sheerka() + + definitions = [ + "def concept one as 1", + "def concept two as 2", + "def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit", + "def concept a plus b as a + b" + ] + + for definition in definitions: + sheerka.evaluate_user_input(definition) + + # res = sheerka.evaluate_user_input("1 plus 2") + # assert len(res) == 1 + # assert res[0].status + # assert res[0].body.body == 3 + # + # res = sheerka.evaluate_user_input("1 plus one") + # assert len(res) == 1 + # assert res[0].status + # assert res[0].body.body == 2 + + # res = sheerka.evaluate_user_input("1 + 1 plus 1") + # assert len(res) == 1 + # assert res[0].status + # assert res[0].body.body == 3 + + res = sheerka.evaluate_user_input("1 plus twenty one") + assert len(res) == 1 + assert res[0].status + assert res[0].body.body == 22 + + res = sheerka.evaluate_user_input("one plus 1") + assert len(res) == 1 + assert res[0].status + assert res[0].body.body == 2 + + res = sheerka.evaluate_user_input("one plus two") + assert len(res) == 1 + assert res[0].status + assert res[0].body.body == 3 + + res = sheerka.evaluate_user_input("one plus twenty one") + assert len(res) == 1 + assert res[0].status + assert res[0].body.body == 22 + + res = sheerka.evaluate_user_input("twenty 
one plus 1") + assert len(res) == 1 + assert res[0].status + assert res[0].body.body == 22 + + res = sheerka.evaluate_user_input("twenty one plus one") + assert len(res) == 1 + assert res[0].status + assert res[0].body.body == 22 + + res = sheerka.evaluate_user_input("twenty one plus twenty two") + assert len(res) == 1 + assert res[0].status + assert res[0].body.body == 43 + + +# def test_i_can_evaluate_concept_of_concept_when_multiple_choices(): +# sheerka = get_sheerka() +# +# definitions = [ +# "def concept little a where a", +# "def concept blue a where a", +# "def concept little blue a where a", +# "def concept house" +# ] +# +# for definition in definitions: +# sheerka.evaluate_user_input(definition) +# +# ### CAUTION #### +# # this test cannot work !! +# # it is just to hint the result that I would like to achieve +# +# res = sheerka.evaluate_user_input("little blue house") +# assert len(res) == 2 +# assert res[0].status +# assert res[0].body == "little(blue(house))" +# +# assert res[1].status +# assert res[1].body == "little blue(house)" + def test_i_can_say_that_a_concept_isa_another_concept(): sheerka = get_sheerka() diff --git a/tests/test_sheerka_transform.py b/tests/test_sheerka_transform.py index 98a8685..1aa6e74 100644 --- a/tests/test_sheerka_transform.py +++ b/tests/test_sheerka_transform.py @@ -213,6 +213,7 @@ def test_i_can_transform_simple_execution_context(): 'desc': 'this is the desc', 'children': [], 'preprocess': None, + 'inputs': {}, 'values': {}, 'obj': None, 'concepts': {} diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index dd9440b..81c2323 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -4,6 +4,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords def test_i_can_tokenize(): source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:" + source += "$£€!_identifier°~_^\\`#" tokens = list(Tokenizer(source)) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) @@ -40,8 +41,20 @@ def test_i_can_tokenize(): assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21) assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22) assert tokens[34] == Token(TokenKind.CONCEPT, 'name', 81, 6, 23) + assert tokens[35] == Token(TokenKind.DOLLAR, '$', 88, 6, 30) + assert tokens[36] == Token(TokenKind.STERLING, '£', 89, 6, 31) + assert tokens[37] == Token(TokenKind.EURO, '€', 90, 6, 32) + assert tokens[38] == Token(TokenKind.EMARK, '!', 91, 6, 33) + assert tokens[39] == Token(TokenKind.IDENTIFIER, '_identifier', 92, 6, 34) + assert tokens[40] == Token(TokenKind.DEGREE, '°', 103, 6, 45) + assert tokens[41] == Token(TokenKind.TILDE, '~', 104, 6, 46) + assert tokens[42] == Token(TokenKind.UNDERSCORE, '_', 105, 6, 47) + assert tokens[43] == Token(TokenKind.CARAT, '^', 106, 6, 48) + assert tokens[44] == Token(TokenKind.BACK_SLASH, '\\', 107, 6, 49) + assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50) + assert tokens[46] == Token(TokenKind.HASH, '#', 109, 6, 51) - assert tokens[35] == Token(TokenKind.EOF, '', 88, 6, 30) + assert tokens[47] == Token(TokenKind.EOF, '', 110, 6, 52) @pytest.mark.parametrize("text, expected", [