Added simple form of concept composition

This commit is contained in:
2020-01-15 18:38:29 +01:00
parent 51fa9629d0
commit 8152f82c6b
22 changed files with 1105 additions and 544 deletions
+40 -13
View File
@@ -258,6 +258,7 @@ class Sheerka(Concept):
# execution_context.log(logger or self.log, f"Parsing {debug_text}")
with execution_context.push(desc=f"Parsing using {parser.name}") as sub_context:
sub_context.add_inputs(to_parse=to_parse)
res = parser.parse(sub_context, to_parse)
if res is not None:
if hasattr(res, "__iter__"):
@@ -278,6 +279,7 @@ class Sheerka(Concept):
to_process.append(res)
if res.status:
return_value_success_found = True
sub_context.add_values(return_values=res)
if return_value_success_found:
stop_processing = True
@@ -347,8 +349,11 @@ class Sheerka(Concept):
sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)
# process
iteration = 0
while True:
with execution_context.push(desc=f"iteration #{iteration}", iteration=iteration) as iteration_context:
simple_digest = return_values[:]
iteration_context.add_inputs(return_values=simple_digest)
for priority in sorted_priorities:
@@ -358,30 +363,40 @@ class Sheerka(Concept):
for evaluator in grouped_evaluators[priority]:
evaluator = _preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy
# process evaluators that work on return value
sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
with iteration_context.push(desc=sub_context_desc) as sub_context:
sub_context.add_inputs(return_values=original_items)
# process evaluators that work on one simple return value at the time
from evaluators.BaseEvaluator import OneReturnValueEvaluator
if isinstance(evaluator, OneReturnValueEvaluator):
debug_result = []
for item in original_items:
if evaluator.matches(execution_context, item):
result = evaluator.eval(execution_context, item)
if evaluator.matches(sub_context, item):
result = evaluator.eval(sub_context, item)
if result is None:
debug_result.append({"input": item, "return_value": None})
continue
elif isinstance(result, list):
evaluated_items.extend(result)
to_delete.append(item)
if isinstance(result, list):
evaluated_items.extend(result)
elif isinstance(result, ReturnValueConcept):
evaluated_items.append(result)
to_delete.append(item)
else:
error = self.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result,
evaluator=evaluator)
evaluated_items.append(self.ret("sheerka.process", False, error, parents=[item]))
to_delete.append(item)
result = self.ret("sheerka.process", False, error, parents=[item])
evaluated_items.append(result)
debug_result.append({"input": item, "return_value": result})
else:
debug_result.append({"input": item, "return_value": "** No Match **"})
sub_context.add_values(return_values=debug_result)
# process evaluators that work on all return values
else:
if evaluator.matches(execution_context, original_items):
results = evaluator.eval(execution_context, original_items)
if evaluator.matches(sub_context, original_items):
results = evaluator.eval(sub_context, original_items)
if results is None:
continue
if not isinstance(results, list):
@@ -389,17 +404,22 @@ class Sheerka(Concept):
for result in results:
evaluated_items.append(result)
to_delete.extend(result.parents)
sub_context.add_values(return_values=results)
else:
sub_context.add_values(return_values="** No Match **")
return_values = evaluated_items
return_values.extend([item for item in original_items if item not in to_delete])
iteration_context.add_values(return_values=return_values[:])
# have we done something ?
to_compare = return_values[:]
if simple_digest == to_compare:
break
# inc the iteration and continue
execution_context = execution_context.push(iteration=execution_context.iteration + 1)
iteration += 1
# remove all evaluation context that are not reduced
return_values = core.utils.remove_list_from_list(return_values, evaluation_return_values)
@@ -879,11 +899,11 @@ class Sheerka(Concept):
return obj
def is_unknown(self, obj):
def is_known(self, obj):
if not isinstance(obj, Concept):
return True
return obj.key == BuiltinConcepts.UNKNOWN_CONCEPT
return obj.key != str(BuiltinConcepts.UNKNOWN_CONCEPT)
def isinstance(self, a, b):
"""
@@ -1049,6 +1069,8 @@ class ExecutionContext:
self.desc = desc # human description of what is going on
self.children = []
self.preprocess = None
self.inputs = {} # what was the parameters of the execution context
self.values = {} # what was produced by the execution context
self.obj = kwargs.pop("obj", None)
@@ -1105,6 +1127,11 @@ class ExecutionContext:
self.preprocess.add(preprocess)
return self
def add_inputs(self, **kwargs):
for k, v in kwargs.items():
self.inputs[k] = v
return self
def add_values(self, **kwargs):
for k, v in kwargs.items():
self.values[k] = v
+2 -2
View File
@@ -48,7 +48,7 @@ class SheerkaTransform:
self.ids[obj] = self.id_count
if isinstance(obj, Concept):
return self.context_to_dict(obj)
return self.concept_to_dict(obj)
elif isinstance(obj, ExecutionContext):
return self.execution_context_to_dict(obj)
@@ -104,7 +104,7 @@ class SheerkaTransform:
else:
return obj
def context_to_dict(self, obj: Concept):
def concept_to_dict(self, obj: Concept):
to_dict = {
OBJ_TYPE_KEY: SheerkaTransformType.Concept,
OBJ_ID_KEY: self.id_count,
+58
View File
@@ -37,6 +37,8 @@ class TokenKind(Enum):
BACK_SLASH = "bslash" # \
CARAT = "carat" # ^
DOLLAR = "dollar" # $
EURO = "euro" # €
STERLING = "sterling" # £
EMARK = "emark" # !
GREATER = "greater" # >
LESS = "less" # <
@@ -128,6 +130,18 @@ class Tokenizer:
yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "_":
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
else:
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "/":
yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column)
self.i += 1
@@ -205,6 +219,46 @@ class Tokenizer:
yield Token(TokenKind.GREATER, ">", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "!":
yield Token(TokenKind.EMARK, "!", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "`":
yield Token(TokenKind.BACK_QUOTE, "`", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\\":
yield Token(TokenKind.BACK_SLASH, "\\", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "^":
yield Token(TokenKind.CARAT, "^", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "$":
yield Token(TokenKind.DOLLAR, "$", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "€":
yield Token(TokenKind.EURO, "€", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "£":
yield Token(TokenKind.STERLING, "£", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "#":
yield Token(TokenKind.HASH, "#", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "°":
yield Token(TokenKind.DEGREE, "°", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "~":
yield Token(TokenKind.TILDE, "~", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\n" or c == "\r":
newline = self.eat_newline(self.i)
yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
@@ -234,6 +288,10 @@ class Tokenizer:
self.i += len(string)
self.column = 1 if newlines > 0 else self.column + len(string)
self.line += newlines
elif c == "_":
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
self.column += 1
else:
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
+1 -1
View File
@@ -675,7 +675,7 @@ when you input
one two three four five
the list of :code:`[foo, bar]` will be returned by the parser (as return values)
the list of :code:`[foo, bar]` will be returned by the ConceptLexerParser (as return values)
How does it works ?
-109
View File
@@ -1,109 +0,0 @@
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
# from core.concept import Concept
# from core.tokenizer import TokenKind
# from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
# from parsers.BaseParser import BaseParser
# from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, ConceptLexerParser
# import core.utils
#
#
# class ConceptComposerEvaluator(AllReturnValuesEvaluator):
# """
# Try to reassemble parts of concepts from different evaluators
# """
#
# NAME = "ConceptComposer"
#
# def __init__(self):
# super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 40)
#
# def matches(self, context, return_values):
# concept_lexer_parser_name = ConceptLexerParser().name
#
# for return_value in return_values:
# if return_value.who.startswith(BaseParser.PREFIX) and return_value.status:
# return False
#
# if return_value.who.startswith(BaseEvaluator.PREFIX):
# return False
#
# if return_value.who != concept_lexer_parser_name:
# continue
#
# if not isinstance(return_value.value, ParserResultConcept):
# return False
#
# if not (
# isinstance(return_value.value.value, ConceptNode) or
# isinstance(return_value.value.value, UnrecognizedTokensNode) or
# (
# hasattr(return_value.value.value, "__iter__") and
# len(return_value.value.value) > 0 and
# (
# isinstance(return_value.value.value[0], ConceptNode) or
# isinstance(return_value.value.value[0], UnrecognizedTokensNode)
# ))):
# return False
#
# self.eaten = return_value
# return True
#
# return False
#
# def eval(self, context, return_value):
# sheerka = context.sheerka
# nodes = self.eaten.value.value
# temp_res = []
# has_error = False
# concepts_only = True
#
# for node in nodes:
# if isinstance(node, UnrecognizedTokensNode):
# tokens = core.utils.strip_tokens(node.tokens, True)
# for token in tokens:
# if token.type == TokenKind.IDENTIFIER:
# concept = context.new_concept(token.value)
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# has_error = True
# else:
# with context.push(self.name, desc=f"Evaluating '{concept}'") as sub_context:
# sub_context.log_new(self.verbose_log)
# concept = sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
# sub_context.add_values(return_values=concept)
# temp_res.append(concept)
#
# else:
# temp_res.append(core.utils.strip_quotes(token.value))
# concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
# else:
# with context.push(self.name, desc=f"Evaluating '{node.concept}'") as sub_context:
# sub_context.log_new(self.verbose_log)
# concept = sheerka.evaluate_concept(sub_context, node.concept, self.verbose_log)
# sub_context.add_values(return_values=concept)
# temp_res.append(concept)
#
# if has_error:
# return sheerka.ret(
# self.name,
# False,
# temp_res,
# parents=[self.eaten])
#
# if concepts_only:
# res = []
# for r in temp_res:
# if isinstance(r, Concept):
# res.append(r)
# else:
# res = ""
# for r in temp_res:
# if isinstance(r, Concept):
# res += sheerka.value(r)
# else:
# res += r
#
# return sheerka.ret(
# self.name,
# True,
# res,
# parents=[self.eaten])
-68
View File
@@ -1,68 +0,0 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
class ConceptNodeEvaluator(OneReturnValueEvaluator):
    """
    After a BNF is recognized, generates the concept or the list concepts
    """

    # Registered name of this evaluator.
    NAME = "ConceptNode"

    def __init__(self):
        # Runs on EVALUATION return values with priority 60.
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)

    def matches(self, context, return_value):
        """Return True when *return_value* is a successful ParserResultConcept
        whose payload is a ConceptNode/UnrecognizedTokensNode, or a non-empty
        iterable whose FIRST element is one of those.

        NOTE(review): only the first element of an iterable payload is type
        checked here — confirm the remaining elements are guaranteed elsewhere.
        """
        if not return_value.status:
            return False
        if not isinstance(return_value.value, ParserResultConcept):
            return False
        return (
            isinstance(return_value.value.value, ConceptNode) or
            isinstance(return_value.value.value, UnrecognizedTokensNode) or
            (
                hasattr(return_value.value.value, "__iter__") and
                len(return_value.value.value) > 0 and
                (
                    isinstance(return_value.value.value[0], ConceptNode) or
                    isinstance(return_value.value.value[0], UnrecognizedTokensNode)
                )
            )
        )

    def eval(self, context, return_value):
        """
        From a concept node, creates a new concept
        and makes sure that the properties are correctly set
        """
        sheerka = context.sheerka
        nodes = return_value.value.value
        if not hasattr(nodes, "__iter__"):
            nodes = [nodes]  # normalize a single node to a one-element list
        concepts = []
        error_found = False  # set when a non-ConceptNode (unrecognized) node is seen
        source = ""
        for node in nodes:
            if isinstance(node, ConceptNode):
                # Rebuild the source text, space-separated.
                source += node.source if source == "" else (" " + node.source)
                concepts.append(node.concept)
            else:
                error_found = True
        if len(concepts) == 1:
            # Exactly one concept: wrap it in a PARSER_RESULT; success only if
            # no unrecognized node was mixed in with it.
            return sheerka.ret(
                self.name,
                not error_found,
                context.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=concepts[0],
                    try_parsed=None),
                parents=[return_value])
        # Zero or several concepts: not handled by this evaluator.
        return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME), parents=[return_value])
+102
View File
@@ -0,0 +1,102 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
class LexerNodeEvaluator(OneReturnValueEvaluator):
    """
    After a BNF is recognized, generates the concept or the list concepts
    """

    # Registered name of this evaluator.
    NAME = "LexerNode"

    def __init__(self):
        # Runs on EVALUATION return values with priority 60.
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)
        self.identifiers = {}  # cache for already created identifier (the key is id(concept))
        self.identifiers_key = {}  # number of identifiers with the same root (prefix)

    def matches(self, context, return_value):
        """Accept successful ParserResultConcept values whose payload is a
        ConceptNode/SourceCodeNode, or an iterable containing only those.

        NOTE(review): an EMPTY iterable payload matches vacuously — confirm
        that is intended.
        """
        if not return_value.status:
            return False
        if not isinstance(return_value.value, ParserResultConcept):
            return False
        value = return_value.value.value
        if isinstance(value, (ConceptNode, SourceCodeNode)):
            return True
        if hasattr(value, "__iter__"):
            for node in value:
                if not isinstance(node, (ConceptNode, SourceCodeNode)):
                    return False
            return True
        return False

    def eval(self, context, return_value):
        """
        From a concept node, creates a new concept
        and makes sure that the properties are correctly set
        """
        nodes = return_value.value.value
        if not hasattr(nodes, "__iter__"):
            nodes = [nodes]  # normalize a single node to a one-element list
        context.log(self.verbose_log, f"{nodes=}", self.name)
        # for/else: take the Python-code path as soon as any SourceCodeNode is
        # present, otherwise fall through to the concepts-only path.
        # NOTE(review): nesting reconstructed from a diff view with stripped
        # indentation — confirm the `else` belongs to the `for`.
        for node in nodes:
            if isinstance(node, SourceCodeNode):
                ret = self.evaluate_python_code(context, nodes)
                break
        else:
            ret = self.evaluate_concepts_only(context, nodes)
        ret.parents = [return_value]  # keep the provenance chain
        return ret

    def evaluate_concepts_only(self, context, nodes):
        # Collect the concepts and rebuild the source text, space-separated.
        concepts = []
        source = ""
        sheerka = context.sheerka
        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source if source == "" else (" " + node.source)
                concepts.append(node.concept)
        if len(concepts) == 1:
            # Exactly one concept found: wrap it in a successful PARSER_RESULT.
            return sheerka.ret(
                self.name,
                True,
                context.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=concepts[0],
                    try_parsed=None))
        # Zero or several concepts: not handled by this evaluator.
        return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=nodes))

    def evaluate_python_code(self, context, nodes):
        # Delegate the mix of concepts and Python source to the helper parser.
        sheerka = context.sheerka
        helper = LexerNodeParserHelperForPython()
        result = helper.parse(context, nodes)
        if isinstance(result, PythonNode):
            return sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=result.source,
                    body=result,
                    try_parsed=None))
        else:
            # helper.parse returned an error return-value; propagate its body.
            return sheerka.ret(
                self.name,
                False,
                result.body)
+52 -38
View File
@@ -6,7 +6,8 @@
# Arpeggio: A flexible PEG parser for Python,
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
#####################################################################################################
from dataclasses import field, dataclass
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
@@ -15,23 +16,6 @@ from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils
def flatten(iterable):
if iterable is None:
return []
result = []
for e in iterable:
if e.parsing_expression.rule_name is not None and e.parsing_expression.rule_name != "":
if hasattr(e, "children"):
e.children = flatten(e.children)
result.append(e)
elif hasattr(e, "children"):
result.extend(flatten(e.children))
else:
result.append(e)
return result
@dataclass()
class LexerNode(Node):
start: int # starting index in the tokens list
@@ -68,10 +52,10 @@ class UnrecognizedTokensNode(LexerNode):
return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
def __eq__(self, other):
if isinstance(other, tuple):
if len(other) != 3:
return False
return self.start == other[0] and self.end == other[1] and self.source == other[2]
if isinstance(other, utnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, UnrecognizedTokensNode):
return False
@@ -80,6 +64,9 @@ class UnrecognizedTokensNode(LexerNode):
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
@@ -99,17 +86,14 @@ class ConceptNode(LexerNode):
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if isinstance(other, tuple):
if len(other) == 2:
return self.concept.key == other[0] and self.source == other[1]
else:
return self.concept.key == other[0] and \
self.start == other[1] and \
self.end == other[2] and \
self.source == other[3]
if isinstance(other, cnode):
return self.concept.key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
# if not super().__eq__(other):
# return False
if isinstance(other, short_cnode):
return self.concept.key == other.concept_key and self.source == other.source
if not isinstance(other, ConceptNode):
return False
@@ -127,6 +111,42 @@ class ConceptNode(LexerNode):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeNode(LexerNode):
"""
Returned when some source code (like Python source code is recognized)
"""
def __init__(self, node, start, end, tokens=None, source=None):
super().__init__(start, end, tokens, source)
self.node = node # The PythonNode (or whatever language node) that is found
def __eq__(self, other):
if isinstance(other, scnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, SourceCodeNode):
return False
return self.node == other.node and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
@@ -146,9 +166,6 @@ class NonTerminalNode(LexerNode):
return name + sub_names
def __eq__(self, other):
# if not super().__eq__(other):
# return False
if not isinstance(other, NonTerminalNode):
return False
@@ -176,9 +193,6 @@ class TerminalNode(LexerNode):
return name + f"'{self.value}'"
def __eq__(self, other):
# if not super().__eq__(other):
# return False
if not isinstance(other, TerminalNode):
return False
+110
View File
@@ -0,0 +1,110 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging
# Module-level singleton; parse() compares against it to accept only results
# produced by the MultipleConceptsParser.
multiple_concepts_parser = MultipleConceptsParser()


class ConceptsWithConceptsParser(BaseParser):
    """Recognizes a known concept whose key mixes literal text with
    numbered variable placeholders standing for already-recognized concepts
    (or embedded source code)."""

    def __init__(self, **kwargs):
        # Name "ConceptsWithConcepts", priority 25.
        super().__init__("ConceptsWithConcepts", 25)

    @staticmethod
    def get_tokens(nodes):
        """Flatten *nodes* into a token list.

        A ConceptNode becomes a single CONCEPT token positioned at its first
        token; for other nodes, EOF stops the node and NEWLINE/WHITESPACE
        tokens are dropped.
        """
        tokens = []
        for node in nodes:
            if isinstance(node, ConceptNode):
                index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
                tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
            else:
                for token in node.tokens:
                    if token.type == TokenKind.EOF:
                        break
                    elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                        continue
                    else:
                        tokens.append(token)
        return tokens

    @staticmethod
    def get_key(nodes):
        """Build the concept lookup key: unrecognized text is kept verbatim,
        every other node is replaced by a numbered variable placeholder."""
        key = ""
        index = 0
        for node in nodes:
            if key:
                key += " "  # space-separate the key parts
            if isinstance(node, UnrecognizedTokensNode):
                key += node.source.strip()
            else:
                key += f"{VARIABLE_PREFIX}{index}"
                index += 1  # NOTE(review): assumed to count placeholders only (inside else) — confirm
        return key

    def finalize_concept(self, context, concept, nodes):
        """Bind each placeholder property of *concept* (in declaration order)
        to the matching ConceptNode concept or SourceCodeNode parse result."""
        index = 0  # position into concept.props keys
        for node in nodes:
            if isinstance(node, ConceptNode):
                prop_name = list(concept.props.keys())[index]
                concept.cached_asts[prop_name] = node.concept
                context.log(
                    self.verbose_log,
                    f"Setting property '{prop_name}='{node.concept}'.",
                    self.name)
                index += 1
            elif isinstance(node, SourceCodeNode):
                prop_name = list(concept.props.keys())[index]
                sheerka = context.sheerka
                # Wrap the embedded source code in a successful PARSER_RESULT.
                value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
                concept.cached_asts[prop_name] = [context.sheerka.ret(self.name, True, value)]
                context.log(
                    self.verbose_log,
                    f"Setting property '{prop_name}'='Python({node.source})'.",
                    self.name)
                index += 1
        return concept

    def parse(self, context, text):
        """Try to build concepts from a MultipleConceptsParser result.

        Returns None when *text* is not a PARSER_RESULT from that parser;
        a failed return value when the key names no known concept; otherwise
        one successful return value per matching concept (a single value if
        only one matched, else a list).
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None
        if not text.parser == multiple_concepts_parser:
            return None
        nodes = text.body
        concept_key = self.get_key(nodes)
        concept = sheerka.new(concept_key)
        if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
            # The key does not name any known concept.
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
        concepts = concept if hasattr(concept, "__iter__") else [concept]
        for concept in concepts:
            self.finalize_concept(context, concept, nodes)
        res = []
        for concept in concepts:
            res.append(sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=text.source,
                    body=concept,
                    try_parsed=None)))
        return res[0] if len(res) == 1 else res
+102 -34
View File
@@ -1,8 +1,11 @@
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser()
@@ -18,6 +21,25 @@ class MultipleConceptsParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
if not unrecognized_tokens:
return nodes_found, unrecognized_tokens
unrecognized_tokens.fix_source()
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
return nodes_found, None
@staticmethod
def create_or_add(unrecognized_tokens, token, index):
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, text):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
@@ -29,50 +51,42 @@ class MultipleConceptsParser(BaseParser):
sheerka = context.sheerka
nodes = text.value
nodes_found = [[]]
source = ""
concepts_only = True
for node in nodes:
if isinstance(node, UnrecognizedTokensNode):
unrecognized_tokens = None
for i, token in enumerate(node.tokens):
index = node.start + i
i = 0
if token.type == TokenKind.IDENTIFIER:
# it may be a concept
concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept):
# finish processing unrecognized_tokens
if unrecognized_tokens:
unrecognized_tokens.fix_source()
source += unrecognized_tokens.source
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
unrecognized_tokens = None
while i < len(node.tokens):
source += token.value
concepts = concept if hasattr(concept, "__iter__") else [concept]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
token_index = node.start + i
token = node.tokens[i]
concepts_nodes = self.get_concepts_nodes(context, token_index, token)
if concepts_nodes is not None:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, concepts_nodes)
i += 1
continue
else:
# it cannot be a concept
concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
if source_code_node:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, [source_code_node])
i += len(source_code_node.tokens)
continue
if unrecognized_tokens:
unrecognized_tokens.fix_source()
source += unrecognized_tokens.source
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
# not a concept nor some source code
unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
i += 1
# finish processing if needed
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
else:
nodes_found = core.utils.product(nodes_found, [node])
source += node.source
ret = []
for choice in nodes_found:
@@ -83,14 +97,68 @@ class MultipleConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
source=text.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, source, ret[0])
self.log_result(context, text.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, source, ret)
self.log_multiple_results(context, text.source, ret)
return ret
@staticmethod
def get_concepts_nodes(context, index, token):
"""
Tries to recognize a concept
from the univers of all known concepts
"""
if token.type != TokenKind.IDENTIFIER:
return None
concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
concepts = concept if hasattr(concept, "__iter__") else [concept]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
return concepts_nodes
return None
@staticmethod
def get_source_code_node(context, index, tokens):
"""
Tries to recognize source code.
For the time being, only Python is supported
:param context:
:param tokens:
:param index:
:return:
"""
if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
return None
end_index = len(tokens)
while end_index > 0:
parser = PythonParser()
tokens_to_parse = tokens[:end_index]
res = parser.parse(context, tokens_to_parse)
if res.status:
# only expression are accepted
ast_ = res.value.value.ast_
if not isinstance(ast_, ast.Expression):
return None
try:
compiled = compile(ast_, "<string>", "eval")
eval(compiled, {}, {})
except Exception:
return None
source = BaseParser.get_text_from_tokens(tokens_to_parse)
return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
end_index -= 1
return None
+80 -1
View File
@@ -5,6 +5,8 @@ from dataclasses import dataclass, field
import ast
import logging
from parsers.ConceptLexerParser import ConceptNode
log = logging.getLogger(__name__)
@@ -22,7 +24,7 @@ class PythonNode(Node):
def __init__(self, source, ast_, concepts=None):
self.source = source
self.ast_ = ast_
self.concepts = concepts or {}
self.concepts = concepts or {} # when concepts are recognized in the expression
# def __repr__(self):
# return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")"
@@ -133,3 +135,80 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
def visit_Name(self, node):
self.names.add(node.id)
class LexerNodeParserHelperForPython:
    """Helper class to parse mix of concepts and Python"""

    def __init__(self):
        self.identifiers = {}  # cache for already created identifier (the key is id(concept))
        self.identifiers_key = {}  # number of identifiers with the same root (prefix)

    def _get_identifier(self, concept):
        """
        Get an identifier for a concept.
        Make sure to return the same identifier if the same concept.
        Make sure to return a different identifier if same name but different concept.
        Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
        to be instance variables.
        I would like to keep this parser as stateless as possible.
        :param concept: concept to name
        :return: a Python identifier of the form __C__<sanitized key>[__<id>][_<n>]__C__
        """
        if id(concept) in self.identifiers:
            return self.identifiers[id(concept)]  # same object -> same identifier
        identifier = "__C__" + self._sanitize(concept.key or concept.name)
        if concept.id:
            identifier += "__" + concept.id
        if identifier in self.identifiers_key:
            # Same root already used by a different concept: add a counter suffix.
            self.identifiers_key[identifier] += 1
            identifier += f"_{self.identifiers_key[identifier]}"
        else:
            self.identifiers_key[identifier] = 0
        identifier += "__C__"
        self.identifiers[id(concept)] = identifier
        return identifier

    @staticmethod
    def _sanitize(identifier):
        # Replace every non-alphanumeric character with "0" so the result is
        # a valid Python identifier fragment.
        res = ""
        for c in identifier:
            res += c if c.isalnum() else "0"
        return res

    def parse(self, context, nodes):
        """Replace each ConceptNode with a synthetic Python identifier, parse
        the resulting text as Python, and return the PythonNode (with the
        identifier->concept mapping attached) or the error body on failure."""
        source = ""    # original, human-readable source
        to_parse = ""  # source with concepts replaced by identifiers
        concepts = {}  # the key is the Python identifier
        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source
                if to_parse:
                    to_parse += " "
                concept = node.concept
                python_id = self._get_identifier(concept)
                to_parse += python_id
                concepts[python_id] = concept
            else:
                source += node.source
                to_parse += node.source
        with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
            sub_context.add_inputs(to_parse=to_parse)
            python_parser = PythonParser()
            result = python_parser.parse(sub_context, to_parse)
            sub_context.add_values(return_values=result)
        if result.status:
            python_node = result.body.body
            python_node.source = source      # expose the original text, not the rewritten one
            python_node.concepts = concepts  # identifier -> concept mapping for evaluation
            return python_node
        return result.body  # the error
+4 -11
View File
@@ -37,6 +37,10 @@ class PythonWithConceptsParser(BaseParser):
def _get_identifier(c):
"""
Get an identifier for a concept.
Make sure to return the same identifier if the same concept
Make sure to return a different identifier if same name but different concept
Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
to be instance variables
I would like to keep this parser as stateless as possible
@@ -99,14 +103,3 @@ class PythonWithConceptsParser(BaseParser):
self.name,
False,
result.body)
def concept_identifier(self, concept):
if id(concept) in self.identifiers:
return self.identifiers[id(concept)]
identifier = "__C__" + (concept.key or concept.name)
if concept.id:
identifier += "__" + concept.id
identifier += "__C__"
return identifier
+4 -4
View File
@@ -6,7 +6,7 @@ from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptNode, ConceptMatch
ConceptLexerParser, ConceptNode, ConceptMatch, cnode
from sdp.sheerkaDataProvider import Event
@@ -108,12 +108,12 @@ def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")]
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [("bar", 0, 2, "thirty one")]
assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")]
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
-128
View File
@@ -1,128 +0,0 @@
# import pytest
#
# from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
# from core.concept import Concept
# from core.sheerka import Sheerka, ExecutionContext
# from evaluators.BaseEvaluator import BaseEvaluator
# from evaluators.ConceptComposerEvaluator import ConceptComposerEvaluator
# from parsers.BaseParser import BaseParser
# from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence
# from sdp.sheerkaDataProvider import Event
#
# concept_lexer_name = ConceptLexerParser().name
#
#
# def get_context():
# sheerka = Sheerka(skip_builtins_in_db=True)
# sheerka.initialize("mem://")
# return ExecutionContext("test", Event(), sheerka)
#
#
# def get_return_values(context, grammar, expression):
# parser = ConceptLexerParser()
# parser.initialize(context, grammar)
#
# ret_val = parser.parse(context, expression)
# assert not ret_val.status
# return [ret_val]
#
#
# def init(concepts, grammar, expression):
# context = get_context()
# for c in concepts:
# context.sheerka.add_in_cache(c)
# return_values = get_return_values(context, grammar, expression)
#
# return context, return_values
#
#
# @pytest.mark.parametrize("return_values, expected", [
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], True),
# ([
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], True),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, "some value"),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=["not a concept"])),
# ], False),
# ([
# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", False, "evaluator in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], False),
# ([
# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", True, "evaluator"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], False),
# ])
# def test_i_can_match(return_values, expected):
# context = get_context()
# assert ConceptComposerEvaluator().matches(context, return_values) == expected
#
#
# def test_i_can_eval_simple_concepts():
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {}
# context, return_values = init([foo, bar], grammar, "bar foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
# assert ret_val.value[0].metadata.is_evaluated
# assert ret_val.value[1].metadata.is_evaluated
# assert ret_val.parents == [return_values[0]]
#
#
# def test_i_can_eval_simple_concepts_when_some_are_bnf():
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: "foo"}
# context, return_values = init([foo, bar], grammar, "bar foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
# assert ret_val.value[0].metadata.is_evaluated
# assert ret_val.value[1].metadata.is_evaluated
# assert ret_val.parents == [return_values[0]]
#
#
# def test_i_can_eval_simple_concept_and_text():
# foo = Concept("foo", body="'foo'")
# grammar = {}
# context, return_values = init([foo], grammar, "'bar' foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == "bar foo"
# assert ret_val.parents == [return_values[0]]
+63 -63
View File
@@ -6,7 +6,7 @@ from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \
UnrecognizedTokensNode
UnrecognizedTokensNode, cnode, short_cnode
from sdp.sheerkaDataProvider import Event
@@ -163,7 +163,7 @@ def test_i_always_choose_the_longest_match():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 4, "one two three")]
assert return_value == [cnode("foo", 0, 4, "one two three")]
def test_i_can_match_several_sequences():
@@ -176,8 +176,8 @@ def test_i_can_match_several_sequences():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [
("foo", 0, 4, "one two three"),
("bar", 6, 8, "one two"),
cnode("foo", 0, 4, "one two three"),
cnode("bar", 6, 8, "one two"),
]
@@ -189,13 +189,13 @@ def test_i_can_match_ordered_choice():
res1 = parser.parse(context, "one")
assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [("foo", 0, 0, "one")]
assert res1.value.body == [cnode("foo", 0, 0, "one")]
assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)])
res2 = parser.parse(context, "two")
assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [("foo", 0, 0, "two")]
assert res2.value.body == [cnode("foo", 0, 0, "two")]
assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)])
res3 = parser.parse(context, "three")
@@ -259,11 +259,11 @@ def test_i_can_mix_ordered_choices_and_sequences():
res = parser.parse(context, "twenty thirty")
assert res.status
assert res.value.value == [("foo", 0, 2, "twenty thirty")]
assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")]
res = parser.parse(context, "one")
assert res.status
assert res.value.value == [("foo", 0, 0, "one")]
assert res.value.value == [cnode("foo", 0, 0, "one")]
def test_i_cannot_parse_empty_optional():
@@ -319,11 +319,11 @@ def test_i_can_parse_sequence_ending_with_optional():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [("foo", 0, 4, "one two three")]
assert res.value.body == [cnode("foo", 0, 4, "one two three")]
res = parser.parse(context, "one two")
assert res.status
assert res.value.body == [("foo", 0, 2, "one two")]
assert res.value.body == [cnode("foo", 0, 2, "one two")]
def test_i_can_parse_sequence_with_optional_in_between():
@@ -335,11 +335,11 @@ def test_i_can_parse_sequence_with_optional_in_between():
res = parser.parse(context, "one two three")
assert res.status
assert res.value.body == [("foo", 0, 4, "one two three")]
assert res.value.body == [cnode("foo", 0, 4, "one two three")]
res = parser.parse(context, "one three")
assert res.status
assert res.value.body == [("foo", 0, 2, "one three")]
assert res.value.body == [cnode("foo", 0, 2, "one three")]
def test_i_cannot_parse_wrong_input_with_optional():
@@ -370,13 +370,13 @@ def test_i_can_use_reference():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two")
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept
# the body and the prop['foo'] are the same concept 'foo'
assert cbody(concept_found_2) == get_expected(foo, "one two")
@@ -400,13 +400,13 @@ def test_i_can_use_a_reference_with_a_body():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept
assert concept_found_1.body == "'foo'"
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept
# the body and the prop['foo'] are the same concept 'foo'
assert cbody(concept_found_2) == foo
@@ -430,20 +430,20 @@ def test_i_can_use_context_reference_with_multiple_levels():
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two")
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept
assert cbody(concept_found_2) == get_expected(foo, "one two")
assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))
assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [("baz", 0, 2, "one two")]
assert res[2].value.body == [cnode("baz", 0, 2, "one two")]
concept_found_3 = res[2].value.body[0].concept
expected_foo = get_expected(foo, "one two")
assert cbody(concept_found_3) == get_expected(bar, expected_foo)
@@ -465,8 +465,8 @@ def test_order_is_not_important_when_using_references():
res = parser.parse(context, "one two")
assert len(res) == 2
assert res[0].value.body == [("bar", 0, 2, "one two")]
assert res[1].value.body == [("foo", 0, 2, "one two")]
assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
def test_i_can_parse_when_reference():
@@ -477,21 +477,21 @@ def test_i_can_parse_when_reference():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")]
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "foo") == get_expected(foo, "twenty")
res = parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [("bar", 0, 2, "thirty one")]
assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("thirty one")
assert cprop(concept_found, "foo") == get_expected(foo, "thirty")
res = parser.parse(context, "twenty")
assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")]
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty")
@@ -504,14 +504,14 @@ def test_i_can_parse_when_reference_has_a_body():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")]
assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "foo") == foo
res = parser.parse(context, "twenty")
assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")]
assert res.value.body == [cnode("foo", 0, 0, "twenty")]
concept_found = res.value.body[0].concept
assert concept_found.body == "'one'"
@@ -529,14 +529,14 @@ def test_i_can_parse_multiple_results():
assert len(res) == 2
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", 0, 2, "one two")]
assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_0 = res[0].value.body[0].concept
assert cbody(concept_found_0) == DoNotResolve("one two")
assert len(concept_found_0.props) == 0
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", 0, 2, "one two")]
assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[1].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two")
assert len(concept_found_1.props) == 0
@@ -555,19 +555,19 @@ def test_i_can_parse_multiple_results_times_two():
assert len(res) == 4
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", "one two"), ("bar", "one two")]
assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", "one two"), ("bar", "one two")]
assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")]
assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [("bar", "one two"), ("foo", "one two")]
assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")]
assert res[3].status
assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT)
assert res[3].value.body == [("foo", "one two"), ("foo", "one two")]
assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")]
def test_i_can_parse_multiple_results_when_reference():
@@ -589,11 +589,11 @@ def test_i_can_parse_multiple_results_when_reference():
assert len(res) == 2
assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", 0, 0, "twenty")]
assert res[0].value.body == [cnode("bar", 0, 0, "twenty")]
assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", 0, 0, "twenty")]
assert res[1].value.body == [cnode("foo", 0, 0, "twenty")]
def test_i_can_parse_concept_reference_that_is_not_in_grammar():
@@ -608,14 +608,14 @@ def test_i_can_parse_concept_reference_that_is_not_in_grammar():
res = parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [("foo", 0, 2, "twenty two")]
assert res.value.body == [cnode("foo", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "two") == get_expected(two, "two")
res = parser.parse(context, "twenty one")
assert res.status
assert res.value.body == [("foo", 0, 2, "twenty one")]
assert res.value.body == [cnode("foo", 0, 2, "twenty one")]
def test_i_can_parse_zero_or_more():
@@ -625,7 +625,7 @@ def test_i_can_parse_zero_or_more():
context, res, wrapper, return_value = execute([foo], grammar, "one one")
assert res.status
assert return_value == [("foo", 0, 2, "one one")]
assert return_value == [cnode("foo", 0, 2, "one one")]
assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)])
concept_found = return_value[0].concept
@@ -639,11 +639,11 @@ def test_i_can_parse_sequence_and_zero_or_more():
res = parser.parse(context, "one one two")
assert res.status
assert res.value.value == [("foo", 0, 4, "one one two")]
assert res.value.value == [cnode("foo", 0, 4, "one one two")]
res = parser.parse(context, "two")
assert res.status
assert res.value.value == [("foo", 0, 0, "two")]
assert res.value.value == [cnode("foo", 0, 0, "two")]
def test_i_cannot_parse_zero_and_more_when_wrong_entry():
@@ -657,7 +657,7 @@ def test_i_cannot_parse_zero_and_more_when_wrong_entry():
res = parser.parse(context, "one two")
assert not res.status
assert res.value.value == [
("foo", 0, 0, "one"),
cnode("foo", 0, 0, "one"),
UnrecognizedTokensNode(1, 2, [t(" "), t("two")])
]
@@ -675,7 +675,7 @@ def test_i_can_parse_zero_and_more_with_separator():
context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")
assert res.status
assert return_value == [("foo", 0, 7, "one, one , one")]
assert return_value == [cnode("foo", 0, 7, "one, one , one")]
def test_that_zero_and_more_is_greedy():
@@ -686,7 +686,7 @@ def test_that_zero_and_more_is_greedy():
context, res, wrapper, return_value = execute([foo], grammar, "one one one")
assert res.status
assert return_value == [("foo", 0, 4, "one one one")]
assert return_value == [cnode("foo", 0, 4, "one one one")]
def test_i_can_parse_one_and_more():
@@ -696,7 +696,7 @@ def test_i_can_parse_one_and_more():
context, res, wrapper, return_value = execute([foo], grammar, "one one")
assert res.status
assert return_value == [("foo", 0, 2, "one one")]
assert return_value == [cnode("foo", 0, 2, "one one")]
assert return_value[0].underlying == u(grammar[foo], 0, 2, [
u("one", 0, 0),
u("one", 2, 2)])
@@ -709,7 +709,7 @@ def test_i_can_parse_sequence_and_one_or_more():
res = parser.parse(context, "one one two")
assert res.status
assert res.value.value == [("foo", 0, 4, "one one two")]
assert res.value.value == [cnode("foo", 0, 4, "one one two")]
res = parser.parse(context, "two")
assert not res.status
@@ -725,7 +725,7 @@ def test_i_can_parse_one_and_more_with_separator():
context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")
assert res.status
assert return_value == [("foo", 0, 7, "one, one , one")]
assert return_value == [cnode("foo", 0, 7, "one, one , one")]
assert return_value[0].underlying == u(grammar[foo], 0, 7, [
u("one", 0, 0),
u("one", 3, 3),
@@ -740,7 +740,7 @@ def test_that_one_and_more_is_greedy():
context, res, wrapper, return_value = execute([foo], grammar, "one one one")
assert res.status
assert return_value == [("foo", 0, 4, "one one one")]
assert return_value == [cnode("foo", 0, 4, "one one one")]
def test_i_can_detect_infinite_recursion():
@@ -785,9 +785,9 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
res = parser.parse(context, "foo")
assert len(res) == 2
assert res[0].status
assert res[0].value.body == [("bar", 0, 0, "foo")]
assert res[0].value.body == [cnode("bar", 0, 0, "foo")]
assert res[1].status
assert res[1].value.body == [("foo", 0, 0, "foo")]
assert res[1].value.body == [cnode("foo", 0, 0, "foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
@@ -912,7 +912,7 @@ def test_i_cannot_parse_when_wrong_sequence():
assert not res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [
("foo", "one two three"),
short_cnode("foo", "one two three"),
UnrecognizedTokensNode(5, 6, [t(" "), t("one")])
]
@@ -945,14 +945,14 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end():
assert not res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
("bar", 0, 2, "one two"),
cnode("bar", 0, 2, "one two"),
UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
]
assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
("foo", 0, 2, "one two"),
cnode("foo", 0, 2, "one two"),
UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
]
@@ -973,14 +973,14 @@ def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens():
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("bar", 4, 6, "one two"),
cnode("bar", 4, 6, "one two"),
]
assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("foo", 4, 6, "one two"),
cnode("foo", 4, 6, "one two"),
]
@@ -999,7 +999,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("bar", 4, 6, "one two"),
cnode("bar", 4, 6, "one two"),
UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
]
@@ -1007,7 +1007,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("foo", 4, 6, "one two"),
cnode("foo", 4, 6, "one two"),
UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
]
@@ -1029,17 +1029,17 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle():
assert not res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [
("bar", 0, 2, "one two"),
cnode("bar", 0, 2, "one two"),
UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
("baz", 8, 8, "six"),
cnode("baz", 8, 8, "six"),
]
assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [
("foo", 0, 2, "one two"),
cnode("foo", 0, 2, "one two"),
UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
("baz", 8, 8, "six"),
cnode("baz", 8, 8, "six"),
]
@@ -1052,7 +1052,7 @@ def test_i_can_get_the_inner_concept_when_possible():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 0, "one")]
assert return_value == [cnode("foo", 0, 0, "one")]
concept_found = return_value[0].concept
assert cbody(concept_found) == get_expected(one, "one")
assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
@@ -1069,7 +1069,7 @@ def test_i_can_get_the_inner_concept_when_possible_with_rule_name():
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 0, "one")]
assert return_value == [cnode("foo", 0, 0, "one")]
concept_found = return_value[0].concept
assert cbody(concept_found) == get_expected(one, "one")
assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
@@ -1086,7 +1086,7 @@ def test_i_get_multiple_props_when_zero_or_more():
context, res, wrapper, return_value = execute([foo, one], grammar, "one one one")
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 4, "one one one")]
assert return_value == [cnode("foo", 0, 4, "one one one")]
concept_found = return_value[0].concept
assert cbody(concept_found) == DoNotResolve("one one one")
assert len(concept_found.cached_asts["one"]) == 3
@@ -1106,7 +1106,7 @@ def test_i_get_multiple_props_when_zero_or_more_and_different_values():
context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok")
assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", "one ok un ok uno ok")]
assert return_value == [short_cnode("foo", "one ok un ok uno ok")]
concept_found = return_value[0].concept
assert cprop(concept_found, "one")[0] == get_expected(one, "one")
assert cprop(concept_found, "one")[1] == get_expected(one, "un")
+204
View File
@@ -0,0 +1,204 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from sdp.sheerkaDataProvider import Event
# Shared parser instance: used both to build ParserResultConcepts in the helpers
# below and as the identity ConceptsWithConceptsParser checks for.
multiple_concepts_parser = MultipleConceptsParser()
def get_context():
    """Build a fresh in-memory Sheerka engine and wrap it in an ExecutionContext."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", Event(), engine)
def get_ret_from(*args):
    """Build the ReturnValueConcept a MultipleConceptsParser run would produce.

    Each argument becomes one node: a Concept -> ConceptNode (one token slot),
    a PythonNode -> SourceCodeNode, anything else (raw text) -> UnrecognizedTokensNode.
    """
    nodes = []
    position = 0
    full_source = ""
    for item in args:
        if isinstance(item, Concept):
            tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
            nodes.append(ConceptNode(item, position, position, tokens, item.name))
            position += 1
            full_source += item.name
            continue
        # both remaining cases tokenize their textual form
        raw = item.source if isinstance(item, PythonNode) else item
        tokens = list(Tokenizer(raw))[:-1]  # strip trailing EOF
        end = position + len(tokens) - 1
        if isinstance(item, PythonNode):
            nodes.append(SourceCodeNode(item, position, end, tokens, item.source))
        else:
            nodes.append(UnrecognizedTokensNode(position, end, tokens))
        position += len(tokens)
        full_source += raw
    return ReturnValueConcept(
        "who",
        False,
        ParserResultConcept(parser=multiple_concepts_parser, value=nodes, source=full_source))
def init(concepts, inputs):
    """Register *concepts* in a fresh context, then build the input return value."""
    context = get_context()
    for c in concepts:
        context.sheerka.create_new_concept(context, c)
    return context, get_ret_from(*inputs)
def execute(concepts, inputs):
    """Run ConceptsWithConceptsParser over *inputs* and unpack the pieces tests inspect."""
    context, input_ret_val = init(concepts, inputs)
    parser = ConceptsWithConceptsParser()
    result = parser.parse(context, input_ret_val.body)
    wrapper = result.body
    return context, parser, result, wrapper, wrapper.body
@pytest.mark.parametrize("text, interested", [
    ("not parser result", False),
    (ParserResultConcept(parser="not multiple_concepts_parser"), False),
    (ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
])
def test_not_interested(text, interested):
    """The parser answers None for inputs that are not its own parser results."""
    context = get_context()
    res = ConceptsWithConceptsParser().parse(context, text)
    # parse() returns None exactly when it is not interested in the input
    assert (res is not None) == interested
def test_i_can_parse_composition_of_concepts():
    # "foo plus bar": both operands and the composite concept are known concepts.
    foo = Concept("foo")
    bar = Concept("bar")
    plus = Concept("a plus b").set_prop("a").set_prop("b")
    context, parser, result, wrapper, return_value = execute([foo, bar, plus], [foo, " plus ", bar])
    assert result.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert result.who == wrapper.parser.name
    assert wrapper.source == "foo plus bar"
    # the recognized composite binds each operand concept to the matching prop
    assert context.sheerka.isinstance(return_value, plus)
    assert return_value.cached_asts["a"] == foo
    assert return_value.cached_asts["b"] == bar
    # sanity check, I can evaluate the result
    evaluated = context.sheerka.evaluate_concept(context, return_value)
    assert evaluated.key == return_value.key
    assert evaluated.get_prop("a") == foo.init_key()
    assert evaluated.get_prop("b") == bar.init_key()
def test_i_can_parse_when_composition_of_source_code():
    # both operands are raw Python expressions; the composite body adds them
    plus = Concept("a plus b", body="a + b").set_prop("a").set_prop("b")
    left = PythonNode("1+1", ast.parse("1+1", mode="eval"))
    right = PythonNode("2+2", ast.parse("2+2", mode="eval"))
    context, parser, result, wrapper, return_value = execute([plus], [left, " plus ", right])
    assert result.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert result.who == wrapper.parser.name
    assert wrapper.source == "1+1 plus 2+2"
    assert context.sheerka.isinstance(return_value, plus)
    # source-code operands are wrapped in per-operand parser results
    left_parser_result = ParserResultConcept(parser=parser, source="1+1", value=left)
    right_parser_result = ParserResultConcept(parser=parser, source="2+2", value=right)
    assert return_value.cached_asts["a"] == [ReturnValueConcept(parser.name, True, left_parser_result)]
    assert return_value.cached_asts["b"] == [ReturnValueConcept(parser.name, True, right_parser_result)]
    # sanity check, I can evaluate the result
    evaluated = context.sheerka.evaluate_concept(context, return_value)
    assert evaluated.key == return_value.key
    assert evaluated.get_prop("a") == 2
    assert evaluated.get_prop("b") == 4
    assert evaluated.body == 6
def test_i_can_parse_when_mix_of_concept_and_code():
    # left operand is a known concept, right operand is raw Python source
    plus = Concept("a plus b").set_prop("a").set_prop("b")
    code = PythonNode("1+1", ast.parse("1+1", mode="eval"))
    foo = Concept("foo")
    context, parser, result, wrapper, return_value = execute([plus, foo], [foo, " plus ", code])
    assert result.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert result.who == wrapper.parser.name
    assert wrapper.source == "foo plus 1+1"
    assert context.sheerka.isinstance(return_value, plus)
    # the concept binds directly; the code operand is wrapped in a parser result
    code_parser_result = ParserResultConcept(parser=parser, source="1+1", value=code)
    assert return_value.cached_asts["a"] == foo
    assert return_value.cached_asts["b"] == [ReturnValueConcept(parser.name, True, code_parser_result)]
    # sanity check, I can evaluate the result
    evaluated = context.sheerka.evaluate_concept(context, return_value)
    assert evaluated.key == return_value.key
    assert evaluated.get_prop("a") == foo.init_key()
    assert evaluated.get_prop("b") == 2
def test_i_can_parse_when_multiple_concepts_are_recognized():
    # two composites share the name "a plus b": the parser must return one
    # result per candidate, not pick a winner
    foo = Concept("foo")
    bar = Concept("bar")
    plus_1 = Concept("a plus b", body="body1").set_prop("a").set_prop("b")
    plus_2 = Concept("a plus b", body="body2").set_prop("a").set_prop("b")
    context, input_return_values = init([foo, bar, plus_1, plus_2], [foo, " plus ", bar])
    parser = ConceptsWithConceptsParser()
    result = parser.parse(context, input_return_values.body)
    assert len(result) == 2
    # first candidate: plus_1 with both operands bound
    res = result[0]
    wrapper = res.value
    return_value = res.value.value
    assert res.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert res.who == wrapper.parser.name
    assert wrapper.source == "foo plus bar"
    assert context.sheerka.isinstance(return_value, plus_1)
    assert return_value.cached_asts["a"] == foo
    assert return_value.cached_asts["b"] == bar
    # second candidate: plus_2, same operands
    res = result[1]
    wrapper = res.value
    return_value = res.value.value
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert res.who == wrapper.parser.name
    assert wrapper.source == "foo plus bar"
    assert context.sheerka.isinstance(return_value, plus_2)
    assert return_value.cached_asts["a"] == foo
    assert return_value.cached_asts["b"] == bar
def test_i_cannot_parse_when_unknown_concept():
    """When no composite concept matches, the parser answers NOT_FOR_ME with status False."""
    foo = Concept("foo")
    bar = Concept("bar")
    context, input_return_values = init([foo, bar], [foo, " plus ", bar])
    parser = ConceptsWithConceptsParser()
    result = parser.parse(context, input_return_values.body)
    assert not result.status
    assert result.who == parser.name
    # The wrapper signals the parser declined; the payload is the untouched input.
    assert context.sheerka.isinstance(result.body, BuiltinConcepts.NOT_FOR_ME)
    assert result.body.body == input_return_values.body.body
@@ -1,11 +1,13 @@
import ast
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence, TerminalNode, \
StrMatch, Optional, OrderedChoice, ZeroOrMore, UnrecognizedTokensNode, ConceptMatch
from evaluators.LexerNodeEvaluator import LexerNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, StrMatch, UnrecognizedTokensNode, SourceCodeNode
from parsers.PythonParser import PythonNode
from sdp.sheerkaDataProvider import Event
@@ -24,6 +26,18 @@ def from_parsing(context, grammar, expression):
return ret_val
def from_fragments(*fragments):
    """Build a successful ReturnValueConcept wrapping the given fragments.

    Each string fragment becomes a SourceCodeNode holding the fragment and its
    parsed Python AST (eval mode); any other fragment is treated as a concept
    and becomes a ConceptNode labelled with the concept's name.
    """
    nodes = []
    for fragment in fragments:
        if isinstance(fragment, str):
            node = PythonNode(fragment, ast.parse(fragment.strip(), mode="eval"))
            nodes.append(SourceCodeNode(node, 0, 0, [], fragment))
        else:
            nodes.append(ConceptNode(fragment, 0, 0, [], fragment.name))
    # Typo fix: "somme_name" -> "some_name", consistent with the rest of the file.
    return ReturnValueConcept("some_name", True, ParserResultConcept(value=nodes))
def init(concept, grammar, text):
context = get_context()
if isinstance(concept, list):
@@ -40,12 +54,14 @@ def init(concept, grammar, text):
@pytest.mark.parametrize("ret_val, expected", [
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[SourceCodeNode(0, 0, [])])), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=SourceCodeNode(0, 0, []))), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=[SourceCodeNode(0, 0, [])])), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=SourceCodeNode(0, 0, []))), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])), False),
(ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]), False),
@@ -53,7 +69,7 @@ def init(concept, grammar, text):
])
def test_i_can_match(ret_val, expected):
context = get_context()
assert ConceptNodeEvaluator().matches(context, ret_val) == expected
assert LexerNodeEvaluator().matches(context, ret_val) == expected
def test_concept_is_returned_when_only_one_in_the_list():
@@ -61,9 +77,9 @@ def test_concept_is_returned_when_only_one_in_the_list():
context = get_context()
context.sheerka.add_in_cache(foo)
evaluator = ConceptNodeEvaluator()
ret_val = from_parsing(context, {foo: StrMatch("foo")}, "foo")
evaluator = LexerNodeEvaluator()
result = evaluator.eval(context, ret_val)
wrapper = result.body
return_value = result.body.body
@@ -77,3 +93,23 @@ def test_concept_is_returned_when_only_one_in_the_list():
assert return_value.cached_asts[ConceptParts.BODY] == DoNotResolve("foo")
assert result.parents == [ret_val]
def test_concept_python_node_is_returned_when_source_code():
    """Evaluating a concept mixed with raw source yields a PythonNode with the concept mapped."""
    context = get_context()
    foo = Concept("foo")
    ret_val = from_fragments(foo, " + 1")
    evaluator = LexerNodeEvaluator()
    result = evaluator.eval(context, ret_val)
    assert result.who == evaluator.name
    assert result.status
    wrapper = result.body
    return_value = result.body.body
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert wrapper.parser == evaluator
    assert wrapper.source == "foo + 1"
    # The concept is substituted by a mangled placeholder inside the parsed AST.
    expected_node = PythonNode('foo + 1', ast.parse("__C__foo__C__ + 1", mode="eval"))
    assert return_value == expected_node
    assert return_value.concepts == {"__C__foo__C__": foo}
    assert result.parents == [ret_val]
+83 -12
View File
@@ -1,8 +1,12 @@
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, UnrecognizedTokensNode
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, SourceCodeNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from sdp.sheerkaDataProvider import Event
@@ -78,7 +82,7 @@ def test_i_can_parse_when_ending_with_bnf():
assert ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [("bar", 0, 0, "bar"), ("foo", 2, 6, "foo1 foo2 foo3")]
assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")]
assert ret_val.value.source == "bar foo1 foo2 foo3"
@@ -94,7 +98,7 @@ def test_i_can_parse_when_starting_with_bnf():
assert ret_val.status
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [("foo", 0, 4, "foo1 foo2 foo3"), ("bar", 6, 6, "bar")]
assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")]
assert ret_val.value.source == "foo1 foo2 foo3 bar"
@@ -112,13 +116,13 @@ def test_i_can_parse_when_concept_are_mixed():
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [
("baz", 0, 0, "baz"),
("foo", 2, 6, "foo1 foo2 foo3"),
("bar", 8, 8, "bar")]
cnode("baz", 0, 0, "baz"),
cnode("foo", 2, 6, "foo1 foo2 foo3"),
cnode("bar", 8, 8, "bar")]
assert ret_val.value.source == "baz foo1 foo2 foo3 bar"
def test_i_can_parse_when_multiple_concept_are_matching():
def test_i_can_parse_when_multiple_concepts_are_matching():
foo = Concept("foo")
bar = Concept("bar", body="bar1")
baz = Concept("bar", body="bar2")
@@ -130,16 +134,35 @@ def test_i_can_parse_when_multiple_concept_are_matching():
assert len(ret_val) == 2
assert ret_val[0].status
assert ret_val[0].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")]
assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
assert ret_val[0].value.source == "foo bar"
assert ret_val[0].value.value[1].concept.body == "bar1"
assert ret_val[1].status
assert ret_val[1].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")]
assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
assert ret_val[1].value.source == "foo bar"
assert ret_val[1].value.value[1].concept.body == "bar2"
def test_i_can_parse_when_source_code():
    """A non-concept leading token is captured as a source-code node before the concept."""
    foo = Concept("foo")
    context, return_value = init([foo], {foo: "foo"}, "1 foo")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    assert ret_val.status
    assert ret_val.who == parser.name
    wrapper = ret_val.value
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert wrapper.source == "1 foo"
    expected_nodes = [
        scnode(0, 1, "1 "),
        cnode("foo", 2, 2, "foo"),
    ]
    assert wrapper.value == expected_nodes
def test_i_cannot_parse_when_unrecognized_token():
twenty_two = Concept("twenty two")
one = Concept("one")
@@ -153,8 +176,56 @@ def test_i_cannot_parse_when_unrecognized_token():
assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [
("twenty two", 0, 2, "twenty two"),
(3, 5, " + "),
("one", 6, 6, "one")
cnode("twenty two", 0, 2, "twenty two"),
utnode(3, 5, " + "),
cnode("one", 6, 6, "one")
]
assert ret_val.value.source == "twenty two + one"
def test_i_cannot_parse_when_unknown_concepts():
    """An unrecognized word between known concepts makes the whole parse fail."""
    twenty_two = Concept("twenty two")
    one = Concept("one")
    grammar = {twenty_two: Sequence("twenty", "two")}
    context, return_value = init([twenty_two, one], grammar, "twenty two plus one")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    assert not ret_val.status
    assert ret_val.who == parser.name
    assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
    # " plus " is reported as an unrecognized-tokens node between the two concepts.
    expected_nodes = [
        cnode("twenty two", 0, 2, "twenty two"),
        utnode(3, 5, " plus "),
        cnode("one", 6, 6, "one"),
    ]
    assert ret_val.value.value == expected_nodes
    assert ret_val.value.source == "twenty two plus one"
@pytest.mark.parametrize("text, expected_source, expected_end", [
    ("True", "True", 0),
    ("1 == 1", "1 == 1", 5),
    ("1!xdf", "1", 0),
    ("1", "1", 0),
])
def test_i_can_get_source_code_node(text, expected_source, expected_end):
    """The longest leading source-code span is extracted, shifted by the start index."""
    tokens = list(Tokenizer(text))[:-1]  # drop the trailing EOF token
    start_index = 5  # an arbitrary non-zero offset, to verify start/end are shifted
    parser = MultipleConceptsParser()
    res = parser.get_source_code_node(get_context(), start_index, tokens)
    assert isinstance(res, SourceCodeNode)
    assert isinstance(res.node, PythonNode)
    assert res.source == expected_source
    assert res.start == start_index
    assert res.end == start_index + expected_end
def test_i_cannot_parse_null_text():
    """Neither an empty token list nor an EOF-only list yields a source-code node."""
    res = MultipleConceptsParser().get_source_code_node(get_context(), 0, [])
    assert res is None
    eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
    res = MultipleConceptsParser().get_source_code_node(get_context(), 0, [eof_token])
    assert res is None
+8 -6
View File
@@ -41,16 +41,18 @@ def to_str_ast(expression):
return PythonNode.get_dump(ast.parse(expression, mode="eval"))
@pytest.mark.parametrize("text", [
"not parser result",
ParserResultConcept(value="not a list"),
ParserResultConcept(value=[]),
ParserResultConcept(value=["not a Node"]),
@pytest.mark.parametrize("text, interested", [
("not parser result", False),
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
])
def test_not_interested(text):
def test_not_interested(text, interested):
context = get_context()
res = PythonWithConceptsParser().parse(context, text)
if interested:
assert res is not None
else:
assert res is None
+101 -13
View File
@@ -401,18 +401,6 @@ def test_i_can_eval_bnf_definitions_from_separate_instances():
assert res[0].value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one two").init_key())
def test_i_can_eval_a_mix_with_bnf_and_python():
sheerka = get_sheerka()
sheerka.evaluate_user_input("def concept one as 1")
sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' one as 20 + one")
res = sheerka.evaluate_user_input("twenty one + 1")
assert len(res) == 1
assert res[0].status
assert res[0].body == 22
@pytest.mark.parametrize("desc, definitions", [
("Simple form", [
"def concept one as 1",
@@ -450,16 +438,116 @@ def test_i_can_mix_concept_with_python_to_define_numbers(desc, definitions):
assert res[0].status
assert res[0].body == 22
res = sheerka.evaluate_user_input("twenty one + one")
assert len(res) == 1
assert res[0].status
assert res[0].body == 22
res = sheerka.evaluate_user_input("twenty one + twenty two")
assert len(res) == 1
assert res[0].status
assert res[0].body == 43
res = sheerka.evaluate_user_input("twenty one + one")
res = sheerka.evaluate_user_input("1 + twenty one")
assert len(res) == 1
assert res[0].status
assert res[0].body == 22
# res = sheerka.evaluate_user_input("1 + 1 + twenty one")
# assert len(res) == 1
# assert res[0].status
# assert res[0].body == 23
def test_i_can_mix_concept_of_concept():
    """'a plus b' composes with literals, simple concepts and bnf-defined concepts.

    Each input must produce exactly one successful evaluation whose nested
    body is the expected numeric value.
    """
    sheerka = get_sheerka()

    definitions = [
        "def concept one as 1",
        "def concept two as 2",
        "def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit",
        "def concept a plus b as a + b",
    ]
    for definition in definitions:
        sheerka.evaluate_user_input(definition)

    # NOTE(review): these combinations do not work yet; they document the
    # behaviour this feature is working towards:
    #   "1 plus 2" -> 3, "1 plus one" -> 2, "1 + 1 plus 1" -> 3
    cases = [
        ("1 plus twenty one", 22),
        ("one plus 1", 2),
        ("one plus two", 3),
        ("one plus twenty one", 22),
        ("twenty one plus 1", 22),
        ("twenty one plus one", 22),
        ("twenty one plus twenty two", 43),
    ]
    # Data-driven loop replaces seven duplicated assertion triples; the input
    # text is attached to each assert so a failure pinpoints the broken case.
    for text, expected in cases:
        res = sheerka.evaluate_user_input(text)
        assert len(res) == 1, text
        assert res[0].status, text
        assert res[0].body.body == expected, text
# def test_i_can_evaluate_concept_of_concept_when_multiple_choices():
# sheerka = get_sheerka()
#
# definitions = [
# "def concept little a where a",
# "def concept blue a where a",
# "def concept little blue a where a",
# "def concept house"
# ]
#
# for definition in definitions:
# sheerka.evaluate_user_input(definition)
#
# ### CAUTION ####
# # this test cannot work !!
# # it is only a hint of the result that I would like to achieve
#
# res = sheerka.evaluate_user_input("little blue house")
# assert len(res) == 2
# assert res[0].status
# assert res[0].body == "little(blue(house))"
#
# assert res[1].status
# assert res[1].body == "little blue(house)"
def test_i_can_say_that_a_concept_isa_another_concept():
sheerka = get_sheerka()
+1
View File
@@ -213,6 +213,7 @@ def test_i_can_transform_simple_execution_context():
'desc': 'this is the desc',
'children': [],
'preprocess': None,
'inputs': {},
'values': {},
'obj': None,
'concepts': {}
+14 -1
View File
@@ -4,6 +4,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords
def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
source += "$£€!_identifier°~_^\\`#"
tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -40,8 +41,20 @@ def test_i_can_tokenize():
assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21)
assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22)
assert tokens[34] == Token(TokenKind.CONCEPT, 'name', 81, 6, 23)
assert tokens[35] == Token(TokenKind.DOLLAR, '$', 88, 6, 30)
assert tokens[36] == Token(TokenKind.STERLING, '£', 89, 6, 31)
assert tokens[37] == Token(TokenKind.EURO, '', 90, 6, 32)
assert tokens[38] == Token(TokenKind.EMARK, '!', 91, 6, 33)
assert tokens[39] == Token(TokenKind.IDENTIFIER, '_identifier', 92, 6, 34)
assert tokens[40] == Token(TokenKind.DEGREE, '°', 103, 6, 45)
assert tokens[41] == Token(TokenKind.TILDE, '~', 104, 6, 46)
assert tokens[42] == Token(TokenKind.UNDERSCORE, '_', 105, 6, 47)
assert tokens[43] == Token(TokenKind.CARAT, '^', 106, 6, 48)
assert tokens[44] == Token(TokenKind.BACK_SLASH, '\\', 107, 6, 49)
assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50)
assert tokens[46] == Token(TokenKind.HASH, '#', 109, 6, 51)
assert tokens[35] == Token(TokenKind.EOF, '', 88, 6, 30)
assert tokens[47] == Token(TokenKind.EOF, '', 110, 6, 52)
@pytest.mark.parametrize("text, expected", [