Added simple form of concept composition

This commit is contained in:
2020-01-15 18:38:29 +01:00
parent 51fa9629d0
commit 8152f82c6b
22 changed files with 1105 additions and 544 deletions
+68 -41
View File
@@ -258,6 +258,7 @@ class Sheerka(Concept):
# execution_context.log(logger or self.log, f"Parsing {debug_text}") # execution_context.log(logger or self.log, f"Parsing {debug_text}")
with execution_context.push(desc=f"Parsing using {parser.name}") as sub_context: with execution_context.push(desc=f"Parsing using {parser.name}") as sub_context:
sub_context.add_inputs(to_parse=to_parse)
res = parser.parse(sub_context, to_parse) res = parser.parse(sub_context, to_parse)
if res is not None: if res is not None:
if hasattr(res, "__iter__"): if hasattr(res, "__iter__"):
@@ -278,6 +279,7 @@ class Sheerka(Concept):
to_process.append(res) to_process.append(res)
if res.status: if res.status:
return_value_success_found = True return_value_success_found = True
sub_context.add_values(return_values=res)
if return_value_success_found: if return_value_success_found:
stop_processing = True stop_processing = True
@@ -347,51 +349,69 @@ class Sheerka(Concept):
sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True) sorted_priorities = sorted(grouped_evaluators.keys(), reverse=True)
# process # process
iteration = 0
while True: while True:
simple_digest = return_values[:] with execution_context.push(desc=f"iteration #{iteration}", iteration=iteration) as iteration_context:
simple_digest = return_values[:]
iteration_context.add_inputs(return_values=simple_digest)
for priority in sorted_priorities: for priority in sorted_priorities:
original_items = return_values[:] original_items = return_values[:]
evaluated_items = [] evaluated_items = []
to_delete = [] to_delete = []
for evaluator in grouped_evaluators[priority]: for evaluator in grouped_evaluators[priority]:
evaluator = _preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy evaluator = _preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy
# process evaluators that work on return value sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
from evaluators.BaseEvaluator import OneReturnValueEvaluator with iteration_context.push(desc=sub_context_desc) as sub_context:
if isinstance(evaluator, OneReturnValueEvaluator): sub_context.add_inputs(return_values=original_items)
for item in original_items:
if evaluator.matches(execution_context, item): # process evaluators that work on one simple return value at the time
result = evaluator.eval(execution_context, item) from evaluators.BaseEvaluator import OneReturnValueEvaluator
if result is None: if isinstance(evaluator, OneReturnValueEvaluator):
continue debug_result = []
elif isinstance(result, list): for item in original_items:
evaluated_items.extend(result) if evaluator.matches(sub_context, item):
to_delete.append(item) result = evaluator.eval(sub_context, item)
elif isinstance(result, ReturnValueConcept): if result is None:
evaluated_items.append(result) debug_result.append({"input": item, "return_value": None})
to_delete.append(item) continue
to_delete.append(item)
if isinstance(result, list):
evaluated_items.extend(result)
elif isinstance(result, ReturnValueConcept):
evaluated_items.append(result)
else:
error = self.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result,
evaluator=evaluator)
result = self.ret("sheerka.process", False, error, parents=[item])
evaluated_items.append(result)
debug_result.append({"input": item, "return_value": result})
else:
debug_result.append({"input": item, "return_value": "** No Match **"})
sub_context.add_values(return_values=debug_result)
# process evaluators that work on all return values
else:
if evaluator.matches(sub_context, original_items):
results = evaluator.eval(sub_context, original_items)
if results is None:
continue
if not isinstance(results, list):
results = [results]
for result in results:
evaluated_items.append(result)
to_delete.extend(result.parents)
sub_context.add_values(return_values=results)
else: else:
error = self.new(BuiltinConcepts.INVALID_RETURN_VALUE, body=result, sub_context.add_values(return_values="** No Match **")
evaluator=evaluator)
evaluated_items.append(self.ret("sheerka.process", False, error, parents=[item]))
to_delete.append(item)
# process evaluators that work on all return values return_values = evaluated_items
else: return_values.extend([item for item in original_items if item not in to_delete])
if evaluator.matches(execution_context, original_items):
results = evaluator.eval(execution_context, original_items)
if results is None:
continue
if not isinstance(results, list):
results = [results]
for result in results:
evaluated_items.append(result)
to_delete.extend(result.parents)
return_values = evaluated_items iteration_context.add_values(return_values=return_values[:])
return_values.extend([item for item in original_items if item not in to_delete])
# have we done something ? # have we done something ?
to_compare = return_values[:] to_compare = return_values[:]
@@ -399,7 +419,7 @@ class Sheerka(Concept):
break break
# inc the iteration and continue # inc the iteration and continue
execution_context = execution_context.push(iteration=execution_context.iteration + 1) iteration += 1
# remove all evaluation context that are not reduced # remove all evaluation context that are not reduced
return_values = core.utils.remove_list_from_list(return_values, evaluation_return_values) return_values = core.utils.remove_list_from_list(return_values, evaluation_return_values)
@@ -879,11 +899,11 @@ class Sheerka(Concept):
return obj return obj
def is_unknown(self, obj): def is_known(self, obj):
if not isinstance(obj, Concept): if not isinstance(obj, Concept):
return True return True
return obj.key == BuiltinConcepts.UNKNOWN_CONCEPT return obj.key != str(BuiltinConcepts.UNKNOWN_CONCEPT)
def isinstance(self, a, b): def isinstance(self, a, b):
""" """
@@ -1049,6 +1069,8 @@ class ExecutionContext:
self.desc = desc # human description of what is going on self.desc = desc # human description of what is going on
self.children = [] self.children = []
self.preprocess = None self.preprocess = None
self.inputs = {} # what was the parameters of the execution context
self.values = {} # what was produced by the execution context self.values = {} # what was produced by the execution context
self.obj = kwargs.pop("obj", None) self.obj = kwargs.pop("obj", None)
@@ -1105,6 +1127,11 @@ class ExecutionContext:
self.preprocess.add(preprocess) self.preprocess.add(preprocess)
return self return self
def add_inputs(self, **kwargs):
    """Record the parameters this execution context was invoked with.

    Mirrors add_values (which records what the context produced) and
    returns self so calls can be chained the same way.
    """
    # dict.update replaces the manual per-key assignment loop.
    self.inputs.update(kwargs)
    return self
def add_values(self, **kwargs): def add_values(self, **kwargs):
for k, v in kwargs.items(): for k, v in kwargs.items():
self.values[k] = v self.values[k] = v
+2 -2
View File
@@ -48,7 +48,7 @@ class SheerkaTransform:
self.ids[obj] = self.id_count self.ids[obj] = self.id_count
if isinstance(obj, Concept): if isinstance(obj, Concept):
return self.context_to_dict(obj) return self.concept_to_dict(obj)
elif isinstance(obj, ExecutionContext): elif isinstance(obj, ExecutionContext):
return self.execution_context_to_dict(obj) return self.execution_context_to_dict(obj)
@@ -104,7 +104,7 @@ class SheerkaTransform:
else: else:
return obj return obj
def context_to_dict(self, obj: Concept): def concept_to_dict(self, obj: Concept):
to_dict = { to_dict = {
OBJ_TYPE_KEY: SheerkaTransformType.Concept, OBJ_TYPE_KEY: SheerkaTransformType.Concept,
OBJ_ID_KEY: self.id_count, OBJ_ID_KEY: self.id_count,
+58
View File
@@ -37,6 +37,8 @@ class TokenKind(Enum):
BACK_SLASH = "bslash" # \ BACK_SLASH = "bslash" # \
CARAT = "carat" # ^ CARAT = "carat" # ^
DOLLAR = "dollar" # $ DOLLAR = "dollar" # $
# "euro" must be a distinct value: Enum members with equal values become
# aliases, so EURO = "dollar" would silently make TokenKind.EURO the very
# same member as TokenKind.DOLLAR. Also fixes the "steling" typo.
EURO = "euro"  # €
STERLING = "sterling"  # £
EMARK = "emark" # ! EMARK = "emark" # !
GREATER = "greater" # > GREATER = "greater" # >
LESS = "less" # < LESS = "less" # <
@@ -128,6 +130,18 @@ class Tokenizer:
yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column) yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column)
self.i += 1 self.i += 1
self.column += 1 self.column += 1
elif c == "_":
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
else:
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "/": elif c == "/":
yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column) yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column)
self.i += 1 self.i += 1
@@ -205,6 +219,46 @@ class Tokenizer:
yield Token(TokenKind.GREATER, ">", self.i, self.line, self.column) yield Token(TokenKind.GREATER, ">", self.i, self.line, self.column)
self.i += 1 self.i += 1
self.column += 1 self.column += 1
elif c == "!":
yield Token(TokenKind.EMARK, "!", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "`":
yield Token(TokenKind.BACK_QUOTE, "`", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\\":
yield Token(TokenKind.BACK_SLASH, "\\", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "^":
yield Token(TokenKind.CARAT, "^", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "$":
yield Token(TokenKind.DOLLAR, "$", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "":
yield Token(TokenKind.EURO, "", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "£":
yield Token(TokenKind.STERLING, "£", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "#":
yield Token(TokenKind.HASH, "#", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "°":
yield Token(TokenKind.DEGREE, "°", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "~":
yield Token(TokenKind.TILDE, "~", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\n" or c == "\r": elif c == "\n" or c == "\r":
newline = self.eat_newline(self.i) newline = self.eat_newline(self.i)
yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column) yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
@@ -234,6 +288,10 @@ class Tokenizer:
self.i += len(string) self.i += len(string)
self.column = 1 if newlines > 0 else self.column + len(string) self.column = 1 if newlines > 0 else self.column + len(string)
self.line += newlines self.line += newlines
elif c == "_":
yield Token(TokenKind.UNDERSCORE, "_", self.i, self.line, self.column)
self.i += 1
self.column += 1
else: else:
raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column) raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
+1 -1
View File
@@ -675,7 +675,7 @@ when you input
one two three four five one two three four five
the list of :code:`[foo, bar]` will be returned by the parser (as return values) the list of :code:`[foo, bar]` will be returned by the ConceptLexerParser (as return values)
How does it works ? How does it works ?
-109
View File
@@ -1,109 +0,0 @@
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
# from core.concept import Concept
# from core.tokenizer import TokenKind
# from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
# from parsers.BaseParser import BaseParser
# from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, ConceptLexerParser
# import core.utils
#
#
# class ConceptComposerEvaluator(AllReturnValuesEvaluator):
# """
# Try to reassemble parts of concepts from different evaluators
# """
#
# NAME = "ConceptComposer"
#
# def __init__(self):
# super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 40)
#
# def matches(self, context, return_values):
# concept_lexer_parser_name = ConceptLexerParser().name
#
# for return_value in return_values:
# if return_value.who.startswith(BaseParser.PREFIX) and return_value.status:
# return False
#
# if return_value.who.startswith(BaseEvaluator.PREFIX):
# return False
#
# if return_value.who != concept_lexer_parser_name:
# continue
#
# if not isinstance(return_value.value, ParserResultConcept):
# return False
#
# if not (
# isinstance(return_value.value.value, ConceptNode) or
# isinstance(return_value.value.value, UnrecognizedTokensNode) or
# (
# hasattr(return_value.value.value, "__iter__") and
# len(return_value.value.value) > 0 and
# (
# isinstance(return_value.value.value[0], ConceptNode) or
# isinstance(return_value.value.value[0], UnrecognizedTokensNode)
# ))):
# return False
#
# self.eaten = return_value
# return True
#
# return False
#
# def eval(self, context, return_value):
# sheerka = context.sheerka
# nodes = self.eaten.value.value
# temp_res = []
# has_error = False
# concepts_only = True
#
# for node in nodes:
# if isinstance(node, UnrecognizedTokensNode):
# tokens = core.utils.strip_tokens(node.tokens, True)
# for token in tokens:
# if token.type == TokenKind.IDENTIFIER:
# concept = context.new_concept(token.value)
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# has_error = True
# else:
# with context.push(self.name, desc=f"Evaluating '{concept}'") as sub_context:
# sub_context.log_new(self.verbose_log)
# concept = sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
# sub_context.add_values(return_values=concept)
# temp_res.append(concept)
#
# else:
# temp_res.append(core.utils.strip_quotes(token.value))
# concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
# else:
# with context.push(self.name, desc=f"Evaluating '{node.concept}'") as sub_context:
# sub_context.log_new(self.verbose_log)
# concept = sheerka.evaluate_concept(sub_context, node.concept, self.verbose_log)
# sub_context.add_values(return_values=concept)
# temp_res.append(concept)
#
# if has_error:
# return sheerka.ret(
# self.name,
# False,
# temp_res,
# parents=[self.eaten])
#
# if concepts_only:
# res = []
# for r in temp_res:
# if isinstance(r, Concept):
# res.append(r)
# else:
# res = ""
# for r in temp_res:
# if isinstance(r, Concept):
# res += sheerka.value(r)
# else:
# res += r
#
# return sheerka.ret(
# self.name,
# True,
# res,
# parents=[self.eaten])
-68
View File
@@ -1,68 +0,0 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
class ConceptNodeEvaluator(OneReturnValueEvaluator):
    """
    After a BNF is recognized, generates the concept or the list concepts

    Matches successful parser results whose payload is a ConceptNode or
    UnrecognizedTokensNode (or a non-empty iterable whose first element is
    one of those) and wraps the single recognized concept in a
    PARSER_RESULT return value.
    """

    NAME = "ConceptNode"

    def __init__(self):
        # Registered for EVALUATION return values with priority 60.
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)

    def matches(self, context, return_value):
        """Return True for a successful ParserResultConcept whose payload is
        made of ConceptNode / UnrecognizedTokensNode values."""
        if not return_value.status:
            return False
        if not isinstance(return_value.value, ParserResultConcept):
            return False
        # Accept a single node, or a non-empty iterable whose FIRST element
        # is a supported node type — later elements are not checked here.
        return (
            isinstance(return_value.value.value, ConceptNode) or
            isinstance(return_value.value.value, UnrecognizedTokensNode) or
            (
                hasattr(return_value.value.value, "__iter__") and
                len(return_value.value.value) > 0 and
                (
                    isinstance(return_value.value.value[0], ConceptNode) or
                    isinstance(return_value.value.value[0], UnrecognizedTokensNode)
                )
            )
        )

    def eval(self, context, return_value):
        """
        From a concept node, creates a new concept
        and makes sure that the properties are correctly set
        """
        sheerka = context.sheerka
        nodes = return_value.value.value
        if not hasattr(nodes, "__iter__"):
            nodes = [nodes]  # normalize a single node into a list
        concepts = []
        error_found = False  # set when a non-ConceptNode is encountered
        source = ""
        for node in nodes:
            if isinstance(node, ConceptNode):
                # Rebuild the original source text, space-separated.
                source += node.source if source == "" else (" " + node.source)
                concepts.append(node.concept)
            else:
                error_found = True
        # NOTE(review): only the exactly-one-concept case produces a result;
        # zero or several concepts fall through to NOT_FOR_ME — confirm intended.
        if len(concepts) == 1:
            return sheerka.ret(
                self.name,
                not error_found,
                context.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=concepts[0],
                    try_parsed=None),
                parents=[return_value])

        return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME), parents=[return_value])
+102
View File
@@ -0,0 +1,102 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
class LexerNodeEvaluator(OneReturnValueEvaluator):
    """
    After a BNF is recognized, generates the concept or the list concepts

    Accepts parser results whose payload is a ConceptNode / SourceCodeNode
    (or a list made only of those). Lists containing a SourceCodeNode are
    routed to the Python helper parser; otherwise the single recognized
    concept is wrapped in a PARSER_RESULT.
    """

    NAME = "LexerNode"

    def __init__(self):
        # Registered for EVALUATION return values with priority 60.
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)
        self.identifiers = {}  # cache for already created identifier (the key is id(concept))
        self.identifiers_key = {}  # number of identifiers with the same root (prefix)
        # NOTE(review): neither cache is read or written inside this class —
        # presumably reserved for subclasses or future use; confirm.

    def matches(self, context, return_value):
        """Return True when *return_value* is a successful ParserResultConcept
        whose value is a ConceptNode/SourceCodeNode or an iterable of only those."""
        if not return_value.status:
            return False
        if not isinstance(return_value.value, ParserResultConcept):
            return False
        value = return_value.value.value
        if isinstance(value, (ConceptNode, SourceCodeNode)):
            return True
        if hasattr(value, "__iter__"):
            # Every element must be one of the supported node types.
            # NOTE(review): an empty iterable matches — confirm intended.
            for node in value:
                if not isinstance(node, (ConceptNode, SourceCodeNode)):
                    return False
            return True
        return False

    def eval(self, context, return_value):
        """
        From a concept node, creates a new concept
        and makes sure that the properties are correctly set
        """
        nodes = return_value.value.value
        if not hasattr(nodes, "__iter__"):
            nodes = [nodes]  # normalize a single node into a list
        context.log(self.verbose_log, f"{nodes=}", self.name)
        for node in nodes:
            if isinstance(node, SourceCodeNode):
                # A single source-code node routes the whole list to the Python path.
                ret = self.evaluate_python_code(context, nodes)
                break
        else:
            # No SourceCodeNode found (also taken when nodes is empty).
            ret = self.evaluate_concepts_only(context, nodes)
        ret.parents = [return_value]  # keep provenance for later reduction
        return ret

    def evaluate_concepts_only(self, context, nodes):
        """Build a PARSER_RESULT from a single ConceptNode.

        NOTE(review): only the exactly-one-concept case succeeds; lists with
        zero or several ConceptNodes return NOT_FOR_ME — confirm intended.
        """
        concepts = []
        source = ""
        sheerka = context.sheerka
        for node in nodes:
            if isinstance(node, ConceptNode):
                # Rebuild the original source text, space-separated.
                source += node.source if source == "" else (" " + node.source)
                concepts.append(node.concept)
        if len(concepts) == 1:
            return sheerka.ret(
                self.name,
                True,
                context.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=concepts[0],
                    try_parsed=None))

        return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=nodes))

    def evaluate_python_code(self, context, nodes):
        """Delegate *nodes* to the Python helper parser.

        Returns a successful PARSER_RESULT when the helper produces a
        PythonNode; otherwise a failed return value carrying the helper
        result's body.
        """
        sheerka = context.sheerka
        helper = LexerNodeParserHelperForPython()
        result = helper.parse(context, nodes)
        if isinstance(result, PythonNode):
            return sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=result.source,
                    body=result,
                    try_parsed=None))
        else:
            # Parse failed: result is assumed to carry an error body —
            # TODO confirm the helper's failure type.
            return sheerka.ret(
                self.name,
                False,
                result.body)
+52 -38
View File
@@ -6,7 +6,8 @@
# Arpeggio: A flexible PEG parser for Python, # Arpeggio: A flexible PEG parser for Python,
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004 # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
##################################################################################################### #####################################################################################################
from dataclasses import field, dataclass from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve from core.concept import Concept, ConceptParts, DoNotResolve
@@ -15,23 +16,6 @@ from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils import core.utils
def flatten(iterable):
if iterable is None:
return []
result = []
for e in iterable:
if e.parsing_expression.rule_name is not None and e.parsing_expression.rule_name != "":
if hasattr(e, "children"):
e.children = flatten(e.children)
result.append(e)
elif hasattr(e, "children"):
result.extend(flatten(e.children))
else:
result.append(e)
return result
@dataclass() @dataclass()
class LexerNode(Node): class LexerNode(Node):
start: int # starting index in the tokens list start: int # starting index in the tokens list
@@ -68,10 +52,10 @@ class UnrecognizedTokensNode(LexerNode):
return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)) return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, tuple): if isinstance(other, utnode):
if len(other) != 3: return self.start == other.start and \
return False self.end == other.end and \
return self.start == other[0] and self.end == other[1] and self.source == other[2] self.source == other.source
if not isinstance(other, UnrecognizedTokensNode): if not isinstance(other, UnrecognizedTokensNode):
return False return False
@@ -80,6 +64,9 @@ class UnrecognizedTokensNode(LexerNode):
self.end == other.end and \ self.end == other.end and \
self.source == other.source self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self): def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')" return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
@@ -99,17 +86,14 @@ class ConceptNode(LexerNode):
self.source = BaseParser.get_text_from_tokens(self.tokens) self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, tuple): if isinstance(other, cnode):
if len(other) == 2: return self.concept.key == other.concept_key and \
return self.concept.key == other[0] and self.source == other[1] self.start == other.start and \
else: self.end == other.end and \
return self.concept.key == other[0] and \ self.source == other.source
self.start == other[1] and \
self.end == other[2] and \
self.source == other[3]
# if not super().__eq__(other): if isinstance(other, short_cnode):
# return False return self.concept.key == other.concept_key and self.source == other.source
if not isinstance(other, ConceptNode): if not isinstance(other, ConceptNode):
return False return False
@@ -127,6 +111,42 @@ class ConceptNode(LexerNode):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')" return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeNode(LexerNode):
"""
Returned when some source code (like Python source code is recognized)
"""
def __init__(self, node, start, end, tokens=None, source=None):
super().__init__(start, end, tokens, source)
self.node = node # The PythonNode (or whatever language node) that is found
def __eq__(self, other):
if isinstance(other, scnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, SourceCodeNode):
return False
return self.node == other.node and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
# Lightweight tuple "shapes" accepted by the node __eq__ methods earlier in
# this file: a node compares equal to a namedtuple carrying the same fields,
# which lets callers/tests express expected nodes without building full
# lexer-node objects.
# NOTE(review): short_cnode reuses the typename "ConceptNode" — presumably
# intentional since only the fields matter for __eq__; confirm.
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
class NonTerminalNode(LexerNode): class NonTerminalNode(LexerNode):
""" """
Returned by the ConceptLexerParser Returned by the ConceptLexerParser
@@ -146,9 +166,6 @@ class NonTerminalNode(LexerNode):
return name + sub_names return name + sub_names
def __eq__(self, other): def __eq__(self, other):
# if not super().__eq__(other):
# return False
if not isinstance(other, NonTerminalNode): if not isinstance(other, NonTerminalNode):
return False return False
@@ -176,9 +193,6 @@ class TerminalNode(LexerNode):
return name + f"'{self.value}'" return name + f"'{self.value}'"
def __eq__(self, other): def __eq__(self, other):
# if not super().__eq__(other):
# return False
if not isinstance(other, TerminalNode): if not isinstance(other, TerminalNode):
return False return False
+110
View File
@@ -0,0 +1,110 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging
multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
    """
    Recognizes a composed concept: a MultipleConcepts parser result whose
    key mixes literal words with embedded concepts. Embedded nodes become
    numbered placeholders in the key (VARIABLE_PREFIX + index), and each
    embedded concept / source-code node is bound to the matching property
    of the recognized concept.
    """

    def __init__(self, **kwargs):
        # Parser name "ConceptsWithConcepts", priority 25.
        super().__init__("ConceptsWithConcepts", 25)

    @staticmethod
    def get_tokens(nodes):
        """Flatten *nodes* into a token list.

        A ConceptNode becomes a single CONCEPT token positioned at its first
        source token; other nodes contribute their raw tokens, stopping at
        EOF and dropping whitespace/newlines.
        """
        tokens = []
        for node in nodes:
            if isinstance(node, ConceptNode):
                first = node.tokens[0]  # position the synthetic token at the node start
                tokens.append(Token(TokenKind.CONCEPT, node.concept, first.index, first.line, first.column))
            else:
                for token in node.tokens:
                    if token.type == TokenKind.EOF:
                        break
                    if token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                        continue
                    tokens.append(token)
        return tokens

    @staticmethod
    def get_key(nodes):
        """Build the lookup key: literal text for unrecognized-token runs,
        "$<n>"-style placeholders (VARIABLE_PREFIX + index) for embedded nodes."""
        key = ""
        index = 0
        for node in nodes:
            if key:
                key += " "
            if isinstance(node, UnrecognizedTokensNode):
                key += node.source.strip()
            else:
                key += f"{VARIABLE_PREFIX}{index}"
                index += 1
        return key

    def finalize_concept(self, context, concept, nodes):
        """Bind each embedded node to the concept's properties, positionally.

        ConceptNodes bind their concept directly; SourceCodeNodes are wrapped
        in a PARSER_RESULT return value. Returns the (mutated) concept.
        """
        # Hoisted: props is not mutated inside this loop, so compute the
        # positional property-name list once instead of per node.
        prop_names = list(concept.props.keys())
        index = 0
        for node in nodes:
            if isinstance(node, ConceptNode):
                prop_name = prop_names[index]
                concept.cached_asts[prop_name] = node.concept
                context.log(
                    self.verbose_log,
                    # quote placement made consistent with the message below
                    f"Setting property '{prop_name}'='{node.concept}'.",
                    self.name)
                index += 1
            elif isinstance(node, SourceCodeNode):
                prop_name = prop_names[index]
                sheerka = context.sheerka
                value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
                concept.cached_asts[prop_name] = [context.sheerka.ret(self.name, True, value)]
                context.log(
                    self.verbose_log,
                    f"Setting property '{prop_name}'='Python({node.source})'.",
                    self.name)
                index += 1
        return concept

    def parse(self, context, text):
        """Try to recognize a composed concept from a MultipleConcepts result.

        Returns None when *text* is not for this parser, a failed return
        value when the composed key is unknown, and otherwise one (or a
        list of) successful PARSER_RESULT return value(s).
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None
        # Only results produced by the MultipleConceptsParser are handled.
        if text.parser != multiple_concepts_parser:
            return None

        nodes = text.body
        concept_key = self.get_key(nodes)
        concept = sheerka.new(concept_key)
        if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))

        # sheerka.new may return one concept or several candidates.
        concepts = concept if hasattr(concept, "__iter__") else [concept]
        # `candidate` no longer shadows the outer `concept` variable.
        for candidate in concepts:
            self.finalize_concept(context, candidate, nodes)

        res = []
        for candidate in concepts:
            res.append(sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=text.source,
                    body=candidate,
                    try_parsed=None)))
        return res[0] if len(res) == 1 else res
+104 -36
View File
@@ -1,8 +1,11 @@
import ast
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
import core.utils import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser() concept_lexer_parser = ConceptLexerParser()
@@ -18,6 +21,25 @@ class MultipleConceptsParser(BaseParser):
def __init__(self, **kwargs): def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45) BaseParser.__init__(self, "MultipleConcepts", 45)
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
if not unrecognized_tokens:
return nodes_found, unrecognized_tokens
unrecognized_tokens.fix_source()
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
return nodes_found, None
@staticmethod
def create_or_add(unrecognized_tokens, token, index):
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, text): def parse(self, context, text):
sheerka = context.sheerka sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT): if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
@@ -29,50 +51,42 @@ class MultipleConceptsParser(BaseParser):
sheerka = context.sheerka sheerka = context.sheerka
nodes = text.value nodes = text.value
nodes_found = [[]] nodes_found = [[]]
source = ""
concepts_only = True concepts_only = True
for node in nodes: for node in nodes:
if isinstance(node, UnrecognizedTokensNode): if isinstance(node, UnrecognizedTokensNode):
unrecognized_tokens = None unrecognized_tokens = None
for i, token in enumerate(node.tokens): i = 0
index = node.start + i
if token.type == TokenKind.IDENTIFIER: while i < len(node.tokens):
# it may be a concept
concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept):
# finish processing unrecognized_tokens
if unrecognized_tokens:
unrecognized_tokens.fix_source()
source += unrecognized_tokens.source
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
unrecognized_tokens = None
source += token.value token_index = node.start + i
concepts = concept if hasattr(concept, "__iter__") else [concept] token = node.tokens[i]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
nodes_found = core.utils.product(nodes_found, concepts_nodes)
continue
else:
# it cannot be a concept
concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
if unrecognized_tokens: concepts_nodes = self.get_concepts_nodes(context, token_index, token)
unrecognized_tokens.add_token(token, index) if concepts_nodes is not None:
else: nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token]) nodes_found = core.utils.product(nodes_found, concepts_nodes)
i += 1
continue
if unrecognized_tokens: source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
unrecognized_tokens.fix_source() if source_code_node:
source += unrecognized_tokens.source nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
if unrecognized_tokens.not_whitespace(): nodes_found = core.utils.product(nodes_found, [source_code_node])
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens]) i += len(source_code_node.tokens)
continue
# not a concept nor some source code
unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
i += 1
# finish processing if needed
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
else: else:
nodes_found = core.utils.product(nodes_found, [node]) nodes_found = core.utils.product(nodes_found, [node])
source += node.source
ret = [] ret = []
for choice in nodes_found: for choice in nodes_found:
@@ -83,14 +97,68 @@ class MultipleConceptsParser(BaseParser):
sheerka.new( sheerka.new(
BuiltinConcepts.PARSER_RESULT, BuiltinConcepts.PARSER_RESULT,
parser=self, parser=self,
source=source, source=text.source,
body=choice, body=choice,
try_parsed=None)) try_parsed=None))
) )
if len(ret) == 1: if len(ret) == 1:
self.log_result(context, source, ret[0]) self.log_result(context, text.source, ret[0])
return ret[0] return ret[0]
else: else:
self.log_multiple_results(context, source, ret) self.log_multiple_results(context, text.source, ret)
return ret return ret
@staticmethod
def get_concepts_nodes(context, index, token):
    """Try to recognize a concept from the universe of all known concepts.

    :param context: execution context used to resolve the token value
    :param index: token index used for the node's start/end positions
    :param token: lexer token; only IDENTIFIER tokens are considered
    :return: a list of ConceptNode (one per matching concept), or None
        when the token is not an identifier or no known concept matches
    """
    if token.type != TokenKind.IDENTIFIER:
        return None
    candidate = context.new_concept(token.value)
    # new_concept may return a single concept or an iterable of candidates
    is_multiple = hasattr(candidate, "__iter__")
    if not (is_multiple or context.sheerka.is_known(candidate)):
        return None
    matched = candidate if is_multiple else [candidate]
    return [ConceptNode(c, index, index, [token], token.value) for c in matched]
@staticmethod
def get_source_code_node(context, index, tokens):
    """
    Try to recognize a run of tokens as source code.
    For the time being, only Python is supported.

    Greedy strategy: attempt to parse the longest prefix of *tokens*
    first, then shrink the prefix one token at a time until a parse
    succeeds or the prefix is empty.

    :param context: execution context forwarded to the Python parser
    :param index: absolute index of the first token (used for node positions)
    :param tokens: remaining lexer tokens to examine
    :return: a SourceCodeNode covering the recognized prefix, or None
    """
    # nothing to do on an empty tail (or a lone EOF token)
    if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
        return None
    end_index = len(tokens)
    while end_index > 0:
        parser = PythonParser()
        tokens_to_parse = tokens[:end_index]
        res = parser.parse(context, tokens_to_parse)
        if res.status:
            # only expression are accepted
            # presumably res.value.value is the PythonNode wrapper — the
            # double .value is the parser-result envelope; TODO confirm
            ast_ = res.value.value.ast_
            if not isinstance(ast_, ast.Expression):
                return None
            try:
                # the expression must also evaluate cleanly (with empty
                # globals/locals) to count as source code
                # NOTE(review): eval of parsed input — fine for trusted
                # grammars, a code-execution risk on untrusted text
                compiled = compile(ast_, "<string>", "eval")
                eval(compiled, {}, {})
            except Exception:
                return None
            source = BaseParser.get_text_from_tokens(tokens_to_parse)
            return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
        # shrink the candidate prefix and retry
        end_index -= 1
    return None
+80 -1
View File
@@ -5,6 +5,8 @@ from dataclasses import dataclass, field
import ast import ast
import logging import logging
from parsers.ConceptLexerParser import ConceptNode
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -22,7 +24,7 @@ class PythonNode(Node):
def __init__(self, source, ast_, concepts=None): def __init__(self, source, ast_, concepts=None):
self.source = source self.source = source
self.ast_ = ast_ self.ast_ = ast_
self.concepts = concepts or {} self.concepts = concepts or {} # when concepts are recognized in the expression
# def __repr__(self): # def __repr__(self):
# return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")" # return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")"
@@ -133,3 +135,80 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
def visit_Name(self, node): def visit_Name(self, node):
self.names.add(node.id) self.names.add(node.id)
class LexerNodeParserHelperForPython:
    """Helper class to parse a mix of concepts and Python.

    ConceptNode entries in the lexer output are replaced by synthetic
    Python identifiers before the whole text is handed to PythonParser;
    the identifier -> concept mapping is kept so the recognized concepts
    can be re-attached to the resulting Python node.
    """

    def __init__(self):
        # cache of already created identifiers, keyed by id(concept)
        self.identifiers = {}
        # number of identifiers sharing the same root (prefix), keyed by prefix
        self.identifiers_key = {}

    def _get_identifier(self, concept):
        """
        Get a Python identifier for a concept.

        Guarantees:
        - the same concept object always maps to the same identifier;
        - two different concepts with the same name map to different
          identifiers (a numeric suffix disambiguates them).

        :param concept: concept to name; ``concept.key`` (falling back to
            ``concept.name``) is used as the root of the identifier
        :return: an identifier of the form ``__C__<root>[__<id>][_<n>]__C__``
        """
        if id(concept) in self.identifiers:
            return self.identifiers[id(concept)]
        identifier = "__C__" + self._sanitize(concept.key or concept.name)
        if concept.id:
            identifier += "__" + concept.id
        if identifier in self.identifiers_key:
            # same root already used by a different concept: disambiguate
            self.identifiers_key[identifier] += 1
            identifier += f"_{self.identifiers_key[identifier]}"
        else:
            self.identifiers_key[identifier] = 0
        identifier += "__C__"
        self.identifiers[id(concept)] = identifier
        return identifier

    @staticmethod
    def _sanitize(identifier):
        """Replace every non-alphanumeric character by '0' so *identifier*
        can be embedded inside a valid Python identifier."""
        return "".join(c if c.isalnum() else "0" for c in identifier)

    def parse(self, context, nodes):
        """
        Parse a sequence of lexer nodes as a single Python expression.

        ConceptNode entries are substituted by synthetic identifiers
        (see :meth:`_get_identifier`); other nodes contribute their raw
        source text.

        :param context: execution context; a sub-context is pushed for logging
        :param nodes: lexer nodes (ConceptNode or plain nodes with ``.source``)
        :return: the Python node (with ``source`` and ``concepts`` attached)
            on success, otherwise the error body of the parser result
        """
        source = ""
        to_parse = ""
        concepts = {}  # the key is the Python identifier
        for node in nodes:
            # the original source is accumulated identically in both cases
            source += node.source
            if isinstance(node, ConceptNode):
                if to_parse:
                    to_parse += " "
                python_id = self._get_identifier(node.concept)
                to_parse += python_id
                concepts[python_id] = node.concept
            else:
                to_parse += node.source
        with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
            sub_context.add_inputs(to_parse=to_parse)
            python_parser = PythonParser()
            result = python_parser.parse(sub_context, to_parse)
            sub_context.add_values(return_values=result)
            if result.status:
                python_node = result.body.body
                python_node.source = source
                python_node.concepts = concepts
                return python_node
            return result.body  # the error
+4 -11
View File
@@ -37,6 +37,10 @@ class PythonWithConceptsParser(BaseParser):
def _get_identifier(c): def _get_identifier(c):
""" """
Get an identifier for a concept.
Make sure to return the same identifier if the same concept
Make sure to return a different identifier if same name but different concept
Internal function because I don't want identifiers, identifiers_key and python_ids_mappings Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
to be instance variables to be instance variables
I would like to keep this parser as stateless as possible I would like to keep this parser as stateless as possible
@@ -99,14 +103,3 @@ class PythonWithConceptsParser(BaseParser):
self.name, self.name,
False, False,
result.body) result.body)
def concept_identifier(self, concept):
if id(concept) in self.identifiers:
return self.identifiers[id(concept)]
identifier = "__C__" + (concept.key or concept.name)
if concept.id:
identifier += "__" + concept.id
identifier += "__C__"
return identifier
+4 -4
View File
@@ -6,7 +6,7 @@ from core.tokenizer import Tokenizer, TokenKind, LexerError
from parsers.BaseParser import UnexpectedTokenErrorNode from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptNode, ConceptMatch ConceptLexerParser, ConceptNode, ConceptMatch, cnode
from sdp.sheerkaDataProvider import Event from sdp.sheerkaDataProvider import Event
@@ -108,12 +108,12 @@ def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
res = concept_parser.parse(context, "twenty two") res = concept_parser.parse(context, "twenty two")
assert res.status assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")] assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
res = concept_parser.parse(context, "thirty one") res = concept_parser.parse(context, "thirty one")
assert res.status assert res.status
assert res.value.body == [("bar", 0, 2, "thirty one")] assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
res = concept_parser.parse(context, "twenty") res = concept_parser.parse(context, "twenty")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")] assert res.value.body == [cnode("foo", 0, 0, "twenty")]
-128
View File
@@ -1,128 +0,0 @@
# import pytest
#
# from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
# from core.concept import Concept
# from core.sheerka import Sheerka, ExecutionContext
# from evaluators.BaseEvaluator import BaseEvaluator
# from evaluators.ConceptComposerEvaluator import ConceptComposerEvaluator
# from parsers.BaseParser import BaseParser
# from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence
# from sdp.sheerkaDataProvider import Event
#
# concept_lexer_name = ConceptLexerParser().name
#
#
# def get_context():
# sheerka = Sheerka(skip_builtins_in_db=True)
# sheerka.initialize("mem://")
# return ExecutionContext("test", Event(), sheerka)
#
#
# def get_return_values(context, grammar, expression):
# parser = ConceptLexerParser()
# parser.initialize(context, grammar)
#
# ret_val = parser.parse(context, expression)
# assert not ret_val.status
# return [ret_val]
#
#
# def init(concepts, grammar, expression):
# context = get_context()
# for c in concepts:
# context.sheerka.add_in_cache(c)
# return_values = get_return_values(context, grammar, expression)
#
# return context, return_values
#
#
# @pytest.mark.parametrize("return_values, expected", [
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], True),
# ([
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], True),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, "some value"),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=["not a concept"])),
# ], False),
# ([
# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", False, "evaluator in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], False),
# ([
# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", True, "evaluator"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], False),
# ])
# def test_i_can_match(return_values, expected):
# context = get_context()
# assert ConceptComposerEvaluator().matches(context, return_values) == expected
#
#
# def test_i_can_eval_simple_concepts():
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {}
# context, return_values = init([foo, bar], grammar, "bar foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
# assert ret_val.value[0].metadata.is_evaluated
# assert ret_val.value[1].metadata.is_evaluated
# assert ret_val.parents == [return_values[0]]
#
#
# def test_i_can_eval_simple_concepts_when_some_are_bnf():
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: "foo"}
# context, return_values = init([foo, bar], grammar, "bar foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
# assert ret_val.value[0].metadata.is_evaluated
# assert ret_val.value[1].metadata.is_evaluated
# assert ret_val.parents == [return_values[0]]
#
#
# def test_i_can_eval_simple_concept_and_text():
# foo = Concept("foo", body="'foo'")
# grammar = {}
# context, return_values = init([foo], grammar, "'bar' foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == "bar foo"
# assert ret_val.parents == [return_values[0]]
+63 -63
View File
@@ -6,7 +6,7 @@ from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, Token from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \ ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch, ZeroOrMore, OneOrMore, \
UnrecognizedTokensNode UnrecognizedTokensNode, cnode, short_cnode
from sdp.sheerkaDataProvider import Event from sdp.sheerkaDataProvider import Event
@@ -163,7 +163,7 @@ def test_i_always_choose_the_longest_match():
assert res.status assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 4, "one two three")] assert return_value == [cnode("foo", 0, 4, "one two three")]
def test_i_can_match_several_sequences(): def test_i_can_match_several_sequences():
@@ -176,8 +176,8 @@ def test_i_can_match_several_sequences():
assert res.status assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [ assert return_value == [
("foo", 0, 4, "one two three"), cnode("foo", 0, 4, "one two three"),
("bar", 6, 8, "one two"), cnode("bar", 6, 8, "one two"),
] ]
@@ -189,13 +189,13 @@ def test_i_can_match_ordered_choice():
res1 = parser.parse(context, "one") res1 = parser.parse(context, "one")
assert res1.status assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [("foo", 0, 0, "one")] assert res1.value.body == [cnode("foo", 0, 0, "one")]
assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)]) assert res1.value.body[0].underlying == u(grammar[foo], 0, 0, [u("one", 0, 0)])
res2 = parser.parse(context, "two") res2 = parser.parse(context, "two")
assert res2.status assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [("foo", 0, 0, "two")] assert res2.value.body == [cnode("foo", 0, 0, "two")]
assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)]) assert res2.value.body[0].underlying == u(grammar[foo], 0, 0, [u("two", 0, 0)])
res3 = parser.parse(context, "three") res3 = parser.parse(context, "three")
@@ -259,11 +259,11 @@ def test_i_can_mix_ordered_choices_and_sequences():
res = parser.parse(context, "twenty thirty") res = parser.parse(context, "twenty thirty")
assert res.status assert res.status
assert res.value.value == [("foo", 0, 2, "twenty thirty")] assert res.value.value == [cnode("foo", 0, 2, "twenty thirty")]
res = parser.parse(context, "one") res = parser.parse(context, "one")
assert res.status assert res.status
assert res.value.value == [("foo", 0, 0, "one")] assert res.value.value == [cnode("foo", 0, 0, "one")]
def test_i_cannot_parse_empty_optional(): def test_i_cannot_parse_empty_optional():
@@ -319,11 +319,11 @@ def test_i_can_parse_sequence_ending_with_optional():
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 4, "one two three")] assert res.value.body == [cnode("foo", 0, 4, "one two three")]
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 2, "one two")] assert res.value.body == [cnode("foo", 0, 2, "one two")]
def test_i_can_parse_sequence_with_optional_in_between(): def test_i_can_parse_sequence_with_optional_in_between():
@@ -335,11 +335,11 @@ def test_i_can_parse_sequence_with_optional_in_between():
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 4, "one two three")] assert res.value.body == [cnode("foo", 0, 4, "one two three")]
res = parser.parse(context, "one three") res = parser.parse(context, "one three")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 2, "one three")] assert res.value.body == [cnode("foo", 0, 2, "one three")]
def test_i_cannot_parse_wrong_input_with_optional(): def test_i_cannot_parse_wrong_input_with_optional():
@@ -370,13 +370,13 @@ def test_i_can_use_reference():
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")] assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept concept_found_1 = res[0].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two") assert cbody(concept_found_1) == DoNotResolve("one two")
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")] assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept concept_found_2 = res[1].value.body[0].concept
# the body and the prop['foo'] are the same concept 'foo' # the body and the prop['foo'] are the same concept 'foo'
assert cbody(concept_found_2) == get_expected(foo, "one two") assert cbody(concept_found_2) == get_expected(foo, "one two")
@@ -400,13 +400,13 @@ def test_i_can_use_a_reference_with_a_body():
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")] assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept concept_found_1 = res[0].value.body[0].concept
assert concept_found_1.body == "'foo'" assert concept_found_1.body == "'foo'"
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")] assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept concept_found_2 = res[1].value.body[0].concept
# the body and the prop['foo'] are the same concept 'foo' # the body and the prop['foo'] are the same concept 'foo'
assert cbody(concept_found_2) == foo assert cbody(concept_found_2) == foo
@@ -430,20 +430,20 @@ def test_i_can_use_context_reference_with_multiple_levels():
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("foo", 0, 2, "one two")] assert res[0].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[0].value.body[0].concept concept_found_1 = res[0].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two") assert cbody(concept_found_1) == DoNotResolve("one two")
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("bar", 0, 2, "one two")] assert res[1].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_2 = res[1].value.body[0].concept concept_found_2 = res[1].value.body[0].concept
assert cbody(concept_found_2) == get_expected(foo, "one two") assert cbody(concept_found_2) == get_expected(foo, "one two")
assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2)) assert id(cprop(concept_found_2, "foo")) == id(cbody(concept_found_2))
assert res[2].status assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [("baz", 0, 2, "one two")] assert res[2].value.body == [cnode("baz", 0, 2, "one two")]
concept_found_3 = res[2].value.body[0].concept concept_found_3 = res[2].value.body[0].concept
expected_foo = get_expected(foo, "one two") expected_foo = get_expected(foo, "one two")
assert cbody(concept_found_3) == get_expected(bar, expected_foo) assert cbody(concept_found_3) == get_expected(bar, expected_foo)
@@ -465,8 +465,8 @@ def test_order_is_not_important_when_using_references():
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert len(res) == 2 assert len(res) == 2
assert res[0].value.body == [("bar", 0, 2, "one two")] assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
assert res[1].value.body == [("foo", 0, 2, "one two")] assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
def test_i_can_parse_when_reference(): def test_i_can_parse_when_reference():
@@ -477,21 +477,21 @@ def test_i_can_parse_when_reference():
res = parser.parse(context, "twenty two") res = parser.parse(context, "twenty two")
assert res.status assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")] assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two") assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "foo") == get_expected(foo, "twenty") assert cprop(concept_found, "foo") == get_expected(foo, "twenty")
res = parser.parse(context, "thirty one") res = parser.parse(context, "thirty one")
assert res.status assert res.status
assert res.value.body == [("bar", 0, 2, "thirty one")] assert res.value.body == [cnode("bar", 0, 2, "thirty one")]
concept_found = res.value.body[0].concept concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("thirty one") assert cbody(concept_found) == DoNotResolve("thirty one")
assert cprop(concept_found, "foo") == get_expected(foo, "thirty") assert cprop(concept_found, "foo") == get_expected(foo, "thirty")
res = parser.parse(context, "twenty") res = parser.parse(context, "twenty")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")] assert res.value.body == [cnode("foo", 0, 0, "twenty")]
concept_found = res.value.body[0].concept concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty") assert cbody(concept_found) == DoNotResolve("twenty")
@@ -504,14 +504,14 @@ def test_i_can_parse_when_reference_has_a_body():
res = parser.parse(context, "twenty two") res = parser.parse(context, "twenty two")
assert res.status assert res.status
assert res.value.body == [("bar", 0, 2, "twenty two")] assert res.value.body == [cnode("bar", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two") assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "foo") == foo assert cprop(concept_found, "foo") == foo
res = parser.parse(context, "twenty") res = parser.parse(context, "twenty")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 0, "twenty")] assert res.value.body == [cnode("foo", 0, 0, "twenty")]
concept_found = res.value.body[0].concept concept_found = res.value.body[0].concept
assert concept_found.body == "'one'" assert concept_found.body == "'one'"
@@ -529,14 +529,14 @@ def test_i_can_parse_multiple_results():
assert len(res) == 2 assert len(res) == 2
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", 0, 2, "one two")] assert res[0].value.body == [cnode("bar", 0, 2, "one two")]
concept_found_0 = res[0].value.body[0].concept concept_found_0 = res[0].value.body[0].concept
assert cbody(concept_found_0) == DoNotResolve("one two") assert cbody(concept_found_0) == DoNotResolve("one two")
assert len(concept_found_0.props) == 0 assert len(concept_found_0.props) == 0
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", 0, 2, "one two")] assert res[1].value.body == [cnode("foo", 0, 2, "one two")]
concept_found_1 = res[1].value.body[0].concept concept_found_1 = res[1].value.body[0].concept
assert cbody(concept_found_1) == DoNotResolve("one two") assert cbody(concept_found_1) == DoNotResolve("one two")
assert len(concept_found_1.props) == 0 assert len(concept_found_1.props) == 0
@@ -555,19 +555,19 @@ def test_i_can_parse_multiple_results_times_two():
assert len(res) == 4 assert len(res) == 4
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", "one two"), ("bar", "one two")] assert res[0].value.body == [short_cnode("bar", "one two"), short_cnode("bar", "one two")]
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", "one two"), ("bar", "one two")] assert res[1].value.body == [short_cnode("foo", "one two"), short_cnode("bar", "one two")]
assert res[2].status assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [("bar", "one two"), ("foo", "one two")] assert res[2].value.body == [short_cnode("bar", "one two"), short_cnode("foo", "one two")]
assert res[3].status assert res[3].status
assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[3].value, BuiltinConcepts.PARSER_RESULT)
assert res[3].value.body == [("foo", "one two"), ("foo", "one two")] assert res[3].value.body == [short_cnode("foo", "one two"), short_cnode("foo", "one two")]
def test_i_can_parse_multiple_results_when_reference(): def test_i_can_parse_multiple_results_when_reference():
@@ -589,11 +589,11 @@ def test_i_can_parse_multiple_results_when_reference():
assert len(res) == 2 assert len(res) == 2
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [("bar", 0, 0, "twenty")] assert res[0].value.body == [cnode("bar", 0, 0, "twenty")]
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [("foo", 0, 0, "twenty")] assert res[1].value.body == [cnode("foo", 0, 0, "twenty")]
def test_i_can_parse_concept_reference_that_is_not_in_grammar(): def test_i_can_parse_concept_reference_that_is_not_in_grammar():
@@ -608,14 +608,14 @@ def test_i_can_parse_concept_reference_that_is_not_in_grammar():
res = parser.parse(context, "twenty two") res = parser.parse(context, "twenty two")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 2, "twenty two")] assert res.value.body == [cnode("foo", 0, 2, "twenty two")]
concept_found = res.value.body[0].concept concept_found = res.value.body[0].concept
assert cbody(concept_found) == DoNotResolve("twenty two") assert cbody(concept_found) == DoNotResolve("twenty two")
assert cprop(concept_found, "two") == get_expected(two, "two") assert cprop(concept_found, "two") == get_expected(two, "two")
res = parser.parse(context, "twenty one") res = parser.parse(context, "twenty one")
assert res.status assert res.status
assert res.value.body == [("foo", 0, 2, "twenty one")] assert res.value.body == [cnode("foo", 0, 2, "twenty one")]
def test_i_can_parse_zero_or_more(): def test_i_can_parse_zero_or_more():
@@ -625,7 +625,7 @@ def test_i_can_parse_zero_or_more():
context, res, wrapper, return_value = execute([foo], grammar, "one one") context, res, wrapper, return_value = execute([foo], grammar, "one one")
assert res.status assert res.status
assert return_value == [("foo", 0, 2, "one one")] assert return_value == [cnode("foo", 0, 2, "one one")]
assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)]) assert return_value[0].underlying == u(grammar[foo], 0, 2, [u("one", 0, 0), u("one", 2, 2)])
concept_found = return_value[0].concept concept_found = return_value[0].concept
@@ -639,11 +639,11 @@ def test_i_can_parse_sequence_and_zero_or_more():
res = parser.parse(context, "one one two") res = parser.parse(context, "one one two")
assert res.status assert res.status
assert res.value.value == [("foo", 0, 4, "one one two")] assert res.value.value == [cnode("foo", 0, 4, "one one two")]
res = parser.parse(context, "two") res = parser.parse(context, "two")
assert res.status assert res.status
assert res.value.value == [("foo", 0, 0, "two")] assert res.value.value == [cnode("foo", 0, 0, "two")]
def test_i_cannot_parse_zero_and_more_when_wrong_entry(): def test_i_cannot_parse_zero_and_more_when_wrong_entry():
@@ -657,7 +657,7 @@ def test_i_cannot_parse_zero_and_more_when_wrong_entry():
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert not res.status assert not res.status
assert res.value.value == [ assert res.value.value == [
("foo", 0, 0, "one"), cnode("foo", 0, 0, "one"),
UnrecognizedTokensNode(1, 2, [t(" "), t("two")]) UnrecognizedTokensNode(1, 2, [t(" "), t("two")])
] ]
@@ -675,7 +675,7 @@ def test_i_can_parse_zero_and_more_with_separator():
context, res, wrapper, return_value = execute([foo], grammar, "one, one , one") context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")
assert res.status assert res.status
assert return_value == [("foo", 0, 7, "one, one , one")] assert return_value == [cnode("foo", 0, 7, "one, one , one")]
def test_that_zero_and_more_is_greedy(): def test_that_zero_and_more_is_greedy():
@@ -686,7 +686,7 @@ def test_that_zero_and_more_is_greedy():
context, res, wrapper, return_value = execute([foo], grammar, "one one one") context, res, wrapper, return_value = execute([foo], grammar, "one one one")
assert res.status assert res.status
assert return_value == [("foo", 0, 4, "one one one")] assert return_value == [cnode("foo", 0, 4, "one one one")]
def test_i_can_parse_one_and_more(): def test_i_can_parse_one_and_more():
@@ -696,7 +696,7 @@ def test_i_can_parse_one_and_more():
context, res, wrapper, return_value = execute([foo], grammar, "one one") context, res, wrapper, return_value = execute([foo], grammar, "one one")
assert res.status assert res.status
assert return_value == [("foo", 0, 2, "one one")] assert return_value == [cnode("foo", 0, 2, "one one")]
assert return_value[0].underlying == u(grammar[foo], 0, 2, [ assert return_value[0].underlying == u(grammar[foo], 0, 2, [
u("one", 0, 0), u("one", 0, 0),
u("one", 2, 2)]) u("one", 2, 2)])
@@ -709,7 +709,7 @@ def test_i_can_parse_sequence_and_one_or_more():
res = parser.parse(context, "one one two") res = parser.parse(context, "one one two")
assert res.status assert res.status
assert res.value.value == [("foo", 0, 4, "one one two")] assert res.value.value == [cnode("foo", 0, 4, "one one two")]
res = parser.parse(context, "two") res = parser.parse(context, "two")
assert not res.status assert not res.status
@@ -725,7 +725,7 @@ def test_i_can_parse_one_and_more_with_separator():
context, res, wrapper, return_value = execute([foo], grammar, "one, one , one") context, res, wrapper, return_value = execute([foo], grammar, "one, one , one")
assert res.status assert res.status
assert return_value == [("foo", 0, 7, "one, one , one")] assert return_value == [cnode("foo", 0, 7, "one, one , one")]
assert return_value[0].underlying == u(grammar[foo], 0, 7, [ assert return_value[0].underlying == u(grammar[foo], 0, 7, [
u("one", 0, 0), u("one", 0, 0),
u("one", 3, 3), u("one", 3, 3),
@@ -740,7 +740,7 @@ def test_that_one_and_more_is_greedy():
context, res, wrapper, return_value = execute([foo], grammar, "one one one") context, res, wrapper, return_value = execute([foo], grammar, "one one one")
assert res.status assert res.status
assert return_value == [("foo", 0, 4, "one one one")] assert return_value == [cnode("foo", 0, 4, "one one one")]
def test_i_can_detect_infinite_recursion(): def test_i_can_detect_infinite_recursion():
@@ -785,9 +785,9 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
res = parser.parse(context, "foo") res = parser.parse(context, "foo")
assert len(res) == 2 assert len(res) == 2
assert res[0].status assert res[0].status
assert res[0].value.body == [("bar", 0, 0, "foo")] assert res[0].value.body == [cnode("bar", 0, 0, "foo")]
assert res[1].status assert res[1].status
assert res[1].value.body == [("foo", 0, 0, "foo")] assert res[1].value.body == [cnode("foo", 0, 0, "foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence(): def test_i_can_detect_indirect_infinite_recursion_with_sequence():
@@ -912,7 +912,7 @@ def test_i_cannot_parse_when_wrong_sequence():
assert not res.status assert not res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [ assert return_value == [
("foo", "one two three"), short_cnode("foo", "one two three"),
UnrecognizedTokensNode(5, 6, [t(" "), t("one")]) UnrecognizedTokensNode(5, 6, [t(" "), t("one")])
] ]
@@ -945,14 +945,14 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_at_the_end():
assert not res[0].status assert not res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ assert res[0].value.body == [
("bar", 0, 2, "one two"), cnode("bar", 0, 2, "one two"),
UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
] ]
assert not res[1].status assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ assert res[1].value.body == [
("foo", 0, 2, "one two"), cnode("foo", 0, 2, "one two"),
UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")]) UnrecognizedTokensNode(3, 6, [t(" "), t("four"), t(" "), t("five")])
] ]
@@ -973,14 +973,14 @@ def test_i_cannot_parse_multiple_results_when_beginning_by_unknown_tokens():
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ assert res[0].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("bar", 4, 6, "one two"), cnode("bar", 4, 6, "one two"),
] ]
assert not res[1].status assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ assert res[1].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("foo", 4, 6, "one two"), cnode("foo", 4, 6, "one two"),
] ]
@@ -999,7 +999,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ assert res[0].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("bar", 4, 6, "one two"), cnode("bar", 4, 6, "one two"),
UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
] ]
@@ -1007,7 +1007,7 @@ def test_i_cannot_parse_multiple_results_when_surrounded_by_unknown_tokens():
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ assert res[1].value.body == [
UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]), UnrecognizedTokensNode(0, 3, [t("four"), t(" "), t("five"), t(" ")]),
("foo", 4, 6, "one two"), cnode("foo", 4, 6, "one two"),
UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]), UnrecognizedTokensNode(7, 10, [t(" "), t("six"), t(" "), t("seven")]),
] ]
@@ -1029,17 +1029,17 @@ def test_i_cannot_parse_multiple_results_when_unknown_tokens_in_the_middle():
assert not res[0].status assert not res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ assert res[0].value.body == [
("bar", 0, 2, "one two"), cnode("bar", 0, 2, "one two"),
UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
("baz", 8, 8, "six"), cnode("baz", 8, 8, "six"),
] ]
assert not res[1].status assert not res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ assert res[1].value.body == [
("foo", 0, 2, "one two"), cnode("foo", 0, 2, "one two"),
UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]), UnrecognizedTokensNode(3, 7, [t(" "), t("four"), t(" "), t("five"), t(" ")]),
("baz", 8, 8, "six"), cnode("baz", 8, 8, "six"),
] ]
@@ -1052,7 +1052,7 @@ def test_i_can_get_the_inner_concept_when_possible():
assert res.status assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 0, "one")] assert return_value == [cnode("foo", 0, 0, "one")]
concept_found = return_value[0].concept concept_found = return_value[0].concept
assert cbody(concept_found) == get_expected(one, "one") assert cbody(concept_found) == get_expected(one, "one")
assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
@@ -1069,7 +1069,7 @@ def test_i_can_get_the_inner_concept_when_possible_with_rule_name():
assert res.status assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 0, "one")] assert return_value == [cnode("foo", 0, 0, "one")]
concept_found = return_value[0].concept concept_found = return_value[0].concept
assert cbody(concept_found) == get_expected(one, "one") assert cbody(concept_found) == get_expected(one, "one")
assert id(cprop(concept_found, "one")) == id(cbody(concept_found)) assert id(cprop(concept_found, "one")) == id(cbody(concept_found))
@@ -1086,7 +1086,7 @@ def test_i_get_multiple_props_when_zero_or_more():
context, res, wrapper, return_value = execute([foo, one], grammar, "one one one") context, res, wrapper, return_value = execute([foo, one], grammar, "one one one")
assert res.status assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", 0, 4, "one one one")] assert return_value == [cnode("foo", 0, 4, "one one one")]
concept_found = return_value[0].concept concept_found = return_value[0].concept
assert cbody(concept_found) == DoNotResolve("one one one") assert cbody(concept_found) == DoNotResolve("one one one")
assert len(concept_found.cached_asts["one"]) == 3 assert len(concept_found.cached_asts["one"]) == 3
@@ -1106,7 +1106,7 @@ def test_i_get_multiple_props_when_zero_or_more_and_different_values():
context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok") context, res, wrapper, return_value = execute([foo, one], grammar, "one ok un ok uno ok")
assert res.status assert res.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert return_value == [("foo", "one ok un ok uno ok")] assert return_value == [short_cnode("foo", "one ok un ok uno ok")]
concept_found = return_value[0].concept concept_found = return_value[0].concept
assert cprop(concept_found, "one")[0] == get_expected(one, "one") assert cprop(concept_found, "one")[0] == get_expected(one, "one")
assert cprop(concept_found, "one")[1] == get_expected(one, "un") assert cprop(concept_found, "one")[1] == get_expected(one, "un")
+204
View File
@@ -0,0 +1,204 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from sdp.sheerkaDataProvider import Event
multiple_concepts_parser = MultipleConceptsParser()
def get_context():
    """Return a fresh ExecutionContext backed by an in-memory Sheerka."""
    # skip_builtins_in_db keeps the test database small and fast to set up
    test_sheerka = Sheerka(skip_builtins_in_db=True)
    test_sheerka.initialize("mem://")
    return ExecutionContext("test", Event(), test_sheerka)
def get_ret_from(*args):
    """Wrap *args in a ReturnValueConcept holding a MultipleConceptsParser result.

    Each argument is converted to the matching lexer node:
    a Concept becomes a ConceptNode (one identifier token wide),
    a PythonNode becomes a SourceCodeNode, and any other value is
    tokenized into an UnrecognizedTokensNode.
    """
    nodes = []
    cursor = 0
    full_source = ""
    for item in args:
        if isinstance(item, Concept):
            # A concept occupies exactly one identifier token.
            identifier = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
            nodes.append(ConceptNode(item, cursor, cursor, identifier, item.name))
            cursor += 1
            full_source += item.name
        else:
            # Source code and raw text are both tokenized the same way.
            text = item.source if isinstance(item, PythonNode) else item
            tokens = list(Tokenizer(text))[:-1]  # strip trailing EOF
            last = cursor + len(tokens) - 1
            if isinstance(item, PythonNode):
                nodes.append(SourceCodeNode(item, cursor, last, tokens, text))
            else:
                nodes.append(UnrecognizedTokensNode(cursor, last, tokens))
            cursor += len(tokens)
            full_source += text
    return ReturnValueConcept(
        "who",
        False,
        ParserResultConcept(parser=multiple_concepts_parser, value=nodes, source=full_source))
def init(concepts, inputs):
    """Register *concepts* in a fresh context and build the input return value."""
    execution_context = get_context()
    for concept in concepts:
        execution_context.sheerka.create_new_concept(execution_context, concept)
    return execution_context, get_ret_from(*inputs)
def execute(concepts, inputs):
    """Parse the prepared inputs with a ConceptsWithConceptsParser.

    Returns (context, parser, result, wrapper, return_value) so tests can
    assert on every level of the result structure.
    """
    ctx, input_return_values = init(concepts, inputs)
    parser = ConceptsWithConceptsParser()
    result = parser.parse(ctx, input_return_values.body)
    wrapper = result.body
    return ctx, parser, result, wrapper, wrapper.body
@pytest.mark.parametrize("text, interested", [
    ("not parser result", False),
    (ParserResultConcept(parser="not multiple_concepts_parser"), False),
    (ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
])
def test_not_interested(text, interested):
    """The parser returns None for any input it does not claim."""
    res = ConceptsWithConceptsParser().parse(get_context(), text)
    # a non-None result means the parser was interested in the input
    assert (res is not None) == interested
def test_i_can_parse_composition_of_concepts():
    """'foo plus bar' composes into an 'a plus b' instance with a=foo, b=bar."""
    foo = Concept("foo")
    bar = Concept("bar")
    plus = Concept("a plus b").set_prop("a").set_prop("b")
    ctx, _parser, res, parser_result, composed = execute([foo, bar, plus], [foo, " plus ", bar])
    assert res.status
    assert ctx.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert res.who == parser_result.parser.name
    assert parser_result.source == "foo plus bar"
    assert ctx.sheerka.isinstance(composed, plus)
    assert composed.cached_asts["a"] == foo
    assert composed.cached_asts["b"] == bar
    # sanity check: the composed concept can be evaluated
    evaluated = ctx.sheerka.evaluate_concept(ctx, composed)
    assert evaluated.key == composed.key
    assert evaluated.get_prop("a") == foo.init_key()
    assert evaluated.get_prop("b") == bar.init_key()
def test_i_can_parse_when_composition_of_source_code():
    """Two python fragments around ' plus ' compose into the 'a plus b' concept."""
    plus = Concept("a plus b", body="a + b").set_prop("a").set_prop("b")
    lhs = PythonNode("1+1", ast.parse("1+1", mode="eval"))
    rhs = PythonNode("2+2", ast.parse("2+2", mode="eval"))
    ctx, parser, res, parser_result, composed = execute([plus], [lhs, " plus ", rhs])
    assert res.status
    assert ctx.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert res.who == parser_result.parser.name
    assert parser_result.source == "1+1 plus 2+2"
    assert ctx.sheerka.isinstance(composed, plus)
    # each code fragment is wrapped in its own successful return value
    lhs_result = ParserResultConcept(parser=parser, source="1+1", value=lhs)
    rhs_result = ParserResultConcept(parser=parser, source="2+2", value=rhs)
    assert composed.cached_asts["a"] == [ReturnValueConcept(parser.name, True, lhs_result)]
    assert composed.cached_asts["b"] == [ReturnValueConcept(parser.name, True, rhs_result)]
    # sanity check: evaluation resolves both operands and the body
    evaluated = ctx.sheerka.evaluate_concept(ctx, composed)
    assert evaluated.key == composed.key
    assert evaluated.get_prop("a") == 2
    assert evaluated.get_prop("b") == 4
    assert evaluated.body == 6
def test_i_can_parse_when_mix_of_concept_and_code():
    """A concept on the left and python code on the right still compose."""
    plus = Concept("a plus b").set_prop("a").set_prop("b")
    code = PythonNode("1+1", ast.parse("1+1", mode="eval"))
    foo = Concept("foo")
    ctx, parser, res, parser_result, composed = execute([plus, foo], [foo, " plus ", code])
    assert res.status
    assert ctx.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert res.who == parser_result.parser.name
    assert parser_result.source == "foo plus 1+1"
    assert ctx.sheerka.isinstance(composed, plus)
    # the concept is stored directly, the code is wrapped in a return value
    code_result = ParserResultConcept(parser=parser, source="1+1", value=code)
    assert composed.cached_asts["a"] == foo
    assert composed.cached_asts["b"] == [ReturnValueConcept(parser.name, True, code_result)]
    # sanity check: "a" resolves to the concept key, "b" evaluates the code
    evaluated = ctx.sheerka.evaluate_concept(ctx, composed)
    assert evaluated.key == composed.key
    assert evaluated.get_prop("a") == foo.init_key()
    assert evaluated.get_prop("b") == 2
def test_i_can_parse_when_multiple_concepts_are_recognized():
    """Two concepts named 'a plus b' both match: one result per candidate."""
    foo = Concept("foo")
    bar = Concept("bar")
    plus_1 = Concept("a plus b", body="body1").set_prop("a").set_prop("b")
    plus_2 = Concept("a plus b", body="body2").set_prop("a").set_prop("b")
    ctx, input_return_values = init([foo, bar, plus_1, plus_2], [foo, " plus ", bar])
    parser = ConceptsWithConceptsParser()
    results = parser.parse(ctx, input_return_values.body)
    assert len(results) == 2
    # only the first result's status is checked, as in the original assertions
    assert results[0].status
    for res, expected_plus in zip(results, (plus_1, plus_2)):
        parser_result = res.value
        composed = parser_result.value
        assert ctx.sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert res.who == parser_result.parser.name
        assert parser_result.source == "foo plus bar"
        assert ctx.sheerka.isinstance(composed, expected_plus)
        assert composed.cached_asts["a"] == foo
        assert composed.cached_asts["b"] == bar
def test_i_cannot_parse_when_unknown_concept():
    """Without a matching composite concept the parser flags NOT_FOR_ME."""
    foo = Concept("foo")
    bar = Concept("bar")
    ctx, input_return_values = init([foo, bar], [foo, " plus ", bar])
    parser = ConceptsWithConceptsParser()
    result = parser.parse(ctx, input_return_values.body)
    assert not result.status
    assert ctx.sheerka.isinstance(result.body, BuiltinConcepts.NOT_FOR_ME)
    assert result.who == parser.name
    # the original input nodes are passed through untouched
    assert result.body.body == input_return_values.body.body
@@ -1,11 +1,13 @@
import ast
import pytest import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve from core.concept import Concept, ConceptParts, DoNotResolve
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator from evaluators.LexerNodeEvaluator import LexerNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence, TerminalNode, \ from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, StrMatch, UnrecognizedTokensNode, SourceCodeNode
StrMatch, Optional, OrderedChoice, ZeroOrMore, UnrecognizedTokensNode, ConceptMatch from parsers.PythonParser import PythonNode
from sdp.sheerkaDataProvider import Event from sdp.sheerkaDataProvider import Event
@@ -24,6 +26,18 @@ def from_parsing(context, grammar, expression):
return ret_val return ret_val
def from_fragments(*fragments):
    """Build a successful ReturnValueConcept from mixed fragments.

    String fragments are parsed as python expressions and wrapped in a
    SourceCodeNode; any other fragment is assumed to be a Concept and
    wrapped in a ConceptNode. Token positions/lists are dummies (0, 0, [])
    since the evaluator under test does not use them.
    """
    nodes = []
    for fragment in fragments:
        if isinstance(fragment, str):
            node = PythonNode(fragment, ast.parse(fragment.strip(), mode="eval"))
            nodes.append(SourceCodeNode(node, 0, 0, [], fragment))
        else:
            nodes.append(ConceptNode(fragment, 0, 0, [], fragment.name))
    # fixed typo: "somme_name" -> "some_name", matching the placeholder
    # name used by every other fixture in this file
    return ReturnValueConcept("some_name", True, ParserResultConcept(value=nodes))
def init(concept, grammar, text): def init(concept, grammar, text):
context = get_context() context = get_context()
if isinstance(concept, list): if isinstance(concept, list):
@@ -40,12 +54,14 @@ def init(concept, grammar, text):
@pytest.mark.parametrize("ret_val, expected", [ @pytest.mark.parametrize("ret_val, expected", [
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value=[SourceCodeNode(0, 0, [])])), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), True), (ReturnValueConcept("some_name", True, ParserResultConcept(value=SourceCodeNode(0, 0, []))), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), False), (ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), False), (ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=[UnrecognizedTokensNode(0, 0, [])])), False), (ReturnValueConcept("some_name", False, ParserResultConcept(value=[SourceCodeNode(0, 0, [])])), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=UnrecognizedTokensNode(0, 0, []))), False), (ReturnValueConcept("some_name", False, ParserResultConcept(value=SourceCodeNode(0, 0, []))), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")), False), (ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])), False), (ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])), False),
(ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]), False), (ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]), False),
@@ -53,7 +69,7 @@ def init(concept, grammar, text):
]) ])
def test_i_can_match(ret_val, expected): def test_i_can_match(ret_val, expected):
context = get_context() context = get_context()
assert ConceptNodeEvaluator().matches(context, ret_val) == expected assert LexerNodeEvaluator().matches(context, ret_val) == expected
def test_concept_is_returned_when_only_one_in_the_list(): def test_concept_is_returned_when_only_one_in_the_list():
@@ -61,9 +77,9 @@ def test_concept_is_returned_when_only_one_in_the_list():
context = get_context() context = get_context()
context.sheerka.add_in_cache(foo) context.sheerka.add_in_cache(foo)
evaluator = ConceptNodeEvaluator()
ret_val = from_parsing(context, {foo: StrMatch("foo")}, "foo") ret_val = from_parsing(context, {foo: StrMatch("foo")}, "foo")
evaluator = LexerNodeEvaluator()
result = evaluator.eval(context, ret_val) result = evaluator.eval(context, ret_val)
wrapper = result.body wrapper = result.body
return_value = result.body.body return_value = result.body.body
@@ -77,3 +93,23 @@ def test_concept_is_returned_when_only_one_in_the_list():
assert return_value.cached_asts[ConceptParts.BODY] == DoNotResolve("foo") assert return_value.cached_asts[ConceptParts.BODY] == DoNotResolve("foo")
assert result.parents == [ret_val] assert result.parents == [ret_val]
def test_concept_python_node_is_returned_when_source_code():
    """Mixed concept + code fragments evaluate to one combined PythonNode."""
    ctx = get_context()
    foo = Concept("foo")
    ret_val = from_fragments(foo, " + 1")
    evaluator = LexerNodeEvaluator()
    result = evaluator.eval(ctx, ret_val)
    wrapper = result.body
    assert result.who == evaluator.name
    assert result.status
    assert ctx.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert wrapper.parser == evaluator
    assert wrapper.source == "foo + 1"
    # the concept is injected into the python AST via a mangled identifier
    combined = wrapper.body
    assert combined == PythonNode('foo + 1', ast.parse("__C__foo__C__ + 1", mode="eval"))
    assert combined.concepts == {"__C__foo__C__": foo}
    assert result.parents == [ret_val]
+83 -12
View File
@@ -1,8 +1,12 @@
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, UnrecognizedTokensNode from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, SourceCodeNode
from parsers.MultipleConceptsParser import MultipleConceptsParser from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
from sdp.sheerkaDataProvider import Event from sdp.sheerkaDataProvider import Event
@@ -78,7 +82,7 @@ def test_i_can_parse_when_ending_with_bnf():
assert ret_val.status assert ret_val.status
assert ret_val.who == parser.name assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [("bar", 0, 0, "bar"), ("foo", 2, 6, "foo1 foo2 foo3")] assert ret_val.value.value == [cnode("bar", 0, 0, "bar"), cnode("foo", 2, 6, "foo1 foo2 foo3")]
assert ret_val.value.source == "bar foo1 foo2 foo3" assert ret_val.value.source == "bar foo1 foo2 foo3"
@@ -94,7 +98,7 @@ def test_i_can_parse_when_starting_with_bnf():
assert ret_val.status assert ret_val.status
assert ret_val.who == parser.name assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [("foo", 0, 4, "foo1 foo2 foo3"), ("bar", 6, 6, "bar")] assert ret_val.value.value == [cnode("foo", 0, 4, "foo1 foo2 foo3"), cnode("bar", 6, 6, "bar")]
assert ret_val.value.source == "foo1 foo2 foo3 bar" assert ret_val.value.source == "foo1 foo2 foo3 bar"
@@ -112,13 +116,13 @@ def test_i_can_parse_when_concept_are_mixed():
assert ret_val.who == parser.name assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [ assert ret_val.value.value == [
("baz", 0, 0, "baz"), cnode("baz", 0, 0, "baz"),
("foo", 2, 6, "foo1 foo2 foo3"), cnode("foo", 2, 6, "foo1 foo2 foo3"),
("bar", 8, 8, "bar")] cnode("bar", 8, 8, "bar")]
assert ret_val.value.source == "baz foo1 foo2 foo3 bar" assert ret_val.value.source == "baz foo1 foo2 foo3 bar"
def test_i_can_parse_when_multiple_concept_are_matching(): def test_i_can_parse_when_multiple_concepts_are_matching():
foo = Concept("foo") foo = Concept("foo")
bar = Concept("bar", body="bar1") bar = Concept("bar", body="bar1")
baz = Concept("bar", body="bar2") baz = Concept("bar", body="bar2")
@@ -130,16 +134,35 @@ def test_i_can_parse_when_multiple_concept_are_matching():
assert len(ret_val) == 2 assert len(ret_val) == 2
assert ret_val[0].status assert ret_val[0].status
assert ret_val[0].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")] assert ret_val[0].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
assert ret_val[0].value.source == "foo bar" assert ret_val[0].value.source == "foo bar"
assert ret_val[0].value.value[1].concept.body == "bar1" assert ret_val[0].value.value[1].concept.body == "bar1"
assert ret_val[1].status assert ret_val[1].status
assert ret_val[1].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")] assert ret_val[1].value.value == [cnode("foo", 0, 0, "foo"), cnode("bar", 2, 2, "bar")]
assert ret_val[1].value.source == "foo bar" assert ret_val[1].value.source == "foo bar"
assert ret_val[1].value.value[1].concept.body == "bar2" assert ret_val[1].value.value[1].concept.body == "bar2"
def test_i_can_parse_when_source_code():
    """Text before a known concept becomes a source-code node, not a failure."""
    foo = Concept("foo")
    ctx, return_value = init([foo], {foo: "foo"}, "1 foo")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(ctx, return_value.body)
    assert ret_val.status
    assert ret_val.who == parser.name
    wrapper = ret_val.value
    assert ctx.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert wrapper.source == "1 foo"
    # tokens 0-1 hold the python fragment "1 ", token 2 holds the concept
    assert wrapper.value == [
        scnode(0, 1, "1 "),
        cnode("foo", 2, 2, "foo")]
def test_i_cannot_parse_when_unrecognized_token(): def test_i_cannot_parse_when_unrecognized_token():
twenty_two = Concept("twenty two") twenty_two = Concept("twenty two")
one = Concept("one") one = Concept("one")
@@ -153,8 +176,56 @@ def test_i_cannot_parse_when_unrecognized_token():
assert ret_val.who == parser.name assert ret_val.who == parser.name
assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
assert ret_val.value.value == [ assert ret_val.value.value == [
("twenty two", 0, 2, "twenty two"), cnode("twenty two", 0, 2, "twenty two"),
(3, 5, " + "), utnode(3, 5, " + "),
("one", 6, 6, "one") cnode("one", 6, 6, "one")
] ]
assert ret_val.value.source == "twenty two + one" assert ret_val.value.source == "twenty two + one"
def test_i_cannot_parse_when_unknown_concepts():
    """An unknown word between known concepts makes the parse fail."""
    twenty_two = Concept("twenty two")
    one = Concept("one")
    bnf = {twenty_two: Sequence("twenty", "two")}
    ctx, return_value = init([twenty_two, one], bnf, "twenty two plus one")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(ctx, return_value.body)
    assert not ret_val.status
    assert ret_val.who == parser.name
    assert ctx.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
    # " plus " stays unrecognized between the two matched concepts
    assert ret_val.value.value == [
        cnode("twenty two", 0, 2, "twenty two"),
        utnode(3, 5, " plus "),
        cnode("one", 6, 6, "one")
    ]
    assert ret_val.value.source == "twenty two plus one"
@pytest.mark.parametrize("text, expected_source, expected_end", [
    ("True", "True", 0),
    ("1 == 1", "1 == 1", 5),
    ("1!xdf", "1", 0),
    ("1", "1", 0),
])
def test_i_can_get_source_code_node(text, expected_source, expected_end):
    """get_source_code_node extracts the longest parsable python prefix."""
    tokens = list(Tokenizer(text))[:-1]  # strip trailing EOF
    offset = 5  # any non-zero start index, to check offsets are respected
    node = MultipleConceptsParser().get_source_code_node(get_context(), offset, tokens)
    assert isinstance(node, SourceCodeNode)
    assert isinstance(node.node, PythonNode)
    assert node.source == expected_source
    assert node.start == offset
    assert node.end == offset + expected_end
def test_i_cannot_parse_null_text():
    """No tokens — or only an EOF token — yields no source code node."""
    parser = MultipleConceptsParser()
    assert parser.get_source_code_node(get_context(), 0, []) is None
    only_eof = [Token(TokenKind.EOF, "", 0, 0, 0)]
    assert parser.get_source_code_node(get_context(), 0, only_eof) is None
+9 -7
View File
@@ -41,17 +41,19 @@ def to_str_ast(expression):
return PythonNode.get_dump(ast.parse(expression, mode="eval")) return PythonNode.get_dump(ast.parse(expression, mode="eval"))
@pytest.mark.parametrize("text", [ @pytest.mark.parametrize("text, interested", [
"not parser result", ("not parser result", False),
ParserResultConcept(value="not a list"), (ParserResultConcept(parser="not multiple_concepts_parser"), False),
ParserResultConcept(value=[]), (ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
ParserResultConcept(value=["not a Node"]),
]) ])
def test_not_interested(text): def test_not_interested(text, interested):
context = get_context() context = get_context()
res = PythonWithConceptsParser().parse(context, text) res = PythonWithConceptsParser().parse(context, text)
assert res is None if interested:
assert res is not None
else:
assert res is None
def test_i_can_parse_concepts_and_python(): def test_i_can_parse_concepts_and_python():
+101 -13
View File
@@ -401,18 +401,6 @@ def test_i_can_eval_bnf_definitions_from_separate_instances():
assert res[0].value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one two").init_key()) assert res[0].value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one two").init_key())
def test_i_can_eval_a_mix_with_bnf_and_python():
    """A bnf-built concept ("twenty one" -> 21) composes with plain python arithmetic."""
    engine = get_sheerka()
    for definition in (
        "def concept one as 1",
        "def concept twenties from bnf 'twenty' one as 20 + one",
    ):
        engine.evaluate_user_input(definition)
    results = engine.evaluate_user_input("twenty one + 1")
    assert len(results) == 1
    first = results[0]
    assert first.status
    assert first.body == 22
@pytest.mark.parametrize("desc, definitions", [ @pytest.mark.parametrize("desc, definitions", [
("Simple form", [ ("Simple form", [
"def concept one as 1", "def concept one as 1",
@@ -450,16 +438,116 @@ def test_i_can_mix_concept_with_python_to_define_numbers(desc, definitions):
assert res[0].status assert res[0].status
assert res[0].body == 22 assert res[0].body == 22
res = sheerka.evaluate_user_input("twenty one + one")
assert len(res) == 1
assert res[0].status
assert res[0].body == 22
res = sheerka.evaluate_user_input("twenty one + twenty two") res = sheerka.evaluate_user_input("twenty one + twenty two")
assert len(res) == 1 assert len(res) == 1
assert res[0].status assert res[0].status
assert res[0].body == 43 assert res[0].body == 43
res = sheerka.evaluate_user_input("twenty one + one") res = sheerka.evaluate_user_input("1 + twenty one")
assert len(res) == 1 assert len(res) == 1
assert res[0].status assert res[0].status
assert res[0].body == 22 assert res[0].body == 22
# res = sheerka.evaluate_user_input("1 + 1 + twenty one")
# assert len(res) == 1
# assert res[0].status
# assert res[0].body == 23
def test_i_can_mix_concept_of_concept():
    """Concept composition: 'a plus b' accepts plain, bnf-built and python operands."""
    sheerka = get_sheerka()
    for definition in (
        "def concept one as 1",
        "def concept two as 2",
        "def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit",
        "def concept a plus b as a + b",
    ):
        sheerka.evaluate_user_input(definition)

    # NOTE(review): operands that are bare python literals ("1 plus 2",
    # "1 plus one", "1 + 1 plus 1") do not evaluate yet, so they are not
    # asserted here.
    cases = [
        ("1 plus twenty one", 22),
        ("one plus 1", 2),
        ("one plus two", 3),
        ("one plus twenty one", 22),
        ("twenty one plus 1", 22),
        ("twenty one plus one", 22),
        ("twenty one plus twenty two", 43),
    ]
    for text, expected in cases:
        res = sheerka.evaluate_user_input(text)
        assert len(res) == 1
        assert res[0].status
        assert res[0].body.body == expected
# def test_i_can_evaluate_concept_of_concept_when_multiple_choices():
# sheerka = get_sheerka()
#
# definitions = [
# "def concept little a where a",
# "def concept blue a where a",
# "def concept little blue a where a",
# "def concept house"
# ]
#
# for definition in definitions:
# sheerka.evaluate_user_input(definition)
#
# ### CAUTION ####
# # this test cannot work !!
# # it is just to hint the result that I would like to achieve
#
# res = sheerka.evaluate_user_input("little blue house")
# assert len(res) == 2
# assert res[0].status
# assert res[0].body == "little(blue(house))"
#
# assert res[1].status
# assert res[1].body == "little blue(house)"
def test_i_can_say_that_a_concept_isa_another_concept(): def test_i_can_say_that_a_concept_isa_another_concept():
sheerka = get_sheerka() sheerka = get_sheerka()
+1
View File
@@ -213,6 +213,7 @@ def test_i_can_transform_simple_execution_context():
'desc': 'this is the desc', 'desc': 'this is the desc',
'children': [], 'children': [],
'preprocess': None, 'preprocess': None,
'inputs': {},
'values': {}, 'values': {},
'obj': None, 'obj': None,
'concepts': {} 'concepts': {}
+14 -1
View File
@@ -4,6 +4,7 @@ from core.tokenizer import Tokenizer, Token, TokenKind, LexerError, Keywords
def test_i_can_tokenize(): def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:" source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
source += "$£€!_identifier°~_^\\`#"
tokens = list(Tokenizer(source)) tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -40,8 +41,20 @@ def test_i_can_tokenize():
assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21) assert tokens[32] == Token(TokenKind.LESS, '<', 79, 6, 21)
assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22) assert tokens[33] == Token(TokenKind.GREATER, '>', 80, 6, 22)
assert tokens[34] == Token(TokenKind.CONCEPT, 'name', 81, 6, 23) assert tokens[34] == Token(TokenKind.CONCEPT, 'name', 81, 6, 23)
assert tokens[35] == Token(TokenKind.DOLLAR, '$', 88, 6, 30)
assert tokens[36] == Token(TokenKind.STERLING, '£', 89, 6, 31)
assert tokens[37] == Token(TokenKind.EURO, '€', 90, 6, 32)
assert tokens[38] == Token(TokenKind.EMARK, '!', 91, 6, 33)
assert tokens[39] == Token(TokenKind.IDENTIFIER, '_identifier', 92, 6, 34)
assert tokens[40] == Token(TokenKind.DEGREE, '°', 103, 6, 45)
assert tokens[41] == Token(TokenKind.TILDE, '~', 104, 6, 46)
assert tokens[42] == Token(TokenKind.UNDERSCORE, '_', 105, 6, 47)
assert tokens[43] == Token(TokenKind.CARAT, '^', 106, 6, 48)
assert tokens[44] == Token(TokenKind.BACK_SLASH, '\\', 107, 6, 49)
assert tokens[45] == Token(TokenKind.BACK_QUOTE, '`', 108, 6, 50)
assert tokens[46] == Token(TokenKind.HASH, '#', 109, 6, 51)
assert tokens[35] == Token(TokenKind.EOF, '', 88, 6, 30) assert tokens[47] == Token(TokenKind.EOF, '', 110, 6, 52)
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [