Enhanced complex concepts handling

This commit is contained in:
2020-01-11 08:03:35 +01:00
parent a62c1f0f13
commit 40416ac337
24 changed files with 1647 additions and 961 deletions
+10 -1
View File
@@ -129,6 +129,15 @@ class Concept:
def __hash__(self):
return hash(self.metadata.name)
def __getattr__(self, item):
# I have this complicated implementation because of the usage of Pickle
if 'props' in vars(self) and item in self.props:
return self.props[item].value
name = self.name if 'metadata' in vars(self) else 'Concept'
raise AttributeError(f"'{name}' concept has no attribute '{item}'")
@property
def name(self):
return self.metadata.name
@@ -166,7 +175,7 @@ class Concept:
if token.type == TokenKind.WHITESPACE:
continue
if not first:
key += " "
key += " " # spaces are normalized
if variables is not None and token.value in variables:
key += VARIABLE_PREFIX + str(variables.index(token.value))
else:
+21 -10
View File
@@ -735,7 +735,12 @@ class Sheerka(Concept):
:param kwargs:
:return:
"""
template = self.get(concept_key)
if isinstance(concept_key, tuple):
concept_key, concept_id = concept_key[0], concept_key[1]
else:
concept_id = None
template = self.get(concept_key, concept_id)
# manage concept not found
if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \
@@ -747,7 +752,7 @@ class Sheerka(Concept):
# if template is a list, it means that there a multiple concepts under the same key
concepts = [self.new_from_template(t, concept_key, **kwargs) for t in template]
return self.new(BuiltinConcepts.ENUMERATION, body=concepts)
return concepts
def new_from_template(self, template, key, **kwargs):
# manage singleton
@@ -759,15 +764,15 @@ class Sheerka(Concept):
concept.update_from(template)
# update the properties
for key, v in kwargs.items():
if key in concept.props:
concept.set_prop(key, v)
elif key in PROPERTIES_FOR_NEW:
setattr(concept.metadata, key, v)
elif hasattr(concept, key):
setattr(concept, key, v)
for k, v in kwargs.items():
if k in concept.props:
concept.set_prop(k, v)
elif k in PROPERTIES_FOR_NEW:
setattr(concept.metadata, k, v)
elif hasattr(concept, k):
setattr(concept, k, v)
else:
return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=key, concept=concept)
return self.new(BuiltinConcepts.UNKNOWN_PROPERTY, body=k, concept=concept)
# TODO : add the concept to the list of known concepts (self.instances)
return concept
@@ -830,6 +835,12 @@ class Sheerka(Concept):
return obj
def is_unknown(self, obj):
if not isinstance(obj, Concept):
return True
return obj.key == BuiltinConcepts.UNKNOWN_CONCEPT
def isinstance(self, a, b):
"""
return true if the concept a is an instance of the concept b
+44
View File
@@ -891,3 +891,47 @@ So,
* If, for a given priority there is a match, the parser with a lower priority won't be executed
* A parser has access to the output of the parsers of higher priorities (which were executed before it)
2020-01-11
**********
Status
""""""
Last status was back in October. At that time I could
::
def concept hello name as "hello" + name
1 + 1
sheerka.test()
1. I can evaluate concepts
::
def concept hello a where a
hello kodjo
2. I have worked on BNF definition of the concept
::
def concept twenties from bnf 'twenty' (one | two | three)=unit as 20 + unit
twenty one
eval twenty one
3. I can mix complex concepts (concepts with more than one word) and Python
::
twenty one + twenty two
twenty one + one does not work :-(
4. I have a basic implementation for logging. With control of the verbosity
5. The result of an user input evaluation is now persisted, alongside with the event
that was used for it.
+1 -1
View File
@@ -82,7 +82,7 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
# finish initialisation
concept.init_key(def_concept_node.name.tokens)
concept.add_codes(def_concept_node.get_asts())
#concept.add_codes(def_concept_node.get_asts())
if not isinstance(def_concept_node.definition, NotInitializedNode) and \
sheerka.is_success(def_concept_node.definition):
concept.bnf = def_concept_node.definition.value.value
+109 -109
View File
@@ -1,109 +1,109 @@
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.tokenizer import TokenKind
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, ConceptLexerParser
import core.utils
class ConceptComposerEvaluator(AllReturnValuesEvaluator):
"""
Try to reassemble parts of concepts from different evaluators
"""
NAME = "ConceptComposer"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 40)
def matches(self, context, return_values):
concept_lexer_parser_name = ConceptLexerParser().name
for return_value in return_values:
if return_value.who.startswith(BaseParser.PREFIX) and return_value.status:
return False
if return_value.who.startswith(BaseEvaluator.PREFIX):
return False
if return_value.who != concept_lexer_parser_name:
continue
if not isinstance(return_value.value, ParserResultConcept):
return False
if not (
isinstance(return_value.value.value, ConceptNode) or
isinstance(return_value.value.value, UnrecognizedTokensNode) or
(
hasattr(return_value.value.value, "__iter__") and
len(return_value.value.value) > 0 and
(
isinstance(return_value.value.value[0], ConceptNode) or
isinstance(return_value.value.value[0], UnrecognizedTokensNode)
))):
return False
self.eaten = return_value
return True
return False
def eval(self, context, return_value):
sheerka = context.sheerka
nodes = self.eaten.value.value
temp_res = []
has_error = False
concepts_only = True
for node in nodes:
if isinstance(node, UnrecognizedTokensNode):
tokens = core.utils.strip_tokens(node.tokens, True)
for token in tokens:
if token.type == TokenKind.IDENTIFIER:
concept = context.new_concept(token.value)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
has_error = True
else:
with context.push(self.name, desc=f"Evaluating '{concept}'") as sub_context:
sub_context.log_new(self.verbose_log)
concept = sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
sub_context.add_values(return_values=concept)
temp_res.append(concept)
else:
temp_res.append(core.utils.strip_quotes(token.value))
concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
else:
with context.push(self.name, desc=f"Evaluating '{node.concept}'") as sub_context:
sub_context.log_new(self.verbose_log)
concept = sheerka.evaluate_concept(sub_context, node.concept, self.verbose_log)
sub_context.add_values(return_values=concept)
temp_res.append(concept)
if has_error:
return sheerka.ret(
self.name,
False,
temp_res,
parents=[self.eaten])
if concepts_only:
res = []
for r in temp_res:
if isinstance(r, Concept):
res.append(r)
else:
res = ""
for r in temp_res:
if isinstance(r, Concept):
res += sheerka.value(r)
else:
res += r
return sheerka.ret(
self.name,
True,
res,
parents=[self.eaten])
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
# from core.concept import Concept
# from core.tokenizer import TokenKind
# from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
# from parsers.BaseParser import BaseParser
# from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, ConceptLexerParser
# import core.utils
#
#
# class ConceptComposerEvaluator(AllReturnValuesEvaluator):
# """
# Try to reassemble parts of concepts from different evaluators
# """
#
# NAME = "ConceptComposer"
#
# def __init__(self):
# super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 40)
#
# def matches(self, context, return_values):
# concept_lexer_parser_name = ConceptLexerParser().name
#
# for return_value in return_values:
# if return_value.who.startswith(BaseParser.PREFIX) and return_value.status:
# return False
#
# if return_value.who.startswith(BaseEvaluator.PREFIX):
# return False
#
# if return_value.who != concept_lexer_parser_name:
# continue
#
# if not isinstance(return_value.value, ParserResultConcept):
# return False
#
# if not (
# isinstance(return_value.value.value, ConceptNode) or
# isinstance(return_value.value.value, UnrecognizedTokensNode) or
# (
# hasattr(return_value.value.value, "__iter__") and
# len(return_value.value.value) > 0 and
# (
# isinstance(return_value.value.value[0], ConceptNode) or
# isinstance(return_value.value.value[0], UnrecognizedTokensNode)
# ))):
# return False
#
# self.eaten = return_value
# return True
#
# return False
#
# def eval(self, context, return_value):
# sheerka = context.sheerka
# nodes = self.eaten.value.value
# temp_res = []
# has_error = False
# concepts_only = True
#
# for node in nodes:
# if isinstance(node, UnrecognizedTokensNode):
# tokens = core.utils.strip_tokens(node.tokens, True)
# for token in tokens:
# if token.type == TokenKind.IDENTIFIER:
# concept = context.new_concept(token.value)
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# has_error = True
# else:
# with context.push(self.name, desc=f"Evaluating '{concept}'") as sub_context:
# sub_context.log_new(self.verbose_log)
# concept = sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
# sub_context.add_values(return_values=concept)
# temp_res.append(concept)
#
# else:
# temp_res.append(core.utils.strip_quotes(token.value))
# concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
# else:
# with context.push(self.name, desc=f"Evaluating '{node.concept}'") as sub_context:
# sub_context.log_new(self.verbose_log)
# concept = sheerka.evaluate_concept(sub_context, node.concept, self.verbose_log)
# sub_context.add_values(return_values=concept)
# temp_res.append(concept)
#
# if has_error:
# return sheerka.ret(
# self.name,
# False,
# temp_res,
# parents=[self.eaten])
#
# if concepts_only:
# res = []
# for r in temp_res:
# if isinstance(r, Concept):
# res.append(r)
# else:
# res = ""
# for r in temp_res:
# if isinstance(r, Concept):
# res += sheerka.value(r)
# else:
# res += r
#
# return sheerka.ret(
# self.name,
# True,
# res,
# parents=[self.eaten])
+3 -121
View File
@@ -1,7 +1,6 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
import core.utils
from parsers.ConceptLexerParser import ConceptNode, NonTerminalNode, ConceptMatch, UnrecognizedTokensNode, TerminalNode
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
class ConceptNodeEvaluator(OneReturnValueEvaluator):
@@ -12,7 +11,7 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator):
NAME = "ConceptNode"
def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60) # more than the ConceptNodeEvaluator
super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)
def matches(self, context, return_value):
if not return_value.status:
@@ -50,9 +49,7 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator):
for node in nodes:
if isinstance(node, ConceptNode):
source += node.source if source == "" else (" " + node.source)
concept = sheerka.new(node.concept.key)
concept = self.finalize_concept(sheerka, concept, node.underlying)
concepts.append(concept)
concepts.append(node.concept)
else:
error_found = True
@@ -69,118 +66,3 @@ class ConceptNodeEvaluator(OneReturnValueEvaluator):
parents=[return_value])
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME), parents=[return_value])
def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True):
"""
Updates the properties of the concept
Goes in recursion if the property is a concept
"""
def _add_prop(c, prop_name, value):
"""
Adds a new entry,
makes a list if the property already exists
"""
if prop_name not in c.props or c.props[prop_name].value is None:
# new entry
c.set_prop(prop_name, value)
else:
# make a list if there was a value
previous_value = c.props[prop_name].value
if isinstance(previous_value, list):
previous_value.append(value)
else:
new_value = [previous_value, value]
c.set_prop(prop_name, new_value)
parsing_expression = underlying.parsing_expression
if parsing_expression.rule_name:
_add_prop(concept, parsing_expression.rule_name, self.get_underlying_as_string(underlying))
# the update of the body must come BEFORE the recursion
# otherwise it will be updated by a children and it won't be possible to modify the value
if init_empty_body and concept.body is None:
concept.metadata.body = self.get_underlying_as_string(underlying) # self.escape_if_needed(underlying.source)
if isinstance(underlying, NonTerminalNode):
for child in underlying.children:
if isinstance(child.parsing_expression, ConceptMatch):
new_concept = sheerka.new(child.parsing_expression.concept.key)
_add_prop(concept, child.parsing_expression.rule_name, new_concept)
if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT):
continue
else:
self.finalize_concept(sheerka, new_concept, child.children[0], init_empty_body)
else:
self.finalize_concept(sheerka, concept, child, init_empty_body)
return concept
@staticmethod
def escape_if_needed(value):
if not isinstance(value, str):
return value
return "'" + core.utils.escape_char(value, "'") + "'"
def get_underlying_as_string(self, underlying):
"""
Return the sequence of the recognized character
When a concept is recognized, return the string version of the concept eg c:concept name:
:param underlying:
:return:
"""
# Example
# grammar = {
# foo: Sequence("one", "two", rule_name="var"),
# bar: Sequence(foo, "three", rule_name="var")}
#
# we want bar.body and bar.prop["var"]
# to be "foo 'three'" (no quotes surrounding foo, as it is a concept, not a string)
if isinstance(underlying, TerminalNode):
return self.escape_if_needed(underlying.source)
res = ""
first = True
in_quote = ""
for node in underlying.children:
if isinstance(node.parsing_expression, ConceptMatch):
if in_quote != "":
res += in_quote + "'"
if not first:
res += " "
res += node.parsing_expression.concept.key
in_quote = ""
else:
if in_quote == "":
in_quote = ("'" if first else " '") + core.utils.escape_char(node.source, "'")
else:
in_quote += ("" if first else " ") + core.utils.escape_char(node.source, "'")
first = False
if in_quote:
res += in_quote + "'"
return res
# - - - E X P L A N A T I O N S - - -
# why do we need to update the body ?
# cf test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more()
# def concept number from bnf one | two | three
# def concept add from bnf number plus number
#
# the expression 'one plus two plus three' will match concept add
# add.props["number"] is a list of concepts 'number'
# But which one is 'one', which one is 'two' which one is 'three' ?
#
# That's the reason why we update the body
# add.props["number"] is a list of concepts 'number' but they won't have the same body
#
# !!! C A U T I O N !!!
# In the current implementation, the body is the sequence of char found
# If a concept is recognized, we don't put this information in the body
# Use get_body_as_string() instead of escape_if_needed() if we need this information
+65 -11
View File
@@ -40,7 +40,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
not_for_me = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=node)
return sheerka.ret(self.name, False, not_for_me, parents=[return_value])
my_locals = self.get_locals(context, node.ast_)
my_locals = self.get_locals(context, node)
context.log(self.verbose_log, f"locals={my_locals}", self.name)
if isinstance(node.ast_, ast.Expression):
@@ -58,7 +58,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
error = sheerka.new(BuiltinConcepts.ERROR, body=error)
return sheerka.ret(self.name, False, error, parents=[return_value])
def get_locals(self, context, ast_):
def get_locals(self, context, node):
my_locals = {"sheerka": context.sheerka}
if context.obj:
context.log(self.verbose_log,
@@ -70,30 +70,32 @@ class PythonEvaluator(OneReturnValueEvaluator):
else:
my_locals[prop_name] = context.sheerka.value(prop_value.value)
node_concept = core.ast.nodes.python_to_concept(ast_)
node_concept = core.ast.nodes.python_to_concept(node.ast_)
unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka)
unreferenced_names_visitor.visit(node_concept)
for name in unreferenced_names_visitor.names:
context.log(self.verbose_log, f"Resolving '{name}'.", self.name)
if name in node.concepts:
context.log(self.verbose_log, f"Using value from node.", self.name)
concept = node.concepts[name]
return_concept = False
if name.startswith("__C__") and name.endswith("__C__"):
name_resolved = name[5:-5]
return_concept = True
else:
name_resolved = name
concept_key, concept_id, return_concept = self.resolve_name(context, name)
if name_resolved in my_locals:
if concept_key in my_locals:
context.log(self.verbose_log, f"Using value from property.", self.name)
continue
concept = context.sheerka.new(name_resolved)
context.log(self.verbose_log, f"Instantiating new concept.", self.name)
concept = context.sheerka.new((concept_key, concept_id))
if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
context.log(self.verbose_log, f"'{name_resolved}' is not a concept. Skipping.", self.name)
context.log(self.verbose_log, f"'{concept_key}' is not a concept. Skipping.", self.name)
continue
context.log(self.verbose_log, f"'{name_resolved}' is a concept. Evaluating.", self.name)
context.log(self.verbose_log, f"Evaluating '{concept}'", self.name)
with context.push(self.name, desc=f"Evaluating '{concept}'", obj=concept) as sub_context:
sub_context.log_new(self.verbose_log)
evaluated = context.sheerka.evaluate_concept(sub_context, concept, self.verbose_log)
@@ -109,6 +111,58 @@ class PythonEvaluator(OneReturnValueEvaluator):
return my_locals
def resolve_name(self, context, to_resolve):
"""
Try to match
__C__concept_key__C__
or
__C__concept_key__concept_id__C__
:param context:
:param to_resolve:
:return:
"""
if not to_resolve.startswith("__C__"):
return to_resolve, None, False
context.log(self.verbose_log, f"Resolving name '{to_resolve}'.", self.name)
if len(to_resolve) >= 18 and to_resolve[:18] == "__C__USE_CONCEPT__":
use_concept = True
index = 18
else:
use_concept = False
index = 5
try:
next_index = to_resolve.index("__", index)
if next_index == index:
context.log(self.verbose_log, f"Error: no key between '__'.", self.name)
return None
concept_key = to_resolve[index: next_index]
except ValueError:
context.log(self.verbose_log, f"Error: Missing trailing '__'.", self.name)
return None
if next_index == len(to_resolve) - 5:
context.log(self.verbose_log, f"Recognized concept '{concept_key}'", self.name)
return concept_key, None, use_concept
index = next_index + 2
try:
next_index = to_resolve.index("__", index)
if next_index == index:
context.log(self.verbose_log, f"Error: no id between '__'.", self.name)
return None
concept_id = to_resolve[index: next_index]
except ValueError:
context.log(self.verbose_log, f"Recognized concept '{concept_key}'.", self.name)
return concept_key, None, use_concept
context.log(self.verbose_log, f"Recognized concept '{concept_key}' (id='{concept_id}').", self.name)
return concept_key, concept_id, use_concept
@staticmethod
def expr_to_expression(expr):
expr.lineno = 0
+101 -13
View File
@@ -34,10 +34,10 @@ def flatten(iterable):
@dataclass()
class LexerNode(Node):
start: int
end: int
tokens: list = None
source: str = None
start: int # starting index in the tokens list
end: int # ending index in the tokens list
tokens: list = None # tokens
source: str = None # string representation of what was parsed
def __post_init__(self):
if self.source is None:
@@ -64,7 +64,15 @@ class UnrecognizedTokensNode(LexerNode):
def fix_source(self):
self.source = BaseParser.get_text_from_tokens(self.tokens)
def not_whitespace(self):
return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
def __eq__(self, other):
if isinstance(other, tuple):
if len(other) != 3:
return False
return self.start == other[0] and self.end == other[1] and self.source == other[2]
if not isinstance(other, UnrecognizedTokensNode):
return False
@@ -93,9 +101,9 @@ class ConceptNode(LexerNode):
def __eq__(self, other):
if isinstance(other, tuple):
if len(other) == 2:
return self.concept == other[0] and self.source == other[1]
return self.concept.key == other[0] and self.source == other[1]
else:
return self.concept == other[0] and \
return self.concept.key == other[0] and \
self.start == other[1] and \
self.end == other[2] and \
self.source == other[3]
@@ -567,7 +575,7 @@ class ConceptLexerParser(BaseParser):
self.token = None
self.pos = -1
self.next_token()
self.next_token(False)
return True
def get_token(self) -> Token:
@@ -762,8 +770,9 @@ class ConceptLexerParser(BaseParser):
self.seek(init_pos)
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
if node is not None and node.end != -1:
updated_concept = self.finalize_concept(context.sheerka, concept, node)
concept_node = ConceptNode(
concept,
updated_concept,
node.start,
node.end,
self.tokens[node.start: node.end + 1],
@@ -777,27 +786,30 @@ class ConceptLexerParser(BaseParser):
unrecognized_tokens.add_token(self.get_token(), init_pos)
else:
unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
has_unrecognized = True
if not self.next_token(False):
break
else: # some concepts are recognized
if unrecognized_tokens:
if unrecognized_tokens and unrecognized_tokens.not_whitespace():
unrecognized_tokens.fix_source()
concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
has_unrecognized = True
unrecognized_tokens = None
res = self.get_bests(res) # only keep the concepts that eat the more tokens
concepts_found = core.utils.product(concepts_found, res)
# loop
self.seek(res[0].end)
if not self.next_token():
if not self.next_token(False):
break
# Fix the source for unrecognized tokens
if unrecognized_tokens:
if unrecognized_tokens and unrecognized_tokens.not_whitespace():
unrecognized_tokens.fix_source()
concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
has_unrecognized = True
# else
# returns as many ReturnValue than choices found
@@ -821,6 +833,82 @@ class ConceptLexerParser(BaseParser):
self.log_multiple_results(context, text, ret)
return ret
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
"""
Updates the properties of the concept
Goes in recursion if the property is a concept
"""
# this cache is to make sure that we return the same concept for the same ConceptMatch
_underlying_value_cache = {}
def _add_prop(_concept, prop_name, value):
"""
Adds a new entry,
makes a list if the property already exists
"""
if prop_name not in _concept.props or _concept.props[prop_name].value is None:
# new entry
_concept.set_prop(prop_name, value)
else:
# make a list if there was a value
previous_value = _concept.props[prop_name].value
if isinstance(previous_value, list):
previous_value.append(value)
else:
new_value = [previous_value, value]
_concept.set_prop(prop_name, new_value)
def _look_for_concept_match(_underlying):
if isinstance(_underlying.parsing_expression, ConceptMatch):
return _underlying
if not isinstance(_underlying, NonTerminalNode):
return None
if len(_underlying.children) != 1:
return None
return _look_for_concept_match(_underlying.children[0])
def _get_underlying_value(_underlying):
concept_match_node = _look_for_concept_match(_underlying)
if concept_match_node:
if id(concept_match_node) in _underlying_value_cache:
result = _underlying_value_cache[id(concept_match_node)]
else:
ref_tpl = concept_match_node.parsing_expression.concept
result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
_underlying_value_cache[id(concept_match_node)] = result
else:
result = _underlying.source
return result
def _process_rule_name(_concept, _underlying):
if _underlying.parsing_expression.rule_name:
value = _get_underlying_value(_underlying)
_add_prop(_concept, _underlying.parsing_expression.rule_name, value)
if isinstance(_underlying, NonTerminalNode):
for child in _underlying.children:
_process_rule_name(_concept, child)
key = (template.key, template.id) if template.id else template.key
concept = sheerka.new(key)
if init_empty_body and concept.body is None:
value = _get_underlying_value(underlying)
concept.metadata.body = value
concept.metadata.is_evaluated = True
if underlying.parsing_expression.rule_name:
_add_prop(concept, underlying.parsing_expression.rule_name, value)
if isinstance(underlying, NonTerminalNode):
for node in underlying.children:
_process_rule_name(concept, node)
return concept
@staticmethod
def get_bests(results):
"""
+2 -1
View File
@@ -92,7 +92,8 @@ class DefConceptNode(DefaultParserNode):
if isinstance(prop_value, ReturnValueConcept) and isinstance(prop_value.body,
ParserResultConcept) and hasattr(
prop_value.body.body, "ast_"):
asts[part_key] = prop_value.body.body.ast_
asts[part_key] = prop_value
#asts[part_key] = prop_value.body.body.ast_
return asts
+2 -1
View File
@@ -46,7 +46,8 @@ class ExactConceptParser(BaseParser):
if sheerka.isinstance(result, BuiltinConcepts.UNKNOWN_CONCEPT):
continue
concepts = result.body if sheerka.isinstance(result, BuiltinConcepts.ENUMERATION) else [result]
# concepts = result.body if sheerka.isinstance(result, BuiltinConcepts.ENUMERATION) else [result]
concepts = result if isinstance(result, list) else [result]
for concept in concepts:
context.log(self.verbose_log, f"Recognized concept {concept}.", self.name)
+96
View File
@@ -0,0 +1,96 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
import core.utils
concept_lexer_parser = ConceptLexerParser()
class MultipleConceptsParser(BaseParser):
"""
Parser that will take the result of ConceptLexerParser and
try to resolve the unrecognized tokens token by token
It is a success when it returns a list ConceptNode exclusively
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
def parse(self, context, text):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
return None
if not text.parser == concept_lexer_parser:
return None
sheerka = context.sheerka
nodes = text.value
nodes_found = [[]]
source = ""
concepts_only = True
for node in nodes:
if isinstance(node, UnrecognizedTokensNode):
unrecognized_tokens = None
for i, token in enumerate(node.tokens):
index = node.start + i
if token.type == TokenKind.IDENTIFIER:
# it may be a concept
concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or not sheerka.is_unknown(concept):
# finish processing unrecognized_tokens
if unrecognized_tokens:
unrecognized_tokens.fix_source()
source += unrecognized_tokens.source
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
unrecognized_tokens = None
source += token.value
concepts = concept if hasattr(concept, "__iter__") else [concept]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
nodes_found = core.utils.product(nodes_found, concepts_nodes)
continue
else:
# it cannot be a concept
concepts_only &= token.type == TokenKind.WHITESPACE or token.type == TokenKind.NEWLINE
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
if unrecognized_tokens:
unrecognized_tokens.fix_source()
source += unrecognized_tokens.source
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
else:
nodes_found = core.utils.product(nodes_found, [node])
source += node.source
ret = []
for choice in nodes_found:
ret.append(
sheerka.ret(
self.name,
concepts_only,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, source, ret)
return ret
+7 -5
View File
@@ -1,7 +1,7 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
from dataclasses import dataclass, field
import ast
import logging
@@ -17,10 +17,12 @@ class PythonErrorNode(ErrorNode):
# self.log.debug("-> PythonErrorNode: " + str(self.exception))
@dataclass()
class PythonNode(Node):
source: str
ast_: ast.AST
def __init__(self, source, ast_, concepts=None):
self.source = source
self.ast_ = ast_
self.concepts = concepts or {}
# def __repr__(self):
# return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")"
@@ -67,7 +69,7 @@ class PythonParser(BaseParser):
tree = None
python_switcher = {
TokenKind.CONCEPT: lambda t: f"__C__{t.value}__C__"
TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
}
try:
+116
View File
@@ -0,0 +1,116 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import UnrecognizedTokensNode, ConceptNode
from parsers.PythonParser import PythonParser
class PythonWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("PythonWithConcepts", 20)
self.identifiers = None
self.identifiers_key = None
@staticmethod
def sanitize(identifier):
res = ""
for c in identifier:
res += c if c.isalnum() else "0"
return res
def parse(self, context, text):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
return None
nodes = text.body
if not isinstance(nodes, list):
return None
if len(nodes) == 0:
return None
if not isinstance(nodes[0], (ConceptNode, UnrecognizedTokensNode)):
return None
source = ""
to_parse = ""
identifiers = {}
identifiers_key = {}
python_ids_mappings = {}
def _get_identifier(c):
"""
Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
to be instance variables
I would like to keep this parser as stateless as possible
:param c:
:return:
"""
if id(c) in identifiers:
return identifiers[id(c)]
identifier = "__C__" + self.sanitize(c.key or c.name)
if c.id:
identifier += "__" + c.id
if identifier in identifiers_key:
identifiers_key[identifier] += 1
identifier += f"_{identifiers_key[identifier]}"
else:
identifiers_key[identifier] = 0
identifier += "__C__"
identifiers[id(c)] = identifier
return identifier
for node in nodes:
if isinstance(node, ConceptNode):
source += node.source
if to_parse:
to_parse += " "
concept = node.concept
python_id = _get_identifier(concept)
to_parse += python_id
python_ids_mappings[python_id] = concept
else:
source += node.source
to_parse += node.source
with context.push(self, "Trying Python for '" + to_parse + "'") as sub_context:
python_parser = PythonParser()
result = python_parser.parse(sub_context, to_parse)
if result.status:
python_node = result.body.body
python_node.source = source
python_node.concepts = python_ids_mappings
return sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=result.body.body,
try_parsed=None))
else:
return sheerka.ret(
self.name,
False,
result.body)
def concept_identifier(self, concept):
if id(concept) in self.identifiers:
return self.identifiers[id(concept)]
identifier = "__C__" + (concept.key or concept.name)
if concept.id:
identifier += "__" + concept.id
identifier += "__C__"
return identifier
+28 -20
View File
@@ -108,26 +108,34 @@ def test_that_the_source_is_correctly_set():
assert created_concept.metadata.definition == "hello a"
def test_that_the_ast_is_correctly_initialized():
    """Adding a concept caches ASTs only for the parts that were supplied."""
    context = get_context()
    # Concept with where/pre/body snippets but no post part.
    def_concept_return_value = get_concept(
        name="hello a",
        definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
        where="isinstance(a, str )",
        pre="a is not None",
        body="print('hello' + a)")
    evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
    assert evaluated.status
    assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
    created_concept = evaluated.body.body
    # Supplied parts are cached; the omitted POST part must not be.
    assert ConceptParts.WHERE in created_concept.cached_asts
    assert ConceptParts.PRE in created_concept.cached_asts
    assert ConceptParts.BODY in created_concept.cached_asts
    assert ConceptParts.POST not in created_concept.cached_asts
# def test_that_the_ast_is_correctly_initialized():
# """
# When I parse the definition of a concept, I evaluate the metadata (like the body)
# I wanted to keep in cache these evaluation for further utilisation but I have
# a serialization issue.
# So I had to comment concept.add_codes(def_concept_node.get_asts()) around line 85
# So this test is now irrelevant
# :return:
# """
# context = get_context()
# def_concept_return_value = get_concept(
# name="hello a",
# definition=get_concept_definition("hello a", Sequence(StrMatch("hello"), StrMatch("a"))),
# where="isinstance(a, str )",
# pre="a is not None",
# body="print('hello' + a)")
#
# evaluated = AddConceptEvaluator().eval(context, def_concept_return_value)
#
# assert evaluated.status
# assert context.sheerka.isinstance(evaluated.body, BuiltinConcepts.NEW_CONCEPT)
#
# created_concept = evaluated.body.body
#
# assert ConceptParts.WHERE in created_concept.cached_asts
# assert ConceptParts.PRE in created_concept.cached_asts
# assert ConceptParts.BODY in created_concept.cached_asts
# assert ConceptParts.POST not in created_concept.cached_asts
def test_that_the_new_concept_is_correctly_saved():
+3 -3
View File
@@ -108,12 +108,12 @@ def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [(bar, 0, 2, "twenty two")]
assert res.value.body == [("bar", 0, 2, "twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [(bar, 0, 2, "thirty one")]
assert res.value.body == [("bar", 0, 2, "thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [(foo, 0, 0, "twenty")]
assert res.value.body == [("foo", 0, 0, "twenty")]
+128 -128
View File
@@ -1,128 +1,128 @@
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.BaseEvaluator import BaseEvaluator
from evaluators.ConceptComposerEvaluator import ConceptComposerEvaluator
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence
from sdp.sheerkaDataProvider import Event
concept_lexer_name = ConceptLexerParser().name
def get_context():
    """Create a throw-away ExecutionContext backed by an in-memory Sheerka."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", Event(), engine)
def get_return_values(context, grammar, expression):
    """Lex *expression* against *grammar*; the lexer alone must not succeed."""
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)
    result = lexer.parse(context, expression)
    # The composer under test takes over from a failed lexer result.
    assert not result.status
    return [result]
def init(concepts, grammar, expression):
    """Cache *concepts*, lex *expression*; return (context, return_values)."""
    context = get_context()
    for concept in concepts:
        context.sheerka.add_in_cache(concept)
    return context, get_return_values(context, grammar, expression)
# Each case is (list of ReturnValueConcept, expected match result): the
# composer only matches a failed ConceptLexerParser result holding
# ConceptNodes, with no successful parser/evaluator result alongside it.
@pytest.mark.parametrize("return_values, expected", [
    ([
        ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
        ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
        ReturnValueConcept("not a parser", True, "some value"),
    ], True),
    ([
        ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
    ], True),
    ([
        ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not in error"),
        ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
    ], False),
    ([
        ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
        ReturnValueConcept(concept_lexer_name, True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
    ], False),
    ([
        ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
        ReturnValueConcept(concept_lexer_name, False, "some value"),
    ], False),
    ([
        ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
        ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=["not a concept"])),
    ], False),
    ([
        ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", False, "evaluator in error"),
        ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
        ReturnValueConcept("not a parser", True, "some value"),
    ], False),
    ([
        ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", True, "evaluator"),
        ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
        ReturnValueConcept("not a parser", True, "some value"),
    ], False),
])
def test_i_can_match(return_values, expected):
    """matches() accepts/rejects the return-value mix as tabulated above."""
    context = get_context()
    assert ConceptComposerEvaluator().matches(context, return_values) == expected
def test_i_can_eval_simple_concepts():
    """Composing two cached concepts evaluates each and keeps input order."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    grammar = {}
    context, return_values = init([foo, bar], grammar, "bar foo")
    composer = ConceptComposerEvaluator()
    assert composer.matches(context, return_values)
    ret_val = composer.eval(context, return_values)
    assert ret_val.status
    assert ret_val.who == composer.name
    # Bodies are evaluated: "'foo'" -> "foo", "'bar'" -> "bar".
    assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
    assert ret_val.value[0].metadata.is_evaluated
    assert ret_val.value[1].metadata.is_evaluated
    assert ret_val.parents == [return_values[0]]


def test_i_can_eval_simple_concepts_when_some_are_bnf():
    """Same composition result when one concept also has a BNF rule."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    grammar = {foo: "foo"}
    context, return_values = init([foo, bar], grammar, "bar foo")
    composer = ConceptComposerEvaluator()
    assert composer.matches(context, return_values)
    ret_val = composer.eval(context, return_values)
    assert ret_val.status
    assert ret_val.who == composer.name
    assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
    assert ret_val.value[0].metadata.is_evaluated
    assert ret_val.value[1].metadata.is_evaluated
    assert ret_val.parents == [return_values[0]]


def test_i_can_eval_simple_concept_and_text():
    """A literal string mixed with a concept collapses into one string."""
    foo = Concept("foo", body="'foo'")
    grammar = {}
    context, return_values = init([foo], grammar, "'bar' foo")
    composer = ConceptComposerEvaluator()
    assert composer.matches(context, return_values)
    ret_val = composer.eval(context, return_values)
    assert ret_val.status
    assert ret_val.who == composer.name
    assert ret_val.value == "bar foo"
    assert ret_val.parents == [return_values[0]]
# import pytest
#
# from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
# from core.concept import Concept
# from core.sheerka import Sheerka, ExecutionContext
# from evaluators.BaseEvaluator import BaseEvaluator
# from evaluators.ConceptComposerEvaluator import ConceptComposerEvaluator
# from parsers.BaseParser import BaseParser
# from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, Sequence
# from sdp.sheerkaDataProvider import Event
#
# concept_lexer_name = ConceptLexerParser().name
#
#
# def get_context():
# sheerka = Sheerka(skip_builtins_in_db=True)
# sheerka.initialize("mem://")
# return ExecutionContext("test", Event(), sheerka)
#
#
# def get_return_values(context, grammar, expression):
# parser = ConceptLexerParser()
# parser.initialize(context, grammar)
#
# ret_val = parser.parse(context, expression)
# assert not ret_val.status
# return [ret_val]
#
#
# def init(concepts, grammar, expression):
# context = get_context()
# for c in concepts:
# context.sheerka.add_in_cache(c)
# return_values = get_return_values(context, grammar, expression)
#
# return context, return_values
#
#
# @pytest.mark.parametrize("return_values, expected", [
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], True),
# ([
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], True),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", True, "not in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, "some value"),
# ], False),
# ([
# ReturnValueConcept(BaseParser.PREFIX + "some_name", False, "in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=["not a concept"])),
# ], False),
# ([
# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", False, "evaluator in error"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], False),
# ([
# ReturnValueConcept(BaseEvaluator.PREFIX + "some_name", True, "evaluator"),
# ReturnValueConcept(concept_lexer_name, False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])),
# ReturnValueConcept("not a parser", True, "some value"),
# ], False),
# ])
# def test_i_can_match(return_values, expected):
# context = get_context()
# assert ConceptComposerEvaluator().matches(context, return_values) == expected
#
#
# def test_i_can_eval_simple_concepts():
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {}
# context, return_values = init([foo, bar], grammar, "bar foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
# assert ret_val.value[0].metadata.is_evaluated
# assert ret_val.value[1].metadata.is_evaluated
# assert ret_val.parents == [return_values[0]]
#
#
# def test_i_can_eval_simple_concepts_when_some_are_bnf():
# foo = Concept("foo", body="'foo'")
# bar = Concept("bar", body="'bar'")
# grammar = {foo: "foo"}
# context, return_values = init([foo, bar], grammar, "bar foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == [Concept("bar", body="bar").init_key(), Concept("foo", body="foo").init_key()]
# assert ret_val.value[0].metadata.is_evaluated
# assert ret_val.value[1].metadata.is_evaluated
# assert ret_val.parents == [return_values[0]]
#
#
# def test_i_can_eval_simple_concept_and_text():
# foo = Concept("foo", body="'foo'")
# grammar = {}
# context, return_values = init([foo], grammar, "'bar' foo")
#
# composer = ConceptComposerEvaluator()
# assert composer.matches(context, return_values)
#
# ret_val = composer.eval(context, return_values)
# assert ret_val.status
# assert ret_val.who == composer.name
# assert ret_val.value == "bar foo"
# assert ret_val.parents == [return_values[0]]
File diff suppressed because it is too large Load Diff
+12 -160
View File
@@ -1,6 +1,6 @@
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator
@@ -15,7 +15,7 @@ def get_context():
return ExecutionContext("test", Event(), sheerka)
def get_return_value(context, grammar, expression):
def from_parsing(context, grammar, expression):
parser = ConceptLexerParser()
parser.initialize(context, grammar)
@@ -31,7 +31,7 @@ def init(concept, grammar, text):
context.sheerka.add_in_cache(c)
else:
context.sheerka.add_in_cache(concept)
ret_val = get_return_value(context, grammar, text)
ret_val = from_parsing(context, grammar, text)
node = ret_val.value.value[0]
return context, node
@@ -56,172 +56,24 @@ def test_i_can_match(ret_val, expected):
assert ConceptNodeEvaluator().matches(context, ret_val) == expected
def test_parser_result_of_concept_is_returned_when_list_of_one_concept_node():
def test_concept_is_returned_when_only_one_in_the_list():
foo = Concept("foo")
context = get_context()
context.sheerka.add_in_cache(foo)
evaluator = ConceptNodeEvaluator()
ret_val = get_return_value(context, {foo: StrMatch("foo")}, "foo")
ret_val = from_parsing(context, {foo: StrMatch("foo")}, "foo")
result = evaluator.eval(context, ret_val)
wrapper = result.body
return_value = result.body.body
assert result.who == evaluator.name
assert result.status
assert result.value == ParserResultConcept(
evaluator,
"foo",
Concept("foo", body="'foo'").init_key(),
None)
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert wrapper.parser == evaluator
assert wrapper.source == "foo"
assert return_value == Concept("foo", body="foo").init_key()
assert return_value.metadata.is_evaluated
assert result.parents == [ret_val]
def test_concept_property_is_correctly_updated_for_str_match():
foo = Concept("foo")
grammar = {foo: StrMatch("foo", rule_name="variable")}
context, node = init(foo, grammar, "foo")
updated = ConceptNodeEvaluator().finalize_concept(context.sheerka, node.concept, node.underlying)
assert "variable" in updated.props
assert updated.props["variable"].value == "'foo'"
assert updated.body == "'foo'"
def test_concept_property_is_correctly_updated_for_sequence():
foo = Concept("foo")
grammar = {foo: Sequence("one", "two", rule_name="variable")}
context, node = init(foo, grammar, "one two")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying)
assert "variable" in updated.props
assert updated.props["variable"].value == "'one two'"
assert updated.body == "'one two'"
def test_concept_property_is_updated_for_str_in_sequence():
foo = Concept("foo")
grammar = {foo: Sequence(StrMatch("one", rule_name="s1"), StrMatch("two", rule_name="s2"), rule_name="variable")}
context, node = init(foo, grammar, "one two")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying)
assert updated.props["variable"].value == "'one two'"
assert updated.props["s1"].value == "'one'"
assert updated.props["s2"].value == "'two'"
assert updated.body == "'one two'"
def test_concept_property_is_correctly_updated_for_optional():
foo = Concept("foo")
grammar = {foo: Sequence("one", Optional("two", rule_name="o"), rule_name="variable")}
context, node = init(foo, grammar, "one two")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying)
assert "variable" in updated.props
assert updated.props["variable"].value == "'one two'"
assert updated.props["o"].value == "'two'"
assert updated.body == "'one two'"
def test_concept_property_is_correctly_updated_for_zero_or_more():
foo = Concept("foo")
grammar = {foo: ZeroOrMore("one", rule_name="variable")}
context, node = init(foo, grammar, "one one one")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying)
assert "variable" in updated.props
assert updated.props["variable"].value == "'one one one'"
assert updated.body == "'one one one'"
def test_concept_property_is_correctly_updated_when_list_of_properties():
foo = Concept("foo")
grammar = {foo: Sequence(StrMatch("one", rule_name="s"), StrMatch("two", rule_name="s"), rule_name="variable")}
context, node = init(foo, grammar, "one two")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying)
assert updated.props["variable"].value == "'one two'"
assert updated.props["s"].value == ["'one'", "'two'"]
assert updated.body == "'one two'"
def test_concept_property_is_correctly_updated_when_another_concept():
foo = Concept("foo")
bar = Concept("bar")
grammar = {
foo: Sequence("one", "two", rule_name="var"),
bar: Sequence(foo, "three", "four", rule_name="var")}
context, node = init([foo, bar], grammar, "one two three four")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying)
assert updated.body == "foo 'three four'"
assert updated.props["var"].value == "foo 'three four'"
assert updated.props["foo"].value == Concept("foo", body="'one two'").set_prop("var", "'one two'").init_key()
def test_concept_property_is_correctly_updated_when_concept_recursion_using_optional():
number = Concept("number")
add = Concept("add")
grammar = {
number: OrderedChoice("one", "two"),
add: Sequence(number, Optional(Sequence(OrderedChoice("plus", "minus", rule_name="op"), add)))
}
context, node = init([number, add], grammar, "one plus two")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying)
assert updated.props["number"].value == Concept("number", body="'one'").init_key()
assert updated.props["op"].value == "'plus'"
expected_add = Concept("add", body="number"). \
set_prop("number", Concept("number", body="'two'").init_key()). \
init_key()
assert updated.props["add"].value == expected_add
def test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more():
number = Concept("number")
add = Concept("add")
grammar = {
number: OrderedChoice("one", "two", 'three'),
add: Sequence(number, ZeroOrMore(Sequence(OrderedChoice("plus", "minus", rule_name="op"), number)))
}
context, node = init([number, add], grammar, "one plus two minus three")
updated = ConceptNodeEvaluator().finalize_concept(
context.sheerka,
context.sheerka.new(node.concept.key),
node.underlying,
init_empty_body=True)
assert updated.props["number"].value == [Concept("number", body="'one'").init_key(),
Concept("number", body="'two'").init_key(),
Concept("number", body="'three'").init_key()]
assert updated.props["op"].value == ["'plus'", "'minus'"]
+160
View File
@@ -0,0 +1,160 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from sdp.sheerkaDataProvider import Event
def get_context():
    """Fresh test ExecutionContext over an in-memory Sheerka store."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", Event(), engine)
def get_return_value(context, grammar, expression):
    """Lex *expression* with a freshly-initialized ConceptLexerParser.

    The lexer is expected to fail on its own; its (failed) return value is
    what MultipleConceptsParser consumes.
    """
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)
    result = lexer.parse(context, expression)
    assert not result.status
    return result
def init(concepts, grammar, expression):
    """Register *concepts*, lex *expression*; return (context, return_value)."""
    context = get_context()
    for concept in concepts:
        context.sheerka.create_new_concept(context, concept)
    return context, get_return_value(context, grammar, expression)
def test_not_interested_if_not_parser_result():
    """The parser declines inputs that are not ParserResult concepts."""
    ctx = get_context()
    assert MultipleConceptsParser().parse(ctx, "not parser result") is None


def test_not_interested_if_not_from_concept_lexer_parser():
    """A ParserResult produced by a different parser is declined too."""
    ctx = get_context()
    foreign = ParserResultConcept(parser="not concept lexer", value="some value")
    assert MultipleConceptsParser().parse(ctx, foreign) is None
def test_i_can_parse_exact_concepts():
    """Three plain concepts in a row become three ConceptNodes in order."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    baz = Concept("baz", body="'baz'")
    grammar = {}
    context, return_value = init([foo, bar, baz], grammar, "bar foo baz")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    assert ret_val.status
    assert ret_val.who == parser.name
    assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
    # Positions are token indices within the source expression.
    assert ret_val.value.value == [
        ConceptNode(bar, 0, 0, source="bar"),
        ConceptNode(foo, 2, 2, source="foo"),
        ConceptNode(baz, 4, 4, source="baz")]
    assert ret_val.value.source == "bar foo baz"
def test_i_can_parse_when_ending_with_bnf():
    """A trailing BNF-defined concept spans all of its matched tokens."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    grammar = {foo: Sequence("foo1", "foo2", "foo3")}
    context, return_value = init([foo, bar], grammar, "bar foo1 foo2 foo3")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    assert ret_val.status
    assert ret_val.who == parser.name
    assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
    # "foo" covers token positions 2..6 ("foo1 foo2 foo3").
    assert ret_val.value.value == [("bar", 0, 0, "bar"), ("foo", 2, 6, "foo1 foo2 foo3")]
    assert ret_val.value.source == "bar foo1 foo2 foo3"


def test_i_can_parse_when_starting_with_bnf():
    """Same as above with the BNF-defined concept first."""
    foo = Concept("foo", body="'foo'")
    bar = Concept("bar", body="'bar'")
    grammar = {foo: Sequence("foo1", "foo2", "foo3")}
    context, return_value = init([foo, bar], grammar, "foo1 foo2 foo3 bar")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    assert ret_val.status
    assert ret_val.who == parser.name
    assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
    assert ret_val.value.value == [("foo", 0, 4, "foo1 foo2 foo3"), ("bar", 6, 6, "bar")]
    assert ret_val.value.source == "foo1 foo2 foo3 bar"
def test_i_can_parse_when_concept_are_mixed():
    """Plain and BNF-defined concepts can interleave in one expression."""
    foo = Concept("foo")
    bar = Concept("bar")
    baz = Concept("baz")
    grammar = {foo: Sequence("foo1", "foo2", "foo3")}
    context, return_value = init([foo, bar, baz], grammar, "baz foo1 foo2 foo3 bar")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    assert ret_val.status
    assert ret_val.who == parser.name
    assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
    assert ret_val.value.value == [
        ("baz", 0, 0, "baz"),
        ("foo", 2, 6, "foo1 foo2 foo3"),
        ("bar", 8, 8, "bar")]
    assert ret_val.value.source == "baz foo1 foo2 foo3 bar"


def test_i_can_parse_when_multiple_concept_are_matching():
    """Two concepts sharing the name "bar" yield one result per candidate."""
    foo = Concept("foo")
    bar = Concept("bar", body="bar1")
    # NOTE: intentionally also named "bar" — same key, different body.
    baz = Concept("bar", body="bar2")
    grammar = {foo: "foo"}
    context, return_value = init([foo, bar, baz], grammar, "foo bar")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    # One alternative parse per matching concept.
    assert len(ret_val) == 2
    assert ret_val[0].status
    assert ret_val[0].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")]
    assert ret_val[0].value.source == "foo bar"
    assert ret_val[0].value.value[1].concept.body == "bar1"
    assert ret_val[1].status
    assert ret_val[1].value.value == [("foo", 0, 0, "foo"), ("bar", 2, 2, "bar")]
    assert ret_val[1].value.source == "foo bar"
    assert ret_val[1].value.value[1].concept.body == "bar2"
def test_i_cannot_parse_when_unrecognized_token():
    """Unrecognized tokens (" + ") fail the parse but are kept in the result."""
    twenty_two = Concept("twenty two")
    one = Concept("one")
    grammar = {twenty_two: Sequence("twenty", "two")}
    context, return_value = init([twenty_two, one], grammar, "twenty two + one")
    parser = MultipleConceptsParser()
    ret_val = parser.parse(context, return_value.body)
    assert not ret_val.status
    assert ret_val.who == parser.name
    assert context.sheerka.isinstance(ret_val.value, BuiltinConcepts.PARSER_RESULT)
    # The middle entry is the raw (start, end, text) of the stray " + ".
    assert ret_val.value.value == [
        ("twenty two", 0, 2, "twenty two"),
        (3, 5, " + "),
        ("one", 6, 6, "one")
    ]
    assert ret_val.value.source == "twenty two + one"
+23
View File
@@ -126,3 +126,26 @@ def test_i_can_eval_concept_token():
assert not evaluated.status
assert evaluated.body.body.args[0] == "'int' object has no attribute 'name'"
# Valid "__C__...__C__" identifiers decompose into (key, id, use_concept).
# NOTE(review): the last two cases are identical — one looks like a
# copy/paste duplicate; confirm whether a distinct case was intended.
@pytest.mark.parametrize("text, concept_key, concept_id, use_concept", [
    ("__C__key__C__", "key", None, False),
    ("__C__key__id__C__", "key", "id", False),
    ("__C__USE_CONCEPT__key__id__C__", "key", "id", True),
    ("__C__USE_CONCEPT__key__id__C__", "key", "id", True),
])
def test_i_can_resolve_name(text, concept_key, concept_id, use_concept):
    """resolve_name splits a concept identifier into its three parts."""
    context = get_context()
    assert PythonEvaluator().resolve_name(context, text) == (concept_key, concept_id, use_concept)


# Malformed identifiers (missing closing marker, empty key, ...) resolve to None.
@pytest.mark.parametrize("text", [
    "__C__",
    "__C__key",
    "__C__key____",
    "__C____",
    "__C__USE_CONCEPT__",
])
def test_i_cannot_resolve_name(text):
    """resolve_name rejects strings that are not well-formed identifiers."""
    context = get_context()
    assert PythonEvaluator().resolve_name(context, text) is None
+1 -1
View File
@@ -86,4 +86,4 @@ def test_i_can_parse_a_concept():
assert res
assert res.value.value == PythonNode(
"c:concept_name: + 1",
ast.parse("__C__concept_name__C__+1", mode="eval"))
ast.parse("__C__USE_CONCEPT__concept_name__C__+1", mode="eval"))
+142
View File
@@ -0,0 +1,142 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
from parsers.PythonParser import PythonNode, PythonErrorNode
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from sdp.sheerkaDataProvider import Event
def get_context():
    """Build an isolated ExecutionContext on an in-memory Sheerka."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", Event(), engine)
def get_ret_from(*args):
    """Build a lexer-style failed ReturnValueConcept from mixed inputs.

    Each Concept argument becomes a single-token ConceptNode; each string
    argument is tokenized and wrapped in an UnrecognizedTokensNode. Token
    positions (*index*) advance accordingly so nodes do not overlap.
    """
    result = []
    index = 0
    for item in args:
        if isinstance(item, Concept):
            # One synthetic identifier token standing in for the concept.
            tokens = [Token(TokenKind.IDENTIFIER, item.name, 0, 0, 0)]
            result.append(ConceptNode(item, index, index, tokens, item.name))
            index += 1
        else:
            tokens = list(Tokenizer(item))
            result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
            index += len(tokens)
    return ReturnValueConcept("who", False, ParserResultConcept(parser="name", value=result))
def to_str_ast(expression):
    """Return the normalized string dump of *expression* parsed in eval mode."""
    tree = ast.parse(expression, mode="eval")
    return PythonNode.get_dump(tree)
# Inputs the parser must decline: non-ParserResult, non-list payload,
# empty payload, payload without Node instances.
@pytest.mark.parametrize("text", [
    "not parser result",
    ParserResultConcept(value="not a list"),
    ParserResultConcept(value=[]),
    ParserResultConcept(value=["not a Node"]),
])
def test_not_interested(text):
    """PythonWithConceptsParser returns None for inputs it cannot handle."""
    context = get_context()
    res = PythonWithConceptsParser().parse(context, text)
    assert res is None
def test_i_can_parse_concepts_and_python():
    """A concept followed by python text parses into one PythonNode."""
    context = get_context()
    foo = Concept("foo")
    input_return_value = get_ret_from(foo, " + 1")
    parser = PythonWithConceptsParser()
    result = parser.parse(context, input_return_value.body)
    wrapper = result.value
    return_value = result.value.value
    assert result.status
    assert result.who == parser.name
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert wrapper.source == "foo + 1"
    assert isinstance(return_value, PythonNode)
    assert return_value.source == "foo + 1"
    # The concept is replaced by a python-safe identifier in the AST,
    # and the mapping back to the concept is recorded.
    assert return_value.get_dump(return_value.ast_) == to_str_ast("__C__foo__C__ + 1")
    assert return_value.concepts["__C__foo__C__"] == foo
def test_i_can_parse_concepts_and_python_when_concept_is_known():
    """A persisted concept's identifier embeds its database id (1001)."""
    context = get_context()
    foo = Concept("foo")
    foo = context.sheerka.create_new_concept(context, foo).body.body
    input_return_value = get_ret_from(foo, " + 1")
    parser = PythonWithConceptsParser()
    result = parser.parse(context, input_return_value.body)
    wrapper = result.value
    return_value = result.value.value
    assert result.status
    assert result.who == parser.name
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert wrapper.source == "foo + 1"
    assert isinstance(return_value, PythonNode)
    assert return_value.source == "foo + 1"
    assert return_value.get_dump(return_value.ast_) == to_str_ast("__C__foo__1001__C__ + 1")
    assert return_value.concepts["__C__foo__1001__C__"] == foo


def test_i_can_parse_when_concept_name_has_invalid_characters():
    """Non-identifier characters in the name are sanitized (here to '0')."""
    context = get_context()
    foo = Concept("foo et > (,")
    foo = context.sheerka.create_new_concept(context, foo).body.body
    input_return_value = get_ret_from(foo, " + 1")
    parser = PythonWithConceptsParser()
    result = parser.parse(context, input_return_value.body)
    return_value = result.value.value
    assert result.status
    assert return_value.concepts["__C__foo0et000000__1001__C__"] == foo
def test_python_ids_mappings_are_correct_when_concepts_with_the_same_name():
    """Homonymous concepts get distinct identifiers (counter or db id)."""
    context = get_context()
    foo1 = Concept("foo")
    foo2 = Concept("foo")
    foo3 = context.sheerka.create_new_concept(context, Concept("foo", body="foo3")).body.body
    foo4 = context.sheerka.create_new_concept(context, Concept("foo", body="foo4")).body.body
    input_return_value = get_ret_from(foo1, "+", foo2, "+", foo3, "+", foo4)
    parser = PythonWithConceptsParser()
    result = parser.parse(context, input_return_value.body)
    return_value = result.value.value
    assert result.status
    # Unsaved duplicates are disambiguated by suffix counter (_1);
    # saved ones by their database id (1001, 1002).
    assert return_value.concepts["__C__foo__C__"] == foo1
    assert return_value.concepts["__C__foo_1__C__"] == foo2
    assert return_value.concepts["__C__foo__1001__C__"] == foo3
    assert return_value.concepts["__C__foo__1002__C__"] == foo4


def test_i_cannot_parse_if_syntax_error():
    """A dangling operator yields a failed result holding a PythonErrorNode."""
    context = get_context()
    foo = Concept("foo")
    foo = context.sheerka.create_new_concept(context, foo).body.body
    input_return_value = get_ret_from(foo, " + ")
    parser = PythonWithConceptsParser()
    result = parser.parse(context, input_return_value.body)
    wrapper = result.value
    return_value = result.value.value
    assert not result.status
    assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
    assert isinstance(return_value[0], PythonErrorNode)
+48 -1
View File
@@ -144,7 +144,7 @@ def test_i_can_get_a_builtin_concept_by_their_enum_or_the_string():
assert sheerka.get(str(key)) is not None
def test_i_can_get_new_concept():
def test_i_can_get_a_newly_created_concept():
sheerka = get_sheerka()
concept = get_default_concept()
@@ -324,6 +324,21 @@ def test_i_can_instantiate_a_concept():
assert new.props["b"].value == "value"
def test_i_can_instantiate_with_the_name_and_the_id():
    """new() returns all homonyms for a key, or one when given (key, id)."""
    sheerka = get_sheerka()
    sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1"))
    sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo2"))
    # Plain key: both "foo" concepts come back.
    concepts = sheerka.new("foo")
    assert len(concepts) == 2
    # (key, id) tuple selects a single concept.
    foo1 = sheerka.new(("foo", "1001"))
    assert foo1.body == "foo1"
    foo2 = sheerka.new(("foo", "1002"))
    assert foo2.body == "foo2"
def test_instances_are_different_when_asking_for_new():
sheerka = get_sheerka()
concept = get_default_concept()
@@ -357,6 +372,38 @@ def test_i_cannot_instantiate_an_unknown_concept():
assert new.body == "fake_concept"
def test_i_cannot_instantiate_with_invalid_id():
    """An unknown id among several homonyms yields UNKNOWN_CONCEPT."""
    sheerka = get_sheerka()
    sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1"))
    sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo2"))
    new = sheerka.new(("foo", "invalid_id"))
    assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert new.body == "foo"


def test_i_cannot_instantiate_with_invalid_key():
    """An unknown key yields UNKNOWN_CONCEPT carrying the requested key."""
    sheerka = get_sheerka()
    sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1"))
    sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo2"))
    new = sheerka.new(("invalid_key", "1001"))
    assert sheerka.isinstance(new, BuiltinConcepts.UNKNOWN_CONCEPT)
    assert new.body == "invalid_key"


def test_concept_id_is_irrelevant_when_only_one_concept():
    """With a single match the id is ignored, even when invalid."""
    sheerka = get_sheerka()
    sheerka.create_new_concept(get_context(sheerka), Concept("foo", body="foo1"))
    new = sheerka.new(("foo", "invalid_id"))
    assert sheerka.isinstance(new, "foo")
    assert new.body == "foo1"
def test_i_cannot_instantiate_when_properties_are_not_recognized():
sheerka = get_sheerka()
concept = get_default_concept()
+42
View File
@@ -401,6 +401,48 @@ def test_i_can_eval_bnf_definitions_from_separate_instances():
assert res[0].value.props["a"] == Property("a", sheerka.new(concept_a.key, body="one two").init_key())
def test_i_can_eval_a_mix_with_bnf_and_python():
    """A BNF concept embedded in a python expression evaluates end to end."""
    sheerka = get_sheerka()
    sheerka.evaluate_user_input("def concept one as 1")
    sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' one as 20 + one")
    res = sheerka.evaluate_user_input("twenty one + 1")
    assert len(res) == 1
    assert res[0].status
    # "twenty one" -> 21, plus the trailing "+ 1".
    assert res[0].body == 22


def test_i_can_eval_a_mix_with_bnf_and_python_when_rule_name():
    """Same flow when the BNF alternative is bound to a rule name (=unit)."""
    sheerka = get_sheerka()
    sheerka.evaluate_user_input("def concept one as 1")
    sheerka.evaluate_user_input("def concept two as 2")
    sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit")
    assert sheerka.evaluate_user_input("eval twenty one")[0].body == 21
    res = sheerka.evaluate_user_input("twenty one + 1")
    assert len(res) == 1
    assert res[0].status
    assert res[0].body == 22


def test_i_can_eval_a_more_complicated_mix_with_bnf_and_python():
    """Two BNF concept instances can appear in one python expression."""
    sheerka = get_sheerka()
    sheerka.evaluate_user_input("def concept one as 1")
    sheerka.evaluate_user_input("def concept two as 2")
    sheerka.evaluate_user_input("def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit")
    assert sheerka.evaluate_user_input("eval twenty one")[0].body == 21
    res = sheerka.evaluate_user_input("twenty one + twenty two")
    assert len(res) == 1
    assert res[0].status
    # 21 + 22
    assert res[0].body == 43
def test_i_can_say_that_a_concept_isa_another_concept():
sheerka = get_sheerka()
sheerka.evaluate_user_input("def concept foo")