Implemented FunctionParser
This commit is contained in:
@@ -16,6 +16,8 @@ class BuiltinConcepts(Enum):
|
||||
SHEERKA = "sheerka"
|
||||
|
||||
# processing instructions during sheerka.execute()
|
||||
# The instruction may alter how the actions work
|
||||
DEBUG = "debug" # activate all debug information
|
||||
EVAL_BODY_REQUESTED = "eval body" # to evaluate the body
|
||||
EVAL_WHERE_REQUESTED = "eval where" # to evaluate the where clause
|
||||
RETURN_BODY_REQUESTED = "return body" # returns the body of the concept instead of the concept itself
|
||||
|
||||
+121
-5
@@ -6,14 +6,16 @@ from core.ast.nodes import CallNodeConcept
|
||||
from core.ast.visitors import UnreferencedNamesVisitor
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, NotInit, ConceptParts
|
||||
from core.sheerka.services.SheerkaExecute import SheerkaExecute
|
||||
from core.tokenizer import Keywords
|
||||
# from evaluators.BaseEvaluator import BaseEvaluator
|
||||
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode, SourceCodeWithConceptNode
|
||||
from parsers.BaseParser import BaseParser, ErrorNode
|
||||
|
||||
PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
|
||||
EVAL_STEPS = PARSE_STEPS + [BuiltinConcepts.BEFORE_EVALUATION, BuiltinConcepts.EVALUATION,
|
||||
BuiltinConcepts.AFTER_EVALUATION]
|
||||
PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"]
|
||||
|
||||
|
||||
def is_same_success(context, return_values):
|
||||
@@ -342,6 +344,37 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
|
||||
return no_python
|
||||
|
||||
|
||||
def parse_function(context, source, tokens=None, start=0):
    """
    Run the FunctionParser on a piece of source that is expected to be a function.

    The parser is executed in a PARSE_CODE sub context. Every returned value whose
    body is a PARSER_RESULT gets the positions of its node shifted by ``start``.
    For a SourceCodeWithConceptNode, the first, last and inner nodes are shifted too.

    :param context: execution context (provides ``sheerka`` and ``push()``)
    :param source: text to parse
    :param tokens: tokens forwarded to ``get_parser_input()`` together with the source
    :param start: offset added to the ``start`` / ``end`` of the resulting nodes
    :return: the list of values returned by the parser (a single result is wrapped in a list)
    """
    sheerka = context.sheerka
    from parsers.FunctionParser import FunctionParser  # local import, as in the original
    function_parser = FunctionParser()
    description = f"Parsing function '{source}'"

    with context.push(BuiltinConcepts.PARSE_CODE, source, desc=description) as sub_context:
        execution_service = sheerka.services[SheerkaExecute.NAME]
        parser_input = execution_service.get_parser_input(source, tokens)
        results = function_parser.parse(sub_context, parser_input)

        if not isinstance(results, list):
            results = [results]

        # only the values that carry a parser result have positions to shift
        parsed = [rv for rv in results if sheerka.isinstance(rv.body, BuiltinConcepts.PARSER_RESULT)]
        for ret_val in parsed:
            ret_val.body.body.start += start
            ret_val.body.body.end += start
            if isinstance(ret_val.body.body, SourceCodeWithConceptNode):
                node = ret_val.body.body
                for inner in [node.first, node.last] + node.nodes:
                    inner.start += start
                    inner.end += start

    return results
|
||||
|
||||
|
||||
def evaluate(context,
|
||||
source,
|
||||
evaluators="all",
|
||||
@@ -415,7 +448,12 @@ def get_lexer_nodes(return_values, start, tokens):
|
||||
|
||||
end = start + len(tokens) - 1
|
||||
lexer_nodes.append(
|
||||
[SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
|
||||
[SourceCodeNode(start,
|
||||
end,
|
||||
tokens,
|
||||
ret_val.body.source,
|
||||
python_node=ret_val.body.body,
|
||||
return_value=ret_val)])
|
||||
|
||||
elif ret_val.who == "parsers.ExactConcept":
|
||||
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
|
||||
@@ -479,6 +517,81 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
|
||||
return get_lexer_nodes(res.body.body, unrecognized_tokens_node.start, unrecognized_tokens_node.tokens)
|
||||
|
||||
|
||||
def update_compiled(context, concept, errors, parsers=None):
    """
    Walk through concept.compiled (recursively) and replace the lexer nodes it contains.

    A SourceCodeWithConceptNode is parsed with PythonWithConceptsParser and replaced
    by a one element list holding the result. An UnrecognizedTokensNode is parsed with
    parse_unrecognized() and replaced by the body of the successful result.
    Anything that cannot be parsed adds an ERROR concept to ``errors``.

    Once done, if the concept has variables and the source of every variable is the
    name of the variable itself, the concept is flagged as evaluated.

    :param context: execution context
    :param concept: concept whose compiled entries must be updated
    :param errors: a list that must be initialized by the caller; errors are appended to it
    :param parsers: to customize the parsers to use (defaults to PARSERS)
    :return: None
    """

    sheerka = context.sheerka
    parsers = parsers or PARSERS

    def _cannot_parse(node):
        """
        Build the error concept for a node that could not be parsed
        """
        return sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{node.source}'")

    def _validate_concept(c):
        """
        Recursively browse the compiled properties and replace the lexer nodes
        :param c: concept to browse
        :return: None
        """
        for key, value in c.compiled.items():
            if isinstance(value, Concept):
                _validate_concept(value)
                continue

            if isinstance(value, SourceCodeWithConceptNode):
                from parsers.PythonWithConceptsParser import PythonWithConceptsParser
                helper = PythonWithConceptsParser()
                parsed = helper.parse_nodes(context, value.get_all_nodes())
                if parsed.status:
                    c.compiled[key] = [parsed]
                else:
                    errors.append(_cannot_parse(value))
                continue

            if isinstance(value, UnrecognizedTokensNode):
                parsed = parse_unrecognized(context, value.source, parsers)
                parsed = only_successful(context, parsed)  # only keep successful parsers
                if parsed.status:
                    c.compiled[key] = parsed.body.body
                else:
                    errors.append(_cannot_parse(value))

    def _get_source(compiled, var_name):
        """
        Source of the compiled variable, or None when the entry is not a single
        return value holding a parser result (or when it comes from ShortTermMemory)
        """
        if var_name not in compiled:
            return None

        entry = compiled[var_name]
        if not isinstance(entry, list) or not len(entry) == 1:
            return None

        ret_val = entry[0]
        if not sheerka.isinstance(ret_val, BuiltinConcepts.RETURN_VALUE):
            return None
        if not sheerka.isinstance(ret_val.body, BuiltinConcepts.PARSER_RESULT):
            return None
        if ret_val.body.name == "parsers.ShortTermMemory":
            return None

        return ret_val.body.source

    _validate_concept(concept)

    # Special case where the values of the variables are the names of the variables
    # example : Concept("a plus b").def_var("a").def_var("b")
    # and the user has entered 'a plus b'
    # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
    # This means that 'a' and 'b' don't have any real value
    if len(concept.metadata.variables) > 0:
        has_real_value = any(_get_source(concept.compiled, name) != name
                             for name, value in concept.metadata.variables)
        if not has_real_value:
            concept.metadata.is_evaluated = True
|
||||
|
||||
|
||||
def get_names(sheerka, concept_node):
|
||||
"""
|
||||
Finds all the names referenced by the concept_node
|
||||
@@ -603,10 +716,11 @@ def remove_from_ret_val(sheerka, return_values, concept_key):
|
||||
return return_values
|
||||
|
||||
|
||||
def set_is_evaluated(concepts):
|
||||
def set_is_evaluated(concepts, check_nb_variables=False):
|
||||
"""
|
||||
set is_evaluated to True
|
||||
:param concepts:
|
||||
:param check_nb_variables: only set is_evaluated if the concept has variables
|
||||
:return:
|
||||
"""
|
||||
if concepts is None:
|
||||
@@ -614,6 +728,8 @@ def set_is_evaluated(concepts):
|
||||
|
||||
if hasattr(concepts, "__iter__"):
|
||||
for c in concepts:
|
||||
c.metadata.is_evaluated = True
|
||||
if not check_nb_variables or check_nb_variables and len(c.metadata.variables) > 0:
|
||||
c.metadata.is_evaluated = True
|
||||
else:
|
||||
concepts.metadata.is_evaluated = True
|
||||
if not check_nb_variables or check_nb_variables and len(concepts.metadata.variables) > 0:
|
||||
concepts.metadata.is_evaluated = True
|
||||
|
||||
+42
-1
@@ -130,7 +130,7 @@ class Concept:
|
||||
if isinstance(other, simplec):
|
||||
return self.name == other.name and self.body == other.body
|
||||
|
||||
if isinstance(other, (CC, CB, CV, CMV)):
|
||||
if isinstance(other, (CC, CB, CV, CMV, CIO)):
|
||||
return other == self
|
||||
|
||||
if not isinstance(other, Concept):
|
||||
@@ -726,4 +726,45 @@ class CMV:
|
||||
return txt + ")"
|
||||
|
||||
|
||||
class CIO:
    """
    Concept id only

    Comparison helper that only tests the id of a concept.
    It can be built from a concept name (the id stays None until set_concept()
    is called) or directly from a Concept.
    """

    def __init__(self, concept, source=None):
        """
        :param concept: either the name of a concept (str) or a Concept instance
        :param source: stored as is in ``self.source``
        """
        # Every attribute gets a value up front, so __eq__, __hash__ and __repr__
        # cannot fail with AttributeError whatever the type of `concept` is
        self.concept_name = None
        self.concept_id = None
        self.concept = None

        if isinstance(concept, str):
            self.concept_name = concept
        elif isinstance(concept, Concept):
            self.concept_name = concept.name
            self.concept_id = concept.id
            self.concept = concept

        self.source = source
        self.start = -1
        self.end = -1

    def set_concept(self, concept):
        """
        Bind this helper to a concept and take its id
        :param concept: the Concept to bind
        """
        self.concept = concept
        self.concept_id = concept.id

    def __eq__(self, other):
        if self is other:
            return True

        # against a real concept, only the id matters
        if isinstance(other, Concept):
            return self.concept_id == other.id

        if not isinstance(other, CIO):
            return False

        return self.concept_id == other.concept_id

    def __hash__(self):
        # consistent with __eq__ : only the id is used
        return hash(self.concept_id)

    def __repr__(self):
        return f"CIO(concept='{self.concept}')" if self.concept else f"CIO(name='{self.concept_name}')"
|
||||
|
||||
|
||||
simplec = namedtuple("concept", "name body") # for simple concept (tests purposes only)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
from core.concept import Concept
|
||||
from core.sheerka.services.SheerkaExecute import NO_MATCH
|
||||
from core.sheerka.services.SheerkaShortTermMemory import SheerkaShortTermMemory
|
||||
@@ -309,6 +309,15 @@ class ExecutionContext:
|
||||
def in_private_context(self, concept_key):
    """
    Tell whether concept_key is one of the private hints
    :param concept_key: key to look for
    :return: result of the membership test on self.private_hints
    """
    hints = self.private_hints
    return concept_key in hints
|
||||
|
||||
def add_to_private_hints(self, concept_key):
    """
    Add concept_key to self.private_hints
    :param concept_key: key to add
    :return: None
    """
    hints = self.private_hints
    hints.add(concept_key)
|
||||
|
||||
def add_to_protected_hints(self, concept_key):
    """
    Add concept_key to self.protected_hints
    :param concept_key: key to add
    :return: None
    """
    hints = self.protected_hints
    hints.add(concept_key)
|
||||
|
||||
def add_to_global_hints(self, concept_key):
    """
    Add concept_key to self.global_hints
    :param concept_key: key to add
    :return: None
    """
    hints = self.global_hints
    hints.add(concept_key)
|
||||
|
||||
@staticmethod
def _is_return_value(obj):
    """
    Tell whether obj is a Concept whose key is the RETURN_VALUE builtin concept
    :param obj: object to test
    :return: True or False
    """
    if not isinstance(obj, Concept):
        return False
    return obj.key == str(BuiltinConcepts.RETURN_VALUE)
|
||||
@@ -358,7 +367,11 @@ class ExecutionContext:
|
||||
ret_val = self.values["return_values"]
|
||||
if not isinstance(ret_val, Concept) or not ret_val.key == str(BuiltinConcepts.RETURN_VALUE):
|
||||
return None
|
||||
return ret_val.status
|
||||
if ret_val.status:
|
||||
return True
|
||||
if isinstance(ret_val.body, ParserResultConcept):
|
||||
return "Almost"
|
||||
return False
|
||||
|
||||
def as_bag(self):
|
||||
"""
|
||||
|
||||
@@ -558,6 +558,12 @@ class Sheerka(Concept):
|
||||
return self._get_unknown(metadata)
|
||||
|
||||
def resolve(self, concept):
|
||||
"""
|
||||
Try to find a concept by its name, id, or c:: definition
|
||||
A new instance (using new_from_template()) is returned when it's possible
|
||||
:param concept:
|
||||
:return:
|
||||
"""
|
||||
|
||||
def new_instances(concepts):
|
||||
if hasattr(concepts, "__iter__"):
|
||||
@@ -567,6 +573,9 @@ class Sheerka(Concept):
|
||||
if concept is None:
|
||||
return None
|
||||
|
||||
# ##############
|
||||
# PREPROCESS
|
||||
# ##############
|
||||
# if the entry is a concept token, use its values.
|
||||
if isinstance(concept, Token):
|
||||
if concept.type != TokenKind.CONCEPT:
|
||||
@@ -578,6 +587,9 @@ class Sheerka(Concept):
|
||||
(tmp := core.utils.unstr_concept(concept)) != (None, None):
|
||||
concept = tmp
|
||||
|
||||
# ##############
|
||||
# PROCESS
|
||||
# ##############
|
||||
# if the entry is a tuple
|
||||
# concept[0] is the name
|
||||
# concept[1] is the id
|
||||
@@ -599,7 +611,7 @@ class Sheerka(Concept):
|
||||
if isinstance(concept, str):
|
||||
if self.is_known(found := self.get_by_name(concept)):
|
||||
instances = new_instances(found)
|
||||
core.builtin_helpers.set_is_evaluated(instances)
|
||||
core.builtin_helpers.set_is_evaluated(instances, check_nb_variables=True)
|
||||
return instances
|
||||
|
||||
return None
|
||||
|
||||
@@ -5,7 +5,7 @@ from core.sheerka.services.sheerka_service import BaseService
|
||||
|
||||
CONCEPTS_FILE = "_concepts_lite.txt"
|
||||
CONCEPTS_FILE_ALL_CONCEPTS = "_concepts.txt"
|
||||
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE_ALL_CONCEPTS
|
||||
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE
|
||||
|
||||
class SheerkaAdmin(BaseService):
|
||||
NAME = "Admin"
|
||||
@@ -47,6 +47,9 @@ class SheerkaAdmin(BaseService):
|
||||
if concept_file == "full":
|
||||
concept_file = CONCEPTS_FILE_ALL_CONCEPTS
|
||||
|
||||
elif not concept_file.startswith("_concepts"):
|
||||
concept_file = f"_concepts_{concept_file}.txt"
|
||||
|
||||
try:
|
||||
start = time.time_ns()
|
||||
nb_lines = 0
|
||||
|
||||
@@ -2,7 +2,7 @@ import core.utils
|
||||
from cache.Cache import Cache
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||
from core.sheerka.services.sheerka_service import BaseService
|
||||
from core.tokenizer import Tokenizer, TokenKind, Keywords, Token
|
||||
from core.tokenizer import Tokenizer, TokenKind, Token
|
||||
|
||||
NO_MATCH = "** No Match **"
|
||||
|
||||
@@ -88,6 +88,20 @@ class ParserInput:
|
||||
|
||||
return self.pos < self.end
|
||||
|
||||
def seek(self, pos):
    """
    Move the token offset to position pos

    When pos is out of range, the current token is reset to None and the
    position is left untouched.

    :param pos: index of the token to move to
    :return: True if pos is a valid position, False otherwise
    """
    if 0 <= pos < self.end:
        self.pos = pos
        self.token = self.tokens[pos]
        return True

    self.token = None
    return False
|
||||
|
||||
def is_empty(self):
|
||||
if self.text.strip() == "":
|
||||
return True
|
||||
@@ -116,7 +130,6 @@ class ParserInput:
|
||||
tokens = [tokens]
|
||||
|
||||
switcher = {
|
||||
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
|
||||
TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,7 @@ class SheerkaModifyConcept(BaseService):
|
||||
|
||||
if old_version == concept:
|
||||
# the concept is not modified
|
||||
# This is an important sanity check. Do not remove it just because you don't understand it
|
||||
return self.sheerka.ret(
|
||||
self.NAME, False,
|
||||
self.sheerka.new(
|
||||
|
||||
@@ -2,6 +2,7 @@ from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
from cache.Cache import Cache
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.sheerka.services.sheerka_service import ServiceObj, BaseService
|
||||
|
||||
|
||||
@@ -48,6 +49,7 @@ class SheerkaVariableManager(BaseService):
|
||||
|
||||
variable = Variable(context.event.get_digest(), who, key, value, None)
|
||||
self.sheerka.cache_manager.put(self.VARIABLES_ENTRY, variable.get_key(), variable)
|
||||
return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
|
||||
|
||||
def load(self, who, key):
|
||||
variable = self.sheerka.cache_manager.get(self.VARIABLES_ENTRY, who + "|" + key)
|
||||
|
||||
+26
-13
@@ -62,6 +62,7 @@ class Token:
|
||||
|
||||
_strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
|
||||
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
|
||||
_repr_value: str = field(default=None, repr=False, compare=False, hash=None)
|
||||
|
||||
def __repr__(self):
|
||||
if self.type == TokenKind.IDENTIFIER:
|
||||
@@ -82,7 +83,7 @@ class Token:
|
||||
if self._strip_quote:
|
||||
return self._strip_quote
|
||||
|
||||
self._strip_quote = self._to_str(True)
|
||||
self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
|
||||
return self._strip_quote
|
||||
|
||||
@property
|
||||
@@ -90,18 +91,36 @@ class Token:
|
||||
if self._str_value:
|
||||
return self._str_value
|
||||
|
||||
self._str_value = self._to_str(False)
|
||||
self._str_value = self.to_str(False)
|
||||
return self._str_value
|
||||
|
||||
@property
def repr_value(self):
    """
    Printable value of the token, computed once and kept in self._repr_value

    EOF, whitespace and newline tokens get a placeholder (<EOF>, <ws>, <nl>),
    every other token uses its str_value.
    """
    cached = self._repr_value
    if cached:
        return cached

    if self.type == TokenKind.EOF:
        text = "<EOF>"
    elif self.type == TokenKind.WHITESPACE:
        text = "<ws>"
    elif self.type == TokenKind.NEWLINE:
        text = "<nl>"
    else:
        text = self.str_value

    self._repr_value = text
    return self._repr_value
|
||||
|
||||
@staticmethod
|
||||
def is_whitespace(token):
|
||||
return token and token.type == TokenKind.WHITESPACE
|
||||
|
||||
def _to_str(self, strip_quote):
|
||||
def to_str(self, strip_quote):
|
||||
if strip_quote and self.type == TokenKind.STRING:
|
||||
return self.value[1:-1]
|
||||
elif self.type == TokenKind.KEYWORD:
|
||||
return self.value.value
|
||||
elif self.type == TokenKind.CONCEPT:
|
||||
from core.utils import str_concept
|
||||
return str_concept(self.value)
|
||||
else:
|
||||
return str(self.value)
|
||||
|
||||
@@ -136,8 +155,6 @@ class Tokenizer:
|
||||
Class that can iterate on the tokens
|
||||
"""
|
||||
|
||||
KEYWORDS = set(x.value for x in Keywords)
|
||||
|
||||
def __init__(self, text, yield_eof=True, parse_word=False):
|
||||
self.text = text
|
||||
self.text_len = len(text)
|
||||
@@ -175,9 +192,7 @@ class Tokenizer:
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
|
||||
identifier = self.eat_identifier(self.i)
|
||||
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
|
||||
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
|
||||
yield Token(token_type, value, self.i, self.line, self.column)
|
||||
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
|
||||
self.i += len(identifier)
|
||||
self.column += len(identifier)
|
||||
elif self.i + 7 < self.text_len and \
|
||||
@@ -335,11 +350,9 @@ class Tokenizer:
|
||||
yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
|
||||
self.i += len(word)
|
||||
self.column += len(word)
|
||||
elif c.isalpha() or c == "_":
|
||||
elif c.isalpha():
|
||||
identifier = self.eat_identifier(self.i)
|
||||
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
|
||||
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
|
||||
yield Token(token_type, value, self.i, self.line, self.column)
|
||||
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
|
||||
self.i += len(identifier)
|
||||
self.column += len(identifier)
|
||||
elif c.isdigit():
|
||||
@@ -457,7 +470,7 @@ class Tokenizer:
|
||||
|
||||
i = start_index + 1
|
||||
escape = False
|
||||
#newline = None
|
||||
# newline = None
|
||||
while i < self.text_len:
|
||||
c = self.text[i]
|
||||
result += c
|
||||
|
||||
@@ -296,6 +296,28 @@ def dict_product(a, b):
|
||||
return res
|
||||
|
||||
|
||||
def get_n_clones(obj, n):
    """
    Return a list made of obj itself followed by n - 1 clones of it
    :param obj: object to duplicate, must expose a clone() method when n > 1
    :param n: total number of items wanted (obj is always included, even when n < 1)
    :return: list whose first element is obj
    """
    return [obj] + [obj.clone() for _ in range(n - 1)]
|
||||
|
||||
|
||||
def obj_product(list_of_objs, new_items, add_item):
    """
    Combine every object of list_of_objs with every item of new_items

    Each object is expanded into len(new_items) instances (the object itself,
    then clones of it) and add_item(instance, item) is called once per pair.

    :param list_of_objs: objects to expand, returned as is when None or empty
    :param new_items: items to distribute over the instances
    :param add_item: callable(instance, item) that attaches the item to the instance
    :return: flat list of all the instances, in order
    """
    if list_of_objs is None or len(list_of_objs) == 0:
        return list_of_objs

    nb_items = len(new_items)
    expanded = []
    for current in list_of_objs:
        # the original object comes first, followed by its clones
        copies = [current]
        for _ in range(nb_items - 1):
            copies.append(current.clone())
        expanded.extend(copies)

        for copy, item in zip(copies, new_items):
            add_item(copy, item)

    return expanded
|
||||
|
||||
|
||||
def strip_quotes(text):
|
||||
if not isinstance(text, str):
|
||||
return text
|
||||
|
||||
Reference in New Issue
Block a user