Implemented FunctionParser

This commit is contained in:
2020-09-17 14:11:09 +02:00
parent 8a866880bc
commit 177a6b1d5f
40 changed files with 1752 additions and 561 deletions
+2
View File
@@ -16,6 +16,8 @@ class BuiltinConcepts(Enum):
SHEERKA = "sheerka"
# processing instructions during sheerka.execute()
# The instruction may alter how the actions work
DEBUG = "debug" # activate all debug information
EVAL_BODY_REQUESTED = "eval body" # to evaluate the body
EVAL_WHERE_REQUESTED = "eval where" # to evaluate the where clause
RETURN_BODY_REQUESTED = "return body" # returns the body of the concept instead of the concept itself
+121 -5
View File
@@ -6,14 +6,16 @@ from core.ast.nodes import CallNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, NotInit, ConceptParts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from core.tokenizer import Keywords
# from evaluators.BaseEvaluator import BaseEvaluator
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
EVAL_STEPS = PARSE_STEPS + [BuiltinConcepts.BEFORE_EVALUATION, BuiltinConcepts.EVALUATION,
BuiltinConcepts.AFTER_EVALUATION]
PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"]
def is_same_success(context, return_values):
@@ -342,6 +344,37 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
return no_python
def parse_function(context, source, tokens=None, start=0):
    """
    Helper that runs the FunctionParser on what is supposed to be a function
    :param context: execution context; a PARSE_CODE sub context is pushed for the parsing
    :param source: text to parse
    :param tokens: forwarded, with source, to SheerkaExecute.get_parser_input()
    :param start: offset added to the start / end indexes of the resulting nodes
    :return: always a list (a single result is wrapped into a list)
    """
    # function-scope import, as in the rest of this helper module
    from parsers.FunctionParser import FunctionParser

    sheerka = context.sheerka
    function_parser = FunctionParser()

    with context.push(BuiltinConcepts.PARSE_CODE,
                      source,
                      desc=f"Parsing function '{source}'") as sub_context:
        executor = sheerka.services[SheerkaExecute.NAME]
        parser_input = executor.get_parser_input(source, tokens)
        res = function_parser.parse(sub_context, parser_input)

    if not isinstance(res, list):
        res = [res]

    # Only the parser results carry a node whose positions have to be shifted
    parser_results = [item for item in res if sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)]
    for item in parser_results:
        node = item.body.body
        node.start += start
        node.end += start

        if not isinstance(node, SourceCodeWithConceptNode):
            continue

        # a SourceCodeWithConceptNode also owns inner nodes, they are shifted by the same offset
        for inner in [node.first, node.last] + node.nodes:
            inner.start += start
            inner.end += start

    return res
def evaluate(context,
source,
evaluators="all",
@@ -415,7 +448,12 @@ def get_lexer_nodes(return_values, start, tokens):
end = start + len(tokens) - 1
lexer_nodes.append(
[SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
[SourceCodeNode(start,
end,
tokens,
ret_val.body.source,
python_node=ret_val.body.body,
return_value=ret_val)])
elif ret_val.who == "parsers.ExactConcept":
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -479,6 +517,81 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
return get_lexer_nodes(res.body.body, unrecognized_tokens_node.start, unrecognized_tokens_node.tokens)
def update_compiled(context, concept, errors, parsers=None):
    """
    Recursively iterate through concept.compiled to replace the LexerNodes
    by concepts or lists of ReturnValueConcept

    When parsing with a LexerNodeParser (SyaNodeParser, BnfNodeParser...)
    the result is a LexerNode.
    In the specific case of a ConceptNode, the compiled variables are also LexerNodes (UnrecognizedTokensNode...)
    This function iterates through the compiled entries to transform these nodes
    :param context:
    :param concept: concept whose compiled entries are updated in place
    :param errors: a list that must be initialized by the caller, ERROR concepts are appended to it
    :param parsers: to customize the parsers to use (PARSERS when not given)
    :return: None
    """
    sheerka = context.sheerka
    parsers = parsers or PARSERS

    def _cannot_parse(node):
        # error concept reported when a node cannot be turned into something usable
        return sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{node.source}'")

    def _validate_concept(c):
        """
        Recursively browse the compiled properties in order to resolve the nodes that are not recognized yet
        :param c:
        :return:
        """
        for key, node in c.compiled.items():
            if isinstance(node, Concept):
                _validate_concept(node)
                continue

            if isinstance(node, SourceCodeWithConceptNode):
                from parsers.PythonWithConceptsParser import PythonWithConceptsParser
                parsed = PythonWithConceptsParser().parse_nodes(context, node.get_all_nodes())
                if parsed.status:
                    c.compiled[key] = [parsed]
                else:
                    errors.append(_cannot_parse(node))
                continue

            if isinstance(node, UnrecognizedTokensNode):
                parsed = parse_unrecognized(context, node.source, parsers)
                parsed = only_successful(context, parsed)  # only keep successful parsers
                if parsed.status:
                    c.compiled[key] = parsed.body.body
                else:
                    errors.append(_cannot_parse(node))

    def _get_source(compiled, var_name):
        # Source of the variable, when it is compiled as exactly one RETURN_VALUE
        # wrapping a PARSER_RESULT that does not come from the short term memory
        if var_name not in compiled:
            return None

        candidates = compiled[var_name]
        if not isinstance(candidates, list) or len(candidates) != 1:
            return None

        ret_val = candidates[0]
        if not sheerka.isinstance(ret_val, BuiltinConcepts.RETURN_VALUE):
            return None

        parser_result = ret_val.body
        if not sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT):
            return None

        if parser_result.name == "parsers.ShortTermMemory":
            return None

        return parser_result.source

    _validate_concept(concept)

    # Special case where the values of the variables are the names of the variables
    # example : Concept("a plus b").def_var("a").def_var("b")
    # and the user has entered 'a plus b'
    # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
    # This means that 'a' and 'b' don't have any real value
    if len(concept.metadata.variables) > 0:
        has_real_value = any(_get_source(concept.compiled, name) != name
                             for name, _ in concept.metadata.variables)
        if not has_real_value:
            concept.metadata.is_evaluated = True
def get_names(sheerka, concept_node):
"""
Finds all the names referenced by the concept_node
@@ -603,10 +716,11 @@ def remove_from_ret_val(sheerka, return_values, concept_key):
return return_values
def set_is_evaluated(concepts):
def set_is_evaluated(concepts, check_nb_variables=False):
"""
set is_evaluated to True
:param concepts:
:param check_nb_variables: only set is_evaluated if the concept has variables
:return:
"""
if concepts is None:
@@ -614,6 +728,8 @@ def set_is_evaluated(concepts):
if hasattr(concepts, "__iter__"):
for c in concepts:
c.metadata.is_evaluated = True
if not check_nb_variables or check_nb_variables and len(c.metadata.variables) > 0:
c.metadata.is_evaluated = True
else:
concepts.metadata.is_evaluated = True
if not check_nb_variables or check_nb_variables and len(concepts.metadata.variables) > 0:
concepts.metadata.is_evaluated = True
+42 -1
View File
@@ -130,7 +130,7 @@ class Concept:
if isinstance(other, simplec):
return self.name == other.name and self.body == other.body
if isinstance(other, (CC, CB, CV, CMV)):
if isinstance(other, (CC, CB, CV, CMV, CIO)):
return other == self
if not isinstance(other, Concept):
@@ -726,4 +726,45 @@ class CMV:
return txt + ")"
class CIO:
    """
    Concept id only
    Only the id is tested: a CIO is equal to a Concept, or to another CIO,
    as soon as the ids are the same
    """

    def __init__(self, concept, source=None):
        """
        :param concept: either the name of a concept (str) or a Concept
        :param source: kept as is in self.source
        """
        # Every attribute gets a value up front, whatever the type of 'concept',
        # so that __eq__, __hash__ and __repr__ can never raise an AttributeError
        self.concept_name = None
        self.concept_id = None
        self.concept = None

        if isinstance(concept, str):
            # only the name is known, the concept can be given later with set_concept()
            self.concept_name = concept
        elif isinstance(concept, Concept):
            self.concept_name = concept.name
            self.concept_id = concept.id
            self.concept = concept

        self.source = source
        self.start = -1
        self.end = -1

    def set_concept(self, concept):
        """
        Binds the concept (and its id) to this instance
        :param concept: object exposing an 'id' attribute
        :return:
        """
        self.concept = concept
        self.concept_id = concept.id

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, CIO):
            return self.concept_id == other.concept_id

        if isinstance(other, Concept):
            return self.concept_id == other.id

        return False

    def __hash__(self):
        # consistent with __eq__ : only the id matters
        return hash(self.concept_id)

    def __repr__(self):
        if self.concept is not None:
            return f"CIO(concept='{self.concept}')"
        return f"CIO(name='{self.concept_name}')"
simplec = namedtuple("concept", "name body") # for simple concept (tests purposes only)
+15 -2
View File
@@ -1,7 +1,7 @@
import logging
import time
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import NO_MATCH
from core.sheerka.services.SheerkaShortTermMemory import SheerkaShortTermMemory
@@ -309,6 +309,15 @@ class ExecutionContext:
def in_private_context(self, concept_key):
return concept_key in self.private_hints
def add_to_private_hints (self, concept_key):
# Adds concept_key to self.private_hints, the collection that in_private_context() looks into
self.private_hints.add(concept_key)
def add_to_protected_hints(self, concept_key):
# Adds concept_key to self.protected_hints
self.protected_hints.add(concept_key)
def add_to_global_hints(self, concept_key):
# Adds concept_key to self.global_hints
self.global_hints.add(concept_key)
@staticmethod
def _is_return_value(obj):
    """
    Tells whether obj is a Concept whose key is the RETURN_VALUE builtin key
    :param obj:
    :return: True or False
    """
    if not isinstance(obj, Concept):
        return False

    return obj.key == str(BuiltinConcepts.RETURN_VALUE)
@@ -358,7 +367,11 @@ class ExecutionContext:
ret_val = self.values["return_values"]
if not isinstance(ret_val, Concept) or not ret_val.key == str(BuiltinConcepts.RETURN_VALUE):
return None
return ret_val.status
if ret_val.status:
return True
if isinstance(ret_val.body, ParserResultConcept):
return "Almost"
return False
def as_bag(self):
"""
+13 -1
View File
@@ -558,6 +558,12 @@ class Sheerka(Concept):
return self._get_unknown(metadata)
def resolve(self, concept):
"""
Try to find a concept by its name, id, or c:: definition
A new instance (using new_from_template()) is returned when it's possible
:param concept:
:return:
"""
def new_instances(concepts):
if hasattr(concepts, "__iter__"):
@@ -567,6 +573,9 @@ class Sheerka(Concept):
if concept is None:
return None
# ##############
# PREPROCESS
# ##############
# if the entry is a concept token, use its values.
if isinstance(concept, Token):
if concept.type != TokenKind.CONCEPT:
@@ -578,6 +587,9 @@ class Sheerka(Concept):
(tmp := core.utils.unstr_concept(concept)) != (None, None):
concept = tmp
# ##############
# PROCESS
# ##############
# if the entry is a tuple
# concept[0] is the name
# concept[1] is the id
@@ -599,7 +611,7 @@ class Sheerka(Concept):
if isinstance(concept, str):
if self.is_known(found := self.get_by_name(concept)):
instances = new_instances(found)
core.builtin_helpers.set_is_evaluated(instances)
core.builtin_helpers.set_is_evaluated(instances, check_nb_variables=True)
return instances
return None
+4 -1
View File
@@ -5,7 +5,7 @@ from core.sheerka.services.sheerka_service import BaseService
CONCEPTS_FILE = "_concepts_lite.txt"
CONCEPTS_FILE_ALL_CONCEPTS = "_concepts.txt"
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE_ALL_CONCEPTS
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE
class SheerkaAdmin(BaseService):
NAME = "Admin"
@@ -47,6 +47,9 @@ class SheerkaAdmin(BaseService):
if concept_file == "full":
concept_file = CONCEPTS_FILE_ALL_CONCEPTS
elif not concept_file.startswith("_concepts"):
concept_file = f"_concepts_{concept_file}.txt"
try:
start = time.time_ns()
nb_lines = 0
+15 -2
View File
@@ -2,7 +2,7 @@ import core.utils
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind, Keywords, Token
from core.tokenizer import Tokenizer, TokenKind, Token
NO_MATCH = "** No Match **"
@@ -88,6 +88,20 @@ class ParserInput:
return self.pos < self.end
def seek(self, pos):
    """
    Move the token offset to position pos
    :param pos: index of the token to move to
    :return: True if pos is a valid position, False otherwise
    """
    if 0 <= pos < self.end:
        self.pos = pos
        self.token = self.tokens[self.pos]
        return True

    # invalid position: the current token is cleared, self.pos is left as it was
    self.token = None
    return False
def is_empty(self):
if self.text.strip() == "":
return True
@@ -116,7 +130,6 @@ class ParserInput:
tokens = [tokens]
switcher = {
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
}
@@ -31,6 +31,7 @@ class SheerkaModifyConcept(BaseService):
if old_version == concept:
# the concept is not modified
# This is an important sanity check. Do not remove it just because you don't understand it
return self.sheerka.ret(
self.NAME, False,
self.sheerka.new(
@@ -2,6 +2,7 @@ from dataclasses import dataclass
from typing import List
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.sheerka_service import ServiceObj, BaseService
@@ -48,6 +49,7 @@ class SheerkaVariableManager(BaseService):
variable = Variable(context.event.get_digest(), who, key, value, None)
self.sheerka.cache_manager.put(self.VARIABLES_ENTRY, variable.get_key(), variable)
return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
def load(self, who, key):
variable = self.sheerka.cache_manager.get(self.VARIABLES_ENTRY, who + "|" + key)
+26 -13
View File
@@ -62,6 +62,7 @@ class Token:
_strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
_repr_value: str = field(default=None, repr=False, compare=False, hash=None)
def __repr__(self):
if self.type == TokenKind.IDENTIFIER:
@@ -82,7 +83,7 @@ class Token:
if self._strip_quote:
return self._strip_quote
self._strip_quote = self._to_str(True)
self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
return self._strip_quote
@property
@@ -90,18 +91,36 @@ class Token:
if self._str_value:
return self._str_value
self._str_value = self._to_str(False)
self._str_value = self.to_str(False)
return self._str_value
@property
def repr_value(self):
    """
    Printable value of the token, computed once then kept in self._repr_value
    EOF, whitespace and newline tokens get a placeholder, the other ones use str_value
    """
    if not self._repr_value:
        placeholders = (
            (TokenKind.EOF, "<EOF>"),
            (TokenKind.WHITESPACE, "<ws>"),
            (TokenKind.NEWLINE, "<nl>"),
        )
        for kind, text in placeholders:
            if self.type == kind:
                self._repr_value = text
                break
        else:
            # no placeholder for this kind of token
            self._repr_value = self.str_value

    return self._repr_value
@staticmethod
def is_whitespace(token):
    """
    Tells whether the token is a whitespace token
    :param token: may be None (or any falsy value)
    :return: the token itself when it is falsy, else True / False
    """
    if not token:
        return token

    return token.type == TokenKind.WHITESPACE
def _to_str(self, strip_quote):
def to_str(self, strip_quote):
    """
    String version of the value of the token
    :param strip_quote: when True, the first and last characters of a STRING token are removed
    :return:
    """
    kind = self.type

    if kind == TokenKind.STRING and strip_quote:
        return self.value[1:-1]

    if kind == TokenKind.KEYWORD:
        # the value of a KEYWORD token is itself an object exposing a 'value'
        return self.value.value

    if kind == TokenKind.CONCEPT:
        from core.utils import str_concept
        return str_concept(self.value)

    return str(self.value)
@@ -136,8 +155,6 @@ class Tokenizer:
Class that can iterate on the tokens
"""
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text, yield_eof=True, parse_word=False):
self.text = text
self.text_len = len(text)
@@ -175,9 +192,7 @@ class Tokenizer:
from core.concept import VARIABLE_PREFIX
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif self.i + 7 < self.text_len and \
@@ -335,11 +350,9 @@ class Tokenizer:
yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
self.i += len(word)
self.column += len(word)
elif c.isalpha() or c == "_":
elif c.isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif c.isdigit():
@@ -457,7 +470,7 @@ class Tokenizer:
i = start_index + 1
escape = False
#newline = None
# newline = None
while i < self.text_len:
c = self.text[i]
result += c
+22
View File
@@ -296,6 +296,28 @@ def dict_product(a, b):
return res
def get_n_clones(obj, n):
    """
    Returns a list that starts with obj itself, followed by n - 1 clones of obj
    :param obj: object exposing a clone() method
    :param n: expected number of items (the list always contains at least obj)
    :return: list
    """
    return [obj] + [obj.clone() for _ in range(n - 1)]
def obj_product(list_of_objs, new_items, add_item):
    """
    Combines every object with every new item
    Each object is expanded (using get_n_clones) into one instance per new item,
    then add_item(instance, item) is called on each (instance, item) pair
    :param list_of_objs: returned as is when None or empty
    :param new_items: items to distribute over the instances
    :param add_item: callable taking (instance, item)
    :return: list of all the instances
    """
    if list_of_objs is None:
        return list_of_objs
    if len(list_of_objs) == 0:
        return list_of_objs

    nb_items = len(new_items)
    product = []
    for current in list_of_objs:
        instances = get_n_clones(current, nb_items)
        product.extend(instances)
        for instance, item in zip(instances, new_items):
            add_item(instance, item)

    return product
def strip_quotes(text):
if not isinstance(text, str):
return text