Implemented FunctionParser
@@ -16,6 +16,8 @@ class BuiltinConcepts(Enum):
    SHEERKA = "sheerka"

    # processing instructions during sheerka.execute()
    # The instruction may alter how the actions work
    DEBUG = "debug"  # activate all debug information
    EVAL_BODY_REQUESTED = "eval body"  # to evaluate the body
    EVAL_WHERE_REQUESTED = "eval where"  # to evaluate the where clause
    RETURN_BODY_REQUESTED = "return body"  # returns the body of the concept instead of the concept itself

+121 -5
@@ -6,14 +6,16 @@ from core.ast.nodes import CallNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, NotInit, ConceptParts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from core.tokenizer import Keywords
# from evaluators.BaseEvaluator import BaseEvaluator
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode

PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
EVAL_STEPS = PARSE_STEPS + [BuiltinConcepts.BEFORE_EVALUATION, BuiltinConcepts.EVALUATION,
                            BuiltinConcepts.AFTER_EVALUATION]
PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"]


def is_same_success(context, return_values):
@@ -342,6 +344,37 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
    return no_python


def parse_function(context, source, tokens=None, start=0):
    """
    Helper function to parse what is supposed to be a function
    :param context:
    :param source:
    :param tokens:
    :param start: start index for the source code node
    :return:
    """
    sheerka = context.sheerka
    from parsers.FunctionParser import FunctionParser
    parser = FunctionParser()
    desc = f"Parsing function '{source}'"
    with context.push(BuiltinConcepts.PARSE_CODE, source, desc=desc) as sub_context:
        sheerka_execution = sheerka.services[SheerkaExecute.NAME]
        res = parser.parse(sub_context, sheerka_execution.get_parser_input(source, tokens))

    if not isinstance(res, list):
        res = [res]

    for r in [r for r in res if sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT)]:
        r.body.body.start += start
        r.body.body.end += start
        if isinstance(r.body.body, SourceCodeWithConceptNode):
            for n in [r.body.body.first, r.body.body.last] + r.body.body.nodes:
                n.start += start
                n.end += start

    return res
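Reviewer note: a minimal sketch of how this helper is meant to be called (the context object is assumed to be an execution context wired to a Sheerka instance, and the source string is illustrative; only parse_function itself comes from this commit):

# hypothetical call site
results = parse_function(context, "add(x, y)", start=10)
for ret_val in results:
    # every PARSER_RESULT body has been shifted by start=10,
    # so its token positions line up with the enclosing source
    print(ret_val.status, ret_val.body)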


def evaluate(context,
             source,
             evaluators="all",
@@ -415,7 +448,12 @@ def get_lexer_nodes(return_values, start, tokens):

            end = start + len(tokens) - 1
            lexer_nodes.append(
                [SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
                [SourceCodeNode(start,
                                end,
                                tokens,
                                ret_val.body.source,
                                python_node=ret_val.body.body,
                                return_value=ret_val)])

        elif ret_val.who == "parsers.ExactConcept":
            concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -479,6 +517,81 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
    return get_lexer_nodes(res.body.body, unrecognized_tokens_node.start, unrecognized_tokens_node.tokens)


def update_compiled(context, concept, errors, parsers=None):
    """
    Recursively iterates through concept.compiled to replace LexerNodes with concepts or lists of ReturnValueConcept.
    When parsing with a LexerNodeParser (SyaNodeParser, BnfNodeParser...)
    the result will be a LexerNode.
    In the specific case of a ConceptNode, the compiled variables will also be LexerNodes (UnrecognizedTokensNode...)
    This function iterates through the compiled values to transform these nodes into concepts of the compiled AST.
    :param context:
    :param concept:
    :param errors: a list that must be initialized by the caller
    :param parsers: to customize the parsers to use
    :return:
    """

    sheerka = context.sheerka
    parsers = parsers or PARSERS

    def _validate_concept(c):
        """
        Recursively browse the compiled properties in order to find unrecognized nodes
        :param c:
        :return:
        """
        for k, v in c.compiled.items():
            if isinstance(v, Concept):
                _validate_concept(v)

            elif isinstance(v, SourceCodeWithConceptNode):
                from parsers.PythonWithConceptsParser import PythonWithConceptsParser
                parser_helper = PythonWithConceptsParser()
                res = parser_helper.parse_nodes(context, v.get_all_nodes())
                if res.status:
                    c.compiled[k] = [res]
                else:
                    errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))

            elif isinstance(v, UnrecognizedTokensNode):
                res = parse_unrecognized(context, v.source, parsers)
                res = only_successful(context, res)  # only keep successful parsers
                if res.status:
                    c.compiled[k] = res.body.body
                else:
                    errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))

    def _get_source(compiled, var_name):
        if var_name not in compiled:
            return None
        if not isinstance(compiled[var_name], list):
            return None
        if not len(compiled[var_name]) == 1:
            return None
        if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE):
            return None
        if not sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT):
            return None
        if compiled[var_name][0].body.name == "parsers.ShortTermMemory":
            return None

        return compiled[var_name][0].body.source

    _validate_concept(concept)

    # Special case where the values of the variables are the names of the variables
    # example: Concept("a plus b").def_var("a").def_var("b")
    # and the user has entered 'a plus b'
    # Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
    # This means that 'a' and 'b' don't have any real value
    if len(concept.metadata.variables) > 0:
        for name, value in concept.metadata.variables:
            if _get_source(concept.compiled, name) != name:
                break
        else:
            concept.metadata.is_evaluated = True
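Reviewer note: the block above relies on Python's for/else — the else branch runs only when the loop never hit break, i.e. when every variable's compiled source equals its own name. A standalone sketch of the same idiom, with hypothetical data:

variables = [("a", None), ("b", None)]    # (name, value) pairs
compiled_sources = {"a": "a", "b": "b"}   # stand-in for _get_source() results

for name, value in variables:
    if compiled_sources.get(name) != name:
        break                             # a real value was parsed
else:
    print("all variables are their own names -> mark is_evaluated")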


def get_names(sheerka, concept_node):
    """
    Finds all the names referenced by the concept_node
@@ -603,10 +716,11 @@ def remove_from_ret_val(sheerka, return_values, concept_key):
    return return_values


def set_is_evaluated(concepts):
def set_is_evaluated(concepts, check_nb_variables=False):
    """
    set is_evaluated to True
    :param concepts:
    :param check_nb_variables: only set is_evaluated if the concept has variables
    :return:
    """
    if concepts is None:
@@ -614,6 +728,8 @@ def set_is_evaluated(concepts):

    if hasattr(concepts, "__iter__"):
        for c in concepts:
            c.metadata.is_evaluated = True
            if not check_nb_variables or check_nb_variables and len(c.metadata.variables) > 0:
                c.metadata.is_evaluated = True
    else:
        concepts.metadata.is_evaluated = True
        if not check_nb_variables or check_nb_variables and len(concepts.metadata.variables) > 0:
            concepts.metadata.is_evaluated = True
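Reviewer note: with check_nb_variables=True the flag is only set on concepts that actually declare variables. A condensed sketch of the predicate applied above:

def should_mark(concept, check_nb_variables):
    # always mark when the check is off; otherwise only mark
    # concepts that carry at least one variable
    return not check_nb_variables or len(concept.metadata.variables) > 0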

+42 -1
@@ -130,7 +130,7 @@ class Concept:
        if isinstance(other, simplec):
            return self.name == other.name and self.body == other.body

        if isinstance(other, (CC, CB, CV, CMV)):
        if isinstance(other, (CC, CB, CV, CMV, CIO)):
            return other == self

        if not isinstance(other, Concept):
@@ -726,4 +726,45 @@ class CMV:
        return txt + ")"


class CIO:
    """
    Concept id only
    only tests the id
    """

    def __init__(self, concept, source=None):
        if isinstance(concept, str):
            self.concept_name = concept
            self.concept_id = None
            self.concept = None
        elif isinstance(concept, Concept):
            self.concept_id = concept.id
            self.concept = concept
        self.source = source
        self.start = -1
        self.end = -1

    def set_concept(self, concept):
        self.concept = concept
        self.concept_id = concept.id

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, Concept):
            return self.concept_id == other.id

        if not isinstance(other, CIO):
            return False

        return self.concept_id == other.concept_id

    def __hash__(self):
        return hash(self.concept_id)

    def __repr__(self):
        return f"CIO(concept='{self.concept}')" if self.concept else f"CIO(name='{self.concept_name}')"


simplec = namedtuple("concept", "name body")  # for simple concepts (test purposes only)

@@ -1,7 +1,7 @@
import logging
import time

from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import NO_MATCH
from core.sheerka.services.SheerkaShortTermMemory import SheerkaShortTermMemory
@@ -309,6 +309,15 @@ class ExecutionContext:
    def in_private_context(self, concept_key):
        return concept_key in self.private_hints

    def add_to_private_hints(self, concept_key):
        self.private_hints.add(concept_key)

    def add_to_protected_hints(self, concept_key):
        self.protected_hints.add(concept_key)

    def add_to_global_hints(self, concept_key):
        self.global_hints.add(concept_key)

    @staticmethod
    def _is_return_value(obj):
        return isinstance(obj, Concept) and obj.key == str(BuiltinConcepts.RETURN_VALUE)
@@ -358,7 +367,11 @@ class ExecutionContext:
        ret_val = self.values["return_values"]
        if not isinstance(ret_val, Concept) or not ret_val.key == str(BuiltinConcepts.RETURN_VALUE):
            return None
        return ret_val.status
        if ret_val.status:
            return True
        if isinstance(ret_val.body, ParserResultConcept):
            return "Almost"
        return False
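Reviewer note: the success check is now tri-state instead of boolean — True for a successful return value, the string "Almost" when the body is a ParserResultConcept (something was parsed but did not succeed), and False otherwise. A hypothetical standalone version of the same decision:

def classify(ret_val):
    if ret_val.status:
        return True
    if isinstance(ret_val.body, ParserResultConcept):
        return "Almost"   # parsed into a result, but not a success
    return False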

    def as_bag(self):
        """

@@ -558,6 +558,12 @@ class Sheerka(Concept):
        return self._get_unknown(metadata)

    def resolve(self, concept):
        """
        Try to find a concept by its name, id, or c:: definition
        A new instance (using new_from_template()) is returned when it's possible
        :param concept:
        :return:
        """

        def new_instances(concepts):
            if hasattr(concepts, "__iter__"):
@@ -567,6 +573,9 @@ class Sheerka(Concept):
        if concept is None:
            return None

        # ##############
        # PREPROCESS
        # ##############
        # if the entry is a concept token, use its values.
        if isinstance(concept, Token):
            if concept.type != TokenKind.CONCEPT:
@@ -578,6 +587,9 @@ class Sheerka(Concept):
                (tmp := core.utils.unstr_concept(concept)) != (None, None):
            concept = tmp

        # ##############
        # PROCESS
        # ##############
        # if the entry is a tuple
        # concept[0] is the name
        # concept[1] is the id
@@ -599,7 +611,7 @@ class Sheerka(Concept):
        if isinstance(concept, str):
            if self.is_known(found := self.get_by_name(concept)):
                instances = new_instances(found)
                core.builtin_helpers.set_is_evaluated(instances)
                core.builtin_helpers.set_is_evaluated(instances, check_nb_variables=True)
                return instances

        return None

@@ -5,7 +5,7 @@ from core.sheerka.services.sheerka_service import BaseService

CONCEPTS_FILE = "_concepts_lite.txt"
CONCEPTS_FILE_ALL_CONCEPTS = "_concepts.txt"
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE_ALL_CONCEPTS
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE

class SheerkaAdmin(BaseService):
    NAME = "Admin"
@@ -47,6 +47,9 @@ class SheerkaAdmin(BaseService):
        if concept_file == "full":
            concept_file = CONCEPTS_FILE_ALL_CONCEPTS

        elif not concept_file.startswith("_concepts"):
            concept_file = f"_concepts_{concept_file}.txt"

        try:
            start = time.time_ns()
            nb_lines = 0

@@ -2,7 +2,7 @@ import core.utils
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind, Keywords, Token
from core.tokenizer import Tokenizer, TokenKind, Token

NO_MATCH = "** No Match **"

@@ -88,6 +88,20 @@ class ParserInput:

        return self.pos < self.end

    def seek(self, pos):
        """
        Move the token offset to position pos
        :param pos:
        :return: True if pos is a valid position, False otherwise
        """
        if pos < 0 or pos >= self.end:
            self.token = None
            return False

        self.pos = pos
        self.token = self.tokens[self.pos]
        return True
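Reviewer note: seek() enables the save/rewind pattern that FunctionParser.parse_parameter_value uses further down in this commit; a sketch (try_some_sub_parse is a hypothetical stand-in):

saved = parser_input.pos              # remember the current token offset
if not try_some_sub_parse(parser_input):
    parser_input.seek(saved)          # rewind on failure and try another route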

    def is_empty(self):
        if self.text.strip() == "":
            return True
@@ -116,7 +130,6 @@ class ParserInput:
            tokens = [tokens]

        switcher = {
            TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
            TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
        }

@@ -31,6 +31,7 @@ class SheerkaModifyConcept(BaseService):

        if old_version == concept:
            # the concept is not modified
            # This is an important sanity check. Do not remove it just because you don't understand it
            return self.sheerka.ret(
                self.NAME, False,
                self.sheerka.new(

@@ -2,6 +2,7 @@ from dataclasses import dataclass
from typing import List

from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.sheerka_service import ServiceObj, BaseService


@@ -48,6 +49,7 @@ class SheerkaVariableManager(BaseService):

        variable = Variable(context.event.get_digest(), who, key, value, None)
        self.sheerka.cache_manager.put(self.VARIABLES_ENTRY, variable.get_key(), variable)
        return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))

    def load(self, who, key):
        variable = self.sheerka.cache_manager.get(self.VARIABLES_ENTRY, who + "|" + key)

+26 -13
@@ -62,6 +62,7 @@ class Token:

    _strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
    _str_value: str = field(default=None, repr=False, compare=False, hash=None)
    _repr_value: str = field(default=None, repr=False, compare=False, hash=None)

    def __repr__(self):
        if self.type == TokenKind.IDENTIFIER:
@@ -82,7 +83,7 @@ class Token:
        if self._strip_quote:
            return self._strip_quote

        self._strip_quote = self._to_str(True)
        self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
        return self._strip_quote

    @property
@@ -90,18 +91,36 @@ class Token:
        if self._str_value:
            return self._str_value

        self._str_value = self._to_str(False)
        self._str_value = self.to_str(False)
        return self._str_value

    @property
    def repr_value(self):
        if self._repr_value:
            return self._repr_value

        if self.type == TokenKind.EOF:
            self._repr_value = "<EOF>"
        elif self.type == TokenKind.WHITESPACE:
            self._repr_value = "<ws>"
        elif self.type == TokenKind.NEWLINE:
            self._repr_value = "<nl>"
        else:
            self._repr_value = self.str_value
        return self._repr_value

    @staticmethod
    def is_whitespace(token):
        return token and token.type == TokenKind.WHITESPACE

    def _to_str(self, strip_quote):
    def to_str(self, strip_quote):
        if strip_quote and self.type == TokenKind.STRING:
            return self.value[1:-1]
        elif self.type == TokenKind.KEYWORD:
            return self.value.value
        elif self.type == TokenKind.CONCEPT:
            from core.utils import str_concept
            return str_concept(self.value)
        else:
            return str(self.value)
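Reviewer note: expected behaviour of the renamed to_str() and the new repr_value property, as a sketch (the Token constructions are illustrative):

t = Token(TokenKind.STRING, "'hello'", 0, 1, 1)
assert t.to_str(True) == "hello"     # quotes stripped
assert t.to_str(False) == "'hello'"  # value kept verbatim
eof = Token(TokenKind.EOF, "", 0, 1, 1)
assert eof.repr_value == "<EOF>"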

@@ -136,8 +155,6 @@ class Tokenizer:
    Class that iterates over the tokens
    """

    KEYWORDS = set(x.value for x in Keywords)

    def __init__(self, text, yield_eof=True, parse_word=False):
        self.text = text
        self.text_len = len(text)
@@ -175,9 +192,7 @@ class Tokenizer:
                from core.concept import VARIABLE_PREFIX
                if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
                    identifier = self.eat_identifier(self.i)
                    token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
                    value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
                    yield Token(token_type, value, self.i, self.line, self.column)
                    yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
                    self.i += len(identifier)
                    self.column += len(identifier)
            elif self.i + 7 < self.text_len and \
@@ -335,11 +350,9 @@ class Tokenizer:
                yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
                self.i += len(word)
                self.column += len(word)
            elif c.isalpha() or c == "_":
            elif c.isalpha():
                identifier = self.eat_identifier(self.i)
                token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
                value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
                yield Token(token_type, value, self.i, self.line, self.column)
                yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
                self.i += len(identifier)
                self.column += len(identifier)
            elif c.isdigit():
@@ -457,7 +470,7 @@ class Tokenizer:

        i = start_index + 1
        escape = False
        #newline = None
        # newline = None
        while i < self.text_len:
            c = self.text[i]
            result += c

@@ -296,6 +296,28 @@ def dict_product(a, b):
    return res


def get_n_clones(obj, n):
    objs = [obj]
    for i in range(n - 1):
        objs.append(obj.clone())
    return objs


def obj_product(list_of_objs, new_items, add_item):
    if list_of_objs is None or len(list_of_objs) == 0:
        return list_of_objs

    res = []

    for obj in list_of_objs:
        instances = get_n_clones(obj, len(new_items))
        res.extend(instances)
        for instance, item in zip(instances, new_items):
            add_item(instance, item)

    return res
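Reviewer note: obj_product() is the clone-and-fan-out pattern that FunctionParser.to_source_code_node inlines for its cartesian product. A toy example with a minimal clonable object:

class Bag:
    def __init__(self, items=None):
        self.items = list(items or [])

    def clone(self):
        # copy the list so clones don't share mutable state
        return Bag(self.items)

bags = obj_product([Bag(["x"])], ["a", "b"],
                   lambda bag, item: bag.items.append(item))
print([b.items for b in bags])   # [['x', 'a'], ['x', 'b']]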


def strip_quotes(text):
    if not isinstance(text, str):
        return text

@@ -1,6 +1,7 @@
import core.utils
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Tokenizer
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseParser import NotInitializedNode
@@ -67,7 +68,8 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
        elif isinstance(part_ret_val, NameNode):
            source = str(part_ret_val)
        elif isinstance(part_ret_val, ReturnValueConcept) and part_ret_val.status:
            source = part_ret_val.value.source
            source = part_ret_val.value.source.as_text() if isinstance(part_ret_val.value.source,
                                                                       ParserInput) else part_ret_val.value.source
        else:
            raise Exception("Unexpected")
        setattr(concept.metadata, prop, source)
@@ -143,7 +145,9 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
        #
        if isinstance(ret_value.value, ParserResultConcept) and len(concept_name) > 1:
            variables = set()
            tokens = ret_value.value.tokens or list(Tokenizer(ret_value.value.source, yield_eof=False))
            source = ret_value.value.source.as_text() if isinstance(ret_value.value.source,
                                                                    ParserInput) else ret_value.value.source
            tokens = ret_value.value.tokens or list(Tokenizer(source, yield_eof=False))
            tokens = [t.str_value for t in tokens]
            for identifier in [i for i in concept_name if str(i).isalnum()]:
                if identifier in tokens:

@@ -1,7 +1,7 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
from parsers.PythonWithConceptsParser import PythonWithConceptsParser


class LexerNodeEvaluator(OneReturnValueEvaluator):
@@ -82,19 +82,10 @@ class LexerNodeEvaluator(OneReturnValueEvaluator):
    def evaluate_python_code(self, context, nodes):
        sheerka = context.sheerka

        helper = LexerNodeParserHelperForPython()
        result = helper.parse(context, nodes)

        if isinstance(result, PythonNode):
            return sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=result.source,
                    body=result,
                    try_parsed=None))
        parser = PythonWithConceptsParser()
        result = parser.parse_nodes(context, nodes)
        if result:
            return result
        else:
            return sheerka.ret(
                self.name,

@@ -40,6 +40,7 @@ class Expando:
    def __repr__(self):
        return f"{dir(self)}"


@dataclass
class PythonEvalError:
    error: Exception
@@ -59,13 +60,19 @@ class PythonEvaluator(OneReturnValueEvaluator):
        self.globals = {}

    def matches(self, context, return_value):
        return return_value.status and \
            isinstance(return_value.value, ParserResultConcept) and \
            isinstance(return_value.value.value, PythonNode)
        if not return_value.status or not isinstance(return_value.value, ParserResultConcept):
            return False
        body = return_value.value.value
        return isinstance(body, PythonNode) or (
                hasattr(body, "python_node") and isinstance(body.python_node, PythonNode))
        # return return_value.status and \
        #     isinstance(return_value.value, ParserResultConcept) and \
        #     isinstance(return_value.value.value, PythonNode)

    def eval(self, context, return_value):
        sheerka = context.sheerka
        node = return_value.value.value
        node = return_value.value.value if isinstance(return_value.value.value, PythonNode) else \
            return_value.value.value.python_node

        context.log(f"Evaluating python node {node}.", self.name)


@@ -4,8 +4,8 @@ from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import DEFINITION_TYPE_BNF, Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
from core.utils import strip_tokens
from core.tokenizer import Tokenizer, TokenKind
from core.utils import strip_tokens, make_unique
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode

@@ -228,6 +228,34 @@ class AtomNodeParser(BaseNodeParser):
        """
        return len(concept.metadata.variables) == 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF

    def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):

        def new_instances(list_of_concepts):
            if list_of_concepts is None:
                return None

            return [self.context.sheerka.new_from_template(c, c.id) for c in list_of_concepts]

        if token.type == TokenKind.WHITESPACE:
            return None

        def as_list(a):
            if a is None:
                return a

            return a if isinstance(a, list) else [a]

        concepts_by_name = as_list(self.sheerka.resolve(token.value))
        concepts_by_first_keyword = new_instances(super().get_concepts(token, self._is_eligible))

        if concepts_by_name is None:
            return concepts_by_first_keyword

        if concepts_by_first_keyword is None:
            return concepts_by_name

        return make_unique(concepts_by_name + concepts_by_first_keyword, lambda c: c.id)

    def get_concepts_sequences(self):

        forked = []
@@ -242,13 +270,6 @@ class AtomNodeParser(BaseNodeParser):
            concept_parser_helpers.extend(forked)
            forked.clear()

        def _get_concepts_by_name(name):
            other_concepts = self.sheerka.get_by_name(name)
            if isinstance(other_concepts, list):
                return other_concepts

            return [other_concepts] if self.sheerka.is_known(other_concepts) else []

        concept_parser_helpers = [AtomConceptParserHelper(self.context)]

        while self.parser_input.next_token(False):
@@ -263,8 +284,8 @@ class AtomNodeParser(BaseNodeParser):
                if concept_parser.eat_token(token, pos):
                    concept_parser.lock()

            concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
            #self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
            concepts = self.get_concepts(token, self._is_eligible)
            # self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
            if not concepts:
                for concept_parser in concept_parser_helpers:
                    concept_parser.eat_unrecognized(token, pos)
@@ -303,12 +324,13 @@ class AtomNodeParser(BaseNodeParser):

    def get_by_name(self):
        """
        Try to recognize the full parser input as a concept name
        Use the whole input to recognize the concepts
        It will use the name of the concept, but also its compact form (c::)
        :return:
        """
        source = self.parser_input.as_text()
        concepts = self.sheerka.get_by_name(source.strip())
        if not self.sheerka.is_known(concepts):
        concepts = self.sheerka.resolve(source.strip())
        if concepts is None:
            return None

        concepts = [concepts] if isinstance(concepts, Concept) else concepts
@@ -316,17 +338,27 @@ class AtomNodeParser(BaseNodeParser):
        start, end = self.get_tokens_boundaries(self.parser_input.as_tokens())
        for concept in concepts:
            parser_helper = AtomConceptParserHelper(None)
            parser_helper.sequence.append(ConceptNode(
                concept,
                start,
                end,
                strip_tokens(self.parser_input.as_tokens(), True), source))
            parser_helper.sequence.append(ConceptNode(concept,
                                                      start,
                                                      end,
                                                      strip_tokens(self.parser_input.as_tokens(), True), source))
            res.append(parser_helper)

        return res

    def get_valid(self, concept_parser_helpers):
        valid_parser_helpers = []  # be careful, it will be a list of lists
        already_seen = set()

        def compute_hash_code(ph):
            """
            compute a hash code for already-seen parser helpers
            :param ph:
            :return:
            """
            return "#".join(
                [f"c:|{n.concept.id}:" if isinstance(n, ConceptNode) else n.source for n in ph.sequence])

        for parser_helper in concept_parser_helpers:
            if parser_helper.has_error():
                continue
@@ -335,16 +367,18 @@ class AtomNodeParser(BaseNodeParser):
                continue

            for node in parser_helper.sequence:
                if isinstance(node, ConceptNode):
                    if len(node.concept.metadata.variables) > 0:
                        node.concept.metadata.is_evaluated = True  # Do not try to evaluate those concepts
                # if isinstance(node, ConceptNode):
                #     if len(node.concept.metadata.variables) > 0:
                #         node.concept.metadata.is_evaluated = True  # Do not try to evaluate those concepts
                node.tokens = self.parser_input.tokens[node.start:node.end + 1]
                node.fix_source()

            if parser_helper in valid_parser_helpers:
            parser_helper_hash_code = compute_hash_code(parser_helper)
            if parser_helper_hash_code in already_seen:
                continue

            valid_parser_helpers.append(parser_helper)
            already_seen.add(parser_helper_hash_code)

        return valid_parser_helpers


@@ -7,7 +7,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, LexerError, Token, Keywords
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode

DEBUG_COMPILED = True
@@ -46,14 +46,18 @@ class LexerNode(Node):
    def clone(self):
        pass

    def to_short_str(self):
        raise NotImplementedError


class UnrecognizedTokensNode(LexerNode):
    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False
        self.is_frozen = False  # TODO: Remove as it seems to now be useless
        self.parenthesis_count = 0

    def freeze(self):
        # TODO: Remove as it seems to now be useless
        self.is_frozen = True

    def reset(self):
@@ -61,6 +65,7 @@ class UnrecognizedTokensNode(LexerNode):
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0
        self.source = ""

    def add_token(self, token, pos):
        if self.is_frozen:
@@ -135,7 +140,7 @@ class UnrecognizedTokensNode(LexerNode):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
        return f"UnrecognizedTokensNode(source='{self.source}', start={self.start}, end={self.end})"

    def clone(self):
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
@@ -143,6 +148,9 @@ class UnrecognizedTokensNode(LexerNode):
        clone.parenthesis_count = self.parenthesis_count
        return clone

    def to_short_str(self):
        return f"UTN('{self.source}')"


class ConceptNode(LexerNode):
    """
@@ -209,15 +217,30 @@ class ConceptNode(LexerNode):
        # bag["compiled"] = self.concept.compiled
        return bag

    def to_short_str(self):
        return f'CN({self.concept})'


class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized
    """

    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
    def __init__(self, start, end, tokens=None, source=None, python_node=None, return_value=None):
        """

        :param start: start position (index of the first token)
        :param end: end position (index of the last token)
        :param tokens:
        :param source: tokens as string
        :param python_node: PythonNode found (when the SourceCodeNode is validated)
        :param return_value: ReturnValueConcept returned when the source was validated

        When return_value is provided,
        you should have return_value.body.body == python_node
        """
        super().__init__(start, end, tokens, source)
        self.node = node  # The PythonNode (or whatever language node) that is found
        self.python_node = python_node  # The PythonNode (or whatever language node) that is found
        self.return_value = return_value  # original result of the parsing

    def __eq__(self, other):
@@ -232,7 +255,7 @@ class SourceCodeNode(LexerNode):
        if not isinstance(other, SourceCodeNode):
            return False

        return self.node == other.node and \
        return self.python_node == other.python_node and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source
@@ -243,6 +266,9 @@ class SourceCodeNode(LexerNode):
    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"

    def to_short_str(self):
        return f"SCN('{self.source}')"


class SourceCodeWithConceptNode(LexerNode):
    """
@@ -254,17 +280,22 @@ class SourceCodeWithConceptNode(LexerNode):
    So I push all the nodes into one big bag
    """

    def __init__(self, first_node, last_node, content_nodes=None):
    def __init__(self, first_node, last_node, content_nodes=None, has_unrecognized=False):
        super().__init__(9999, -1, None)  # why not sys.maxint ?
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = False
        self.has_unrecognized = has_unrecognized
        self._all_nodes = None
        self.fix_all_pos()

        self.python_node = None  # if the source code node is validated against a python parse, here is the PythonNode
        self.return_value = None  # return_value that produced the PythonNode

    def add_node(self, node):
        self.nodes.append(node)
        self.fix_pos(node)
        self._all_nodes = None

        return self

@@ -304,6 +335,9 @@ class SourceCodeWithConceptNode(LexerNode):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        if self.first is None:  # to ease some unit tests where only the python_node is necessary
            return

        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

@@ -334,10 +368,20 @@ class SourceCodeWithConceptNode(LexerNode):
        self.source += self.last.source
        return self

    def get_all_nodes(self):
        if self._all_nodes:
            return self._all_nodes

        self._all_nodes = [self.first, *self.nodes, self.last]
        return self._all_nodes

    def clone(self):
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes)
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes.copy(), self.has_unrecognized)
        return clone

    def to_short_str(self):
        return f"SCWC({self.first}" + ", ".join(n.to_short_str() for n in self.nodes) + f"{self.last})"


@dataclass()
class GrammarErrorNode(ErrorNode):
@@ -479,7 +523,7 @@ class SCWC(HelperWithPos):
        TODO: create a common function or whatever...
        :return:
        """
        source = self.first.source
        source = self.first.source if hasattr(self.first, "source") else self.first
        for n in self.content:
            source += " "
            if hasattr(n, "source"):
@@ -488,7 +532,7 @@ class SCWC(HelperWithPos):
                source += str(n.concept)
            else:
                source += " unknown"
        source += self.last.source
        source += self.last.source if hasattr(self.last, "source") else self.last
        return source


@@ -514,7 +558,7 @@ class CN(HelperWithPos):
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        self.source = "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens])
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
@@ -660,7 +704,7 @@ class UTN(HelperWithPos):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN( source='{self.source}'"
        txt = f"UTN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
@@ -733,7 +777,7 @@ class BaseNodeParser(BaseParser):
        else:
            name = token.value

        custom_concepts = custom(name) if custom else []
        custom_concepts = custom(name) if custom else []  # to get extra concepts using an alternative method

        result = []
        if name in self.concepts_by_first_keyword:
@@ -746,6 +790,7 @@ class BaseNodeParser(BaseParser):

            concept = to_map(self, concept) if to_map else concept
            result.append(concept)

        return core.utils.make_unique(result + custom_concepts,
                                      lambda c: c.concept.id if hasattr(c, "concept") else c.id)


@@ -5,8 +5,9 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka_logger import get_logger
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.tokenizer import TokenKind, Token, Tokenizer, LexerError


# # keep a cache for the parser input
@@ -118,6 +119,20 @@ class BaseParser:
    def __repr__(self):
        return self.name

    def reset_parser(self, context, parser_input: ParserInput):
        self.context = context
        self.sheerka = context.sheerka
        self.parser_input = parser_input
        self.error_sink.clear()

        try:
            self.parser_input.reset(False)
            self.parser_input.next_token()
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False
        return True

    def parse(self, context, parser_input):
        pass

@@ -227,15 +242,14 @@ class BaseParser:
            tokens = [tokens]

        switcher = {
            TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
            TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
            # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
        }

        if custom_switcher:
            switcher.update(custom_switcher)

        for token in tokens:
            value = switcher.get(token.type, lambda t: t.value)(token)
            value = switcher.get(token.type, lambda t: t.str_value)(token)
            res += value
            if tracker is not None and token.type in custom_switcher:
                tracker[value] = token.value

@@ -201,12 +201,12 @@ class DefaultParser(BaseParser):

    def parse_statement(self):
        token = self.parser_input.token
        if token.value == Keywords.DEF:
        if token.value == Keywords.DEF.value:
            self.parser_input.next_token()
            self.context.log("Keyword DEF found.", self.name)
            return self.parse_def_concept(token)
        else:
            return self.parse_isa_concept()

        return self.add_error(CannotHandleErrorNode([token], ""))

    def parse_def_concept(self, def_token):
        """
@@ -250,44 +250,15 @@ class DefaultParser(BaseParser):

        return concept_found

    def parse_isa_concept(self):
        concept_name = self.parse_concept_name()
        if isinstance(concept_name, DefaultParserErrorNode):
            return concept_name

        keyword = []
        token = self.parser_input.token
        if token.value != Keywords.ISA:
            return self.add_error(CannotHandleErrorNode([token], ""))
        keyword.append(token)
        self.parser_input.next_token()

        set_name = self.parse_concept_name()
        return IsaConceptNode(keyword, concept_name, set_name)

    def parse_concept_name(self):
        tokens = []
        token = self.parser_input.token

        while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
            tokens.append(token)
            self.parser_input.next_token()
            token = self.parser_input.token

        if len(tokens) == 0:
            return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
        else:
            return NameNode(tokens)

    def regroup_tokens_by_parts(self, keywords_tokens):

        def_concept_parts = [Keywords.CONCEPT,
                             Keywords.FROM,
                             Keywords.AS,
                             Keywords.WHERE,
                             Keywords.PRE,
                             Keywords.POST,
                             Keywords.RET]
        def_concept_parts = [Keywords.CONCEPT.value,
                             Keywords.FROM.value,
                             Keywords.AS.value,
                             Keywords.WHERE.value,
                             Keywords.PRE.value,
                             Keywords.POST.value,
                             Keywords.RET.value]

        # tokens found, when trying to recognize the parts
        tokens_found_by_parts = {
@@ -307,7 +278,7 @@ class DefaultParser(BaseParser):
        while token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                keywords_tokens.append(token)  # keep track of the keywords
                keyword = token.value
                keyword = Keywords(token.value)
                if tokens_found_by_parts[keyword]:
                    # a part is defined more than once
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
@@ -327,7 +298,7 @@ class DefaultParser(BaseParser):
    def get_concept_name(self, first_token, tokens_found_by_parts):
        name_first_token_index = 1
        token = self.parser_input.token
        if first_token.value != Keywords.CONCEPT:
        if first_token.value != Keywords.CONCEPT.value:
            self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0

@@ -353,7 +324,7 @@ class DefaultParser(BaseParser):
            self.add_error(SyntaxErrorNode([], "Empty declaration"), False)
            return None, NotInitializedNode()

        if definition_tokens[1].value == Keywords.BNF:
        if definition_tokens[1].value == Keywords.BNF.value:
            return self.get_concept_bnf_definition(current_concept_def, definition_tokens)

        return self.get_concept_simple_definition(definition_tokens)
@@ -381,7 +352,7 @@ class DefaultParser(BaseParser):
        return DEFINITION_TYPE_BNF, parsing_result

    def get_concept_simple_definition(self, definition_tokens):
        start = 2 if definition_tokens[1].value == Keywords.DEF else 1
        start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1
        tokens = core.utils.strip_tokens(definition_tokens[start:])
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False)

@@ -2,9 +2,9 @@ import logging

import core.builtin_helpers
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, ConceptParts
from core.concept import VARIABLE_PREFIX
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, TokenKind, LexerError
from core.tokenizer import TokenKind, LexerError
from core.utils import str_concept
from parsers.BaseParser import BaseParser

@@ -56,6 +56,7 @@ class ExactConceptParser(BaseParser):
        concepts = result if isinstance(result, list) else [result]

        for concept in concepts:
            # update the variables of the freshly recognized concept
            if concept in already_recognized:
                context.log(f"Recognized concept {concept} again. Skipping.", self.name)
                # example
@@ -105,7 +106,7 @@ class ExactConceptParser(BaseParser):
                break
            if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE:
                continue
            res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
            res.append(t.value)
        return res

    def combinations(self, iterable):

@@ -191,23 +191,8 @@ class ExpressionParser(BaseParser):
    def __init__(self, **kwargs):
        super().__init__("Expression", 50, False)

    def reset_parser(self, context, parser_input: ParserInput):
        self.context = context
        self.sheerka = context.sheerka
        self.parser_input = parser_input
        self.error_sink.clear()

        try:
            self.parser_input.reset(False)
            self.parser_input.next_token()
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False
        return True

    def parse(self, context, parser_input: ParserInput):
        """
        parser_input can be a string, but it can also be a list of tokens
        :param context:
        :param parser_input:
        :return:

@@ -0,0 +1,407 @@
from dataclasses import dataclass
from typing import List

from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, Node
from parsers.PythonWithConceptsParser import PythonWithConceptsParser

# No need to check for Python code as the source code node will resolve to python code anyway
# I only look for concepts, so
PARSERS = ["BnfNode", "SyaNode", "AtomNode"]


@dataclass
class FunctionParserNode(Node):
    pass


@dataclass()
class NamesNode(FunctionParserNode):
    start: int  # index of the first token
    end: int  # index of the last token
    tokens: List[Token]

    def __repr__(self):
        return f"NamesNode('{self.str_value()}')"

    def str_value(self):
        if self.tokens is None:
            return None

        return "".join([t.str_value for t in self.tokens])

    def to_unrecognized(self):
        return UnrecognizedTokensNode(self.start, self.end, self.tokens).fix_source()


@dataclass()
class FunctionParameter:
    """
    class that represents the result of the parameter parsing
    """
    value: NamesNode  # value parsed
    separator: NamesNode = None  # holds the value and the position of the separator

    def add_sep(self, start, end, tokens):
        self.separator = NamesNode(start, end, tokens)

    def value_to_unrecognized(self):
        return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source()

    def separator_to_unrecognized(self):
        if self.separator is None:
            return None
        return UnrecognizedTokensNode(self.separator.start, self.separator.end, self.separator.tokens).fix_source()


@dataclass
class FunctionNode(FunctionParserNode):
    first: NamesNode  # beginning of the function (it should represent the name of the function)
    last: NamesNode  # last part of the function (it should be the trailing parenthesis)
    parameters: list


class FN(FunctionNode):
    """
    Test class only
    It matches FunctionNode but with fewer constraints

    Thus,
    FN("first", "last", ["param1, ", ...]) can be compared to
    FunctionNode(NamesNode("first"), NamesNode("last"), [FunctionParameter(NamesNode("param1"), NamesNode(", "))])

    Note that a FunctionParameter can easily be defined with a single string
    * "param" -> FunctionParameter(NamesNode("param"), None)
    * "param, " -> FunctionParameter(NamesNode("param"), NamesNode(", "))
    For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part
    """

    def __init__(self, first, last, parameters):
        self.first = first
        self.last = last
        self.parameters = []
        for param in parameters:
            if isinstance(param, tuple):
                self.parameters.append(param)
            elif isinstance(param, str) and (pos := param.find(",")) != -1:
                self.parameters.append((param[:pos], param[pos:]))
            else:
                self.parameters.append((param, None))

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, FN):
            return self.first == other.first and self.last == other.last and self.parameters == other.parameters

        if isinstance(other, FunctionNode):
            if self.first != other.first.str_value() or self.last != other.last.str_value():
                return False
            if len(self.parameters) != len(other.parameters):
                return False
            for self_parameter, other_parameter in zip(self.parameters, other.parameters):
                value = other_parameter.value.str_value() if isinstance(self_parameter[0],
                                                                        str) else other_parameter.value
                sep = other_parameter.separator.str_value() if other_parameter.separator else None
                if self_parameter[0] != value or self_parameter[1] != sep:
                    return False

            return True

        return False

    def __hash__(self):
        return hash((self.first, self.last, tuple(self.parameters)))
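Reviewer note: how the FN helper is meant to be used in tests, as a sketch:

fn = FN("func(", ")", ["a, ", "b"])
# string parameters are normalised to (value, separator) pairs:
assert fn.parameters == [("a", ", "), ("b", None)]
# fn can then be compared against a real FunctionNode coming out of
# FunctionParser.parse_function(), e.g. assert fn == parsed_node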
|
||||
|
||||
|
||||
class FunctionParser(BaseParser):
|
||||
"""
|
||||
The parser will be used to parse func(x, y, z)
|
||||
where x, y and z can be source code, concepts or other functions
|
||||
It will return a SourceCodeNode or SourceCodeNodeWithConcept
|
||||
"""
|
||||
|
||||
def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
|
||||
"""
|
||||
|
||||
:param sep:
|
||||
:param longest_concepts_only: When multiples concepts are found, only keep the longest one
|
||||
so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
|
||||
:param kwargs:
|
||||
"""
|
||||
super().__init__("Function", 55, True)
|
||||
self.sep = sep
|
||||
self.longest_concepts_only = longest_concepts_only
|
||||
self.record_errors = True
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
if not self.record_errors:
|
||||
return
|
||||
|
||||
return super().add_error(error, next_token)
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
"""
|
||||
|
||||
:param context:
|
||||
:param parser_input:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if not isinstance(parser_input, ParserInput):
|
||||
return None
|
||||
|
||||
context.log(f"Parsing '{parser_input}' with FunctionParser", self.name)
|
||||
sheerka = context.sheerka
|
||||
|
||||
if parser_input.is_empty():
|
||||
return sheerka.ret(self.name,
|
||||
False,
|
||||
sheerka.new(BuiltinConcepts.IS_EMPTY))
|
||||
|
||||
if not self.reset_parser(context, parser_input):
|
||||
return self.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||
|
||||
node = self.parse_function()
|
||||
|
||||
if self.parser_input.next_token():
|
||||
self.add_error(UnexpectedTokenErrorNode("Only one function supported",
|
||||
self.parser_input.token,
|
||||
[TokenKind.EOF]))
|
||||
|
||||
if self.has_error:
|
||||
if node is None:
|
||||
body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
|
||||
body=parser_input.as_text(),
|
||||
reason=self.error_sink)
|
||||
else:
|
||||
body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
|
||||
return self.sheerka.ret(self.name, False, body)
|
||||
|
||||
source_code_nodes = self.to_source_code_node(node)
|
||||
|
||||
res = []
|
||||
for source_code_node in source_code_nodes:
|
||||
value = self.get_return_value_body(context.sheerka,
|
||||
self.parser_input.as_text(),
|
||||
source_code_node,
|
||||
source_code_node)
|
||||
|
||||
res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, value))
|
||||
|
||||
return res[0] if len(res) == 1 else res
|
||||
|
||||
def parse_function(self):
|
||||
|
||||
start = self.parser_input.pos
|
||||
token = self.parser_input.token
|
||||
if token.type != TokenKind.IDENTIFIER:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a identifier",
|
||||
token,
|
||||
[TokenKind.IDENTIFIER]))
|
||||
return None
|
||||
|
||||
if not self.parser_input.next_token():
|
||||
self.add_error(UnexpectedEof(f"Unexpected EOF while parsing left parenthesis"))
|
||||
return None
|
||||
|
||||
token = self.parser_input.token
|
||||
if token.type != TokenKind.LPAR:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a left parenthesis",
|
||||
token,
|
||||
[TokenKind.LPAR]))
|
||||
return None
|
||||
|
||||
start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2])
|
||||
if not self.parser_input.next_token():
|
||||
self.add_error(UnexpectedEof(f"Unexpected EOF after left parenthesis"))
|
||||
return FunctionNode(start_node, None, None)
|
||||
|
||||
params = self.parse_parameters()
|
||||
if self.has_error:
|
||||
return FunctionNode(start_node, None, params)
|
||||
|
||||
token = self.parser_input.token
|
||||
if token.type != TokenKind.RPAR:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found",
|
||||
token,
|
||||
[TokenKind.RPAR]))
|
||||
return FunctionNode(start_node, None, params)
|
||||
|
||||
return FunctionNode(start_node,
|
||||
NamesNode(self.parser_input.pos, self.parser_input.pos, [token]),
|
||||
params)
|
||||
|
||||
def parse_parameters(self):
|
||||
nodes = []
|
||||
while True:
|
||||
param_value = self.parse_parameter_value()
|
||||
if not param_value:
|
||||
break
|
||||
|
||||
function_parameter = FunctionParameter(param_value)
|
||||
nodes.append(function_parameter)
|
||||
|
||||
token = self.parser_input.token
|
||||
if token.type == TokenKind.EOF:
|
||||
self.add_error(UnexpectedEof(f"Unexpected EOF while parsing parameters"))
|
||||
return None
|
||||
|
||||
if token.type == TokenKind.RPAR:
|
||||
break
|
||||
|
||||
if token.value == self.sep:
|
||||
sep_pos = self.parser_input.pos
|
||||
self.parser_input.next_token()
|
||||
function_parameter.add_sep(sep_pos,
|
||||
self.parser_input.pos - 1,
|
||||
self.parser_input.tokens[sep_pos: self.parser_input.pos])
|
||||
|
||||
return nodes
|
||||
|
||||
def parse_parameter_value(self):
|
||||
# check if the parameter is a function
|
||||
start_pos = self.parser_input.pos
|
||||
self.record_errors = False
|
||||
func = self.parse_function()
|
||||
self.record_errors = True
|
||||
if func:
|
||||
self.parser_input.next_token()
|
||||
return func
|
||||
|
||||
# otherwise, eat until LPAR or separator
|
||||
self.parser_input.seek(start_pos)
|
||||
self.record_errors = True
|
||||
tokens = []
|
||||
while True:
|
||||
token = self.parser_input.token
|
||||
# if token is None:
|
||||
# break
|
||||
|
||||
if token.value == self.sep or token.type == TokenKind.RPAR:
|
||||
break
|
||||
|
||||
tokens.append(token)
|
||||
if not self.parser_input.next_token(skip_whitespace=False):
|
||||
break
|
||||
|
||||
return NamesNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None

    def to_source_code_node(self, function_node: FunctionNode):
        python_parser = PythonWithConceptsParser()

        if len(function_node.parameters) == 0:
            # validate the source
            nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
            python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse)
            python_node = python_parsing_res.body.body if python_parsing_res.status else None

            return [SourceCodeNode(start=function_node.first.start,
                                   end=function_node.last.end,
                                   tokens=function_node.first.tokens + function_node.last.tokens,
                                   python_node=python_node,
                                   return_value=python_parsing_res)]

        def update_source_code_node(scn, nodes, sep):
            if hasattr(nodes, "__iter__"):
                for n in nodes:
                    scn.add_node(n)
            else:
                scn.add_node(nodes)

            if sep:
                scn.add_node(sep.to_unrecognized())

        res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(), function_node.last.to_unrecognized())]
        for param in function_node.parameters:
            if isinstance(param.value, NamesNode):
                unrecognized = param.value.to_unrecognized()
                # try to recognize concepts
                nodes_sequences = get_lexer_nodes_from_unrecognized(self.context,
                                                                    unrecognized,
                                                                    PARSERS)
            else:
                # the parameter is itself a function
                nodes_sequences = self.to_source_code_node(param.value)

            if self.longest_concepts_only:
                nodes_sequences = self.get_longest_concepts(nodes_sequences)

            if nodes_sequences is None:
                # no concept found
                for source_code_node in res:
                    update_source_code_node(source_code_node, unrecognized, param.separator)

            elif len(nodes_sequences) == 1:
                # only one result
                # Same code as the multiple-results branch below,
                # but here we save the creation of the tmp_res object (not sure it's worth it)
                for source_code_node in res:
                    update_source_code_node(source_code_node, nodes_sequences[0], param.separator)
            else:
                # multiple results: build the cartesian product
                tmp_res = []
                for source_code_node in res:
                    instances = get_n_clones(source_code_node, len(nodes_sequences))
                    tmp_res.extend(instances)
                    for instance, node_sequence in zip(instances, nodes_sequences):
                        update_source_code_node(instance, node_sequence, param.separator)
                res = tmp_res

        # check that it is valid source code
        for source_code_node in res:
            source_code_node.fix_all_pos()
            source_code_node.pseudo_fix_source()

            python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes())
            if python_parsing_res.status:
                source_code_node.python_node = python_parsing_res.body.body
                source_code_node.return_value = python_parsing_res

                # make sure that the concepts found can be evaluated
                errors = []
                for c in source_code_node.python_node.concepts.values():
                    update_compiled(self.context, c, errors)

        return res
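
The multiple-results branch above clones every partial result once per alternative, i.e. a cartesian product of the alternatives, parameter by parameter. The growth pattern in a standalone form (plain lists stand in for SourceCodeWithConceptNode and get_n_clones):

# Editorial sketch of the cartesian-product cloning above.
import copy

def extend_with_alternatives(results, alternatives):
    tmp = []
    for partial in results:
        clones = [copy.deepcopy(partial) for _ in alternatives]
        tmp.extend(clones)
        for clone, alt in zip(clones, alternatives):
            clone.append(alt)
    return tmp

res = [[]]
for alternatives in (["c:twenty one:", "c:twenty: c:one:"], ["c:plus:"]):
    res = extend_with_alternatives(res, alternatives)
print(res)   # 2 x 1 = 2 candidate sequences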

    @staticmethod
    def get_longest_concepts(nodes_sequences):
        """
        The longest sequences are the ones with the fewest concepts.
        For example, 'twenty one' resolves to
            [c:twenty one:]
            [c:twenty:, c:one:]
        [c:twenty one:] has only one concept, so it is the longest one
        (two tokens in a single concept versus one token in each of two concepts)
        :param nodes_sequences:
        :return:
        """
        if nodes_sequences is None:
            return None

        res = []
        min_len = -1
        for current_sequence in nodes_sequences:
            # temporary hack, to be removed once NodeSequence and ConceptSequence are implemented
            current_len = len(current_sequence) if hasattr(current_sequence, "__len__") else 1
            if len(res) == 0:
                res.append(current_sequence)
                min_len = current_len
            elif current_len == min_len:
                res.append(current_sequence)
            elif current_len < min_len:
                res.clear()
                res.append(current_sequence)
                min_len = current_len

        return res
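
A runnable reduction of get_longest_concepts, using the 'twenty one' example from the docstring: keep only the sequences with the fewest elements.

# Editorial sketch: the same selection expressed over plain lists.
sequences = [["c:twenty one:"], ["c:twenty:", "c:one:"]]
min_len = min(len(s) for s in sequences)
longest = [s for s in sequences if len(s) == min_len]
print(longest)   # [['c:twenty one:']]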

@@ -4,9 +4,8 @@ from dataclasses import dataclass

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import LexerError, TokenKind
from parsers.BaseNodeParser import ConceptNode
from parsers.BaseParser import BaseParser, Node, ErrorNode

log = logging.getLogger(__name__)

@@ -70,87 +69,6 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
        self.names.add(node.id)


class LexerNodeParserHelperForPython:
    """Helper class to parse a mix of concepts and Python"""

    def __init__(self):
        self.identifiers = {}  # cache of already created identifiers (the key is id(concept))
        self.identifiers_key = {}  # number of identifiers sharing the same root (prefix)

    def _get_identifier(self, concept):
        """
        Get an identifier for a concept.
        Return the same identifier for the same concept,
        and a different identifier when distinct concepts share the same name.

        Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
        to be instance variables of the parser; I would like to keep the parser as stateless as possible.
        :param concept:
        :return:
        """
        if id(concept) in self.identifiers:
            return self.identifiers[id(concept)]

        identifier = "__C__" + self._sanitize(concept.key or concept.name)
        if concept.id:
            identifier += "__" + concept.id

        if identifier in self.identifiers_key:
            self.identifiers_key[identifier] += 1
            identifier += f"_{self.identifiers_key[identifier]}"
        else:
            self.identifiers_key[identifier] = 0

        identifier += "__C__"

        self.identifiers[id(concept)] = identifier
        return identifier

    @staticmethod
    def _sanitize(identifier):
        res = ""
        for c in identifier:
            res += c if c.isalnum() else "0"
        return res
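
How _get_identifier and _sanitize behave together, as a runnable approximation. The __C__ sentinel and the zero-substitution come from the code above; the per-id(concept) cache is omitted and the sample names are invented.

# Editorial sketch of the identifier scheme: non-alphanumerics become '0',
# and a per-root counter disambiguates distinct concepts with the same name.
def sanitize(identifier):
    return "".join(c if c.isalnum() else "0" for c in identifier)

counters = {}

def make_identifier(name):
    root = "__C__" + sanitize(name)
    if root in counters:
        counters[root] += 1
        return f"{root}_{counters[root]}__C__"
    counters[root] = 0
    return root + "__C__"

print(make_identifier("twenty one"))   # __C__twenty0one__C__
print(make_identifier("twenty one"))   # __C__twenty0one_1__C__  (same name, new concept)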

    def parse(self, context, nodes):
        source = ""
        to_parse = ""

        concepts = {}  # the key is the Python identifier

        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source
                if to_parse:
                    to_parse += " "
                concept = node.concept
                python_id = self._get_identifier(concept)
                to_parse += python_id
                concepts[python_id] = concept
            else:
                source += node.source
                to_parse += node.source

        with context.push(BuiltinConcepts.PARSE_CODE,
                          {"language": "Python", "source": to_parse},
                          desc="Trying Python for '" + to_parse + "'") as sub_context:
            sub_context.add_inputs(to_parse=to_parse)
            python_parser = PythonParser()
            parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse)
            result = python_parser.parse(sub_context, parser_input)
            sub_context.add_values(return_values=result)

        if result.status:
            python_node = result.body.body
            python_node.source = source
            python_node.concepts = concepts
            return python_node

        return result.body  # the error
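
The parse method above rewrites a mixed concept/Python stream into plain Python by substituting each concept with its generated identifier before calling PythonParser. Schematically (node tuples are an editorial stand-in for ConceptNode and friends):

# Editorial sketch: mixed input is rewritten to valid Python before parsing.
nodes = [("concept", "twenty"), ("python", " + 1")]

to_parse = ""
for kind, source in nodes:
    to_parse += "__C__twenty__C__" if kind == "concept" else source
print(to_parse)   # __C__twenty__C__ + 1  (now parseable as Python)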


class PythonParser(BaseParser):
    """
    Parse Python scripts

@@ -1,8 +1,8 @@
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from parsers.BaseNodeParser import ConceptNode
from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser
from parsers.BaseNodeParser import ConceptNode
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser

@@ -12,8 +12,6 @@ unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser):
    def __init__(self, **kwargs):
        super().__init__("PythonWithConcepts", 20)
        self.identifiers = None
        self.identifiers_key = None

    @staticmethod
    def sanitize(identifier):
@@ -33,11 +31,15 @@ class PythonWithConceptsParser(BaseParser):
            yield node

    def parse(self, context, parser_input):
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
        return self.parse_nodes(context, nodes)

    def parse_nodes(self, context, nodes):
        if not nodes:
            return None

        sheerka = context.sheerka

        source = ""
        to_parse = ""
        identifiers = {}

+242
-87
@@ -5,10 +5,12 @@ from typing import List

from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import parse_function
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_n_clones
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
    SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode

@@ -17,39 +19,73 @@ PARSERS = ["BnfNode", "AtomNode", "Python"]

function_parser_res = namedtuple("FunctionParserRes", 'to_out function')

DEBUG_PUSH = "PUSH"
DEBUG_PUSH_UNREC = "PUSH_UNREC"
DEBUG_POP = "POP"
DEBUG_EAT = "EAT"
DEBUG_RECOG = "RECOG"


@dataclass()
class DebugInfo:
    """
    Debug item to trace how the sya parser worked.
    Possible actions:
        PUSH: push the token or the concept to the stack
        PUSH_UNREC: push the token to the UnrecognizedTokensNode
        POP: pop an item to out
        EAT: eat the current token (it was part of the concept currently being parsed)
        RECOG: tokens from the UnrecognizedTokensNode were parsed and recognized
    """
    pos: int = -1  # position of the parser input
    token: Token = None  # current token
    concept: Concept = None  # current concept, if any
    action: str = None  # action taken

    def __repr__(self):
        token_repr = self.token.repr_value if isinstance(self.token, Token) else self.token
        msg = f"{self.pos:3}:{token_repr}" if self.pos != -1 else " _:"
        if self.concept:
            msg += f"({self.concept})"
        return msg + f" => {self.action}"


class ParenthesisMismatchErrorNode(ErrorNode):

    def __init__(self, error_int):
        if isinstance(error_int, tuple):
            self.token = error_int[0]
            if isinstance(error_int[0], Token):
                self.token_value = error_int[0].value
                self.token = error_int[0]
            else:
                self.token_value = error_int[0]
                self.token = None
            self.pos = error_int[1]
        elif isinstance(error_int, Token):
            self.token = error_int
            self.token_value = error_int.value
            self.pos = -1
        else:  # isinstance(error_int, UnrecognizedTokensNode)
            for i, t in reversed(list(enumerate(error_int.tokens))):
                if t.type == TokenKind.LPAR:
                    self.token = t
                    self.token_value = t.value
                    self.pos = i + error_int.start

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, tuple):
            return other[0] == self.token.value and other[1] == self.pos

        if not isinstance(other, ParenthesisMismatchErrorNode):
            return False

        return self.token == other.token and self.pos == other.pos
        return self.token_value == other.token_value and self.pos == other.pos

    def __hash__(self):
        return hash(self.pos)

    def __repr__(self):
        return f"ParenthesisMismatchErrorNode('{self.token.value}', {self.pos}"
        return f"ParenthesisMismatchErrorNode('{self.token_value}', {self.pos})"


@dataclass()
@@ -211,8 +247,9 @@ class SyaConceptParserHelper:


class InFixToPostFix:
    def __init__(self, context):
    def __init__(self, context, debug_enabled=False):
        self.context = context
        self.debug_enabled = debug_enabled

        self.is_locked = False  # when locked, cannot process input

@@ -227,6 +264,8 @@ class InFixToPostFix:
        self.false_positives = []  # concepts that look like known ones, but are not (for debug purposes)
        self.forked = []  # used to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens

        self.parsing_function = False  # indicates that we are currently parsing a function

    def __repr__(self):
        return f"InFixToPostFix({self.debug})"

@@ -243,6 +282,8 @@ class InFixToPostFix:
        return len(self.sequence) + len(self.errors)

    def _add_error(self, error):
        if self.debug_enabled:
            self.debug.append(DebugInfo(action=f"=> ERROR {error}"))
        self.errors.append(error)

    def _is_lpar(self, token):

@@ -294,7 +335,11 @@ class InFixToPostFix:
                item.error = "Not enough suffix parameters"
            else:
                item.error = f"token '{item.expected[0].strip_quote}' not found"
            if self.debug_enabled:
                self.debug.append(DebugInfo(action=f"ERROR {item.error}"))

        if self.debug_enabled:
            self.debug.append(DebugInfo(action=f"{DEBUG_POP} {item}"))
        if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
            self.out.insert(item.potential_pos, item)
        else:

@@ -345,6 +390,26 @@ class InFixToPostFix:
            for i, token in enumerate(parser_helper.tokens):
                self.unrecognized_tokens.add_token(token, parser_helper.start + i)

    def _remove_debug_info_if_needed(self):
        """
        Before trying to manage the unrecognized tokens, a debug line is added to record the token
        that triggered the recognition attempt.
        This line is useless if self.unrecognized_tokens turned out to be irrelevant.
        :return:
        """
        if len(self.debug) > 0 and self.debug[-1].action == "??":
            self.debug.pop()

    def _debug_nodes(self, nodes_sequences):
        res = "["
        first = True
        for sequence in nodes_sequences:
            if not first:
                res += ", "
            res += "[" + ", ".join([n.to_short_str() for n in sequence]) + "]"
            first = False
        return res + "]"

    def get_errors(self):
        def has_error(item):
            if isinstance(item, SyaConceptParserHelper) and item.error:

@@ -439,41 +504,40 @@ class InFixToPostFix:

        self.unrecognized_tokens.fix_source()

        # try to recognize concepts
        nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
            self.context,
            self.unrecognized_tokens,
            PARSERS)

        if nodes_sequences:
            # More than one solution may have been found.
            # In that case, we create a new InFixToPostFix for each new possibility
            if len(nodes_sequences) > 1:
                for node_sequence in nodes_sequences[1:]:
                    clone = self.clone()
                    for node in node_sequence:
                        clone._put_to_out(node)
                    clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
                    self.forked.append(clone)

            # Do not forget the first result, which goes with the current InFixToPostFix
            for node in nodes_sequences[0]:
                self._put_to_out(node)
        else:
        if self.unrecognized_tokens.parenthesis_count > 0:
            # parenthesis mismatch detected, do not try to resolve the unrecognized tokens
            self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens))
            self._put_to_out(self.unrecognized_tokens)
        else:
            # try to recognize concepts
            nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
                self.context,
                self.unrecognized_tokens,
                PARSERS)

            # # try to recognize concepts
            # nodes = self._get_lexer_nodes_from_unrecognized()
            # if nodes:
            #     for node in nodes:
            #         self._put_to_out(node)
            # else:
            #     self._put_to_out(self.unrecognized_tokens)
            if nodes_sequences:
                # More than one solution may have been found.
                # In that case, we create a new InFixToPostFix for each new possibility
                if self.debug_enabled:
                    self.debug.append(DebugInfo(action=f"{DEBUG_RECOG} {self._debug_nodes(nodes_sequences)}"))
                if len(nodes_sequences) > 1:
                    for node_sequence in nodes_sequences[1:]:
                        clone = self.clone()
                        for node in node_sequence:
                            clone._put_to_out(node)
                        clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
                        self.forked.append(clone)

                # Do not forget the first result, which goes with the current InFixToPostFix
                for node in nodes_sequences[0]:
                    self._put_to_out(node)
            else:
                self._put_to_out(self.unrecognized_tokens)

        # create another instance
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def get_functions_from_unrecognized(self, token, pos):
    def get_functions_names_from_unrecognized(self, token, pos):
        """
        The unrecognized tokens end with an lpar '('.
        This means it is a function call, like foo(something)
@@ -489,19 +553,32 @@ class InFixToPostFix:
            self.context,
            self.unrecognized_tokens,
            PARSERS)
        if nodes_sequences is None:
            return None

        if not nodes_sequences:
            nodes_sequences = [[self.unrecognized_tokens.clone()]]

        res = []
        for sequence in nodes_sequences:
            if isinstance(sequence[-1], UnrecognizedTokensNode):
                function = sequence[-1]
            else:
                function = UnrecognizedTokensNode(sequence[-1].start, sequence[-1].end, sequence[-1].tokens)
            function.add_token(token, pos).fix_source()
            last_node = sequence[-1]

            res.append(function_parser_res(sequence[:-1], function))
            if len(last_node.tokens) > 1:
                if isinstance(last_node, UnrecognizedTokensNode):
                    to_out = [UnrecognizedTokensNode(last_node.start, pos - 2, last_node.tokens[:-1]).fix_source()]
                    function_name = UnrecognizedTokensNode(pos - 1, pos - 1, [last_node.tokens[-1]])
                    function_name.add_token(token, pos)
                else:
                    to_out = [last_node.fix_source()]
                    function_name = None

            else:  # len(last_node.tokens) == 1
                if not isinstance(last_node, UnrecognizedTokensNode):
                    function_name = UnrecognizedTokensNode(last_node.start, last_node.end, last_node.tokens)
                else:
                    function_name = last_node
                function_name.add_token(token, pos)
                to_out = []

            res.append(function_parser_res(sequence[:-1] + to_out, function_name))
        return res
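
The split performed above, reduced to plain lists: when '(' arrives, the last pending token becomes the candidate function name and everything before it goes to out.

# Editorial sketch: splitting pending unrecognized tokens when '(' arrives.
pending = ["print", "foo"]           # unrecognized tokens seen so far
to_out = pending[:-1]                # flushed to the output sequence
function_name = pending[-1] + "("    # candidate function name, now owns the '('
print(to_out, function_name)         # ['print'] foo(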

    def pop_stack_to_out(self):

@@ -614,6 +691,8 @@ class InFixToPostFix:
            self.unrecognized_tokens.pop(TokenKind.WHITESPACE)

        current_concept.end = pos
        if self.debug_enabled:
            self.debug.append(DebugInfo(pos, token, None, "??"))
        self.manage_unrecognized()
        # manage the fact that some clones may have been forked
        for forked in self.forked:

@@ -673,17 +752,53 @@ class InFixToPostFix:
        if self.is_locked:
            return

        if self.parsing_function:
            if self.debug_enabled:
                self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))

            self.unrecognized_tokens.add_token(token, pos)

            if self.unrecognized_tokens.parenthesis_count == 0:
                self.unrecognized_tokens.fix_source()
                res = parse_function(self.context,
                                     self.unrecognized_tokens.source,
                                     self.unrecognized_tokens.tokens[:],
                                     self.unrecognized_tokens.start)

                instances = get_n_clones(self, len(res))
                self.forked.extend(instances[1:])
                for instance, res_i in zip(instances, res):

                    if res_i.status or instance.context.sheerka.isinstance(res_i.body, BuiltinConcepts.PARSER_RESULT):
                        # 1. we managed to recognize a function
                        # 2. or we almost managed, e.g. func(one two): not a function, but almost
                        instance._put_to_out(res_i.body.body)
                        instance.unrecognized_tokens.reset()
                    else:
                        # it is not a function, try to recognize the tokens
                        # This situation is unlikely to occur
                        instance.manage_unrecognized()

                    instance.parsing_function = False

            return True
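
The fork-per-result pattern used above (get_n_clones, then zip clones with results) in a standalone form; copy.deepcopy stands in for the repo's clone machinery.

# Editorial sketch of fork-per-result: one parser state per candidate parse.
import copy

def fork_per_result(state, results):
    clones = [state] + [copy.deepcopy(state) for _ in results[1:]]
    return list(zip(clones, results))

for state, result in fork_per_result({"out": []}, ["as-function", "as-tokens"]):
    state["out"].append(result)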

        if self.handle_expected_token(token, pos):
            # a token was found; check whether it is part of a concept being parsed
            # example: Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b")
            # if the token 'bar' is found, it has to be considered part of the concept foo
            self.debug.append(token)
            if self.debug_enabled:
                self._remove_debug_info_if_needed()
                self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT))
            return True

        elif self._is_lpar(token):
            self.debug.append(token)

            if self.debug_enabled:
                self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))

            if self.unrecognized_tokens.is_empty() or self.unrecognized_tokens.is_whitespace():

                # first, remove what was in the buffer
                self.manage_unrecognized()
                for forked in self.forked:

@@ -691,40 +806,65 @@ class InFixToPostFix:
                    forked.eat_token(token, pos)

                self.stack.append((token, pos))

            else:
                # the parenthesis is part of the unrecognized tokens
                # So it's a function
                # So it's maybe a function call

                list_of_results = self.get_functions_from_unrecognized(token, pos)
                if list_of_results:
                    instances = [self]
                    for i in range(len(list_of_results) - 1):
                        clone = self.clone()
                        self.forked.append(clone)
                        instances.append(clone)
                list_of_results = self.get_functions_names_from_unrecognized(token, pos)
                instances = [self]
                for i in range(len(list_of_results) - 1):
                    clone = self.clone()
                    self.forked.append(clone)
                    instances.append(clone)

                    # Manage the result for self and its clones
                    for instance, parsing_res in zip(instances, list_of_results):
                        for to_out in parsing_res.to_out:
                            instance._put_to_out(to_out)
                # Manage the result for self and its clones
                for instance, parsing_res in zip(instances, list_of_results):

                    for to_out in parsing_res.to_out:
                        instance._put_to_out(to_out)

                    if parsing_res.function:
                        instance.unrecognized_tokens = parsing_res.function
                        instance.parsing_function = True
                    else:
                        # special case of "twenty two(": it is not considered a function.
                        # The manage_unrecognized() work was somewhat done by get_functions_names_from_unrecognized(),
                        # so we just put the unrecognized tokens to out

                        instance.unrecognized_tokens.reset()

                        # make sure to pop the current concept
                        if self._stack_isinstance(SyaConceptParserHelper):
                            self.pop_stack_to_out()

                        instance._put_to_out(")")  # mark where the function should end
                        instance.stack.append(parsing_res.function)
                        instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # reset unrecognized
                else:
                    self._put_to_out(")")  # mark where the function should end
                    self.eat_unrecognized(token, pos)  # add the '(' to the rest of the unknown
                    self.stack.append(self.unrecognized_tokens.fix_source())
                    self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
                        instance.stack.append((token, pos))

                # # instance._put_to_out(")")  # mark where the function should end
                # # instance.stack.append(parsing_res.function)
                # # instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # reset unrecognized
                # else:
                #     # handle the case where there are multiple pending tokens
                #     if len(self.unrecognized_tokens.tokens) > 1:
                #         unrecognized = UnrecognizedTokensNode(self.unrecognized_tokens.start,
                #                                               pos - 2,
                #                                               self.unrecognized_tokens.tokens[:-1])
                #         unrecognized.fix_source()
                #         self._put_to_out(unrecognized)
                #         last_token = self.unrecognized_tokens.tokens[-1]
                #         self.unrecognized_tokens.reset()
                #         self.unrecognized_tokens.add_token(last_token, pos - 1)
                #
                #     self.eat_unrecognized(token, pos)  # add the '(' to the rest of the unknown
                #     self.parsing_function = True
                #     # self.stack.append(self.unrecognized_tokens.fix_source())
                #     # self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

            return True

        elif self._is_rpar(token):
            self.debug.append(token)
            if self.debug_enabled:
                self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT))

            # first, remove what was in the buffer
            self.manage_unrecognized()
@@ -775,32 +915,36 @@ class InFixToPostFix:

        return False

    def eat_concept(self, sya_concept_def, token, pos):
    def eat_concept(self, sya_concept_def, token, pos, first_pass=True):
        """
        A concept was found.
        :param sya_concept_def:
        :param token:
        :param pos:
        :param first_pass: True when not called from a fork after manage_unrecognized()
        :return:
        """

        if self.is_locked:
            return
        self.debug.append(sya_concept_def)

        parser_helper = SyaConceptParserHelper(sya_concept_def, pos)

        if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
            parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
        if first_pass:
            if self.debug_enabled:
                self.debug.append(DebugInfo(pos, token, sya_concept_def, "??"))

        if Token.is_whitespace(parser_helper.last_token_before_first_token):
            self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
            if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
                parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]

        # First, try to recognize the tokens that are waiting
        self.manage_unrecognized()
        for forked in self.forked:
            # manage the fact that some clones may have been forked
            forked.eat_concept(sya_concept_def, token, pos)
            if Token.is_whitespace(parser_helper.last_token_before_first_token):
                self.unrecognized_tokens.pop(TokenKind.WHITESPACE)

            # First, try to recognize the tokens that are waiting
            self.manage_unrecognized()
            for forked in self.forked:
                # manage the fact that some clones may have been forked
                forked.eat_concept(sya_concept_def, token, pos, first_pass=False)

        # then, check whether this new concept is linked to the previous ones,
        # i.e., is the previous concept fully matched?

@@ -823,6 +967,9 @@ class InFixToPostFix:
            self.manage_parameters_when_new_concept(parser_helper)
            self._put_to_out(parser_helper.fix_concept())
        else:
            if self.debug_enabled:
                self._remove_debug_info_if_needed()
                self.debug.append(DebugInfo(pos, token, sya_concept_def, DEBUG_PUSH))
            self.stack.append(parser_helper)
            self.manage_parameters_when_new_concept(parser_helper)

@@ -836,11 +983,12 @@ class InFixToPostFix:
        if self.is_locked:
            return

        self.debug.append(token)
        if self.debug_enabled:
            self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))

        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
    def finalize(self, pos):
        """
        Put the remaining items from the stack to out
        :return:

@@ -850,8 +998,14 @@ class InFixToPostFix:
            return

        if len(self.stack) == 0 and len(self.out) == 0:
            # check for parenthesis mismatch
            if self.unrecognized_tokens.parenthesis_count > 0:
                self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens))
            return  # no need to pop the buffer, as no concept was found

        if self.debug_enabled:
            self.debug.append(DebugInfo(pos, "<EOF>", None, "??"))

        while len(self.stack) > 0:
            parser_helper = self.stack[-1]

@@ -863,7 +1017,7 @@ class InFixToPostFix:
            self.manage_unrecognized()
            for forked in self.forked:
                # manage the fact that some clones may have been forked
                forked.finalize()
                forked.finalize(pos)

        failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
        if failed_to_match > 0:

@@ -878,10 +1032,10 @@ class InFixToPostFix:
        self.manage_unrecognized()
        for forked in self.forked:
            # manage the fact that some clones may have been forked
            forked.finalize()
            forked.finalize(pos)

    def clone(self):
        clone = InFixToPostFix(self.context)
        clone = InFixToPostFix(self.context, self.debug_enabled)
        clone.is_locked = self.is_locked
        clone.out = self.out[:]
        clone.stack = [i.clone() if hasattr(i, "clone") else i for i in self.stack]

@@ -983,7 +1137,7 @@ class SyaNodeParser(BaseNodeParser):
                res.extend(forked)
                forked.clear()

        res = [InFixToPostFix(context)]
        res = [InFixToPostFix(context, context.in_context(BuiltinConcepts.DEBUG))]
        while self.parser_input.next_token(False):
            for infix_to_postfix in res:
                infix_to_postfix.reset()

@@ -1027,7 +1181,7 @@ class SyaNodeParser(BaseNodeParser):
        # make sure that remaining items in stack are moved to out
        for infix_to_postfix in res:
            infix_to_postfix.reset()
            infix_to_postfix.finalize()
            infix_to_postfix.finalize(self.parser_input.pos)
        _add_forked_to_res()

        return res

@@ -1058,14 +1212,14 @@ class SyaNodeParser(BaseNodeParser):
        start = item.start
        end = item.end
        has_unrecognized = False
        concept = sheerka.new_from_template(item.concept, item.concept.id)
        concept = sheerka.new_from_template(item.concept, item.concept.key)
        for param_index in reversed(range(len(concept.metadata.variables))):
            inner_item = self.postfix_to_item(sheerka, postfixed)
            if inner_item.start < start:
                start = inner_item.start
            if inner_item.end > end:
                end = inner_item.end
            has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode)
            has_unrecognized |= isinstance(inner_item, (UnrecognizedTokensNode, SourceCodeWithConceptNode))

            param_name = concept.metadata.variables[param_index][0]
            param_value = inner_item.concept if hasattr(inner_item, "concept") else \

@@ -1128,6 +1282,7 @@ class SyaNodeParser(BaseNodeParser):
        if has_unrecognized:
            # Manage some sick cases where a missing parenthesis messes up the order of the sequence
            # example: "foo bar(one plus two"
            # too lazy to fix the why...
            sequence.sort(key=attrgetter("start"))

        ret.append(

@@ -2,7 +2,7 @@ from dataclasses import dataclass

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes, update_compiled
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode

@@ -38,6 +38,7 @@ class UnrecognizedNodeParser(BaseParser):

        sequences_found = [[]]
        has_unrecognized = False
        self.error_sink.clear()

        for node in nodes:
            if isinstance(node, ConceptNode):

@@ -93,7 +94,7 @@ class UnrecognizedNodeParser(BaseParser):
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    source=parser_input.source,
                    body=choice,
                    try_parsed=choice)))

@@ -105,56 +106,8 @@ class UnrecognizedNodeParser(BaseParser):
        return ret

    def validate_concept_node(self, context, concept_node):

        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find unrecognized tokens
            :param concept:
            :return:
            """
            for k, v in concept.compiled.items():
                if isinstance(v, Concept):
                    _validate_concept(v)

                elif isinstance(v, UnrecognizedTokensNode):
                    res = parse_unrecognized(context, v.source, PARSERS)
                    res = only_successful(context, res)  # keep only the successful parsers
                    if res.status:
                        concept.compiled[k] = res.body.body
                    else:
                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))

        def _get_source(compiled, var_name):
            if var_name not in compiled:
                return None
            if not isinstance(compiled[var_name], list):
                return None
            if not len(compiled[var_name]) == 1:
                return None
            if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE):
                return None
            if not sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT):
                return None
            if compiled[var_name][0].body.name == "parsers.ShortTermMemory":
                return None

            return compiled[var_name][0].body.source

        _validate_concept(concept_node.concept)

        # Special case where the values of the variables are the names of the variables themselves
        # example: Concept("a plus b").def_var("a").def_var("b")
        # and the user has entered 'a plus b'
        # Chances are that we are talking about the concept itself, not an instantiation (like '10 plus 2').
        # This means that 'a' and 'b' don't have any real value
        for name, value in concept_node.concept.metadata.variables:
            if not _get_source(concept_node.concept.compiled, name) == name:
                break
        else:
            concept_node.concept.metadata.is_evaluated = True
        update_compiled(context, concept_node.concept, errors)

        if len(errors) > 0:
            return context.sheerka.ret(self.name, False, errors)
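
The for/else above uses Python's else-on-loop: the else block runs only when the loop completed without break. A standalone rendering of the 'a plus b' special case:

# Editorial sketch: when every variable's source is its own name, treat the
# input as the definition itself rather than an instantiation.
variables = [("a", None), ("b", None)]
compiled = {"a": "a", "b": "b"}      # the user typed 'a plus b' literally

for name, _ in variables:
    if compiled.get(name) != name:
        break
else:
    print("definition itself, mark as evaluated")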

@@ -173,8 +173,11 @@ class SheerkaPromptCompleter(Completer):
                break

        m = NAME.match(text[:i][::-1])
        func_name = m.group(0)[::-1]
        return FuncFound(func_name, i - len(func_name), paren_index) if m else None
        if m:
            func_name = m.group(0)[::-1]
            return FuncFound(func_name, i - len(func_name), paren_index)

        return None
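
The completer finds the function name to the left of a '(' by reversing the prefix, matching a name pattern at position 0, and reversing the match back. Standalone (the real NAME pattern may differ; \w+ is an assumption):

# Editorial sketch of the reverse-match trick above.
import re

NAME = re.compile(r"\w+")

text = "print(foo(bar"
i = text.rindex("(")                  # the '(' being completed
m = NAME.match(text[:i][::-1])        # match a name reading right-to-left
if m:
    print(m.group(0)[::-1])           # -> "foo"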

    @staticmethod
    def after_pipe(text, pos):