Implemented FunctionParser

This commit is contained in:
2020-09-17 14:11:09 +02:00
parent 8a866880bc
commit 177a6b1d5f
40 changed files with 1752 additions and 561 deletions
+2
View File
@@ -16,6 +16,8 @@ class BuiltinConcepts(Enum):
SHEERKA = "sheerka"
# processing instructions during sheerka.execute()
# The instruction may alter how the actions work
DEBUG = "debug" # activate all debug information
EVAL_BODY_REQUESTED = "eval body" # to evaluate the body
EVAL_WHERE_REQUESTED = "eval where" # to evaluate the where clause
RETURN_BODY_REQUESTED = "return body" # returns the body of the concept instead of the concept itself
+121 -5
View File
@@ -6,14 +6,16 @@ from core.ast.nodes import CallNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, NotInit, ConceptParts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from core.tokenizer import Keywords
# from evaluators.BaseEvaluator import BaseEvaluator
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
PARSE_STEPS = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
EVAL_STEPS = PARSE_STEPS + [BuiltinConcepts.BEFORE_EVALUATION, BuiltinConcepts.EVALUATION,
BuiltinConcepts.AFTER_EVALUATION]
PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"]
def is_same_success(context, return_values):
@@ -342,6 +344,37 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
return no_python
def parse_function(context, source, tokens=None, start=0):
    """
    Helper function to parse source code that is expected to be a function.

    :param context: execution context (provides the sheerka instance)
    :param source: source code to parse
    :param tokens: optional pre-computed tokens for the source
    :param start: start index added to the positions of the resulting nodes
    :return: list of return values produced by the FunctionParser
    """
    from parsers.FunctionParser import FunctionParser
    sheerka = context.sheerka
    function_parser = FunctionParser()
    with context.push(BuiltinConcepts.PARSE_CODE, source,
                      desc=f"Parsing function '{source}'") as sub_context:
        execution_service = sheerka.services[SheerkaExecute.NAME]
        parser_input = execution_service.get_parser_input(source, tokens)
        results = function_parser.parse(sub_context, parser_input)
        if not isinstance(results, list):
            results = [results]
        # shift every PARSER_RESULT node so its positions are relative to
        # the full source, not to the parsed fragment
        for result in results:
            if not sheerka.isinstance(result.body, BuiltinConcepts.PARSER_RESULT):
                continue
            inner = result.body.body
            inner.start += start
            inner.end += start
            if isinstance(inner, SourceCodeWithConceptNode):
                for node in [inner.first, inner.last] + inner.nodes:
                    node.start += start
                    node.end += start
        return results
def evaluate(context,
source,
evaluators="all",
@@ -415,7 +448,12 @@ def get_lexer_nodes(return_values, start, tokens):
end = start + len(tokens) - 1
lexer_nodes.append(
[SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
[SourceCodeNode(start,
end,
tokens,
ret_val.body.source,
python_node=ret_val.body.body,
return_value=ret_val)])
elif ret_val.who == "parsers.ExactConcept":
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -479,6 +517,81 @@ def get_lexer_nodes_from_unrecognized(context, unrecognized_tokens_node, parsers
return get_lexer_nodes(res.body.body, unrecognized_tokens_node.start, unrecognized_tokens_node.tokens)
def update_compiled(context, concept, errors, parsers=None):
    """
    Recursively iterate through concept.compiled to replace LexerNodes with
    concepts or lists of ReturnValueConcept.

    When parsing using a LexerNodeParser (SyaNodeParser, BnfNodeParser...)
    the result will be a LexerNode.
    In the specific case of a ConceptNode, the compiled variables will also be
    LexerNodes (UnrecognizedTokensNode...).
    This function iterates through the compiled values to transform these
    nodes into concepts of the compiled AST.

    :param context: execution context (provides the sheerka instance)
    :param concept: concept whose compiled values should be transformed
    :param errors: a list that must be initialized by the caller; parse
                   errors are appended to it
    :param parsers: to customize the parsers to use (defaults to PARSERS)
    :return: None (concept.compiled and errors are updated in place)
    """
    sheerka = context.sheerka
    parsers = parsers or PARSERS

    def _validate_concept(c):
        """
        Recursively browse the compiled properties in order to find
        unrecognized nodes and re-parse them.

        :param c: concept whose compiled values are inspected
        :return: None (c.compiled and the enclosing errors list are updated)
        """
        for k, v in c.compiled.items():
            if isinstance(v, Concept):
                # nested concept: recurse into its own compiled values
                _validate_concept(v)
            elif isinstance(v, SourceCodeWithConceptNode):
                # source code mixed with concepts: delegate to the dedicated parser
                from parsers.PythonWithConceptsParser import PythonWithConceptsParser
                parser_helper = PythonWithConceptsParser()
                res = parser_helper.parse_nodes(context, v.get_all_nodes())
                if res.status:
                    c.compiled[k] = [res]
                else:
                    errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))
            elif isinstance(v, UnrecognizedTokensNode):
                res = parse_unrecognized(context, v.source, parsers)
                res = only_successful(context, res)  # only keep successful parsers
                if res.status:
                    c.compiled[k] = res.body.body
                else:
                    errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))

    def _get_source(compiled, var_name):
        # Return the source text of the single successful parser result stored
        # under var_name, or None when the stored value does not have exactly
        # that shape (one RETURN_VALUE wrapping a PARSER_RESULT that is not a
        # ShortTermMemory hit).
        if var_name not in compiled:
            return None
        if not isinstance(compiled[var_name], list):
            return None
        if not len(compiled[var_name]) == 1:
            return None
        if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE):
            return None
        if not sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT):
            return None
        if compiled[var_name][0].body.name == "parsers.ShortTermMemory":
            return None
        return compiled[var_name][0].body.source

    _validate_concept(concept)
    # Special case where the values of the variables are the names of the variables.
    # Example: Concept("a plus b").def_var("a").def_var("b")
    # and the user has entered 'a plus b'.
    # Chances are that we are talking about the concept itself, and not an
    # instantiation (like '10 plus 2').
    # This means that 'a' and 'b' don't have any real value.
    if len(concept.metadata.variables) > 0:
        for name, value in concept.metadata.variables:
            if _get_source(concept.compiled, name) != name:
                break
        else:
            # every variable's source equals its own name: nothing to evaluate
            concept.metadata.is_evaluated = True
def get_names(sheerka, concept_node):
"""
Finds all the names referenced by the concept_node
@@ -603,10 +716,11 @@ def remove_from_ret_val(sheerka, return_values, concept_key):
return return_values
def set_is_evaluated(concepts, check_nb_variables=False):
    """
    Set the is_evaluated metadata flag to True.

    :param concepts: a single concept or an iterable of concepts
                     (None is a no-op)
    :param check_nb_variables: when True, only flag concepts that have at
                               least one variable
    :return: None
    """
    # NOTE: this span was a diff artifact where the old and new bodies were
    # interleaved; this is the reconstructed new behavior.
    if concepts is None:
        return
    if hasattr(concepts, "__iter__"):
        for c in concepts:
            # `not check_nb_variables or check_nb_variables and len(...) > 0`
            # simplified: the second `check_nb_variables` test was redundant
            if not check_nb_variables or len(c.metadata.variables) > 0:
                c.metadata.is_evaluated = True
    else:
        if not check_nb_variables or len(concepts.metadata.variables) > 0:
            concepts.metadata.is_evaluated = True
+42 -1
View File
@@ -130,7 +130,7 @@ class Concept:
if isinstance(other, simplec):
return self.name == other.name and self.body == other.body
if isinstance(other, (CC, CB, CV, CMV)):
if isinstance(other, (CC, CB, CV, CMV, CIO)):
return other == self
if not isinstance(other, Concept):
@@ -726,4 +726,45 @@ class CMV:
return txt + ")"
class CIO:
    """
    Concept Id Only: comparison wrapper that tests concepts by id alone.

    Can be built from a Concept (the id is captured immediately) or from a
    concept name (the concept/id can be attached later via set_concept()).
    """

    def __init__(self, concept, source=None):
        """
        :param concept: a Concept instance or a concept name (str)
        :param source: optional source text the concept was recognized from
        """
        # Default-initialize every attribute so instances are well-formed
        # whichever branch is taken (the original left some attributes
        # undefined, causing AttributeError on later access).
        self.concept_name = None
        self.concept_id = None
        self.concept = None
        if isinstance(concept, str):
            self.concept_name = concept
        elif isinstance(concept, Concept):
            self.concept_id = concept.id
            self.concept = concept
        self.source = source
        self.start = -1
        self.end = -1

    def set_concept(self, concept):
        """Attach the resolved concept and capture its id."""
        self.concept = concept
        self.concept_id = concept.id

    def __eq__(self, other):
        # identity fast-path
        if id(self) == id(other):
            return True
        if isinstance(other, Concept):
            return self.concept_id == other.id
        if not isinstance(other, CIO):
            return False
        # NOTE(review): two unresolved CIOs (concept_id is None on both)
        # compare equal regardless of their names — confirm this is intended.
        return self.concept_id == other.concept_id

    def __hash__(self):
        # consistent with __eq__: hash only the id
        return hash(self.concept_id)

    def __repr__(self):
        return f"CIO(concept='{self.concept}')" if self.concept else f"CIO(name='{self.concept_name}')"
simplec = namedtuple("concept", "name body") # for simple concept (tests purposes only)
+15 -2
View File
@@ -1,7 +1,7 @@
import logging
import time
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import NO_MATCH
from core.sheerka.services.SheerkaShortTermMemory import SheerkaShortTermMemory
@@ -309,6 +309,15 @@ class ExecutionContext:
def in_private_context(self, concept_key):
return concept_key in self.private_hints
def add_to_private_hints (self, concept_key):
self.private_hints.add(concept_key)
def add_to_protected_hints(self, concept_key):
    """Register concept_key as a protected hint."""
    self.protected_hints.add(concept_key)
def add_to_global_hints(self, concept_key):
    """Register concept_key as a global hint."""
    self.global_hints.add(concept_key)
@staticmethod
def _is_return_value(obj):
    # True when obj is a Concept whose key marks it as a RETURN_VALUE concept.
    return isinstance(obj, Concept) and obj.key == str(BuiltinConcepts.RETURN_VALUE)
@@ -358,7 +367,11 @@ class ExecutionContext:
ret_val = self.values["return_values"]
if not isinstance(ret_val, Concept) or not ret_val.key == str(BuiltinConcepts.RETURN_VALUE):
return None
return ret_val.status
if ret_val.status:
return True
if isinstance(ret_val.body, ParserResultConcept):
return "Almost"
return False
def as_bag(self):
"""
+13 -1
View File
@@ -558,6 +558,12 @@ class Sheerka(Concept):
return self._get_unknown(metadata)
def resolve(self, concept):
"""
Try to find a concept by its name, id, or c:: definition
A new instance (using new_from_template()) is returned when it's possible
:param concept:
:return:
"""
def new_instances(concepts):
if hasattr(concepts, "__iter__"):
@@ -567,6 +573,9 @@ class Sheerka(Concept):
if concept is None:
return None
# ##############
# PREPROCESS
# ##############
# if the entry is a concept token, use its values.
if isinstance(concept, Token):
if concept.type != TokenKind.CONCEPT:
@@ -578,6 +587,9 @@ class Sheerka(Concept):
(tmp := core.utils.unstr_concept(concept)) != (None, None):
concept = tmp
# ##############
# PROCESS
# ##############
# if the entry is a tuple
# concept[0] is the name
# concept[1] is the id
@@ -599,7 +611,7 @@ class Sheerka(Concept):
if isinstance(concept, str):
if self.is_known(found := self.get_by_name(concept)):
instances = new_instances(found)
core.builtin_helpers.set_is_evaluated(instances)
core.builtin_helpers.set_is_evaluated(instances, check_nb_variables=True)
return instances
return None
+4 -1
View File
@@ -5,7 +5,7 @@ from core.sheerka.services.sheerka_service import BaseService
CONCEPTS_FILE = "_concepts_lite.txt"
CONCEPTS_FILE_ALL_CONCEPTS = "_concepts.txt"
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE_ALL_CONCEPTS
CONCEPTS_FILE_TO_USE = CONCEPTS_FILE
class SheerkaAdmin(BaseService):
NAME = "Admin"
@@ -47,6 +47,9 @@ class SheerkaAdmin(BaseService):
if concept_file == "full":
concept_file = CONCEPTS_FILE_ALL_CONCEPTS
elif not concept_file.startswith("_concepts"):
concept_file = f"_concepts_{concept_file}.txt"
try:
start = time.time_ns()
nb_lines = 0
+15 -2
View File
@@ -2,7 +2,7 @@ import core.utils
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.sheerka.services.sheerka_service import BaseService
from core.tokenizer import Tokenizer, TokenKind, Keywords, Token
from core.tokenizer import Tokenizer, TokenKind, Token
NO_MATCH = "** No Match **"
@@ -88,6 +88,20 @@ class ParserInput:
return self.pos < self.end
def seek(self, pos):
    """
    Move the token offset to position pos.

    :param pos: target token index (0-based; must be < self.end)
    :return: True if pos is a valid position, False otherwise
    """
    if pos < 0 or pos >= self.end:
        # out of range: invalidate the current token
        self.token = None
        return False
    self.pos = pos
    self.token = self.tokens[self.pos]
    return True
def is_empty(self):
if self.text.strip() == "":
return True
@@ -116,7 +130,6 @@ class ParserInput:
tokens = [tokens]
switcher = {
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
}
@@ -31,6 +31,7 @@ class SheerkaModifyConcept(BaseService):
if old_version == concept:
# the concept is not modified
# This is an important sanity check. Do no remove because you don't understand it
return self.sheerka.ret(
self.NAME, False,
self.sheerka.new(
@@ -2,6 +2,7 @@ from dataclasses import dataclass
from typing import List
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.sheerka_service import ServiceObj, BaseService
@@ -48,6 +49,7 @@ class SheerkaVariableManager(BaseService):
variable = Variable(context.event.get_digest(), who, key, value, None)
self.sheerka.cache_manager.put(self.VARIABLES_ENTRY, variable.get_key(), variable)
return self.sheerka.ret(self.NAME, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
def load(self, who, key):
variable = self.sheerka.cache_manager.get(self.VARIABLES_ENTRY, who + "|" + key)
+26 -13
View File
@@ -62,6 +62,7 @@ class Token:
_strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
_str_value: str = field(default=None, repr=False, compare=False, hash=None)
_repr_value: str = field(default=None, repr=False, compare=False, hash=None)
def __repr__(self):
if self.type == TokenKind.IDENTIFIER:
@@ -82,7 +83,7 @@ class Token:
if self._strip_quote:
return self._strip_quote
self._strip_quote = self._to_str(True)
self._strip_quote = self.value[1:-1] if self.type == TokenKind.STRING else self.value
return self._strip_quote
@property
@@ -90,18 +91,36 @@ class Token:
if self._str_value:
return self._str_value
self._str_value = self._to_str(False)
self._str_value = self.to_str(False)
return self._str_value
@property
def repr_value(self):
    """Printable form of the token value; cached after the first computation."""
    if self._repr_value:
        return self._repr_value
    # invisible token kinds get an explicit placeholder
    if self.type == TokenKind.EOF:
        self._repr_value = "<EOF>"
    elif self.type == TokenKind.WHITESPACE:
        self._repr_value = "<ws>"
    elif self.type == TokenKind.NEWLINE:
        self._repr_value = "<nl>"
    else:
        self._repr_value = self.str_value
    return self._repr_value
@staticmethod
def is_whitespace(token):
    # Truthy when token exists and is a WHITESPACE token.
    # NOTE: returns the (falsy) token itself when token is None/falsy.
    return token and token.type == TokenKind.WHITESPACE
def _to_str(self, strip_quote):
def to_str(self, strip_quote):
    """
    Return the token value as a string.

    :param strip_quote: when True, the surrounding quotes of STRING tokens
                        are removed
    :return: string form of the token value
    """
    if strip_quote and self.type == TokenKind.STRING:
        # drop the opening and closing quote characters
        return self.value[1:-1]
    elif self.type == TokenKind.KEYWORD:
        # KEYWORD tokens carry an enum member; use its underlying value
        return self.value.value
    elif self.type == TokenKind.CONCEPT:
        # local import — presumably avoids a circular import with core.utils
        from core.utils import str_concept
        return str_concept(self.value)
    else:
        return str(self.value)
@@ -136,8 +155,6 @@ class Tokenizer:
Class that can iterate on the tokens
"""
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text, yield_eof=True, parse_word=False):
self.text = text
self.text_len = len(text)
@@ -175,9 +192,7 @@ class Tokenizer:
from core.concept import VARIABLE_PREFIX
if self.i + 1 < self.text_len and self.text[self.i + 1].isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif self.i + 7 < self.text_len and \
@@ -335,11 +350,9 @@ class Tokenizer:
yield Token(TokenKind.WORD, word, self.i, self.line, self.column)
self.i += len(word)
self.column += len(word)
elif c.isalpha() or c == "_":
elif c.isalpha():
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
yield Token(token_type, value, self.i, self.line, self.column)
yield Token(TokenKind.IDENTIFIER, identifier, self.i, self.line, self.column)
self.i += len(identifier)
self.column += len(identifier)
elif c.isdigit():
@@ -457,7 +470,7 @@ class Tokenizer:
i = start_index + 1
escape = False
#newline = None
# newline = None
while i < self.text_len:
c = self.text[i]
result += c
+22
View File
@@ -296,6 +296,28 @@ def dict_product(a, b):
return res
def get_n_clones(obj, n):
    """
    Return a list of n objects: obj itself followed by n - 1 clones of it.

    :param obj: object exposing a clone() method
    :param n: total number of objects wanted (obj itself counts as one)
    :return: list of length n (always at least [obj])
    """
    return [obj] + [obj.clone() for _ in range(n - 1)]
def obj_product(list_of_objs, new_items, add_item):
    """
    For every object, create one instance per item in new_items (the original
    object is reused as the first instance) and attach one item to each
    instance via add_item.

    :param list_of_objs: objects to expand (each must expose clone())
    :param new_items: items to distribute, one per instance
    :param add_item: callable(instance, item) that attaches an item
    :return: the flat list of all instances (or list_of_objs when empty/None)
    """
    if list_of_objs is None or len(list_of_objs) == 0:
        return list_of_objs
    result = []
    for original in list_of_objs:
        # the original plus len(new_items) - 1 clones, one per new item
        instances = [original] + [original.clone() for _ in range(len(new_items) - 1)]
        result.extend(instances)
        for instance, item in zip(instances, new_items):
            add_item(instance, item)
    return result
def strip_quotes(text):
if not isinstance(text, str):
return text
+6 -2
View File
@@ -1,6 +1,7 @@
import core.utils
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Tokenizer
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseParser import NotInitializedNode
@@ -67,7 +68,8 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
elif isinstance(part_ret_val, NameNode):
source = str(part_ret_val)
elif isinstance(part_ret_val, ReturnValueConcept) and part_ret_val.status:
source = part_ret_val.value.source
source = part_ret_val.value.source.as_text() if isinstance(part_ret_val.value.source,
ParserInput) else part_ret_val.value.source
else:
raise Exception("Unexpected")
setattr(concept.metadata, prop, source)
@@ -143,7 +145,9 @@ class AddConceptEvaluator(OneReturnValueEvaluator):
#
if isinstance(ret_value.value, ParserResultConcept) and len(concept_name) > 1:
variables = set()
tokens = ret_value.value.tokens or list(Tokenizer(ret_value.value.source, yield_eof=False))
source = ret_value.value.source.as_text() if isinstance(ret_value.value.source,
ParserInput) else ret_value.value.source
tokens = ret_value.value.tokens or list(Tokenizer(source, yield_eof=False))
tokens = [t.str_value for t in tokens]
for identifier in [i for i in concept_name if str(i).isalnum()]:
if identifier in tokens:
+5 -14
View File
@@ -1,7 +1,7 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
class LexerNodeEvaluator(OneReturnValueEvaluator):
@@ -82,19 +82,10 @@ class LexerNodeEvaluator(OneReturnValueEvaluator):
def evaluate_python_code(self, context, nodes):
sheerka = context.sheerka
helper = LexerNodeParserHelperForPython()
result = helper.parse(context, nodes)
if isinstance(result, PythonNode):
return sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=result.source,
body=result,
try_parsed=None))
parser = PythonWithConceptsParser()
result = parser.parse_nodes(context, nodes)
if result:
return result
else:
return sheerka.ret(
self.name,
+11 -4
View File
@@ -40,6 +40,7 @@ class Expando:
def __repr__(self):
return f"{dir(self)}"
@dataclass
class PythonEvalError:
error: Exception
@@ -59,13 +60,19 @@ class PythonEvaluator(OneReturnValueEvaluator):
self.globals = {}
def matches(self, context, return_value):
return return_value.status and \
isinstance(return_value.value, ParserResultConcept) and \
isinstance(return_value.value.value, PythonNode)
if not return_value.status or not isinstance(return_value.value, ParserResultConcept):
return False
body = return_value.value.value
return isinstance(body, PythonNode) or (
hasattr(body, "python_node") and isinstance(body.python_node, PythonNode))
# return return_value.status and \
# isinstance(return_value.value, ParserResultConcept) and \
# isinstance(return_value.value.value, PythonNode)
def eval(self, context, return_value):
sheerka = context.sheerka
node = return_value.value.value
node = return_value.value.value if isinstance(return_value.value.value, PythonNode) else \
return_value.value.value.python_node
context.log(f"Evaluating python node {node}.", self.name)
+57 -23
View File
@@ -4,8 +4,8 @@ from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import DEFINITION_TYPE_BNF, Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
from core.utils import strip_tokens
from core.tokenizer import Tokenizer, TokenKind
from core.utils import strip_tokens, make_unique
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
@@ -228,6 +228,34 @@ class AtomNodeParser(BaseNodeParser):
"""
return len(concept.metadata.variables) == 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
    """
    Collect candidate concepts for a token: concepts resolved by name merged
    with fresh instances of the concepts indexed by first keyword.

    :param token: current token
    :param to_keep: eligibility filter (this override always passes
                    self._is_eligible to the base implementation)
    :param custom: accepted for interface compatibility; not used here
    :param to_map: accepted for interface compatibility; not used here
    :param strip_quotes: accepted for interface compatibility; not used here
    :return: list of unique concepts, or None when nothing matches
    """
    def new_instances(list_of_concepts):
        # wrap each concept in a new instance created from its template
        if list_of_concepts is None:
            return None
        return [self.context.sheerka.new_from_template(c, c.id) for c in list_of_concepts]

    # whitespace can never start a concept
    if token.type == TokenKind.WHITESPACE:
        return None

    def as_list(a):
        # normalize a single value into a one-element list (None stays None)
        if a is None:
            return a
        return a if isinstance(a, list) else [a]

    concepts_by_name = as_list(self.sheerka.resolve(token.value))
    concepts_by_first_keyword = new_instances(super().get_concepts(token, self._is_eligible))
    if concepts_by_name is None:
        return concepts_by_first_keyword
    if concepts_by_first_keyword is None:
        return concepts_by_name
    # both sources returned something: deduplicate by concept id
    return make_unique(concepts_by_name + concepts_by_first_keyword, lambda c: c.id)
def get_concepts_sequences(self):
forked = []
@@ -242,13 +270,6 @@ class AtomNodeParser(BaseNodeParser):
concept_parser_helpers.extend(forked)
forked.clear()
def _get_concepts_by_name(name):
other_concepts = self.sheerka.get_by_name(name)
if isinstance(other_concepts, list):
return other_concepts
return [other_concepts] if self.sheerka.is_known(other_concepts) else []
concept_parser_helpers = [AtomConceptParserHelper(self.context)]
while self.parser_input.next_token(False):
@@ -263,8 +284,8 @@ class AtomNodeParser(BaseNodeParser):
if concept_parser.eat_token(token, pos):
concept_parser.lock()
concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
#self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
concepts = self.get_concepts(token, self._is_eligible)
# self.context.log(f"concepts found for {token=}: {concepts}", who=self.name)
if not concepts:
for concept_parser in concept_parser_helpers:
concept_parser.eat_unrecognized(token, pos)
@@ -303,12 +324,13 @@ class AtomNodeParser(BaseNodeParser):
def get_by_name(self):
"""
Try to recognize the full parser input as a concept name
Use the whole input to recognize the concepts
It will use the name of the concept, but also its compact form (c::)
:return:
"""
source = self.parser_input.as_text()
concepts = self.sheerka.get_by_name(source.strip())
if not self.sheerka.is_known(concepts):
concepts = self.sheerka.resolve(source.strip())
if concepts is None:
return None
concepts = [concepts] if isinstance(concepts, Concept) else concepts
@@ -316,17 +338,27 @@ class AtomNodeParser(BaseNodeParser):
start, end = self.get_tokens_boundaries(self.parser_input.as_tokens())
for concept in concepts:
parser_helper = AtomConceptParserHelper(None)
parser_helper.sequence.append(ConceptNode(
concept,
start,
end,
strip_tokens(self.parser_input.as_tokens(), True), source))
parser_helper.sequence.append(ConceptNode(concept,
start,
end,
strip_tokens(self.parser_input.as_tokens(), True), source))
res.append(parser_helper)
return res
def get_valid(self, concept_parser_helpers):
valid_parser_helpers = [] # be careful, it will be a list of list
already_seen = set()
def compute_hash_code(ph):
"""
compute a hash code for already seen parser helper
:param ph:
:return:
"""
return "#".join(
[f"c:|{n.concept.id}:" if isinstance(n, ConceptNode) else n.source for n in ph.sequence])
for parser_helper in concept_parser_helpers:
if parser_helper.has_error():
continue
@@ -335,16 +367,18 @@ class AtomNodeParser(BaseNodeParser):
continue
for node in parser_helper.sequence:
if isinstance(node, ConceptNode):
if len(node.concept.metadata.variables) > 0:
node.concept.metadata.is_evaluated = True # Do not try to evaluate those concepts
# if isinstance(node, ConceptNode):
# if len(node.concept.metadata.variables) > 0:
# node.concept.metadata.is_evaluated = True # Do not try to evaluate those concepts
node.tokens = self.parser_input.tokens[node.start:node.end + 1]
node.fix_source()
if parser_helper in valid_parser_helpers:
parser_helper_hash_code = compute_hash_code(parser_helper)
if parser_helper_hash_code in already_seen:
continue
valid_parser_helpers.append(parser_helper)
already_seen.add(parser_helper_hash_code)
return valid_parser_helpers
+59 -14
View File
@@ -7,7 +7,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, LexerError, Token, Keywords
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True
@@ -46,14 +46,18 @@ class LexerNode(Node):
def clone(self):
pass
def to_short_str(self):
raise NotImplementedError
class UnrecognizedTokensNode(LexerNode):
def __init__(self, start, end, tokens):
super().__init__(start, end, tokens)
self.is_frozen = False
self.is_frozen = False # TODO: Remove as it seems to now be useless
self.parenthesis_count = 0
def freeze(self):
# TODO: Remove as it seems to now be useless
self.is_frozen = True
def reset(self):
@@ -61,6 +65,7 @@ class UnrecognizedTokensNode(LexerNode):
self.tokens.clear()
self.is_frozen = False
self.parenthesis_count = 0
self.source = ""
def add_token(self, token, pos):
if self.is_frozen:
@@ -135,7 +140,7 @@ class UnrecognizedTokensNode(LexerNode):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
return f"UnrecognizedTokensNode(source='{self.source}', start={self.start}, end={self.end})"
def clone(self):
clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
@@ -143,6 +148,9 @@ class UnrecognizedTokensNode(LexerNode):
clone.parenthesis_count = self.parenthesis_count
return clone
def to_short_str(self):
    # Compact one-line representation used in debug output.
    return f"UTN('{self.source}')"
class ConceptNode(LexerNode):
"""
@@ -209,15 +217,30 @@ class ConceptNode(LexerNode):
# bag["compiled"] = self.concept.compiled
return bag
def to_short_str(self):
    # Compact one-line representation used in debug output.
    return f'CN({self.concept})'
class SourceCodeNode(LexerNode):
"""
Returned when some source code (like Python source code is recognized)
"""
def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
def __init__(self, start, end, tokens=None, source=None, python_node=None, return_value=None):
"""
:param start: start position (index of the first token)
:param end: end position (index of the last token)
:param tokens:
:param source: tokens as string
:param python_node: PythonNode found (when the SourceCodeNode is validated)
:param return_value: ReturnValueConcept returned when the source was validated
When return_value is provided,
You should have return_value.body.body == node
"""
super().__init__(start, end, tokens, source)
self.node = node # The PythonNode (or whatever language node) that is found
self.python_node = python_node # The PythonNode (or whatever language node) that is found
self.return_value = return_value # original result of the parsing
def __eq__(self, other):
@@ -232,7 +255,7 @@ class SourceCodeNode(LexerNode):
if not isinstance(other, SourceCodeNode):
return False
return self.node == other.node and \
return self.python_node == other.python_node and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
@@ -243,6 +266,9 @@ class SourceCodeNode(LexerNode):
def __repr__(self):
return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
def to_short_str(self):
    # Compact one-line representation used in debug output.
    return f"SCN('{self.source}')"
class SourceCodeWithConceptNode(LexerNode):
"""
@@ -254,17 +280,22 @@ class SourceCodeWithConceptNode(LexerNode):
So I push all the nodes into one big bag
"""
def __init__(self, first_node, last_node, content_nodes=None):
def __init__(self, first_node, last_node, content_nodes=None, has_unrecognized=False):
super().__init__(9999, -1, None) # why not sys.maxint ?
self.first = first_node
self.last = last_node
self.nodes = content_nodes or []
self.has_unrecognized = False
self.has_unrecognized = has_unrecognized
self._all_nodes = None
self.fix_all_pos()
self.python_node = None # if the source code node is validated against a python parse, here is the PythonNode
self.return_value = None # return_value that produced the PythonNode
def add_node(self, node):
    """
    Append a content node, update the start/end boundaries and invalidate
    the cached flat node list.

    :param node: node to add
    :return: self (fluent interface)
    """
    self.nodes.append(node)
    self.fix_pos(node)
    self._all_nodes = None  # cache is stale once the node list changes
    return self
@@ -304,6 +335,9 @@ class SourceCodeWithConceptNode(LexerNode):
return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"
def fix_all_pos(self):
if self.first is None: # to ease some unit test where only the python_node is necessary
return
for n in [self.first, self.last] + self.nodes:
self.fix_pos(n)
@@ -334,10 +368,20 @@ class SourceCodeWithConceptNode(LexerNode):
self.source += self.last.source
return self
def get_all_nodes(self):
    """
    Return first + content nodes + last as one flat list (cached until
    add_node() invalidates it).

    :return: list of all nodes in order
    """
    if self._all_nodes:
        return self._all_nodes
    self._all_nodes = [self.first, *self.nodes, self.last]
    return self._all_nodes
def clone(self):
clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes)
clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes.copy(), self.has_unrecognized)
return clone
def to_short_str(self):
    """Compact one-line representation used in debug output."""
    # fixed: to_short_str is a method and must be CALLED — without the
    # parentheses str.join receives bound methods and raises TypeError
    inner = ", ".join(n.to_short_str() for n in self.nodes)
    return f"SCWC({self.first}{inner}{self.last})"
@dataclass()
class GrammarErrorNode(ErrorNode):
@@ -479,7 +523,7 @@ class SCWC(HelperWithPos):
TODO: create a common function or whatever...
:return:
"""
source = self.first.source
source = self.first.source if hasattr(self.first, "source") else self.first
for n in self.content:
source += " "
if hasattr(n, "source"):
@@ -488,7 +532,7 @@ class SCWC(HelperWithPos):
source += str(n.concept)
else:
source += " unknown"
source += self.last.source
source += self.last.source if hasattr(self.last, "source") else self.last
return source
@@ -514,7 +558,7 @@ class CN(HelperWithPos):
self.concept = concept if isinstance(concept, Concept) else None
def fix_source(self, str_tokens):
self.source = "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens])
self.source = "".join(str_tokens)
return self
def __eq__(self, other):
@@ -660,7 +704,7 @@ class UTN(HelperWithPos):
return hash((self.source, self.start, self.end))
def __repr__(self):
txt = f"UTN( source='{self.source}'"
txt = f"UTN(source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
@@ -733,7 +777,7 @@ class BaseNodeParser(BaseParser):
else:
name = token.value
custom_concepts = custom(name) if custom else []
custom_concepts = custom(name) if custom else [] # to get extra concepts using an alternative method
result = []
if name in self.concepts_by_first_keyword:
@@ -746,6 +790,7 @@ class BaseNodeParser(BaseParser):
concept = to_map(self, concept) if to_map else concept
result.append(concept)
return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)
+18 -4
View File
@@ -5,8 +5,9 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka_logger import get_logger
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.tokenizer import TokenKind, Token, Tokenizer, LexerError
# # keep a cache for the parser input
@@ -118,6 +119,20 @@ class BaseParser:
def __repr__(self):
return self.name
def reset_parser(self, context, parser_input: ParserInput):
self.context = context
self.sheerka = context.sheerka
self.parser_input = parser_input
self.error_sink.clear()
try:
self.parser_input.reset(False)
self.parser_input.next_token()
except LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
return True
def parse(self, context, parser_input):
pass
@@ -227,15 +242,14 @@ class BaseParser:
tokens = [tokens]
switcher = {
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
# TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
}
if custom_switcher:
switcher.update(custom_switcher)
for token in tokens:
value = switcher.get(token.type, lambda t: t.value)(token)
value = switcher.get(token.type, lambda t: t.str_value)(token)
res += value
if tracker is not None and token.type in custom_switcher:
tracker[value] = token.value
+14 -43
View File
@@ -201,12 +201,12 @@ class DefaultParser(BaseParser):
def parse_statement(self):
token = self.parser_input.token
if token.value == Keywords.DEF:
if token.value == Keywords.DEF.value:
self.parser_input.next_token()
self.context.log("Keyword DEF found.", self.name)
return self.parse_def_concept(token)
else:
return self.parse_isa_concept()
return self.add_error(CannotHandleErrorNode([token], ""))
def parse_def_concept(self, def_token):
"""
@@ -250,44 +250,15 @@ class DefaultParser(BaseParser):
return concept_found
def parse_isa_concept(self):
concept_name = self.parse_concept_name()
if isinstance(concept_name, DefaultParserErrorNode):
return concept_name
keyword = []
token = self.parser_input.token
if token.value != Keywords.ISA:
return self.add_error(CannotHandleErrorNode([token], ""))
keyword.append(token)
self.parser_input.next_token()
set_name = self.parse_concept_name()
return IsaConceptNode(keyword, concept_name, set_name)
def parse_concept_name(self):
tokens = []
token = self.parser_input.token
while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
tokens.append(token)
self.parser_input.next_token()
token = self.parser_input.token
if len(tokens) == 0:
return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
else:
return NameNode(tokens)
def regroup_tokens_by_parts(self, keywords_tokens):
def_concept_parts = [Keywords.CONCEPT,
Keywords.FROM,
Keywords.AS,
Keywords.WHERE,
Keywords.PRE,
Keywords.POST,
Keywords.RET]
def_concept_parts = [Keywords.CONCEPT.value,
Keywords.FROM.value,
Keywords.AS.value,
Keywords.WHERE.value,
Keywords.PRE.value,
Keywords.POST.value,
Keywords.RET.value]
# tokens found, when trying to recognize the parts
tokens_found_by_parts = {
@@ -307,7 +278,7 @@ class DefaultParser(BaseParser):
while token.type != TokenKind.EOF:
if token.value in def_concept_parts:
keywords_tokens.append(token) # keep track of the keywords
keyword = token.value
keyword = Keywords(token.value)
if tokens_found_by_parts[keyword]:
# a part is defined more than once
self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
@@ -327,7 +298,7 @@ class DefaultParser(BaseParser):
def get_concept_name(self, first_token, tokens_found_by_parts):
name_first_token_index = 1
token = self.parser_input.token
if first_token.value != Keywords.CONCEPT:
if first_token.value != Keywords.CONCEPT.value:
self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
name_first_token_index = 0
@@ -353,7 +324,7 @@ class DefaultParser(BaseParser):
self.add_error(SyntaxErrorNode([], "Empty declaration"), False)
return None, NotInitializedNode()
if definition_tokens[1].value == Keywords.BNF:
if definition_tokens[1].value == Keywords.BNF.value:
return self.get_concept_bnf_definition(current_concept_def, definition_tokens)
return self.get_concept_simple_definition(definition_tokens)
@@ -381,7 +352,7 @@ class DefaultParser(BaseParser):
return DEFINITION_TYPE_BNF, parsing_result
def get_concept_simple_definition(self, definition_tokens):
start = 2 if definition_tokens[1].value == Keywords.DEF else 1
start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1
tokens = core.utils.strip_tokens(definition_tokens[start:])
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False)
+4 -3
View File
@@ -2,9 +2,9 @@ import logging
import core.builtin_helpers
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, ConceptParts
from core.concept import VARIABLE_PREFIX
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, TokenKind, LexerError
from core.tokenizer import TokenKind, LexerError
from core.utils import str_concept
from parsers.BaseParser import BaseParser
@@ -56,6 +56,7 @@ class ExactConceptParser(BaseParser):
concepts = result if isinstance(result, list) else [result]
for concept in concepts:
# update the variables of the freshly recognized concept
if concept in already_recognized:
context.log(f"Recognized concept {concept} again. Skipping.", self.name)
# example
@@ -105,7 +106,7 @@ class ExactConceptParser(BaseParser):
break
if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE:
continue
res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
res.append(t.value)
return res
def combinations(self, iterable):
-15
View File
@@ -191,23 +191,8 @@ class ExpressionParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("Expression", 50, False)
def reset_parser(self, context, parser_input: ParserInput):
self.context = context
self.sheerka = context.sheerka
self.parser_input = parser_input
self.error_sink.clear()
try:
self.parser_input.reset(False)
self.parser_input.next_token()
except LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
return True
def parse(self, context, parser_input: ParserInput):
"""
parser_input can be string, but text can also be an list of tokens
:param context:
:param parser_input:
:return:
+407
View File
@@ -0,0 +1,407 @@
from dataclasses import dataclass
from typing import List
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, Node
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
# No need to check for Python code as the source code node will resolve to python code anyway
# I only look for concepts, so
PARSERS = ["BnfNode", "SyaNode", "AtomNode"]
@dataclass
class FunctionParserNode(Node):
pass
@dataclass()
class NamesNode(FunctionParserNode):
start: int # index of the first token
end: int # index of the last token
tokens: List[Token]
def __repr__(self):
return f"NameNode('{self.str_value()}')"
def str_value(self):
if self.tokens is None:
return None
return "".join([t.str_value for t in self.tokens])
def to_unrecognized(self):
return UnrecognizedTokensNode(self.start, self.end, self.tokens).fix_source()
@dataclass()
class FunctionParameter:
"""
class the represent result of the parameter parsing
"""
value: NamesNode # value parsed
separator: NamesNode = None # holds the value and the position of the separator
def add_sep(self, start, end, tokens):
self.separator = NamesNode(start, end, tokens)
def value_to_unrecognized(self):
return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source()
def separator_to_unrecognized(self):
if self.separator is None:
return None
return UnrecognizedTokensNode(self.separator.start, self.separator.end, self.separator.tokens).fix_source()
@dataclass
class FunctionNode(FunctionParserNode):
first: NamesNode # beginning of the function (it should represent the name of the function)
last: NamesNode # last part of the function (it should be the trailing parenthesis)
parameters: list
class FN(FunctionNode):
"""
Test class only
It matches with FunctionNode but with less constraints
Thereby,
FN("first", "last", ["param1," ...]) can be compared to
FunctionNode(NamesNode("first"), NamesNode("second"), [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")])
Note that FunctionParameter can easily be defined with a single string
* "param" -> FunctionParameter(NamesNode("param"), None)
* "param, " -> FunctionParameter(NamesNode("param"), NamesNode(", "))
For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part
"""
def __init__(self, first, last, parameters):
self.first = first
self.last = last
self.parameters = []
for param in parameters:
if isinstance(param, tuple):
self.parameters.append(param)
elif isinstance(param, str) and (pos := param.find(",")) != -1:
self.parameters.append((param[:pos], param[pos:]))
else:
self.parameters.append((param, None))
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, FN):
return self.first == other.first and self.last == other.last and self.parameters == other.parameters
if isinstance(other, FunctionNode):
if self.first != other.first.str_value() or self.last != other.last.str_value():
return False
if len(self.parameters) != len(other.parameters):
return False
for self_parameter, other_parameter in zip(self.parameters, other.parameters):
value = other_parameter.value.str_value() if isinstance(self_parameter[0],
str) else other_parameter.value
sep = other_parameter.separator.str_value() if other_parameter.separator else None
if self_parameter[0] != value or self_parameter[1] != sep:
return False
return True
return False
def __hash__(self):
return hash((self.first, self.last, self.parameters))
class FunctionParser(BaseParser):
"""
The parser will be used to parse func(x, y, z)
where x, y and z can be source code, concepts or other functions
It will return a SourceCodeNode or SourceCodeNodeWithConcept
"""
def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
"""
:param sep:
:param longest_concepts_only: When multiples concepts are found, only keep the longest one
so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
:param kwargs:
"""
super().__init__("Function", 55, True)
self.sep = sep
self.longest_concepts_only = longest_concepts_only
self.record_errors = True
def add_error(self, error, next_token=True):
if not self.record_errors:
return
return super().add_error(error, next_token)
def parse(self, context, parser_input: ParserInput):
"""
:param context:
:param parser_input:
:return:
"""
if not isinstance(parser_input, ParserInput):
return None
context.log(f"Parsing '{parser_input}' with FunctionParser", self.name)
sheerka = context.sheerka
if parser_input.is_empty():
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
node = self.parse_function()
if self.parser_input.next_token():
self.add_error(UnexpectedTokenErrorNode("Only one function supported",
self.parser_input.token,
[TokenKind.EOF]))
if self.has_error:
if node is None:
body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=parser_input.as_text(),
reason=self.error_sink)
else:
body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
return self.sheerka.ret(self.name, False, body)
source_code_nodes = self.to_source_code_node(node)
res = []
for source_code_node in source_code_nodes:
value = self.get_return_value_body(context.sheerka,
self.parser_input.as_text(),
source_code_node,
source_code_node)
res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, value))
return res[0] if len(res) == 1 else res
def parse_function(self):
start = self.parser_input.pos
token = self.parser_input.token
if token.type != TokenKind.IDENTIFIER:
self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a identifier",
token,
[TokenKind.IDENTIFIER]))
return None
if not self.parser_input.next_token():
self.add_error(UnexpectedEof(f"Unexpected EOF while parsing left parenthesis"))
return None
token = self.parser_input.token
if token.type != TokenKind.LPAR:
self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a left parenthesis",
token,
[TokenKind.LPAR]))
return None
start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2])
if not self.parser_input.next_token():
self.add_error(UnexpectedEof(f"Unexpected EOF after left parenthesis"))
return FunctionNode(start_node, None, None)
params = self.parse_parameters()
if self.has_error:
return FunctionNode(start_node, None, params)
token = self.parser_input.token
if token.type != TokenKind.RPAR:
self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found",
token,
[TokenKind.RPAR]))
return FunctionNode(start_node, None, params)
return FunctionNode(start_node,
NamesNode(self.parser_input.pos, self.parser_input.pos, [token]),
params)
def parse_parameters(self):
nodes = []
while True:
param_value = self.parse_parameter_value()
if not param_value:
break
function_parameter = FunctionParameter(param_value)
nodes.append(function_parameter)
token = self.parser_input.token
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEof(f"Unexpected EOF while parsing parameters"))
return None
if token.type == TokenKind.RPAR:
break
if token.value == self.sep:
sep_pos = self.parser_input.pos
self.parser_input.next_token()
function_parameter.add_sep(sep_pos,
self.parser_input.pos - 1,
self.parser_input.tokens[sep_pos: self.parser_input.pos])
return nodes
def parse_parameter_value(self):
# check if the parameter is a function
start_pos = self.parser_input.pos
self.record_errors = False
func = self.parse_function()
self.record_errors = True
if func:
self.parser_input.next_token()
return func
# otherwise, eat until LPAR or separator
self.parser_input.seek(start_pos)
self.record_errors = True
tokens = []
while True:
token = self.parser_input.token
# if token is None:
# break
if token.value == self.sep or token.type == TokenKind.RPAR:
break
tokens.append(token)
if not self.parser_input.next_token(skip_whitespace=False):
break
return NamesNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None
def to_source_code_node(self, function_node: FunctionNode):
python_parser = PythonWithConceptsParser()
if len(function_node.parameters) == 0:
# validate the source
nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse)
python_node = python_parsing_res.body.body if python_parsing_res.status else None
return [SourceCodeNode(start=function_node.first.start,
end=function_node.last.end,
tokens=function_node.first.tokens + function_node.last.tokens,
python_node=python_node,
return_value=python_parsing_res)]
def update_source_code_node(scn, nodes, sep):
if hasattr(nodes, "__iter__"):
for n in nodes:
scn.add_node(n)
else:
scn.add_node(nodes)
if sep:
scn.add_node(sep.to_unrecognized())
res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(), function_node.last.to_unrecognized())]
for param in function_node.parameters:
if isinstance(param.value, NamesNode):
unrecognized = param.value.to_unrecognized()
# try to recognize concepts
nodes_sequences = get_lexer_nodes_from_unrecognized(self.context,
unrecognized,
PARSERS)
else:
# the parameter is also a function
nodes_sequences = self.to_source_code_node(param.value)
if self.longest_concepts_only:
nodes_sequences = self.get_longest_concepts(nodes_sequences)
if nodes_sequences is None:
# no concept found
for source_code_node in res:
update_source_code_node(source_code_node, unrecognized, param.separator)
elif len(nodes_sequences) == 1:
# only one result
# It is the same code than when there are multiple results
# But here, we save the creation of the tmp_res object (not sure it worth it)
for source_code_node in res:
update_source_code_node(source_code_node, nodes_sequences[0], param.separator)
else:
# multiple result, make the cartesian product
tmp_res = []
for source_code_node in res:
instances = get_n_clones(source_code_node, len(nodes_sequences))
tmp_res.extend(instances)
for instance, node_sequence in zip(instances, nodes_sequences):
update_source_code_node(instance, node_sequence, param.separator)
res = tmp_res
# check if it is a valid source code
for source_code_node in res:
source_code_node.fix_all_pos()
source_code_node.pseudo_fix_source()
python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes())
if python_parsing_res.status:
source_code_node.python_node = python_parsing_res.body.body
source_code_node.return_value = python_parsing_res
# make sure that concepts found can be evaluated
errors = []
for c in source_code_node.python_node.concepts.values():
update_compiled(self.context, c, errors)
return res
@staticmethod
def get_longest_concepts(nodes_sequences):
"""
The longest sequences are the ones that have the less number of concepts
For example
'twenty one' resolves to
[c:twenty one:]
[c:twenty:, c:one:]
[c:twenty one:] has only one concept, so it's the longest one (two tokens against one token twice)
:param nodes_sequences:
:return:
"""
if nodes_sequences is None:
return None
res = []
min_len = -1
for current_sequence in nodes_sequences:
# awful hack to remove when NodeSequence and ConceptSequence will be implemented
current_len = len(current_sequence) if hasattr(current_sequence, "__len__") else 1
if len(res) == 0:
res.append(current_sequence)
min_len = current_len
elif current_len == min_len:
res.append(current_sequence)
elif current_len < min_len:
res.clear()
res.append(current_sequence)
min_len = current_len
return res
+1 -83
View File
@@ -4,9 +4,8 @@ from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import LexerError, TokenKind
from parsers.BaseNodeParser import ConceptNode
from parsers.BaseParser import BaseParser, Node, ErrorNode
log = logging.getLogger(__name__)
@@ -70,87 +69,6 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
self.names.add(node.id)
class LexerNodeParserHelperForPython:
"""Helper class to parse mix of concepts and Python"""
def __init__(self):
self.identifiers = {} # cache for already created identifier (the key is id(concept))
self.identifiers_key = {} # number of identifiers with the same root (prefix)
def _get_identifier(self, concept):
"""
Get an identifier for a concept.
Make sure to return the same identifier if the same concept
Make sure to return a different identifier if same name but different concept
Internal function because I don't want identifiers, identifiers_key and python_ids_mappings
to be instance variables
I would like to keep this parser as stateless as possible
:param concept:
:return:
"""
if id(concept) in self.identifiers:
return self.identifiers[id(concept)]
identifier = "__C__" + self._sanitize(concept.key or concept.name)
if concept.id:
identifier += "__" + concept.id
if identifier in self.identifiers_key:
self.identifiers_key[identifier] += 1
identifier += f"_{self.identifiers_key[identifier]}"
else:
self.identifiers_key[identifier] = 0
identifier += "__C__"
self.identifiers[id(concept)] = identifier
return identifier
@staticmethod
def _sanitize(identifier):
res = ""
for c in identifier:
res += c if c.isalnum() else "0"
return res
def parse(self, context, nodes):
source = ""
to_parse = ""
concepts = {} # the key is the Python identifier
for node in nodes:
if isinstance(node, ConceptNode):
source += node.source
if to_parse:
to_parse += " "
concept = node.concept
python_id = self._get_identifier(concept)
to_parse += python_id
concepts[python_id] = concept
else:
source += node.source
to_parse += node.source
with context.push(BuiltinConcepts.PARSE_CODE,
{"language": "Python", "source": to_parse},
desc="Trying Python for '" + to_parse + "'") as sub_context:
sub_context.add_inputs(to_parse=to_parse)
python_parser = PythonParser()
parser_input = context.sheerka.services[SheerkaExecute.NAME].get_parser_input(to_parse)
result = python_parser.parse(sub_context, parser_input)
sub_context.add_values(return_values=result)
if result.status:
python_node = result.body.body
python_node.source = source
python_node.concepts = concepts
return python_node
return result.body # the error
class PythonParser(BaseParser):
"""
Parse Python scripts
+6 -4
View File
@@ -1,8 +1,8 @@
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from parsers.BaseNodeParser import ConceptNode
from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser
from parsers.BaseNodeParser import ConceptNode
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
@@ -12,8 +12,6 @@ unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("PythonWithConcepts", 20)
self.identifiers = None
self.identifiers_key = None
@staticmethod
def sanitize(identifier):
@@ -33,11 +31,15 @@ class PythonWithConceptsParser(BaseParser):
yield node
def parse(self, context, parser_input):
sheerka = context.sheerka
nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
return self.parse_nodes(context, nodes)
def parse_nodes(self, context, nodes):
if not nodes:
return None
sheerka = context.sheerka
source = ""
to_parse = ""
identifiers = {}
+242 -87
View File
@@ -5,10 +5,12 @@ from typing import List
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import parse_function
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_n_clones
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode
@@ -17,39 +19,73 @@ PARSERS = ["BnfNode", "AtomNode", "Python"]
function_parser_res = namedtuple("FunctionParserRes", 'to_out function')
DEBUG_PUSH = "PUSH"
DEBUG_PUSH_UNREC = "PUSH_UNREC"
DEBUG_POP = "POP"
DEBUG_EAT = "EAT"
DEBUG_RECOG = "RECOG"
@dataclass()
class DebugInfo:
"""
Debug item to trace how the sya parser worked
Possible action:
PUSH: push the token or the concept to the stack
PUSH_UNREC: push the token to the UnrecognizedTokensNode
POP: pop item to out
EAT: eat the current token (it means that it was part of the concept currently being parsed)
RECOG: when tokens from UnrecognizedTokensNode are parsed and recognized
"""
pos: int = -1 # position of the parser input
token: Token = None # current token
concept: Concept = None # current concept if ay
action: str = None # action taken
def __repr__(self):
token_repr = self.token.repr_value if isinstance(self.token, Token) else self.token
msg = f"{self.pos:3}:{token_repr}" if self.pos != -1 else " _:"
if self.concept:
msg += f"({self.concept})"
return msg + f" => {self.action}"
class ParenthesisMismatchErrorNode(ErrorNode):
def __init__(self, error_int):
if isinstance(error_int, tuple):
self.token = error_int[0]
if isinstance(error_int[0], Token):
self.token_value = error_int[0].value
self.token = error_int[0]
else:
self.token_value = error_int[0]
self.token = None
self.pos = error_int[1]
elif isinstance(error_int, Token):
self.token = error_int
self.token_value = error_int.value
self.pos = -1
else: # isinstance(UnrecognizedTokensNode)
for i, t in reversed(list(enumerate(error_int.tokens))):
if t.type == TokenKind.LPAR:
self.token = t
self.token_value = t.value
self.pos = i + error_int.start
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, tuple):
return other[0] == self.token.value and other[1] == self.pos
if not isinstance(other, ParenthesisMismatchErrorNode):
return False
return self.token == other.token and self.pos == other.pos
return self.token_value == other.token_value and self.pos == other.pos
def __hash__(self):
return hash(self.pos)
def __repr__(self):
return f"ParenthesisMismatchErrorNode('{self.token.value}', {self.pos}"
return f"ParenthesisMismatchErrorNode('{self.token_value}', {self.pos}"
@dataclass()
@@ -211,8 +247,9 @@ class SyaConceptParserHelper:
class InFixToPostFix:
def __init__(self, context):
def __init__(self, context, debug_enabled=False):
self.context = context
self.debug_enabled = debug_enabled
self.is_locked = False # when locked, cannot process input
@@ -227,6 +264,8 @@ class InFixToPostFix:
self.false_positives = [] # concepts that looks like known one, but not (for debug purpose)
self.forked = [] # use to fork InFixToPostFix when multiple parsers recognize the unrecognized_tokens
self.parsing_function = False # indicate that we are currently parsing a function
def __repr__(self):
return f"InFixToPostFix({self.debug})"
@@ -243,6 +282,8 @@ class InFixToPostFix:
return len(self.sequence) + len(self.errors)
def _add_error(self, error):
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"=> ERROR {error}"))
self.errors.append(error)
def _is_lpar(self, token):
@@ -294,7 +335,11 @@ class InFixToPostFix:
item.error = "Not enough suffix parameters"
else:
item.error = f"token '{item.expected[0].strip_quote}' not found"
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"ERROR {item.error}"))
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"{DEBUG_POP} {item}"))
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item)
else:
@@ -345,6 +390,26 @@ class InFixToPostFix:
for i, token in enumerate(parser_helper.tokens):
self.unrecognized_tokens.add_token(token, parser_helper.start + i)
def _remove_debug_info_if_needed(self):
"""
Before trying to manage the unrecognized, a line is added to explain the token which has triggered
the recognition try
This line is useless if self.unrecognized_tokens was irrelevant
:return:
"""
if len(self.debug) > 0 and self.debug[-1].action == "??":
self.debug.pop()
def _debug_nodes(self, nodes_sequences):
res = "["
first = True
for sequence in nodes_sequences:
if not first:
res += ", "
res += "[" + ", ".join([n.to_short_str() for n in sequence]) + "]"
first = False
return res + "]"
def get_errors(self):
def has_error(item):
if isinstance(item, SyaConceptParserHelper) and item.error:
@@ -439,41 +504,40 @@ class InFixToPostFix:
self.unrecognized_tokens.fix_source()
# try to recognize concepts
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)
if nodes_sequences:
# There are more than one solution found
# In the case, we create a new InfixToPostfix for each new possibility
if len(nodes_sequences) > 1:
for node_sequence in nodes_sequences[1:]:
clone = self.clone()
for node in node_sequence:
clone._put_to_out(node)
clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
self.forked.append(clone)
# Do not forget the first result that will go with the current InfixToPostfix
for node in nodes_sequences[0]:
self._put_to_out(node)
else:
if self.unrecognized_tokens.parenthesis_count > 0:
# parenthesis mismatch detected, do not try to resolve the unrecognized
self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens))
self._put_to_out(self.unrecognized_tokens)
else:
# try to recognize concepts
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)
# # try to recognize concepts
# nodes = self._get_lexer_nodes_from_unrecognized()
# if nodes:
# for node in nodes:
# self._put_to_out(node)
# else:
# self._put_to_out(self.unrecognized_tokens)
if nodes_sequences:
# There are more than one solution found
# In the case, we create a new InfixToPostfix for each new possibility
if self.debug_enabled:
self.debug.append(DebugInfo(action=f"{DEBUG_RECOG} {self._debug_nodes(nodes_sequences)}"))
if len(nodes_sequences) > 1:
for node_sequence in nodes_sequences[1:]:
clone = self.clone()
for node in node_sequence:
clone._put_to_out(node)
clone.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
self.forked.append(clone)
# Do not forget the first result that will go with the current InfixToPostfix
for node in nodes_sequences[0]:
self._put_to_out(node)
else:
self._put_to_out(self.unrecognized_tokens)
# create another instance
self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
def get_functions_from_unrecognized(self, token, pos):
def get_functions_names_from_unrecognized(self, token, pos):
"""
The unrecognized ends with an lpar '('
It means that its a function like foo(something)
@@ -489,19 +553,32 @@ class InFixToPostFix:
self.context,
self.unrecognized_tokens,
PARSERS)
if nodes_sequences is None:
return None
if not nodes_sequences:
nodes_sequences = [[self.unrecognized_tokens.clone()]]
res = []
for sequence in nodes_sequences:
if isinstance(sequence[-1], UnrecognizedTokensNode):
function = sequence[-1]
else:
function = UnrecognizedTokensNode(sequence[-1].start, sequence[-1].end, sequence[-1].tokens)
function.add_token(token, pos).fix_source()
last_node = sequence[-1]
res.append(function_parser_res(sequence[:-1], function))
if len(last_node.tokens) > 1:
if isinstance(last_node, UnrecognizedTokensNode):
to_out = [UnrecognizedTokensNode(last_node.start, pos - 2, last_node.tokens[:-1]).fix_source()]
function_name = UnrecognizedTokensNode(pos - 1, pos - 1, [last_node.tokens[-1]])
function_name.add_token(token, pos)
else:
to_out = [last_node.fix_source()]
function_name = None
else: # len(last_node.tokens) == 1
if not isinstance(last_node, UnrecognizedTokensNode):
function_name = UnrecognizedTokensNode(last_node.start, last_node.end, last_node.tokens)
else:
function_name = last_node
function_name.add_token(token, pos)
to_out = []
res.append(function_parser_res(sequence[:-1] + to_out, function_name))
return res
def pop_stack_to_out(self):
@@ -614,6 +691,8 @@ class InFixToPostFix:
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
current_concept.end = pos
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, "??"))
self.manage_unrecognized()
# manage that some clones may have been forked
for forked in self.forked:
@@ -673,17 +752,53 @@ class InFixToPostFix:
if self.is_locked:
return
if self.parsing_function:
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self.unrecognized_tokens.add_token(token, pos)
if self.unrecognized_tokens.parenthesis_count == 0:
self.unrecognized_tokens.fix_source()
res = parse_function(self.context,
self.unrecognized_tokens.source,
self.unrecognized_tokens.tokens[:],
self.unrecognized_tokens.start)
instances = get_n_clones(self, len(res))
self.forked.extend(instances[1:])
for instance, res_i in zip(instances, res):
if res_i.status or instance.context.sheerka.isinstance(res_i.body, BuiltinConcepts.PARSER_RESULT):
# 1. we manage to recognize a function
# 2. we almost manage, ex func(one two). It's not a function but almost
instance._put_to_out(res_i.body.body)
instance.unrecognized_tokens.reset()
else:
# it is not a function, try to recognized the token
# This situation is unlikely to occur
instance.manage_unrecognized()
instance.parsing_function = False
return True
if self.handle_expected_token(token, pos):
# a token is found, let's check if it's part of a concepts being parsed
# example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b")
# if the token 'bar' is found, it has to be considered as part of the concept foo
self.debug.append(token)
if self.debug_enabled:
self._remove_debug_info_if_needed()
self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT))
return True
elif self._is_lpar(token):
self.debug.append(token)
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
if self.unrecognized_tokens.is_empty() or self.unrecognized_tokens.is_whitespace():
# first, remove what was in the buffer
self.manage_unrecognized()
for forked in self.forked:
@@ -691,40 +806,65 @@ class InFixToPostFix:
forked.eat_token(token, pos)
self.stack.append((token, pos))
else:
# the parenthesis is part of the unrecognized
# So it's a function
# So it's maybe a function call
list_of_results = self.get_functions_from_unrecognized(token, pos)
if list_of_results:
instances = [self]
for i in range(len(list_of_results) - 1):
clone = self.clone()
self.forked.append(clone)
instances.append(clone)
list_of_results = self.get_functions_names_from_unrecognized(token, pos)
instances = [self]
for i in range(len(list_of_results) - 1):
clone = self.clone()
self.forked.append(clone)
instances.append(clone)
# Manage the result for self and its clones
for instance, parsing_res in zip(instances, list_of_results):
for to_out in parsing_res.to_out:
instance._put_to_out(to_out)
# Manage the result for self and its clones
for instance, parsing_res in zip(instances, list_of_results):
for to_out in parsing_res.to_out:
instance._put_to_out(to_out)
if parsing_res.function:
instance.unrecognized_tokens = parsing_res.function
instance.parsing_function = True
else:
# special case of "twenty two(". It's not considered as a function
# The manage_unrecognized() what somewhat done by get_functions_names_from_unrecognized()
# So we just put the unrecognized to out
instance.unrecognized_tokens.reset()
# make sure to pop the current concept
if self._stack_isinstance(SyaConceptParserHelper):
self.pop_stack_to_out()
instance._put_to_out(")") # mark where the function should end
instance.stack.append(parsing_res.function)
instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized
else:
self._put_to_out(")") # mark where the function should end
self.eat_unrecognized(token, pos) # add the '(' to the rest of the unknown
self.stack.append(self.unrecognized_tokens.fix_source())
self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
instance.stack.append((token, pos))
# # instance._put_to_out(")") # mark where the function should end
# # instance.stack.append(parsing_res.function)
# # instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, []) # reset unrecognized
# else:
# # handle when there are multiple pending tokens
# if len(self.unrecognized_tokens.tokens) > 1:
# unrecognized = UnrecognizedTokensNode(self.unrecognized_tokens.start,
# pos - 2,
# self.unrecognized_tokens.tokens[:-1])
# unrecognized.fix_source()
# self._put_to_out(unrecognized)
# last_token = self.unrecognized_tokens.tokens[-1]
# self.unrecognized_tokens.reset()
# self.unrecognized_tokens.add_token(last_token, pos - 1)
#
# self.eat_unrecognized(token, pos) # add the '(' to the rest of the unknown
# self.parsing_function = True
# # self.stack.append(self.unrecognized_tokens.fix_source())
# # self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
return True
elif self._is_rpar(token):
self.debug.append(token)
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_EAT))
# first, remove what was in the buffer
self.manage_unrecognized()
@@ -775,32 +915,36 @@ class InFixToPostFix:
return False
def eat_concept(self, sya_concept_def, token, pos):
def eat_concept(self, sya_concept_def, token, pos, first_pass=True):
"""
a concept is found
:param sya_concept_def:
:param token:
:param pos:
:param first_pass: When not called from a fork after manage_unrecognized()
:return:
"""
if self.is_locked:
return
self.debug.append(sya_concept_def)
parser_helper = SyaConceptParserHelper(sya_concept_def, pos)
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
if first_pass:
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, sya_concept_def, "??"))
if Token.is_whitespace(parser_helper.last_token_before_first_token):
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
if self.unrecognized_tokens.last_token_type() == TokenKind.WHITESPACE:
parser_helper.remember_whitespace = self.unrecognized_tokens.tokens[-1]
# First, try to recognize the tokens that are waiting
self.manage_unrecognized()
for forked in self.forked:
# manage the fact that some clone may have been forked
forked.eat_concept(sya_concept_def, token, pos)
if Token.is_whitespace(parser_helper.last_token_before_first_token):
self.unrecognized_tokens.pop(TokenKind.WHITESPACE)
# First, try to recognize the tokens that are waiting
self.manage_unrecognized()
for forked in self.forked:
# manage the fact that some clone may have been forked
forked.eat_concept(sya_concept_def, token, pos, first_pass=False)
# then, check if this new concept is linked to the previous ones
# ie, is the previous concept fully matched ?
@@ -823,6 +967,9 @@ class InFixToPostFix:
self.manage_parameters_when_new_concept(parser_helper)
self._put_to_out(parser_helper.fix_concept())
else:
if self.debug_enabled:
self._remove_debug_info_if_needed()
self.debug.append(DebugInfo(pos, token, sya_concept_def, DEBUG_PUSH))
self.stack.append(parser_helper)
self.manage_parameters_when_new_concept(parser_helper)
@@ -836,11 +983,12 @@ class InFixToPostFix:
if self.is_locked:
return
self.debug.append(token)
if self.debug_enabled:
self.debug.append(DebugInfo(pos, token, None, DEBUG_PUSH_UNREC))
self.unrecognized_tokens.add_token(token, pos)
def finalize(self):
def finalize(self, pos):
"""
Put the remaining items from the stack to out
:return:
@@ -850,8 +998,14 @@ class InFixToPostFix:
return
if len(self.stack) == 0 and len(self.out) == 0:
# check for parenthesis mismatch
if self.unrecognized_tokens.parenthesis_count > 0:
self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens))
return # no need to pop the buffer, as no concept is found
if self.debug_enabled:
self.debug.append(DebugInfo(pos, "<EOF>", None, "??"))
while len(self.stack) > 0:
parser_helper = self.stack[-1]
@@ -863,7 +1017,7 @@ class InFixToPostFix:
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize()
forked.finalize(pos)
failed_to_match = sum(map(lambda e: e.type != TokenKind.VAR_DEF, parser_helper.expected))
if failed_to_match > 0:
@@ -878,10 +1032,10 @@ class InFixToPostFix:
self.manage_unrecognized()
for forked in self.forked:
# manage that some clones may have been forked
forked.finalize()
forked.finalize(pos)
def clone(self):
clone = InFixToPostFix(self.context)
clone = InFixToPostFix(self.context, self.debug_enabled)
clone.is_locked = self.is_locked
clone.out = self.out[:]
clone.stack = [i.clone() if hasattr(i, "clone") else i for i in self.stack]
@@ -983,7 +1137,7 @@ class SyaNodeParser(BaseNodeParser):
res.extend(forked)
forked.clear()
res = [InFixToPostFix(context)]
res = [InFixToPostFix(context, context.in_context(BuiltinConcepts.DEBUG))]
while self.parser_input.next_token(False):
for infix_to_postfix in res:
infix_to_postfix.reset()
@@ -1027,7 +1181,7 @@ class SyaNodeParser(BaseNodeParser):
# make sure that remaining items in stack are moved to out
for infix_to_postfix in res:
infix_to_postfix.reset()
infix_to_postfix.finalize()
infix_to_postfix.finalize(self.parser_input.pos)
_add_forked_to_res()
return res
@@ -1058,14 +1212,14 @@ class SyaNodeParser(BaseNodeParser):
start = item.start
end = item.end
has_unrecognized = False
concept = sheerka.new_from_template(item.concept, item.concept.id)
concept = sheerka.new_from_template(item.concept, item.concept.key)
for param_index in reversed(range(len(concept.metadata.variables))):
inner_item = self.postfix_to_item(sheerka, postfixed)
if inner_item.start < start:
start = inner_item.start
if inner_item.end > end:
end = inner_item.end
has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode)
has_unrecognized |= isinstance(inner_item, (UnrecognizedTokensNode, SourceCodeWithConceptNode))
param_name = concept.metadata.variables[param_index][0]
param_value = inner_item.concept if hasattr(inner_item, "concept") else \
@@ -1128,6 +1282,7 @@ class SyaNodeParser(BaseNodeParser):
if has_unrecognized:
# Manage some sick cases where missing parenthesis mess the order or the sequence
# example "foo bar(one plus two"
# too lazy to fix the why...
sequence.sort(key=attrgetter("start"))
ret.append(
+4 -51
View File
@@ -2,7 +2,7 @@ from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes, update_compiled
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
@@ -38,6 +38,7 @@ class UnrecognizedNodeParser(BaseParser):
sequences_found = [[]]
has_unrecognized = False
self.error_sink.clear()
for node in nodes:
if isinstance(node, ConceptNode):
@@ -93,7 +94,7 @@ class UnrecognizedNodeParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
source=parser_input.source,
body=choice,
try_parsed=choice)))
@@ -105,56 +106,8 @@ class UnrecognizedNodeParser(BaseParser):
return ret
def validate_concept_node(self, context, concept_node):
sheerka = context.sheerka
errors = []
def _validate_concept(concept):
"""
Recursively browse the compiled properties in order to find unrecognized
:param concept:
:return:
"""
for k, v in concept.compiled.items():
if isinstance(v, Concept):
_validate_concept(v)
elif isinstance(v, UnrecognizedTokensNode):
res = parse_unrecognized(context, v.source, PARSERS)
res = only_successful(context, res) # only key successful parsers
if res.status:
concept.compiled[k] = res.body.body
else:
errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{v.source}'"))
def _get_source(compiled, var_name):
if var_name not in compiled:
return None
if not isinstance(compiled[var_name], list):
return None
if not len(compiled[var_name]) == 1:
return None
if not sheerka.isinstance(compiled[var_name][0], BuiltinConcepts.RETURN_VALUE):
return None
if not sheerka.isinstance(compiled[var_name][0].body, BuiltinConcepts.PARSER_RESULT):
return None
if compiled[var_name][0].body.name == "parsers.ShortTermMemory":
return None
return compiled[var_name][0].body.source
_validate_concept(concept_node.concept)
# Special case where the values of the variables are the names of the variable
# example : Concept("a plus b").def_var("a").def_var("b")
# and the user has entered 'a plus b'
# Chances are that we are talking about the concept itself, and not an instantiation (like '10 plus 2')
# This means that 'a' and 'b' don't have any real value
for name, value in concept_node.concept.metadata.variables:
if not _get_source(concept_node.concept.compiled, name) == name:
break
else:
concept_node.concept.metadata.is_evaluated = True
update_compiled(context, concept_node.concept, errors)
if len(errors) > 0:
return context.sheerka.ret(self.name, False, errors)
+5 -2
View File
@@ -173,8 +173,11 @@ class SheerkaPromptCompleter(Completer):
break
m = NAME.match(text[:i][::-1])
func_name = m.group(0)[::-1]
return FuncFound(func_name, i - len(func_name), paren_index) if m else None
if m:
func_name = m.group(0)[::-1]
return FuncFound(func_name, i - len(func_name), paren_index)
return None
@staticmethod
def after_pipe(text, pos):