Added SyaNodeParser (finally, after one month)
@@ -37,6 +37,8 @@ class BuiltinConcepts(Enum):
PARSER_RESULT = "parser result"
TOO_MANY_SUCCESS = "too many success"  # when expecting a limited number of successful return values
TOO_MANY_ERRORS = "too many errors"  # when expecting a limited number of errors
ONLY_SUCCESSFUL = "only successful"  # filter the result, only keep successful ones
MULTIPLE_ERRORS = "multiple errors"  # filter the result, only keep evaluators in error
NOT_FOR_ME = "not for me"  # a parser recognizes that the entry is not meant for it
IS_EMPTY = "is empty"  # when a set is empty
INVALID_RETURN_VALUE = "invalid return value"  # the return value of an evaluator is not correct
@@ -45,6 +47,7 @@ class BuiltinConcepts(Enum):
CONCEPT_EVAL_ERROR = "concept evaluation error"  # cannot evaluate a property or metadata of a concept
ENUMERATION = "enum"  # represents a list or a set
LIST = "list"  # represents a list
FILTERED = "filtered"  # represents the result of a filtering
CONCEPT_ALREADY_IN_SET = "concept already in set"
EVALUATOR_PRE_PROCESS = "evaluator pre process"  # used to modify / tweak the behaviour of evaluators
EVAL_BODY_REQUESTED = "eval body requested"  # to evaluate the body
@@ -91,6 +94,7 @@ BuiltinErrors = [str(e) for e in {
BuiltinConcepts.UNKNOWN_PROPERTY,
BuiltinConcepts.TOO_MANY_SUCCESS,
BuiltinConcepts.TOO_MANY_ERRORS,
BuiltinConcepts.MULTIPLE_ERRORS,
BuiltinConcepts.INVALID_RETURN_VALUE,
BuiltinConcepts.CONCEPT_ALREADY_DEFINED,
BuiltinConcepts.CONCEPT_EVAL_ERROR,
@@ -249,11 +253,12 @@ class ParserResultConcept(Concept):
Result of a parsing
"""

def __init__(self, parser=None, source=None, value=None, try_parsed=None, validate_concept=None):
def __init__(self, parser=None, source=None, tokens=None, value=None, try_parsed=None):
super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT)
self.set_metadata_value(ConceptParts.BODY, value)
self.set_prop("parser", parser)
self.set_prop("source", source)
self.set_prop("tokens", tokens)
self.set_prop("try_parsed", try_parsed)  # in case of error, what was found before the error
self.metadata.is_evaluated = True

@@ -372,6 +377,14 @@ class ListConcept(Concept):
# return item in self.body


class FilteredConcept(Concept):
def __init__(self, filtered=None, iterable=None, predicate=None):
super().__init__(BuiltinConcepts.FILTERED, True, False, BuiltinConcepts.FILTERED)
self.set_metadata_value(ConceptParts.BODY, filtered)
self.def_prop("iterable", iterable)
self.def_prop("predicate", predicate)


class ConceptAlreadyInSet(Concept):
def __init__(self, concept=None, concept_set=None):
super().__init__(BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
@@ -409,3 +422,17 @@ class WhereClauseFailed(Concept):
@property
def concept(self):
return self.body


class NotForMeConcept(Concept):
def __init__(self, source=None, reason=None):
super().__init__(BuiltinConcepts.NOT_FOR_ME,
True,
False,
BuiltinConcepts.NOT_FOR_ME)
self.set_metadata_value(ConceptParts.BODY, source)
self.def_prop("reason", reason)
self.metadata.is_evaluated = True

def __repr__(self):
return f"NotForMeConcept(source={self.body}, reason={self.get_prop('reason')})"

@@ -6,6 +6,8 @@ from core.ast.nodes import CallNodeConcept, GenericNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode


def is_same_success(context, return_values):
@@ -132,6 +134,181 @@ def expect_one(context, return_values):
parents=return_values)


def only_successful(context, return_values):
"""
Removes all return values that are not successful
Returns an error when there is no successful return value
:param context:
:param return_values:
:return:
"""
if not isinstance(return_values, list):
return return_values

sheerka = context.sheerka

if len(return_values) == 0:
return sheerka.ret(
context.who,
False,
sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values),
parents=return_values)

successful_results = [item for item in return_values if item.status]
if len(successful_results) == 0:
return sheerka.ret(
context.who,
False,
sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
parents=return_values)

return sheerka.ret(
context.who,
True,
sheerka.new(BuiltinConcepts.ONLY_SUCCESSFUL, body=successful_results),
parents=return_values)
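A hedged shape sketch of the three outcomes above (ok/err stand for return values whose .status is True/False):

# only_successful(ctx, [])           -> ret(False, IS_EMPTY)
# only_successful(ctx, [ok, err])    -> ret(True, ONLY_SUCCESSFUL, body=[ok])
# only_successful(ctx, [err1, err2]) -> ret(False, TOO_MANY_ERRORS)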


def only_parsers_results(context, return_values):
"""
Filters the return_values and keeps those whose result is a ParserResult,
regardless of the status

So it filters out errors
:param context:
:param return_values:
:return:
"""

if not isinstance(return_values, list):
return return_values

sheerka = context.sheerka

if len(return_values) == 0:
return sheerka.ret(
context.who,
False,
sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values),
parents=return_values)

return_values_ok = [item for item in return_values if sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)]

# hack because some parsers don't follow the NOT_FOR_ME rule
temp_ret_val = []
for ret_val in return_values_ok:
if isinstance(ret_val.body.body, ErrorNode):
continue
if isinstance(ret_val.body.body, list) and \
len(ret_val.body.body) == 1 and \
isinstance(ret_val.body.body[0], UnrecognizedTokensNode):
continue
temp_ret_val.append(ret_val)
return_values_ok = temp_ret_val

if len(return_values_ok) == 0:
return sheerka.ret(
context.who,
False,
sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
parents=return_values)

return sheerka.ret(
context.who,
True,
sheerka.new(BuiltinConcepts.FILTERED,
body=return_values_ok,
iterable=return_values,
predicate="sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)"),
parents=return_values)


def parse_unrecognized(context, tokens, parsers):
"""
Try to recognize concepts or code from tokens using the given parsers
:param context:
:param tokens:
:param parsers:
:return:
"""
steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
sheerka = context.sheerka

with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context:
# disable all parsers but the following ones
sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False)
for parser in parsers:
sub_context.add_preprocess(BaseParser.PREFIX + parser, enabled=True)

sub_context.add_inputs(source=tokens)
to_parse = sheerka.ret(
context.who,
True,
sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
res = sheerka.execute(sub_context, to_parse, steps)
sub_context.add_values(return_values=res)

# discard Python response if accepted by AtomNode
is_concept = False
for r in res:
if r.status and r.who == "parsers.AtomNode":
is_concept = True

if not is_concept:
return res

filtered = []
for r in res:
if r.who == "parsers.Python":
continue
filtered.append(r)

return filtered
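The disable-all-then-re-enable dance above, in isolation (a hedged sketch; BaseParser.PREFIX is assumed to be the shared parser name prefix, and the "*" wildcard is resolved by SheerkaExecute.matches() later in this diff):

# sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False)       # every parser off
# sub_context.add_preprocess(BaseParser.PREFIX + "SyaNode", enabled=True)  # re-enable the chosen ones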


def get_lexer_nodes(return_values, start, tokens):
"""
From a parser result, return the corresponding LexerNode
either ConceptNode, UnrecognizedTokensNode or SourceCodeNode
:param return_values:
:param start:
:param tokens:
:return: list of lists (list of concept node sequences)
"""

lexer_nodes = []
for ret_val in return_values:
if ret_val.who == "parsers.Python":

if ret_val.body.source.strip().isalnum() and not ret_val.body.source.strip().isnumeric():
# Discard SourceCodeNode which seems to be a concept
# It may be a wrong idea, so let's see
continue

end = start + len(tokens) - 1
lexer_nodes.append([SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])

elif ret_val.who == "parsers.ExactConcept":
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
end = start + len(tokens) - 1
for concept in concepts:
lexer_nodes.append([ConceptNode(concept, start, end, tokens, ret_val.body.source)])

elif ret_val.who in ("parsers.BnfNode", "parsers.SyaNode", "parsers.AtomNode"):
nodes = [node for node in ret_val.body.body]
for node in nodes:
node.start += start
node.end += start

# but append the whole sequence when it's a sequence
lexer_nodes.append(nodes)

else:
raise NotImplementedError()

return lexer_nodes
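A hedged sketch of the return shape (concept names hypothetical): one inner list per alternative reading of the same token span:

# get_lexer_nodes(...) -> [
#     [ConceptNode(one), ConceptNode(two)],  # reading 1: a sequence of recognized concepts
#     [SourceCodeNode(python_node, ...)],    # reading 2: the same span taken as source code
# ]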


def get_names(sheerka, concept_node):
"""
Finds all the names referenced by the concept_node

@@ -108,11 +108,14 @@ class Concept:

def __eq__(self, other):

if id(self) == id(other):
return True

if isinstance(other, simplec):
return self.name == other.name and self.body == other.body

if id(self) == id(other):
return True
if isinstance(other, CC):
return other == self

if not isinstance(other, Concept):
return False
@@ -346,6 +349,17 @@ class Concept:
"""
return self.props[prop_name].value

def set_prop_by_index(self, index: int, value):
"""
Set the value of a property (not the metadata) using the index
:param index: index of the property
:param value:
:return:
"""
prop_name = list(self.props.keys())[index]
self.props[prop_name].value = value
return self

def set_metadata_value(self, metadata: ConceptParts, value):
"""
Set the resolved value of a metadata (not the metadata itself)
@@ -438,3 +452,68 @@ class InfiniteRecursionResolved:

def get_value(self):
return self.value


class CC:
"""
Concept class for test purposes
CC means concept for compiled (or concept with compiled)
It matches a concept if the compiles are equal
"""

# The only properties that are tested are concept_key and compiled
# The other properties (concept, source, start and end)
# are used in tests/parsers/parsers_utils.py to help create helper objects

def __init__(self, concept, source=None, **kwargs):
self.concept_key = concept.key if isinstance(concept, Concept) else concept
self.compiled = kwargs
self.concept = concept if isinstance(concept, Concept) else None
self.source = source  # used when the key differs from the substring to search for when filling start and end
self.start = None  # for debug purposes, indicates where the concept starts
self.end = None  # for debug purposes, indicates where the concept ends

def __eq__(self, other):
if id(self) == id(other):
return True

if isinstance(other, Concept):
if other.key != self.concept_key:
return False
return self.compiled == other.compiled

if not isinstance(other, CC):
return False

return self.concept_key == other.concept_key and \
self.compiled == other.compiled

def __hash__(self):
if self.concept:
return hash(self.concept)
return hash(self.concept_key)

def __repr__(self):
if self.concept:
txt = f"CC(concept='{self.concept}'"
else:
txt = f"CC(concept_key='{self.concept_key}'"

for k, v in self.compiled.items():
txt += f", {k}='{v}'"
return txt + ")"

def fix_pos(self, node):
start = node.start if hasattr(node, "start") else \
node[0] if isinstance(node, tuple) else None
end = node.end if hasattr(node, "end") else \
node[1] if isinstance(node, tuple) else None

if start is not None:
if self.start is None or start < self.start:
self.start = start

if end is not None:
if self.end is None or end > self.end:
self.end = end
return self
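A hedged test-style sketch of how CC matches (key and compiled values are hypothetical):

# cc = CC("plus", left="1", right="2")  # expects key "plus" and exactly these compiled values
# cc == actual_concept  # True iff actual_concept.key == "plus"
#                       # and actual_concept.compiled == {"left": "1", "right": "2"}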

@@ -43,6 +43,7 @@ class ExecutionContext:
desc: str = None,
logger=None,
global_hints=None,
global_errors=None,
**kwargs):

self._parent = None
@@ -61,6 +62,7 @@ class ExecutionContext:
self.logger = logger
self.local_hints = set()
self.global_hints = set() if global_hints is None else global_hints
self.global_errors = [] if global_errors is None else global_errors

self.inputs = {}  # what the parameters of the execution context were
self.values = {}  # what was produced by the execution context
@@ -146,8 +148,8 @@ class ExecutionContext:
preprocess.set_prop(k, v)

if not self.preprocess:
self.preprocess = set()
self.preprocess.add(preprocess)
self.preprocess = []
self.preprocess.append(preprocess)
return self

def add_inputs(self, **kwargs):
@@ -212,6 +214,7 @@ class ExecutionContext:
desc,
logger,
self.global_hints,
self.global_errors,
**_kwargs)
new._parent = self
new._tab = self._tab + " " * DEBUG_TAB_SIZE
@@ -230,7 +233,8 @@ class ExecutionContext:
if self.logger and not self.logger.disabled:
self.logger.debug(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))

def log_error(self, message, who=None):
def log_error(self, message, who=None, exc=None):
self.global_errors.append(exc or message)
if self.logger and not self.logger.disabled:
self.logger.exception(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))


@@ -1,8 +1,10 @@
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
from core.concept import Concept
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderRef
import core.utils

CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
BNF_NODE_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"


class SheerkaCreateNewConcept:
@@ -13,6 +15,7 @@ class SheerkaCreateNewConcept:
def __init__(self, sheerka):
self.sheerka = sheerka
self.logger_name = self.create_new_concept.__name__
self.base_lexer_parser = core.utils.get_class(BASE_NODE_PARSER_CLASS)("BaseNodeParser", 0)

def create_new_concept(self, context, concept: Concept):
"""
@@ -25,7 +28,7 @@ class SheerkaCreateNewConcept:

concept.init_key()
concepts_definitions = None
init_ret_value = None
init_bnf_ret_value = None

sdp = self.sheerka.sdp

@@ -49,13 +52,19 @@ class SheerkaCreateNewConcept:
concepts_definitions[concept] = concept.bnf

# check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.sheerka.parsers[CONCEPT_LEXER_PARSER_CLASS]()
bnf_lexer_parser = self.sheerka.parsers[BNF_NODE_PARSER_CLASS]()
with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context:
sub_context.concepts[concept.key] = concept  # the concept is not in the real cache yet
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
sub_context.add_values(return_values=init_ret_value)
if not init_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
init_bnf_ret_value = bnf_lexer_parser.initialize(sub_context, concepts_definitions)
sub_context.add_values(return_values=init_bnf_ret_value)
if not init_bnf_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_bnf_ret_value.value))

# update concept definition by key
init_sya_ret_value = self.base_lexer_parser.initialize(context, [concept], use_sheerka=True)
if not init_sya_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
concepts_by_first_keyword = init_sya_ret_value.body

concept.freeze_definition_hash()

@@ -97,9 +106,15 @@ class SheerkaCreateNewConcept:
sdp.set(
context.event.get_digest(),
self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
concept_lexer_parser.encode_grammar(init_ret_value.body),
bnf_lexer_parser.encode_grammar(init_bnf_ret_value.body),
use_ref=True)
self.sheerka.concepts_definitions_cache = None  # invalidate cache

# update the concepts by first keyword
sdp.set(context.event.get_digest(),
self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
concepts_by_first_keyword)

except SheerkaDataProviderDuplicateKeyError as error:
context.log_error("Failed to create a new concept.", who=self.logger_name)
return self.sheerka.ret(
@@ -109,13 +124,13 @@ class SheerkaCreateNewConcept:
error.args[0])

# Updates the caches

self.sheerka.cache_by_key[concept.key] = sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key)
self.sheerka.cache_by_name[concept.name] = sdp.get_safe(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name)
self.sheerka.cache_by_id[concept.id] = concept
if init_ret_value is not None and init_ret_value.status:
self.sheerka.concepts_grammars = init_ret_value.body
if init_bnf_ret_value is not None and init_bnf_ret_value.status:
self.sheerka.concepts_grammars = init_bnf_ret_value.body
self.sheerka.concepts_by_first_keyword = concepts_by_first_keyword

# process the return in needed
# process the return if needed
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret

@@ -1,6 +1,6 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved
from core.builtin_helpers import add_to_ret_val, remove_from_ret_val, expect_one
from core.builtin_helpers import expect_one

CONCEPT_EVALUATION_STEPS = [
BuiltinConcepts.BEFORE_EVALUATION,

@@ -33,6 +33,8 @@ class SheerkaExecute:

# group the parsers by priorities
instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
instantiated_parsers = self.preprocess(execution_context, instantiated_parsers)

grouped_parsers = {}
for parser in [p for p in instantiated_parsers if p.enabled]:
grouped_parsers.setdefault(parser.priority, []).append(parser)
@@ -44,7 +46,6 @@ class SheerkaExecute:

for parser in grouped_parsers[priority]:

return_value_success_found = False
for return_value in inputs_for_this_group:

to_parse = return_value.body.body \
@@ -67,22 +68,23 @@ class SheerkaExecute:
r.parents = [return_value]
result.append(r)
if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT):
# if a ParserResultConcept is returned, it will be used by the parsers
# of the following groups
to_process.append(r)
if r.status:
return_value_success_found = True
stop_processing = True

else:
res.parents = [return_value]
result.append(res)
if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT):
# if a ParserResultConcept is returned, it will be used by the parsers
# of the following groups
to_process.append(res)
if res.status:
return_value_success_found = True
stop_processing = True
sub_context.add_values(return_values=res)

if return_value_success_found:
stop_processing = True
break  # Stop the other return_values (but not the other parsers with the same priority)

if stop_processing:
break  # Do not try the other priorities if a match is found
@@ -102,7 +104,7 @@ class SheerkaExecute:
instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]

# pre-process evaluators if needed
instantiated_evaluators = self._preprocess_evaluators(execution_context, instantiated_evaluators)
instantiated_evaluators = self.preprocess(execution_context, instantiated_evaluators)

for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
@@ -123,7 +125,7 @@ class SheerkaExecute:
evaluated_items = []
to_delete = []
for evaluator in grouped_evaluators[priority]:
evaluator = self._preprocess_evaluators(execution_context, evaluator.__class__())  # fresh copy
evaluator = self.preprocess(execution_context, evaluator.__class__())  # fresh copy

sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
with iteration_context.push(desc=sub_context_desc, logger=evaluator.verbose_log) as sub_context:
@@ -215,22 +217,29 @@ class SheerkaExecute:

return return_values

def _preprocess_evaluators(self, context, evaluators):
def preprocess(self, context, parsers_or_evaluators):
if not context.preprocess:
return evaluators
return parsers_or_evaluators

if not hasattr(evaluators, "__iter__"):
if not hasattr(parsers_or_evaluators, "__iter__"):
single_one = True
evaluators = [evaluators]
parsers_or_evaluators = [parsers_or_evaluators]
else:
single_one = False

for preprocess in context.preprocess:
for e in evaluators:
if preprocess.props["name"].value == e.name:
for e in parsers_or_evaluators:
if self.matches(e.name, preprocess.get_prop("name")):
for prop, value in preprocess.props.items():
if prop == "name":
continue
if hasattr(e, prop):
setattr(e, prop, value.value)
return evaluators[0] if single_one else evaluators
return parsers_or_evaluators[0] if single_one else parsers_or_evaluators

@staticmethod
def matches(parser_or_evaluator_name, preprocessor_name):
if preprocessor_name.endswith("*"):
return parser_or_evaluator_name.startswith(preprocessor_name[:-1])
else:
return parser_or_evaluator_name == preprocessor_name
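A quick illustration of the wildcard rule (the names are hypothetical):

# SheerkaExecute.matches("parsers.BnfNode", "parsers.*")        -> True  (prefix wildcard)
# SheerkaExecute.matches("parsers.BnfNode", "parsers.Bnf")      -> False (no "*": exact match required)
# SheerkaExecute.matches("parsers.BnfNode", "parsers.BnfNode")  -> True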

@@ -17,12 +17,7 @@ from core.sheerka_logger import console_handler

import logging

# CONCEPT_EVALUATION_STEPS = [
# BuiltinConcepts.BEFORE_EVALUATION,
# BuiltinConcepts.EVALUATION,
# BuiltinConcepts.AFTER_EVALUATION]

CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
CONCEPT_LEXER_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser"
CONCEPTS_FILE = "_concepts.txt"

@@ -37,6 +32,9 @@ class Sheerka(Concept):
CONCEPTS_BY_NAME_ENTRY = "Concepts_By_Name"
CONCEPTS_BY_HASH_ENTRY = "Concepts_By_Hash"  # store hash of concepts definitions (not values)
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions"  # to store definitions (bnf) of concepts
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Concepts_By_First_Keyword"
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"

BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"  # sequential key for builtin concepts
USER_CONCEPTS_KEYS = "User_Concepts"  # sequential key for user defined concepts

@@ -65,6 +63,10 @@ class Sheerka(Concept):
# a grammar is a resolved BNF
self.concepts_grammars = {}

# cache for SYA concepts
self.concepts_by_first_keyword = {}
self.sya_definitions = {}

# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
# TODO: manage contexts
@@ -119,7 +121,8 @@ class Sheerka(Concept):
self.initialize_builtin_concepts()
self.initialize_builtin_parsers()
self.initialize_builtin_evaluators()
self.initialize_concepts_definitions(exec_context)
self.initialize_bnf_parsing(exec_context)
self.initialize_sya_parsing()
res = ReturnValueConcept(self, True, self)

exec_context.add_values(return_values=res)
@@ -174,12 +177,25 @@ class Sheerka(Concept):
"""
core.utils.init_package_import("parsers")
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
modules_to_skip = ["parsers.BaseNodeParser"]

temp_result = {}
for parser in core.utils.get_sub_classes("parsers", base_class):
if parser.__module__ == base_class.__module__:
continue

self.init_log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
if parser.__module__ in modules_to_skip:
continue

qualified_name = core.utils.get_full_qualified_name(parser)
self.init_log.debug(f"Adding builtin parser '{qualified_name}'")
temp_result[qualified_name] = parser

# Now we sort the parsers by name.
# It's not important for the logic of their usage as they have their priority anyway,
# We do that for the unit tests. They are too complicated to write otherwise
for name in sorted(temp_result.keys()):
self.parsers[name] = temp_result[name]

def initialize_builtin_evaluators(self):
"""
@@ -195,7 +211,7 @@ class Sheerka(Concept):
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)

def initialize_concepts_definitions(self, execution_context):
def initialize_bnf_parsing(self, execution_context):
self.init_log.debug("Initializing concepts grammars.")
definitions = self.get_concepts_definitions(execution_context)

@@ -211,6 +227,25 @@ class Sheerka(Concept):

self.concepts_grammars = lexer_parser.concepts_grammars

def initialize_sya_parsing(self):
self.init_log.debug("Initializing sya definitions.")

self.concepts_by_first_keyword = self.sdp.get_safe(
self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
load_origin=False) or {}

self.sya_definitions = self.sdp.get_safe(
self.CONCEPTS_SYA_DEFINITION_ENTRY,
load_origin=False) or {}

def reset(self):
self.reset_cache()
self.concepts_by_first_keyword = {}
self.concepts_grammars = {}
self.sya_definitions = {}
self.sdp.reset()
self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)

def reset_cache(self, filter_to_use=None):
"""
reset the different caches that exist
@@ -220,6 +255,7 @@ class Sheerka(Concept):
if filter_to_use is None:
self.cache_by_key = {}
self.cache_by_id = {}
self.cache_by_name = {}
else:
raise NotImplementedError()

@@ -324,6 +360,38 @@ class Sheerka(Concept):
"""
return self.sets_handler.set_isa(context, concept, concept_set)

def set_sya_def(self, context, list_of_def):
"""
Set the precedence and/or the associativity of a concept
:param context:
:param list_of_def: list of tuples (concept_id, precedence (int), SyaAssociativity)
:return:
"""

# validate the entries
for concept_id, precedence, associativity in list_of_def:
if concept_id == BuiltinConcepts.UNKNOWN_CONCEPT:
return self.ret(self.name,
False,
self.new(BuiltinConcepts.ERROR, body=f"Concept {concept_id} is not known"))

# update the definitions
for concept_id, precedence, associativity in list_of_def:
if precedence is None and associativity is None:
try:
del self.sya_definitions[concept_id]
except KeyError:
pass
else:
self.sya_definitions[concept_id] = (precedence, associativity.value)

# then save
self.sdp.set(context.event.get_digest(),
self.CONCEPTS_SYA_DEFINITION_ENTRY,
self.sya_definitions)

return self.ret(self.name, True, self.new(BuiltinConcepts.SUCCESS))
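A hedged usage sketch (the concept ids are hypothetical; SyaAssociativity is defined at the end of this diff):

# sheerka.set_sya_def(context, [
#     (plus_id, 10, SyaAssociativity.Left),    # binds loosely, left-associative
#     (power_id, 20, SyaAssociativity.Right),  # binds tighter, right-associative
#     (old_id, None, None),                    # None/None removes a stored definition
# ])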

def get_set_elements(self, context, concept):
"""
Concept is supposed to be a set
@@ -571,6 +639,22 @@ class Sheerka(Concept):

return self.value(body_to_use)

def get_error(self, obj):
if isinstance(obj, Concept) and obj.metadata.is_builtin and obj.key in BuiltinErrors:
return obj

if isinstance(obj, list):
return obj

if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
if obj.status:
return None

if self.isinstance(obj.body, BuiltinConcepts.PARSER_RESULT):
return self.get_error(obj.body.body)

return NotImplementedError()

def get_values(self, objs):
if not (isinstance(objs, list) or
self.isinstance(objs, BuiltinConcepts.LIST) or

@@ -163,7 +163,7 @@ def remove_list_from_list(lst, to_remove):
def product(a, b):
"""
Kind of cartesian product between lists a and b
knowing that a is also a list
knowing that a is also a list : a is a list of lists !!!

So it's a cartesian product between a list of lists and a list
"""
@@ -176,7 +176,12 @@ def product(a, b):
res = []
for item_b in b:
for item_a in a:
items = item_a + [item_b]
# items = item_a + [item_b]
items = item_a[:]
if hasattr(item_b, "__iter__"):
items.extend(item_b)
else:
items.append(item_b)
res.append(items)

return res
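A worked example of the loops above, derived from the code as written:

# product([[1], [2]], [3, [4, 5]])
#   item_b=3:      [1, 3] and [2, 3]        (3 is not iterable -> append)
#   item_b=[4, 5]: [1, 4, 5] and [2, 4, 5]  (iterable -> extend)
# -> [[1, 3], [2, 3], [1, 4, 5], [2, 4, 5]]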
@@ -276,6 +281,7 @@ def str_concept(t):
>>> assert str_concept((None, "id")) == "c:|id:"
>>> assert str_concept(("key", None)) == "c:key:"
>>> assert str_concept((None, None)) == ""
>>> assert str_concept(Concept(key="foo", id="bar")) == "c:foo|bar:"
:param t:
:return:
"""
@@ -297,6 +303,12 @@ def unstr_concept(concept_repr):
"""
if concept_repr is like :c:key:id:
return the key and the id
>>> assert unstr_concept("c:key:") == "key"
>>> assert unstr_concept("c:key|id:") == ("key", "id")
>>> assert unstr_concept("c:|id:") == ("None", "id")
>>> assert unstr_concept("c:key|:") == ("key", "None")
>>> # Otherwise, return (None,None)

:param concept_repr:
:return:
"""
@@ -371,3 +383,25 @@ def decode_concept(text):
return key, id_, use_concept

return None, None, None


def tokens_index(tokens, sub_tokens, skip=0):
"""
Index of the sub tokens in tokens
:param tokens: tokens
:param sub_tokens: sub tokens to search
:param skip: number of matches to skip
:return:
"""
expected = [token.value for token in sub_tokens if token.type != TokenKind.EOF]
for i in range(0, len(tokens) - len(expected) + 1):
for j in range(len(expected)):
if tokens[i + j].value != expected[j]:
break
else:
if skip == 0:
return i
else:
skip -= 1

raise ValueError(f"sub tokens '{sub_tokens}' not found")
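A small worked example (tokens reduced to their .value fields; EOF tokens are filtered out as above):

# With token values ["a", "+", "b", "+", "c"] and sub token values ["+"]:
#   tokens_index(tokens, sub)          -> 1  (first match)
#   tokens_index(tokens, sub, skip=1)  -> 3  (second match)
# An absent sub sequence raises ValueError.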

@@ -5,7 +5,7 @@ from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.tokenizer import TokenKind
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseParser import NotInitializedNode
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.PythonParser import PythonNode
import core.utils

@@ -1,6 +1,7 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BnfNodeParser import ConceptNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode



@@ -0,0 +1,51 @@
from core.builtin_concepts import BuiltinConcepts
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
from parsers.BaseParser import BaseParser


class MultipleErrorsEvaluator(AllReturnValuesEvaluator):
"""
Used to reduce multiple evaluator errors into a single one
All parser errors will be discarded
Cannot match if there is at least one successful evaluator
"""
NAME = "MultipleErrors"

def __init__(self):
super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 30)
self.return_values_in_error = []

def matches(self, context, return_values):
nb_evaluators_in_error = 0
to_process = False

for ret in return_values:
if ret.status and (ret.who.startswith(self.PREFIX) or ret.who.startswith(BaseParser.PREFIX)):
return False
elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
to_process = True
self.eaten.append(ret)
elif not ret.status and ret.who.startswith(self.PREFIX):
nb_evaluators_in_error += 1
self.return_values_in_error.append(ret)
self.eaten.append(ret)
elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
self.eaten.append(ret)
# else:
# other concepts. We do not care whether they are successful or not
# They won't be part of the result nor part of the parent
# --> So they will be handled by other evaluators

return to_process and nb_evaluators_in_error > 1

def eval(self, context, return_values):
context.log(f"{len(self.return_values_in_error)} return value(s) in error, {len(self.eaten)} item(s) eaten",
who=self)
context.log(f"{self.return_values_in_error}", who=self)

sheerka = context.sheerka
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.MULTIPLE_ERRORS, body=self.return_values_in_error),
parents=self.eaten)
@@ -31,6 +31,10 @@ class OneErrorEvaluator(AllReturnValuesEvaluator):
self.eaten.append(ret)
elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
self.eaten.append(ret)
# else:
# other concepts. We do not care whether they are successful or not
# They won't be part of the result nor part of the parent
# --> So they will be handled by other evaluators

return to_process and nb_evaluators_in_error == 1


@@ -1,4 +1,5 @@
import copy
import traceback
from enum import Enum

from core.ast.visitors import UnreferencedNamesVisitor
@@ -59,7 +60,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
return sheerka.ret(self.name, True, evaluated, parents=[return_value])

except Exception as error:
context.log_error(error, self.name)
context.log_error(error, who=self.name, exc=traceback.format_exc())
error = sheerka.new(BuiltinConcepts.ERROR, body=error)
return sheerka.ret(self.name, False, error, parents=[return_value])


@@ -0,0 +1,369 @@
import copy
from dataclasses import dataclass

from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode

PARSERS = ["BnfNode", "SyaNode", "Python"]


@dataclass()
class TokensNodeFound(ErrorNode):
expected_tokens: list

def __eq__(self, other):
if id(other) == id(self):
return True

if not isinstance(other, UnexpectedTokenErrorNode):
return False

if self.message != other.message:
return False

if self.token.type != other.token.type or self.token.value != other.token.value:
return False

if len(self.expected_tokens) != len(other.expected_tokens):
return False

for i, t in enumerate(self.expected_tokens):
if t != other.expected_tokens[i]:
return False

return True

def __hash__(self):
return hash((self.message, self.token, self.expected_tokens))


class AtomConceptParserHelper:
def __init__(self, context):

self.context = context
self.debug = []
self.sequence = []  # sequence of concepts already found
self.current_concept: ConceptNode = None  # concept being parsed
self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of tokens positions
self.expected_tokens = None  # expected tokens for this concept
self.is_locked = False
self.errors = []
self.has_unrecognized = False
self.forked = []  # used to duplicate AtomConceptParserHelper. See manage_unrecognized()

def __eq__(self, other):
if id(other) == id(self):
return True

if not isinstance(other, AtomConceptParserHelper):
return False

if len(self.sequence) != len(other.sequence):
return False

for item_self, item_other in zip(self.sequence, other.sequence):
if item_self != item_other:
return False

return True

def __hash__(self):
return hash(len(self.sequence))

def __repr__(self):
return f"{self.sequence}"

def lock(self):
self.is_locked = True

def reset(self):
self.is_locked = False

def has_error(self):
return len(self.errors) > 0

def eat_token(self, token, pos):
if not self.expected_tokens:
return False

self.debug.append(token)

if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
self.errors.append(UnexpectedTokenErrorNode(
f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
token,
[self.expected_tokens[0]]))
return False

self.current_concept.end = pos
del self.expected_tokens[0]

if not self.expected_tokens:
# the concept is fully matched
self.sequence.append(self.current_concept)
self.expected_tokens = None

return True

def eat_concept(self, concept, pos):
if self.is_locked:
return

self.debug.append(concept)
self.manage_unrecognized()
for forked in self.forked:
# handle the clones that may have been forked
forked.eat_concept(concept, pos)

concept_node = ConceptNode(concept, pos, pos)
expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]

if not expected:
# the concept is already matched
self.sequence.append(concept_node)
else:
self.current_concept = concept_node
self.expected_tokens = expected

def manage_unrecognized(self):
if self.unrecognized_tokens.is_empty():
return

# do not put an empty UnrecognizedTokensNode in the output
if self.unrecognized_tokens.is_whitespace():
self.unrecognized_tokens.reset()
return

self.unrecognized_tokens.fix_source()

# try to recognize concepts
nodes_sequences = self._get_lexer_nodes_from_unrecognized()
if nodes_sequences:
instances = [self]
for i in range(len(nodes_sequences) - 1):
clone = self.clone()
instances.append(clone)
self.forked.append(clone)

for instance, node_sequence in zip(instances, nodes_sequences):
for node in node_sequence:
instance.sequence.append(node)
if isinstance(node, UnrecognizedTokensNode) or \
hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
instance.has_unrecognized = True
instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

else:
self.sequence.append(self.unrecognized_tokens)
self.has_unrecognized = True

# create another instance
self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

def eat_unrecognized(self, token, pos):
if self.is_locked:
return

self.debug.append(token)
self.unrecognized_tokens.add_token(token, pos)

def finalize(self):
if len(self.sequence) > 0:
self.manage_unrecognized()
for forked in self.forked:
# handle the clones that may have been forked
forked.finalize()

if self.expected_tokens:
self.errors.append(TokensNodeFound(self.expected_tokens))

def clone(self):
clone = AtomConceptParserHelper(self.context)
clone.debug = self.debug[:]
clone.sequence = self.sequence[:]
clone.current_concept = self.current_concept.clone() if self.current_concept else None
clone.unrecognized_tokens = self.unrecognized_tokens.clone()
clone.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
clone.is_locked = self.is_locked
clone.errors = self.errors[:]
clone.has_unrecognized = self.has_unrecognized
return clone

def _get_lexer_nodes_from_unrecognized(self):
"""
Use the source of self.unrecognized_tokens to find concepts or source code
:return:
"""

res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)

if not only_parsers_results.status:
return None

return builtin_helpers.get_lexer_nodes(
only_parsers_results.body.body,
self.unrecognized_tokens.start,
self.unrecognized_tokens.tokens)


class AtomNodeParser(BaseNodeParser):
"""
Parser used to recognize atom concepts or sequences of atom concepts
An atom concept is a concept that does not have any property, though it may have a body

So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter)
This parser can recognize the sequence 'one two three'
as [ConceptNode(one), ConceptNode(two), ConceptNode(three)]
It can partly recognize 'one x$1!! two three'
as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)]
It cannot recognize concepts with parameters (non atom)
ex: 'one plus two' won't be recognized as ConceptNode(plus, one, two)
it will be [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)]

Note 'one plus two' will be recognized by the SyaParser
"""

def __init__(self, **kwargs):
super().__init__("AtomNode", 50, **kwargs)
self.enabled = False

@staticmethod
def _is_eligible(concept):
"""
Predicate that selects concepts that must be handled by AtomNodeParser
:param concept:
:return:
"""
return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF

def get_concepts_sequences(self):

forked = []

def _add_forked_to_concept_parser_helpers():
# check if some new InfixToPostfix are created
for parser in concept_parser_helpers:
if len(parser.forked) > 0:
forked.extend(parser.forked)
parser.forked.clear()
if len(forked) > 0:
concept_parser_helpers.extend(forked)
forked.clear()

concept_parser_helpers = [AtomConceptParserHelper(self.context)]

while self.next_token(False):
for concept_parser in concept_parser_helpers:
concept_parser.reset()

token = self.token

try:
for concept_parser in concept_parser_helpers:
if concept_parser.eat_token(self.token, self.pos):
concept_parser.lock()

concepts = self.get_concepts(token, self._is_eligible)
if not concepts:
for concept_parser in concept_parser_helpers:
concept_parser.eat_unrecognized(token, self.pos)
continue

if len(concepts) == 1:
for concept_parser in concept_parser_helpers:
concept_parser.eat_concept(concepts[0], self.pos)
continue

# make the cartesian product
temp_res = []
for concept_parser in concept_parser_helpers:
if concept_parser.is_locked:
# It means that it already ate the token
# so simply add it, do not clone
temp_res.append(concept_parser)
continue

for concept in concepts:
clone = concept_parser.clone()
temp_res.append(clone)
clone.eat_concept(concept, self.pos)

concept_parser_helpers = temp_res
finally:
_add_forked_to_concept_parser_helpers()

# make sure that remaining items in the stack are moved to the output
for concept_parser in concept_parser_helpers:
concept_parser.reset()
concept_parser.finalize()
_add_forked_to_concept_parser_helpers()

return concept_parser_helpers
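The clone-per-ambiguous-concept step above, reduced to plain lists (a standalone sketch, not the real helper objects):

helpers = [["one"]]             # each helper carries a partial sequence
candidates = ["plus", "minus"]  # one token matched two eligible concepts
helpers = [h + [c] for h in helpers for c in candidates]
assert helpers == [["one", "plus"], ["one", "minus"]]  # both readings continue in parallel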

def get_valid(self, concept_parser_helpers):
valid_parser_helpers = []  # be careful, it will be a list of lists
for parser_helper in concept_parser_helpers:
if parser_helper.has_error():
continue

if len(parser_helper.sequence) == 0:
continue

for node in parser_helper.sequence:
node.tokens = self.tokens[node.start:node.end + 1]
node.fix_source()

if parser_helper in valid_parser_helpers:
continue

valid_parser_helpers.append(parser_helper)

return valid_parser_helpers

def parse(self, context, parser_input):
if parser_input == "":
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)

if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

parser_helpers = self.get_valid(self.get_concepts_sequences())

if len(parser_helpers):
ret = []
for parser_helper in parser_helpers:
ret.append(
self.sheerka.ret(
self.name,
not parser_helper.has_unrecognized,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
body=parser_helper.sequence,
try_parsed=parser_helper.sequence)))

if len(ret) == 1:
self.log_result(context, parser_input, ret[0])
return ret[0]
else:
self.log_multiple_results(context, parser_input, ret)
return ret

else:
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
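A hedged end-to-end sketch of the behaviour the class docstring describes (the context/sheerka wiring is assumed):

# parser = AtomNodeParser(sheerka=sheerka)
# ret = parser.parse(context, "one two three")
# ret.status     -> True when everything was recognized
# ret.body.body  -> [ConceptNode(one), ConceptNode(two), ConceptNode(three)]
# parser.parse(context, "") -> ret(False, IS_EMPTY), handled first in parse() above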
|
||||
@@ -0,0 +1,669 @@
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import VARIABLE_PREFIX, Concept
|
||||
from core.sheerka.ExecutionContext import ExecutionContext
|
||||
from core.tokenizer import TokenKind, LexerError, Token
|
||||
from parsers.BaseParser import Node, BaseParser, ErrorNode
|
||||
|
||||
DEBUG_COMPILED = True
|
||||
|
||||
|
||||
@dataclass()
|
||||
class LexerNode(Node):
|
||||
start: int # starting index in the tokens list
|
||||
end: int # ending index in the tokens list
|
||||
tokens: list = None # tokens
|
||||
source: str = None # string representation of what was parsed
|
||||
|
||||
def __post_init__(self):
|
||||
if self.source is None:
|
||||
self.source = BaseParser.get_text_from_tokens(self.tokens)
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, LexerNode):
|
||||
return False
|
||||
|
||||
return self.start == other.start and \
|
||||
self.end == other.end and \
|
||||
self.source == other.source and \
|
||||
self.tokens == other.tokens
|
||||
|
||||
def fix_source(self, force=True):
|
||||
if force or self.source is None:
|
||||
self.source = BaseParser.get_text_from_tokens(self.tokens)
|
||||
return self
|
||||
|
||||
|
||||
class UnrecognizedTokensNode(LexerNode):
|
||||
def __init__(self, start, end, tokens):
|
||||
super().__init__(start, end, tokens)
|
||||
self.is_frozen = False
|
||||
self.parenthesis_count = 0
|
||||
|
||||
def freeze(self):
|
||||
self.is_frozen = True
|
||||
|
||||
def reset(self):
|
||||
self.start = self.end = -1
|
||||
self.tokens.clear()
|
||||
self.is_frozen = False
|
||||
self.parenthesis_count = 0
|
||||
|
||||
def has_open_paren(self):
|
||||
return self.parenthesis_count > 0
|
||||
|
||||
def add_token(self, token, pos):
|
||||
if self.is_frozen:
|
||||
raise Exception("The node is frozen")
|
||||
|
||||
if self.end != -1 and pos == self.end + 2:
|
||||
# add the missing whitespace
|
||||
p = self.tokens[-1] # previous token
|
||||
self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1))
|
||||
|
||||
self.tokens.append(token)
|
||||
self.end = pos
|
||||
if self.start == -1:
|
||||
self.start = pos
|
||||
|
||||
if token.type == TokenKind.LPAR:
|
||||
self.parenthesis_count += 1
|
||||
|
||||
if token.type == TokenKind.RPAR:
|
||||
self.parenthesis_count -= 1
|
||||
|
||||
return self
|
||||
|
||||
def not_whitespace(self):
|
||||
return not self.is_whitespace()
|
||||
|
||||
def is_whitespace(self):
|
||||
for t in self.tokens:
|
||||
if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
|
||||
return False
|
||||
return True
|
||||
|
||||
def is_empty(self):
|
||||
return len(self.tokens) == 0
|
||||
|
||||
def __eq__(self, other):
|
||||
if isinstance(other, utnode):
|
||||
return self.start == other.start and \
|
||||
self.end == other.end and \
|
||||
self.source == other.source
|
||||
|
||||
if isinstance(other, UTN):
|
||||
return other == self
|
||||
|
||||
if not isinstance(other, UnrecognizedTokensNode):
|
||||
return False
|
||||
|
||||
return self.start == other.start and \
|
||||
self.end == other.end and \
|
||||
self.source == other.source
|
||||
    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"

    def clone(self):
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        clone.is_frozen = self.is_frozen
        clone.parenthesis_count = self.parenthesis_count
        return clone


class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying
        self.fix_source(False)

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, (CN, CNC)):
            return other == self

        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                   self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return self.concept == other.concept and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
        if DEBUG_COMPILED:
            for k, v in self.concept.compiled.items():
                text += f", {k}='{v}'"
        return text + ")"

    def clone(self):
        # do we need to clone the concept as well?
        clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
        return clone


class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized
    """

    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
        super().__init__(start, end, tokens, source)
        self.node = node  # the PythonNode (or whatever language node) that was found
        self.return_value = return_value  # original result of the parsing

    def __eq__(self, other):
        if isinstance(other, scnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if not isinstance(other, SourceCodeNode):
            return False

        return self.node == other.node and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"


class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of a temporary version of SourceCodeNode:
    I know that there is some code,
    I know that there are some concepts,
    I just don't want to write the glue yet.

    So I push all the nodes into one big bag.
    """

    def __init__(self, first_node, last_node, content_nodes=None):
        super().__init__(9999, -1, None)  # why not sys.maxsize?
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = False
        self.fix_all_pos()

    def add_node(self, node):
        self.nodes.append(node)
        self.fix_pos(node)

        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, SourceCodeWithConceptNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if self.first != other.first:
            return False

        if self.last != other.last:
            return False

        if len(self.nodes) != len(other.nodes):
            return False

        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False

        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start

        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end
        return self

    def pseudo_fix_source(self):
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def clone(self):
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes)
        return clone


@dataclass()
class GrammarErrorNode(ErrorNode):
    message: str


class SyaAssociativity(Enum):
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        return self.value
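
# A minimal sketch of a sya_definitions table, with assumed concept keys and
# numbers: each entry maps a concept to (precedence, associativity), the raw
# shape that init_from_sheerka() further down converts with SyaAssociativity(v[1]).
#     sya_definitions = {
#         "plus": (10, "left"),    # -> SyaAssociativity.Left
#         "times": (20, "left"),   # -> SyaAssociativity.Left
#         "power": (30, "right"),  # -> SyaAssociativity.Right
#     }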


cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")


@dataclass(init=False)
class SCWC:
    """
    SourceCodeWithConceptNode tester class
    It matches a SourceCodeWithConceptNode
    but is easier to instantiate during the tests
    """
    first: LexerNode
    last: LexerNode
    content: tuple

    def __init__(self, first, last, *args):
        self.first = first
        self.last = last
        self.content = args


class HelperWithPos:
    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end

        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    def fix_pos(self, node):
        if not self.start_is_fixed:
            start = node.start if hasattr(node, "start") else \
                node[0] if isinstance(node, tuple) else None

            if start is not None and (self.start is None or start < self.start):
                self.start = start

        if not self.end_is_fixed:
            end = node.end if hasattr(node, "end") else \
                node[1] if isinstance(node, tuple) else None

            if end is not None and (self.end is None or end > self.end):
                self.end = end
        return self


class CN(HelperWithPos):
    """
    ConceptNode tester class
    It matches a ConceptNode but with fewer constraints

    CN == ConceptNode if the concept key, start, end and source are the same
    """

    def __init__(self, concept, start=None, end=None, source=None):
        """

        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.source = source
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return True

        if not isinstance(other, CN):
            return False

        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CN(concept='{self.concept}'"
        else:
            txt = f"CN(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
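
# Hypothetical test usage of CN (names assumed): only the fields that are set
# constrain the match against a ConceptNode, so a test can pin the concept key
# and ignore the positions entirely.
#     node = ConceptNode(some_concept, start=0, end=3, source="foo 1")
#     assert node == CN(some_concept.key)  # start/end left unset, so not checked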


class CNC(CN):
    """
    ConceptNode-for-Compiled tester class
    It matches a ConceptNode
    but focuses on the 'compiled' property of the concept

    CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
    """

    def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.compiled == other.concept.compiled  # an assert instead of a return would help debugging tests

        if not isinstance(other, CNC):
            return False

        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.compiled == other.compiled

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"

        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"


class BaseNodeParser(BaseParser):
    def __init__(self, name, priority, **kwargs):
        super().__init__(name, priority)
        if 'sheerka' in kwargs:
            sheerka = kwargs.get("sheerka")
            self.init_from_sheerka(sheerka)

        else:
            self.concepts_by_first_keyword = None
            self.sya_definitions = None

        self.token = None
        self.pos = -1
        self.tokens = None

        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None

    def init_from_sheerka(self, sheerka):
        """
        Use the definitions from Sheerka to initialize
        :param sheerka:
        :return:
        """
        self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
        if sheerka.sya_definitions:
            self.sya_definitions = {}
            for k, v in sheerka.sya_definitions.items():
                self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka
        self.text = text

        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False

        self.token = None
        self.pos = -1
        return True

    def add_error(self, error, next_token=True):
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self.token

    def next_token(self, skip_whitespace=True):
        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.pos += 1
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
        """
        To find a concept quickly, we store concepts in a hash whose key is the first keyword token of the concept.
        Examples:
            Concept("foo a").def_prop("a"): "foo" is a token, "a" is a variable,
            so the key to use will be "foo".

            Concept("a foo").def_prop("a") -> the first token is "foo".

            Concept("Hello my dear a").def_prop("a") -> the first token is "Hello".
        Note that there may be multiple entries under the same key;
        a B-tree may be a better implementation in the future.

        We also store sya_definitions, where each definition is a tuple (concept_precedence: int, concept_associativity: SyaAssociativity).
        :param context:
        :param concepts: list[Concept]
        :param sya_definitions: hash[concept_id, tuple(precedence: int, associativity: SyaAssociativity)]
        :param use_sheerka: first init with the definitions from Sheerka
        :return:
        """
        self.context = context
        self.sheerka = context.sheerka

        if use_sheerka:
            self.init_from_sheerka(self.sheerka)

        if sya_definitions:
            if self.sya_definitions:
                self.sya_definitions.update(sya_definitions)
            else:
                self.sya_definitions = sya_definitions

        if self.concepts_by_first_keyword is None:
            self.concepts_by_first_keyword = {}

        for concept in concepts:
            keywords = concept.key.split()
            for keyword in keywords:
                if keyword.startswith(VARIABLE_PREFIX):
                    continue

                self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
                break

        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
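
    # Illustration with the docstring's own examples (ids assumed): after
    # initialize() has seen Concept("foo a") and Concept("foo bar"), both ids
    # sit under their first non-variable keyword, so one lookup on a "foo"
    # token retrieves every candidate:
    #     self.concepts_by_first_keyword == {"foo": [foo_a_id, foo_bar_id]}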

    def get_concepts(self, token, to_keep, to_map=None):
        """
        Tries to find the concepts that match the value of the token
        :param token:
        :param to_keep: predicate that tells whether a concept is eligible
        :param to_map:
        :return:
        """

        if token.type == TokenKind.STRING:
            name = token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            name = token.value.value
        else:
            name = token.value

        result = []
        if name in self.concepts_by_first_keyword:
            for concept_id in self.concepts_by_first_keyword[name]:

                concept = self.sheerka.get_by_id(concept_id)

                if not to_keep(concept):
                    continue

                concept = to_map(concept) if to_map else concept
                result.append(concept)
            return result

        return None
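
    # Hypothetical call (predicate and mapping made up): fetch the candidate
    # concepts for a token, keeping everything and reducing each to its key.
    #     candidates = self.get_concepts(token,
    #                                    to_keep=lambda c: True,   # accept every match
    #                                    to_map=lambda c: c.key)   # keep only the keys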

    @staticmethod
    def get_token_value(token):
        if token.type == TokenKind.STRING:
            return token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            return token.value.value
        else:
            return token.value


class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode
    Compares the source, plus start and end when they are defined
    """

    def __init__(self, source, start=None, end=None):
        """
        :param source:
        :param start:
        :param end:
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, UnrecognizedTokensNode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if not isinstance(other, UTN):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
@@ -1,8 +1,8 @@
from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords, Token
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.sheerka_logger import get_logger
import core.utils
import logging
@@ -77,7 +77,6 @@ class BaseParser:
        self.priority = priority
        self.enabled = enabled

        self.has_error = False
        self.error_sink = []

    def __eq__(self, other):
@@ -91,9 +90,13 @@ class BaseParser:
    def __repr__(self):
        return self.name

    def parse(self, context, text):
    def parse(self, context, parser_input):
        pass

    @property
    def has_error(self):
        return len(self.error_sink) > 0

    def log_result(self, context, source, ret):
        if not self.log.isEnabledFor(logging.DEBUG):
            return
@@ -132,6 +135,53 @@ class BaseParser:
            body=self.error_sink if self.has_error else tree,
            try_parsed=try_parse)

    def get_input_as_text(self, parser_input, custom_switcher=None):
        if isinstance(parser_input, list):
            return self.get_text_from_tokens(parser_input, custom_switcher)

        if isinstance(parser_input, ParserResultConcept):
            parser_input = parser_input.source

        if "c:" in parser_input:
            return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher)

        return parser_input

    def get_input_as_tokens(self, parser_input):
        if isinstance(parser_input, list):
            return self.add_eof_if_needed(parser_input)

        if isinstance(parser_input, ParserResultConcept):
            if parser_input.tokens:
                return self.add_eof_if_needed(parser_input.tokens)
            else:
                return Tokenizer(parser_input.source)

        return Tokenizer(parser_input)
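
    # Sketch of the intent, with assumed inputs: get_input_as_tokens() lets a
    # parser accept the three shapes interchangeably.
    #     self.get_input_as_tokens("foo 1")        # raw text -> Tokenizer
    #     self.get_input_as_tokens(token_list)     # tokens, EOF appended if missing
    #     self.get_input_as_tokens(parser_result)  # reuse a previous parser's tokens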

    def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
        if not isinstance(parser_input, ParserResultConcept):
            return None

        if expected_parser and parser_input.parser != expected_parser:
            return None

        if len(parser_input.value) == 0:
            return None

        from parsers.BaseNodeParser import LexerNode  # local import to avoid a circular dependency
        for node in parser_input.value:
            if not isinstance(node, LexerNode):
                return None

        return parser_input.value

    @staticmethod
    def add_eof_if_needed(lst):
        if len(lst) == 0 or lst[-1].type != TokenKind.EOF:
            lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
        return lst

    @staticmethod
    def get_text_from_tokens(tokens, custom_switcher=None):
        if tokens is None:

@@ -9,147 +9,17 @@
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
import core.utils


@dataclass()
class LexerNode(Node):
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.tokens == other.tokens


class UnrecognizedTokensNode(LexerNode):
    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)

    def add_token(self, token, pos):
        self.tokens.append(token)
        self.end = pos

    def fix_source(self):
        self.source = BaseParser.get_text_from_tokens(self.tokens)

    def not_whitespace(self):
        return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))

    def __eq__(self, other):
        if isinstance(other, utnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if not isinstance(other, UnrecognizedTokensNode):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"


class ConceptNode(LexerNode):
    """
    Returned by the ConceptLexerParser
    It represents a recognized concept
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying

        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                   self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return self.concept == other.concept and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"


class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized
    """

    def __init__(self, node, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        self.node = node  # the PythonNode (or whatever language node) that was found

    def __eq__(self, other):
        if isinstance(other, scnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source

        if not isinstance(other, SourceCodeNode):
            return False

        return self.node == other.node and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"


cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")


class NonTerminalNode(LexerNode):
    """
    Returned by the ConceptLexerParser
    Returned by the BnfNodeParser
    """

    def __init__(self, parsing_expression, start, end, tokens, children=None):
@@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode):

class TerminalNode(LexerNode):
    """
    Returned by the ConceptLexerParser
    Returned by the BnfNodeParser
    """

    def __init__(self, parsing_expression, start, end, value):
@@ -205,11 +75,6 @@ class TerminalNode(LexerNode):
        return hash((self.parsing_expression, self.start, self.end, self.value))


@dataclass()
class GrammarErrorNode(ErrorNode):
    message: str


@dataclass()
class UnknownConceptNode(ErrorNode):
    concept_key: str
@@ -574,9 +439,9 @@ class StrMatch(Match):
        return None


class ConceptLexerParser(BaseParser):
class BnfNodeParser(BaseParser):
    def __init__(self, **kwargs):
        super().__init__("ConceptLexer", 50)
        super().__init__("BnfNode", 50)
        if 'grammars' in kwargs:
            self.concepts_grammars = kwargs.get("grammars")
        elif 'sheerka' in kwargs:
@@ -595,7 +460,6 @@ class ConceptLexerParser(BaseParser):
        self.sheerka = None

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
@@ -606,16 +470,11 @@ class ConceptLexerParser(BaseParser):
        self.sheerka = context.sheerka
        self.text = text

        if isinstance(text, str):
            try:
                self.tokens = list(Tokenizer(text))
            except core.tokenizer.LexerError as e:
                self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
                return False

        else:
            self.tokens = list(text)
            self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1))  # make sure to finish with an end-of-file token
        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except core.tokenizer.LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False

        self.token = None
        self.pos = -1
@@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser):
            removed_concepts.append(e)
        return removed_concepts

    def parse(self, context, text):
        if text == "":
    def parse(self, context, parser_input):
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )

        if not self.reset_parser(context, text):
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
@@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser):
                self.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=text,
                    source=parser_input,
                    body=choice,
                    try_parsed=choice)))

        if len(ret) == 1:
            self.log_result(context, text, ret[0])
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, text, ret)
            self.log_multiple_results(context, parser_input, ret)
            return ret

    def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
@@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser):
            _concept.compiled[prop_name] = new_value

        def _look_for_concept_match(_underlying):
            """
            At some point, either a StrMatch or a ConceptMatch
            allowed the recognition.
            Look for the ConceptMatch, with recursion if needed
            """
            if isinstance(_underlying.parsing_expression, ConceptExpression):
                return _underlying

@@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser):
        def _get_underlying_value(_underlying):
            concept_match_node = _look_for_concept_match(_underlying)
            if concept_match_node:
                # the value is a concept
                if id(concept_match_node) in _underlying_value_cache:
                    result = _underlying_value_cache[id(concept_match_node)]
                else:
@@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser):
                    result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
                    _underlying_value_cache[id(concept_match_node)] = result
            else:
                # the value is a string
                result = DoNotResolve(_underlying.source)

            return result
@@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser):
            concept.compiled[ConceptParts.BODY] = value
            if underlying.parsing_expression.rule_name:
                _add_prop(concept, underlying.parsing_expression.rule_name, value)
            # KSI: why don't we set concept.metadata.need_validation to True?

        if isinstance(underlying, NonTerminalNode):
            for node in underlying.children:
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
    StrMatch, ConceptGroupExpression


@@ -30,7 +30,6 @@ class BnfParser(BaseParser):

    def __init__(self, **kwargs):
        super().__init__("Bnf", 50, False)
        # self.has_error = False
        # self.error_sink = []
        # self.name = BaseParser.PREFIX + "Bnf"

@@ -61,7 +60,6 @@ class BnfParser(BaseParser):
        self.eat_white_space()

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
@@ -115,11 +113,11 @@ class BnfParser(BaseParser):
        token = self.get_token()
        return token.type == second or token.type == first and self.next_after().type == second

    def parse(self, context: ExecutionContext, text):
    def parse(self, context: ExecutionContext, parser_input):

        tree = None
        try:
            self.reset_parser(context, text)
            self.reset_parser(context, parser_input)
            tree = self.parse_choice()

            token = self.get_token()

@@ -1,10 +1,14 @@
# try to match something like
# ConceptNode 'plus' ConceptNode
#
# Replaced by SyaNodeParser
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging

multiple_concepts_parser = MultipleConceptsParser()

@@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
    def __init__(self, **kwargs):
        super().__init__("ConceptsWithConcepts", 25)
        self.enabled = False

    @staticmethod
    def get_tokens(nodes):
@@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser):

        return concept

    def parse(self, context, text):
    def parse(self, context, parser_input):
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
        nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
        if not nodes:
            return None

        if not text.parser == multiple_concepts_parser:
            return None

        nodes = text.body

        concept_key = self.get_key(nodes)
        concept = sheerka.new(concept_key)
        if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))

        concepts = concept if hasattr(concept, "__iter__") else [concept]
        for concept in concepts:
@@ -101,7 +102,7 @@ class ConceptsWithConceptsParser(BaseParser):
            sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text.source,
                source=parser_input.source,
                body=concept,
                try_parsed=None)))


@@ -110,7 +110,7 @@ class DefaultParser(BaseParser):
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Default", 50)
        BaseParser.__init__(self, "Default", 60)
        self.lexer_iter = None
        self._current = None
        self.context: ExecutionContext = None
@@ -168,7 +168,6 @@ class DefaultParser(BaseParser):
        self.next_token()

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
@@ -188,19 +187,19 @@ class DefaultParser(BaseParser):

        return

    def parse(self, context, text):
    def parse(self, context, parser_input):
        # default parser can only manage string text
        if not isinstance(text, str):
        if not isinstance(parser_input, str):
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
            self.log_result(context, text, ret)
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
            self.log_result(context, parser_input, ret)
            return ret

        tree = None
        try:
            self.reset_parser(context, text)
            self.reset_parser(context, parser_input)
            tree = self.parse_statement()
        except core.tokenizer.LexerError as e:
            self.add_error(e, False)
@@ -211,7 +210,7 @@ class DefaultParser(BaseParser):
        if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
            body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
        else:
            body = self.get_return_value_body(context.sheerka, text, tree, tree)
            body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
        # body = self.sheerka.new(
        #     BuiltinConcepts.PARSER_RESULT,
        #     parser=self,
@@ -224,7 +223,7 @@ class DefaultParser(BaseParser):
            not self.has_error,
            body)

        self.log_result(context, text, ret)
        self.log_result(context, parser_input, ret)
        return ret

    def parse_statement(self):
@@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser):
    def __init__(self, **kwargs):
        BaseParser.__init__(self, "EmptyString", 90)

    def parse(self, context, text):
    def parse(self, context, parser_input):
        sheerka = context.sheerka

        if isinstance(text, str) and text.strip() == "" or \
                isinstance(text, list) and text == [] or \
                text is None:
        if isinstance(parser_input, str) and parser_input.strip() == "" or \
                isinstance(parser_input, list) and parser_input == [] or \
                parser_input is None:
            ret = sheerka.ret(self.name, True, sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
@@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser):
        else:
            ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))

        self.log_result(context, text, ret)
        self.log_result(context, parser_input, ret)
        return ret

@@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser):
    def __init__(self, **kwargs):
        BaseParser.__init__(self, "ExactConcept", 80)

    def parse(self, context, text):
    def parse(self, context, parser_input):
        """
        The input can be a string, but it can also be a list of tokens
        :param context:
        :param text:
        :param parser_input:
        :return:
        """

        context.log(f"Parsing '{text}'", self.name)
        context.log(f"Parsing '{parser_input}'", self.name)
        res = []
        sheerka = context.sheerka
        try:
            words = self.get_words(text)
            words = self.get_words(parser_input)
        except LexerError as e:
            context.log(f"Error found in tokenizer {e}", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))

        if len(words) > self.MAX_WORDS_SIZE:
            context.log(f"Max words reached. Stopping.", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))

        recognized = False
        for combination in self.combinations(words):
@@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser):
            context.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text if isinstance(text, str) else self.get_text_from_tokens(text),
                source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
                body=concept,
                try_parsed=concept)))
            recognized = True

        if recognized:
            if len(res) == 1:
                self.log_result(context, text, res[0])
                self.log_result(context, parser_input, res[0])
            else:
                self.log_multiple_results(context, text, res)
                self.log_multiple_results(context, parser_input, res)
                return res
            return res

        ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
        self.log_result(context, text, ret)
        ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
        self.log_result(context, parser_input, ret)
        return ret

    @staticmethod
    def get_words(text):
        tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
    def get_words(self, text):
        tokens = self.get_input_as_tokens(text)
        res = []
        for t in tokens:
            if t.type == TokenKind.EOF:

@@ -1,18 +1,20 @@
# to be replaced by SyaNodeParser
import ast

from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser

concept_lexer_parser = ConceptLexerParser()
concept_lexer_parser = BnfNodeParser()


class MultipleConceptsParser(BaseParser):
    """
    Parser that will take the result of ConceptLexerParser and
    Parser that will take the result of BnfNodeParser and
    try to resolve the unrecognized tokens token by token

    It is a success when it returns a list of ConceptNode exclusively
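
    As a reading aid, a hypothetical chaining of the two parsers
    (helper objects assumed), where this parser refines the first result:
        result = concept_lexer_parser.parse(context, "foo 1 bar")
        refined = MultipleConceptsParser().parse(context, result)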
@@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser):

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "MultipleConcepts", 45)
        self.enabled = False

    @staticmethod
    def finalize(nodes_found, unrecognized_tokens):
@@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser):
        unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
        return unrecognized_tokens

    def parse(self, context, text):
    def parse(self, context, parser_input):
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
        nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
        if not nodes:
            return None

        if not text.parser == concept_lexer_parser:
            return None

        sheerka = context.sheerka
        nodes = text.value
        nodes_found = [[]]
        concepts_only = True

@@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser):
            sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text.source,
                source=parser_input.source,
                body=choice,
                try_parsed=None))
        )

        if len(ret) == 1:
            self.log_result(context, text.source, ret[0])
            self.log_result(context, parser_input.source, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, text.source, ret)
            self.log_multiple_results(context, parser_input.source, ret)
            return ret

    @staticmethod
@@ -1,4 +1,4 @@
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
@@ -6,7 +6,7 @@ import ast
import logging
import core.utils

from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode

log = logging.getLogger(__name__)

@@ -67,7 +67,7 @@ class PythonParser(BaseParser):
        BaseParser.__init__(self, "Python", 50)
        self.source = kwargs.get("source", "<undef>")

    def parse(self, context, text):
    def parse(self, context, parser_input):
        sheerka = context.sheerka
        tree = None

@@ -76,15 +76,9 @@ class PythonParser(BaseParser):
        }

        try:
            if isinstance(text, str) and "c:" in text:
                source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
            elif isinstance(text, str):
                source = text
            else:
                source = self.get_text_from_tokens(text, python_switcher)
            source = self.get_input_as_text(parser_input, python_switcher)
            source = source.strip()

            text = text if isinstance(text, str) else source
            parser_input = parser_input if isinstance(parser_input, str) else source

            # first, try to parse an expression
            res, tree, error = self.try_parse_expression(source)
@@ -92,25 +86,32 @@ class PythonParser(BaseParser):
            # then try to parse a statement
            res, tree, error = self.try_parse_statement(source)
            if not res:
                self.has_error = True
                error_node = PythonErrorNode(text, error)
                error_node = PythonErrorNode(parser_input, error)
                self.error_sink.append(error_node)

        except LexerError as e:
            self.has_error = True
            self.error_sink.append(e)

        ret = sheerka.ret(
            self.name,
            not self.has_error,
            sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else PythonNode(text, tree),
                try_parsed=None))
        if self.has_error:
            ret = sheerka.ret(
                self.name,
                False,
                sheerka.new(
                    BuiltinConcepts.NOT_FOR_ME,
                    body=parser_input,
                    reason=self.error_sink))
        else:
            ret = sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=PythonNode(parser_input, tree),
                    try_parsed=None))

        self.log_result(context, text, ret)
        self.log_result(context, parser_input, ret)
        return ret

    def try_parse_expression(self, text):

@@ -1,10 +1,11 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser

multiple_concepts_parser = MultipleConceptsParser()
unrecognized_nodes_parser = UnrecognizedNodeParser()


class PythonWithConceptsParser(BaseParser):
@@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser):
        res += c if c.isalnum() else "0"
        return res

    def parse(self, context, text):
    def parse(self, context, parser_input):
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
        nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
        if not nodes:
            return None

        if not text.parser == multiple_concepts_parser:
            return None

        nodes = text.body
        source = ""
        to_parse = ""
        identifiers = {}
@@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser):
            python_id = _get_identifier(concept)
            to_parse += python_id
            python_ids_mappings[python_id] = concept

        else:
            source += node.source
            to_parse += node.source

File diff suppressed because it is too large
@@ -0,0 +1,114 @@
from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils

PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]


@dataclass()
class CannotParseNode(ErrorNode):
    unrecognized: UnrecognizedTokensNode


class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya).
    It will try to resolve all UnrecognizedTokensNode.
    """

    def __init__(self, **kwargs):
        super().__init__("UnrecognizedNode", 45)  # lower than AtomNode, BnfNode and SyaNode

    def add_error(self, error):
        if hasattr(error, "__iter__"):
            self.error_sink.extend(error)
        else:
            self.error_sink.append(error)

    def parse(self, context, parser_input):
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            return None

        sequences_found = [[]]
        has_unrecognized = False

        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if not res.status:
                    self.add_error(res.body)
                else:
                    sequences_found = core.utils.product(sequences_found, [res.body])

            elif isinstance(node, UnrecognizedTokensNode):
                res = parse_unrecognized(context, node.source, PARSERS)
                res = only_successful(context, res)
                if res.status:
                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
                else:
                    sequences_found = core.utils.product(sequences_found, [node])
                    has_unrecognized = True

            else:  # cannot happen as of today :-)
                raise NotImplementedError()

        # a concept with an UnrecognizedTokensNode in its properties is considered a fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        ret = []
        for choice in sequences_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input,
                        body=choice,
                        try_parsed=choice)))

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret
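
    # A plausible pure-Python reading of the combination step above (the exact
    # semantics of core.utils.product are assumed, not shown in this commit):
    # each alternative multiplies the candidate sequences, like an incremental
    # Cartesian product.
    #     def product(sequences, alternatives):
    #         return [seq + [alt] for seq in sequences for alt in alternatives]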

    def validate_concept_node(self, context, concept_node):

        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find unrecognized tokens
            :param concept:
            :return:
            """
            for name, value in concept.compiled.items():
                if isinstance(value, Concept):
                    _validate_concept(value)

                elif isinstance(value, UnrecognizedTokensNode):
                    res = parse_unrecognized(context, value.tokens, PARSERS)
                    res = only_successful(context, res)  # only keep successful parsers
                    if res.status:
                        concept.compiled[name] = res.body.body
                    else:
                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))

        _validate_concept(concept_node.concept)
        if len(errors) > 0:
            return context.sheerka.ret(self.name, False, errors)
        else:
            return context.sheerka.ret(self.name, True, concept_node)
@@ -20,6 +20,9 @@ def json_default_converter(o):
    if isinstance(o, (date, datetime)):
        return o.isoformat()

    if isinstance(o, SheerkaDataProviderRef):
        return f"##XREF##:{o.target}"
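
    # Example with assumed values: the branch above lets json.dumps serialize
    # a reference as a tagged string instead of raising TypeError.
    #     json.dumps(ref, default=json_default_converter)
    #     # -> '"##XREF##:<target>"'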


class Event(object):
    """
@@ -389,7 +392,7 @@ class SheerkaDataProvider:
            return getattr(obj, Serializer.ORIGIN)

        if isinstance(obj, SheerkaDataProviderRef):
            return obj.original_target
        return obj.original_target

        return None

@@ -406,6 +409,11 @@ class SheerkaDataProvider:
    def is_reference(obj):
        return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)

    def reset(self):
        self.first_time = self.io.first_time
        if hasattr(self.io, "reset"):
            self.io.reset()

    def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False):
        """
        Adds obj to the entry 'entry'
@@ -999,3 +1007,12 @@ class SheerkaDataProvider:
        keys[entry] = value
        self.save_keys(keys)
        return str(value)

    def dump_state(self, digest=None):
        digest = digest or self.get_snapshot(SheerkaDataProvider.HeadFile)
        state = self.load_state(digest)
        print(json.dumps(state.data, sort_keys=True, default=json_default_converter, indent=True))

    def dump_obj(self, digest):
        obj = self.load_obj(digest)
        print(json.dumps(obj.__dict__, sort_keys=True, default=json_default_converter, indent=True))

@@ -170,6 +170,10 @@ class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):

        return io.BytesIO(self.cache[file_path]) if "b" in mode else io.StringIO(self.cache[file_path])

    def reset(self):
        self.cache.clear()
        self.first_time = True


def on_close(dictionary_io, file_path, stream):
    """