Added SyaNodeParser (finally, after one month)

This commit is contained in:
2020-04-09 15:42:36 +02:00
parent c9acfa99a1
commit 6c7c529016
56 changed files with 5322 additions and 404 deletions
+28 -1
View File
@@ -37,6 +37,8 @@ class BuiltinConcepts(Enum):
PARSER_RESULT = "parser result"
TOO_MANY_SUCCESS = "too many success" # when expecting a limited number of successful return value
TOO_MANY_ERRORS = "too many errors" # when expecting a limited number of error return values
ONLY_SUCCESSFUL = "only successful" # filter the result, only keep successful ones
MULTIPLE_ERRORS = "multiple errors" # filter the result, only keep evaluator in error
NOT_FOR_ME = "not for me" # a parser recognize that the entry is not meant for it
IS_EMPTY = "is empty" # when a set is empty
INVALID_RETURN_VALUE = "invalid return value" # the return value of an evaluator is not correct
@@ -45,6 +47,7 @@ class BuiltinConcepts(Enum):
CONCEPT_EVAL_ERROR = "concept evaluation error" # cannot evaluate a property or metadata of a concept
ENUMERATION = "enum" # represents a list or a set
LIST = "list" # represents a list
FILTERED = "filtered" # represents the result of a filtering
CONCEPT_ALREADY_IN_SET = "concept already in set"
EVALUATOR_PRE_PROCESS = "evaluator pre process" # used modify / tweak behaviour of evaluators
EVAL_BODY_REQUESTED = "eval body requested" # to evaluate the body
@@ -91,6 +94,7 @@ BuiltinErrors = [str(e) for e in {
BuiltinConcepts.UNKNOWN_PROPERTY,
BuiltinConcepts.TOO_MANY_SUCCESS,
BuiltinConcepts.TOO_MANY_ERRORS,
BuiltinConcepts.MULTIPLE_ERRORS,
BuiltinConcepts.INVALID_RETURN_VALUE,
BuiltinConcepts.CONCEPT_ALREADY_DEFINED,
BuiltinConcepts.CONCEPT_EVAL_ERROR,
@@ -249,11 +253,12 @@ class ParserResultConcept(Concept):
Result of a parsing
"""
def __init__(self, parser=None, source=None, value=None, try_parsed=None, validate_concept=None):
def __init__(self, parser=None, source=None, tokens=None, value=None, try_parsed=None):
super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT)
self.set_metadata_value(ConceptParts.BODY, value)
self.set_prop("parser", parser)
self.set_prop("source", source)
self.set_prop("tokens", tokens)
self.set_prop("try_parsed", try_parsed) # in case of error, what was found before the error
self.metadata.is_evaluated = True
@@ -372,6 +377,14 @@ class ListConcept(Concept):
# return item in self.body
class FilteredConcept(Concept):
    """
    Concept representing the result of filtering an iterable with a predicate.

    The body holds the kept items; the "iterable" and "predicate" properties
    record what was filtered and how, for traceability.
    """
    def __init__(self, filtered=None, iterable=None, predicate=None):
        # Built-in, keyed as FILTERED (same value used for key and name slots).
        super().__init__(BuiltinConcepts.FILTERED, True, False, BuiltinConcepts.FILTERED)
        # The filtered (kept) items become the body of the concept.
        self.set_metadata_value(ConceptParts.BODY, filtered)
        # NOTE: registration order matters if properties are later accessed
        # by index (see Concept.set_prop_by_index) — do not reorder.
        self.def_prop("iterable", iterable)    # the original iterable that was filtered
        self.def_prop("predicate", predicate)  # the predicate used (may be a string repr)
class ConceptAlreadyInSet(Concept):
def __init__(self, concept=None, concept_set=None):
super().__init__(BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
@@ -409,3 +422,17 @@ class WhereClauseFailed(Concept):
@property
def concept(self):
return self.body
class NotForMeConcept(Concept):
    """
    Returned by a parser to signal that the entry is not meant for it.

    The body holds the rejected source; the "reason" property explains
    why the parser declined the input.
    """
    def __init__(self, source=None, reason=None):
        super().__init__(BuiltinConcepts.NOT_FOR_ME,
                         True,
                         False,
                         BuiltinConcepts.NOT_FOR_ME)
        # The rejected source becomes the body of the concept.
        self.set_metadata_value(ConceptParts.BODY, source)
        self.def_prop("reason", reason)  # why the parser declined the input
        # Nothing further to evaluate: the concept is final once built.
        self.metadata.is_evaluated = True

    def __repr__(self):
        return f"NotForMeConcept(source={self.body}, reason={self.get_prop('reason')})"
+177
View File
@@ -6,6 +6,8 @@ from core.ast.nodes import CallNodeConcept, GenericNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
def is_same_success(context, return_values):
@@ -132,6 +134,181 @@ def expect_one(context, return_values):
parents=return_values)
def only_successful(context, return_values):
    """
    Keep only the successful return values.

    Produces an error return value when the input list is empty or when no
    successful entry exists; non-list inputs pass through untouched.

    :param context: execution context (provides the sheerka instance and `who`)
    :param return_values: list of return values to filter
    :return: a return value wrapping the kept items, or an error return value
    """
    if not isinstance(return_values, list):
        # Nothing to filter: hand back whatever we received.
        return return_values
    engine = context.sheerka
    if not return_values:
        return engine.ret(
            context.who,
            False,
            engine.new(BuiltinConcepts.IS_EMPTY, body=return_values),
            parents=return_values)
    kept = [rv for rv in return_values if rv.status]
    if not kept:
        # Every entry failed: report the whole batch as errors.
        return engine.ret(
            context.who,
            False,
            engine.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
            parents=return_values)
    return engine.ret(
        context.who,
        True,
        engine.new(BuiltinConcepts.ONLY_SUCCESSFUL, body=kept),
        parents=return_values)
def only_parsers_results(context, return_values):
    """
    Keep only the return values whose body is a ParserResult, regardless of
    their status — i.e. drop plain errors and non-parser results.

    :param context: execution context (provides the sheerka instance and `who`)
    :param return_values: list of return values to filter
    :return: a FILTERED return value, or an error return value
    """
    if not isinstance(return_values, list):
        return return_values
    engine = context.sheerka
    if not return_values:
        return engine.ret(
            context.who,
            False,
            engine.new(BuiltinConcepts.IS_EMPTY, body=return_values),
            parents=return_values)

    def _is_disguised_error(ret_val):
        # HACK: some parsers do not follow the NOT_FOR_ME rule and wrap
        # error nodes in a ParserResult; treat those as errors too.
        inner = ret_val.body.body
        if isinstance(inner, ErrorNode):
            return True
        return (isinstance(inner, list)
                and len(inner) == 1
                and isinstance(inner[0], UnrecognizedTokensNode))

    parser_results = [rv for rv in return_values
                      if engine.isinstance(rv.body, BuiltinConcepts.PARSER_RESULT)
                      and not _is_disguised_error(rv)]
    if not parser_results:
        return engine.ret(
            context.who,
            False,
            engine.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
            parents=return_values)
    return engine.ret(
        context.who,
        True,
        engine.new(BuiltinConcepts.FILTERED,
                   body=parser_results,
                   iterable=return_values,
                   predicate="sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)"),
        parents=return_values)
def parse_unrecognized(context, tokens, parsers):
    """
    Try to recognize concepts or code from the tokens, using only the
    requested parsers.

    :param context: execution context
    :param tokens: tokens to parse
    :param parsers: names of the parsers allowed to run (BaseParser suffixes)
    :return: list of return values produced by the parsers
    """
    steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
    engine = context.sheerka
    with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context:
        # Disable every parser, then re-enable only the requested ones.
        sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False)
        for parser_name in parsers:
            sub_context.add_preprocess(BaseParser.PREFIX + parser_name, enabled=True)
        sub_context.add_inputs(source=tokens)
        to_parse = engine.ret(
            context.who,
            True,
            engine.new(BuiltinConcepts.USER_INPUT, body=tokens))
        res = engine.execute(sub_context, to_parse, steps)
        sub_context.add_values(return_values=res)
        # Discard the Python response when AtomNode accepted the input:
        # the tokens are then considered a concept, not source code.
        accepted_as_concept = any(
            r.status and r.who == "parsers.AtomNode" for r in res)
        if not accepted_as_concept:
            return res
        return [r for r in res if r.who != "parsers.Python"]
def get_lexer_nodes(return_values, start, tokens):
    """
    From parser results, build the corresponding lexer node sequences
    (ConceptNode, UnrecognizedTokensNode or SourceCodeNode).

    Dispatch is done on the name of the parser that produced each result.

    :param return_values: return values produced by the parsers
    :param start: offset to apply to the node positions
    :param tokens: the tokens that were parsed
    :return: list of lists (each inner list is one concept node sequence)
    :raises NotImplementedError: for a result from an unsupported parser
    """
    lexer_nodes = []
    for ret_val in return_values:
        if ret_val.who == "parsers.Python":
            if ret_val.body.source.strip().isalnum() and not ret_val.body.source.strip().isnumeric():
                # Discard SourceCodeNode which seems to be a concept
                # It may be a wrong idea, so let's see
                continue
            end = start + len(tokens) - 1
            lexer_nodes.append([SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
        elif ret_val.who == "parsers.ExactConcept":
            # The body may be a single concept or an iterable of concepts.
            concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
            end = start + len(tokens) - 1
            for concept in concepts:
                lexer_nodes.append([ConceptNode(concept, start, end, tokens, ret_val.body.source)])
        elif ret_val.who in ("parsers.BnfNode", "parsers.SyaNode", "parsers.AtomNode"):
            # Node positions are relative to the parsed fragment: shift them.
            nodes = [node for node in ret_val.body.body]
            for node in nodes:
                node.start += start
                node.end += start
            # append the whole sequence when it's a sequence
            lexer_nodes.append(nodes)
        else:
            raise NotImplementedError()
    return lexer_nodes
def get_names(sheerka, concept_node):
"""
Finds all the names referenced by the concept_node
+81 -2
View File
@@ -108,11 +108,14 @@ class Concept:
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, simplec):
return self.name == other.name and self.body == other.body
if id(self) == id(other):
return True
if isinstance(other, CC):
return other == self
if not isinstance(other, Concept):
return False
@@ -346,6 +349,17 @@ class Concept:
"""
return self.props[prop_name].value
def set_prop_by_index(self, index: int, value):
    """
    Set the value of a property (not the metadata) using its position.

    Properties keep their insertion (definition) order, so the index refers
    to the order in which the properties were registered.

    :param index: zero-based position of the property to update
    :param value: new value for the property
    :return: self, to allow call chaining
    :raises IndexError: when the index is out of range
    """
    prop_name = list(self.props.keys())[index]
    self.props[prop_name].value = value
    return self
def set_metadata_value(self, metadata: ConceptParts, value):
"""
Set the resolved value of a metadata (not the metadata itself)
@@ -438,3 +452,68 @@ class InfiniteRecursionResolved:
def get_value(self):
return self.value
class CC:
    """
    Lightweight concept stand-in for the tests.

    CC means concept for compiled (or concept with compiled): it matches a
    real Concept when the concept key and the compiled properties are equal.
    """
    # Only concept_key and compiled take part in equality checks.
    # concept, source, start and end are bookkeeping used in
    # tests/parsers/parsers_utils.py to help creating helper objects.

    def __init__(self, concept, source=None, **kwargs):
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.compiled = kwargs
        self.concept = concept if isinstance(concept, Concept) else None
        # Used when the key differs from the sub-string searched for
        # when filling start and end.
        self.source = source
        self.start = None  # debug: where the concept starts
        self.end = None    # debug: where the concept ends

    def __eq__(self, other):
        if self is other:
            return True
        if isinstance(other, Concept):
            if other.key != self.concept_key:
                return False
            return self.compiled == other.compiled
        if not isinstance(other, CC):
            return False
        return (self.concept_key == other.concept_key
                and self.compiled == other.compiled)

    def __hash__(self):
        # Hash by the richer object when we have one, else by the key.
        return hash(self.concept) if self.concept else hash(self.concept_key)

    def __repr__(self):
        head = (f"CC(concept='{self.concept}'" if self.concept
                else f"CC(concept_key='{self.concept_key}'")
        extras = "".join(f", {k}='{v}'" for k, v in self.compiled.items())
        return head + extras + ")"

    def fix_pos(self, node):
        """Widen [start, end] to cover node (object with start/end, or tuple)."""
        if hasattr(node, "start"):
            new_start = node.start
        else:
            new_start = node[0] if isinstance(node, tuple) else None
        if hasattr(node, "end"):
            new_end = node.end
        else:
            new_end = node[1] if isinstance(node, tuple) else None
        if new_start is not None and (self.start is None or new_start < self.start):
            self.start = new_start
        if new_end is not None and (self.end is None or new_end > self.end):
            self.end = new_end
        return self
+7 -3
View File
@@ -43,6 +43,7 @@ class ExecutionContext:
desc: str = None,
logger=None,
global_hints=None,
global_errors=None,
**kwargs):
self._parent = None
@@ -61,6 +62,7 @@ class ExecutionContext:
self.logger = logger
self.local_hints = set()
self.global_hints = set() if global_hints is None else global_hints
self.global_errors = [] if global_errors is None else global_errors
self.inputs = {} # what was the parameters of the execution context
self.values = {} # what was produced by the execution context
@@ -146,8 +148,8 @@ class ExecutionContext:
preprocess.set_prop(k, v)
if not self.preprocess:
self.preprocess = set()
self.preprocess.add(preprocess)
self.preprocess = []
self.preprocess.append(preprocess)
return self
def add_inputs(self, **kwargs):
@@ -212,6 +214,7 @@ class ExecutionContext:
desc,
logger,
self.global_hints,
self.global_errors,
**_kwargs)
new._parent = self
new._tab = self._tab + " " * DEBUG_TAB_SIZE
@@ -230,7 +233,8 @@ class ExecutionContext:
if self.logger and not self.logger.disabled:
self.logger.debug(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
def log_error(self, message, who=None):
def log_error(self, message, who=None, exc=None):
self.global_errors.append(exc or message)
if self.logger and not self.logger.disabled:
self.logger.exception(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
@@ -1,8 +1,10 @@
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
from core.concept import Concept
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderRef
import core.utils
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
BNF_NODE_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
class SheerkaCreateNewConcept:
@@ -13,6 +15,7 @@ class SheerkaCreateNewConcept:
def __init__(self, sheerka):
self.sheerka = sheerka
self.logger_name = self.create_new_concept.__name__
self.base_lexer_parser = core.utils.get_class(BASE_NODE_PARSER_CLASS)("BaseNodeParser", 0)
def create_new_concept(self, context, concept: Concept):
"""
@@ -25,7 +28,7 @@ class SheerkaCreateNewConcept:
concept.init_key()
concepts_definitions = None
init_ret_value = None
init_bnf_ret_value = None
sdp = self.sheerka.sdp
@@ -49,13 +52,19 @@ class SheerkaCreateNewConcept:
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.sheerka.parsers[CONCEPT_LEXER_PARSER_CLASS]()
bnf_lexer_parser = self.sheerka.parsers[BNF_NODE_PARSER_CLASS]()
with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context:
sub_context.concepts[concept.key] = concept # the concept is not in the real cache yet
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
sub_context.add_values(return_values=init_ret_value)
if not init_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
init_bnf_ret_value = bnf_lexer_parser.initialize(sub_context, concepts_definitions)
sub_context.add_values(return_values=init_bnf_ret_value)
if not init_bnf_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_bnf_ret_value.value))
# update concept definition by key
init_sya_ret_value = self.base_lexer_parser.initialize(context, [concept], use_sheerka=True)
if not init_sya_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
concepts_by_first_keyword = init_sya_ret_value.body
concept.freeze_definition_hash()
@@ -97,9 +106,15 @@ class SheerkaCreateNewConcept:
sdp.set(
context.event.get_digest(),
self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
concept_lexer_parser.encode_grammar(init_ret_value.body),
bnf_lexer_parser.encode_grammar(init_bnf_ret_value.body),
use_ref=True)
self.sheerka.concepts_definitions_cache = None # invalidate cache
# update the concepts by first keyword
sdp.set(context.event.get_digest(),
self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
concepts_by_first_keyword)
except SheerkaDataProviderDuplicateKeyError as error:
context.log_error("Failed to create a new concept.", who=self.logger_name)
return self.sheerka.ret(
@@ -109,13 +124,13 @@ class SheerkaCreateNewConcept:
error.args[0])
# Updates the caches
self.sheerka.cache_by_key[concept.key] = sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key)
self.sheerka.cache_by_name[concept.name] = sdp.get_safe(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name)
self.sheerka.cache_by_id[concept.id] = concept
if init_ret_value is not None and init_ret_value.status:
self.sheerka.concepts_grammars = init_ret_value.body
if init_bnf_ret_value is not None and init_bnf_ret_value.status:
self.sheerka.concepts_grammars = init_bnf_ret_value.body
self.sheerka.concepts_by_first_keyword = concepts_by_first_keyword
# process the return in needed
# process the return if needed
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
@@ -1,6 +1,6 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved
from core.builtin_helpers import add_to_ret_val, remove_from_ret_val, expect_one
from core.builtin_helpers import expect_one
CONCEPT_EVALUATION_STEPS = [
BuiltinConcepts.BEFORE_EVALUATION,
+24 -15
View File
@@ -33,6 +33,8 @@ class SheerkaExecute:
# group the parsers by priorities
instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
instantiated_parsers = self.preprocess(execution_context, instantiated_parsers)
grouped_parsers = {}
for parser in [p for p in instantiated_parsers if p.enabled]:
grouped_parsers.setdefault(parser.priority, []).append(parser)
@@ -44,7 +46,6 @@ class SheerkaExecute:
for parser in grouped_parsers[priority]:
return_value_success_found = False
for return_value in inputs_for_this_group:
to_parse = return_value.body.body \
@@ -67,22 +68,23 @@ class SheerkaExecute:
r.parents = [return_value]
result.append(r)
if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT):
# if a ParserResultConcept is returned, it will be used by the parsers
# of the following groups
to_process.append(r)
if r.status:
return_value_success_found = True
stop_processing = True
else:
res.parents = [return_value]
result.append(res)
if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT):
# if a ParserResultConcept is returned, it will be used by the parsers
# of the following groups
to_process.append(res)
if res.status:
return_value_success_found = True
stop_processing = True
sub_context.add_values(return_values=res)
if return_value_success_found:
stop_processing = True
break # Stop the other return_values (but not the other parsers with the same priority)
if stop_processing:
break # Do not try the other priorities if a match is found
@@ -102,7 +104,7 @@ class SheerkaExecute:
instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]
# pre-process evaluators if needed
instantiated_evaluators = self._preprocess_evaluators(execution_context, instantiated_evaluators)
instantiated_evaluators = self.preprocess(execution_context, instantiated_evaluators)
for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
@@ -123,7 +125,7 @@ class SheerkaExecute:
evaluated_items = []
to_delete = []
for evaluator in grouped_evaluators[priority]:
evaluator = self._preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy
evaluator = self.preprocess(execution_context, evaluator.__class__()) # fresh copy
sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
with iteration_context.push(desc=sub_context_desc, logger=evaluator.verbose_log) as sub_context:
@@ -215,22 +217,29 @@ class SheerkaExecute:
return return_values
def _preprocess_evaluators(self, context, evaluators):
def preprocess(self, context, parsers_or_evaluators):
if not context.preprocess:
return evaluators
return parsers_or_evaluators
if not hasattr(evaluators, "__iter__"):
if not hasattr(parsers_or_evaluators, "__iter__"):
single_one = True
evaluators = [evaluators]
parsers_or_evaluators = [parsers_or_evaluators]
else:
single_one = False
for preprocess in context.preprocess:
for e in evaluators:
if preprocess.props["name"].value == e.name:
for e in parsers_or_evaluators:
if self.matches(e.name, preprocess.get_prop("name")):
for prop, value in preprocess.props.items():
if prop == "name":
continue
if hasattr(e, prop):
setattr(e, prop, value.value)
return evaluators[0] if single_one else evaluators
return parsers_or_evaluators[0] if single_one else parsers_or_evaluators
@staticmethod
def matches(parser_or_evaluator_name, preprocessor_name):
if preprocessor_name.endswith("*"):
return parser_or_evaluator_name.startswith(preprocessor_name[:-1])
else:
return parser_or_evaluator_name == preprocessor_name
+94 -10
View File
@@ -17,12 +17,7 @@ from core.sheerka_logger import console_handler
import logging
# CONCEPT_EVALUATION_STEPS = [
# BuiltinConcepts.BEFORE_EVALUATION,
# BuiltinConcepts.EVALUATION,
# BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
CONCEPT_LEXER_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser"
CONCEPTS_FILE = "_concepts.txt"
@@ -37,6 +32,9 @@ class Sheerka(Concept):
CONCEPTS_BY_NAME_ENTRY = "Concepts_By_Name"
CONCEPTS_BY_HASH_ENTRY = "Concepts_By_Hash" # store hash of concepts definitions (not values)
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Concepts_By_First_Keyword"
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
@@ -65,6 +63,10 @@ class Sheerka(Concept):
# a grammar is a resolved BNF
self.concepts_grammars = {}
# cache for SYA concepts
self.concepts_by_first_keyword = {}
self.sya_definitions = {}
# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
# TODO: manage contexts
@@ -119,7 +121,8 @@ class Sheerka(Concept):
self.initialize_builtin_concepts()
self.initialize_builtin_parsers()
self.initialize_builtin_evaluators()
self.initialize_concepts_definitions(exec_context)
self.initialize_bnf_parsing(exec_context)
self.initialize_sya_parsing()
res = ReturnValueConcept(self, True, self)
exec_context.add_values(return_values=res)
@@ -174,12 +177,25 @@ class Sheerka(Concept):
"""
core.utils.init_package_import("parsers")
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
modules_to_skip = ["parsers.BaseNodeParser"]
temp_result = {}
for parser in core.utils.get_sub_classes("parsers", base_class):
if parser.__module__ == base_class.__module__:
continue
self.init_log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
if parser.__module__ in modules_to_skip:
continue
qualified_name = core.utils.get_full_qualified_name(parser)
self.init_log.debug(f"Adding builtin parser '{qualified_name}'")
temp_result[qualified_name] = parser
# Now we sort the parser by name.
# It's not important for the logic of their usage as they have their priority anyway,
# We do that for the unit tests. They are too complicated to write otherwise
for name in sorted(temp_result.keys()):
self.parsers[name] = temp_result[name]
def initialize_builtin_evaluators(self):
"""
@@ -195,7 +211,7 @@ class Sheerka(Concept):
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
def initialize_concepts_definitions(self, execution_context):
def initialize_bnf_parsing(self, execution_context):
self.init_log.debug("Initializing concepts grammars.")
definitions = self.get_concepts_definitions(execution_context)
@@ -211,6 +227,25 @@ class Sheerka(Concept):
self.concepts_grammars = lexer_parser.concepts_grammars
def initialize_sya_parsing(self):
self.init_log.debug("Initializing sya definitions.")
self.concepts_by_first_keyword = self.sdp.get_safe(
self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
load_origin=False) or {}
self.sya_definitions = self.sdp.get_safe(
self.CONCEPTS_SYA_DEFINITION_ENTRY,
load_origin=False) or {}
def reset(self):
self.reset_cache()
self.concepts_by_first_keyword = {}
self.concepts_grammars = {}
self.sya_definitions = {}
self.sdp.reset()
self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)
def reset_cache(self, filter_to_use=None):
"""
reset the different cache that exists
@@ -220,6 +255,7 @@ class Sheerka(Concept):
if filter_to_use is None:
self.cache_by_key = {}
self.cache_by_id = {}
self.cache_by_name = {}
else:
raise NotImplementedError()
@@ -324,6 +360,38 @@ class Sheerka(Concept):
"""
return self.sets_handler.set_isa(context, concept, concept_set)
def set_sya_def(self, context, list_of_def):
    """
    Set the precedence and/or the associativity of concepts.

    Passing (None, None) for a concept removes its definition entirely.

    :param context: execution context (its event digest is used when persisting)
    :param list_of_def: list of tuple(concept_id, precedence (int), SyaAssociativity)
    :return: a SUCCESS return value, or an ERROR when a concept is unknown
    """
    # Validate every entry before touching anything, so a bad entry
    # cannot leave the definitions half-updated.
    for concept_id, precedence, associativity in list_of_def:
        if concept_id == BuiltinConcepts.UNKNOWN_CONCEPT:
            return self.ret(self.name,
                            False,
                            self.new(BuiltinConcepts.ERROR, body=f"Concept {concept_id} is not known"))
    # Update the definitions.
    for concept_id, precedence, associativity in list_of_def:
        if precedence is None and associativity is None:
            # Both unset: forget the definition.
            self.sya_definitions.pop(concept_id, None)
        else:
            # Fix: tolerate a missing associativity (precedence-only entry);
            # the original unconditionally dereferenced associativity.value.
            assoc_value = associativity.value if associativity is not None else None
            self.sya_definitions[concept_id] = (precedence, assoc_value)
    # Persist the updated definitions.
    self.sdp.set(context.event.get_digest(),
                 self.CONCEPTS_SYA_DEFINITION_ENTRY,
                 self.sya_definitions)
    return self.ret(self.name, True, self.new(BuiltinConcepts.SUCCESS))
def get_set_elements(self, context, concept):
"""
Concept is supposed to be a set
@@ -571,6 +639,22 @@ class Sheerka(Concept):
return self.value(body_to_use)
def get_error(self, obj):
    """
    Extract the error carried by obj, if any.

    :param obj: a builtin error concept, a list (assumed to be errors),
                or a return value
    :return: the error object(s), or None for a successful return value
    :raises NotImplementedError: for unsupported object kinds
    """
    if isinstance(obj, Concept) and obj.metadata.is_builtin and obj.key in BuiltinErrors:
        return obj
    if isinstance(obj, list):
        # A list is assumed to already be a list of errors.
        return obj
    if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
        if obj.status:
            return None
        if self.isinstance(obj.body, BuiltinConcepts.PARSER_RESULT):
            return self.get_error(obj.body.body)
    # Fix: the original *returned* the exception instance instead of raising it.
    raise NotImplementedError()
def get_values(self, objs):
if not (isinstance(objs, list) or
self.isinstance(objs, BuiltinConcepts.LIST) or
+36 -2
View File
@@ -163,7 +163,7 @@ def remove_list_from_list(lst, to_remove):
def product(a, b):
"""
Kind of cartesian product between lists a and b
knowing that a is also a list
knowing that a is also a list : a is a list of list !!!
So it's a cartesian product between a list of list and a list
"""
@@ -176,7 +176,12 @@ def product(a, b):
res = []
for item_b in b:
for item_a in a:
items = item_a + [item_b]
#items = item_a + [item_b]
items = item_a[:]
if hasattr(item_b, "__iter__"):
items.extend(item_b)
else:
items.append(item_b)
res.append(items)
return res
@@ -276,6 +281,7 @@ def str_concept(t):
>>> assert str_concept((None, "id")) == "c:|id:"
>>> assert str_concept(("key", None)) == "c:key:"
>>> assert str_concept((None, None)) == ""
>>> assert str_concept(Concept(key="foo", id="bar")) == "c:foo|bar:"
:param t:
:return:
"""
@@ -297,6 +303,12 @@ def unstr_concept(concept_repr):
"""
if concept_repr is like :c:key:id:
return the key and the id
>>> assert unstr_concept("c:key:") == "key"
>>> assert unstr_concept("c:key|id:") == ("key", "id")
>>> assert unstr_concept("c:|id:") == ("None", "id")
>>> assert unstr_concept("c:key|:") == ("key", "None")
>>> # Otherwise, return (None,None)
:param concept_repr:
:return:
"""
@@ -371,3 +383,25 @@ def decode_concept(text):
return key, id_, use_concept
return None, None, None
def tokens_index(tokens, sub_tokens, skip=0):
    """
    Find the index of sub_tokens within tokens.

    EOF tokens in sub_tokens are ignored; matching compares token values only.

    :param tokens: the token sequence to search
    :param sub_tokens: the sub tokens to search for
    :param skip: number of matches to skip before returning
    :return: index of the (skip + 1)-th match
    :raises ValueError: when no (remaining) match exists
    """
    wanted = [tok.value for tok in sub_tokens if tok.type != TokenKind.EOF]
    remaining = skip
    for offset in range(len(tokens) - len(wanted) + 1):
        if all(tokens[offset + k].value == wanted[k] for k in range(len(wanted))):
            if remaining == 0:
                return offset
            remaining -= 1
    raise ValueError(f"sub tokens '{sub_tokens}' not found")
+1 -1
View File
@@ -5,7 +5,7 @@ from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.tokenizer import TokenKind
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseParser import NotInitializedNode
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.PythonParser import PythonNode
import core.utils
+2 -1
View File
@@ -1,6 +1,7 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BnfNodeParser import ConceptNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
+51
View File
@@ -0,0 +1,51 @@
from core.builtin_concepts import BuiltinConcepts
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
from parsers.BaseParser import BaseParser
class MultipleErrorsEvaluator(AllReturnValuesEvaluator):
    """
    Reduce several evaluator errors into a single MULTIPLE_ERRORS result.

    Parser errors are eaten (discarded) but not reported.
    Cannot match when at least one prefixed evaluator or parser succeeded.
    """
    NAME = "MultipleErrors"

    def __init__(self):
        # Runs after evaluation, at priority 30.
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 30)
        # Evaluator return values that failed; reported by eval().
        # NOTE(review): never reset between calls — presumably a fresh
        # instance is created per run; confirm against SheerkaExecute.
        self.return_values_in_error = []

    def matches(self, context, return_values):
        nb_evaluators_in_error = 0
        to_process = False
        for ret in return_values:
            if ret.status and (ret.who.startswith(self.PREFIX) or ret.who.startswith(BaseParser.PREFIX)):
                # A successful evaluator or parser exists: nothing to reduce.
                return False
            elif ret.status and context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
                # A reduction was explicitly requested.
                to_process = True
                self.eaten.append(ret)
            elif not ret.status and ret.who.startswith(self.PREFIX):
                # Failed evaluator: collect it for the combined error.
                nb_evaluators_in_error += 1
                self.return_values_in_error.append(ret)
                self.eaten.append(ret)
            elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
                # Failed parser: eat it silently (not part of the report).
                self.eaten.append(ret)
            # else:
            # other concepts. We do not care if there are successful or not
            # They won't be part of result nor part of the parent
            # --> So they will be handled by other evaluators
        return to_process and nb_evaluators_in_error > 1

    def eval(self, context, return_values):
        context.log(f"{len(self.return_values_in_error)} return value in error, {len(self.eaten)} item(s) eaten",
                    who=self)
        context.log(f"{self.return_values_in_error}", who=self)
        sheerka = context.sheerka
        # The combined result is itself an error (status False).
        return sheerka.ret(
            self.name,
            False,
            sheerka.new(BuiltinConcepts.MULTIPLE_ERRORS, body=self.return_values_in_error),
            parents=self.eaten)
+4
View File
@@ -31,6 +31,10 @@ class OneErrorEvaluator(AllReturnValuesEvaluator):
self.eaten.append(ret)
elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
self.eaten.append(ret)
# else:
# other concepts. We do not care if there are successful or not
# They won't be part of result nor part of the parent
# --> So they will be handled by other evaluators
return to_process and nb_evaluators_in_error == 1
+2 -1
View File
@@ -1,4 +1,5 @@
import copy
import traceback
from enum import Enum
from core.ast.visitors import UnreferencedNamesVisitor
@@ -59,7 +60,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
return sheerka.ret(self.name, True, evaluated, parents=[return_value])
except Exception as error:
context.log_error(error, self.name)
context.log_error(error, who=self.name, exc=traceback.format_exc())
error = sheerka.new(BuiltinConcepts.ERROR, body=error)
return sheerka.ret(self.name, False, error, parents=[return_value])
+369
View File
@@ -0,0 +1,369 @@
import copy
from dataclasses import dataclass
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
PARSERS = ["BnfNode", "SyaNode", "Python"]
@dataclass()
class TokensNodeFound(ErrorNode):
    """
    Error node recording that unexpected tokens were found, together with
    the tokens that would have been acceptable at that position.
    """
    # Tokens that were expected at the failure position.
    expected_tokens: list

    def __eq__(self, other):
        if id(other) == id(self):
            return True
        # NOTE(review): comparison is against UnexpectedTokenErrorNode, not
        # TokensNodeFound — looks deliberate (both carry the same fields),
        # but worth confirming.
        if not isinstance(other, UnexpectedTokenErrorNode):
            return False
        if self.message != other.message:
            return False
        if self.token.type != other.token.type or self.token.value != other.token.value:
            return False
        if len(self.expected_tokens) != len(other.expected_tokens):
            return False
        for i, t in enumerate(self.expected_tokens):
            if t != other.expected_tokens[i]:
                return False
        return True

    def __hash__(self):
        # Fix: expected_tokens is a list and lists are unhashable; the
        # original hash((..., self.expected_tokens)) raised TypeError.
        return hash((self.message, self.token, tuple(self.expected_tokens)))
class AtomConceptParserHelper:
    """State machine used by AtomNodeParser to match one candidate sequence of
    atom concepts against the token stream.

    Several helpers can be alive at once (one per ambiguous interpretation of
    the input); new ones are created via clone() and collected in `forked`.
    """

    def __init__(self, context):
        self.context = context
        self.debug = []  # every token/concept fed to this helper (debugging aid)
        self.sequence = []  # sequence of concept/unrecognized nodes already found
        self.current_concept: ConceptNode = None  # concept being parsed (multi-token match in progress)
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of tokens positions
        self.expected_tokens = None  # remaining token values needed to complete current_concept
        self.is_locked = False  # when True, eat_concept/eat_unrecognized are no-ops (token already consumed)
        self.errors = []
        self.has_unrecognized = False  # True once an unrecognized node reached the sequence
        self.forked = []  # used to duplicate AtomConceptParserHelper. See manage_unrecognized()

    def __eq__(self, other):
        # two helpers are equal when their recognized sequences are equal
        if id(other) == id(self):
            return True
        if not isinstance(other, AtomConceptParserHelper):
            return False
        if len(self.sequence) != len(other.sequence):
            return False
        for item_self, item_other in zip(self.sequence, other.sequence):
            if item_self != item_other:
                return False
        return True

    def __hash__(self):
        # hashing on length only: collision-heavy but consistent with __eq__
        return hash(len(self.sequence))

    def __repr__(self):
        return f"{self.sequence}"

    def lock(self):
        """Mark the current token as consumed so further eat_* calls are ignored."""
        self.is_locked = True

    def reset(self):
        """Re-enable eating for the next token."""
        self.is_locked = False

    def has_error(self):
        return len(self.errors) > 0

    def eat_token(self, token, pos):
        """Try to consume *token* (at token index *pos*) as the next expected
        token of the concept currently being matched.

        :return: True when this token completed the concept (caller locks),
                 False (or None) otherwise; a mismatch records an error.
        """
        if not self.expected_tokens:
            return False
        self.debug.append(token)
        if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
            self.errors.append(UnexpectedTokenErrorNode(
                f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
                token,
                [self.expected_tokens[0]]))
            return False
        self.current_concept.end = pos
        del self.expected_tokens[0]
        if not self.expected_tokens:
            # the concept is fully matched
            self.sequence.append(self.current_concept)
            self.expected_tokens = None
            return True

    def eat_concept(self, concept, pos):
        """Start (or, for single-token concepts, immediately complete) matching
        *concept* at token index *pos*."""
        if self.is_locked:
            return
        self.debug.append(concept)
        self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.eat_concept(concept, pos)
        concept_node = ConceptNode(concept, pos, pos)
        # tokens of the concept key, minus the surrounding begin/EOF markers
        expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]
        if not expected:
            # the concept is already matched
            self.sequence.append(concept_node)
        else:
            self.current_concept = concept_node
            self.expected_tokens = expected

    def manage_unrecognized(self):
        """Flush the unrecognized-token buffer into the sequence.

        Before giving up, the buffered source is re-parsed with the fallback
        PARSERS; every alternative interpretation forks a clone of this helper.
        """
        if self.unrecognized_tokens.is_empty():
            return
        # do not put empty UnrecognizedToken in out
        if self.unrecognized_tokens.is_whitespace():
            self.unrecognized_tokens.reset()
            return
        self.unrecognized_tokens.fix_source()
        # try to recognize concepts
        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
        if nodes_sequences:
            instances = [self]
            for i in range(len(nodes_sequences) - 1):
                clone = self.clone()
                instances.append(clone)
                self.forked.append(clone)
            for instance, node_sequence in zip(instances, nodes_sequences):
                for node in node_sequence:
                    instance.sequence.append(node)
                    if isinstance(node, UnrecognizedTokensNode) or \
                            hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
                        instance.has_unrecognized = True
                instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
        else:
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True
            # create another instance
            self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def eat_unrecognized(self, token, pos):
        """Buffer a token that matched no concept."""
        if self.is_locked:
            return
        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
        """Flush pending state once the token stream is exhausted."""
        if len(self.sequence) > 0:
            self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.finalize()
        if self.expected_tokens:
            # a concept match was still in progress: report the missing tokens
            self.errors.append(TokensNodeFound(self.expected_tokens))

    def clone(self):
        """Copy used when an ambiguity forks the parse (lists copied, nodes cloned)."""
        clone = AtomConceptParserHelper(self.context)
        clone.debug = self.debug[:]
        clone.sequence = self.sequence[:]
        clone.current_concept = self.current_concept.clone() if self.current_concept else None
        clone.unrecognized_tokens = self.unrecognized_tokens.clone()
        clone.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
        clone.is_locked = self.is_locked
        clone.errors = self.errors[:]
        clone.has_unrecognized = self.has_unrecognized
        return clone

    def _get_lexer_nodes_from_unrecognized(self):
        """
        Use the source of self.unrecognized_tokens to find concepts or source code
        :return: list of node sequences (one per successful parser), or None
        """
        res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
        if not only_parsers_results.status:
            return None
        return builtin_helpers.get_lexer_nodes(
            only_parsers_results.body.body,
            self.unrecognized_tokens.start,
            self.unrecognized_tokens.tokens)
class AtomNodeParser(BaseNodeParser):
    """
    Parser used to recognize atom concepts or sequences of atom concepts.
    An atom concept is a concept that does not have any property though it may have a body.
    So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter),
    this parser can recognize the sequence 'one two three'
    as [ConceptNode(one), ConceptNode(two), ConceptNode(three)].
    It can partly recognize 'one x$1!! two three'
    as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)].
    It cannot recognize concepts with parameters (non atom),
    ex: 'one plus two' won't be recognized as ConceptNode(plus, one, two),
    it will be [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)].
    Note 'one plus two' will be recognized by the SyaParser.
    """

    def __init__(self, **kwargs):
        super().__init__("AtomNode", 50, **kwargs)
        self.enabled = False  # disabled by default — presumably switched on elsewhere; confirm

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that selects concepts that must be handled by AtomNodeParser:
        property-less atoms, or BNF-defined concepts.
        :param concept:
        :return: bool
        """
        return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF

    def get_concepts_sequences(self):
        """Walk the token stream and return every candidate
        AtomConceptParserHelper — one per possible interpretation of the input."""
        forked = []

        def _add_forked_to_concept_parser_helpers():
            # promote any helpers forked during this round to first-class candidates
            for parser in concept_parser_helpers:
                if len(parser.forked) > 0:
                    forked.extend(parser.forked)
                    parser.forked.clear()
            if len(forked) > 0:
                concept_parser_helpers.extend(forked)
                forked.clear()

        concept_parser_helpers = [AtomConceptParserHelper(self.context)]
        while self.next_token(False):
            for concept_parser in concept_parser_helpers:
                concept_parser.reset()
            token = self.token
            try:
                # first give each helper a chance to continue an in-progress match
                for concept_parser in concept_parser_helpers:
                    if concept_parser.eat_token(self.token, self.pos):
                        concept_parser.lock()
                concepts = self.get_concepts(token, self._is_eligible)
                if not concepts:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_unrecognized(token, self.pos)
                    continue
                if len(concepts) == 1:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_concept(concepts[0], self.pos)
                    continue
                # several concepts share this keyword: make the cartesian product
                temp_res = []
                for concept_parser in concept_parser_helpers:
                    if concept_parser.is_locked:
                        # It means that it already ate the token
                        # so simply add it, do not clone
                        temp_res.append(concept_parser)
                        continue
                    for concept in concepts:
                        clone = concept_parser.clone()
                        temp_res.append(clone)
                        clone.eat_concept(concept, self.pos)
                concept_parser_helpers = temp_res
            finally:
                _add_forked_to_concept_parser_helpers()
        # make sure that remaining items in stack are moved to out
        for concept_parser in concept_parser_helpers:
            concept_parser.reset()
            concept_parser.finalize()
        _add_forked_to_concept_parser_helpers()
        return concept_parser_helpers

    def get_valid(self, concept_parser_helpers):
        """Keep only error-free, non-empty, de-duplicated helpers and attach
        the covering tokens/source text to each node of their sequences."""
        valid_parser_helpers = []  # be careful, it will be a list of list
        for parser_helper in concept_parser_helpers:
            if parser_helper.has_error():
                continue
            if len(parser_helper.sequence) == 0:
                continue
            for node in parser_helper.sequence:
                # re-attach the original tokens covering the node's span
                node.tokens = self.tokens[node.start:node.end + 1]
                node.fix_source()
            if parser_helper in valid_parser_helpers:
                continue
            valid_parser_helpers.append(parser_helper)
        return valid_parser_helpers

    def parse(self, context, parser_input):
        """Parse *parser_input* into sequences of atom concept nodes.

        :return: a single return value, or a list of return values when the
                 input is ambiguous; NOT_FOR_ME when nothing was recognized.
        """
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )
        if not self.reset_parser(context, parser_input):
            # tokenization failed: surface the lexer errors
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        parser_helpers = self.get_valid(self.get_concepts_sequences())
        if len(parser_helpers):
            ret = []
            for parser_helper in parser_helpers:
                # a sequence containing unrecognized tokens is reported as unsuccessful
                ret.append(
                    self.sheerka.ret(
                        self.name,
                        not parser_helper.has_unrecognized,
                        self.sheerka.new(
                            BuiltinConcepts.PARSER_RESULT,
                            parser=self,
                            source=parser_input,
                            body=parser_helper.sequence,
                            try_parsed=parser_helper.sequence)))
            if len(ret) == 1:
                self.log_result(context, parser_input, ret[0])
                return ret[0]
            else:
                self.log_multiple_results(context, parser_input, ret)
                return ret
        else:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+669
View File
@@ -0,0 +1,669 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
# When True, ConceptNode.__repr__ also prints the concept's compiled properties
DEBUG_COMPILED = True
@dataclass()
class LexerNode(Node):
    """Base node produced by the node parsers: a [start, end] span over the
    token list, with the covered tokens and their textual source."""
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        # derive the source text from the tokens when not supplied
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False
        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.tokens == other.tokens

    def fix_source(self, force=True):
        """Recompute `source` from `tokens` (always when *force*, otherwise
        only when it is still missing)."""
        if force or self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)
        return self
class UnrecognizedTokensNode(LexerNode):
    """Accumulates consecutive tokens that no parser could attach to a concept.

    Used as a growing buffer: start/end are -1 while empty, then track the
    token positions of the buffered run.
    """

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False  # when frozen, add_token() raises
        self.parenthesis_count = 0  # opened minus closed parentheses seen so far

    def freeze(self):
        self.is_frozen = True

    def reset(self):
        """Empty the buffer and return it to its initial state."""
        self.start = self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def add_token(self, token, pos):
        """Append *token* (at token index *pos*) to the buffer.

        :raises Exception: when the node is frozen
        """
        if self.is_frozen:
            raise Exception("The node is frozen")
        if self.end != -1 and pos == self.end + 2:
            # a skipped token sits between the previous token and this one:
            # add the missing whitespace so `source` stays faithful
            p = self.tokens[-1]  # previous token
            self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1))
        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            self.start = pos
        if token.type == TokenKind.LPAR:
            self.parenthesis_count += 1
        if token.type == TokenKind.RPAR:
            self.parenthesis_count -= 1
        return self

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        """True when the buffer holds only whitespace/newline tokens."""
        for t in self.tokens:
            if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                return False
        return True

    def is_empty(self):
        return len(self.tokens) == 0

    def __eq__(self, other):
        # utnode (namedtuple) and UTN are lightweight test doubles
        if isinstance(other, utnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source
        if isinstance(other, UTN):
            return other == self
        if not isinstance(other, UnrecognizedTokensNode):
            return False
        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"

    def clone(self):
        """Copy with its own token list (tokens themselves are shared)."""
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        clone.is_frozen = self.is_frozen
        clone.parenthesis_count = self.parenthesis_count
        return clone
class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept  # the recognized Concept
        self.underlying = underlying  # optional node this concept was built from
        self.fix_source(False)  # only compute source when not already provided

    def __eq__(self, other):
        # CN/CNC and the cnode/short_cnode namedtuples are test doubles;
        # delegate or compare field-wise when given one of them
        if id(self) == id(other):
            return True
        if isinstance(other, (CN, CNC)):
            return other == self
        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                   self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source
        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source
        if not isinstance(other, ConceptNode):
            return False
        return self.concept == other.concept and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
        if DEBUG_COMPILED:
            # also show the compiled properties of the concept to ease debugging
            for k, v in self.concept.compiled.items():
                text += f", {k}='{v}'"
        return text + ")"

    def clone(self):
        # do we need to clone the concept as well ?
        clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
        return clone
class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized
    """

    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
        super().__init__(start, end, tokens, source)
        self.node = node  # The PythonNode (or whatever language node) that is found
        self.return_value = return_value  # original result of the parsing

    def __eq__(self, other):
        # scnode is a lightweight namedtuple test double
        if isinstance(other, scnode):
            return self.start == other.start and \
                   self.end == other.end and \
                   self.source == other.source
        # NOTE(review): return_value is not part of equality — presumably
        # intentional (it is parsing metadata); confirm
        if not isinstance(other, SourceCodeNode):
            return False
        return self.node == other.node and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of temporary version for SourceCodeNode
    I know that there is some code,
    I know that there are some concepts
    I just don't want to make the glue yet
    So I push all the nodes into one big bag
    """

    def __init__(self, first_node, last_node, content_nodes=None):
        # start=9999 / end=-1 are sentinels that any real position replaces
        # (NOTE(review): sys.maxsize would be safer than 9999 for huge inputs)
        super().__init__(9999, -1, None)
        self.first = first_node  # opening node of the bag
        self.last = last_node  # closing node of the bag
        self.nodes = content_nodes or []  # everything found in between
        self.has_unrecognized = False
        self.fix_all_pos()

    def add_node(self, node):
        """Append *node* to the bag and widen the span to cover it."""
        self.nodes.append(node)
        self.fix_pos(node)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if not isinstance(other, SourceCodeWithConceptNode):
            return False
        if self.start != other.start or self.end != other.end:
            return False
        if self.first != other.first:
            return False
        if self.last != other.last:
            return False
        if len(self.nodes) != len(other.nodes):
            return False
        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False
        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        """Recompute the span from first, last and every content node."""
        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        """Widen [start, end] so it covers *node*, when it carries positions."""
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start
        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end
        return self

    def pseudo_fix_source(self):
        """Best-effort reconstruction of the source text from the bagged nodes."""
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def clone(self):
        # BUG FIX: copy the node list — the previous version shared it, so
        # add_node() on a clone silently mutated the original (every other
        # clone() in this module copies with [:]). Also carry over the
        # has_unrecognized flag, which was previously reset on clones.
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes[:])
        clone.has_unrecognized = self.has_unrecognized
        return clone
@dataclass()
class GrammarErrorNode(ErrorNode):
    """Error node carrying a message about an invalid grammar definition."""
    message: str  # human-readable description of the grammar problem
class SyaAssociativity(Enum):
    """Operator associativity used in the sya_definitions
    (precedence, associativity) pairs — see BaseNodeParser.init_from_sheerka."""
    Left = "left"
    Right = "right"
    # NOTE(review): value is capitalized unlike the others; kept as-is because
    # stored definitions are looked up by value (SyaAssociativity(v[1]))
    No = "No"

    def __repr__(self):
        return self.value
# Lightweight namedtuple test doubles: the node classes' __eq__ methods accept
# them so tests can state expectations without building full nodes
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")
@dataclass(init=False)
class SCWC:
    """
    SourceNodeWithConcept tester class
    It matches with a SourceNodeWithConcept
    but it's easier to instantiate during the tests
    NOTE(review): no __eq__ is defined here — matching presumably happens on
    the other side of the comparison; confirm
    """
    first: LexerNode  # expected first node
    last: LexerNode  # expected last node
    content: tuple  # expected in-between nodes

    def __init__(self, first, last, *args):
        self.first = first
        self.last = last
        self.content = args
class HelperWithPos:
    """Base for tester helpers that track an optional [start, end] span.

    A position supplied at construction time is pinned and never changed;
    otherwise fix_pos() widens the span to cover every node it is fed.
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end
        # explicitly supplied positions are never widened afterwards
        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    def fix_pos(self, node):
        """Grow the span to cover *node* (an object with start/end attributes,
        or a (start, end) tuple); other inputs are ignored."""
        if not self.start_is_fixed:
            if hasattr(node, "start"):
                candidate = node.start
            elif isinstance(node, tuple):
                candidate = node[0]
            else:
                candidate = None
            if candidate is not None and (self.start is None or candidate < self.start):
                self.start = candidate
        if not self.end_is_fixed:
            if hasattr(node, "end"):
                candidate = node.end
            elif isinstance(node, tuple):
                candidate = node[1]
            else:
                candidate = None
            if candidate is not None and (self.end is None or candidate > self.end):
                self.end = candidate
        return self
class CN(HelperWithPos):
    """
    ConceptNode tester class
    It matches with ConceptNode but with less constraints
    CN == ConceptNode if the concept key matches, and start/end match when they
    were given (source is only compared between two CN instances)
    """

    def __init__(self, concept, start=None, end=None, source=None):
        """
        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.source = source
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        """Set the expected source from a list of token strings."""
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            # start/end are only constrained when explicitly given
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return True
        if not isinstance(other, CN):
            return False
        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CN(concept='{self.concept}'"
        else:
            txt = f"CN(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
class CNC(CN):
    """
    ConceptNode for Compiled tester class
    It matches with ConceptNode
    But focuses on the 'compiled' property of the concept
    CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
    """

    def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs  # expected concept.compiled mapping

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            # start/end are only constrained when explicitly given
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.compiled == other.concept.compiled  # assert instead of return to help debugging tests
        if not isinstance(other, CNC):
            return False
        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.compiled == other.compiled

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"
class BaseNodeParser(BaseParser):
    """Common machinery for the node parsers (AtomNode, BnfNode, SyaNode):
    token-stream navigation plus a first-keyword index of known concepts."""

    def __init__(self, name, priority, **kwargs):
        super().__init__(name, priority)
        # BUG FIX: always define both lookup attributes. Previously, when a
        # 'sheerka' was given whose sya_definitions was empty/falsy,
        # init_from_sheerka() never assigned self.sya_definitions, and
        # initialize() later raised AttributeError reading it.
        self.concepts_by_first_keyword = None  # hash[first keyword, list[concept id]]
        self.sya_definitions = None  # hash[concept id, (precedence, SyaAssociativity)]
        if 'sheerka' in kwargs:
            self.init_from_sheerka(kwargs.get("sheerka"))
        self.token = None  # current token
        self.pos = -1  # index of the current token in self.tokens
        self.tokens = None
        self.context: ExecutionContext = None
        self.text = None  # raw parser input
        self.sheerka = None

    def init_from_sheerka(self, sheerka):
        """
        Use the definitions from Sheerka to initialize
        :param sheerka:
        :return:
        """
        self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
        if sheerka.sya_definitions:
            self.sya_definitions = {}
            for k, v in sheerka.sya_definitions.items():
                self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))

    def reset_parser(self, context, text):
        """Tokenize *text* and rewind the parser.

        :return: False (with the lexer error recorded in error_sink) on failure
        """
        self.context = context
        self.sheerka = context.sheerka
        self.text = text
        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False
        self.token = None
        self.pos = -1
        return True

    def add_error(self, error, next_token=True):
        """Record *error* and optionally advance past the offending token."""
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self.token

    def next_token(self, skip_whitespace=True):
        """Advance to the next (optionally non-whitespace) token.

        :return: False once EOF is the current token
        """
        if self.token and self.token.type == TokenKind.EOF:
            return False
        self.pos += 1
        self.token = self.tokens[self.pos]
        if skip_whitespace:
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]
        return self.token.type != TokenKind.EOF

    def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
        """
        To quickly find a concept, we store them in a hash where the key is the first token of the concept
        example :
        Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
        So the key to use will be "foo"
        Concept("a foo").def_prop("a") -> first token is "foo"
        Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
        Note that under the same key, there will be multiple entries
        a B-Tree may be a better implementation in the future
        We also store sya_definitions which is a hash of tuples (concept_precedence:int, concept_associativity:SyaAssociativity)
        :param context:
        :param concepts: list[Concept]
        :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
        :param use_sheerka: first init with the definitions from Sheerka
        :return:
        """
        self.context = context
        self.sheerka = context.sheerka
        if use_sheerka:
            self.init_from_sheerka(self.sheerka)
        if sya_definitions:
            if self.sya_definitions:
                self.sya_definitions.update(sya_definitions)
            else:
                self.sya_definitions = sya_definitions
        if self.concepts_by_first_keyword is None:
            self.concepts_by_first_keyword = {}
        for concept in concepts:
            keywords = concept.key.split()
            for keyword in keywords:
                if keyword.startswith(VARIABLE_PREFIX):
                    # variables cannot index a concept: use the first real keyword
                    continue
                self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
                break
        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)

    def get_concepts(self, token, to_keep, to_map=None):
        """
        Tries to find if there are concepts that match the value of the token
        :param token:
        :param to_keep: predicate to tell if the concept is eligible
        :param to_map: optional transformation applied to each kept concept
        :return: list of concepts (possibly empty) when the keyword is known, else None
        """
        if token.type == TokenKind.STRING:
            name = token.value[1:-1]  # strip the surrounding quotes
        elif token.type == TokenKind.KEYWORD:
            name = token.value.value
        else:
            name = token.value
        result = []
        if name in self.concepts_by_first_keyword:
            for concept_id in self.concepts_by_first_keyword[name]:
                concept = self.sheerka.get_by_id(concept_id)
                if not to_keep(concept):
                    continue
                concept = to_map(concept) if to_map else concept
                result.append(concept)
            return result
        return None

    @staticmethod
    def get_token_value(token):
        """Normalized textual value of a token (quotes stripped for strings)."""
        if token.type == TokenKind.STRING:
            return token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            return token.value.value
        else:
            return token.value
class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode
    compare the source, and start, end if defined
    """

    def __init__(self, source, start=None, end=None):
        """
        :param source: expected source text
        :param start: expected start index; when None the start is not compared
        :param end: expected end index; when None the end is not compared
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if isinstance(other, UnrecognizedTokensNode):
            # BUG FIX: honour the documented contract ("start, end if defined")
            # — matching the CN tester — so an unset position no longer makes
            # the comparison against a real node fail.
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.source == other.source
        if not isinstance(other, UTN):
            return False
        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN( source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
+54 -4
View File
@@ -1,8 +1,8 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords, Token
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.sheerka_logger import get_logger
import core.utils
import logging
@@ -77,7 +77,6 @@ class BaseParser:
self.priority = priority
self.enabled = enabled
self.has_error = False
self.error_sink = []
def __eq__(self, other):
@@ -91,9 +90,13 @@ class BaseParser:
def __repr__(self):
return self.name
def parse(self, context, text):
def parse(self, context, parser_input):
pass
@property
def has_error(self):
return len(self.error_sink) > 0
def log_result(self, context, source, ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
@@ -132,6 +135,53 @@ class BaseParser:
body=self.error_sink if self.has_error else tree,
try_parsed=try_parse)
def get_input_as_text(self, parser_input, custom_switcher=None):
if isinstance(parser_input, list):
return self.get_text_from_tokens(parser_input, custom_switcher)
if isinstance(parser_input, ParserResultConcept):
parser_input = parser_input.source
if "c:" in parser_input:
return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher)
return parser_input
def get_input_as_tokens(self, parser_input):
if isinstance(parser_input, list):
return self.add_eof_if_needed(parser_input)
if isinstance(parser_input, ParserResultConcept):
if parser_input.tokens:
return self.add_eof_if_needed(parser_input.tokens)
else:
return Tokenizer(parser_input.source)
return Tokenizer(parser_input)
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
if not isinstance(parser_input, ParserResultConcept):
return None
if expected_parser and parser_input.parser != expected_parser:
return None
if len(parser_input.value) == 0:
return None
for node in parser_input.value:
from parsers.BaseNodeParser import LexerNode
if not isinstance(node, LexerNode):
return None
return parser_input.value
@staticmethod
def add_eof_if_needed(lst):
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
return lst
@staticmethod
def get_text_from_tokens(tokens, custom_switcher=None):
if tokens is None:
@@ -9,147 +9,17 @@
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
import core.utils
@dataclass()
class LexerNode(Node):
start: int # starting index in the tokens list
end: int # ending index in the tokens list
tokens: list = None # tokens
source: str = None # string representation of what was parsed
def __post_init__(self):
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if not isinstance(other, LexerNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.tokens == other.tokens
class UnrecognizedTokensNode(LexerNode):
def __init__(self, start, end, tokens):
super().__init__(start, end, tokens)
def add_token(self, token, pos):
self.tokens.append(token)
self.end = pos
def fix_source(self):
self.source = BaseParser.get_text_from_tokens(self.tokens)
def not_whitespace(self):
return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
def __eq__(self, other):
if isinstance(other, utnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, UnrecognizedTokensNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
class ConceptNode(LexerNode):
"""
Returned by the ConceptLexerParser
It represents a recognized concept
"""
def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
super().__init__(start, end, tokens, source)
self.concept = concept
self.underlying = underlying
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if isinstance(other, cnode):
return self.concept.key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
if isinstance(other, short_cnode):
return self.concept.key == other.concept_key and self.source == other.source
if not isinstance(other, ConceptNode):
return False
return self.concept == other.concept and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.underlying == other.underlying
def __hash__(self):
return hash((self.concept, self.start, self.end, self.source, self.underlying))
def __repr__(self):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeNode(LexerNode):
"""
Returned when some source code (like Python source code is recognized)
"""
def __init__(self, node, start, end, tokens=None, source=None):
super().__init__(start, end, tokens, source)
self.node = node # The PythonNode (or whatever language node) that is found
def __eq__(self, other):
if isinstance(other, scnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, SourceCodeNode):
return False
return self.node == other.node and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, tokens, children=None):
@@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode):
class TerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, value):
@@ -205,11 +75,6 @@ class TerminalNode(LexerNode):
return hash((self.parsing_expression, self.start, self.end, self.value))
@dataclass()
class GrammarErrorNode(ErrorNode):
message: str
@dataclass()
class UnknownConceptNode(ErrorNode):
concept_key: str
@@ -574,9 +439,9 @@ class StrMatch(Match):
return None
class ConceptLexerParser(BaseParser):
class BnfNodeParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptLexer", 50)
super().__init__("BnfNode", 50)
if 'grammars' in kwargs:
self.concepts_grammars = kwargs.get("grammars")
elif 'sheerka' in kwargs:
@@ -595,7 +460,6 @@ class ConceptLexerParser(BaseParser):
self.sheerka = None
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -606,16 +470,11 @@ class ConceptLexerParser(BaseParser):
self.sheerka = context.sheerka
self.text = text
if isinstance(text, str):
try:
self.tokens = list(Tokenizer(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
else:
self.tokens = list(text)
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
try:
self.tokens = list(self.get_input_as_tokens(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
self.token = None
self.pos = -1
@@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser):
removed_concepts.append(e)
return removed_concepts
def parse(self, context, text):
if text == "":
def parse(self, context, parser_input):
if parser_input == "":
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)
if not self.reset_parser(context, text):
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
self.name,
False,
@@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser):
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
source=parser_input,
body=choice,
try_parsed=choice)))
if len(ret) == 1:
self.log_result(context, text, ret[0])
self.log_result(context, parser_input, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text, ret)
self.log_multiple_results(context, parser_input, ret)
return ret
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
@@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser):
_concept.compiled[prop_name] = new_value
def _look_for_concept_match(_underlying):
"""
At some point, there is either an StrMatch or a ConceptMatch,
that allowed the recognition.
Look for the ConceptMatch, with recursion if needed
"""
if isinstance(_underlying.parsing_expression, ConceptExpression):
return _underlying
@@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser):
def _get_underlying_value(_underlying):
concept_match_node = _look_for_concept_match(_underlying)
if concept_match_node:
# the value is a concept
if id(concept_match_node) in _underlying_value_cache:
result = _underlying_value_cache[id(concept_match_node)]
else:
@@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser):
result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
_underlying_value_cache[id(concept_match_node)] = result
else:
# the value is a string
result = DoNotResolve(_underlying.source)
return result
@@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser):
concept.compiled[ConceptParts.BODY] = value
if underlying.parsing_expression.rule_name:
_add_prop(concept, underlying.parsing_expression.rule_name, value)
# KSI : Why don't we set concept.metadata.need_validation to True ?
if isinstance(underlying, NonTerminalNode):
for node in underlying.children:
+3 -5
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
@@ -30,7 +30,6 @@ class BnfParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("Bnf", 50, False)
# self.has_error = False
# self.error_sink = []
# self.name = BaseParser.PREFIX + "Bnf"
@@ -61,7 +60,6 @@ class BnfParser(BaseParser):
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -115,11 +113,11 @@ class BnfParser(BaseParser):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
def parse(self, context: ExecutionContext, parser_input):
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_choice()
token = self.get_token()
+12 -11
View File
@@ -1,10 +1,14 @@
# try to match something like
# ConceptNode 'plus' ConceptNode
#
# Replaced by SyaNodeParser
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging
multiple_concepts_parser = MultipleConceptsParser()
@@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptsWithConcepts", 25)
self.enabled = False
@staticmethod
def get_tokens(nodes):
@@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser):
return concept
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
concept_key = self.get_key(nodes)
concept = sheerka.new(concept_key)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
concepts = concept if hasattr(concept, "__iter__") else [concept]
for concept in concepts:
@@ -101,7 +102,7 @@ class ConceptsWithConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=concept,
try_parsed=None)))
+8 -9
View File
@@ -110,7 +110,7 @@ class DefaultParser(BaseParser):
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "Default", 50)
BaseParser.__init__(self, "Default", 60)
self.lexer_iter = None
self._current = None
self.context: ExecutionContext = None
@@ -168,7 +168,6 @@ class DefaultParser(BaseParser):
self.next_token()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -188,19 +187,19 @@ class DefaultParser(BaseParser):
return
def parse(self, context, text):
def parse(self, context, parser_input):
# default parser can only manage string text
if not isinstance(text, str):
if not isinstance(parser_input, str):
ret = context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
self.log_result(context, text, ret)
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
self.add_error(e, False)
@@ -211,7 +210,7 @@ class DefaultParser(BaseParser):
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.get_return_value_body(context.sheerka, text, tree, tree)
body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
# body = self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
@@ -224,7 +223,7 @@ class DefaultParser(BaseParser):
not self.has_error,
body)
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def parse_statement(self):
+5 -5
View File
@@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "EmptyString", 90)
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if isinstance(text, str) and text.strip() == "" or \
isinstance(text, list) and text == [] or \
text is None:
if isinstance(parser_input, str) and parser_input.strip() == "" or \
isinstance(parser_input, list) and parser_input == [] or \
parser_input is None:
ret = sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
@@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser):
else:
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
+12 -13
View File
@@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
def parse(self, context, text):
def parse(self, context, parser_input):
"""
text can be string, but text can also be an list of tokens
:param context:
:param text:
:param parser_input:
:return:
"""
context.log(f"Parsing '{text}'", self.name)
context.log(f"Parsing '{parser_input}'", self.name)
res = []
sheerka = context.sheerka
try:
words = self.get_words(text)
words = self.get_words(parser_input)
except LexerError as e:
context.log(f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
if len(words) > self.MAX_WORDS_SIZE:
context.log(f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
recognized = False
for combination in self.combinations(words):
@@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser):
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text if isinstance(text, str) else self.get_text_from_tokens(text),
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
body=concept,
try_parsed=concept)))
recognized = True
if recognized:
if len(res) == 1:
self.log_result(context, text, res[0])
self.log_result(context, parser_input, res[0])
else:
self.log_multiple_results(context, text, res)
self.log_multiple_results(context, parser_input, res)
return res
return res
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
self.log_result(context, text, ret)
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
@staticmethod
def get_words(text):
tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
def get_words(self, text):
tokens = self.get_input_as_tokens(text)
res = []
for t in tokens:
if t.type == TokenKind.EOF:
+12 -13
View File
@@ -1,18 +1,20 @@
# to be replaced by SyaNodeParser
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser()
concept_lexer_parser = BnfNodeParser()
class MultipleConceptsParser(BaseParser):
"""
Parser that will take the result of ConceptLexerParser and
Parser that will take the result of BnfNodeParser and
try to resolve the unrecognized tokens token by token
It is a success when it returns a list ConceptNode exclusively
@@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
self.enabled = False
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
@@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser):
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
if not nodes:
return None
if not text.parser == concept_lexer_parser:
return None
sheerka = context.sheerka
nodes = text.value
nodes_found = [[]]
concepts_only = True
@@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, text.source, ret[0])
self.log_result(context, parser_input.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text.source, ret)
self.log_multiple_results(context, parser_input.source, ret)
return ret
@staticmethod
+25 -24
View File
@@ -1,4 +1,4 @@
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
@@ -6,7 +6,7 @@ import ast
import logging
import core.utils
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
log = logging.getLogger(__name__)
@@ -67,7 +67,7 @@ class PythonParser(BaseParser):
BaseParser.__init__(self, "Python", 50)
self.source = kwargs.get("source", "<undef>")
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
tree = None
@@ -76,15 +76,9 @@ class PythonParser(BaseParser):
}
try:
if isinstance(text, str) and "c:" in text:
source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text, python_switcher)
source = self.get_input_as_text(parser_input, python_switcher)
source = source.strip()
text = text if isinstance(text, str) else source
parser_input = parser_input if isinstance(parser_input, str) else source
# first, try to parse an expression
res, tree, error = self.try_parse_expression(source)
@@ -92,25 +86,32 @@ class PythonParser(BaseParser):
# then try to parse a statement
res, tree, error = self.try_parse_statement(source)
if not res:
self.has_error = True
error_node = PythonErrorNode(text, error)
error_node = PythonErrorNode(parser_input, error)
self.error_sink.append(error_node)
except LexerError as e:
self.has_error = True
self.error_sink.append(e)
ret = sheerka.ret(
self.name,
not self.has_error,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=self.error_sink if self.has_error else PythonNode(text, tree),
try_parsed=None))
if self.has_error:
ret = sheerka.ret(
self.name,
False,
sheerka.new(
BuiltinConcepts.NOT_FOR_ME,
body=parser_input,
reason=self.error_sink))
else:
ret = sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
body=PythonNode(parser_input, tree),
try_parsed=None))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def try_parse_expression(self, text):
+7 -8
View File
@@ -1,10 +1,11 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
multiple_concepts_parser = MultipleConceptsParser()
unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser):
@@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser):
res += c if c.isalnum() else "0"
return res
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
source = ""
to_parse = ""
identifiers = {}
@@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser):
python_id = _get_identifier(concept)
to_parse += python_id
python_ids_mappings[python_id] = concept
else:
source += node.source
to_parse += node.source
File diff suppressed because it is too large Load Diff
+114
View File
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils
PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]
@dataclass()
class CannotParseNode(ErrorNode):
unrecognized: UnrecognizedTokensNode
class UnrecognizedNodeParser(BaseParser):
"""
This parser comes after the other NodeParsers (Atom, Bnf or Sya)
It will try to resolve all UnrecognizedTokensNode.
"""
def __init__(self, **kwargs):
super().__init__("UnrecognizedNode", 45) # lower than AtomNode, BnfNode and SyaNode
def add_error(self, error):
if hasattr(error, "__iter__"):
self.error_sink.extend(error)
else:
self.error_sink.append(error)
def parse(self, context, parser_input):
sheerka = context.sheerka
nodes = self.get_input_as_lexer_nodes(parser_input, None)
if not nodes:
return None
sequences_found = [[]]
has_unrecognized = False
for node in nodes:
if isinstance(node, ConceptNode):
res = self.validate_concept_node(context, node)
if not res.status:
self.add_error(res.body)
else:
sequences_found = core.utils.product(sequences_found, [res.body])
elif isinstance(node, UnrecognizedTokensNode):
res = parse_unrecognized(context, node.source, PARSERS)
res = only_successful(context, res)
if res.status:
lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
sequences_found = core.utils.product(sequences_found, lexer_nodes)
else:
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True
else: # cannot happen as of today :-)
raise NotImplementedError()
# concept with UnrecognizedToken in their properties is considered as fatal error
if self.has_error:
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
ret = []
for choice in sequences_found:
ret.append(
sheerka.ret(
self.name,
not has_unrecognized,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
body=choice,
try_parsed=choice)))
if len(ret) == 1:
self.log_result(context, parser_input, ret[0])
return ret[0]
else:
self.log_multiple_results(context, parser_input, ret)
return ret
def validate_concept_node(self, context, concept_node):
sheerka = context.sheerka
errors = []
def _validate_concept(concept):
"""
Recursively browse the compiled properties in order to find unrecognized
:param concept:
:return:
"""
for name, value in concept.compiled.items():
if isinstance(value, Concept):
_validate_concept(value)
elif isinstance(value, UnrecognizedTokensNode):
res = parse_unrecognized(context, value.tokens, PARSERS)
res = only_successful(context, res) # only key successful parsers
if res.status:
concept.compiled[name] = res.body.body
else:
errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))
_validate_concept(concept_node.concept)
if len(errors) > 0:
return context.sheerka.ret(self.name, False, errors)
else:
return context.sheerka.ret(self.name, True, concept_node)
+18 -1
View File
@@ -20,6 +20,9 @@ def json_default_converter(o):
if isinstance(o, (date, datetime)):
return o.isoformat()
if isinstance(o, SheerkaDataProviderRef):
return f"##XREF##:{o.target}"
class Event(object):
"""
@@ -389,7 +392,7 @@ class SheerkaDataProvider:
return getattr(obj, Serializer.ORIGIN)
if isinstance(obj, SheerkaDataProviderRef):
return obj.original_target
return obj.original_target
return None
@@ -406,6 +409,11 @@ class SheerkaDataProvider:
def is_reference(obj):
return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
def reset(self):
self.first_time = self.io.first_time
if hasattr(self.io, "reset"):
self.io.reset()
def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False):
"""
Adds obj to the entry 'entry'
@@ -999,3 +1007,12 @@ class SheerkaDataProvider:
keys[entry] = value
self.save_keys(keys)
return str(value)
def dump_state(self, digest=None):
digest = digest or self.get_snapshot(SheerkaDataProvider.HeadFile)
state = self.load_state(digest)
print(json.dumps(state.data, sort_keys=True, default=json_default_converter, indent=True))
def dump_obj(self, digest):
obj = self.load_obj(digest)
print(json.dumps(obj.__dict__, sort_keys=True, default=json_default_converter, indent=True))
+4
View File
@@ -170,6 +170,10 @@ class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):
return io.BytesIO(self.cache[file_path]) if "b" in mode else io.StringIO(self.cache[file_path])
def reset(self):
self.cache.clear()
self.first_time = True
def on_close(dictionary_io, file_path, stream):
"""