Added SyaNodeParser (finally, after one month)

This commit is contained in:
2020-04-09 15:42:36 +02:00
parent c9acfa99a1
commit 6c7c529016
56 changed files with 5322 additions and 404 deletions
+5 -5
View File
@@ -659,7 +659,7 @@ For the two questions, I will first try the simple implementations and see there
Going back on BNF implementation. As it's Christmas eve today, I won't stay very long.
So, the implementation lies in the class ConceptLexerParser, a it's a lexer not for token, but for concept.
So, the implementation lies in the class BnfNodeParser, as it's a lexer not for tokens, but for concepts.
The purpose of this class is to recognize a sequence of Concept.
So if we define the following concepts
@@ -675,7 +675,7 @@ when you input
one two three four five
the list of :code:`[foo, bar]` will be returned by the ConceptLexerParser (as return values)
the list of :code:`[foo, bar]` will be returned by the BnfNodeParser (as return values)
How does it work?
@@ -696,7 +696,7 @@ Some example :
and so on...
So when a concept is defined using its bnf definition, I use the **BnfParser** to create the grammar, and then
I use the **ConceptLexerParser** to recognize the concepts
I use the **BnfNodeParser** to recognize the concepts
The current implementation to recognize a concept is not very efficient. All the definitions are in a dictionary
and I go thru the whole dictionary to see if some concepts are recognized. Once a concept is found, I loop again
@@ -713,7 +713,7 @@ So once the parsing is effective, I return a **ConceptNode** object
class ConceptNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
It represents a recognized concept
"""
@@ -859,7 +859,7 @@ As of now, I have implemented the following parsers:
* DefaultParser (the name is not accurate)
To recognize builtin syntax (like 'def concept' or 'isa')
* ConceptLexerParser
* BnfNodeParser
To recognize concept defined with BNF language
All these parsers are executed in a row (the order is not very important)
+1
View File
@@ -0,0 +1 @@
ReturnValue(who=evaluators.TooManySuccess, status=False, value=(21)__TOO_MANY_SUCCESS, message=None)
+28 -1
View File
@@ -37,6 +37,8 @@ class BuiltinConcepts(Enum):
PARSER_RESULT = "parser result"
TOO_MANY_SUCCESS = "too many success" # when expecting a limited number of successful return value
TOO_MANY_ERRORS = "too many errors" # when expecting a limited number of successful return value
ONLY_SUCCESSFUL = "only successful" # filter the result, only keep successful ones
MULTIPLE_ERRORS = "multiple errors" # filter the result, only keep evaluator in error
NOT_FOR_ME = "not for me" # a parser recognize that the entry is not meant for it
IS_EMPTY = "is empty" # when a set is empty
INVALID_RETURN_VALUE = "invalid return value" # the return value of an evaluator is not correct
@@ -45,6 +47,7 @@ class BuiltinConcepts(Enum):
CONCEPT_EVAL_ERROR = "concept evaluation error" # cannot evaluate a property or metadata of a concept
ENUMERATION = "enum" # represents a list or a set
LIST = "list" # represents a list
FILTERED = "filtered" # represents the result of a filtering
CONCEPT_ALREADY_IN_SET = "concept already in set"
EVALUATOR_PRE_PROCESS = "evaluator pre process" # used modify / tweak behaviour of evaluators
EVAL_BODY_REQUESTED = "eval body requested" # to evaluate the body
@@ -91,6 +94,7 @@ BuiltinErrors = [str(e) for e in {
BuiltinConcepts.UNKNOWN_PROPERTY,
BuiltinConcepts.TOO_MANY_SUCCESS,
BuiltinConcepts.TOO_MANY_ERRORS,
BuiltinConcepts.MULTIPLE_ERRORS,
BuiltinConcepts.INVALID_RETURN_VALUE,
BuiltinConcepts.CONCEPT_ALREADY_DEFINED,
BuiltinConcepts.CONCEPT_EVAL_ERROR,
@@ -249,11 +253,12 @@ class ParserResultConcept(Concept):
Result of a parsing
"""
def __init__(self, parser=None, source=None, value=None, try_parsed=None, validate_concept=None):
def __init__(self, parser=None, source=None, tokens=None, value=None, try_parsed=None):
super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT)
self.set_metadata_value(ConceptParts.BODY, value)
self.set_prop("parser", parser)
self.set_prop("source", source)
self.set_prop("tokens", tokens)
self.set_prop("try_parsed", try_parsed) # in case of error, what was found before the error
self.metadata.is_evaluated = True
@@ -372,6 +377,14 @@ class ListConcept(Concept):
# return item in self.body
class FilteredConcept(Concept):
    """
    Concept representing the result of a filtering operation.

    The body holds the retained items; the "iterable" and "predicate"
    properties record what was filtered and with which predicate.
    """

    def __init__(self, filtered=None, iterable=None, predicate=None):
        concept_key = BuiltinConcepts.FILTERED
        super().__init__(concept_key, True, False, concept_key)
        # the items that were kept are the body of the concept
        self.set_metadata_value(ConceptParts.BODY, filtered)
        for prop_name, prop_value in (("iterable", iterable), ("predicate", predicate)):
            self.def_prop(prop_name, prop_value)
class ConceptAlreadyInSet(Concept):
def __init__(self, concept=None, concept_set=None):
super().__init__(BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
@@ -409,3 +422,17 @@ class WhereClauseFailed(Concept):
@property
def concept(self):
return self.body
class NotForMeConcept(Concept):
    """
    Concept returned when an input is not meant for the component that received it.

    The body holds the rejected source; the "reason" property holds the explanation.
    """

    def __init__(self, source=None, reason=None):
        concept_key = BuiltinConcepts.NOT_FOR_ME
        super().__init__(concept_key, True, False, concept_key)
        self.set_metadata_value(ConceptParts.BODY, source)
        self.def_prop("reason", reason)
        # flagged as already evaluated at construction time
        self.metadata.is_evaluated = True

    def __repr__(self):
        reason = self.get_prop('reason')
        return f"NotForMeConcept(source={self.body}, reason={reason})"
+177
View File
@@ -6,6 +6,8 @@ from core.ast.nodes import CallNodeConcept, GenericNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
def is_same_success(context, return_values):
@@ -132,6 +134,181 @@ def expect_one(context, return_values):
parents=return_values)
def only_successful(context, return_values):
    """
    Keep only the successful return values.

    A non-list input is handed back untouched. Otherwise a single return value
    is produced:
      * IS_EMPTY (status False) when the list is empty
      * TOO_MANY_ERRORS (status False) when no item has a truthy status
      * ONLY_SUCCESSFUL (status True) whose body is the list of successful items

    :param context: execution context (provides ``sheerka`` and ``who``)
    :param return_values: list of return values to filter
    :return: the input itself, or a new return value whose parents are the input
    """
    if not isinstance(return_values, list):
        return return_values

    sheerka = context.sheerka

    if not return_values:
        status, concept_key, body = False, BuiltinConcepts.IS_EMPTY, return_values
    else:
        winners = list(filter(lambda candidate: candidate.status, return_values))
        if winners:
            status, concept_key, body = True, BuiltinConcepts.ONLY_SUCCESSFUL, winners
        else:
            status, concept_key, body = False, BuiltinConcepts.TOO_MANY_ERRORS, return_values

    return sheerka.ret(
        context.who,
        status,
        sheerka.new(concept_key, body=body),
        parents=return_values)
def only_parsers_results(context, return_values):
    """
    Keep the return values whose body is a PARSER_RESULT, whatever their status.

    Parser results whose own body is an ErrorNode, or a one-element list holding
    an UnrecognizedTokensNode, are dropped as well.

    :param context: execution context (provides ``sheerka`` and ``who``)
    :param return_values: list of return values to filter
    :return: the input itself when it is not a list; otherwise IS_EMPTY,
             TOO_MANY_ERRORS (both status False) or FILTERED (status True)
    """
    if not isinstance(return_values, list):
        return return_values

    sheerka = context.sheerka

    if not return_values:
        return sheerka.ret(
            context.who,
            False,
            sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values),
            parents=return_values)

    def is_disguised_rejection(payload):
        # hack because some parsers don't follow the NOT_FOR_ME rule
        if isinstance(payload, ErrorNode):
            return True
        return isinstance(payload, list) and \
            len(payload) == 1 and \
            isinstance(payload[0], UnrecognizedTokensNode)

    parser_results = [rv for rv in return_values
                      if sheerka.isinstance(rv.body, BuiltinConcepts.PARSER_RESULT)]
    kept = [rv for rv in parser_results if not is_disguised_rejection(rv.body.body)]

    if not kept:
        return sheerka.ret(
            context.who,
            False,
            sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
            parents=return_values)

    filtered = sheerka.new(
        BuiltinConcepts.FILTERED,
        body=kept,
        iterable=return_values,
        predicate="sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)")
    return sheerka.ret(context.who, True, filtered, parents=return_values)
def parse_unrecognized(context, tokens, parsers):
    """
    Try to recognize concepts or code in ``tokens`` using only the given parsers.

    :param context: execution context; a sub context is pushed for the parsing
    :param tokens: the tokens to parse
    :param parsers: names of the parsers to enable (without the parser prefix)
    :return: the return values of the execution; the ones produced by
             "parsers.Python" are removed when "parsers.AtomNode" succeeded
    """
    sheerka = context.sheerka
    steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]

    with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context:
        # switch every parser off, then re-enable the requested ones
        sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False)
        for parser_name in parsers:
            sub_context.add_preprocess(BaseParser.PREFIX + parser_name, enabled=True)

        sub_context.add_inputs(source=tokens)
        user_input = sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens)
        to_parse = sheerka.ret(context.who, True, user_input)
        res = sheerka.execute(sub_context, to_parse, steps)
        sub_context.add_values(return_values=res)

    # discard Python response if accepted by AtomNode
    accepted_by_atom = any(r.status and r.who == "parsers.AtomNode" for r in res)
    if not accepted_by_atom:
        return res

    return [r for r in res if not (r.who == "parsers.Python")]
def get_lexer_nodes(return_values, start, tokens):
    """
    Convert parser return values into sequences of lexer nodes
    (ConceptNode, UnrecognizedTokensNode or SourceCodeNode).

    Nodes coming from the node parsers (BnfNode, SyaNode, AtomNode) are shifted
    in place by ``start``.

    :param return_values: return values produced by the parsers
    :param start: position of the first token in the enclosing token list
    :param tokens: the tokens that were parsed
    :return: list of list (list of concept node sequence)
    :raises NotImplementedError: when a return value comes from an unsupported parser
    """
    node_parsers = ("parsers.BnfNode", "parsers.SyaNode", "parsers.AtomNode")
    lexer_nodes = []

    for ret_val in return_values:
        origin = ret_val.who

        if origin == "parsers.Python":
            source = ret_val.body.source
            stripped = source.strip()
            if stripped.isalnum() and not stripped.isnumeric():
                # Discard SourceCodeNode which seems to be a concept
                # It may be a wrong idea, so let's see
                continue
            last = start + len(tokens) - 1
            lexer_nodes.append([SourceCodeNode(ret_val.body.body, start, last, tokens, source, ret_val)])
            continue

        if origin == "parsers.ExactConcept":
            found = ret_val.body.body
            if not hasattr(found, "__iter__"):
                found = [found]
            last = start + len(tokens) - 1
            # one single-node sequence per recognized concept
            lexer_nodes.extend(
                [ConceptNode(concept, start, last, tokens, ret_val.body.source)] for concept in found)
            continue

        if origin in node_parsers:
            sequence = list(ret_val.body.body)
            for node in sequence:
                node.start += start
                node.end += start
            # the whole sequence is kept together
            lexer_nodes.append(sequence)
            continue

        raise NotImplementedError()

    return lexer_nodes
def get_names(sheerka, concept_node):
"""
Finds all the names referenced by the concept_node
+81 -2
View File
@@ -108,11 +108,14 @@ class Concept:
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, simplec):
return self.name == other.name and self.body == other.body
if id(self) == id(other):
return True
if isinstance(other, CC):
return other == self
if not isinstance(other, Concept):
return False
@@ -346,6 +349,17 @@ class Concept:
"""
return self.props[prop_name].value
def set_prop_by_index(self, index: int, value):
    """
    Set the value of a property (not the metadata) designated by its position.

    :param index: position of the property in the order of ``self.props``
    :param value: value to store in the property
    :return: self, so that calls can be chained
    """
    prop_names = list(self.props.keys())
    target = self.props[prop_names[index]]
    target.value = value
    return self
def set_metadata_value(self, metadata: ConceptParts, value):
"""
Set the resolved value of a metadata (not the metadata itself)
@@ -438,3 +452,68 @@ class InfiniteRecursionResolved:
def get_value(self):
return self.value
class CC:
    """
    Concept class for test purpose.

    CC means "concept for compiled" (or "concept with compiled").
    It matches a concept when the key and the compiled values are equal.
    """

    # Only concept_key and compiled take part in the comparisons.
    # The other attributes (concept, source, start and end)
    # are used in tests/parsers/parsers_utils.py to help creating helper objects

    def __init__(self, concept, source=None, **kwargs):
        given_a_concept = isinstance(concept, Concept)
        self.concept_key = concept.key if given_a_concept else concept
        self.compiled = kwargs
        self.concept = concept if given_a_concept else None
        # to use when the key is different from the sub str to search when filling start and stop
        self.source = source
        self.start = None  # for debug purpose, indicate where the concept starts
        self.end = None  # for debug purpose, indicate where the concept ends

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, Concept):
            if other.key != self.concept_key:
                return False
            return self.compiled == other.compiled

        if isinstance(other, CC):
            return self.concept_key == other.concept_key and \
                self.compiled == other.compiled

        return False

    def __hash__(self):
        # NOTE(review): a CC built from a Concept hashes like that concept, while a CC
        # built from the bare key hashes like the key, although both can compare equal
        hashed = self.concept if self.concept else self.concept_key
        return hash(hashed)

    def __repr__(self):
        if self.concept:
            head = f"CC(concept='{self.concept}'"
        else:
            head = f"CC(concept_key='{self.concept_key}'"
        extras = "".join(f", {k}='{v}'" for k, v in self.compiled.items())
        return head + extras + ")"

    def fix_pos(self, node):
        """
        Widen [start, end] so that it covers the given node.

        :param node: object with ``start`` / ``end`` attributes, or a (start, end) tuple
        :return: self
        """

        def bound(attribute, position):
            if hasattr(node, attribute):
                return getattr(node, attribute)
            if isinstance(node, tuple):
                return node[position]
            return None

        start = bound("start", 0)
        end = bound("end", 1)

        if start is not None and (self.start is None or start < self.start):
            self.start = start

        if end is not None and (self.end is None or end > self.end):
            self.end = end

        return self
+7 -3
View File
@@ -43,6 +43,7 @@ class ExecutionContext:
desc: str = None,
logger=None,
global_hints=None,
global_errors=None,
**kwargs):
self._parent = None
@@ -61,6 +62,7 @@ class ExecutionContext:
self.logger = logger
self.local_hints = set()
self.global_hints = set() if global_hints is None else global_hints
self.global_errors = [] if global_errors is None else global_errors
self.inputs = {} # what was the parameters of the execution context
self.values = {} # what was produced by the execution context
@@ -146,8 +148,8 @@ class ExecutionContext:
preprocess.set_prop(k, v)
if not self.preprocess:
self.preprocess = set()
self.preprocess.add(preprocess)
self.preprocess = []
self.preprocess.append(preprocess)
return self
def add_inputs(self, **kwargs):
@@ -212,6 +214,7 @@ class ExecutionContext:
desc,
logger,
self.global_hints,
self.global_errors,
**_kwargs)
new._parent = self
new._tab = self._tab + " " * DEBUG_TAB_SIZE
@@ -230,7 +233,8 @@ class ExecutionContext:
if self.logger and not self.logger.disabled:
self.logger.debug(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
def log_error(self, message, who=None):
def log_error(self, message, who=None, exc=None):
self.global_errors.append(exc or message)
if self.logger and not self.logger.disabled:
self.logger.exception(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
@@ -1,8 +1,10 @@
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
from core.concept import Concept
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderRef
import core.utils
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
BNF_NODE_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
class SheerkaCreateNewConcept:
@@ -13,6 +15,7 @@ class SheerkaCreateNewConcept:
def __init__(self, sheerka):
self.sheerka = sheerka
self.logger_name = self.create_new_concept.__name__
self.base_lexer_parser = core.utils.get_class(BASE_NODE_PARSER_CLASS)("BaseNodeParser", 0)
def create_new_concept(self, context, concept: Concept):
"""
@@ -25,7 +28,7 @@ class SheerkaCreateNewConcept:
concept.init_key()
concepts_definitions = None
init_ret_value = None
init_bnf_ret_value = None
sdp = self.sheerka.sdp
@@ -49,13 +52,19 @@ class SheerkaCreateNewConcept:
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.sheerka.parsers[CONCEPT_LEXER_PARSER_CLASS]()
bnf_lexer_parser = self.sheerka.parsers[BNF_NODE_PARSER_CLASS]()
with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context:
sub_context.concepts[concept.key] = concept # the concept is not in the real cache yet
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
sub_context.add_values(return_values=init_ret_value)
if not init_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
init_bnf_ret_value = bnf_lexer_parser.initialize(sub_context, concepts_definitions)
sub_context.add_values(return_values=init_bnf_ret_value)
if not init_bnf_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_bnf_ret_value.value))
# update concept definition by key
init_sya_ret_value = self.base_lexer_parser.initialize(context, [concept], use_sheerka=True)
if not init_sya_ret_value.status:
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
concepts_by_first_keyword = init_sya_ret_value.body
concept.freeze_definition_hash()
@@ -97,9 +106,15 @@ class SheerkaCreateNewConcept:
sdp.set(
context.event.get_digest(),
self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
concept_lexer_parser.encode_grammar(init_ret_value.body),
bnf_lexer_parser.encode_grammar(init_bnf_ret_value.body),
use_ref=True)
self.sheerka.concepts_definitions_cache = None # invalidate cache
# update the concepts by first keyword
sdp.set(context.event.get_digest(),
self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
concepts_by_first_keyword)
except SheerkaDataProviderDuplicateKeyError as error:
context.log_error("Failed to create a new concept.", who=self.logger_name)
return self.sheerka.ret(
@@ -109,13 +124,13 @@ class SheerkaCreateNewConcept:
error.args[0])
# Updates the caches
self.sheerka.cache_by_key[concept.key] = sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key)
self.sheerka.cache_by_name[concept.name] = sdp.get_safe(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name)
self.sheerka.cache_by_id[concept.id] = concept
if init_ret_value is not None and init_ret_value.status:
self.sheerka.concepts_grammars = init_ret_value.body
if init_bnf_ret_value is not None and init_bnf_ret_value.status:
self.sheerka.concepts_grammars = init_bnf_ret_value.body
self.sheerka.concepts_by_first_keyword = concepts_by_first_keyword
# process the return in needed
# process the return if needed
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
@@ -1,6 +1,6 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved
from core.builtin_helpers import add_to_ret_val, remove_from_ret_val, expect_one
from core.builtin_helpers import expect_one
CONCEPT_EVALUATION_STEPS = [
BuiltinConcepts.BEFORE_EVALUATION,
+24 -15
View File
@@ -33,6 +33,8 @@ class SheerkaExecute:
# group the parsers by priorities
instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
instantiated_parsers = self.preprocess(execution_context, instantiated_parsers)
grouped_parsers = {}
for parser in [p for p in instantiated_parsers if p.enabled]:
grouped_parsers.setdefault(parser.priority, []).append(parser)
@@ -44,7 +46,6 @@ class SheerkaExecute:
for parser in grouped_parsers[priority]:
return_value_success_found = False
for return_value in inputs_for_this_group:
to_parse = return_value.body.body \
@@ -67,22 +68,23 @@ class SheerkaExecute:
r.parents = [return_value]
result.append(r)
if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT):
# if a ParserResultConcept is returned, it will be used by the parsers
# of the following groups
to_process.append(r)
if r.status:
return_value_success_found = True
stop_processing = True
else:
res.parents = [return_value]
result.append(res)
if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT):
# if a ParserResultConcept is returned, it will be used by the parsers
# of the following groups
to_process.append(res)
if res.status:
return_value_success_found = True
stop_processing = True
sub_context.add_values(return_values=res)
if return_value_success_found:
stop_processing = True
break # Stop the other return_values (but not the other parsers with the same priority)
if stop_processing:
break # Do not try the other priorities if a match is found
@@ -102,7 +104,7 @@ class SheerkaExecute:
instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]
# pre-process evaluators if needed
instantiated_evaluators = self._preprocess_evaluators(execution_context, instantiated_evaluators)
instantiated_evaluators = self.preprocess(execution_context, instantiated_evaluators)
for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
@@ -123,7 +125,7 @@ class SheerkaExecute:
evaluated_items = []
to_delete = []
for evaluator in grouped_evaluators[priority]:
evaluator = self._preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy
evaluator = self.preprocess(execution_context, evaluator.__class__()) # fresh copy
sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
with iteration_context.push(desc=sub_context_desc, logger=evaluator.verbose_log) as sub_context:
@@ -215,22 +217,29 @@ class SheerkaExecute:
return return_values
def _preprocess_evaluators(self, context, evaluators):
def preprocess(self, context, parsers_or_evaluators):
if not context.preprocess:
return evaluators
return parsers_or_evaluators
if not hasattr(evaluators, "__iter__"):
if not hasattr(parsers_or_evaluators, "__iter__"):
single_one = True
evaluators = [evaluators]
parsers_or_evaluators = [parsers_or_evaluators]
else:
single_one = False
for preprocess in context.preprocess:
for e in evaluators:
if preprocess.props["name"].value == e.name:
for e in parsers_or_evaluators:
if self.matches(e.name, preprocess.get_prop("name")):
for prop, value in preprocess.props.items():
if prop == "name":
continue
if hasattr(e, prop):
setattr(e, prop, value.value)
return evaluators[0] if single_one else evaluators
return parsers_or_evaluators[0] if single_one else parsers_or_evaluators
@staticmethod
def matches(parser_or_evaluator_name, preprocessor_name):
if preprocessor_name.endswith("*"):
return parser_or_evaluator_name.startswith(preprocessor_name[:-1])
else:
return parser_or_evaluator_name == preprocessor_name
+94 -10
View File
@@ -17,12 +17,7 @@ from core.sheerka_logger import console_handler
import logging
# CONCEPT_EVALUATION_STEPS = [
# BuiltinConcepts.BEFORE_EVALUATION,
# BuiltinConcepts.EVALUATION,
# BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
CONCEPT_LEXER_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser"
CONCEPTS_FILE = "_concepts.txt"
@@ -37,6 +32,9 @@ class Sheerka(Concept):
CONCEPTS_BY_NAME_ENTRY = "Concepts_By_Name"
CONCEPTS_BY_HASH_ENTRY = "Concepts_By_Hash" # store hash of concepts definitions (not values)
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Concepts_By_First_Keyword"
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
@@ -65,6 +63,10 @@ class Sheerka(Concept):
# a grammar is a resolved BNF
self.concepts_grammars = {}
# cache for SYA concepts
self.concepts_by_first_keyword = {}
self.sya_definitions = {}
# a concept can be instantiated
# ex: File is a concept, but File('foo.txt') is an instance
# TODO: manage contexts
@@ -119,7 +121,8 @@ class Sheerka(Concept):
self.initialize_builtin_concepts()
self.initialize_builtin_parsers()
self.initialize_builtin_evaluators()
self.initialize_concepts_definitions(exec_context)
self.initialize_bnf_parsing(exec_context)
self.initialize_sya_parsing()
res = ReturnValueConcept(self, True, self)
exec_context.add_values(return_values=res)
@@ -174,12 +177,25 @@ class Sheerka(Concept):
"""
core.utils.init_package_import("parsers")
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
modules_to_skip = ["parsers.BaseNodeParser"]
temp_result = {}
for parser in core.utils.get_sub_classes("parsers", base_class):
if parser.__module__ == base_class.__module__:
continue
self.init_log.debug(f"Adding builtin parser '{parser.__name__}'")
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
if parser.__module__ in modules_to_skip:
continue
qualified_name = core.utils.get_full_qualified_name(parser)
self.init_log.debug(f"Adding builtin parser '{qualified_name}'")
temp_result[qualified_name] = parser
# Now we sort the parser by name.
# It's not important for the logic of their usage as they have their priority anyway,
# We do that for the unit tests. They are to complicated to write otherwise
for name in sorted(temp_result.keys()):
self.parsers[name] = temp_result[name]
def initialize_builtin_evaluators(self):
"""
@@ -195,7 +211,7 @@ class Sheerka(Concept):
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator)
def initialize_concepts_definitions(self, execution_context):
def initialize_bnf_parsing(self, execution_context):
self.init_log.debug("Initializing concepts grammars.")
definitions = self.get_concepts_definitions(execution_context)
@@ -211,6 +227,25 @@ class Sheerka(Concept):
self.concepts_grammars = lexer_parser.concepts_grammars
def initialize_sya_parsing(self):
    """
    Load the SYA lookup tables from the data provider.

    Both ``concepts_by_first_keyword`` and ``sya_definitions`` fall back to an
    empty dict when the data provider returns a falsy value.
    """
    self.init_log.debug("Initializing sya definitions.")

    stored_keywords = self.sdp.get_safe(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, load_origin=False)
    self.concepts_by_first_keyword = stored_keywords if stored_keywords else {}

    stored_definitions = self.sdp.get_safe(self.CONCEPTS_SYA_DEFINITION_ENTRY, load_origin=False)
    self.sya_definitions = stored_definitions if stored_definitions else {}
def reset(self):
    """
    Clear the caches and the in-memory lookup tables, reset the data provider
    and set the user concepts key back to 1000.
    """
    self.reset_cache()

    # every in-memory lookup table starts over with its own empty dict
    for table_name in ("concepts_by_first_keyword", "concepts_grammars", "sya_definitions"):
        setattr(self, table_name, {})

    data_provider = self.sdp
    data_provider.reset()
    data_provider.set_key(self.USER_CONCEPTS_KEYS, 1000)
def reset_cache(self, filter_to_use=None):
"""
reset the different cache that exists
@@ -220,6 +255,7 @@ class Sheerka(Concept):
if filter_to_use is None:
self.cache_by_key = {}
self.cache_by_id = {}
self.cache_by_name = {}
else:
raise NotImplementedError()
@@ -324,6 +360,38 @@ class Sheerka(Concept):
"""
return self.sets_handler.set_isa(context, concept, concept_set)
def set_sya_def(self, context, list_of_def):
    """
    Set the precedence and/or the associativity of concepts, then persist the definitions.

    An entry whose precedence and associativity are both None removes the
    definition of the concept. When only one of the two is given, the missing
    one is stored as None.

    :param context: execution context; its event digest is used when saving
    :param list_of_def: list of tuple(concept_id, precedence (int), SyaAssociativity)
    :return: an ERROR return value (status False) when an entry refers to an
             unknown concept, a SUCCESS return value otherwise
    """
    # validate all the entries first, so that nothing is modified on error
    for concept_id, _precedence, _associativity in list_of_def:
        if concept_id == BuiltinConcepts.UNKNOWN_CONCEPT:
            return self.ret(self.name,
                            False,
                            self.new(BuiltinConcepts.ERROR, body=f"Concept {concept_id} is not known"))

    # update the definitions
    for concept_id, precedence, associativity in list_of_def:
        if precedence is None and associativity is None:
            # removing a definition that does not exist is not an error
            self.sya_definitions.pop(concept_id, None)
        else:
            # the associativity is optional: do not dereference it when it is missing
            associativity_value = None if associativity is None else associativity.value
            self.sya_definitions[concept_id] = (precedence, associativity_value)

    # then save
    self.sdp.set(context.event.get_digest(),
                 self.CONCEPTS_SYA_DEFINITION_ENTRY,
                 self.sya_definitions)

    return self.ret(self.name, True, self.new(BuiltinConcepts.SUCCESS))
def get_set_elements(self, context, concept):
"""
Concept is supposed to be a set
@@ -571,6 +639,22 @@ class Sheerka(Concept):
return self.value(body_to_use)
def get_error(self, obj):
    """
    Extract the error carried by ``obj``.

    :param obj: a builtin error concept, a list, or a return value
    :return: ``obj`` itself when it is a builtin error concept or a list;
             None for a successful return value; the error found in the body
             of a failed return value holding a parser result;
             a NotImplementedError instance in every other case
    """
    is_builtin_error = isinstance(obj, Concept) and obj.metadata.is_builtin and obj.key in BuiltinErrors
    if is_builtin_error or isinstance(obj, list):
        return obj

    if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
        if obj.status:
            return None

        body = obj.body
        if self.isinstance(body, BuiltinConcepts.PARSER_RESULT):
            return self.get_error(body.body)

    # NOTE(review): the exception is returned, not raised - confirm this is intended
    return NotImplementedError()
def get_values(self, objs):
if not (isinstance(objs, list) or
self.isinstance(objs, BuiltinConcepts.LIST) or
+36 -2
View File
@@ -163,7 +163,7 @@ def remove_list_from_list(lst, to_remove):
def product(a, b):
"""
Kind of cartesian product between lists a and b
knowing that a is also a list
knowing that a is also a list : a is a list of list !!!
So it's a cartesian product between a list of list and a list
"""
@@ -176,7 +176,12 @@ def product(a, b):
res = []
for item_b in b:
for item_a in a:
items = item_a + [item_b]
#items = item_a + [item_b]
items = item_a[:]
if hasattr(item_b, "__iter__"):
items.extend(item_b)
else:
items.append(item_b)
res.append(items)
return res
@@ -276,6 +281,7 @@ def str_concept(t):
>>> assert str_concept((None, "id")) == "c:|id:"
>>> assert str_concept(("key", None)) == "c:key:"
>>> assert str_concept((None, None)) == ""
>>> assert str_concept(Concept(key="foo", id="bar")) == "c:foo|bar:"
:param t:
:return:
"""
@@ -297,6 +303,12 @@ def unstr_concept(concept_repr):
"""
if concept_repr is like :c:key:id:
return the key and the id
>>> assert unstr_concept("c:key:") == "key"
>>> assert unstr_concept("c:key|id:") == ("key", "id")
>>> assert unstr_concept("c:|id:") == ("None", "id")
>>> assert unstr_concept("c:key|:") == ("key", "None")
>>> # Otherwise, return (None,None)
:param concept_repr:
:return:
"""
@@ -371,3 +383,25 @@ def decode_concept(text):
return key, id_, use_concept
return None, None, None
def tokens_index(tokens, sub_tokens, skip=0):
    """
    Find the position of a sub sequence of tokens inside ``tokens``.

    Tokens are compared by value; EOF tokens of ``sub_tokens`` are ignored.

    :param tokens: tokens to search in
    :param sub_tokens: sub tokens to search
    :param skip: number of occurrences to skip before returning a position
    :return: index in ``tokens`` where the retained occurrence starts
    :raises ValueError: when no (more) occurrence is found
    """
    wanted = [token.value for token in sub_tokens if token.type != TokenKind.EOF]
    remaining_to_skip = skip

    for offset in range(len(tokens) - len(wanted) + 1):
        mismatch = any(tokens[offset + k].value != value for k, value in enumerate(wanted))
        if mismatch:
            continue

        if remaining_to_skip == 0:
            return offset
        remaining_to_skip -= 1

    raise ValueError(f"sub tokens '{sub_tokens}' not found")
+1 -1
View File
@@ -5,7 +5,7 @@ from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.tokenizer import TokenKind
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseParser import NotInitializedNode
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.PythonParser import PythonNode
import core.utils
+2 -1
View File
@@ -1,6 +1,7 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BnfNodeParser import ConceptNode
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
+51
View File
@@ -0,0 +1,51 @@
from core.builtin_concepts import BuiltinConcepts
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
from parsers.BaseParser import BaseParser
class MultipleErrorsEvaluator(AllReturnValuesEvaluator):
    """
    Used to reduce the return values to the evaluators in error.
    All parser errors are discarded.
    Cannot match if there is at least one successful evaluator or parser.
    """
    NAME = "MultipleErrors"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 30)
        self.return_values_in_error = []

    def matches(self, context, return_values):
        """
        Match when a reduction is requested and more than one evaluator is in error.

        The return values consumed by this evaluator are collected in ``self.eaten``;
        the evaluator errors are collected in ``self.return_values_in_error``.
        """
        reduce_requested = False
        evaluator_errors = 0

        for ret in return_values:
            if ret.status:
                if ret.who.startswith(self.PREFIX) or ret.who.startswith(BaseParser.PREFIX):
                    # a successful evaluator or parser: nothing to reduce
                    return False
                if context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
                    reduce_requested = True
                    self.eaten.append(ret)
            elif ret.who.startswith(self.PREFIX):
                evaluator_errors += 1
                self.return_values_in_error.append(ret)
                self.eaten.append(ret)
            elif ret.who.startswith(BaseParser.PREFIX):
                # parser errors are eaten but not reported
                self.eaten.append(ret)
            # else:
            #   other concepts. We do not care if they are successful or not
            #   They won't be part of result nor part of the parent
            #   --> So they will be handled by other evaluators

        return reduce_requested and evaluator_errors > 1

    def eval(self, context, return_values):
        """
        Produce a single MULTIPLE_ERRORS return value (status False) whose body is
        the list of evaluator errors and whose parents are the eaten return values.
        """
        errors = self.return_values_in_error
        context.log(f"{len(errors)} return value in error, {len(self.eaten)} item(s) eaten",
                    who=self)
        context.log(f"{self.return_values_in_error}", who=self)

        sheerka = context.sheerka
        multiple_errors = sheerka.new(BuiltinConcepts.MULTIPLE_ERRORS, body=self.return_values_in_error)
        return sheerka.ret(self.name, False, multiple_errors, parents=self.eaten)
+4
View File
@@ -31,6 +31,10 @@ class OneErrorEvaluator(AllReturnValuesEvaluator):
self.eaten.append(ret)
elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
self.eaten.append(ret)
# else:
# other concepts. We do not care if there are successful or not
# They won't be part of result nor part of the parent
# --> So they will be handled by other evaluators
return to_process and nb_evaluators_in_error == 1
+2 -1
View File
@@ -1,4 +1,5 @@
import copy
import traceback
from enum import Enum
from core.ast.visitors import UnreferencedNamesVisitor
@@ -59,7 +60,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
return sheerka.ret(self.name, True, evaluated, parents=[return_value])
except Exception as error:
context.log_error(error, self.name)
context.log_error(error, who=self.name, exc=traceback.format_exc())
error = sheerka.new(BuiltinConcepts.ERROR, body=error)
return sheerka.ret(self.name, False, error, parents=[return_value])
+369
View File
@@ -0,0 +1,369 @@
import copy
from dataclasses import dataclass
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
# Parser names given to builtin_helpers.parse_unrecognized() when the helper
# tries to recognize the tokens it has buffered as unrecognized
PARSERS = ["BnfNode", "SyaNode", "Python"]
@dataclass()
class TokensNodeFound(ErrorNode):
    """
    Error added by AtomConceptParserHelper.finalize() when the input is over
    while some tokens of a concept name are still expected

    expected_tokens holds the values of the tokens that are still awaited
    """
    expected_tokens: list

    def __eq__(self, other):
        if other is self:
            return True

        # Compare with our own type and our own (single) field.
        # The previous implementation was copied from UnexpectedTokenErrorNode:
        # it tested the wrong class and read 'message' and 'token', which are
        # never given to a TokensNodeFound
        if not isinstance(other, TokensNodeFound):
            return False

        return self.expected_tokens == other.expected_tokens

    def __hash__(self):
        # a list is not hashable, so hash an immutable snapshot of it
        return hash(tuple(self.expected_tokens or ()))
class AtomConceptParserHelper:
    """
    Keeps the state of one possible interpretation of the input
    while AtomNodeParser goes through the tokens

    Several helpers live side by side when a token can start several concepts,
    or when the unrecognized tokens can be understood in several ways (see manage_unrecognized())
    """

    def __init__(self, context):
        self.context = context
        self.debug = []  # every token / concept that was given to this helper
        self.sequence = []  # nodes recognized so far
        self.current_concept: ConceptNode = None  # concept whose name is being matched
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer of the tokens not recognized yet
        self.expected_tokens = None  # remaining token values of the name of the current concept
        self.is_locked = False
        self.errors = []
        self.has_unrecognized = False
        self.forked = []  # clones created by manage_unrecognized()

    def __eq__(self, other):
        if other is self:
            return True

        if not isinstance(other, AtomConceptParserHelper):
            return False

        if len(self.sequence) != len(other.sequence):
            return False

        # only the recognized nodes are compared
        return not any(mine != theirs for mine, theirs in zip(self.sequence, other.sequence))

    def __hash__(self):
        return hash(len(self.sequence))

    def __repr__(self):
        return f"{self.sequence}"

    def lock(self):
        """Make eat_concept() and eat_unrecognized() ignore what they receive, until reset()"""
        self.is_locked = True

    def reset(self):
        """Remove the lock"""
        self.is_locked = False

    def has_error(self):
        return len(self.errors) > 0

    def eat_token(self, token, pos):
        """
        Try to match the token with the next expected token of the current concept

        :param token: current token
        :param pos: position of the token
        :return: True if the token was expected, False if nothing was expected
                 or if the token is not the awaited one (an error is then recorded)
        """
        if not self.expected_tokens:
            return False

        self.debug.append(token)

        awaited = self.expected_tokens[0]
        if awaited != BaseNodeParser.get_token_value(token):
            error = UnexpectedTokenErrorNode(
                f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
                token,
                [awaited])
            self.errors.append(error)
            return False

        self.current_concept.end = pos
        del self.expected_tokens[0]

        if not self.expected_tokens:
            # that was the last token of the name, the concept is complete
            self.sequence.append(self.current_concept)
            self.expected_tokens = None

        return True

    def eat_concept(self, concept, pos):
        """
        Start the recognition of a concept (unless the helper is locked)

        :param concept: concept whose first keyword is the current token
        :param pos: position of the current token
        :return:
        """
        if self.is_locked:
            return

        self.debug.append(concept)
        self.manage_unrecognized()
        for forked in self.forked:
            # the clones created by manage_unrecognized() must receive the concept as well
            forked.eat_concept(concept, pos)

        concept_node = ConceptNode(concept, pos, pos)
        # token values of the name, without the first token and without the last one
        remaining = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]

        if remaining:
            self.current_concept = concept_node
            self.expected_tokens = remaining
        else:
            # nothing else to wait for
            self.sequence.append(concept_node)

    def manage_unrecognized(self):
        """
        Flush the buffer of unrecognized tokens into the sequence

        The other parsers are first asked to recognize the buffer.
        When they give several possible sequences, the helper is cloned
        (one clone per extra sequence, kept in self.forked)
        """
        if self.unrecognized_tokens.is_empty():
            return

        if self.unrecognized_tokens.is_whitespace():
            # whitespaces only are not worth an UnrecognizedTokensNode
            self.unrecognized_tokens.reset()
            return

        self.unrecognized_tokens.fix_source()

        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
        if not nodes_sequences:
            # nobody knows what it is
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True
        else:
            instances = [self]
            for _ in range(len(nodes_sequences) - 1):
                clone = self.clone()
                instances.append(clone)
                self.forked.append(clone)

            for instance, node_sequence in zip(instances, nodes_sequences):
                for node in node_sequence:
                    instance.sequence.append(node)
                    if isinstance(node, UnrecognizedTokensNode) or \
                            hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
                        instance.has_unrecognized = True
                instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

        # start with a fresh buffer
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def eat_unrecognized(self, token, pos):
        """
        Put the token in the buffer of unrecognized tokens (unless the helper is locked)
        """
        if self.is_locked:
            return

        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
        """
        To call when there is no more token
        Flushes the pending unrecognized tokens and records an error
        if the name of a concept is not complete
        """
        if len(self.sequence) > 0:
            self.manage_unrecognized()
            for forked in self.forked:
                # the clones created by manage_unrecognized() must be finalized as well
                forked.finalize()

        if self.expected_tokens:
            self.errors.append(TokensNodeFound(self.expected_tokens))

    def clone(self):
        """
        :return: a new helper with a copy of the state (self.forked is not copied)
        """
        twin = AtomConceptParserHelper(self.context)
        twin.debug = self.debug[:]
        twin.sequence = self.sequence[:]
        twin.current_concept = self.current_concept.clone() if self.current_concept else None
        twin.unrecognized_tokens = self.unrecognized_tokens.clone()
        twin.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
        twin.is_locked = self.is_locked
        twin.errors = self.errors[:]
        twin.has_unrecognized = self.has_unrecognized
        return twin

    def _get_lexer_nodes_from_unrecognized(self):
        """
        Give the source of self.unrecognized_tokens to the parsers listed in PARSERS
        :return: what builtin_helpers.get_lexer_nodes() returns, None if the parsers failed
        """
        buffer = self.unrecognized_tokens
        res = builtin_helpers.parse_unrecognized(self.context, buffer.source, PARSERS)
        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)

        if not only_parsers_results.status:
            return None

        return builtin_helpers.get_lexer_nodes(only_parsers_results.body.body, buffer.start, buffer.tokens)
class AtomNodeParser(BaseNodeParser):
    """
    Parser that recognizes atom concepts, or sequences of atom concepts
    An atom concept is a concept without property (it may have a body)

    If 'one', 'two' and 'three' are atom concepts, 'one two three' is recognized
        as [ConceptNode(one), ConceptNode(two), ConceptNode(three)]

    'one x$1!! two three' is partly recognized
        as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)]

    Concepts with parameters are not recognized
        ex: 'one plus two' won't give ConceptNode(plus, one, two)
        but [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)]
        Note that 'one plus two' will be recognized by the SyaParser
    """

    def __init__(self, **kwargs):
        super().__init__("AtomNode", 50, **kwargs)
        self.enabled = False

    @staticmethod
    def _is_eligible(concept):
        """
        Tells whether the concept must be handled by AtomNodeParser
        :param concept:
        :return: True for a concept without property, or for a concept defined with BNF
        """
        metadata = concept.metadata
        return len(metadata.props) == 0 or metadata.definition_type == DEFINITION_TYPE_BNF

    def get_concepts_sequences(self):
        """
        Go through all the tokens and build every possible interpretation
        :return: list of AtomConceptParserHelper, one per interpretation
        """
        pending_forks = []

        def _collect_forks():
            # the helpers may have created clones, they must be followed as well
            for helper in helpers:
                if len(helper.forked) > 0:
                    pending_forks.extend(helper.forked)
                    helper.forked.clear()

            if len(pending_forks) > 0:
                helpers.extend(pending_forks)
                pending_forks.clear()

        helpers = [AtomConceptParserHelper(self.context)]

        while self.next_token(False):
            for helper in helpers:
                helper.reset()

            token = self.token

            try:
                # a helper that was waiting for this token does not need anything else
                for helper in helpers:
                    if helper.eat_token(self.token, self.pos):
                        helper.lock()

                concepts = self.get_concepts(token, self._is_eligible)

                if not concepts:
                    for helper in helpers:
                        helper.eat_unrecognized(token, self.pos)
                elif len(concepts) == 1:
                    for helper in helpers:
                        helper.eat_concept(concepts[0], self.pos)
                else:
                    # several concepts are possible : cartesian product helpers x concepts
                    product = []
                    for helper in helpers:
                        if helper.is_locked:
                            # it has already eaten the token, keep it as it is
                            product.append(helper)
                            continue

                        for concept in concepts:
                            clone = helper.clone()
                            product.append(clone)
                            clone.eat_concept(concept, self.pos)

                    helpers = product
            finally:
                _collect_forks()

        # no more token, flush what is still pending
        for helper in helpers:
            helper.reset()
            helper.finalize()
        _collect_forks()

        return helpers

    def get_valid(self, concept_parser_helpers):
        """
        Keep the helpers that have no error and that recognized something
        The tokens and the source of their nodes are set, and duplicates are removed

        :param concept_parser_helpers:
        :return: list of AtomConceptParserHelper
        """
        kept = []

        for candidate in concept_parser_helpers:
            if candidate.has_error() or len(candidate.sequence) == 0:
                continue

            for node in candidate.sequence:
                node.tokens = self.tokens[node.start:node.end + 1]
                node.fix_source()

            if candidate not in kept:
                kept.append(candidate)

        return kept

    def parse(self, context, parser_input):
        """
        :param context: execution context
        :param parser_input: what to parse
        :return: one return value, or a list of return values when several interpretations are valid
                 The status is False when some tokens remain unrecognized
        """
        if parser_input == "":
            is_empty = context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            return context.sheerka.ret(self.name, False, is_empty)

        if not self.reset_parser(context, parser_input):
            error = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
            return self.sheerka.ret(self.name, False, error)

        parser_helpers = self.get_valid(self.get_concepts_sequences())

        if not len(parser_helpers):
            not_for_me = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)
            return self.sheerka.ret(self.name, False, not_for_me)

        ret = []
        for parser_helper in parser_helpers:
            parser_result = self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=parser_input,
                body=parser_helper.sequence,
                try_parsed=parser_helper.sequence)
            ret.append(self.sheerka.ret(self.name, not parser_helper.has_unrecognized, parser_result))

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]

        self.log_multiple_results(context, parser_input, ret)
        return ret
+669
View File
@@ -0,0 +1,669 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
# When True, ConceptNode.__repr__ also prints the entries of concept.compiled
DEBUG_COMPILED = True
@dataclass()
class LexerNode(Node):
    """
    Node that covers a range of tokens
    The source is rebuilt from the tokens when it is not given
    """
    start: int  # index of the first token in the tokens list
    end: int  # index of the last token in the tokens list
    tokens: list = None  # tokens covered by the node
    source: str = None  # text of what was parsed

    def __post_init__(self):
        if self.source is not None:
            return
        self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if isinstance(other, LexerNode):
            return (self.start == other.start
                    and self.end == other.end
                    and self.source == other.source
                    and self.tokens == other.tokens)
        return False

    def fix_source(self, force=True):
        """
        Recompute the source from the tokens
        :param force: when False, the source is computed only if it is not known yet
        :return: self
        """
        if self.source is None or force:
            self.source = BaseParser.get_text_from_tokens(self.tokens)
        return self
class UnrecognizedTokensNode(LexerNode):
    """
    Buffer of the tokens that no parser has recognized (yet)
    It also counts the parentheses that are opened and not closed
    """

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False
        self.parenthesis_count = 0

    def freeze(self):
        """Once frozen, add_token() raises an exception"""
        self.is_frozen = True

    def reset(self):
        """Empty the buffer (the tokens list is cleared in place)"""
        self.start = self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def add_token(self, token, pos):
        """
        Append a token to the buffer
        :param token:
        :param pos: position of the token
        :return: self
        """
        if self.is_frozen:
            raise Exception("The node is frozen")

        skipped_one_position = self.end != -1 and pos == self.end + 2
        if skipped_one_position:
            # one position is missing, put a whitespace in place of it
            previous = self.tokens[-1]
            filler = Token(TokenKind.WHITESPACE, " ", previous.index + 1, previous.line, previous.column + 1)
            self.tokens.append(filler)

        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            self.start = pos

        if token.type == TokenKind.LPAR:
            self.parenthesis_count += 1
        elif token.type == TokenKind.RPAR:
            self.parenthesis_count -= 1

        return self

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        """True when the buffer only contains whitespaces or newlines (or nothing)"""
        blanks = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
        return all(t.type in blanks for t in self.tokens)

    def is_empty(self):
        return len(self.tokens) == 0

    def __eq__(self, other):
        if isinstance(other, UTN):
            # the tester class knows how to compare
            return other == self

        if isinstance(other, (utnode, UnrecognizedTokensNode)):
            return (self.start == other.start
                    and self.end == other.end
                    and self.source == other.source)

        return False

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"

    def clone(self):
        """
        :return: a new node with its own copy of the tokens list
        """
        twin = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        twin.parenthesis_count = self.parenthesis_count
        twin.is_frozen = self.is_frozen
        return twin
class ConceptNode(LexerNode):
    """
    Node that represents a recognized concept
    It is returned by the BnfNodeParser
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.underlying = underlying
        self.concept = concept
        self.fix_source(False)

    def __eq__(self, other):
        if other is self:
            return True

        if isinstance(other, (CN, CNC)):
            # the tester classes know how to compare
            return other == self

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if isinstance(other, cnode):
            return (self.concept.key == other.concept_key
                    and self.start == other.start
                    and self.end == other.end
                    and self.source == other.source)

        if isinstance(other, ConceptNode):
            return (self.concept == other.concept
                    and self.start == other.start
                    and self.end == other.end
                    and self.source == other.source
                    and self.underlying == other.underlying)

        return False

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        pieces = [f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"]
        if DEBUG_COMPILED:
            pieces.extend(f", {k}='{v}'" for k, v in self.concept.compiled.items())
        pieces.append(")")
        return "".join(pieces)

    def clone(self):
        """
        :return: a new node that refers to the same concept, tokens and underlying
        """
        # do we need to clone the concept as well ?
        return ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
class SourceCodeNode(LexerNode):
    """
    Node returned when some source code is recognized (Python source code for example)
    """

    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
        super().__init__(start, end, tokens, source)
        self.return_value = return_value  # original result of the parsing
        self.node = node  # the PythonNode (or the node of another language) that was found

    def __eq__(self, other):
        if not isinstance(other, (scnode, SourceCodeNode)):
            return False

        same_span = (self.start == other.start
                     and self.end == other.end
                     and self.source == other.source)

        if isinstance(other, scnode):
            # the tester tuple does not know the language node
            return same_span

        return self.node == other.node and same_span

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of temporary version for SourceCodeNode
    I know that there is some code,
    I know that there are some concepts
    I just don't want to make the glue yet
    So I push all the nodes into one big bag

    start and end are computed from the positions of first, last and of the content nodes
    """

    def __init__(self, first_node, last_node, content_nodes=None):
        # 9999 / -1 are placeholders, fix_all_pos() replaces them with the real positions
        super().__init__(9999, -1, None)  # why not sys.maxint ?

        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = False

        self.fix_all_pos()

    def add_node(self, node):
        """
        Add a content node and update start / end accordingly
        :param node:
        :return: self
        """
        self.nodes.append(node)
        self.fix_pos(node)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, SourceCodeWithConceptNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if self.first != other.first:
            return False

        if self.last != other.last:
            return False

        if len(self.nodes) != len(other.nodes):
            return False

        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False

        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        """Update start / end using first, last and all the content nodes"""
        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        """
        Widen start / end so that they include the position of the node
        Nodes without position are ignored
        :param node:
        :return: self
        """
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start

        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end

        return self

    def pseudo_fix_source(self):
        """
        Build an approximate source : source of first, then every content node
        preceded by a space, then source of last
        :return: self
        """
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def clone(self):
        """
        :return: a new node with its own list of content nodes
                 (first, last and the content nodes themselves are shared)
        """
        # Copy the list : sharing it would make add_node() on the clone
        # modify the original as well
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes[:])

        # the constructor resets these two, so carry them over explicitly
        clone.has_unrecognized = self.has_unrecognized
        clone.source = self.source
        return clone
@dataclass()
class GrammarErrorNode(ErrorNode):
    """
    Error node that only carries a message
    """
    message: str  # description of the error
class SyaAssociativity(Enum):
    """
    Associativity of a concept, second item of a sya definition
    (see BaseNodeParser.init_from_sheerka())
    """
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        # only the raw value is shown
        raw_value = self.value
        return raw_value
# Lightweight tuples accepted by the __eq__ methods of the nodes defined above
# ConceptNode compares its concept key, start, end and source with a cnode
cnode = namedtuple("ConceptNode", "concept_key start end source")
# ConceptNode only compares its concept key and its source with a short_cnode
short_cnode = namedtuple("ConceptNode", "concept_key source")
# UnrecognizedTokensNode compares its start, end and source with a utnode
utnode = namedtuple("utnode", "start end source")
# SourceCodeNode compares its start, end and source with a scnode
scnode = namedtuple("scnode", "start end source")
@dataclass(init=False)
class SCWC:
    """
    Tester class for SourceCodeWithConceptNode
    It is easier to instantiate during the tests :
    the content nodes are simply given as extra positional arguments
    """
    first: LexerNode
    last: LexerNode
    content: tuple

    def __init__(self, first, last, *args):
        # args is kept as it is received, i.e. as a tuple
        self.first, self.last, self.content = first, last, args
class HelperWithPos:
    """
    Base of the tester classes that have a position (start, end)

    A bound given to the constructor is fixed : fix_pos() will never change it
    A bound that is not given is deduced from the nodes given to fix_pos()
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end
        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    @staticmethod
    def _bound_of(node, attribute, index):
        # read the bound from the attribute, else from the tuple, else there is no bound
        if hasattr(node, attribute):
            return getattr(node, attribute)
        if isinstance(node, tuple):
            return node[index]
        return None

    def fix_pos(self, node):
        """
        Widen the bounds that are not fixed so that they include the node
        :param node: object with 'start' / 'end' attributes, or tuple (start, end, ...)
        :return: self
        """
        if not self.start_is_fixed:
            candidate = self._bound_of(node, "start", 0)
            if candidate is not None and (self.start is None or candidate < self.start):
                self.start = candidate

        if not self.end_is_fixed:
            candidate = self._bound_of(node, "end", 1)
            if candidate is not None and (self.end is None or candidate > self.end):
                self.end = candidate

        return self
class CN(HelperWithPos):
    """
    Tester class for ConceptNode
    It matches a ConceptNode, but with less constraints :
    CN == ConceptNode if the concept key is the same,
    and if start and end are the same when they are defined on the CN
    """

    def __init__(self, concept, start=None, end=None, source=None):
        """
        :param concept: Concept or concept key (the key is what is compared)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        is_concept = isinstance(concept, Concept)
        self.concept_key = concept.key if is_concept else concept
        self.source = source
        self.concept = concept if is_concept else None

    def fix_source(self, str_tokens):
        """
        :param str_tokens: strings to concatenate to make the source
        :return: self
        """
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if other is self:
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None or other.concept.key != self.concept_key:
                return False
            # a bound that is not defined matches anything
            start_matches = self.start is None or self.start == other.start
            end_matches = self.end is None or self.end == other.end
            return start_matches and end_matches

        if isinstance(other, CN):
            return (self.concept_key == other.concept_key
                    and self.start == other.start
                    and self.end == other.end
                    and self.source == other.source)

        return False

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        head = f"CN(concept='{self.concept}'" if self.concept else f"CN(concept_key='{self.concept_key}'"
        pieces = [head, f", source='{self.source}'"]
        if self.start is not None:
            pieces.append(f", start={self.start}")
        if self.end is not None:
            pieces.append(f", end={self.end}")
        return "".join(pieces) + ")"
class CNC(CN):
    """
    ConceptNode for Compiled tester class
    It matches with ConceptNode
    But focuses on the 'compiled' property of the concept

    CNC == ConceptNode if the concept key is the same,
    if start and end are the same when they are defined on the CNC,
    and if CNC.compiled == ConceptNode.concept.compiled
    """

    def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
        """
        :param concept_key: Concept or concept key
        :param start:
        :param end:
        :param source:
        :param kwargs: expected content of the 'compiled' property of the concept
        """
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.compiled == other.concept.compiled  # assert instead of return to help debugging tests

        if not isinstance(other, CNC):
            return False

        return self.concept_key == other.concept_key and \
               self.start == other.start and \
               self.end == other.end and \
               self.source == other.source and \
               self.compiled == other.compiled

    def __hash__(self):
        # Overriding __eq__ removes the inherited __hash__, so restore it
        # 'compiled' is a dict (not hashable) : two equal CNC still have the same hash
        # as they have the same key, start, end and source
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"

        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"
class BaseNodeParser(BaseParser):
    """
    Base class of the parsers that return lexer nodes

    It keeps the concepts indexed by their first keyword, the sya definitions,
    and knows how to go through the tokens of the input
    """

    def __init__(self, name, priority, **kwargs):
        """
        :param name: name of the parser
        :param priority: priority of the parser
        :param kwargs: when 'sheerka' is given, the definitions are taken from it
        """
        super().__init__(name, priority)

        # Always create the two attributes first :
        # init_from_sheerka() only sets sya_definitions when sheerka has some,
        # so the attribute could be missing when initialize() reads it
        self.concepts_by_first_keyword = None
        self.sya_definitions = None
        if 'sheerka' in kwargs:
            self.init_from_sheerka(kwargs.get("sheerka"))

        self.token = None
        self.pos = -1
        self.tokens = None

        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None

    def init_from_sheerka(self, sheerka):
        """
        Use the definitions from Sheerka to initialize
        The associativity of the sya definitions is converted to SyaAssociativity
        :param sheerka:
        :return:
        """
        self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
        if sheerka.sya_definitions:
            self.sya_definitions = {}
            for k, v in sheerka.sya_definitions.items():
                self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))

    def reset_parser(self, context, text):
        """
        Prepare the parser for a new input
        :param context: execution context
        :param text: input, as accepted by get_input_as_tokens()
        :return: False if the input cannot be tokenized (the error is put in the error sink)
        """
        self.context = context
        self.sheerka = context.sheerka
        self.text = text

        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False

        self.token = None
        self.pos = -1
        return True

    def add_error(self, error, next_token=True):
        """
        :param error: error to put in the error sink
        :param next_token: True to move to the next token
        :return: the error
        """
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self.token

    def next_token(self, skip_whitespace=True):
        """
        Move to the next token
        :param skip_whitespace: True to skip the whitespaces and the newlines
        :return: False when the end of the input is reached
        """
        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.pos += 1
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
        """
        To quickly find a concept, we store them in an hash where the key is the first token of the concept
        example :
        Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
        So the key to use will be "foo"

        Concept("a foo").def_prop("a") -> first token is "foo"
        Concept("Hello my dear a").def_prop("a") -> first token is "Hello"

        Note that under the same key, there will be multiple entry
        a B-Tree may be a better implementation in the future

        We also store sya_definition which a is tuple (concept_precedence:int, concept_associativity:SyaAssociativity)

        :param context:
        :param concepts: list[Concept]
        :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
        :param use_sheerka: first init with the definitions from Sheerka
        :return: successful return value whose body is the hash of the concepts
        """
        self.context = context
        self.sheerka = context.sheerka

        if use_sheerka:
            self.init_from_sheerka(self.sheerka)

        if sya_definitions:
            if self.sya_definitions:
                self.sya_definitions.update(sya_definitions)
            else:
                self.sya_definitions = sya_definitions

        if self.concepts_by_first_keyword is None:
            self.concepts_by_first_keyword = {}

        for concept in concepts:
            keywords = concept.key.split()
            for keyword in keywords:
                if keyword.startswith(VARIABLE_PREFIX):
                    continue

                # first keyword that is not a variable : this is the key
                self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
                break

        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)

    def get_concepts(self, token, to_keep, to_map=None):
        """
        Tries to find if there are concepts that match the value of the token
        :param token:
        :param to_keep: predicate to tell if the concept is eligible
        :param to_map: optional function applied to every concept that is kept
        :return: list of concepts (may be empty), None if the value of the token is not a known keyword
        """
        # same rule as get_token_value() to get the name, so use it rather than duplicate it
        name = BaseNodeParser.get_token_value(token)

        if name not in self.concepts_by_first_keyword:
            return None

        result = []
        for concept_id in self.concepts_by_first_keyword[name]:
            concept = self.sheerka.get_by_id(concept_id)

            if not to_keep(concept):
                continue

            concept = to_map(concept) if to_map else concept
            result.append(concept)

        return result

    @staticmethod
    def get_token_value(token):
        """
        :param token:
        :return: value of the token, without the first and last characters for a string,
                 and the value of the keyword for a keyword
        """
        if token.type == TokenKind.STRING:
            return token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            return token.value.value
        else:
            return token.value
class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode
    compare the source, and start, end if defined
    """

    def __init__(self, source, start=None, end=None):
        """
        :param source: expected source of the unrecognized tokens
        :param start:
        :param end:
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, UnrecognizedTokensNode):
            # As documented (and like CN / CNC), a bound that is not defined matches anything.
            # Comparing None with the position of the node could never succeed
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.source == other.source

        if not isinstance(other, UTN):
            return False

        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN( source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
+54 -4
View File
@@ -1,8 +1,8 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords, Token
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.sheerka_logger import get_logger
import core.utils
import logging
@@ -77,7 +77,6 @@ class BaseParser:
self.priority = priority
self.enabled = enabled
self.has_error = False
self.error_sink = []
def __eq__(self, other):
@@ -91,9 +90,13 @@ class BaseParser:
def __repr__(self):
return self.name
def parse(self, context, text):
def parse(self, context, parser_input):
pass
@property
def has_error(self):
    """True as soon as the error sink contains something"""
    return bool(self.error_sink)
def log_result(self, context, source, ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
@@ -132,6 +135,53 @@ class BaseParser:
body=self.error_sink if self.has_error else tree,
try_parsed=try_parse)
def get_input_as_text(self, parser_input, custom_switcher=None):
    """
    Give the input as a text

    :param parser_input: list of tokens, ParserResultConcept (its source is used) or text
    :param custom_switcher: forwarded to get_text_from_tokens()
    :return: text
    """
    if isinstance(parser_input, list):
        return self.get_text_from_tokens(parser_input, custom_switcher)

    text = parser_input.source if isinstance(parser_input, ParserResultConcept) else parser_input

    if "c:" not in text:
        return text

    # the text contains 'c:', so it is rebuilt from its tokens
    return self.get_text_from_tokens(list(Tokenizer(text)), custom_switcher)
def get_input_as_tokens(self, parser_input):
    """
    Give the input as tokens

    :param parser_input: list of tokens, ParserResultConcept or text
    :return: list of tokens that ends with EOF when the tokens are already known,
             Tokenizer of the text otherwise
    """
    if isinstance(parser_input, list):
        return self.add_eof_if_needed(parser_input)

    to_tokenize = parser_input
    if isinstance(parser_input, ParserResultConcept):
        if parser_input.tokens:
            # already tokenized
            return self.add_eof_if_needed(parser_input.tokens)
        to_tokenize = parser_input.source

    return Tokenizer(to_tokenize)
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
    """
    Give the input as lexer nodes

    :param parser_input: must be a ParserResultConcept whose value only contains LexerNode
    :param expected_parser: when given, the input must come from this parser
    :return: value of the parser result, None if the input is not suitable
    """
    if not isinstance(parser_input, ParserResultConcept):
        return None

    if expected_parser and parser_input.parser != expected_parser:
        return None

    if len(parser_input.value) == 0:
        return None

    # imported here, as in the original code, rather than at the top of the module
    from parsers.BaseNodeParser import LexerNode
    if not all(isinstance(node, LexerNode) for node in parser_input.value):
        return None

    return parser_input.value
@staticmethod
def add_eof_if_needed(lst):
    """
    Make sure that the list of tokens ends with an EOF token

    The list that is given is never modified : it belongs to the caller,
    and appending EOF in place would change the tokens of whoever shares it

    :param lst: list of tokens
    :return: lst itself if it already ends with EOF, else a new list with EOF at the end
    """
    if lst and lst[-1].type == TokenKind.EOF:
        return lst

    return [*lst, Token(TokenKind.EOF, "", -1, -1, -1)]
@staticmethod
def get_text_from_tokens(tokens, custom_switcher=None):
if tokens is None:
@@ -9,147 +9,17 @@
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
import core.utils
@dataclass()
class LexerNode(Node):
    """
    Node that covers a range of tokens
    The source is rebuilt from the tokens when it is not given
    """
    start: int  # index of the first token in the tokens list
    end: int  # index of the last token in the tokens list
    tokens: list = None  # tokens covered by the node
    source: str = None  # text of what was parsed

    def __post_init__(self):
        if self.source is not None:
            return
        self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if isinstance(other, LexerNode):
            return (self.start == other.start
                    and self.end == other.end
                    and self.source == other.source
                    and self.tokens == other.tokens)
        return False
class UnrecognizedTokensNode(LexerNode):
    """
    Node that holds the tokens that were not recognized
    """

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)

    def add_token(self, token, pos):
        """Append a token, pos becomes the new end of the node"""
        self.end = pos
        self.tokens.append(token)

    def fix_source(self):
        """Recompute the source from the tokens"""
        self.source = BaseParser.get_text_from_tokens(self.tokens)

    def not_whitespace(self):
        """False only when the node is made of one single whitespace or newline"""
        blanks = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
        return len(self.tokens) != 1 or self.tokens[0].type not in blanks

    def __eq__(self, other):
        if isinstance(other, (utnode, UnrecognizedTokensNode)):
            return (self.start == other.start
                    and self.end == other.end
                    and self.source == other.source)
        return False

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
class ConceptNode(LexerNode):
    """
    Node that represents a recognized concept
    It is returned by the ConceptLexerParser
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.underlying = underlying
        self.concept = concept

        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if isinstance(other, cnode):
            return (self.concept.key == other.concept_key
                    and self.start == other.start
                    and self.end == other.end
                    and self.source == other.source)

        if isinstance(other, ConceptNode):
            return (self.concept == other.concept
                    and self.start == other.start
                    and self.end == other.end
                    and self.source == other.source
                    and self.underlying == other.underlying)

        return False

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized.

    The hash only uses (start, end, source); equality with another
    SourceCodeNode additionally compares the wrapped language node.
    """

    def __init__(self, node, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        # The PythonNode (or whatever language node) that is found
        self.node = node

    def __eq__(self, other):
        # the scnode shorthand carries no language node: compare position and text
        if isinstance(other, scnode):
            return (self.start == other.start
                    and self.end == other.end
                    and self.source == other.source)

        if isinstance(other, SourceCodeNode):
            return (self.node == other.node
                    and self.start == other.start
                    and self.end == other.end
                    and self.source == other.source)

        return False

    def __hash__(self):
        key = (self.start, self.end, self.source)
        return hash(key)

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
# Lightweight stand-ins accepted by the __eq__ methods of the node classes
# (presumably to write concise expectations in tests -- confirm with callers).
# ConceptNode compared on concept key, position and text
cnode = namedtuple("ConceptNode", "concept_key start end source")
# ConceptNode compared on concept key and text only
short_cnode = namedtuple("ConceptNode", "concept_key source")
# UnrecognizedTokensNode compared on position and text
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
# SourceCodeNode compared on position and text (the language node is ignored)
scnode = namedtuple("SourceCodeNode", "start end source")
class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, tokens, children=None):
@@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode):
class TerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, value):
@@ -205,11 +75,6 @@ class TerminalNode(LexerNode):
return hash((self.parsing_expression, self.start, self.end, self.value))
@dataclass()
class GrammarErrorNode(ErrorNode):
    """Error node that carries a free-text message."""
    message: str  # description of the error
@dataclass()
class UnknownConceptNode(ErrorNode):
    """Error node that carries the key of a concept."""
    concept_key: str  # key of the concept the error is about
@@ -574,9 +439,9 @@ class StrMatch(Match):
return None
class ConceptLexerParser(BaseParser):
class BnfNodeParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptLexer", 50)
super().__init__("BnfNode", 50)
if 'grammars' in kwargs:
self.concepts_grammars = kwargs.get("grammars")
elif 'sheerka' in kwargs:
@@ -595,7 +460,6 @@ class ConceptLexerParser(BaseParser):
self.sheerka = None
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -606,17 +470,12 @@ class ConceptLexerParser(BaseParser):
self.sheerka = context.sheerka
self.text = text
if isinstance(text, str):
try:
self.tokens = list(Tokenizer(text))
self.tokens = list(self.get_input_as_tokens(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
else:
self.tokens = list(text)
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
self.token = None
self.pos = -1
self.next_token(False)
@@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser):
removed_concepts.append(e)
return removed_concepts
def parse(self, context, text):
if text == "":
def parse(self, context, parser_input):
if parser_input == "":
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)
if not self.reset_parser(context, text):
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
self.name,
False,
@@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser):
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
source=parser_input,
body=choice,
try_parsed=choice)))
if len(ret) == 1:
self.log_result(context, text, ret[0])
self.log_result(context, parser_input, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text, ret)
self.log_multiple_results(context, parser_input, ret)
return ret
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
@@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser):
_concept.compiled[prop_name] = new_value
def _look_for_concept_match(_underlying):
"""
At some point, there is either an StrMatch or a ConceptMatch,
that allowed the recognition.
Look for the ConceptMatch, with recursion if needed
"""
if isinstance(_underlying.parsing_expression, ConceptExpression):
return _underlying
@@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser):
def _get_underlying_value(_underlying):
concept_match_node = _look_for_concept_match(_underlying)
if concept_match_node:
# the value is a concept
if id(concept_match_node) in _underlying_value_cache:
result = _underlying_value_cache[id(concept_match_node)]
else:
@@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser):
result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
_underlying_value_cache[id(concept_match_node)] = result
else:
# the value is a string
result = DoNotResolve(_underlying.source)
return result
@@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser):
concept.compiled[ConceptParts.BODY] = value
if underlying.parsing_expression.rule_name:
_add_prop(concept, underlying.parsing_expression.rule_name, value)
# KSI : Why don't we set concept.metadata.need_validation to True ?
if isinstance(underlying, NonTerminalNode):
for node in underlying.children:
+3 -5
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
@@ -30,7 +30,6 @@ class BnfParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("Bnf", 50, False)
# self.has_error = False
# self.error_sink = []
# self.name = BaseParser.PREFIX + "Bnf"
@@ -61,7 +60,6 @@ class BnfParser(BaseParser):
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -115,11 +113,11 @@ class BnfParser(BaseParser):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
def parse(self, context: ExecutionContext, parser_input):
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_choice()
token = self.get_token()
+12 -11
View File
@@ -1,10 +1,14 @@
# try to match something like
# ConceptNode 'plus' ConceptNode
#
# Replaced by SyaNodeParser
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging
multiple_concepts_parser = MultipleConceptsParser()
@@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptsWithConcepts", 25)
self.enabled = False
@staticmethod
def get_tokens(nodes):
@@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser):
return concept
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
concept_key = self.get_key(nodes)
concept = sheerka.new(concept_key)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
concepts = concept if hasattr(concept, "__iter__") else [concept]
for concept in concepts:
@@ -101,7 +102,7 @@ class ConceptsWithConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=concept,
try_parsed=None)))
+8 -9
View File
@@ -110,7 +110,7 @@ class DefaultParser(BaseParser):
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "Default", 50)
BaseParser.__init__(self, "Default", 60)
self.lexer_iter = None
self._current = None
self.context: ExecutionContext = None
@@ -168,7 +168,6 @@ class DefaultParser(BaseParser):
self.next_token()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -188,19 +187,19 @@ class DefaultParser(BaseParser):
return
def parse(self, context, text):
def parse(self, context, parser_input):
# default parser can only manage string text
if not isinstance(text, str):
if not isinstance(parser_input, str):
ret = context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
self.log_result(context, text, ret)
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
self.add_error(e, False)
@@ -211,7 +210,7 @@ class DefaultParser(BaseParser):
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.get_return_value_body(context.sheerka, text, tree, tree)
body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
# body = self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
@@ -224,7 +223,7 @@ class DefaultParser(BaseParser):
not self.has_error,
body)
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def parse_statement(self):
+5 -5
View File
@@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "EmptyString", 90)
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if isinstance(text, str) and text.strip() == "" or \
isinstance(text, list) and text == [] or \
text is None:
if isinstance(parser_input, str) and parser_input.strip() == "" or \
isinstance(parser_input, list) and parser_input == [] or \
parser_input is None:
ret = sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
@@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser):
else:
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
+12 -13
View File
@@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
def parse(self, context, text):
def parse(self, context, parser_input):
"""
text can be string, but text can also be an list of tokens
:param context:
:param text:
:param parser_input:
:return:
"""
context.log(f"Parsing '{text}'", self.name)
context.log(f"Parsing '{parser_input}'", self.name)
res = []
sheerka = context.sheerka
try:
words = self.get_words(text)
words = self.get_words(parser_input)
except LexerError as e:
context.log(f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
if len(words) > self.MAX_WORDS_SIZE:
context.log(f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
recognized = False
for combination in self.combinations(words):
@@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser):
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text if isinstance(text, str) else self.get_text_from_tokens(text),
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
body=concept,
try_parsed=concept)))
recognized = True
if recognized:
if len(res) == 1:
self.log_result(context, text, res[0])
self.log_result(context, parser_input, res[0])
else:
self.log_multiple_results(context, text, res)
self.log_multiple_results(context, parser_input, res)
return res
return res
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
self.log_result(context, text, ret)
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
@staticmethod
def get_words(text):
tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
def get_words(self, text):
tokens = self.get_input_as_tokens(text)
res = []
for t in tokens:
if t.type == TokenKind.EOF:
+12 -13
View File
@@ -1,18 +1,20 @@
# to be replaced by SyaNodeParser
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser()
concept_lexer_parser = BnfNodeParser()
class MultipleConceptsParser(BaseParser):
"""
Parser that will take the result of ConceptLexerParser and
Parser that will take the result of BnfNodeParser and
try to resolve the unrecognized tokens token by token
It is a success when it returns a list ConceptNode exclusively
@@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
self.enabled = False
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
@@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser):
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
if not nodes:
return None
if not text.parser == concept_lexer_parser:
return None
sheerka = context.sheerka
nodes = text.value
nodes_found = [[]]
concepts_only = True
@@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, text.source, ret[0])
self.log_result(context, parser_input.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text.source, ret)
self.log_multiple_results(context, parser_input.source, ret)
return ret
@staticmethod
+19 -18
View File
@@ -1,4 +1,4 @@
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
@@ -6,7 +6,7 @@ import ast
import logging
import core.utils
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
log = logging.getLogger(__name__)
@@ -67,7 +67,7 @@ class PythonParser(BaseParser):
BaseParser.__init__(self, "Python", 50)
self.source = kwargs.get("source", "<undef>")
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
tree = None
@@ -76,15 +76,9 @@ class PythonParser(BaseParser):
}
try:
if isinstance(text, str) and "c:" in text:
source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text, python_switcher)
source = self.get_input_as_text(parser_input, python_switcher)
source = source.strip()
text = text if isinstance(text, str) else source
parser_input = parser_input if isinstance(parser_input, str) else source
# first, try to parse an expression
res, tree, error = self.try_parse_expression(source)
@@ -92,25 +86,32 @@ class PythonParser(BaseParser):
# then try to parse a statement
res, tree, error = self.try_parse_statement(source)
if not res:
self.has_error = True
error_node = PythonErrorNode(text, error)
error_node = PythonErrorNode(parser_input, error)
self.error_sink.append(error_node)
except LexerError as e:
self.has_error = True
self.error_sink.append(e)
if self.has_error:
ret = sheerka.ret(
self.name,
not self.has_error,
False,
sheerka.new(
BuiltinConcepts.NOT_FOR_ME,
body=parser_input,
reason=self.error_sink))
else:
ret = sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=self.error_sink if self.has_error else PythonNode(text, tree),
source=parser_input,
body=PythonNode(parser_input, tree),
try_parsed=None))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def try_parse_expression(self, text):
+7 -8
View File
@@ -1,10 +1,11 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
multiple_concepts_parser = MultipleConceptsParser()
unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser):
@@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser):
res += c if c.isalnum() else "0"
return res
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
source = ""
to_parse = ""
identifiers = {}
@@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser):
python_id = _get_identifier(concept)
to_parse += python_id
python_ids_mappings[python_id] = concept
else:
source += node.source
to_parse += node.source
File diff suppressed because it is too large Load Diff
+114
View File
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils
PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]
@dataclass()
class CannotParseNode(ErrorNode):
    """Error node wrapping an UnrecognizedTokensNode."""
    unrecognized: UnrecognizedTokensNode  # the node the error is about
class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya)
    It will try to resolve all UnrecognizedTokensNode.
    """

    def __init__(self, **kwargs):
        # lower than AtomNode, BnfNode and SyaNode
        super().__init__("UnrecognizedNode", 45)

    def add_error(self, error):
        """Record one error, or every error of an iterable of errors."""
        if not hasattr(error, "__iter__"):
            self.error_sink.append(error)
            return
        self.error_sink.extend(error)

    def parse(self, context, parser_input):
        """
        Walk the lexer nodes of parser_input and try to resolve the unrecognized ones.

        :param context: execution context (gives access to sheerka)
        :param parser_input: input from which the lexer nodes are extracted
        :return: None when there is no lexer node, an ERROR return value when a
            ConceptNode cannot be validated, otherwise one return value
            (or a list of them when several sequences are possible)
        """
        sheerka = context.sheerka

        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            return None

        sequences_found = [[]]
        has_unrecognized = False

        for node in nodes:
            if isinstance(node, ConceptNode):
                validation = self.validate_concept_node(context, node)
                if validation.status:
                    sequences_found = core.utils.product(sequences_found, [validation.body])
                else:
                    self.add_error(validation.body)
                continue

            if not isinstance(node, UnrecognizedTokensNode):
                # cannot happen as of today :-)
                raise NotImplementedError()

            attempt = parse_unrecognized(context, node.source, PARSERS)
            attempt = only_successful(context, attempt)
            if not attempt.status:
                # nothing could parse it: keep the node as is and flag the result
                sequences_found = core.utils.product(sequences_found, [node])
                has_unrecognized = True
                continue

            lexer_nodes = get_lexer_nodes(attempt.body.body, node.start, node.tokens)
            sequences_found = core.utils.product(sequences_found, lexer_nodes)

        # concept with UnrecognizedToken in their properties is considered as fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        ret = [
            sheerka.ret(
                self.name,
                not has_unrecognized,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=choice,
                    try_parsed=choice))
            for choice in sequences_found
        ]

        if len(ret) != 1:
            self.log_multiple_results(context, parser_input, ret)
            return ret

        self.log_result(context, parser_input, ret[0])
        return ret[0]

    def validate_concept_node(self, context, concept_node):
        """
        Resolve the UnrecognizedTokensNode found in the compiled properties of a concept.

        :param context: execution context (gives access to sheerka)
        :param concept_node: ConceptNode whose concept must be validated
        :return: successful return value holding concept_node, or a failed one
            holding the list of errors
        """
        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find unrecognized
            :param concept:
            :return:
            """
            for name, value in concept.compiled.items():
                if isinstance(value, Concept):
                    _validate_concept(value)
                    continue

                if not isinstance(value, UnrecognizedTokensNode):
                    continue

                attempt = parse_unrecognized(context, value.tokens, PARSERS)
                attempt = only_successful(context, attempt)  # only keep successful parsers
                if attempt.status:
                    concept.compiled[name] = attempt.body.body
                else:
                    errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))

        _validate_concept(concept_node.concept)

        if errors:
            return context.sheerka.ret(self.name, False, errors)
        return context.sheerka.ret(self.name, True, concept_node)
+17
View File
@@ -20,6 +20,9 @@ def json_default_converter(o):
if isinstance(o, (date, datetime)):
return o.isoformat()
if isinstance(o, SheerkaDataProviderRef):
return f"##XREF##:{o.target}"
class Event(object):
"""
@@ -406,6 +409,11 @@ class SheerkaDataProvider:
def is_reference(obj):
return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
def reset(self):
    """
    Re-read first_time from the io layer, then reset the io layer if it supports it.

    Note that first_time is copied *before* the io layer is reset.
    """
    io_layer = self.io
    self.first_time = io_layer.first_time
    if not hasattr(io_layer, "reset"):
        return
    io_layer.reset()
def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False):
"""
Adds obj to the entry 'entry'
@@ -999,3 +1007,12 @@ class SheerkaDataProvider:
keys[entry] = value
self.save_keys(keys)
return str(value)
def dump_state(self, digest=None):
    """
    Print the data of a state as sorted, indented JSON.

    :param digest: digest of the state to dump; when falsy, the snapshot
        referenced by SheerkaDataProvider.HeadFile is used
    """
    if not digest:
        digest = self.get_snapshot(SheerkaDataProvider.HeadFile)
    state = self.load_state(digest)
    rendered = json.dumps(state.data, sort_keys=True, default=json_default_converter, indent=True)
    print(rendered)
def dump_obj(self, digest):
    """
    Print the attributes of a stored object as sorted, indented JSON.

    :param digest: digest of the object to load and dump
    """
    loaded = self.load_obj(digest)
    rendered = json.dumps(loaded.__dict__, sort_keys=True, default=json_default_converter, indent=True)
    print(rendered)
+4
View File
@@ -170,6 +170,10 @@ class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):
return io.BytesIO(self.cache[file_path]) if "b" in mode else io.StringIO(self.cache[file_path])
def reset(self):
    """Empty the in-memory cache and flag the io as 'first time' again."""
    self.cache.clear()
    self.first_time = True
def on_close(dictionary_io, file_path, stream):
"""
+4 -1
View File
@@ -1,7 +1,7 @@
import ast
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.sheerka.ExecutionContext import ExecutionContext
from parsers.BnfParser import BnfParser
from sdp.sheerkaDataProvider import Event
@@ -54,6 +54,9 @@ class BaseTest:
res = bnf_parser.parse(context, c.metadata.definition)
if res.status:
c.bnf = res.value.value
c.metadata.definition_type = DEFINITION_TYPE_BNF
else:
raise Exception(f"Error in bnf definition '{c.metadata.definition}'", sheerka.get_error(res))
sheerka.create_new_concept(context, c)
elif create_new:
sheerka.create_new_concept(context, c)
+27 -2
View File
@@ -3,9 +3,34 @@ from tests.BaseTest import BaseTest
class TestUsingMemoryBasedSheerka(BaseTest):
singleton_instance = None
builtin_concepts = {}
def get_sheerka(self, **kwargs):
skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True)
@staticmethod
def _inner_get_sheerka(skip_builtins_in_db):
sheerka = Sheerka(skip_builtins_in_db=skip_builtins_in_db)
sheerka.initialize("mem://")
return sheerka
def get_sheerka(self, **kwargs):
    """
    Return the Sheerka instance to use in a test.

    Recognized keyword arguments:
    - sheerka: when truthy, returned as is
    - singleton: when truthy, a class-level instance is shared between tests;
      it is reset and its cache_by_key refilled from the saved builtin concepts
      on every call after the first one
    - skip_builtins_in_db: forwarded to _inner_get_sheerka when a new
      instance has to be created (defaults to True)
    """
    provided = kwargs.get("sheerka", False)
    if provided:
        return provided

    skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True)
    if not kwargs.get("singleton", False):
        return self._inner_get_sheerka(skip_builtins_in_db)

    shared = TestUsingMemoryBasedSheerka.singleton_instance
    if shared:
        shared.reset()
        shared.cache_by_key.update(TestUsingMemoryBasedSheerka.builtin_concepts)  # quicker ?
        # shared.cache_by_key = TestUsingMemoryBasedSheerka.builtin_concepts
        return shared

    # first request for the singleton: build it and remember its builtin concepts
    created = self._inner_get_sheerka(skip_builtins_in_db)
    TestUsingMemoryBasedSheerka.builtin_concepts.update(created.cache_by_key)
    TestUsingMemoryBasedSheerka.singleton_instance = created
    return TestUsingMemoryBasedSheerka.singleton_instance
@@ -3,6 +3,7 @@ from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF
from core.sheerka.Sheerka import Sheerka
from sdp.sheerkaDataProvider import SheerkaDataProvider
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -32,6 +33,7 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka):
assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, concept.id)
assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name)
assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, concept.key)
assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+")
def test_i_can_add_a_concept_when_name_differs_from_the_key(self):
sheerka = self.get_sheerka()
@@ -229,3 +231,29 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka):
assert res.status
class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka):
    def test_i_can_add_several_concepts(self):
        """Concepts created through two successive Sheerka instances are all found in the sdp."""
        sheerka, context, hello, greeting = self.init_concepts(
            Concept("Hello world a").def_prop("a"),
            Concept("Greeting a").def_prop("a"),
            use_dict=False
        )

        first = sheerka.create_new_concept(self.get_context(sheerka), hello)
        assert first.status

        # a second instance is requested before adding the second concept
        sheerka = self.get_sheerka(use_dict=False)
        second = sheerka.create_new_concept(self.get_context(sheerka), greeting)
        assert second.status

        expected_entries = [
            (Sheerka.CONCEPTS_ENTRY, hello.key),
            (Sheerka.CONCEPTS_ENTRY, greeting.key),
            (Sheerka.CONCEPTS_BY_ID_ENTRY, hello.id),
            (Sheerka.CONCEPTS_BY_ID_ENTRY, greeting.id),
            (Sheerka.CONCEPTS_BY_NAME_ENTRY, "Hello world a"),
            (Sheerka.CONCEPTS_BY_NAME_ENTRY, "Greeting a"),
            (Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Hello"),
            (Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Greeting"),
        ]
        for entry, key in expected_entries:
            assert sheerka.sdp.exists(entry, key)
+1 -1
View File
@@ -4,7 +4,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class TestSheerkaHistoryManager(TestUsingMemoryBasedSheerka):
def test_i_can_retrieve_history(self):
sheerka = self.get_sheerka(skip_builtins_in_db=False)
sheerka = self.get_sheerka(skip_builtins_in_db=False, singleton=False)
sheerka.evaluate_user_input("def concept one as 1")
sheerka.evaluate_user_input("one")
+1 -1
View File
@@ -244,7 +244,7 @@ class TestSheerka(TestUsingFileBasedSheerka):
assert not sheerka.is_success(sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS))
def test_cache_is_updated_after_get(self):
sheerka = self.get_sheerka()
sheerka = self.get_sheerka(skip_builtins_in_db=False)
# updated when by_key returns one element
sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="1"))
+20 -17
View File
@@ -171,22 +171,22 @@ class TestSheerkaExecuteParsers(TestUsingMemoryBasedSheerka):
'name=Enabled50True, priority=50, status=True, source=Enabled80False:Enabled90False:hello world',
]
def test_parsing_stop_at_the_first_success(self):
sheerka = self.get_sheerka()
sheerka.parsers = {
"Enabled80False": Enabled80FalseParser,
"Enabled50bisTrue": Enabled50bisTrueParser,
"Enabled10True": Enabled10TrueParser,
}
user_input = [get_ret_val("hello world")]
BaseTestParser.debug_out = []
sheerka.execute(self.get_context(sheerka), user_input, [BuiltinConcepts.PARSING])
assert BaseTestParser.debug_out == [
'name=Enabled80False, priority=80, status=False, source=hello world',
'name=Enabled50BisTrue, priority=50, status=True, source=hello world',
]
# def test_parsing_stop_at_the_first_success(self):
# sheerka = self.get_sheerka()
# sheerka.parsers = {
# "Enabled80False": Enabled80FalseParser,
# "Enabled50bisTrue": Enabled50bisTrueParser,
# "Enabled10True": Enabled10TrueParser,
# }
#
# user_input = [get_ret_val("hello world")]
# BaseTestParser.debug_out = []
# sheerka.execute(self.get_context(sheerka), user_input, [BuiltinConcepts.PARSING])
#
# assert BaseTestParser.debug_out == [
# 'name=Enabled80False, priority=80, status=False, source=hello world',
# 'name=Enabled50BisTrue, priority=50, status=True, source=hello world',
# ]
def test_parsing_stop_at_the_first_success_2(self):
"""
@@ -243,10 +243,13 @@ class TestSheerkaExecuteParsers(TestUsingMemoryBasedSheerka):
'name=Enabled50True, priority=50, status=False, source=Enabled80False:hello world',
'name=Enabled50True, priority=50, status=True, source=Enabled80False:Enabled90False:hello world',
'name=Enabled50BisTrue, priority=50, status=True, source=hello world',
'name=Enabled50BisTrue, priority=50, status=True, source=Enabled90False:hello world',
'name=Enabled50BisTrue, priority=50, status=True, source=Enabled80False:hello world',
'name=Enabled50BisTrue, priority=50, status=True, source=Enabled80False:Enabled90False:hello world',
'name=Enabled50False, priority=50, status=False, source=hello world',
'name=Enabled50False, priority=50, status=False, source=Enabled90False:hello world',
'name=Enabled50False, priority=50, status=False, source=Enabled80False:hello world',
'name=Enabled50False, priority=50, status=False, source=Enabled80False:Enabled90False:hello world',
'name=Enabled50False, priority=50, status=False, source=Enabled80False:Enabled90False:hello world'
]
def test_a_parser_has_access_to_the_output_of_its_predecessors(self):
+1 -1
View File
@@ -69,7 +69,7 @@ def test_i_can_get_sub_classes():
default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser")
exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")
python_parser = core.utils.get_class("parsers.PythonParser.PythonParser")
concept_lexer_parser = core.utils.get_class("parsers.ConceptLexerParser.ConceptLexerParser")
concept_lexer_parser = core.utils.get_class("parsers.BnfNodeParser.BnfNodeParser")
assert base_parser not in sub_classes
assert default_parser in sub_classes
+4 -4
View File
@@ -2,11 +2,11 @@ import ast
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.tokenizer import Tokenizer
from evaluators.AddConceptEvaluator import AddConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression
from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression
from parsers.BnfParser import BnfParser
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.PythonParser import PythonNode, PythonParser
@@ -65,10 +65,10 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka):
def_concept.post = self.get_concept_part(post)
if bnf_def:
def_concept.definition = bnf_def
def_concept.definition_type = "bnf"
def_concept.definition_type = DEFINITION_TYPE_BNF
if definition:
def_concept.definition = NameNode(list(Tokenizer(definition)))
def_concept.definition_type = "def"
def_concept.definition_type = DEFINITION_TYPE_DEF
return ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=def_concept))
@@ -69,12 +69,14 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka):
def test_i_can_add_bnf_concept_to_a_set_of_concept(self):
"""
This test is the reason why I have started the whole eval on demand stuff
Sheerka tries to evaluate the body but it can (as a and b are not defined)
Sheerka tries to evaluate the body but it can't (as a and b are not defined)
So 'foo' cannot be put is set
:return:
"""
sheerka, context, foo, bar = self.init_concepts(
Concept("foo", definition="a plus b", body="a + b").def_prop("a").def_prop("b"),
sheerka, context, one, two, foo, bar = self.init_concepts(
"one",
"two",
Concept("foo", definition="(one|two)=a 'plus' (one|two)=b", body="a + b").def_prop("a").def_prop("b"),
"bar",
create_new=True)
+3 -2
View File
@@ -4,7 +4,8 @@ import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from evaluators.LexerNodeEvaluator import LexerNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, StrMatch, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, BnfNodeParser, StrMatch, UnrecognizedTokensNode
from parsers.PythonParser import PythonNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -12,7 +13,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka):
def from_parsing(self, context, grammar, expression):
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
ret_val = parser.parse(context, expression)
@@ -0,0 +1,98 @@
import pytest
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from core.concept import Concept
from evaluators.BaseEvaluator import BaseEvaluator
from evaluators.MultipleErrorsEvaluator import MultipleErrorsEvaluator
from parsers.BaseParser import BaseParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def r(value, status=True):
    """
    Builds a ReturnValueConcept where 'value' is used both as first and as third argument
    :param value: first and third positional argument of the ReturnValueConcept
    :param status: second positional argument (True when omitted)
    :return: the new ReturnValueConcept
    """
    first = third = value
    return ReturnValueConcept(first, status, third)
def eval_false(name):
    """
    Builds a ReturnValueConcept with status False, named BaseEvaluator.PREFIX + name
    :param name: suffix appended to BaseEvaluator.PREFIX
    :return: ReturnValueConcept(<prefixed name>, False, "value")
    """
    prefixed_name = BaseEvaluator.PREFIX + name
    return ReturnValueConcept(prefixed_name, False, "value")
def eval_true(name):
    """
    Builds a ReturnValueConcept with status True, named BaseEvaluator.PREFIX + name
    :param name: suffix appended to BaseEvaluator.PREFIX
    :return: ReturnValueConcept(<prefixed name>, True, "value")
    """
    prefixed_name = BaseEvaluator.PREFIX + name
    return ReturnValueConcept(prefixed_name, True, "value")
def parser_false(name):
    """
    Builds a ReturnValueConcept with status False, named BaseParser.PREFIX + name
    :param name: suffix appended to BaseParser.PREFIX
    :return: ReturnValueConcept(<prefixed name>, False, "value")
    """
    prefixed_name = BaseParser.PREFIX + name
    return ReturnValueConcept(prefixed_name, False, "value")
def parser_true(name):
    """
    Builds a ReturnValueConcept with status True, named BaseParser.PREFIX + name
    :param name: suffix appended to BaseParser.PREFIX
    :return: ReturnValueConcept(<prefixed name>, True, "value")
    """
    prefixed_name = BaseParser.PREFIX + name
    return ReturnValueConcept(prefixed_name, True, "value")
# Successful return value whose body is a concept named/keyed BuiltinConcepts.REDUCE_REQUESTED.
# It is appended to the return values of the tests of this module.
# NOTE(review): presumably it is the marker that makes the evaluator accept to reduce - confirm
reduce_requested = ReturnValueConcept(
"some_name",
True,
Concept(name=BuiltinConcepts.REDUCE_REQUESTED, key=BuiltinConcepts.REDUCE_REQUESTED))
class TestMultipleErrorsEvaluator(TestUsingMemoryBasedSheerka):
    """
    Tests of MultipleErrorsEvaluator.matches() and MultipleErrorsEvaluator.eval()
    """

    @pytest.mark.parametrize("return_values, expected", [
        ([eval_false("one"), reduce_requested], False),
        ([eval_false("one"), eval_false("two"), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), eval_false("three"), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), parser_false("one"), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), parser_true("one"), reduce_requested], False),
        ([eval_false("one"), eval_false("two"), eval_true("three"), reduce_requested], False),
        ([eval_false("one"), eval_false("two"), r("other concept", False), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), r("other concept", True), reduce_requested], True),
        ([eval_false("reduce not required 1"), eval_false("reduce not required 2")], False),
    ])
    def test_i_can_match(self, return_values, expected):
        """
        matches() must answer 'expected' for each combination of return values
        """
        context = self.get_context()
        matched = MultipleErrorsEvaluator().matches(context, return_values)

        assert matched == expected

    def test_i_can_eval(self):
        """
        The evaluators in error are gathered in a MULTIPLE_ERRORS concept,
        and the six return values are referenced as parents
        """
        context = self.get_context()
        return_values = [eval_false(name) for name in ("one", "two", "three")]
        return_values += [parser_false(name) for name in ("one", "two")]
        return_values.append(reduce_requested)

        evaluator = MultipleErrorsEvaluator()
        evaluator.matches(context, return_values)  # called before eval(), as the original test does
        res = evaluator.eval(context, return_values)

        assert not res.status
        assert context.sheerka.isinstance(res.body, BuiltinConcepts.MULTIPLE_ERRORS)
        # compared with fresh objects, so equality (not identity) is what is checked
        assert res.body.body == [eval_false("one"), eval_false("two"), eval_false("three")]
        assert len(res.parents) == 6

    def test_unwanted_return_values_are_not_eaten(self):
        """
        Return values that come neither from an evaluator nor from a parser
        must not be listed in the parents of the result
        """
        context = self.get_context()
        a_successful_concept = r("successful concept")
        a_concept_in_error = r("concept in error", False)
        return_values = [eval_false("one"), eval_false("two"), parser_false("one")]
        return_values += [a_successful_concept, a_concept_in_error, reduce_requested]

        evaluator = MultipleErrorsEvaluator()
        evaluator.matches(context, return_values)
        res = evaluator.eval(context, return_values)

        assert not res.status
        assert res.body.body == [eval_false("one"), eval_false("two")]
        assert len(res.parents) == 4
        for unwanted in (a_successful_concept, a_concept_in_error):
            assert unwanted not in res.parents
@@ -71,3 +71,4 @@ class TestOneErrorEvaluator(TestUsingMemoryBasedSheerka):
assert len(res.parents) == 4
assert a_successful_concept not in res.parents
assert a_concept_in_error not in res.parents
+111 -21
View File
@@ -1,9 +1,9 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property, simplec
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
from parsers.ConceptLexerParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression
from parsers.BaseNodeParser import SyaAssociativity
from parsers.BnfNodeParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression
from sdp.sheerkaDataProvider import SheerkaDataProvider
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
@@ -125,6 +125,17 @@ as:
assert sheerka.sdp.io.exists(
sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_origin()))
def test_i_can_def_several_concepts(self):
    """
    Defines 'foo' with a first instance, then 'bar' with a second instance.
    The second definition must succeed and 'bar' must get the id "1002"
    """
    first_instance = self.get_sheerka(use_dict=False)
    first_instance.evaluate_user_input("def concept foo")

    # NOTE(review): presumably the second instance reloads what the first one saved - confirm
    second_instance = self.get_sheerka(use_dict=False)
    res = second_instance.evaluate_user_input("def concept bar")

    assert len(res) == 1
    only_result = res[0]
    assert only_result.status
    assert only_result.body.body.id == "1002"
def test_i_can_evaluate_def_concept_part_when_one_part_is_a_ref_of_another_concept(self):
"""
In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b')
@@ -393,6 +404,7 @@ as:
assert concept_found.get_prop("a") is None
assert not concept_found.metadata.need_validation
# @pytest.mark.xfail
@pytest.mark.parametrize("desc, definitions", [
("Simple form", [
"def concept one as 1",
@@ -467,6 +479,7 @@ as:
assert res[0].status
assert res[0].body == 23
# @pytest.mark.xfail
def test_i_can_mix_bnf_and_isa(self):
"""
if 'one' isa 'number, twenty number should be recognized
@@ -531,7 +544,44 @@ as:
assert res[0].status
assert res[0].body == 21
def test_i_can_mix_concept_of_concept(self):
# @pytest.mark.xfail
def test_i_can_use_concepts_defined_with_from(self):
    """
    A concept defined with 'from' ('a plus b') can be evaluated
    whether its operands are concepts, python literals or a mix of both
    """
    sheerka = self.get_sheerka()

    for definition in ("def concept plus from a plus b as a + b",
                       "def concept one as 1"):
        sheerka.evaluate_user_input(definition)

    # (user input, expected body), evaluated in this order
    scenarios = (
        ("eval one plus one", 2),
        ("eval 1 plus one", 2),
        ("eval one plus 1", 2),
        ("eval 1 plus 2", 3),
        ("eval 1 plus 1", 2),
    )
    for user_input, expected_body in scenarios:
        res = sheerka.evaluate_user_input(user_input)
        assert len(res) == 1
        assert res[0].status
        assert res[0].body == expected_body
def test_i_can_mix_bnf_concept_and_concept(self):
definitions = [
"def concept one as 1",
"def concept two as 2",
@@ -631,24 +681,6 @@ as:
assert res[1].status
assert res[1].body == "little blue(house)"
@pytest.mark.xfail
def test_i_can_recognize_composition_of_concept_with_priority(self):
    """
    Defines 'a plus b' and 'a times b', sets a priority on the concepts 1001 and 1002,
    then evaluates an expression that mixes the two (marked as expected to fail)
    """
    sheerka = self.get_sheerka()

    setup = (
        "def concept a plus b where a,b",
        "def concept a times b where a,b",
        "modify concept 1001 set priority = 1",
        "modify concept 1002 set priority = 2",
    )
    for user_input in setup:
        sheerka.evaluate_user_input(user_input)

    res = sheerka.evaluate_user_input("1 plus 2 times 3")
    first_result = res[0]
    assert first_result.status
    # check that the priority is applied
def test_i_can_say_that_a_concept_isa_another_concept(self):
sheerka = self.get_sheerka()
sheerka.evaluate_user_input("def concept foo")
@@ -768,6 +800,7 @@ as:
assert not res[0].status
assert sheerka.isinstance(res[0].body, BuiltinConcepts.WHERE_CLAUSE_FAILED)
# def test_i_can_detect_when_only_one_evaluator_is_in_error(self):
# sheerka = self.get_sheerka()
#
@@ -864,3 +897,60 @@ as:
twenties = sheerka.get("twenties")
number = sheerka.get("number")
assert sheerka.isa(twenties, number)
def test_i_can_mix_sya_concepts_and_bnf_concept(self):
    """
    Mixes concepts defined with 'from' (plus, mult), a bnf concept (twenties)
    and python literals in the same expression.
    The expected values show that 'mult' is applied before 'plus'
    """
    sheerka = self.init_scenario([
        "def concept one as 1",
        "def concept two as 2",
        "def concept three as 3",
        "def concept plus from a plus b as a + b",
        "def concept mult from a mult b as a * b",
        "def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit",
    ])
    context = self.get_context(sheerka)

    # NOTE(review): the numbers are presumably the precedence of each concept - confirm
    mult_id = sheerka.get("mult").id
    plus_id = sheerka.get("plus").id
    sheerka.set_sya_def(context, [
        (mult_id, 20, SyaAssociativity.Right),
        (plus_id, 10, SyaAssociativity.Right),
    ])

    # (user input, expected body), evaluated in this order
    scenarios = (
        ("eval one plus two mult three", 7),
        ("eval two mult three plus one", 7),
        ("eval 1 plus two mult 3", 7),
        ("eval 2 mult 3 plus one", 7),
        ("eval twenty two plus 1", 23),
        ("eval 1 plus twenty two", 23),
        ("eval twenty one plus twenty two", 43),
        ("eval twenty two plus twenty one mult two", 64),
    )
    for user_input, expected_body in scenarios:
        res = sheerka.evaluate_user_input(user_input)
        assert len(res) == 1
        assert res[0].status
        assert res[0].body == expected_body
+150
View File
@@ -0,0 +1,150 @@
from core.concept import CC, Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN
from parsers.SyaNodeParser import SyaConceptParserHelper
def _index(tokens, expr, index):
    """
    Finds a sub list in a bigger list
    :param tokens: list of token values to search in
    :param expr: text to search; it is tokenized (EOF excluded) before the comparison
    :param index: number of matches to skip before returning
    :return: tuple (position of the match in tokens, number of tokens of expr)
    :raises ValueError: when there are not enough matches
    """
    wanted = [tok.value for tok in Tokenizer(expr) if tok.type != TokenKind.EOF]
    width = len(wanted)
    to_skip = index

    for start in range(len(tokens) - width + 1):
        differs = any(tokens[start + offset] != value for offset, value in enumerate(wanted))
        if differs:
            continue

        # full match at 'start': either it is the requested one, or it is skipped
        if to_skip == 0:
            return start, width
        to_skip -= 1

    raise ValueError(f"substring '{expr}' not found")
def compute_debug_array(res):
    """
    Builds a comparable version of the 'debug' attribute of each item of res
    :param res: iterable of objects exposing a 'debug' iterable
    :return: one list per item: the value of every Token which is not a whitespace,
             and concept.name for everything which is not a Token
    """

    def _is_whitespace(item):
        return isinstance(item, Token) and item.type == TokenKind.WHITESPACE

    def _label(item):
        return item.value if isinstance(item, Token) else item.concept.name

    return [
        [_label(item) for item in one_result.debug if not _is_whitespace(item)]
        for one_result in res
    ]
def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, skip=0, is_bnf=False, sya=False):
"""
Transforms one item of an 'expected' description into the node it stands for.
sub_expr is dispatched on its type (already built nodes, helper objects, tuple);
a plain string is searched in the expression and, when found, transformed to its correct type
:param concepts_map: hash of the known concepts
:param expression_as_tokens: full expression (compute_expected_array gives the list of the token values)
:param sub_expr: sub expression to search in the full expression, or a node / helper object to fix
:param concept_key: key of the concept if different from sub_expr
:param skip: number of occurrences of sub_expr to skip
:param is_bnf: True if the concept to search is a bnf definition
:param sya: Return SyaConceptParserHelper instead of a ConceptNode when needed
:return: the node (scnode, utnode, cnode, CN, SyaConceptParserHelper, ...), or ")" for the "')'" marker
:raises ValueError: (from _index) when a string sub_expr is not found in the expression
"""
# "')'" is a marker: it is translated to a plain closing parenthesis
if sub_expr == "')'":
return ")"
# already positioned nodes are returned as they are
if isinstance(sub_expr, (scnode, utnode)):
return sub_expr
if isinstance(sub_expr, cnode):
# for cnode, map the concept key to the one from concepts_maps if needed
if sub_expr.concept_key.startswith("#"):
return cnode(
concepts_map[sub_expr.concept_key[1:]].key,
sub_expr.start,
sub_expr.end,
sub_expr.source
)
else:
return sub_expr
# source code with concepts: first, last and every item of content are resolved recursively
if isinstance(sub_expr, SCWC):
first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya)
last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya)
content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content]
return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()
# helper objects: the concept is searched using source (or concept_key when there is no source),
# then sub_expr itself is updated (concept, key, positions, compiled values) and returned
if isinstance(sub_expr, (CNC, CC, CN)):
concept_node = get_node(
concepts_map,
expression_as_tokens,
sub_expr.source or sub_expr.concept_key,
sub_expr.concept_key, sya=sya)
concept_found = concept_node.concept
sub_expr.concept_key = concept_found.key
sub_expr.concept = concept_found
# when the node found has no 'end' attribute, its start is used for both positions
sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
if hasattr(sub_expr, "compiled"):
for k, v in sub_expr.compiled.items():
node = get_node(concepts_map, expression_as_tokens, v, sya=sya) # need to get start and end positions
# a string which is a key of concepts_map is replaced by a CC built on a copy of the concept
new_value = CC(Concept().update_from(concepts_map[v])) if (isinstance(v, str) and v in concepts_map) \
else node
sub_expr.compiled[k] = new_value
sub_expr.fix_pos(node)
if hasattr(sub_expr, "fix_source"):
sub_expr.fix_source(expression_as_tokens[sub_expr.start: sub_expr.end + 1])
return sub_expr
# unrecognized tokens helper: only its position is fixed, using the node found for its source
if isinstance(sub_expr, UTN):
node = get_node(concepts_map, expression_as_tokens, sub_expr.source)
sub_expr.fix_pos(node)
return sub_expr
# short_cnode: searched by its source, always as a bnf concept
if isinstance(sub_expr, short_cnode):
return get_node(concepts_map, expression_as_tokens, sub_expr.source,
concept_key=sub_expr.concept_key, skip=skip, is_bnf=True, sya=sya)
# (sub_expr, skip) form
# NOTE(review): presumably some of the types tested above are tuples, so this test must stay after them - confirm
if isinstance(sub_expr, tuple):
return get_node(concepts_map, expression_as_tokens, sub_expr[0],
concept_key=concept_key, skip=sub_expr[1], is_bnf=is_bnf, sya=sya)
# from here, sub_expr is used as a string to locate in the expression
start, length = _index(expression_as_tokens, sub_expr, skip)
# special case of python source code
if "+" in sub_expr and sub_expr.strip() != "+":
return scnode(start, start + length - 1, sub_expr)
# try to match one of the concept from the map
concept_key = concept_key or sub_expr
concept_found = concepts_map.get(concept_key, None)
if concept_found:
concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests
if not sya or len(concept_found.metadata.props) == 0 or is_bnf:
# if it's an atom, then return a ConceptNode
return CN(concept_found, start, start + length - 1, source=sub_expr)
else:
# else return a ParserHelper
return SyaConceptParserHelper(concept_found, start)
else:
# else an UnrecognizedTokensNode
return utnode(start, start + length - 1, sub_expr)
def compute_expected_array(concepts_map, expression, expected, sya=False):
    """
    Computes a simple but sufficient version of the result of infix_to_postfix()
    The expression is tokenized (EOF excluded), then every item of expected
    is transformed by get_node()
    :param concepts_map: hash of the known concepts
    :param expression: full expression, as text
    :param expected: items to resolve (strings, tuples, nodes or helper objects)
    :param sya: if true, generate an SyaConceptParserHelper instead of a cnode
    :return: list of the resolved nodes, in the order of expected
    """
    token_values = [tok.value for tok in Tokenizer(expression) if tok.type != TokenKind.EOF]

    nodes = []
    for item in expected:
        nodes.append(get_node(concepts_map, token_values, item, sya=sya))
    return nodes
+241
View File
@@ -0,0 +1,241 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.AtomNodeParser import AtomNodeParser
from parsers.BaseNodeParser import cnode, utnode, CNC
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array
class TestAtomsParser(TestUsingMemoryBasedSheerka):
    """
    Tests of AtomNodeParser.
    According to the cases below, concepts without property are recognized by their name,
    while an input made only of concepts with properties, bnf concepts
    or unrecognized tokens is answered with NOT_FOR_ME
    """

    def init_parser(self, concepts_map, **kwargs):
        """
        Creates the concepts of concepts_map and an AtomNodeParser initialized with them
        :param concepts_map: hash of the concepts to create
        :param kwargs: extra keyword arguments for init_concepts()
        :return: tuple (sheerka, context, parser)
        """
        sheerka, context, *updated_concepts = self.init_concepts(
            *concepts_map.values(), singleton=True, **kwargs)

        parser = AtomNodeParser()
        parser.initialize(context, updated_concepts)

        return sheerka, context, parser

    def _assert_parser_result(self, sheerka, res, concepts_map, text, expected_nodes, expected_status=None):
        """
        Checks one return value of the parser
        :param sheerka: used to check the type of the wrapper
        :param res: return value to check
        :param concepts_map: hash of the known concepts
        :param text: the parsed text
        :param expected_nodes: description of the expected nodes (see compute_expected_array)
        :param expected_status: when None, the status must be truthy, otherwise it must be equal to it
        :return:
        """
        wrapper = res.body
        lexer_nodes = res.body.body

        if expected_status is None:
            assert res.status
        else:
            assert res.status == expected_status

        expected_array = compute_expected_array(concepts_map, text, expected_nodes)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    def test_i_cannot_parse_empty_string(self):
        sheerka, context, parser = self.init_parser({})
        res = parser.parse(context, "")

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    @pytest.mark.parametrize("text, expected", [
        ("foo", ["foo"]),
        ("foo bar", ["foo", "bar"]),
        ("foo bar twenties", ["foo", "bar", "twenties"]),
    ])
    def test_i_can_parse_simple_sequences(self, text, expected):
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        self._assert_parser_result(sheerka, res, concepts_map, text, expected)

    @pytest.mark.parametrize("text, expected", [
        ("foo bar", ["foo bar"]),
        ("one two three", ["one two three"]),
        ("foo bar twenties one two three", ["foo bar", "twenties", "one two three"]),
    ])
    def test_i_can_parse_long_names(self, text, expected):
        concepts_map = {
            "foo bar": Concept("foo bar"),
            "one two three": Concept("one two three"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        self._assert_parser_result(sheerka, res, concepts_map, text, expected)

    @pytest.mark.parametrize("text, expected_status, expected", [
        ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
        ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
        ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]),
        ("foo bar 1 + 1", True, ["foo bar", " 1 + 1"]),
        ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
        ("foo bar x$!#", False, ["foo bar", " x$!#"]),
        ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
        ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
        ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]),
        ("1 + 1 foo bar", True, ["1 + 1 ", "foo bar"]),
        ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
        ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
        ("func(one)", False, ["func(", "one", ")"]),
    ])
    def test_i_can_parse_when_unrecognized(self, text, expected_status, expected):
        # NOTE(review): "suffixed" is mapped to Concept("prefixed a"), which looks like
        # a copy/paste of the previous entry - confirm before changing the fixture
        concepts_map = {
            "prefixed": Concept("a prefixed").def_prop("a"),
            "suffixed": Concept("prefixed a").def_prop("a"),
            "infix": Concept("a infix b").def_prop("a").def_prop("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        self._assert_parser_result(sheerka, res, concepts_map, text, expected, expected_status)

    @pytest.mark.parametrize("text, expected_status, expected", [
        (" one two ", True, [cnode("one", 1, 1, "one"), cnode("two", 3, 3, "two")]),
        (" one x$!# ", False, [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]),
        (" foo bar x$!# ", False, [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]),
    ])
    def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected):
        concepts_map = {
            "prefixed": Concept("a prefixed").def_prop("a"),
            "suffixed": Concept("prefixed a").def_prop("a"),
            "infix": Concept("a infix b").def_prop("a").def_prop("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        self._assert_parser_result(sheerka, res, concepts_map, text, expected, expected_status)

    @pytest.mark.parametrize("text, expected", [
        ("one two", [["one", "two"], ["one two"]])
    ])
    def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(self, text, expected):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)

        # the lengths are checked first, so zip() does not silently drop a result
        assert len(list_of_res) == len(expected)
        for res, expected_nodes in zip(list_of_res, expected):
            self._assert_parser_result(sheerka, res, concepts_map, text, expected_nodes)

    def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(self):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
            "one two x$!# one two": Concept("one two x$!# one two"),
        }

        text = "one two x$!# one two"
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)

        # (expected status, expected nodes); ("one", 1) means the second occurrence of "one"
        expected_results = [
            (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one", "two", " x$!# ", ("one two", 1)]),
            (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one two", " x$!# ", ("one two", 1)]),
            (True, ["one two x$!# one two"]),
        ]

        assert len(list_of_res) == len(expected_results)
        for res, (expected_status, expected_nodes) in zip(list_of_res, expected_results):
            self._assert_parser_result(sheerka, res, concepts_map, text, expected_nodes, expected_status)

    @pytest.mark.parametrize("text", [
        "foo",
        "foo one",
        "x$!#",
        "twenty one",
        "1 + 1",
        "foo x$!#",
        "1 + 1 twenty one",
    ])
    def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text):
        concepts_map = {
            "foo": Concept("foo a").def_prop("a"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
        assert res.body.body == text

    @pytest.mark.parametrize("text, expected", [
        ("hello foo bar",
         [
             (True, [CNC("hello1", source="hello foo ", a=" foo "), "bar"]),
             (True, [CNC("hello2", source="hello foo ", b=" foo "), "bar"]),
         ]),
    ])
    def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
        concepts_map = {
            "hello1": Concept("hello a").def_prop("a"),
            "hello2": Concept("hello b").def_prop("b"),
            "bar": Concept("bar")
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
        list_of_res = parser.parse(context, text)

        assert len(list_of_res) == len(expected)
        for res, (expected_status, expected_nodes) in zip(list_of_res, expected):
            self._assert_parser_result(sheerka, res, concepts_map, text, expected_nodes, expected_status)
@@ -4,10 +4,11 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import cnode, short_cnode
from parsers.BnfParser import BnfParser
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \
UnrecognizedTokensNode, cnode, short_cnode, ConceptExpression, ConceptGroupExpression
UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -72,15 +73,16 @@ def cprop(concept, prop_name):
return concept.compiled[prop_name]
class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
class TestBnfConceptLexerParser(TestUsingMemoryBasedSheerka):
def init(self, concepts, grammar):
context = self.get_context()
sheerka = self.get_sheerka(singleton=True)
context = self.get_context(sheerka)
for c in concepts:
context.sheerka.add_in_cache(c)
context.sheerka.set_id_if_needed(c, False)
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
return context, parser
@@ -602,7 +604,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {foo: Optional("one", ConceptExpression("foo"))}
context = self.get_context()
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo"))
@@ -612,7 +614,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
context = self.get_context()
context.concepts["foo"] = foo
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo"))
@@ -636,7 +638,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {foo: Sequence("twenty", number)}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
res = parser.parse(context, "twenty two")
@@ -686,7 +688,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {foo: ZeroOrMore("one")}
context, parser = self.init([foo], grammar)
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
res = parser.parse(context, "one two")
@@ -779,7 +781,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar: foo,
foo: bar
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert bar not in parser.concepts_grammars
@@ -793,7 +795,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
foo: OrderedChoice(bar, "foo")
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
@@ -824,7 +826,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar: foo,
foo: Sequence("one", bar, "two")
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
@@ -838,7 +840,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar: foo,
foo: Sequence("one", OrderedChoice(bar, "other"), "two")
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
@@ -851,7 +853,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {
foo: bar
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo in parser.concepts_grammars
@@ -883,7 +885,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar = Concept(name="bar")
grammar = {foo: Sequence("one", "two"), bar: foo}
parser = ConceptLexerParser()
parser = BnfNodeParser()
ret = parser.initialize(context, grammar)
return_value = ret.body
@@ -1209,7 +1211,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
context.sheerka.set_id_if_needed(c, False)
context.sheerka.add_concept_to_set(context, baz, bar)
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
encoded = parser.encode_grammar(parser.concepts_grammars)
@@ -1260,7 +1262,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
# }
#
# parser = ConceptLexerParser()
# parser = BnfNodeParser()
# parser.register(grammar)
#
# # res = parser.parse(context, "1")
+4 -3
View File
@@ -3,10 +3,11 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptExpression, cnode
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
BnfNodeParser, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -155,7 +156,7 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser = BnfNodeParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
@@ -5,7 +5,8 @@ import pytest
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
@@ -65,7 +66,7 @@ class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, interested", [
("not parser result", False),
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
(ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
])
def test_not_interested(self, text, interested):
context = self.get_context()
+2 -2
View File
@@ -3,7 +3,7 @@ import ast
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptExpression
from parsers.BnfNodeParser import OrderedChoice, StrMatch, ConceptExpression
from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode
@@ -251,7 +251,7 @@ def concept add one to a as
res = parser.parse(context, text)
node = res.value.value
definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition)
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", None, definition, definition)
expected = get_def_concept(name="name", body="__definition[0]", bnf_def=parser_result)
assert res.status
+3 -2
View File
@@ -3,7 +3,8 @@ import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, SourceCodeNode
from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
@@ -11,7 +12,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def get_return_value(context, grammar, expression):
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
ret_val = parser.parse(context, expression)
+13 -8
View File
@@ -1,6 +1,6 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept
from core.builtin_concepts import ParserResultConcept, NotForMeConcept
from core.tokenizer import Tokenizer, LexerError
from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode
import core.utils
@@ -48,9 +48,11 @@ class TestPythonParser(TestUsingMemoryBasedSheerka):
assert not res.status
assert res.who == parser.name
assert isinstance(res.value, ParserResultConcept)
assert isinstance(res.value.value[0], PythonErrorNode)
assert isinstance(res.value.value[0].exception, SyntaxError)
assert isinstance(res.value, NotForMeConcept)
assert res.value.body == text
assert len(res.value.get_prop("reason")) == 1
assert isinstance(res.value.get_prop("reason")[0], PythonErrorNode)
assert isinstance(res.value.get_prop("reason")[0].exception, SyntaxError)
@pytest.mark.parametrize("text, error_msg, error_text", [
("c::", "Concept identifiers not found", ""),
@@ -61,10 +63,13 @@ class TestPythonParser(TestUsingMemoryBasedSheerka):
res = parser.parse(self.get_context(), text)
assert not res.status
assert isinstance(res.body, ParserResultConcept)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
assert isinstance(res.value, NotForMeConcept)
assert res.value.body == text
assert len(res.value.get_prop("reason")) == 1
assert isinstance(res.value.get_prop("reason")[0], LexerError)
assert res.value.get_prop("reason")[0].message == error_msg
assert res.value.get_prop("reason")[0].text == error_text
def test_i_can_parse_a_concept(self):
text = "c:name|key: + 1"
+7 -11
View File
@@ -1,18 +1,17 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode, PythonErrorNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.PythonParser import PythonNode
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
multiple_concepts_parser = MultipleConceptsParser()
unrecognized_nodes_parser = UnrecognizedNodeParser()
def ret_val(*args):
@@ -28,7 +27,7 @@ def ret_val(*args):
result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
index += len(tokens)
return ReturnValueConcept("who", False, ParserResultConcept(parser=multiple_concepts_parser, value=result))
return ReturnValueConcept("who", False, ParserResultConcept(parser=unrecognized_nodes_parser, value=result))
def to_str_ast(expression):
@@ -40,7 +39,7 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, interested", [
("not parser result", False),
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
(ParserResultConcept(parser=unrecognized_nodes_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
])
def test_not_interested(self, text, interested):
context = self.get_context()
@@ -130,9 +129,6 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
parser = PythonWithConceptsParser()
result = parser.parse(context, input_return_value.body)
wrapper = result.value
return_value = result.value.value
assert not result.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert isinstance(return_value[0], PythonErrorNode)
assert context.sheerka.isinstance(result.value, BuiltinConcepts.NOT_FOR_ME)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,383 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept, CC
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, scnode, cnode, \
utnode, SyaAssociativity, CN, CNC, UTN
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array, get_node
def get_input_nodes_from(my_concepts_map, full_expr, *args):
    """
    Build the real parser nodes matching the node descriptions given in ``args``.

    Every description is first passed to ``get_node`` together with the token values of
    ``full_expr`` (EOF token excluded); the helper node that comes back is then converted:

    * ``CC``                        -> the concept itself, ``compiled`` entries converted recursively
    * ``utnode`` / ``UTN``          -> ``UnrecognizedTokensNode`` over the tokens ``start..end``
    * ``CNC`` / ``CN`` / ``cnode``  -> ``ConceptNode`` over the tokens ``start..end``

    :param my_concepts_map: dict of concepts, read with ``concept_key`` when the helper carries no concept
    :param full_expr: the whole expression; it is tokenized once and sliced for every node
    :param args: node descriptions, forwarded as is to ``get_node``
    :return: list of converted nodes, one per entry of ``args``
    :raises NotImplementedError: when a helper node is of none of the types listed above
    """
    full_expr_as_tokens = list(Tokenizer(full_expr))
    tokens_for_get_node = [token.value for token in full_expr_as_tokens if token.type != TokenKind.EOF]

    def _resolve_concept(n):
        # Use the concept carried by the helper when there is one, otherwise start from
        # a new Concept instance updated from the map entry (same call in every branch).
        carried = getattr(n, "concept", None)
        return carried or Concept().update_from(my_concepts_map[n.concept_key])

    def _fill_compiled(concept, n):
        # Look at `n` itself, not at the top level node of the enclosing loop:
        # a nested helper may have no `compiled` attribute even when its parent has one.
        if hasattr(n, "compiled"):
            for name, value in n.compiled.items():
                concept.compiled[name] = _get_real_node(value)

    def _get_real_node(n):
        if isinstance(n, CC):
            concept = _resolve_concept(n)
            _fill_compiled(concept, n)
            return concept

        if isinstance(n, (utnode, UTN)):
            return UnrecognizedTokensNode(n.start, n.end, full_expr_as_tokens[n.start: n.end + 1])

        if isinstance(n, (CNC, CN, cnode)):
            concept = _resolve_concept(n)
            tokens = full_expr_as_tokens[n.start: n.end + 1]
            _fill_compiled(concept, n)
            return ConceptNode(concept, n.start, n.end, tokens)

        raise NotImplementedError(f"unsupported node type: {type(n).__name__}")

    return [_get_real_node(get_node(my_concepts_map, tokens_for_get_node, arg)) for arg in args]
# Concepts shared by the tests of this module, keyed by the alias the tests use.
# Keep the insertion order stable: init_parser() writes the initialized concepts
# back into this dict by position.
concepts_map = {
    "5params": (
        Concept("5params")
        .def_prop("a")
        .def_prop("b")
        .def_prop("c")
        .def_prop("d")
        .def_prop("e")
    ),
    "plus": Concept("a plus b", body="a + b").def_prop("a").def_prop("b"),
    "mult": Concept("a mult b", body="a * b").def_prop("a").def_prop("b"),
    "one": Concept("one", body="1"),
    "two": Concept("two", body="2"),
    "three": Concept("three", body="3"),
    "twenties": Concept(
        "twenties",
        definition="'twenty' (one|two)=unit",
        body="20 + unit",
    ).def_prop("unit"),
    # "hello one" can be read as the plain concept below or as "hello a" with a == one
    "hello_atom": Concept("hello one"),
    "hello_sya": Concept("hello a").def_prop("a"),
    # two concepts that only differ by the name of their property
    "greetings_a": Concept("greetings a").def_prop("a"),
    "greetings_b": Concept("greetings b").def_prop("b"),
}
class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
sheerka = None
@classmethod
def setup_class(cls):
    """
    Initialize the concepts of ``concepts_map`` once and keep the resulting sheerka
    on ``TestUnrecognizedNodeParser`` so that the tests can reuse it.
    """
    bootstrap = TestUnrecognizedNodeParser()
    sheerka, context, _ = bootstrap.init_parser(concepts_map, create_new=True)
    TestUnrecognizedNodeParser.sheerka = sheerka

    # presumably (concept id, precedence, associativity) - the tests below expect
    # "one plus two mult three" to group as plus(one, mult(two, three))
    sya_definitions = [
        (concepts_map["mult"].id, 20, SyaAssociativity.Right),
        (concepts_map["plus"].id, 10, SyaAssociativity.Right),
    ]
    sheerka.set_sya_def(context, sya_definitions)
def init_parser(self, my_concepts_map=None, **kwargs):
    """
    Return the ``(sheerka, context, parser)`` triple used by the tests.

    :param my_concepts_map: when given (and not empty), its concepts are passed to
        ``init_concepts`` and the dict values are replaced, in place, by the concepts
        that come back; otherwise the sheerka stored on the class is reused
    :param kwargs: forwarded to ``init_concepts``
    :return: sheerka, context and a new ``UnrecognizedNodeParser``
    """
    if not my_concepts_map:
        sheerka = TestUnrecognizedNodeParser.sheerka
        context = self.get_context(sheerka)
        return sheerka, context, UnrecognizedNodeParser()

    sheerka, context, *updated_concepts = self.init_concepts(*my_concepts_map.values(), **kwargs)
    # the concepts come back in the order they were given: write them back key by key
    for position, key in enumerate(my_concepts_map):
        my_concepts_map[key] = updated_concepts[position]

    return sheerka, context, UnrecognizedNodeParser()
def test_i_can_validate_a_valid_concept_node(self):
    """A concept node without anything to resolve is validated as is."""
    _, context, _ = self.init_parser()
    input_nodes = get_input_nodes_from(concepts_map, "one", "one")

    validation = UnrecognizedNodeParser().validate_concept_node(context, input_nodes[0])

    assert validation.status
    assert validation.body.concept == concepts_map["one"]
def test_i_can_validate_concept_unrecognized_tokens(self):
    """
    Validate a '5params' concept node whose five properties are still raw text,
    and check which parser produced the compiled value of each property.
    """
    sheerka, context, _ = self.init_parser()
    full_expression = "5params one two three twenty one 1 + 2 one plus two mult three"
    description = CNC("5params",
                      a=" one ",
                      b=" two three ",
                      c=" twenty one ",
                      d=utnode(12, 18, " 1 + 2 "),
                      e=" one plus two mult three")
    node = get_input_nodes_from(concepts_map, full_expression, description)[0]

    res = UnrecognizedNodeParser().validate_concept_node(context, node)

    assert res.status
    concept = res.body.concept
    assert concept == concepts_map["5params"]

    # checks common to every property: one successful return value, from the expected parser
    expected_parsers = {
        "a": "parsers.AtomNode",
        "b": "parsers.AtomNode",
        "c": "parsers.BnfNode",
        "d": "parsers.Python",
        "e": "parsers.SyaNode",
    }
    for prop_name, expected_who in expected_parsers.items():
        compiled = concept.compiled[prop_name]
        assert len(compiled) == 1
        assert sheerka.isinstance(compiled[0], BuiltinConcepts.RETURN_VALUE)
        assert compiled[0].status
        assert compiled[0].who == expected_who

    def parsed(prop_name):
        return concept.compiled[prop_name][0].body

    # checks specific to each property
    assert parsed("a").body == [cnode("one", 1, 1, "one")]
    assert parsed("b").body == [cnode("two", 1, 1, "two"), cnode("three", 3, 3, "three")]

    expected_twenties = compute_expected_array(
        concepts_map,
        " twenty one ",
        [CNC("twenties", source="twenty one", unit="one", one="one")])
    assert parsed("c").body == expected_twenties

    assert parsed("d").source == "1 + 2"

    expected_plus = compute_expected_array(
        concepts_map,
        " one plus two mult three ",
        [CNC("plus", a="one", b=CC("mult", a="two", b="three"))])
    assert parsed("e").body == expected_plus

    # TODO: unfinished sanity check, kept as found
    # # sanity check, I can evaluate the concept
    # evaluated = sheerka.evaluate_concept(self.get_context(sheerka, eval_body=True), concept)
    # assert evaluated.key == concept.key
    # assert evaluated.get_prop("a") ==
def test_i_can_validate_with_recursion(self):
    """Validation also goes through the properties of a concept nested in a property."""
    sheerka, context, _ = self.init_parser()
    description = CNC("plus",
                      a="1 ",
                      b=CC("mult", a=" 2 ", b=" twenty two"))
    node = get_input_nodes_from(concepts_map, "1 plus 2 mult twenty two", description)[0]

    res = UnrecognizedNodeParser().validate_concept_node(context, node)

    assert res.status
    plus = res.body.concept
    assert plus == concepts_map["plus"]

    # first level: 'a' is resolved as python code
    plus_a = plus.compiled["a"]
    assert len(plus_a) == 1
    assert plus_a[0].status
    assert plus_a[0].who == "parsers.Python"
    assert plus_a[0].body.source == "1"

    # second level: 'b' is the nested 'mult' concept, whose own properties are resolved too
    mult = plus.compiled["b"]
    assert mult == concepts_map["mult"]

    mult_a = mult.compiled["a"][0]
    assert sheerka.isinstance(mult_a, BuiltinConcepts.RETURN_VALUE)
    assert mult_a.status
    assert mult_a.who == "parsers.Python"
    assert mult_a.body.source == "2"

    mult_b = mult.compiled["b"][0]
    assert sheerka.isinstance(mult_b, BuiltinConcepts.RETURN_VALUE)
    assert mult_b.status
    assert mult_b.who == "parsers.BnfNode"
    expected_nodes = compute_expected_array(
        concepts_map,
        " twenty two",
        [CNC("twenties", source="twenty two", unit="two", two="two")])
    assert mult_b.body.body == expected_nodes
# def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
# sheerka, context, parser = self.init_parser()
#
# node = get_input_nodes_from(
# concepts_map,
# "one plus 1 + 1",
# CNC("plus",
# a=UTN("one "),
# b=UTN("1 + 1")))[0]
#
# res = UnrecognizedNodeParser().validate_concept_node(context, node)
#
# assert res.status
# assert res.body.concept == concepts_map["plus"]
# assert res.body.concept.compiled["a"] == concepts_map["one"]
# assert len(res.body.concept.compiled["b"]) == 1
# assert sheerka.isinstance(res.body.concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
# assert res.body.concept.compiled["b"][0].status
# assert res.body.concept.compiled["b"][0].who == "parsers.Python"
# assert res.body.concept.compiled["b"][0].body.source == "1 + 1"
#
# # # evaluate
# # context = self.get_context(sheerka, eval_body=True)
# # evaluated = sheerka.evaluate_concept(context, res.body.concept)
# # assert evaluated.body == 3
# def test_i_can_validate_and_evaluate_concept_when_bnf_concept(self):
# sheerka, context, parser = self.init_parser()
# node = get_concept_node(concepts_map, "one plus twenty one", "plus", "one", "twenty one")
#
# res = UnrecognizedNodeParser().validate_concept_node(context, node)
#
# assert res.status
# assert res.body.concept == concepts_map["plus"]
# assert res.body.concept.compiled["a"] == concepts_map["one"]
# assert len(res.body.concept.compiled["b"]) == 1
# assert res.body.concept.compiled["b"][0].status
# assert res.body.concept.compiled["b"][0].who == "parsers.BnfNode"
#
# # evaluate
# context = self.get_context(sheerka, eval_body=True)
# evaluated = sheerka.evaluate_concept(context, res.body.concept)
# assert evaluated.body == 22
def test_i_can_parse_and_evaluate_unrecognized_python_node(self):
    """Unrecognized tokens holding python code come back as a single source code node."""
    sheerka, context, parser = self.init_parser()
    expression = "1 + 1"
    parser_input = ParserResultConcept(
        "parsers.xxx",
        source=expression,
        value=get_input_nodes_from(concepts_map, expression, UTN(expression)))

    res = parser.parse(context, parser_input)
    parser_result = res.body
    actual_nodes = parser_result.body

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert len(actual_nodes) == 1
    assert actual_nodes[0] == scnode(0, 4, expression)
def test_i_can_parse_unrecognized_bnf_concept_node(self):
    """Unrecognized tokens matching the 'twenties' definition come back as that concept."""
    sheerka, context, parser = self.init_parser()
    expression = "twenty one"
    parser_input = ParserResultConcept(
        "parsers.xxx",
        source=expression,
        value=get_input_nodes_from(concepts_map, expression, UTN(expression)))

    res = parser.parse(context, parser_input)
    parser_result = res.body
    actual_nodes = parser_result.body

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert len(actual_nodes) == 1

    twenties = CNC("twenties", source=expression, unit="one", one="one")
    assert actual_nodes == compute_expected_array(concepts_map, expression, [twenties])
def test_i_can_parse_unrecognized_sya_concept_node(self):
    """Unrecognized tokens holding 'plus' and 'mult' come back as one 'plus' node wrapping 'mult'."""
    sheerka, context, parser = self.init_parser()
    expression = "one plus two mult three"
    parser_input = ParserResultConcept(
        "parsers.xxx",
        source=expression,
        value=get_input_nodes_from(concepts_map, expression, UTN(expression)))

    res = parser.parse(context, parser_input)
    parser_result = res.body
    actual_nodes = parser_result.body

    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert len(actual_nodes) == 1

    nested_mult = CC("mult", source="two mult three", a="two", b="three")
    expected_array = compute_expected_array(
        concepts_map,
        expression,
        [CNC("plus", a="one", b=nested_mult)])
    assert actual_nodes == expected_array
def test_i_can_parse_sequences(self):
    """A recognized concept followed by unrecognized tokens: only the tokens are resolved."""
    sheerka, context, parser = self.init_parser()
    expression = "one plus two three"
    already_recognized = CNC("plus", a="one", b="two")
    still_unknown = utnode(5, 6, " three")
    sequence = get_input_nodes_from(concepts_map, expression, already_recognized, still_unknown)
    parser_input = ParserResultConcept("parsers.xxx", source="one plus two three", value=sequence)

    res = parser.parse(context, parser_input)
    actual_nodes = res.body.body

    assert res.status
    expected_array = compute_expected_array(
        concepts_map,
        expression,
        [CNC("plus", a="one", b="two"), CN("three", start=6, end=6)])
    assert actual_nodes == expected_array
def test_i_can_parse_when_multiple_atom_and_sya(self):
    """'hello one' matches both 'hello_atom' and 'hello_sya': one result is returned for each."""
    sheerka, context, parser = self.init_parser()
    expression = "two hello one three"
    nodes = get_input_nodes_from(concepts_map, expression, "two", UTN("hello one"), "three")
    # NOTE(review): `source` is not the expression under test ("two hello one three"),
    # it looks like a copy/paste leftover - confirm that the parser does not rely on it
    parser_input = ParserResultConcept("parsers.xxx", source="one plus two hello one", value=nodes)

    res = parser.parse(context, parser_input)

    assert len(res) == 2
    assert res[0].status
    assert res[1].status

    def expected_with(middle_node):
        # only the middle node differs between the two results
        return compute_expected_array(
            concepts_map,
            expression,
            [CN("two", 0, 0), middle_node, CN("three", 6, 6)])

    actual_nodes0 = res[0].body.body
    assert actual_nodes0 == expected_with(CN("hello_atom", source="hello one", start=2, end=4))

    actual_nodes1 = res[1].body.body
    assert actual_nodes1 == expected_with(CNC("hello_sya", source="hello one", start=2, end=4, a="one"))
def test_i_can_parse_when_multiple_sya_concepts(self):
    """'greetings two' matches 'greetings_a' and 'greetings_b': one result is returned for each."""
    sheerka, context, parser = self.init_parser()
    expression = "greetings two"
    nodes = get_input_nodes_from(concepts_map, expression, UTN("greetings two"))
    parser_input = ParserResultConcept("parsers.xxx", source="greetings two", value=nodes)

    res = parser.parse(context, parser_input)

    assert len(res) == 2
    assert res[0].status
    assert res[1].status

    with_a = CNC("greetings_a", source="greetings two", start=0, end=2, a="two")
    actual_nodes0 = res[0].body.body
    assert actual_nodes0 == compute_expected_array(concepts_map, expression, [with_a])

    with_b = CNC("greetings_b", source="greetings two", start=0, end=2, b="two")
    actual_nodes1 = res[1].body.body
    assert actual_nodes1 == compute_expected_array(concepts_map, expression, [with_b])
def test_i_cannot_parse_when_i_cannot_validate(self):
    """A concept node with a property that cannot be resolved makes the parsing fail with an error."""
    sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
    expression = "one plus unknown tokens"
    invalid_plus = CNC("plus", a="one ", b=" unknown tokens")
    nodes = get_input_nodes_from(concepts_map, expression, invalid_plus)
    # NOTE(review): `source` ("six") is unrelated to the expression under test - confirm it is not used
    parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes)

    res = parser.parse(context, parser_input)

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
def test_i_cannot_parse_when_unrecognized(self):
    """Tokens that no parser recognizes are returned unchanged, with a failed status."""
    sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
    expression = "unknown tokens"
    nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
    # NOTE(review): `source` ("six") is unrelated to the expression under test - confirm it is not used
    parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes)

    res = parser.parse(context, parser_input)
    actual_nodes = res.body.body

    assert not res.status
    assert actual_nodes == nodes