Added SyaNodeParser (finally, after one month)
This commit is contained in:
+5
-5
@@ -659,7 +659,7 @@ For the two questions, I will first try the simple implementations and see there
|
||||
|
||||
Going back on BNF implementation. As it's Christmas eve today, I won't stay very long.
|
||||
|
||||
So, the implementation lies in the class ConceptLexerParser, a it's a lexer not for token, but for concept.
|
||||
So, the implementation lies in the class BnfNodeParser, as it's a lexer not for tokens, but for concepts.
|
||||
The purpose of this class is to recognize a sequence of Concept.
|
||||
|
||||
So if we define the following concepts
|
||||
@@ -675,7 +675,7 @@ when you input
|
||||
|
||||
one two three four five
|
||||
|
||||
the list of :code:`[foo, bar]` will be returned by the ConceptLexerParser (as return values)
|
||||
the list of :code:`[foo, bar]` will be returned by the BnfNodeParser (as return values)
|
||||
|
||||
How does it work?
|
||||
|
||||
@@ -696,7 +696,7 @@ Some example :
|
||||
and so on...
|
||||
|
||||
So when a concept is defined using its bnf definition, I use the **BnfParser** to create the grammar, and then
|
||||
I use the **ConceptLexerParser** to recognize the concepts
|
||||
I use the **BnfNodeParser** to recognize the concepts
|
||||
|
||||
The current implementation to recognize a concept is not very efficient. All the definitions are in a dictionary
|
||||
and I go thru the whole dictionary to see if some concepts are recognized. Once a concept is found, I loop again
|
||||
@@ -713,7 +713,7 @@ So once the parsing is effective, I return a **ConceptNode** object
|
||||
|
||||
class ConceptNode(LexerNode):
|
||||
"""
|
||||
Returned by the ConceptLexerParser
|
||||
Returned by the BnfNodeParser
|
||||
It represents a recognized concept
|
||||
"""
|
||||
|
||||
@@ -859,7 +859,7 @@ As of now, I have implemented the following parsers:
|
||||
* DefaultParser (the name is not accurate)
|
||||
To recognize builtin syntax (like 'def concept' or 'isa')
|
||||
|
||||
* ConceptLexerParser
|
||||
* BnfNodeParser
|
||||
To recognize concept defined with BNF language
|
||||
|
||||
All these parsers are executed in a row (the order is not very important)
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
ReturnValue(who=evaluators.TooManySuccess, status=False, value=(21)__TOO_MANY_SUCCESS, message=None)
|
||||
@@ -37,6 +37,8 @@ class BuiltinConcepts(Enum):
|
||||
PARSER_RESULT = "parser result"
|
||||
TOO_MANY_SUCCESS = "too many success" # when expecting a limited number of successful return value
|
||||
TOO_MANY_ERRORS = "too many errors" # when expecting a limited number of successful return value
|
||||
ONLY_SUCCESSFUL = "only successful" # filter the result, only keep successful ones
|
||||
MULTIPLE_ERRORS = "multiple errors" # filter the result, only keep evaluator in error
|
||||
NOT_FOR_ME = "not for me" # a parser recognize that the entry is not meant for it
|
||||
IS_EMPTY = "is empty" # when a set is empty
|
||||
INVALID_RETURN_VALUE = "invalid return value" # the return value of an evaluator is not correct
|
||||
@@ -45,6 +47,7 @@ class BuiltinConcepts(Enum):
|
||||
CONCEPT_EVAL_ERROR = "concept evaluation error" # cannot evaluate a property or metadata of a concept
|
||||
ENUMERATION = "enum" # represents a list or a set
|
||||
LIST = "list" # represents a list
|
||||
FILTERED = "filtered" # represents the result of a filtering
|
||||
CONCEPT_ALREADY_IN_SET = "concept already in set"
|
||||
EVALUATOR_PRE_PROCESS = "evaluator pre process" # used modify / tweak behaviour of evaluators
|
||||
EVAL_BODY_REQUESTED = "eval body requested" # to evaluate the body
|
||||
@@ -91,6 +94,7 @@ BuiltinErrors = [str(e) for e in {
|
||||
BuiltinConcepts.UNKNOWN_PROPERTY,
|
||||
BuiltinConcepts.TOO_MANY_SUCCESS,
|
||||
BuiltinConcepts.TOO_MANY_ERRORS,
|
||||
BuiltinConcepts.MULTIPLE_ERRORS,
|
||||
BuiltinConcepts.INVALID_RETURN_VALUE,
|
||||
BuiltinConcepts.CONCEPT_ALREADY_DEFINED,
|
||||
BuiltinConcepts.CONCEPT_EVAL_ERROR,
|
||||
@@ -249,11 +253,12 @@ class ParserResultConcept(Concept):
|
||||
Result of a parsing
|
||||
"""
|
||||
|
||||
def __init__(self, parser=None, source=None, value=None, try_parsed=None, validate_concept=None):
|
||||
def __init__(self, parser=None, source=None, tokens=None, value=None, try_parsed=None):
|
||||
super().__init__(BuiltinConcepts.PARSER_RESULT, True, False, BuiltinConcepts.PARSER_RESULT)
|
||||
self.set_metadata_value(ConceptParts.BODY, value)
|
||||
self.set_prop("parser", parser)
|
||||
self.set_prop("source", source)
|
||||
self.set_prop("tokens", tokens)
|
||||
self.set_prop("try_parsed", try_parsed) # in case of error, what was found before the error
|
||||
self.metadata.is_evaluated = True
|
||||
|
||||
@@ -372,6 +377,14 @@ class ListConcept(Concept):
|
||||
# return item in self.body
|
||||
|
||||
|
||||
class FilteredConcept(Concept):
    """
    Builtin concept that carries the outcome of a filtering

    The body holds the items that were kept.
    The unfiltered input and the predicate that was applied are recorded
    as the "iterable" and "predicate" properties.
    """

    def __init__(self, filtered=None, iterable=None, predicate=None):
        concept_type = BuiltinConcepts.FILTERED
        super().__init__(concept_type, True, False, concept_type)
        self.set_metadata_value(ConceptParts.BODY, filtered)
        for prop_name, prop_value in (("iterable", iterable), ("predicate", predicate)):
            self.def_prop(prop_name, prop_value)
|
||||
|
||||
|
||||
class ConceptAlreadyInSet(Concept):
|
||||
def __init__(self, concept=None, concept_set=None):
|
||||
super().__init__(BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
|
||||
@@ -409,3 +422,17 @@ class WhereClauseFailed(Concept):
|
||||
@property
|
||||
def concept(self):
|
||||
return self.body
|
||||
|
||||
|
||||
class NotForMeConcept(Concept):
    """
    Builtin concept used to tell that an input was not meant for the one that received it

    The body holds the rejected source, the "reason" property explains why.
    The concept is flagged as already evaluated.
    """

    def __init__(self, source=None, reason=None):
        concept_type = BuiltinConcepts.NOT_FOR_ME
        super().__init__(concept_type, True, False, concept_type)
        self.set_metadata_value(ConceptParts.BODY, source)
        self.def_prop("reason", reason)
        self.metadata.is_evaluated = True

    def __repr__(self):
        source = self.body
        reason = self.get_prop('reason')
        return f"NotForMeConcept(source={source}, reason={reason})"
|
||||
|
||||
@@ -6,6 +6,8 @@ from core.ast.nodes import CallNodeConcept, GenericNodeConcept
|
||||
from core.ast.visitors import UnreferencedNamesVisitor
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.BaseParser import BaseParser, ErrorNode
|
||||
|
||||
|
||||
def is_same_success(context, return_values):
|
||||
@@ -132,6 +134,181 @@ def expect_one(context, return_values):
|
||||
parents=return_values)
|
||||
|
||||
|
||||
def only_successful(context, return_values):
    """
    Keep only the return values whose status is truthy

    Anything that is not a list is handed back untouched.
    :param context: execution context, provides 'sheerka' and 'who'
    :param return_values: list of return values to filter
    :return: the input itself when it is not a list, otherwise one return value:
             failed IS_EMPTY when the list is empty,
             failed TOO_MANY_ERRORS when nothing is successful,
             successful ONLY_SUCCESSFUL (body = the successful items) otherwise
    """
    if not isinstance(return_values, list):
        return return_values

    sheerka = context.sheerka

    # decide the outcome first, build the return value once at the end
    if not return_values:
        status, concept_key, body = False, BuiltinConcepts.IS_EMPTY, return_values
    else:
        kept = [candidate for candidate in return_values if candidate.status]
        if kept:
            status, concept_key, body = True, BuiltinConcepts.ONLY_SUCCESSFUL, kept
        else:
            status, concept_key, body = False, BuiltinConcepts.TOO_MANY_ERRORS, return_values

    return sheerka.ret(
        context.who,
        status,
        sheerka.new(concept_key, body=body),
        parents=return_values)
|
||||
|
||||
|
||||
def only_parsers_results(context, return_values):
    """
    Keep the return values whose body is a ParserResult, whatever their status

    Parser results whose own body is an ErrorNode, or a list made of a single
    UnrecognizedTokensNode, are dropped as well.
    Anything that is not a list is handed back untouched.
    :param context: execution context, provides 'sheerka' and 'who'
    :param return_values: list of return values to filter
    :return: the input itself when it is not a list, otherwise one return value:
             failed IS_EMPTY when the list is empty,
             failed TOO_MANY_ERRORS when nothing is kept,
             successful FILTERED (body = the kept items) otherwise
    """

    def is_unusable(parsed):
        # hack because some parsers don't follow the NOT_FOR_ME rule
        if isinstance(parsed, ErrorNode):
            return True
        return isinstance(parsed, list) and \
            len(parsed) == 1 and \
            isinstance(parsed[0], UnrecognizedTokensNode)

    if not isinstance(return_values, list):
        return return_values

    sheerka = context.sheerka

    if not return_values:
        return sheerka.ret(
            context.who,
            False,
            sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values),
            parents=return_values)

    # first stage: only the parser results
    parser_results = [item for item in return_values
                      if sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)]

    # second stage: discard the ones that carry nothing usable
    kept = [item for item in parser_results if not is_unusable(item.body.body)]

    if not kept:
        return sheerka.ret(
            context.who,
            False,
            sheerka.new(BuiltinConcepts.TOO_MANY_ERRORS, body=return_values),
            parents=return_values)

    filtered_concept = sheerka.new(
        BuiltinConcepts.FILTERED,
        body=kept,
        iterable=return_values,
        predicate="sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)")
    return sheerka.ret(context.who, True, filtered_concept, parents=return_values)
|
||||
|
||||
|
||||
def parse_unrecognized(context, tokens, parsers):
    """
    Run the parsing steps on tokens with only the requested parsers enabled

    When one of the results is a success coming from 'parsers.AtomNode',
    the results coming from 'parsers.Python' are removed.
    :param context: execution context, a sub context is pushed for the parsing
    :param tokens: what to parse, used as the body of a USER_INPUT concept
    :param parsers: names of the parsers to enable (BaseParser.PREFIX is prepended)
    :return: the return values of the execution, possibly without the Python ones
    """
    sheerka = context.sheerka
    steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]

    with context.push(desc=f"Parsing unrecognized '{tokens}'") as sub_context:
        # switch every parser off, then switch the requested ones back on
        sub_context.add_preprocess(BaseParser.PREFIX + "*", enabled=False)
        for parser_name in parsers:
            sub_context.add_preprocess(BaseParser.PREFIX + parser_name, enabled=True)

        sub_context.add_inputs(source=tokens)
        to_parse = sheerka.ret(
            context.who,
            True,
            sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
        res = sheerka.execute(sub_context, to_parse, steps)
        sub_context.add_values(return_values=res)

    # discard Python response if accepted by AtomNode
    accepted_by_atom = any(r.status and r.who == "parsers.AtomNode" for r in res)
    if not accepted_by_atom:
        return res

    return [r for r in res if r.who != "parsers.Python"]
|
||||
|
||||
|
||||
def get_lexer_nodes(return_values, start, tokens):
    """
    Turn parser results into sequences of LexerNode
    (ConceptNode, UnrecognizedTokensNode or SourceCodeNode)

    The handling depends on who produced the return value:
    * parsers.Python: one SourceCodeNode, unless the stripped source is alphanumeric
      and not numeric (it then looks like a concept and is discarded)
    * parsers.ExactConcept: one ConceptNode per concept found
    * parsers.BnfNode, parsers.SyaNode, parsers.AtomNode: the nodes of the result,
      whose start and end are shifted IN PLACE by 'start'
    :param return_values: return values holding the parser results
    :param start: position of the first token
    :param tokens: tokens that were parsed
    :return: list of list (list of concept node sequence)
    :raises NotImplementedError: when the return value comes from another parser
    """
    node_parsers = ("parsers.BnfNode", "parsers.SyaNode", "parsers.AtomNode")
    sequences = []

    for ret_val in return_values:
        producer = ret_val.who

        if producer == "parsers.Python":
            stripped = ret_val.body.source.strip()
            if stripped.isalnum() and not stripped.isnumeric():
                # Discard SourceCodeNode which seems to be a concept
                # It may be a wrong idea, so let's see
                continue

            last = start + len(tokens) - 1
            code_node = SourceCodeNode(ret_val.body.body, start, last, tokens, ret_val.body.source, ret_val)
            sequences.append([code_node])

        elif producer == "parsers.ExactConcept":
            found = ret_val.body.body
            if not hasattr(found, "__iter__"):
                found = [found]
            last = start + len(tokens) - 1
            # every concept is an alternative, so each one gets its own sequence
            sequences.extend(
                [ConceptNode(concept, start, last, tokens, ret_val.body.source)]
                for concept in found)

        elif producer in node_parsers:
            shifted = list(ret_val.body.body)
            for node in shifted:
                node.start += start
                node.end += start

            # the nodes form one sequence, it is appended as a whole
            sequences.append(shifted)

        else:
            raise NotImplementedError()

    return sequences
|
||||
|
||||
|
||||
def get_names(sheerka, concept_node):
|
||||
"""
|
||||
Finds all the names referenced by the concept_node
|
||||
|
||||
+81
-2
@@ -108,11 +108,14 @@ class Concept:
|
||||
|
||||
def __eq__(self, other):
|
||||
|
||||
if id(self) == id(other):
|
||||
return True
|
||||
|
||||
if isinstance(other, simplec):
|
||||
return self.name == other.name and self.body == other.body
|
||||
|
||||
if id(self) == id(other):
|
||||
return True
|
||||
if isinstance(other, CC):
|
||||
return other == self
|
||||
|
||||
if not isinstance(other, Concept):
|
||||
return False
|
||||
@@ -346,6 +349,17 @@ class Concept:
|
||||
"""
|
||||
return self.props[prop_name].value
|
||||
|
||||
def set_prop_by_index(self, index: int, value):
    """
    Set the value of a property (not the metadata) designated by its position
    :param index: position of the property among the keys of self.props
    :param value: value to give to the property
    :return: self
    """
    ordered_names = list(self.props)
    target = self.props[ordered_names[index]]
    target.value = value
    return self
|
||||
|
||||
def set_metadata_value(self, metadata: ConceptParts, value):
|
||||
"""
|
||||
Set the resolved value of a metadata (not the metadata itself)
|
||||
@@ -438,3 +452,68 @@ class InfiniteRecursionResolved:
|
||||
|
||||
def get_value(self):
|
||||
return self.value
|
||||
|
||||
|
||||
class CC:
    """
    Concept class for test purpose
    CC means concept for compiled (or concept with compiled)
    It matches a concept when the keys are the same and the compiled are equal
    """

    # The only properties that are tested are concept_key and compiled
    # The other properties (concept, source, start and end)
    # are used in tests/parsers/parsers_utils.py to help creating helper objects

    def __init__(self, concept, source=None, **kwargs):
        is_concept = isinstance(concept, Concept)
        self.concept_key = concept.key if is_concept else concept
        self.compiled = kwargs
        self.concept = concept if is_concept else None
        self.source = source  # to use when the key is different from the sub str to search when filling start and stop
        self.start = None  # for debug purpose, indicate where the concept starts
        self.end = None  # for debug purpose, indicate where the concept ends

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, Concept):
            if other.key != self.concept_key:
                return False
            return self.compiled == other.compiled

        if isinstance(other, CC):
            return self.concept_key == other.concept_key and \
                self.compiled == other.compiled

        return False

    def __hash__(self):
        return hash(self.concept if self.concept else self.concept_key)

    def __repr__(self):
        if self.concept:
            head = f"CC(concept='{self.concept}'"
        else:
            head = f"CC(concept_key='{self.concept_key}'"

        tail = "".join(f", {k}='{v}'" for k, v in self.compiled.items())
        return head + tail + ")"

    def fix_pos(self, node):
        """
        Widen [start, end] so that it includes the position of node
        :param node: object with 'start' / 'end' attributes, or tuple (start, end)
        :return: self
        """
        if hasattr(node, "start"):
            start = node.start
        elif isinstance(node, tuple):
            start = node[0]
        else:
            start = None

        if hasattr(node, "end"):
            end = node.end
        elif isinstance(node, tuple):
            end = node[1]
        else:
            end = None

        if start is not None and (self.start is None or start < self.start):
            self.start = start

        if end is not None and (self.end is None or end > self.end):
            self.end = end

        return self
|
||||
|
||||
@@ -43,6 +43,7 @@ class ExecutionContext:
|
||||
desc: str = None,
|
||||
logger=None,
|
||||
global_hints=None,
|
||||
global_errors=None,
|
||||
**kwargs):
|
||||
|
||||
self._parent = None
|
||||
@@ -61,6 +62,7 @@ class ExecutionContext:
|
||||
self.logger = logger
|
||||
self.local_hints = set()
|
||||
self.global_hints = set() if global_hints is None else global_hints
|
||||
self.global_errors = [] if global_errors is None else global_errors
|
||||
|
||||
self.inputs = {} # what was the parameters of the execution context
|
||||
self.values = {} # what was produced by the execution context
|
||||
@@ -146,8 +148,8 @@ class ExecutionContext:
|
||||
preprocess.set_prop(k, v)
|
||||
|
||||
if not self.preprocess:
|
||||
self.preprocess = set()
|
||||
self.preprocess.add(preprocess)
|
||||
self.preprocess = []
|
||||
self.preprocess.append(preprocess)
|
||||
return self
|
||||
|
||||
def add_inputs(self, **kwargs):
|
||||
@@ -212,6 +214,7 @@ class ExecutionContext:
|
||||
desc,
|
||||
logger,
|
||||
self.global_hints,
|
||||
self.global_errors,
|
||||
**_kwargs)
|
||||
new._parent = self
|
||||
new._tab = self._tab + " " * DEBUG_TAB_SIZE
|
||||
@@ -230,7 +233,8 @@ class ExecutionContext:
|
||||
if self.logger and not self.logger.disabled:
|
||||
self.logger.debug(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
|
||||
|
||||
def log_error(self, message, who=None):
|
||||
def log_error(self, message, who=None, exc=None):
|
||||
self.global_errors.append(exc or message)
|
||||
if self.logger and not self.logger.disabled:
|
||||
self.logger.exception(f"[{self._id:2}]" + self._tab + (f"[{who}] " if who else "") + str(message))
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
from core.builtin_concepts import BuiltinConcepts, ErrorConcept
|
||||
from core.concept import Concept
|
||||
from sdp.sheerkaDataProvider import SheerkaDataProviderDuplicateKeyError, SheerkaDataProviderRef
|
||||
import core.utils
|
||||
|
||||
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
|
||||
BNF_NODE_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
|
||||
BASE_NODE_PARSER_CLASS = "parsers.BaseNodeParser.BaseNodeParser"
|
||||
|
||||
|
||||
class SheerkaCreateNewConcept:
|
||||
@@ -13,6 +15,7 @@ class SheerkaCreateNewConcept:
|
||||
def __init__(self, sheerka):
|
||||
self.sheerka = sheerka
|
||||
self.logger_name = self.create_new_concept.__name__
|
||||
self.base_lexer_parser = core.utils.get_class(BASE_NODE_PARSER_CLASS)("BaseNodeParser", 0)
|
||||
|
||||
def create_new_concept(self, context, concept: Concept):
|
||||
"""
|
||||
@@ -25,7 +28,7 @@ class SheerkaCreateNewConcept:
|
||||
|
||||
concept.init_key()
|
||||
concepts_definitions = None
|
||||
init_ret_value = None
|
||||
init_bnf_ret_value = None
|
||||
|
||||
sdp = self.sheerka.sdp
|
||||
|
||||
@@ -49,13 +52,19 @@ class SheerkaCreateNewConcept:
|
||||
concepts_definitions[concept] = concept.bnf
|
||||
|
||||
# check if it's a valid BNF or whether it breaks the known rules
|
||||
concept_lexer_parser = self.sheerka.parsers[CONCEPT_LEXER_PARSER_CLASS]()
|
||||
bnf_lexer_parser = self.sheerka.parsers[BNF_NODE_PARSER_CLASS]()
|
||||
with context.push(self.sheerka.name, desc=f"Initializing concept definition for {concept}") as sub_context:
|
||||
sub_context.concepts[concept.key] = concept # the concept is not in the real cache yet
|
||||
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
|
||||
sub_context.add_values(return_values=init_ret_value)
|
||||
if not init_ret_value.status:
|
||||
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_ret_value.value))
|
||||
init_bnf_ret_value = bnf_lexer_parser.initialize(sub_context, concepts_definitions)
|
||||
sub_context.add_values(return_values=init_bnf_ret_value)
|
||||
if not init_bnf_ret_value.status:
|
||||
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_bnf_ret_value.value))
|
||||
|
||||
# update concept definition by key
|
||||
init_sya_ret_value = self.base_lexer_parser.initialize(context, [concept], use_sheerka=True)
|
||||
if not init_sya_ret_value.status:
|
||||
return self.sheerka.ret(self.logger_name, False, ErrorConcept(init_sya_ret_value.value))
|
||||
concepts_by_first_keyword = init_sya_ret_value.body
|
||||
|
||||
concept.freeze_definition_hash()
|
||||
|
||||
@@ -97,9 +106,15 @@ class SheerkaCreateNewConcept:
|
||||
sdp.set(
|
||||
context.event.get_digest(),
|
||||
self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
|
||||
concept_lexer_parser.encode_grammar(init_ret_value.body),
|
||||
bnf_lexer_parser.encode_grammar(init_bnf_ret_value.body),
|
||||
use_ref=True)
|
||||
self.sheerka.concepts_definitions_cache = None # invalidate cache
|
||||
|
||||
# update the concepts by first keyword
|
||||
sdp.set(context.event.get_digest(),
|
||||
self.sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
|
||||
concepts_by_first_keyword)
|
||||
|
||||
except SheerkaDataProviderDuplicateKeyError as error:
|
||||
context.log_error("Failed to create a new concept.", who=self.logger_name)
|
||||
return self.sheerka.ret(
|
||||
@@ -109,13 +124,13 @@ class SheerkaCreateNewConcept:
|
||||
error.args[0])
|
||||
|
||||
# Updates the caches
|
||||
|
||||
self.sheerka.cache_by_key[concept.key] = sdp.get_safe(self.sheerka.CONCEPTS_ENTRY, concept.key)
|
||||
self.sheerka.cache_by_name[concept.name] = sdp.get_safe(self.sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name)
|
||||
self.sheerka.cache_by_id[concept.id] = concept
|
||||
if init_ret_value is not None and init_ret_value.status:
|
||||
self.sheerka.concepts_grammars = init_ret_value.body
|
||||
if init_bnf_ret_value is not None and init_bnf_ret_value.status:
|
||||
self.sheerka.concepts_grammars = init_bnf_ret_value.body
|
||||
self.sheerka.concepts_by_first_keyword = concepts_by_first_keyword
|
||||
|
||||
# process the return in needed
|
||||
# process the return if needed
|
||||
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
|
||||
return ret
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved
|
||||
from core.builtin_helpers import add_to_ret_val, remove_from_ret_val, expect_one
|
||||
from core.builtin_helpers import expect_one
|
||||
|
||||
CONCEPT_EVALUATION_STEPS = [
|
||||
BuiltinConcepts.BEFORE_EVALUATION,
|
||||
|
||||
@@ -33,6 +33,8 @@ class SheerkaExecute:
|
||||
|
||||
# group the parsers by priorities
|
||||
instantiated_parsers = [parser(sheerka=self.sheerka) for parser in self.sheerka.parsers.values()]
|
||||
instantiated_parsers = self.preprocess(execution_context, instantiated_parsers)
|
||||
|
||||
grouped_parsers = {}
|
||||
for parser in [p for p in instantiated_parsers if p.enabled]:
|
||||
grouped_parsers.setdefault(parser.priority, []).append(parser)
|
||||
@@ -44,7 +46,6 @@ class SheerkaExecute:
|
||||
|
||||
for parser in grouped_parsers[priority]:
|
||||
|
||||
return_value_success_found = False
|
||||
for return_value in inputs_for_this_group:
|
||||
|
||||
to_parse = return_value.body.body \
|
||||
@@ -67,22 +68,23 @@ class SheerkaExecute:
|
||||
r.parents = [return_value]
|
||||
result.append(r)
|
||||
if self.sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT):
|
||||
# if a ParserResultConcept is returned, it will be used by the parsers
|
||||
# of the following groups
|
||||
to_process.append(r)
|
||||
if r.status:
|
||||
return_value_success_found = True
|
||||
stop_processing = True
|
||||
|
||||
else:
|
||||
res.parents = [return_value]
|
||||
result.append(res)
|
||||
if self.sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT):
|
||||
# if a ParserResultConcept is returned, it will be used by the parsers
|
||||
# of the following groups
|
||||
to_process.append(res)
|
||||
if res.status:
|
||||
return_value_success_found = True
|
||||
stop_processing = True
|
||||
sub_context.add_values(return_values=res)
|
||||
|
||||
if return_value_success_found:
|
||||
stop_processing = True
|
||||
break # Stop the other return_values (but not the other parsers with the same priority)
|
||||
|
||||
if stop_processing:
|
||||
break # Do not try the other priorities if a match is found
|
||||
@@ -102,7 +104,7 @@ class SheerkaExecute:
|
||||
instantiated_evaluators = [e_class() for e_class in self.sheerka.evaluators]
|
||||
|
||||
# pre-process evaluators if needed
|
||||
instantiated_evaluators = self._preprocess_evaluators(execution_context, instantiated_evaluators)
|
||||
instantiated_evaluators = self.preprocess(execution_context, instantiated_evaluators)
|
||||
|
||||
for evaluator in [e for e in instantiated_evaluators if e.enabled and process_step in e.steps]:
|
||||
grouped_evaluators.setdefault(evaluator.priority, []).append(evaluator)
|
||||
@@ -123,7 +125,7 @@ class SheerkaExecute:
|
||||
evaluated_items = []
|
||||
to_delete = []
|
||||
for evaluator in grouped_evaluators[priority]:
|
||||
evaluator = self._preprocess_evaluators(execution_context, evaluator.__class__()) # fresh copy
|
||||
evaluator = self.preprocess(execution_context, evaluator.__class__()) # fresh copy
|
||||
|
||||
sub_context_desc = f"Evaluating using {evaluator.name} ({priority=})"
|
||||
with iteration_context.push(desc=sub_context_desc, logger=evaluator.verbose_log) as sub_context:
|
||||
@@ -215,22 +217,29 @@ class SheerkaExecute:
|
||||
|
||||
return return_values
|
||||
|
||||
def _preprocess_evaluators(self, context, evaluators):
|
||||
def preprocess(self, context, parsers_or_evaluators):
|
||||
if not context.preprocess:
|
||||
return evaluators
|
||||
return parsers_or_evaluators
|
||||
|
||||
if not hasattr(evaluators, "__iter__"):
|
||||
if not hasattr(parsers_or_evaluators, "__iter__"):
|
||||
single_one = True
|
||||
evaluators = [evaluators]
|
||||
parsers_or_evaluators = [parsers_or_evaluators]
|
||||
else:
|
||||
single_one = False
|
||||
|
||||
for preprocess in context.preprocess:
|
||||
for e in evaluators:
|
||||
if preprocess.props["name"].value == e.name:
|
||||
for e in parsers_or_evaluators:
|
||||
if self.matches(e.name, preprocess.get_prop("name")):
|
||||
for prop, value in preprocess.props.items():
|
||||
if prop == "name":
|
||||
continue
|
||||
if hasattr(e, prop):
|
||||
setattr(e, prop, value.value)
|
||||
return evaluators[0] if single_one else evaluators
|
||||
return parsers_or_evaluators[0] if single_one else parsers_or_evaluators
|
||||
|
||||
@staticmethod
|
||||
def matches(parser_or_evaluator_name, preprocessor_name):
|
||||
if preprocessor_name.endswith("*"):
|
||||
return parser_or_evaluator_name.startswith(preprocessor_name[:-1])
|
||||
else:
|
||||
return parser_or_evaluator_name == preprocessor_name
|
||||
|
||||
+94
-10
@@ -17,12 +17,7 @@ from core.sheerka_logger import console_handler
|
||||
|
||||
import logging
|
||||
|
||||
# CONCEPT_EVALUATION_STEPS = [
|
||||
# BuiltinConcepts.BEFORE_EVALUATION,
|
||||
# BuiltinConcepts.EVALUATION,
|
||||
# BuiltinConcepts.AFTER_EVALUATION]
|
||||
|
||||
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
|
||||
CONCEPT_LEXER_PARSER_CLASS = "parsers.BnfNodeParser.BnfNodeParser"
|
||||
BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser"
|
||||
CONCEPTS_FILE = "_concepts.txt"
|
||||
|
||||
@@ -37,6 +32,9 @@ class Sheerka(Concept):
|
||||
CONCEPTS_BY_NAME_ENTRY = "Concepts_By_Name"
|
||||
CONCEPTS_BY_HASH_ENTRY = "Concepts_By_Hash" # store hash of concepts definitions (not values)
|
||||
CONCEPTS_DEFINITIONS_ENTRY = "Concepts_Definitions" # to store definitions (bnf) of concepts
|
||||
CONCEPTS_BY_FIRST_KEYWORD_ENTRY = "Concepts_By_First_Keyword"
|
||||
CONCEPTS_SYA_DEFINITION_ENTRY = "Concepts_Sya_Definitions"
|
||||
|
||||
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" # sequential key for builtin concepts
|
||||
USER_CONCEPTS_KEYS = "User_Concepts" # sequential key for user defined concepts
|
||||
|
||||
@@ -65,6 +63,10 @@ class Sheerka(Concept):
|
||||
# a grammar is a resolved BNF
|
||||
self.concepts_grammars = {}
|
||||
|
||||
# cache for SYA concepts
|
||||
self.concepts_by_first_keyword = {}
|
||||
self.sya_definitions = {}
|
||||
|
||||
# a concept can be instantiated
|
||||
# ex: File is a concept, but File('foo.txt') is an instance
|
||||
# TODO: manage contexts
|
||||
@@ -119,7 +121,8 @@ class Sheerka(Concept):
|
||||
self.initialize_builtin_concepts()
|
||||
self.initialize_builtin_parsers()
|
||||
self.initialize_builtin_evaluators()
|
||||
self.initialize_concepts_definitions(exec_context)
|
||||
self.initialize_bnf_parsing(exec_context)
|
||||
self.initialize_sya_parsing()
|
||||
res = ReturnValueConcept(self, True, self)
|
||||
|
||||
exec_context.add_values(return_values=res)
|
||||
@@ -174,12 +177,25 @@ class Sheerka(Concept):
|
||||
"""
|
||||
core.utils.init_package_import("parsers")
|
||||
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
|
||||
modules_to_skip = ["parsers.BaseNodeParser"]
|
||||
|
||||
temp_result = {}
|
||||
for parser in core.utils.get_sub_classes("parsers", base_class):
|
||||
if parser.__module__ == base_class.__module__:
|
||||
continue
|
||||
|
||||
self.init_log.debug(f"Adding builtin parser '{parser.__name__}'")
|
||||
self.parsers[core.utils.get_full_qualified_name(parser)] = parser
|
||||
if parser.__module__ in modules_to_skip:
|
||||
continue
|
||||
|
||||
qualified_name = core.utils.get_full_qualified_name(parser)
|
||||
self.init_log.debug(f"Adding builtin parser '{qualified_name}'")
|
||||
temp_result[qualified_name] = parser
|
||||
|
||||
# Now we sort the parser by name.
|
||||
# It's not important for the logic of their usage as they have their priority anyway,
|
||||
# We do that for the unit tests. They are too complicated to write otherwise
|
||||
for name in sorted(temp_result.keys()):
|
||||
self.parsers[name] = temp_result[name]
|
||||
|
||||
def initialize_builtin_evaluators(self):
|
||||
"""
|
||||
@@ -195,7 +211,7 @@ class Sheerka(Concept):
|
||||
self.init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||
self.evaluators.append(evaluator)
|
||||
|
||||
def initialize_concepts_definitions(self, execution_context):
|
||||
def initialize_bnf_parsing(self, execution_context):
|
||||
self.init_log.debug("Initializing concepts grammars.")
|
||||
definitions = self.get_concepts_definitions(execution_context)
|
||||
|
||||
@@ -211,6 +227,25 @@ class Sheerka(Concept):
|
||||
|
||||
self.concepts_grammars = lexer_parser.concepts_grammars
|
||||
|
||||
def initialize_sya_parsing(self):
    """
    Load the sya caches (concepts by first keyword and sya definitions) from sdp
    An empty dict is used when sdp has nothing (or something falsy) for an entry
    """
    self.init_log.debug("Initializing sya definitions.")

    stored_keywords = self.sdp.get_safe(self.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, load_origin=False)
    self.concepts_by_first_keyword = stored_keywords if stored_keywords else {}

    stored_definitions = self.sdp.get_safe(self.CONCEPTS_SYA_DEFINITION_ENTRY, load_origin=False)
    self.sya_definitions = stored_definitions if stored_definitions else {}
|
||||
|
||||
def reset(self):
    """
    Clear the caches, then reset sdp and put the user concepts key back to 1000
    """
    self.reset_cache()

    # in memory caches (bnf grammars and sya)
    self.concepts_by_first_keyword, self.concepts_grammars, self.sya_definitions = {}, {}, {}

    # persisted data
    data_provider = self.sdp
    data_provider.reset()
    data_provider.set_key(self.USER_CONCEPTS_KEYS, 1000)
|
||||
|
||||
def reset_cache(self, filter_to_use=None):
|
||||
"""
|
||||
reset the different cache that exists
|
||||
@@ -220,6 +255,7 @@ class Sheerka(Concept):
|
||||
if filter_to_use is None:
|
||||
self.cache_by_key = {}
|
||||
self.cache_by_id = {}
|
||||
self.cache_by_name = {}
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
@@ -324,6 +360,38 @@ class Sheerka(Concept):
|
||||
"""
|
||||
return self.sets_handler.set_isa(context, concept, concept_set)
|
||||
|
||||
def set_sya_def(self, context, list_of_def):
    """
    Set the precedence and/or the associativity of a concept

    An entry whose precedence and associativity are both None removes the
    definition of the concept (removing an unknown definition is not an error).
    An entry with only one of the two values stores None for the missing one.

    :param context: execution context, its event digest is used when saving
    :param list_of_def: list of tuple(concept_id, precedence (int), SyaAssociativity)
    :return: a successful return value, or an ERROR return value when one of
             the concept ids is BuiltinConcepts.UNKNOWN_CONCEPT
    """

    # validate the entries first, so that nothing is modified when one of them is invalid
    for concept_id, _, _ in list_of_def:
        if concept_id == BuiltinConcepts.UNKNOWN_CONCEPT:
            return self.ret(self.name,
                            False,
                            self.new(BuiltinConcepts.ERROR, body=f"Concept {concept_id} is not known"))

    # update the definitions
    for concept_id, precedence, associativity in list_of_def:
        if precedence is None and associativity is None:
            self.sya_definitions.pop(concept_id, None)
        else:
            # the associativity is optional ("and/or"): do not fail when only the precedence is given
            associativity_value = associativity.value if associativity is not None else None
            self.sya_definitions[concept_id] = (precedence, associativity_value)

    # then save
    self.sdp.set(context.event.get_digest(),
                 self.CONCEPTS_SYA_DEFINITION_ENTRY,
                 self.sya_definitions)

    return self.ret(self.name, True, self.new(BuiltinConcepts.SUCCESS))
|
||||
|
||||
def get_set_elements(self, context, concept):
|
||||
"""
|
||||
Concept is supposed to be a set
|
||||
@@ -571,6 +639,22 @@ class Sheerka(Concept):
|
||||
|
||||
return self.value(body_to_use)
|
||||
|
||||
def get_error(self, obj):
    """
    Extract the error carried by obj

    * a builtin concept whose key is in BuiltinErrors is returned as is
    * a list is returned as is
    * a RETURN_VALUE gives None when its status is truthy, otherwise the error
      found in the body of its PARSER_RESULT

    NOTE(review): any other input gets a NotImplementedError *instance* as
    result (it is returned, not raised) - confirm that this is intended.
    """
    is_builtin_error = isinstance(obj, Concept) and obj.metadata.is_builtin and obj.key in BuiltinErrors
    if is_builtin_error or isinstance(obj, list):
        return obj

    if self.isinstance(obj, BuiltinConcepts.RETURN_VALUE):
        if obj.status:
            return None

        parser_result = obj.body
        if self.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT):
            return self.get_error(parser_result.body)

    return NotImplementedError()
|
||||
|
||||
def get_values(self, objs):
|
||||
if not (isinstance(objs, list) or
|
||||
self.isinstance(objs, BuiltinConcepts.LIST) or
|
||||
|
||||
+36
-2
@@ -163,7 +163,7 @@ def remove_list_from_list(lst, to_remove):
|
||||
def product(a, b):
|
||||
"""
|
||||
Kind of cartesian product between lists a and b
|
||||
knowing that a is also a list
|
||||
knowing that a is also a list : a is a list of list !!!
|
||||
|
||||
So it's a cartesian product between a list of list and a list
|
||||
"""
|
||||
@@ -176,7 +176,12 @@ def product(a, b):
|
||||
res = []
|
||||
for item_b in b:
|
||||
for item_a in a:
|
||||
items = item_a + [item_b]
|
||||
#items = item_a + [item_b]
|
||||
items = item_a[:]
|
||||
if hasattr(item_b, "__iter__"):
|
||||
items.extend(item_b)
|
||||
else:
|
||||
items.append(item_b)
|
||||
res.append(items)
|
||||
|
||||
return res
|
||||
@@ -276,6 +281,7 @@ def str_concept(t):
|
||||
>>> assert str_concept((None, "id")) == "c:|id:"
|
||||
>>> assert str_concept(("key", None)) == "c:key:"
|
||||
>>> assert str_concept((None, None)) == ""
|
||||
>>> assert str_concept(Concept(key="foo", id="bar")) == "c:foo|bar:"
|
||||
:param t:
|
||||
:return:
|
||||
"""
|
||||
@@ -297,6 +303,12 @@ def unstr_concept(concept_repr):
|
||||
"""
|
||||
if concept_repr is like :c:key:id:
|
||||
return the key and the id
|
||||
>>> assert unstr_concept("c:key:") == "key"
|
||||
>>> assert unstr_concept("c:key|id:") == ("key", "id")
|
||||
>>> assert unstr_concept("c:|id:") == ("None", "id")
|
||||
>>> assert unstr_concept("c:key|:") == ("key", "None")
|
||||
>>> # Otherwise, return (None,None)
|
||||
|
||||
:param concept_repr:
|
||||
:return:
|
||||
"""
|
||||
@@ -371,3 +383,25 @@ def decode_concept(text):
|
||||
return key, id_, use_concept
|
||||
|
||||
return None, None, None
|
||||
|
||||
|
||||
def tokens_index(tokens, sub_tokens, skip=0):
    """
    Index of the sub tokens in tokens

    Tokens are compared by value. EOF tokens of sub_tokens are ignored.

    :param tokens: tokens
    :param sub_tokens: sub tokens to search
    :param skip: number of occurrences to skip before returning
    :return: index, in tokens, of the first token of the occurrence
    :raise ValueError: when there is no such occurrence
    """
    wanted = [sub_token.value for sub_token in sub_tokens if sub_token.type != TokenKind.EOF]
    width = len(wanted)
    to_skip = skip

    for start in range(len(tokens) - width + 1):
        window = tokens[start:start + width]
        if any(token.value != value for token, value in zip(window, wanted)):
            continue

        # full match at this position
        if to_skip == 0:
            return start
        to_skip -= 1

    raise ValueError(f"sub tokens '{sub_tokens}' not found")
|
||||
|
||||
@@ -5,7 +5,7 @@ from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
|
||||
from core.tokenizer import TokenKind
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
from parsers.BaseParser import NotInitializedNode
|
||||
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
|
||||
from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor
|
||||
from parsers.DefaultParser import DefConceptNode, NameNode
|
||||
from parsers.PythonParser import PythonNode
|
||||
import core.utils
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
||||
from parsers.BaseNodeParser import SourceCodeNode
|
||||
from parsers.BnfNodeParser import ConceptNode
|
||||
from parsers.PythonParser import LexerNodeParserHelperForPython, PythonNode
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from evaluators.BaseEvaluator import AllReturnValuesEvaluator
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
|
||||
class MultipleErrorsEvaluator(AllReturnValuesEvaluator):
    """
    Used to reduce several evaluator errors into one MULTIPLE_ERRORS return value
    All parser errors are discarded (they are eaten, but not reported)
    Cannot match if there is at least one successful evaluator or parser
    """
    NAME = "MultipleErrors"

    def __init__(self):
        super().__init__(self.NAME, [BuiltinConcepts.AFTER_EVALUATION], 30)
        self.return_values_in_error = []  # failed evaluator return values, filled by matches()

    def matches(self, context, return_values):
        """
        Sort the return values and tell whether eval() must be called

        :param context:
        :param return_values:
        :return: truthy only when a reduce was requested and more than one evaluator failed
        """
        reduce_requested = False
        failed_evaluators = 0

        for ret in return_values:
            if ret.status:
                if ret.who.startswith(self.PREFIX) or ret.who.startswith(BaseParser.PREFIX):
                    # at least one success : nothing to reduce
                    return False

                if context.sheerka.isinstance(ret.body, BuiltinConcepts.REDUCE_REQUESTED):
                    reduce_requested = True
                    self.eaten.append(ret)

            elif ret.who.startswith(self.PREFIX):
                failed_evaluators += 1
                self.return_values_in_error.append(ret)
                self.eaten.append(ret)

            elif ret.who.startswith(BaseParser.PREFIX):
                self.eaten.append(ret)

            # other concepts. We do not care if they are successful or not
            # They won't be part of the result nor part of the parents
            # --> So they will be handled by other evaluators

        return reduce_requested and failed_evaluators > 1

    def eval(self, context, return_values):
        """
        Build the failed return value that wraps all the evaluator errors

        :param context:
        :param return_values: not used, the values were collected by matches()
        :return: failed return value whose body is a MULTIPLE_ERRORS concept
        """
        errors = self.return_values_in_error
        context.log(f"{len(self.return_values_in_error)} return value in error, {len(self.eaten)} item(s) eaten",
                    who=self)
        context.log(f"{self.return_values_in_error}", who=self)

        sheerka = context.sheerka
        multiple_errors = sheerka.new(BuiltinConcepts.MULTIPLE_ERRORS, body=errors)
        return sheerka.ret(self.name, False, multiple_errors, parents=self.eaten)
|
||||
@@ -31,6 +31,10 @@ class OneErrorEvaluator(AllReturnValuesEvaluator):
|
||||
self.eaten.append(ret)
|
||||
elif not ret.status and ret.who.startswith(BaseParser.PREFIX):
|
||||
self.eaten.append(ret)
|
||||
# else:
|
||||
# other concepts. We do not care if there are successful or not
|
||||
# They won't be part of result nor part of the parent
|
||||
# --> So they will be handled by other evaluators
|
||||
|
||||
return to_process and nb_evaluators_in_error == 1
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import copy
|
||||
import traceback
|
||||
from enum import Enum
|
||||
|
||||
from core.ast.visitors import UnreferencedNamesVisitor
|
||||
@@ -59,7 +60,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
|
||||
return sheerka.ret(self.name, True, evaluated, parents=[return_value])
|
||||
|
||||
except Exception as error:
|
||||
context.log_error(error, self.name)
|
||||
context.log_error(error, who=self.name, exc=traceback.format_exc())
|
||||
error = sheerka.new(BuiltinConcepts.ERROR, body=error)
|
||||
return sheerka.ret(self.name, False, error, parents=[return_value])
|
||||
|
||||
|
||||
@@ -0,0 +1,369 @@
|
||||
import copy
|
||||
from dataclasses import dataclass
|
||||
|
||||
from core import builtin_helpers
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, DEFINITION_TYPE_BNF
|
||||
from core.tokenizer import TokenKind, Tokenizer
|
||||
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
|
||||
|
||||
# Names of the parsers given to builtin_helpers.parse_unrecognized()
# when trying to recognize the tokens buffered as unrecognized
PARSERS = ["BnfNode", "SyaNode", "Python"]
|
||||
|
||||
|
||||
@dataclass()
class TokensNodeFound(ErrorNode):
    """
    Error raised when the parsing ends while some tokens are still expected

    Two instances are equal when they expect the same tokens, in the same order
    """
    expected_tokens: list  # values of the tokens that were still expected

    def __eq__(self, other):
        if other is self:
            return True

        # compare with our own type : this class only defines 'expected_tokens'
        if not isinstance(other, TokensNodeFound):
            return False

        return list(self.expected_tokens) == list(other.expected_tokens)

    def __hash__(self):
        # a list is not hashable, use an immutable snapshot of it
        return hash(tuple(self.expected_tokens))
|
||||
|
||||
|
||||
class AtomConceptParserHelper:
    """
    Builds one candidate sequence of nodes while AtomNodeParser walks the tokens

    The parser feeds it with tokens (eat_token, eat_unrecognized) and concepts (eat_concept).
    When the buffered unrecognized tokens can be understood in several ways,
    the helper clones itself : the clones are listed in 'forked'
    """

    def __init__(self, context):
        self.context = context
        self.debug = []  # everything that was given to this helper, in order
        self.sequence = []  # sequence of nodes already found
        self.current_concept: ConceptNode = None  # concept being parsed
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of tokens positions
        self.expected_tokens = None  # tokens still expected for the current concept
        self.is_locked = False  # when locked, eat_concept() and eat_unrecognized() do nothing
        self.errors = []
        self.has_unrecognized = False  # True when the sequence contains unrecognized tokens
        self.forked = []  # used to duplicate AtomConceptParserHelper. See manage_unrecognized()

    def __eq__(self, other):
        # only the sequences are compared
        if other is self:
            return True

        if not isinstance(other, AtomConceptParserHelper):
            return False

        if len(self.sequence) != len(other.sequence):
            return False

        return not any(mine != theirs for mine, theirs in zip(self.sequence, other.sequence))

    def __hash__(self):
        return hash(len(self.sequence))

    def __repr__(self):
        return f"{self.sequence}"

    def lock(self):
        self.is_locked = True

    def reset(self):
        self.is_locked = False

    def has_error(self):
        return len(self.errors) > 0

    def eat_token(self, token, pos):
        """
        Try to match the token with the next token expected by the current concept

        :param token:
        :param pos: position of the token
        :return: True if the token was expected, False otherwise (or when nothing is expected)
        """
        if not self.expected_tokens:
            return False

        self.debug.append(token)

        awaited = self.expected_tokens[0]
        if awaited != BaseNodeParser.get_token_value(token):
            self.errors.append(UnexpectedTokenErrorNode(
                f"Found '{token}' while expecting '{awaited}'",
                token,
                [awaited]))
            return False

        self.current_concept.end = pos
        del self.expected_tokens[0]

        if not self.expected_tokens:
            # the concept is fully matched
            self.sequence.append(self.current_concept)
            self.expected_tokens = None

        return True

    def eat_concept(self, concept, pos):
        """
        Start the recognition of a concept (does nothing when the helper is locked)

        :param concept:
        :param pos: position of the first token of the concept
        :return:
        """
        if self.is_locked:
            return

        self.debug.append(concept)
        self.manage_unrecognized()
        for fork in self.forked:
            # manage that some clones may have been forked
            fork.eat_concept(concept, pos)

        node = ConceptNode(concept, pos, pos)
        # first and last tokens of the tokenized name are dropped
        remaining = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]

        if remaining:
            self.current_concept = node
            self.expected_tokens = remaining
        else:
            # the concept is already matched
            self.sequence.append(node)

    def manage_unrecognized(self):
        """
        Flush the buffer of unrecognized tokens into the sequence

        When the buffer can be parsed, the nodes found replace the raw tokens.
        One helper is needed per sequence of nodes returned : the current one
        is used for the first sequence, clones (added to 'forked') for the others
        """
        buffered = self.unrecognized_tokens
        if buffered.is_empty():
            return

        # do not put empty UnrecognizedToken in out
        if buffered.is_whitespace():
            buffered.reset()
            return

        buffered.fix_source()

        # try to recognize concepts
        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
        if not nodes_sequences:
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True
        else:
            helpers = [self]
            for _ in range(len(nodes_sequences) - 1):
                fork = self.clone()
                helpers.append(fork)
                self.forked.append(fork)

            for helper, nodes in zip(helpers, nodes_sequences):
                for node in nodes:
                    helper.sequence.append(node)
                    if isinstance(node, UnrecognizedTokensNode) or \
                            (hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens):
                        helper.has_unrecognized = True
                helper.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

        # create another instance
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def eat_unrecognized(self, token, pos):
        """
        Buffer a token that starts no concept (does nothing when the helper is locked)
        """
        if self.is_locked:
            return

        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
        """
        To call when there is no more token
        Flushes the pending unrecognized tokens and reports the tokens that are still expected
        """
        if len(self.sequence) > 0:
            self.manage_unrecognized()
            for fork in self.forked:
                # manage that some clones may have been forked
                fork.finalize()

        if self.expected_tokens:
            self.errors.append(TokensNodeFound(self.expected_tokens))

    def clone(self):
        """
        Copy of the helper. The lists are copied, their items are shared.
        'forked' is not copied
        """
        duplicate = AtomConceptParserHelper(self.context)
        duplicate.debug = self.debug[:]
        duplicate.sequence = self.sequence[:]
        duplicate.current_concept = self.current_concept.clone() if self.current_concept else None
        duplicate.unrecognized_tokens = self.unrecognized_tokens.clone()
        duplicate.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
        duplicate.is_locked = self.is_locked
        duplicate.errors = self.errors[:]
        duplicate.has_unrecognized = self.has_unrecognized
        return duplicate

    def _get_lexer_nodes_from_unrecognized(self):
        """
        Use the source of self.unrecognized_tokens to find concepts or source code
        :return: None when the parsers find nothing, else the result of builtin_helpers.get_lexer_nodes()
        """
        buffered = self.unrecognized_tokens

        res = builtin_helpers.parse_unrecognized(self.context, buffered.source, PARSERS)
        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)

        if not only_parsers_results.status:
            return None

        return builtin_helpers.get_lexer_nodes(
            only_parsers_results.body.body,
            buffered.start,
            buffered.tokens)
|
||||
|
||||
|
||||
class AtomNodeParser(BaseNodeParser):
    """
    Parser used to recognize atom concepts or sequences of atom concepts
    An atom concept is a concept that does not have any property, though it may have a body

    So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter)
    This parser can recognize the sequence 'one two three'
    as [ConceptNode(one), ConceptNode(two), ConceptNode(three)]
    It can partly recognize 'one x$1!! two three'
    as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)]
    It cannot recognize concepts with parameters (non atom)
    ex: 'one plus two' won't be recognized as ConceptNode(plus, one, two)
    it will be [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)]

    Note 'one plus two' will be recognized by the SyaParser
    """

    def __init__(self, **kwargs):
        super().__init__("AtomNode", 50, **kwargs)
        self.enabled = False

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that selects the concepts that must be handled by AtomNodeParser
        :param concept:
        :return: True for a concept without property, or defined by a bnf
        """
        metadata = concept.metadata
        return len(metadata.props) == 0 or metadata.definition_type == DEFINITION_TYPE_BNF

    def get_concepts_sequences(self):
        """
        Walk the tokens and build all the possible sequences of nodes
        :return: list of AtomConceptParserHelper, one per candidate sequence
        """

        def collect_forks():
            # the helpers may have forked themselves : the forks become regular helpers
            spawned = []
            for helper in helpers:
                if len(helper.forked) > 0:
                    spawned.extend(helper.forked)
                    helper.forked.clear()
            if len(spawned) > 0:
                helpers.extend(spawned)

        helpers = [AtomConceptParserHelper(self.context)]

        while self.next_token(False):
            for helper in helpers:
                helper.reset()

            token = self.token

            try:
                # a helper that consumes the token is locked : it will ignore what follows
                for helper in helpers:
                    if helper.eat_token(self.token, self.pos):
                        helper.lock()

                concepts = self.get_concepts(token, self._is_eligible)
                if not concepts:
                    for helper in helpers:
                        helper.eat_unrecognized(token, self.pos)
                    continue

                if len(concepts) == 1:
                    for helper in helpers:
                        helper.eat_concept(concepts[0], self.pos)
                    continue

                # make the cartesian product
                product = []
                for helper in helpers:
                    if helper.is_locked:
                        # It means that it already ate the token
                        # so simply add it, do not clone
                        product.append(helper)
                        continue

                    for concept in concepts:
                        duplicate = helper.clone()
                        product.append(duplicate)
                        duplicate.eat_concept(concept, self.pos)

                helpers = product
            finally:
                collect_forks()

        # make sure that remaining items in stack are moved to out
        for helper in helpers:
            helper.reset()
            helper.finalize()
        collect_forks()

        return helpers

    def get_valid(self, concept_parser_helpers):
        """
        Keep the helpers that have no error and a non empty sequence, without duplicates
        The tokens and the source of the nodes of the kept helpers are fixed on the fly
        :param concept_parser_helpers:
        :return: list of AtomConceptParserHelper
        """
        kept = []  # be careful, each helper holds a list of nodes
        for candidate in concept_parser_helpers:
            if candidate.has_error() or len(candidate.sequence) == 0:
                continue

            # must be done before looking for duplicates, as the nodes are compared
            for node in candidate.sequence:
                node.tokens = self.tokens[node.start:node.end + 1]
                node.fix_source()

            if candidate not in kept:
                kept.append(candidate)

        return kept

    def parse(self, context, parser_input):
        """
        Try to recognize a sequence of atom concepts
        :param context:
        :param parser_input:
        :return: one return value per valid sequence (a single object if there is only one).
                 The status is False when the sequence contains unrecognized tokens
        """
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        parser_helpers = self.get_valid(self.get_concepts_sequences())

        if not len(parser_helpers):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))

        ret = [
            self.sheerka.ret(
                self.name,
                not parser_helper.has_unrecognized,
                self.sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=parser_helper.sequence,
                    try_parsed=parser_helper.sequence))
            for parser_helper in parser_helpers]

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]

        self.log_multiple_results(context, parser_input, ret)
        return ret
|
||||
@@ -0,0 +1,669 @@
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import VARIABLE_PREFIX, Concept
|
||||
from core.sheerka.ExecutionContext import ExecutionContext
|
||||
from core.tokenizer import TokenKind, LexerError, Token
|
||||
from parsers.BaseParser import Node, BaseParser, ErrorNode
|
||||
|
||||
# When True, ConceptNode.__repr__ also lists the entries of concept.compiled
DEBUG_COMPILED = True
|
||||
|
||||
|
||||
@dataclass()
class LexerNode(Node):
    """
    Base class of the nodes that cover a range of tokens

    When no source is given, it is computed from the tokens
    """
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        # same as fix_source(), but only when the source is not provided
        self.fix_source(force=False)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False

        mine = (self.start, self.end, self.source, self.tokens)
        theirs = (other.start, other.end, other.source, other.tokens)
        return mine == theirs

    def fix_source(self, force=True):
        """
        Compute the source from the tokens
        :param force: when False, an existing source is kept
        :return: self
        """
        if force or self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)
        return self
|
||||
|
||||
|
||||
class UnrecognizedTokensNode(LexerNode):
    """
    Buffer of tokens that are not recognized

    Keeps the position of the first and of the last token added,
    and the balance of the parenthesis found (LPAR / RPAR tokens)
    A start (or an end) of -1 means that the buffer has no position yet
    """

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False
        self.parenthesis_count = 0

    def freeze(self):
        self.is_frozen = True

    def reset(self):
        """
        Empty the buffer (the list of tokens is cleared in place)
        """
        self.start = -1
        self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def add_token(self, token, pos):
        """
        Add a token at the end of the buffer
        :param token:
        :param pos: position of the token
        :return: self
        """
        if self.is_frozen:
            raise Exception("The node is frozen")

        if self.end != -1 and pos == self.end + 2:
            # exactly one position was skipped : add the missing whitespace
            previous = self.tokens[-1]
            whitespace = Token(TokenKind.WHITESPACE, " ", previous.index + 1, previous.line, previous.column + 1)
            self.tokens.append(whitespace)

        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            self.start = pos

        kind = token.type
        if kind == TokenKind.LPAR:
            self.parenthesis_count += 1

        if kind == TokenKind.RPAR:
            self.parenthesis_count -= 1

        return self

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        # True for an empty buffer as well
        blank = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
        return not any(t.type not in blank for t in self.tokens)

    def is_empty(self):
        return len(self.tokens) == 0

    def _same_span(self, other):
        # same positions and same source
        return self.start == other.start and \
               self.end == other.end and \
               self.source == other.source

    def __eq__(self, other):
        # utnode and UTN are the simplified versions used by the tests
        if isinstance(other, utnode):
            return self._same_span(other)

        if isinstance(other, UTN):
            return other == self

        if isinstance(other, UnrecognizedTokensNode):
            return self._same_span(other)

        return False

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"

    def clone(self):
        """
        Copy of the node. The list of tokens is copied, the tokens are shared
        """
        duplicate = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        duplicate.is_frozen = self.is_frozen
        duplicate.parenthesis_count = self.parenthesis_count
        return duplicate
|
||||
|
||||
|
||||
class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept

    It can be compared with the tester objects (CN, CNC, cnode, short_cnode),
    which check only a part of the node
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying
        self.fix_source(False)

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, (CN, CNC)):
            # the tester class knows how to compare itself with a ConceptNode
            return other == self

        if isinstance(other, cnode):
            return (self.concept.key, self.start, self.end, self.source) == \
                   (other.concept_key, other.start, other.end, other.source)

        if isinstance(other, short_cnode):
            return (self.concept.key, self.source) == (other.concept_key, other.source)

        if not isinstance(other, ConceptNode):
            return False

        return (self.concept, self.start, self.end, self.source, self.underlying) == \
               (other.concept, other.start, other.end, other.source, other.underlying)

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        parts = [f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"]
        if DEBUG_COMPILED:
            parts.extend(f", {k}='{v}'" for k, v in self.concept.compiled.items())
        parts.append(")")
        return "".join(parts)

    def clone(self):
        """
        Copy of the node. The concept, the tokens and the underlying are shared with the original
        """
        # do we need to clone the concept as well ?
        return ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
|
||||
|
||||
|
||||
class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized
    """

    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
        super().__init__(start, end, tokens, source)
        self.node = node  # The PythonNode (or whatever language node) that is found
        self.return_value = return_value  # original result of the parsing

    def __eq__(self, other):
        same_span = isinstance(other, (scnode, SourceCodeNode)) and \
                    self.start == other.start and \
                    self.end == other.end and \
                    self.source == other.source

        if isinstance(other, scnode):
            # scnode is the simplified version used by the tests : there is no node to compare
            return same_span

        if not isinstance(other, SourceCodeNode):
            return False

        return self.node == other.node and same_span

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of temporary version for SourceCodeNode
    I know that there is some code,
    I know that there are some concepts
    I just don't want to make the glue yet

    So I push all the nodes into one big bag

    'start' and 'end' are the smallest start and the biggest end of all the nodes of the bag
    """

    def __init__(self, first_node, last_node, content_nodes=None):
        # impossible positions, they are replaced as soon as a node has a position
        super().__init__(9999, -1, None)  # why not sys.maxint ?
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = False
        self.fix_all_pos()

    def add_node(self, node):
        """
        Add a node to the bag and update the positions
        :param node:
        :return: self
        """
        self.nodes.append(node)
        self.fix_pos(node)

        return self

    def __eq__(self, other):
        if self is other:
            return True

        if not isinstance(other, SourceCodeWithConceptNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if self.first != other.first or self.last != other.last:
            return False

        if len(self.nodes) != len(other.nodes):
            return False

        return not any(mine != theirs for mine, theirs in zip(self.nodes, other.nodes))

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        """
        Compute start and end from all the nodes (first, last and content)
        """
        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        """
        Widen start / end so that they include the node
        Nodes without position are ignored
        :param node:
        :return: self
        """
        start = getattr(node, "start", None)
        if start is not None and start < self.start:
            self.start = start

        end = getattr(node, "end", None)
        if end is not None and end > self.end:
            self.end = end

        return self

    def pseudo_fix_source(self):
        """
        Build an approximate source from the sources of the nodes
        Every content node is preceded by a space, nothing is added before the last node
        :return: self
        """
        parts = [self.first.source]
        for n in self.nodes:
            parts.append(" ")
            if hasattr(n, "source"):
                parts.append(n.source)
            elif hasattr(n, "concept"):
                parts.append(str(n.concept))
            else:
                parts.append(" unknown")
        parts.append(self.last.source)

        self.source = "".join(parts)
        return self

    def clone(self):
        """
        Copy of the node. First, last and the content nodes are shared,
        but the clone has its own list of content nodes
        :return:
        """
        # copy the list, otherwise add_node() on the clone also modifies the original
        # NOTE(review): has_unrecognized and source are not carried over to the clone
        return SourceCodeWithConceptNode(self.first, self.last, list(self.nodes))
|
||||
|
||||
|
||||
@dataclass
class GrammarErrorNode(ErrorNode):
    """
    Error node that carries a message
    """
    message: str  # description of the error
|
||||
|
||||
|
||||
class SyaAssociativity(Enum):
    """
    Associativity of a concept

    The repr of a member is its value
    """
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        # the members are displayed by their value only
        value = self.value
        return value
|
||||
|
||||
|
||||
# Lightweight tuples that can be compared with the real nodes
# (see the __eq__ of ConceptNode, UnrecognizedTokensNode and SourceCodeNode)
cnode = namedtuple("ConceptNode", ["concept_key", "start", "end", "source"])
short_cnode = namedtuple("ConceptNode", ["concept_key", "source"])  # when the positions do not matter
utnode = namedtuple("utnode", ["start", "end", "source"])
scnode = namedtuple("scnode", ["start", "end", "source"])
|
||||
|
||||
|
||||
@dataclass(init=False)
class SCWC:
    """
    SourceCodeWithConceptNode tester class
    It matches with a SourceCodeWithConceptNode
    but it's easier to instantiate during the tests

    The content nodes are given as extra positional arguments
    """
    first: LexerNode
    last: LexerNode
    content: tuple

    def __init__(self, first, last, *args):
        self.first, self.last = first, last
        self.content = args  # tuple of the content nodes
|
||||
|
||||
|
||||
class HelperWithPos:
    """
    Base class for the tester classes that have a start and an end

    A position given to the constructor is fixed : fix_pos() never changes it.
    A position that is not given is deduced from the nodes passed to fix_pos()
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end

        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    @staticmethod
    def _position_of(node, attribute, index):
        # the position is an attribute of the node, or an item when the node is a tuple
        if hasattr(node, attribute):
            return getattr(node, attribute)
        if isinstance(node, tuple):
            return node[index]
        return None

    def fix_pos(self, node):
        """
        Widen the positions that are not fixed, so that they include the node
        :param node: object with 'start' / 'end' attributes, or tuple (start, end)
        :return: self
        """
        if not self.start_is_fixed:
            start = self._position_of(node, "start", 0)
            if start is not None and (self.start is None or start < self.start):
                self.start = start

        if not self.end_is_fixed:
            end = self._position_of(node, "end", 1)
            if end is not None and (self.end is None or end > self.end):
                self.end = end

        return self
|
||||
|
||||
|
||||
class CN(HelperWithPos):
    """
    ConceptNode tester class
    It matches with ConceptNode but with less constraints

    CN == ConceptNode if the concept key is the same,
    and if start and end are the same when they are defined on the CN
    (the source is not compared with the one of the ConceptNode)
    """

    def __init__(self, concept, start=None, end=None, source=None):
        """

        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        given_a_concept = isinstance(concept, Concept)
        self.concept_key = concept.key if given_a_concept else concept
        self.source = source
        self.concept = concept if given_a_concept else None  # only kept for the repr

    def fix_source(self, str_tokens):
        """
        Set the source by joining the given strings
        :param str_tokens: iterable of strings
        :return: self
        """
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, ConceptNode):
            # the positions are checked only when they are known
            mismatch = other.concept is None or \
                       other.concept.key != self.concept_key or \
                       (self.start is not None and self.start != other.start) or \
                       (self.end is not None and self.end != other.end)
            return not mismatch

        if not isinstance(other, CN):
            return False

        return (self.concept_key, self.start, self.end, self.source) == \
               (other.concept_key, other.start, other.end, other.source)

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        parts = [f"CN(concept='{self.concept}'" if self.concept else f"CN(concept_key='{self.concept_key}'",
                 f", source='{self.source}'"]
        if self.start is not None:
            parts.append(f", start={self.start}")
        if self.end is not None:
            parts.append(f", end={self.end}")
        parts.append(")")
        return "".join(parts)
|
||||
|
||||
|
||||
class CNC(CN):
    """
    ConceptNode for Compiled tester class
    It matches with ConceptNode
    But focuses on the 'compiled' property of the concept

    CNC == ConceptNode if the concept key is the same, if start / end are the same
    when they are defined on the CNC, and if CNC.compiled == ConceptNode.concept.compiled

    CNC == CNC if concept key, start, end, source and compiled are the same
    """

    def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
        """

        :param concept_key: Concept or concept_key (forwarded to CN)
        :param start: expected start, None if not relevant
        :param end: expected end, None if not relevant
        :param source: expected source
        :param kwargs: expected content of the 'compiled' dictionary of the concept
        """
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs

    def __eq__(self, other):
        if self is other:
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            # start and end are only checked when they are defined on our side
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.compiled == other.concept.compiled

        if not isinstance(other, CNC):
            return False

        return (self.concept_key == other.concept_key and
                self.start == other.start and
                self.end == other.end and
                self.source == other.source and
                self.compiled == other.compiled)

    def __hash__(self):
        # Overriding __eq__ without __hash__ makes the class unhashable (Python sets
        # __hash__ to None), whereas the parent class CN is hashable.
        # 'compiled' is a dict, so it cannot take part in the hash; two equal CNC
        # still share the same key, start, end and source, hence the same hash.
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"

        # one entry per expected compiled property
        txt += "".join(f", {k}='{v}'" for k, v in self.compiled.items())
        return txt + ")"
|
||||
|
||||
|
||||
class BaseNodeParser(BaseParser):
    """
    Base class for the parsers that produce nodes.

    It holds the token cursor (tokens, pos, token), the index of the concepts
    by first keyword, and the sya definitions.
    """

    def __init__(self, name, priority, **kwargs):
        """
        :param name: forwarded to BaseParser
        :param priority: forwarded to BaseParser
        :param kwargs: when 'sheerka' is given, the definitions are taken from it
        """
        super().__init__(name, priority)

        # Always define the attributes first: init_from_sheerka() only assigns
        # sya_definitions when sheerka actually has some, and initialize() reads it.
        self.concepts_by_first_keyword = None
        self.sya_definitions = None
        if 'sheerka' in kwargs:
            self.init_from_sheerka(kwargs.get("sheerka"))

        self.token = None
        self.pos = -1
        self.tokens = None

        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None

    def init_from_sheerka(self, sheerka):
        """
        Use the definitions from Sheerka to initialize

        concepts_by_first_keyword is the very same object as the one of sheerka (no copy).
        sya_definitions is rebuilt, the second item of each value being converted
        to a SyaAssociativity. It is left untouched when sheerka has no sya definition.
        :param sheerka:
        :return:
        """
        self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
        if sheerka.sya_definitions:
            self.sya_definitions = {
                k: (v[0], SyaAssociativity(v[1]))
                for k, v in sheerka.sya_definitions.items()
            }

    def reset_parser(self, context, text):
        """
        Prepare the parser for a new input

        :param context: execution context, its sheerka is kept
        :param text: parser input, turned into a list of tokens
        :return: False (and an error is recorded) if the input cannot be tokenized, True otherwise
        """
        self.context = context
        self.sheerka = context.sheerka
        self.text = text

        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False

        # the cursor is placed before the first token
        self.token = None
        self.pos = -1
        return True

    def add_error(self, error, next_token=True):
        """
        Record an error

        :param error: the error to put in the error sink
        :param next_token: when True, also moves to the next token
        :return: the error
        """
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        """
        :return: the current token (None before the first call to next_token())
        """
        return self.token

    def next_token(self, skip_whitespace=True):
        """
        Move to the next token

        :param skip_whitespace: when True, WHITESPACE and NEWLINE tokens are skipped
        :return: False when the current token is (or becomes) EOF, True otherwise
        """
        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.pos += 1
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                self.pos += 1
                self.token = self.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
        """
        To quickly find a concept, we store them in an hash where the key is the first token of the concept
        example :
        Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
        So the key to use will be "foo"

        Concept("a foo").def_prop("a") -> first token is "foo"

        Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
        Note that under the same key, there will be multiple entry
        a B-Tree may be a better implementation in the future

        We also store sya_definition which a is tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
        :param context:
        :param concepts: list[Concept]
        :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
        :param use_sheerka: first init with the definitions from Sheerka
        :return: a return value (built by sheerka.ret) whose body is concepts_by_first_keyword
        """
        self.context = context
        self.sheerka = context.sheerka

        if use_sheerka:
            self.init_from_sheerka(self.sheerka)

        if sya_definitions:
            if self.sya_definitions:
                self.sya_definitions.update(sya_definitions)
            else:
                self.sya_definitions = sya_definitions

        if self.concepts_by_first_keyword is None:
            self.concepts_by_first_keyword = {}

        for concept in concepts:
            # the first word of the key that is not a variable is the entry in the hash
            first_keyword = next(
                (word for word in concept.key.split() if not word.startswith(VARIABLE_PREFIX)),
                None)
            if first_keyword is not None:
                self.concepts_by_first_keyword.setdefault(first_keyword, []).append(concept.id)

        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)

    def get_concepts(self, token, to_keep, to_map=None):
        """
        Tries to find if there are concepts that match the value of the token
        :param token:
        :param to_keep: predicate to tell if the concept is eligible
        :param to_map: optional function applied to every concept that is kept
        :return: None if the value of the token is not a known first keyword,
                 otherwise the list (possibly empty) of the eligible concepts
        """
        name = self.get_token_value(token)
        if name not in self.concepts_by_first_keyword:
            return None

        result = []
        for concept_id in self.concepts_by_first_keyword[name]:
            concept = self.sheerka.get_by_id(concept_id)
            if not to_keep(concept):
                continue
            result.append(to_map(concept) if to_map else concept)
        return result

    @staticmethod
    def get_token_value(token):
        """
        :param token:
        :return: the value of the token: without its first and last characters for a STRING,
                 value.value for a KEYWORD, the raw value otherwise
        """
        if token.type == TokenKind.STRING:
            return token.value[1:-1]
        if token.type == TokenKind.KEYWORD:
            return token.value.value
        return token.value
|
||||
|
||||
|
||||
class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode

    UTN == UnrecognizedTokensNode (or another UTN) if start, end and source are the same.
    NOTE(review): unlike CN, a start / end left to None is compared as-is, it is
    not ignored. Confirm that this is intended.
    """

    def __init__(self, source, start=None, end=None):
        """
        :param source: expected source
        :param start: expected start (can be computed later with fix_pos())
        :param end: expected end (can be computed later with fix_pos())
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if self is other:
            return True

        # the real node and the tester class are compared the same way
        if not isinstance(other, (UnrecognizedTokensNode, UTN)):
            return False

        return (self.start == other.start and
                self.end == other.end and
                self.source == other.source)

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        pieces = [f"UTN( source='{self.source}'"]
        # the bounds are only shown when they are known
        if self.start is not None:
            pieces.append(f", start={self.start}")
        if self.end is not None:
            pieces.append(f", end={self.end}")
        pieces.append(")")
        return "".join(pieces)
|
||||
@@ -1,8 +1,8 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import TokenKind, Keywords, Token
|
||||
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
|
||||
from core.sheerka_logger import get_logger
|
||||
import core.utils
|
||||
import logging
|
||||
@@ -77,7 +77,6 @@ class BaseParser:
|
||||
self.priority = priority
|
||||
self.enabled = enabled
|
||||
|
||||
self.has_error = False
|
||||
self.error_sink = []
|
||||
|
||||
def __eq__(self, other):
|
||||
@@ -91,9 +90,13 @@ class BaseParser:
|
||||
def __repr__(self):
|
||||
return self.name
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
pass
|
||||
|
||||
@property
|
||||
def has_error(self):
|
||||
return len(self.error_sink) > 0
|
||||
|
||||
def log_result(self, context, source, ret):
|
||||
if not self.log.isEnabledFor(logging.DEBUG):
|
||||
return
|
||||
@@ -132,6 +135,53 @@ class BaseParser:
|
||||
body=self.error_sink if self.has_error else tree,
|
||||
try_parsed=try_parse)
|
||||
|
||||
def get_input_as_text(self, parser_input, custom_switcher=None):
    """
    Gives the parser input as a text

    :param parser_input: list of tokens, ParserResultConcept (its source is used) or text
    :param custom_switcher: forwarded to get_text_from_tokens()
    :return: the text rebuilt from the tokens when the input is a list or contains "c:",
             the input (or the source of the ParserResultConcept) unchanged otherwise
    """
    if isinstance(parser_input, list):
        tokens = parser_input
    else:
        if isinstance(parser_input, ParserResultConcept):
            parser_input = parser_input.source

        if "c:" not in parser_input:
            return parser_input

        # the text contains "c:", it goes through the tokenizer
        tokens = list(Tokenizer(parser_input))

    return self.get_text_from_tokens(tokens, custom_switcher)
|
||||
|
||||
def get_input_as_tokens(self, parser_input):
    """
    Gives the parser input as tokens

    :param parser_input: list of tokens, ParserResultConcept or text
    :return: the list of tokens (ending with EOF) when tokens are already available,
             otherwise a Tokenizer on the text (or on the source of the ParserResultConcept)
    """
    if isinstance(parser_input, list):
        return self.add_eof_if_needed(parser_input)

    if not isinstance(parser_input, ParserResultConcept):
        return Tokenizer(parser_input)

    # ParserResultConcept : its tokens are preferred to its source
    if parser_input.tokens:
        return self.add_eof_if_needed(parser_input.tokens)
    return Tokenizer(parser_input.source)
|
||||
|
||||
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
    """
    Gives the parser input as a list of LexerNode

    :param parser_input: must be a ParserResultConcept
    :param expected_parser: when given, the result must have been produced by this parser
    :return: parser_input.value if it is a non empty collection of LexerNode only, None otherwise
    """
    if not isinstance(parser_input, ParserResultConcept):
        return None

    if expected_parser and parser_input.parser != expected_parser:
        return None

    if len(parser_input.value) == 0:
        return None

    # imported here and not at the top of the module
    # (presumably to avoid a circular import with BaseNodeParser)
    from parsers.BaseNodeParser import LexerNode
    if not all(isinstance(node, LexerNode) for node in parser_input.value):
        return None

    return parser_input.value
|
||||
|
||||
@staticmethod
def add_eof_if_needed(lst):
    """
    Makes sure that the tokens end with an EOF token

    The list given by the caller is never modified: it may be shared
    (for example the tokens of a ParserResultConcept), so a new list is
    returned when the EOF token has to be added.

    :param lst: list of tokens
    :return: lst itself when it already ends with EOF, otherwise a copy followed by EOF
    """
    if lst and lst[-1].type == TokenKind.EOF:
        return lst

    return [*lst, Token(TokenKind.EOF, "", -1, -1, -1)]
|
||||
|
||||
@staticmethod
|
||||
def get_text_from_tokens(tokens, custom_switcher=None):
|
||||
if tokens is None:
|
||||
|
||||
@@ -9,147 +9,17 @@
|
||||
from collections import namedtuple
|
||||
from dataclasses import dataclass
|
||||
from collections import defaultdict
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
from core.tokenizer import TokenKind, Tokenizer, Token
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.BaseParser import BaseParser, ErrorNode
|
||||
import core.utils
|
||||
|
||||
|
||||
@dataclass()
class LexerNode(Node):
    """
    Node that covers a range of tokens

    When no source is given, it is computed from the tokens.
    """

    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        # no explicit source : rebuild it from the tokens
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False

        return (self.start == other.start and
                self.end == other.end and
                self.source == other.source and
                self.tokens == other.tokens)
|
||||
|
||||
|
||||
class UnrecognizedTokensNode(LexerNode):
    """
    LexerNode for tokens that were not recognized

    It can be compared with a utnode tuple or with another UnrecognizedTokensNode
    (same start, end and source).
    """

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)

    def add_token(self, token, pos):
        """
        Add a token at the end of the node

        :param token: the token to add
        :param pos: becomes the new end of the node
        """
        self.tokens.append(token)
        self.end = pos

    def fix_source(self):
        """
        Recompute the source from the current tokens
        """
        self.source = BaseParser.get_text_from_tokens(self.tokens)

    def not_whitespace(self):
        """
        :return: False when the node is made of one single WHITESPACE or NEWLINE token, True otherwise
        """
        return len(self.tokens) != 1 or \
            self.tokens[0].type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE)

    def __eq__(self, other):
        # the tester tuple and the real node are compared the same way
        if not isinstance(other, (utnode, UnrecognizedTokensNode)):
            return False

        return (self.start == other.start and
                self.end == other.end and
                self.source == other.source)

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
class ConceptNode(LexerNode):
    """
    Returned by the ConceptLexerParser
    It represents a recognized concept

    It can be compared with the cnode / short_cnode tuples (on the key of the concept)
    or with another ConceptNode (on the concept itself and the underlying).
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        """
        :param concept: the recognized concept
        :param start: starting index in the tokens list
        :param end: ending index in the tokens list
        :param tokens: the tokens
        :param source: string representation, computed from the tokens when None
        :param underlying: stored as-is, used by __eq__ and __hash__
        """
        super().__init__(start, end, tokens, source)
        self.concept, self.underlying = concept, underlying

        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if isinstance(other, cnode):
            return (self.concept.key == other.concept_key and
                    self.start == other.start and
                    self.end == other.end and
                    self.source == other.source)

        if isinstance(other, short_cnode):
            # short form : no position to check
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return (self.concept == other.concept and
                self.start == other.start and
                self.end == other.end and
                self.source == other.source and
                self.underlying == other.underlying)

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code is recognized)

    It can be compared with a scnode tuple (start, end and source only)
    or with another SourceCodeNode (the node is compared as well).
    """

    def __init__(self, node, start, end, tokens=None, source=None):
        """
        :param node: The PythonNode (or whatever language node) that is found
        :param start: starting index in the tokens list
        :param end: ending index in the tokens list
        :param tokens: the tokens
        :param source: string representation of what was parsed
        """
        super().__init__(start, end, tokens, source)
        self.node = node

    def __eq__(self, other):
        if isinstance(other, scnode):
            return (self.start == other.start and
                    self.end == other.end and
                    self.source == other.source)

        if not isinstance(other, SourceCodeNode):
            return False

        return (self.node == other.node and
                self.start == other.start and
                self.end == other.end and
                self.source == other.source)

    def __hash__(self):
        # the node is not part of the hash
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
|
||||
|
||||
|
||||
# Lightweight tuples that the node classes accept in their __eq__.
# Note that cnode and short_cnode share the same type name ("ConceptNode").
cnode = namedtuple("ConceptNode", ["concept_key", "start", "end", "source"])
short_cnode = namedtuple("ConceptNode", ["concept_key", "source"])
utnode = namedtuple("UnrecognizedTokensNode", ["start", "end", "source"])
scnode = namedtuple("SourceCodeNode", ["start", "end", "source"])
|
||||
|
||||
|
||||
class NonTerminalNode(LexerNode):
|
||||
"""
|
||||
Returned by the ConceptLexerParser
|
||||
Returned by the BnfNodeParser
|
||||
"""
|
||||
|
||||
def __init__(self, parsing_expression, start, end, tokens, children=None):
|
||||
@@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode):
|
||||
|
||||
class TerminalNode(LexerNode):
|
||||
"""
|
||||
Returned by the ConceptLexerParser
|
||||
Returned by the BnfNodeParser
|
||||
"""
|
||||
|
||||
def __init__(self, parsing_expression, start, end, value):
|
||||
@@ -205,11 +75,6 @@ class TerminalNode(LexerNode):
|
||||
return hash((self.parsing_expression, self.start, self.end, self.value))
|
||||
|
||||
|
||||
@dataclass()
|
||||
class GrammarErrorNode(ErrorNode):
|
||||
message: str
|
||||
|
||||
|
||||
@dataclass()
|
||||
class UnknownConceptNode(ErrorNode):
|
||||
concept_key: str
|
||||
@@ -574,9 +439,9 @@ class StrMatch(Match):
|
||||
return None
|
||||
|
||||
|
||||
class ConceptLexerParser(BaseParser):
|
||||
class BnfNodeParser(BaseParser):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__("ConceptLexer", 50)
|
||||
super().__init__("BnfNode", 50)
|
||||
if 'grammars' in kwargs:
|
||||
self.concepts_grammars = kwargs.get("grammars")
|
||||
elif 'sheerka' in kwargs:
|
||||
@@ -595,7 +460,6 @@ class ConceptLexerParser(BaseParser):
|
||||
self.sheerka = None
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
self.has_error = True
|
||||
self.error_sink.append(error)
|
||||
if next_token:
|
||||
self.next_token()
|
||||
@@ -606,17 +470,12 @@ class ConceptLexerParser(BaseParser):
|
||||
self.sheerka = context.sheerka
|
||||
self.text = text
|
||||
|
||||
if isinstance(text, str):
|
||||
try:
|
||||
self.tokens = list(Tokenizer(text))
|
||||
self.tokens = list(self.get_input_as_tokens(text))
|
||||
except core.tokenizer.LexerError as e:
|
||||
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
|
||||
return False
|
||||
|
||||
else:
|
||||
self.tokens = list(text)
|
||||
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
|
||||
|
||||
self.token = None
|
||||
self.pos = -1
|
||||
self.next_token(False)
|
||||
@@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser):
|
||||
removed_concepts.append(e)
|
||||
return removed_concepts
|
||||
|
||||
def parse(self, context, text):
|
||||
if text == "":
|
||||
def parse(self, context, parser_input):
|
||||
if parser_input == "":
|
||||
return context.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
|
||||
)
|
||||
|
||||
if not self.reset_parser(context, text):
|
||||
if not self.reset_parser(context, parser_input):
|
||||
return self.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
@@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser):
|
||||
self.sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=text,
|
||||
source=parser_input,
|
||||
body=choice,
|
||||
try_parsed=choice)))
|
||||
|
||||
if len(ret) == 1:
|
||||
self.log_result(context, text, ret[0])
|
||||
self.log_result(context, parser_input, ret[0])
|
||||
return ret[0]
|
||||
else:
|
||||
self.log_multiple_results(context, text, ret)
|
||||
self.log_multiple_results(context, parser_input, ret)
|
||||
return ret
|
||||
|
||||
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
|
||||
@@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser):
|
||||
_concept.compiled[prop_name] = new_value
|
||||
|
||||
def _look_for_concept_match(_underlying):
|
||||
"""
|
||||
At some point, there is either an StrMatch or a ConceptMatch,
|
||||
that allowed the recognition.
|
||||
Look for the ConceptMatch, with recursion if needed
|
||||
"""
|
||||
if isinstance(_underlying.parsing_expression, ConceptExpression):
|
||||
return _underlying
|
||||
|
||||
@@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser):
|
||||
def _get_underlying_value(_underlying):
|
||||
concept_match_node = _look_for_concept_match(_underlying)
|
||||
if concept_match_node:
|
||||
# the value is a concept
|
||||
if id(concept_match_node) in _underlying_value_cache:
|
||||
result = _underlying_value_cache[id(concept_match_node)]
|
||||
else:
|
||||
@@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser):
|
||||
result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
|
||||
_underlying_value_cache[id(concept_match_node)] = result
|
||||
else:
|
||||
# the value is a string
|
||||
result = DoNotResolve(_underlying.source)
|
||||
|
||||
return result
|
||||
@@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser):
|
||||
concept.compiled[ConceptParts.BODY] = value
|
||||
if underlying.parsing_expression.rule_name:
|
||||
_add_prop(concept, underlying.parsing_expression.rule_name, value)
|
||||
# KSI : Why don't we set concept.metadata.need_validation to True ?
|
||||
|
||||
if isinstance(underlying, NonTerminalNode):
|
||||
for node in underlying.children:
|
||||
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
|
||||
from core.sheerka.Sheerka import ExecutionContext
|
||||
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
|
||||
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
|
||||
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
|
||||
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
|
||||
StrMatch, ConceptGroupExpression
|
||||
|
||||
|
||||
@@ -30,7 +30,6 @@ class BnfParser(BaseParser):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__("Bnf", 50, False)
|
||||
# self.has_error = False
|
||||
# self.error_sink = []
|
||||
# self.name = BaseParser.PREFIX + "Bnf"
|
||||
|
||||
@@ -61,7 +60,6 @@ class BnfParser(BaseParser):
|
||||
self.eat_white_space()
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
self.has_error = True
|
||||
self.error_sink.append(error)
|
||||
if next_token:
|
||||
self.next_token()
|
||||
@@ -115,11 +113,11 @@ class BnfParser(BaseParser):
|
||||
token = self.get_token()
|
||||
return token.type == second or token.type == first and self.next_after().type == second
|
||||
|
||||
def parse(self, context: ExecutionContext, text):
|
||||
def parse(self, context: ExecutionContext, parser_input):
|
||||
|
||||
tree = None
|
||||
try:
|
||||
self.reset_parser(context, text)
|
||||
self.reset_parser(context, parser_input)
|
||||
tree = self.parse_choice()
|
||||
|
||||
token = self.get_token()
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
# try to match something like
|
||||
# ConceptNode 'plus' ConceptNode
|
||||
#
|
||||
# Replaced by SyaNodeParser
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import TokenKind, Token
|
||||
from parsers.BaseNodeParser import SourceCodeNode
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
||||
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
import logging
|
||||
|
||||
multiple_concepts_parser = MultipleConceptsParser()
|
||||
|
||||
@@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser()
|
||||
class ConceptsWithConceptsParser(BaseParser):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__("ConceptsWithConcepts", 25)
|
||||
self.enabled = False
|
||||
|
||||
@staticmethod
|
||||
def get_tokens(nodes):
|
||||
@@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser):
|
||||
|
||||
return concept
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
sheerka = context.sheerka
|
||||
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
|
||||
nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
|
||||
if not nodes:
|
||||
return None
|
||||
|
||||
if not text.parser == multiple_concepts_parser:
|
||||
return None
|
||||
|
||||
nodes = text.body
|
||||
|
||||
concept_key = self.get_key(nodes)
|
||||
concept = sheerka.new(concept_key)
|
||||
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
return sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
|
||||
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
|
||||
|
||||
concepts = concept if hasattr(concept, "__iter__") else [concept]
|
||||
for concept in concepts:
|
||||
@@ -101,7 +102,7 @@ class ConceptsWithConceptsParser(BaseParser):
|
||||
sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=text.source,
|
||||
source=parser_input.source,
|
||||
body=concept,
|
||||
try_parsed=None)))
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ class DefaultParser(BaseParser):
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
BaseParser.__init__(self, "Default", 50)
|
||||
BaseParser.__init__(self, "Default", 60)
|
||||
self.lexer_iter = None
|
||||
self._current = None
|
||||
self.context: ExecutionContext = None
|
||||
@@ -168,7 +168,6 @@ class DefaultParser(BaseParser):
|
||||
self.next_token()
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
self.has_error = True
|
||||
self.error_sink.append(error)
|
||||
if next_token:
|
||||
self.next_token()
|
||||
@@ -188,19 +187,19 @@ class DefaultParser(BaseParser):
|
||||
|
||||
return
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
# default parser can only manage string text
|
||||
if not isinstance(text, str):
|
||||
if not isinstance(parser_input, str):
|
||||
ret = context.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
|
||||
self.log_result(context, text, ret)
|
||||
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
|
||||
self.log_result(context, parser_input, ret)
|
||||
return ret
|
||||
|
||||
tree = None
|
||||
try:
|
||||
self.reset_parser(context, text)
|
||||
self.reset_parser(context, parser_input)
|
||||
tree = self.parse_statement()
|
||||
except core.tokenizer.LexerError as e:
|
||||
self.add_error(e, False)
|
||||
@@ -211,7 +210,7 @@ class DefaultParser(BaseParser):
|
||||
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
|
||||
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
|
||||
else:
|
||||
body = self.get_return_value_body(context.sheerka, text, tree, tree)
|
||||
body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
|
||||
# body = self.sheerka.new(
|
||||
# BuiltinConcepts.PARSER_RESULT,
|
||||
# parser=self,
|
||||
@@ -224,7 +223,7 @@ class DefaultParser(BaseParser):
|
||||
not self.has_error,
|
||||
body)
|
||||
|
||||
self.log_result(context, text, ret)
|
||||
self.log_result(context, parser_input, ret)
|
||||
return ret
|
||||
|
||||
def parse_statement(self):
|
||||
|
||||
@@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser):
|
||||
def __init__(self, **kwargs):
|
||||
BaseParser.__init__(self, "EmptyString", 90)
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
sheerka = context.sheerka
|
||||
|
||||
if isinstance(text, str) and text.strip() == "" or \
|
||||
isinstance(text, list) and text == [] or \
|
||||
text is None:
|
||||
if isinstance(parser_input, str) and parser_input.strip() == "" or \
|
||||
isinstance(parser_input, list) and parser_input == [] or \
|
||||
parser_input is None:
|
||||
ret = sheerka.ret(self.name, True, sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
@@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser):
|
||||
else:
|
||||
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
|
||||
|
||||
self.log_result(context, text, ret)
|
||||
self.log_result(context, parser_input, ret)
|
||||
return ret
|
||||
|
||||
@@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser):
|
||||
def __init__(self, **kwargs):
|
||||
BaseParser.__init__(self, "ExactConcept", 80)
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
"""
|
||||
text can be string, but text can also be an list of tokens
|
||||
:param context:
|
||||
:param text:
|
||||
:param parser_input:
|
||||
:return:
|
||||
"""
|
||||
|
||||
context.log(f"Parsing '{text}'", self.name)
|
||||
context.log(f"Parsing '{parser_input}'", self.name)
|
||||
res = []
|
||||
sheerka = context.sheerka
|
||||
try:
|
||||
words = self.get_words(text)
|
||||
words = self.get_words(parser_input)
|
||||
except LexerError as e:
|
||||
context.log(f"Error found in tokenizer {e}", self.name)
|
||||
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
|
||||
|
||||
if len(words) > self.MAX_WORDS_SIZE:
|
||||
context.log(f"Max words reached. Stopping.", self.name)
|
||||
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
|
||||
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
|
||||
|
||||
recognized = False
|
||||
for combination in self.combinations(words):
|
||||
@@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser):
|
||||
context.sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=text if isinstance(text, str) else self.get_text_from_tokens(text),
|
||||
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
|
||||
body=concept,
|
||||
try_parsed=concept)))
|
||||
recognized = True
|
||||
|
||||
if recognized:
|
||||
if len(res) == 1:
|
||||
self.log_result(context, text, res[0])
|
||||
self.log_result(context, parser_input, res[0])
|
||||
else:
|
||||
self.log_multiple_results(context, text, res)
|
||||
self.log_multiple_results(context, parser_input, res)
|
||||
return res
|
||||
return res
|
||||
|
||||
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
|
||||
self.log_result(context, text, ret)
|
||||
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
|
||||
self.log_result(context, parser_input, ret)
|
||||
return ret
|
||||
|
||||
@staticmethod
|
||||
def get_words(text):
|
||||
tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
|
||||
def get_words(self, text):
|
||||
tokens = self.get_input_as_tokens(text)
|
||||
res = []
|
||||
for t in tokens:
|
||||
if t.type == TokenKind.EOF:
|
||||
|
||||
@@ -1,18 +1,20 @@
|
||||
# to be replaced by SyaNodeParser
|
||||
import ast
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import TokenKind
|
||||
from parsers.BaseNodeParser import SourceCodeNode
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
|
||||
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
|
||||
import core.utils
|
||||
from parsers.PythonParser import PythonParser
|
||||
|
||||
concept_lexer_parser = ConceptLexerParser()
|
||||
concept_lexer_parser = BnfNodeParser()
|
||||
|
||||
|
||||
class MultipleConceptsParser(BaseParser):
|
||||
"""
|
||||
Parser that will take the result of ConceptLexerParser and
|
||||
Parser that will take the result of BnfNodeParser and
|
||||
try to resolve the unrecognized tokens token by token
|
||||
|
||||
It is a success when it returns a list ConceptNode exclusively
|
||||
@@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
BaseParser.__init__(self, "MultipleConcepts", 45)
|
||||
self.enabled = False
|
||||
|
||||
@staticmethod
|
||||
def finalize(nodes_found, unrecognized_tokens):
|
||||
@@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser):
|
||||
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
|
||||
return unrecognized_tokens
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
sheerka = context.sheerka
|
||||
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
|
||||
nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
|
||||
if not nodes:
|
||||
return None
|
||||
|
||||
if not text.parser == concept_lexer_parser:
|
||||
return None
|
||||
|
||||
sheerka = context.sheerka
|
||||
nodes = text.value
|
||||
nodes_found = [[]]
|
||||
concepts_only = True
|
||||
|
||||
@@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser):
|
||||
sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=text.source,
|
||||
source=parser_input.source,
|
||||
body=choice,
|
||||
try_parsed=None))
|
||||
)
|
||||
|
||||
if len(ret) == 1:
|
||||
self.log_result(context, text.source, ret[0])
|
||||
self.log_result(context, parser_input.source, ret[0])
|
||||
return ret[0]
|
||||
else:
|
||||
self.log_multiple_results(context, text.source, ret)
|
||||
self.log_multiple_results(context, parser_input.source, ret)
|
||||
return ret
|
||||
|
||||
@staticmethod
|
||||
|
||||
+19
-18
@@ -1,4 +1,4 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
|
||||
from core.tokenizer import Tokenizer, LexerError, TokenKind
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
from dataclasses import dataclass
|
||||
@@ -6,7 +6,7 @@ import ast
|
||||
import logging
|
||||
import core.utils
|
||||
|
||||
from parsers.ConceptLexerParser import ConceptNode
|
||||
from parsers.BnfNodeParser import ConceptNode
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -67,7 +67,7 @@ class PythonParser(BaseParser):
|
||||
BaseParser.__init__(self, "Python", 50)
|
||||
self.source = kwargs.get("source", "<undef>")
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
sheerka = context.sheerka
|
||||
tree = None
|
||||
|
||||
@@ -76,15 +76,9 @@ class PythonParser(BaseParser):
|
||||
}
|
||||
|
||||
try:
|
||||
if isinstance(text, str) and "c:" in text:
|
||||
source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
|
||||
elif isinstance(text, str):
|
||||
source = text
|
||||
else:
|
||||
source = self.get_text_from_tokens(text, python_switcher)
|
||||
source = self.get_input_as_text(parser_input, python_switcher)
|
||||
source = source.strip()
|
||||
|
||||
text = text if isinstance(text, str) else source
|
||||
parser_input = parser_input if isinstance(parser_input, str) else source
|
||||
|
||||
# first, try to parse an expression
|
||||
res, tree, error = self.try_parse_expression(source)
|
||||
@@ -92,25 +86,32 @@ class PythonParser(BaseParser):
|
||||
# then try to parse a statement
|
||||
res, tree, error = self.try_parse_statement(source)
|
||||
if not res:
|
||||
self.has_error = True
|
||||
error_node = PythonErrorNode(text, error)
|
||||
error_node = PythonErrorNode(parser_input, error)
|
||||
self.error_sink.append(error_node)
|
||||
|
||||
except LexerError as e:
|
||||
self.has_error = True
|
||||
self.error_sink.append(e)
|
||||
|
||||
if self.has_error:
|
||||
ret = sheerka.ret(
|
||||
self.name,
|
||||
not self.has_error,
|
||||
False,
|
||||
sheerka.new(
|
||||
BuiltinConcepts.NOT_FOR_ME,
|
||||
body=parser_input,
|
||||
reason=self.error_sink))
|
||||
else:
|
||||
ret = sheerka.ret(
|
||||
self.name,
|
||||
True,
|
||||
sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=text,
|
||||
body=self.error_sink if self.has_error else PythonNode(text, tree),
|
||||
source=parser_input,
|
||||
body=PythonNode(parser_input, tree),
|
||||
try_parsed=None))
|
||||
|
||||
self.log_result(context, text, ret)
|
||||
self.log_result(context, parser_input, ret)
|
||||
return ret
|
||||
|
||||
def try_parse_expression(self, text):
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import ConceptNode
|
||||
from parsers.BnfNodeParser import ConceptNode
|
||||
from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
from parsers.PythonParser import PythonParser
|
||||
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
|
||||
|
||||
multiple_concepts_parser = MultipleConceptsParser()
|
||||
unrecognized_nodes_parser = UnrecognizedNodeParser()
|
||||
|
||||
|
||||
class PythonWithConceptsParser(BaseParser):
|
||||
@@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser):
|
||||
res += c if c.isalnum() else "0"
|
||||
return res
|
||||
|
||||
def parse(self, context, text):
|
||||
def parse(self, context, parser_input):
|
||||
sheerka = context.sheerka
|
||||
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
|
||||
nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
|
||||
if not nodes:
|
||||
return None
|
||||
|
||||
if not text.parser == multiple_concepts_parser:
|
||||
return None
|
||||
|
||||
nodes = text.body
|
||||
source = ""
|
||||
to_parse = ""
|
||||
identifiers = {}
|
||||
@@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser):
|
||||
python_id = _get_identifier(concept)
|
||||
to_parse += python_id
|
||||
python_ids_mappings[python_id] = concept
|
||||
|
||||
else:
|
||||
source += node.source
|
||||
to_parse += node.source
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,114 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
|
||||
from parsers.BaseParser import BaseParser, ErrorNode
|
||||
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
|
||||
import core.utils
|
||||
|
||||
PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]
|
||||
|
||||
|
||||
@dataclass()
class CannotParseNode(ErrorNode):
    """
    Error node that carries the UnrecognizedTokensNode it relates to.
    """

    # the unrecognized tokens attached to this error
    unrecognized: UnrecognizedTokensNode
|
||||
|
||||
|
||||
class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya)
    It will try to resolve all UnrecognizedTokensNode.

    ConceptNode inputs are validated (their compiled properties are searched for
    UnrecognizedTokensNode), UnrecognizedTokensNode inputs are submitted again to
    the parsers listed in PARSERS.
    """

    def __init__(self, **kwargs):
        super().__init__("UnrecognizedNode", 45)  # lower than AtomNode, BnfNode and SyaNode

    def add_error(self, error):
        """
        Record one error, or every error of an iterable, in the error sink
        and flag the parser as being in error.

        :param error: a single error object or an iterable of errors
        :return: None
        """
        # a string is iterable too, but it is ONE message: never split it into characters
        if hasattr(error, "__iter__") and not isinstance(error, (str, bytes)):
            errors = list(error)
        else:
            errors = [error]

        if errors:
            self.error_sink.extend(errors)
            # parse() only checks has_error to decide that the failure is fatal
            self.has_error = True

    def parse(self, context, parser_input):
        """
        Try to resolve the lexer nodes found in parser_input.

        :param context: execution context (gives access to sheerka)
        :param parser_input: input handed to get_input_as_lexer_nodes()
        :return: None when no lexer node can be obtained from the input,
                 an ERROR return value when a ConceptNode cannot be validated,
                 otherwise one PARSER_RESULT return value per sequence found
                 (a single object when there is only one sequence, a list otherwise).
                 The status is False when at least one UnrecognizedTokensNode remains.
        """
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            return None

        sequences_found = [[]]
        has_unrecognized = False

        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if not res.status:
                    self.add_error(res.body)
                else:
                    sequences_found = core.utils.product(sequences_found, [res.body])

            elif isinstance(node, UnrecognizedTokensNode):
                res = parse_unrecognized(context, node.source, PARSERS)
                res = only_successful(context, res)
                if res.status:
                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
                else:
                    # keep the node as is, the result will be flagged as not successful
                    sequences_found = core.utils.product(sequences_found, [node])
                    has_unrecognized = True

            else:  # cannot happen as of today :-)
                raise NotImplementedError(f"Unsupported node type '{type(node).__name__}'")

        # concept with UnrecognizedToken in their properties is considered as fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        ret = [
            sheerka.ret(
                self.name,
                not has_unrecognized,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=choice,
                    try_parsed=choice))
            for choice in sequences_found
        ]

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]

        self.log_multiple_results(context, parser_input, ret)
        return ret

    def validate_concept_node(self, context, concept_node):
        """
        Make sure that the concept of concept_node has no unresolved
        UnrecognizedTokensNode in its compiled properties.
        The ones that can be parsed are replaced, in place, by the parsing result.

        :param context: execution context (gives access to sheerka)
        :param concept_node: ConceptNode to validate
        :return: return value with status True and concept_node as body,
                 or status False and the list of ERROR concepts as body
        """
        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find unrecognized
            :param concept:
            :return:
            """
            for name, value in concept.compiled.items():
                if isinstance(value, Concept):
                    _validate_concept(value)

                elif isinstance(value, UnrecognizedTokensNode):
                    res = parse_unrecognized(context, value.tokens, PARSERS)
                    res = only_successful(context, res)  # only keep successful parsers
                    if res.status:
                        # existing key is overwritten, the size of the dict does not change
                        concept.compiled[name] = res.body.body
                    else:
                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))

        _validate_concept(concept_node.concept)
        if errors:
            return sheerka.ret(self.name, False, errors)

        return sheerka.ret(self.name, True, concept_node)
|
||||
@@ -20,6 +20,9 @@ def json_default_converter(o):
|
||||
if isinstance(o, (date, datetime)):
|
||||
return o.isoformat()
|
||||
|
||||
if isinstance(o, SheerkaDataProviderRef):
|
||||
return f"##XREF##:{o.target}"
|
||||
|
||||
|
||||
class Event(object):
|
||||
"""
|
||||
@@ -406,6 +409,11 @@ class SheerkaDataProvider:
|
||||
def is_reference(obj):
|
||||
return isinstance(obj, str) and obj.startswith(SheerkaDataProvider.REF_PREFIX)
|
||||
|
||||
def reset(self):
    """
    Re-read 'first_time' from the io backend, then reset the backend when it supports it.

    :return: None
    """
    backend = self.io
    # NOTE(review): first_time is captured BEFORE the backend is reset - confirm this ordering is intended
    self.first_time = backend.first_time
    if hasattr(backend, "reset"):  # not every io implementation provides reset()
        backend.reset()
|
||||
|
||||
def add(self, event_digest: str, entry, obj, allow_multiple=True, use_ref=False):
|
||||
"""
|
||||
Adds obj to the entry 'entry'
|
||||
@@ -999,3 +1007,12 @@ class SheerkaDataProvider:
|
||||
keys[entry] = value
|
||||
self.save_keys(keys)
|
||||
return str(value)
|
||||
|
||||
def dump_state(self, digest=None):
    """
    Print the data of a state as JSON (sorted keys, indented).

    :param digest: digest of the state to load; when falsy, the digest returned by
                   get_snapshot(SheerkaDataProvider.HeadFile) is used
    :return: None (the JSON is written to stdout)
    """
    if not digest:
        digest = self.get_snapshot(SheerkaDataProvider.HeadFile)

    loaded = self.load_state(digest)
    # values json cannot serialize natively go through json_default_converter
    as_json = json.dumps(loaded.data, sort_keys=True, default=json_default_converter, indent=True)
    print(as_json)
|
||||
|
||||
def dump_obj(self, digest):
    """
    Print the attributes (__dict__) of a stored object as JSON (sorted keys, indented).

    :param digest: digest of the object to load
    :return: None (the JSON is written to stdout)
    """
    loaded = self.load_obj(digest)
    # values json cannot serialize natively go through json_default_converter
    as_json = json.dumps(loaded.__dict__, sort_keys=True, default=json_default_converter, indent=True)
    print(as_json)
|
||||
|
||||
@@ -170,6 +170,10 @@ class SheerkaDataProviderDictionaryIO(SheerkaDataProviderIO):
|
||||
|
||||
return io.BytesIO(self.cache[file_path]) if "b" in mode else io.StringIO(self.cache[file_path])
|
||||
|
||||
def reset(self):
    """
    Empty the in-memory cache and mark the io as used for the first time again.

    :return: None
    """
    self.cache.clear()
    self.first_time = True
|
||||
|
||||
|
||||
def on_close(dictionary_io, file_path, stream):
|
||||
"""
|
||||
|
||||
+4
-1
@@ -1,7 +1,7 @@
|
||||
import ast
|
||||
|
||||
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from core.concept import Concept, DEFINITION_TYPE_BNF
|
||||
from core.sheerka.ExecutionContext import ExecutionContext
|
||||
from parsers.BnfParser import BnfParser
|
||||
from sdp.sheerkaDataProvider import Event
|
||||
@@ -54,6 +54,9 @@ class BaseTest:
|
||||
res = bnf_parser.parse(context, c.metadata.definition)
|
||||
if res.status:
|
||||
c.bnf = res.value.value
|
||||
c.metadata.definition_type = DEFINITION_TYPE_BNF
|
||||
else:
|
||||
raise Exception(f"Error in bnf definition '{c.metadata.definition}'", sheerka.get_error(res))
|
||||
sheerka.create_new_concept(context, c)
|
||||
elif create_new:
|
||||
sheerka.create_new_concept(context, c)
|
||||
|
||||
@@ -3,9 +3,34 @@ from tests.BaseTest import BaseTest
|
||||
|
||||
|
||||
class TestUsingMemoryBasedSheerka(BaseTest):
|
||||
singleton_instance = None
|
||||
builtin_concepts = {}
|
||||
|
||||
def get_sheerka(self, **kwargs):
|
||||
skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True)
|
||||
@staticmethod
|
||||
def _inner_get_sheerka(skip_builtins_in_db):
|
||||
sheerka = Sheerka(skip_builtins_in_db=skip_builtins_in_db)
|
||||
sheerka.initialize("mem://")
|
||||
return sheerka
|
||||
|
||||
def get_sheerka(self, **kwargs):
|
||||
skip_builtins_in_db = kwargs.get("skip_builtins_in_db", True)
|
||||
use_singleton = kwargs.get("singleton", False)
|
||||
|
||||
sheerka = kwargs.get("sheerka", False)
|
||||
if sheerka:
|
||||
return sheerka
|
||||
|
||||
if use_singleton:
|
||||
singleton_instance = TestUsingMemoryBasedSheerka.singleton_instance
|
||||
if singleton_instance:
|
||||
singleton_instance.reset()
|
||||
singleton_instance.cache_by_key.update(TestUsingMemoryBasedSheerka.builtin_concepts) # quicker ?
|
||||
# singleton_instance.cache_by_key = TestUsingMemoryBasedSheerka.builtin_concepts
|
||||
return singleton_instance
|
||||
else:
|
||||
new_instance = self._inner_get_sheerka(skip_builtins_in_db)
|
||||
TestUsingMemoryBasedSheerka.builtin_concepts.update(new_instance.cache_by_key)
|
||||
TestUsingMemoryBasedSheerka.singleton_instance = new_instance
|
||||
return TestUsingMemoryBasedSheerka.singleton_instance
|
||||
|
||||
return self._inner_get_sheerka(skip_builtins_in_db)
|
||||
|
||||
@@ -3,6 +3,7 @@ from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF
|
||||
from core.sheerka.Sheerka import Sheerka
|
||||
from sdp.sheerkaDataProvider import SheerkaDataProvider
|
||||
|
||||
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
|
||||
@@ -32,6 +33,7 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka):
|
||||
assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_ID_ENTRY, concept.id)
|
||||
assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_NAME_ENTRY, concept.name)
|
||||
assert sheerka.sdp.exists(Sheerka.CONCEPTS_ENTRY, concept.key)
|
||||
assert sheerka.sdp.exists(Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "+")
|
||||
|
||||
def test_i_can_add_a_concept_when_name_differs_from_the_key(self):
|
||||
sheerka = self.get_sheerka()
|
||||
@@ -229,3 +231,29 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka):
|
||||
assert res.status
|
||||
|
||||
|
||||
class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka):
    """
    Concept creation tests that run on top of TestUsingFileBasedSheerka.
    """

    def test_i_can_add_several_concepts(self):
        """
        Two concepts created through two Sheerka instances obtained separately
        (use_dict=False) must both be found in the sdp entries.
        """
        sheerka, context, hello, greeting = self.init_concepts(
            Concept("Hello world a").def_prop("a"),
            Concept("Greeting a").def_prop("a"),
            use_dict=False
        )

        first_creation = sheerka.create_new_concept(self.get_context(sheerka), hello)
        assert first_creation.status

        # ask for another instance before creating the second concept
        sheerka = self.get_sheerka(use_dict=False)
        second_creation = sheerka.create_new_concept(self.get_context(sheerka), greeting)
        assert second_creation.status

        expected_entries = [
            (Sheerka.CONCEPTS_ENTRY, hello.key),
            (Sheerka.CONCEPTS_ENTRY, greeting.key),
            (Sheerka.CONCEPTS_BY_ID_ENTRY, hello.id),
            (Sheerka.CONCEPTS_BY_ID_ENTRY, greeting.id),
            (Sheerka.CONCEPTS_BY_NAME_ENTRY, "Hello world a"),
            (Sheerka.CONCEPTS_BY_NAME_ENTRY, "Greeting a"),
            (Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Hello"),
            (Sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "Greeting"),
        ]
        for entry, key in expected_entries:
            assert sheerka.sdp.exists(entry, key)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
class TestSheerkaHistoryManager(TestUsingMemoryBasedSheerka):
|
||||
def test_i_can_retrieve_history(self):
|
||||
sheerka = self.get_sheerka(skip_builtins_in_db=False)
|
||||
sheerka = self.get_sheerka(skip_builtins_in_db=False, singleton=False)
|
||||
|
||||
sheerka.evaluate_user_input("def concept one as 1")
|
||||
sheerka.evaluate_user_input("one")
|
||||
|
||||
@@ -244,7 +244,7 @@ class TestSheerka(TestUsingFileBasedSheerka):
|
||||
assert not sheerka.is_success(sheerka.new(BuiltinConcepts.TOO_MANY_SUCCESS))
|
||||
|
||||
def test_cache_is_updated_after_get(self):
|
||||
sheerka = self.get_sheerka()
|
||||
sheerka = self.get_sheerka(skip_builtins_in_db=False)
|
||||
|
||||
# updated when by_key returns one element
|
||||
sheerka.create_new_concept(self.get_context(sheerka), Concept("foo", body="1"))
|
||||
|
||||
@@ -171,22 +171,22 @@ class TestSheerkaExecuteParsers(TestUsingMemoryBasedSheerka):
|
||||
'name=Enabled50True, priority=50, status=True, source=Enabled80False:Enabled90False:hello world',
|
||||
]
|
||||
|
||||
def test_parsing_stop_at_the_first_success(self):
|
||||
sheerka = self.get_sheerka()
|
||||
sheerka.parsers = {
|
||||
"Enabled80False": Enabled80FalseParser,
|
||||
"Enabled50bisTrue": Enabled50bisTrueParser,
|
||||
"Enabled10True": Enabled10TrueParser,
|
||||
}
|
||||
|
||||
user_input = [get_ret_val("hello world")]
|
||||
BaseTestParser.debug_out = []
|
||||
sheerka.execute(self.get_context(sheerka), user_input, [BuiltinConcepts.PARSING])
|
||||
|
||||
assert BaseTestParser.debug_out == [
|
||||
'name=Enabled80False, priority=80, status=False, source=hello world',
|
||||
'name=Enabled50BisTrue, priority=50, status=True, source=hello world',
|
||||
]
|
||||
# def test_parsing_stop_at_the_first_success(self):
|
||||
# sheerka = self.get_sheerka()
|
||||
# sheerka.parsers = {
|
||||
# "Enabled80False": Enabled80FalseParser,
|
||||
# "Enabled50bisTrue": Enabled50bisTrueParser,
|
||||
# "Enabled10True": Enabled10TrueParser,
|
||||
# }
|
||||
#
|
||||
# user_input = [get_ret_val("hello world")]
|
||||
# BaseTestParser.debug_out = []
|
||||
# sheerka.execute(self.get_context(sheerka), user_input, [BuiltinConcepts.PARSING])
|
||||
#
|
||||
# assert BaseTestParser.debug_out == [
|
||||
# 'name=Enabled80False, priority=80, status=False, source=hello world',
|
||||
# 'name=Enabled50BisTrue, priority=50, status=True, source=hello world',
|
||||
# ]
|
||||
|
||||
def test_parsing_stop_at_the_first_success_2(self):
|
||||
"""
|
||||
@@ -243,10 +243,13 @@ class TestSheerkaExecuteParsers(TestUsingMemoryBasedSheerka):
|
||||
'name=Enabled50True, priority=50, status=False, source=Enabled80False:hello world',
|
||||
'name=Enabled50True, priority=50, status=True, source=Enabled80False:Enabled90False:hello world',
|
||||
'name=Enabled50BisTrue, priority=50, status=True, source=hello world',
|
||||
'name=Enabled50BisTrue, priority=50, status=True, source=Enabled90False:hello world',
|
||||
'name=Enabled50BisTrue, priority=50, status=True, source=Enabled80False:hello world',
|
||||
'name=Enabled50BisTrue, priority=50, status=True, source=Enabled80False:Enabled90False:hello world',
|
||||
'name=Enabled50False, priority=50, status=False, source=hello world',
|
||||
'name=Enabled50False, priority=50, status=False, source=Enabled90False:hello world',
|
||||
'name=Enabled50False, priority=50, status=False, source=Enabled80False:hello world',
|
||||
'name=Enabled50False, priority=50, status=False, source=Enabled80False:Enabled90False:hello world',
|
||||
'name=Enabled50False, priority=50, status=False, source=Enabled80False:Enabled90False:hello world'
|
||||
]
|
||||
|
||||
def test_a_parser_has_access_to_the_output_of_its_predecessors(self):
|
||||
|
||||
@@ -69,7 +69,7 @@ def test_i_can_get_sub_classes():
|
||||
default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser")
|
||||
exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")
|
||||
python_parser = core.utils.get_class("parsers.PythonParser.PythonParser")
|
||||
concept_lexer_parser = core.utils.get_class("parsers.ConceptLexerParser.ConceptLexerParser")
|
||||
concept_lexer_parser = core.utils.get_class("parsers.BnfNodeParser.BnfNodeParser")
|
||||
|
||||
assert base_parser not in sub_classes
|
||||
assert default_parser in sub_classes
|
||||
|
||||
@@ -2,11 +2,11 @@ import ast
|
||||
import pytest
|
||||
|
||||
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
|
||||
from core.concept import VARIABLE_PREFIX, Concept
|
||||
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
|
||||
from core.tokenizer import Tokenizer
|
||||
from evaluators.AddConceptEvaluator import AddConceptEvaluator
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression
|
||||
from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression
|
||||
from parsers.BnfParser import BnfParser
|
||||
from parsers.DefaultParser import DefConceptNode, NameNode
|
||||
from parsers.PythonParser import PythonNode, PythonParser
|
||||
@@ -65,10 +65,10 @@ class TestAddConceptEvaluator(TestUsingMemoryBasedSheerka):
|
||||
def_concept.post = self.get_concept_part(post)
|
||||
if bnf_def:
|
||||
def_concept.definition = bnf_def
|
||||
def_concept.definition_type = "bnf"
|
||||
def_concept.definition_type = DEFINITION_TYPE_BNF
|
||||
if definition:
|
||||
def_concept.definition = NameNode(list(Tokenizer(definition)))
|
||||
def_concept.definition_type = "def"
|
||||
def_concept.definition_type = DEFINITION_TYPE_DEF
|
||||
|
||||
return ReturnValueConcept(BaseParser.PREFIX + "some_name", True, ParserResultConcept(value=def_concept))
|
||||
|
||||
|
||||
@@ -69,12 +69,14 @@ class TestAddConceptInSetEvaluator(TestUsingMemoryBasedSheerka):
|
||||
def test_i_can_add_bnf_concept_to_a_set_of_concept(self):
|
||||
"""
|
||||
This test is the reason why I have started the whole eval on demand stuff
|
||||
Sheerka tries to evaluate the body but it can (as a and b are not defined)
|
||||
Sheerka tries to evaluate the body but it can't (as a and b are not defined)
|
||||
So 'foo' cannot be put in the set
|
||||
:return:
|
||||
"""
|
||||
sheerka, context, foo, bar = self.init_concepts(
|
||||
Concept("foo", definition="a plus b", body="a + b").def_prop("a").def_prop("b"),
|
||||
sheerka, context, one, two, foo, bar = self.init_concepts(
|
||||
"one",
|
||||
"two",
|
||||
Concept("foo", definition="(one|two)=a 'plus' (one|two)=b", body="a + b").def_prop("a").def_prop("b"),
|
||||
"bar",
|
||||
create_new=True)
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@ import pytest
|
||||
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, BuiltinConcepts
|
||||
from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
from evaluators.LexerNodeEvaluator import LexerNodeEvaluator
|
||||
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, StrMatch, UnrecognizedTokensNode, SourceCodeNode
|
||||
from parsers.BaseNodeParser import SourceCodeNode
|
||||
from parsers.BnfNodeParser import ConceptNode, BnfNodeParser, StrMatch, UnrecognizedTokensNode
|
||||
from parsers.PythonParser import PythonNode
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
@@ -12,7 +13,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
class TestLexerNodeEvaluator(TestUsingMemoryBasedSheerka):
|
||||
|
||||
def from_parsing(self, context, grammar, expression):
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
|
||||
ret_val = parser.parse(context, expression)
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
import pytest
|
||||
|
||||
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from evaluators.BaseEvaluator import BaseEvaluator
|
||||
from evaluators.MultipleErrorsEvaluator import MultipleErrorsEvaluator
|
||||
from parsers.BaseParser import BaseParser
|
||||
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
|
||||
def r(value, status=True):
    """
    Build a ReturnValueConcept where 'value' is used both as first and as third argument.

    :param value: used as first and third argument of the return value
    :param status: status of the return value
    :return: ReturnValueConcept
    """
    ret_val = ReturnValueConcept(value, status, value)
    return ret_val
|
||||
|
||||
|
||||
def eval_false(name):
    """
    Build a ReturnValueConcept in failure (status False) whose first argument is
    BaseEvaluator.PREFIX + name.

    :param name: appended to BaseEvaluator.PREFIX
    :return: ReturnValueConcept
    """
    who = BaseEvaluator.PREFIX + name
    return ReturnValueConcept(who, False, "value")
|
||||
|
||||
|
||||
def eval_true(name):
    """
    Build a ReturnValueConcept in success (status True) whose first argument is
    BaseEvaluator.PREFIX + name.

    :param name: appended to BaseEvaluator.PREFIX
    :return: ReturnValueConcept
    """
    who = BaseEvaluator.PREFIX + name
    return ReturnValueConcept(who, True, "value")
|
||||
|
||||
|
||||
def parser_false(name):
    """
    Build a ReturnValueConcept in failure (status False) whose first argument is
    BaseParser.PREFIX + name.

    :param name: appended to BaseParser.PREFIX
    :return: ReturnValueConcept
    """
    who = BaseParser.PREFIX + name
    return ReturnValueConcept(who, False, "value")
|
||||
|
||||
|
||||
def parser_true(name):
    """
    Build a ReturnValueConcept in success (status True) whose first argument is
    BaseParser.PREFIX + name.

    :param name: appended to BaseParser.PREFIX
    :return: ReturnValueConcept
    """
    who = BaseParser.PREFIX + name
    return ReturnValueConcept(who, True, "value")
|
||||
|
||||
|
||||
# successful return value whose body is a REDUCE_REQUESTED concept
reduce_requested = ReturnValueConcept(
    "some_name",
    True,
    Concept(
        name=BuiltinConcepts.REDUCE_REQUESTED,
        key=BuiltinConcepts.REDUCE_REQUESTED,
    ),
)
|
||||
|
||||
|
||||
class TestMultipleErrorsEvaluator(TestUsingMemoryBasedSheerka):
    """
    Tests of MultipleErrorsEvaluator: matches() and eval()
    """

    @pytest.mark.parametrize("return_values, expected", [
        ([eval_false("one"), reduce_requested], False),
        ([eval_false("one"), eval_false("two"), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), eval_false("three"), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), parser_false("one"), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), parser_true("one"), reduce_requested], False),
        ([eval_false("one"), eval_false("two"), eval_true("three"), reduce_requested], False),
        ([eval_false("one"), eval_false("two"), r("other concept", False), reduce_requested], True),
        ([eval_false("one"), eval_false("two"), r("other concept", True), reduce_requested], True),
        ([eval_false("reduce not required 1"), eval_false("reduce not required 2")], False),
    ])
    def test_i_can_match(self, return_values, expected):
        """matches() must return the expected boolean for each list of return values"""
        evaluator = MultipleErrorsEvaluator()
        matched = evaluator.matches(self.get_context(), return_values)
        assert matched == expected

    def test_i_can_eval(self):
        """
        eval() returns a MULTIPLE_ERRORS concept in failure.
        Its body only contains the evaluators in error, while all the six
        return values are listed as parents.
        """
        context = self.get_context()
        evaluator = MultipleErrorsEvaluator()

        return_values = [eval_false(name) for name in ("one", "two", "three")]
        return_values += [parser_false("one"), parser_false("two")]
        return_values.append(reduce_requested)

        evaluator.matches(context, return_values)
        res = evaluator.eval(context, return_values)

        assert not res.status
        assert context.sheerka.isinstance(res.body, BuiltinConcepts.MULTIPLE_ERRORS)
        # compared with freshly built return values, not with the instances given as input
        assert res.body.body == [eval_false("one"), eval_false("two"), eval_false("three")]
        assert len(res.parents) == 6

    def test_unwanted_return_values_are_not_eaten(self):
        """
        Return values built with r() (successful or not) must not be part of
        the parents of the result.
        """
        context = self.get_context()
        evaluator = MultipleErrorsEvaluator()

        a_successful_concept = r("successful concept")
        a_concept_in_error = r("concept in error", False)
        return_values = [eval_false("one"), eval_false("two"), parser_false("one")]
        return_values += [a_successful_concept, a_concept_in_error, reduce_requested]

        evaluator.matches(context, return_values)
        res = evaluator.eval(context, return_values)

        assert not res.status
        assert res.body.body == [eval_false("one"), eval_false("two")]
        assert len(res.parents) == 4

        for unwanted in (a_successful_concept, a_concept_in_error):
            assert unwanted not in res.parents
|
||||
@@ -71,3 +71,4 @@ class TestOneErrorEvaluator(TestUsingMemoryBasedSheerka):
|
||||
assert len(res.parents) == 4
|
||||
|
||||
assert a_successful_concept not in res.parents
|
||||
assert a_concept_in_error not in res.parents
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import pytest
|
||||
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property, simplec
|
||||
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
|
||||
from parsers.ConceptLexerParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression
|
||||
from parsers.BaseNodeParser import SyaAssociativity
|
||||
from parsers.BnfNodeParser import Sequence, StrMatch, OrderedChoice, Optional, ConceptExpression
|
||||
from sdp.sheerkaDataProvider import SheerkaDataProvider
|
||||
|
||||
from tests.TestUsingFileBasedSheerka import TestUsingFileBasedSheerka
|
||||
@@ -125,6 +125,17 @@ as:
|
||||
assert sheerka.sdp.io.exists(
|
||||
sheerka.sdp.io.get_obj_path(SheerkaDataProvider.ObjectsFolder, concept_saved.get_origin()))
|
||||
|
||||
def test_i_can_def_several_concepts(self):
    """
    A concept defined through a second Sheerka instance (use_dict=False)
    is created with the id "1002".
    """
    first_instance = self.get_sheerka(use_dict=False)
    first_instance.evaluate_user_input("def concept foo")

    # ask for another instance before defining the second concept
    second_instance = self.get_sheerka(use_dict=False)
    res = second_instance.evaluate_user_input("def concept bar")

    assert len(res) == 1
    only_result = res[0]
    assert only_result.status
    assert only_result.body.body.id == "1002"
|
||||
|
||||
def test_i_can_evaluate_def_concept_part_when_one_part_is_a_ref_of_another_concept(self):
|
||||
"""
|
||||
In this test, we test that the properties of 'concept a xx b' (which are 'a' and 'b')
|
||||
@@ -393,6 +404,7 @@ as:
|
||||
assert concept_found.get_prop("a") is None
|
||||
assert not concept_found.metadata.need_validation
|
||||
|
||||
# @pytest.mark.xfail
|
||||
@pytest.mark.parametrize("desc, definitions", [
|
||||
("Simple form", [
|
||||
"def concept one as 1",
|
||||
@@ -467,6 +479,7 @@ as:
|
||||
assert res[0].status
|
||||
assert res[0].body == 23
|
||||
|
||||
# @pytest.mark.xfail
|
||||
def test_i_can_mix_bnf_and_isa(self):
|
||||
"""
|
||||
if 'one' isa 'number, twenty number should be recognized
|
||||
@@ -531,7 +544,44 @@ as:
|
||||
assert res[0].status
|
||||
assert res[0].body == 21
|
||||
|
||||
def test_i_can_mix_concept_of_concept(self):
|
||||
# @pytest.mark.xfail
|
||||
def test_i_can_use_concepts_defined_with_from(self):
    """
    Evaluates a concept defined with 'from' when its operands are
    concepts, plain numbers or a mix of both
    """
    sheerka = self.get_sheerka()

    for definition in ("def concept plus from a plus b as a + b",
                       "def concept one as 1"):
        sheerka.evaluate_user_input(definition)

    # (user input, expected body of the single result)
    scenarios = (
        ("eval one plus one", 2),
        ("eval 1 plus one", 2),
        ("eval one plus 1", 2),
        ("eval 1 plus 2", 3),
        ("eval 1 plus 1", 2),
    )

    for user_input, expected_body in scenarios:
        res = sheerka.evaluate_user_input(user_input)
        assert len(res) == 1
        assert res[0].status
        assert res[0].body == expected_body
|
||||
|
||||
def test_i_can_mix_bnf_concept_and_concept(self):
|
||||
definitions = [
|
||||
"def concept one as 1",
|
||||
"def concept two as 2",
|
||||
@@ -631,24 +681,6 @@ as:
|
||||
assert res[1].status
|
||||
assert res[1].body == "little blue(house)"
|
||||
|
||||
@pytest.mark.xfail
def test_i_can_recognize_composition_of_concept_with_priority(self):
    """
    Defines 'a plus b' and 'a times b', sets a priority on concepts 1001 and 1002,
    then evaluates an expression that uses both concepts
    """
    sheerka = self.get_sheerka()

    setup_inputs = (
        "def concept a plus b where a,b",
        "def concept a times b where a,b",
        "modify concept 1001 set priority = 1",
        "modify concept 1002 set priority = 2",
    )
    for user_input in setup_inputs:
        sheerka.evaluate_user_input(user_input)

    results = sheerka.evaluate_user_input("1 plus 2 times 3")
    assert results[0].status
    # check that the priority is applied
|
||||
|
||||
def test_i_can_say_that_a_concept_isa_another_concept(self):
|
||||
sheerka = self.get_sheerka()
|
||||
sheerka.evaluate_user_input("def concept foo")
|
||||
@@ -768,6 +800,7 @@ as:
|
||||
assert not res[0].status
|
||||
assert sheerka.isinstance(res[0].body, BuiltinConcepts.WHERE_CLAUSE_FAILED)
|
||||
|
||||
|
||||
# def test_i_can_detect_when_only_one_evaluator_is_in_error(self):
|
||||
# sheerka = self.get_sheerka()
|
||||
#
|
||||
@@ -864,3 +897,60 @@ as:
|
||||
twenties = sheerka.get("twenties")
|
||||
number = sheerka.get("number")
|
||||
assert sheerka.isa(twenties, number)
|
||||
|
||||
def test_i_can_mix_sya_concepts_and_bnf_concept(self):
    """
    Evaluates expressions mixing concepts registered with set_sya_def ('plus', 'mult')
    and a concept defined with a bnf ('twenties')
    """
    sheerka = self.init_scenario([
        "def concept one as 1",
        "def concept two as 2",
        "def concept three as 3",
        "def concept plus from a plus b as a + b",
        "def concept mult from a mult b as a * b",
        "def concept twenties from bnf 'twenty' (one|two)=unit as 20 + unit",
    ])
    context = self.get_context(sheerka)

    # 'mult' is registered with 20 and 'plus' with 10, both right associative
    sya_definitions = [
        (sheerka.get("mult").id, 20, SyaAssociativity.Right),
        (sheerka.get("plus").id, 10, SyaAssociativity.Right),
    ]
    sheerka.set_sya_def(context, sya_definitions)

    # (user input, expected body of the single result)
    scenarios = (
        ("eval one plus two mult three", 7),
        ("eval two mult three plus one", 7),
        ("eval 1 plus two mult 3", 7),
        ("eval 2 mult 3 plus one", 7),
        ("eval twenty two plus 1", 23),
        ("eval 1 plus twenty two", 23),
        ("eval twenty one plus twenty two", 43),
        ("eval twenty two plus twenty one mult two", 64),
    )

    for user_input, expected_body in scenarios:
        res = sheerka.evaluate_user_input(user_input)
        assert len(res) == 1
        assert res[0].status
        assert res[0].body == expected_body
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
from core.concept import CC, Concept
|
||||
from core.tokenizer import Tokenizer, TokenKind, Token
|
||||
from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN
|
||||
from parsers.SyaNodeParser import SyaConceptParserHelper
|
||||
|
||||
|
||||
def _index(tokens, expr, index):
    """
    Locates an occurrence of a sub list inside a bigger list

    :param tokens: list of token values to search in
    :param expr: expression to look for; it is tokenized and its EOF token is dropped
    :param index: number of matching occurrences to skip before returning one
    :return: tuple (position of the occurrence in tokens, number of tokens of expr)
    :raises ValueError: when no such occurrence exists
    """
    wanted = [t.value for t in Tokenizer(expr) if t.type != TokenKind.EOF]
    size = len(wanted)
    to_skip = index

    for offset in range(len(tokens) - size + 1):
        if any(tokens[offset + pos] != wanted[pos] for pos in range(size)):
            continue  # at least one token differs, not an occurrence

        if to_skip == 0:
            return offset, size
        to_skip -= 1

    raise ValueError(f"substring '{expr}' not found")
|
||||
|
||||
|
||||
def compute_debug_array(res):
    """
    Builds a comparable version of the 'debug' attribute of every item of res

    Whitespace tokens are dropped, the other tokens are replaced by their value,
    and anything that is not a Token is replaced by the name of its concept

    :param res: iterable of objects exposing a 'debug' attribute
    :return: one list per item of res
    """

    def _is_whitespace(item):
        return isinstance(item, Token) and item.type == TokenKind.WHITESPACE

    return [
        [
            item.value if isinstance(item, Token) else item.concept.name
            for item in r.debug
            if not _is_whitespace(item)
        ]
        for r in res
    ]
|
||||
|
||||
|
||||
def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, skip=0, is_bnf=False, sya=False):
    """
    Looks for sub_expr in the full expression and turns it into the node it stands for

    :param concepts_map: hash of the known concepts
    :param expression_as_tokens: full expression, as a list of token values
    :param sub_expr: what to search in the full expression (string, tuple (string, skip) or node helper)
    :param concept_key: key of the concept, when it is not sub_expr itself
    :param skip: number of occurrences of sub_expr to skip
    :param is_bnf: True if the concept to search is a bnf definition
    :param sya: return a SyaConceptParserHelper instead of a ConceptNode when needed
    :return: the node, or the ")" string for the special "')'" input
    """

    def _resolve(item):
        # nested lookup, only the sya flag is forwarded
        return get_node(concepts_map, expression_as_tokens, item, sya=sya)

    if sub_expr == "')'":
        return ")"

    # already what is expected
    if isinstance(sub_expr, (scnode, utnode)):
        return sub_expr

    if isinstance(sub_expr, cnode):
        # a key starting with '#' refers to an entry of concepts_map
        if not sub_expr.concept_key.startswith("#"):
            return sub_expr
        mapped_key = concepts_map[sub_expr.concept_key[1:]].key
        return cnode(mapped_key, sub_expr.start, sub_expr.end, sub_expr.source)

    if isinstance(sub_expr, SCWC):
        first = _resolve(sub_expr.first)
        last = _resolve(sub_expr.last)
        content = [_resolve(item) for item in sub_expr.content]
        return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()

    if isinstance(sub_expr, (CNC, CC, CN)):
        concept_node = get_node(
            concepts_map,
            expression_as_tokens,
            sub_expr.source or sub_expr.concept_key,
            concept_key=sub_expr.concept_key,
            sya=sya)

        found = concept_node.concept
        sub_expr.concept_key = found.key
        sub_expr.concept = found

        # some nodes have no 'end', in that case the start is used for both positions
        node_start = concept_node.start
        node_end = concept_node.end if hasattr(concept_node, "end") else concept_node.start
        sub_expr.fix_pos((node_start, node_end))

        if hasattr(sub_expr, "compiled"):
            for name, value in sub_expr.compiled.items():
                node = _resolve(value)  # need to get start and end positions
                if isinstance(value, str) and value in concepts_map:
                    sub_expr.compiled[name] = CC(Concept().update_from(concepts_map[value]))
                else:
                    sub_expr.compiled[name] = node
                sub_expr.fix_pos(node)

        if hasattr(sub_expr, "fix_source"):
            sub_expr.fix_source(expression_as_tokens[sub_expr.start: sub_expr.end + 1])
        return sub_expr

    if isinstance(sub_expr, UTN):
        sub_expr.fix_pos(get_node(concepts_map, expression_as_tokens, sub_expr.source))
        return sub_expr

    if isinstance(sub_expr, short_cnode):
        return get_node(concepts_map, expression_as_tokens, sub_expr.source,
                        concept_key=sub_expr.concept_key, skip=skip, is_bnf=True, sya=sya)

    if isinstance(sub_expr, tuple):
        # (sub expression, number of occurrences to skip)
        searched, occurrences_to_skip = sub_expr[0], sub_expr[1]
        return get_node(concepts_map, expression_as_tokens, searched,
                        concept_key=concept_key, skip=occurrences_to_skip, is_bnf=is_bnf, sya=sya)

    start, length = _index(expression_as_tokens, sub_expr, skip)
    end = start + length - 1

    # special case of python source code
    if "+" in sub_expr and sub_expr.strip() != "+":
        return scnode(start, end, sub_expr)

    # try to match one of the concept from the map
    key = concept_key or sub_expr
    known = concepts_map.get(key)
    if not known:
        # nothing known under that key: UnrecognizedTokensNode
        return utnode(start, end, sub_expr)

    concept_copy = Concept().update_from(known)  # make a copy when massively used in tests
    if sya and len(concept_copy.metadata.props) != 0 and not is_bnf:
        # concept with properties requested in sya mode: ParserHelper
        return SyaConceptParserHelper(concept_copy, start)

    # otherwise it's an atom: ConceptNode
    return CN(concept_copy, start, end, source=sub_expr)
|
||||
|
||||
|
||||
def compute_expected_array(concepts_map, expression, expected, sya=False):
    """
    Computes a simple but sufficient version of the result of infix_to_postfix()

    :param concepts_map: hash of the known concepts
    :param expression: full expression; it is tokenized and its EOF token is dropped
    :param expected: sub expressions, each one is resolved with get_node()
    :param sya: if true, generate an SyaConceptParserHelper instead of a cnode
    :return: list of the resolved nodes, in the order of expected
    """
    token_values = []
    for token in Tokenizer(expression):
        if token.type != TokenKind.EOF:
            token_values.append(token.value)

    nodes = []
    for sub_expr in expected:
        nodes.append(get_node(concepts_map, token_values, sub_expr, sya=sya))
    return nodes
|
||||
@@ -0,0 +1,241 @@
|
||||
import pytest
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from parsers.AtomNodeParser import AtomNodeParser
|
||||
from parsers.BaseNodeParser import cnode, utnode, CNC
|
||||
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
from tests.parsers.parsers_utils import compute_expected_array
|
||||
|
||||
|
||||
class TestAtomsParser(TestUsingMemoryBasedSheerka):
    """
    Tests of AtomNodeParser: sequences of concepts, long names,
    unrecognized parts and inputs the parser refuses
    """

    def init_parser(self, concepts_map, **kwargs):
        """
        Creates the concepts of concepts_map and an AtomNodeParser initialized with them

        :param concepts_map: hash of the concepts to create
        :param kwargs: extra arguments forwarded to init_concepts()
        :return: tuple (sheerka, context, parser)
        """
        sheerka, context, *updated_concepts = self.init_concepts(*concepts_map.values(), singleton=True, **kwargs)

        parser = AtomNodeParser()
        parser.initialize(context, updated_concepts)

        return sheerka, context, parser

    @staticmethod
    def _concepts_with_properties():
        """
        Returns a new map mixing atoms with concepts that have properties or a bnf definition
        """
        return {
            "prefixed": Concept("a prefixed").def_prop("a"),
            "suffixed": Concept("prefixed a").def_prop("a"),
            "infix": Concept("a infix b").def_prop("a").def_prop("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

    @staticmethod
    def _check_nodes(sheerka, concepts_map, text, res, expected):
        """
        Checks that res wraps a PARSER_RESULT whose nodes are the ones described by expected

        The status of res is not checked here, it is up to the caller
        """
        wrapper = res.body
        lexer_nodes = res.body.body

        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    def test_i_cannot_parse_empty_string(self):
        sheerka, context, parser = self.init_parser({})

        res = parser.parse(context, "")

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    @pytest.mark.parametrize("text, expected", [
        ("foo", ["foo"]),
        ("foo bar", ["foo", "bar"]),
        ("foo bar twenties", ["foo", "bar", "twenties"]),
    ])
    def test_i_can_parse_simple_sequences(self, text, expected):
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        assert res.status
        self._check_nodes(sheerka, concepts_map, text, res, expected)

    @pytest.mark.parametrize("text, expected", [
        ("foo bar", ["foo bar"]),
        ("one two three", ["one two three"]),
        ("foo bar twenties one two three", ["foo bar", "twenties", "one two three"]),
    ])
    def test_i_can_parse_long_names(self, text, expected):
        concepts_map = {
            "foo bar": Concept("foo bar"),
            "one two three": Concept("one two three"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        assert res.status
        self._check_nodes(sheerka, concepts_map, text, res, expected)

    @pytest.mark.parametrize("text, expected_status, expected", [
        ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
        ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
        ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]),
        ("foo bar 1 + 1", True, ["foo bar", " 1 + 1"]),
        ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
        ("foo bar x$!#", False, ["foo bar", " x$!#"]),

        ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
        ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
        ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]),
        ("1 + 1 foo bar", True, ["1 + 1 ", "foo bar"]),
        ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
        ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
        ("func(one)", False, ["func(", "one", ")"]),
    ])
    def test_i_can_parse_when_unrecognized(self, text, expected_status, expected):
        concepts_map = self._concepts_with_properties()

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        assert res.status == expected_status
        self._check_nodes(sheerka, concepts_map, text, res, expected)

    @pytest.mark.parametrize("text, expected_status, expected", [
        (" one two ", True, [cnode("one", 1, 1, "one"), cnode("two", 3, 3, "two")]),
        (" one x$!# ", False, [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]),
        (" foo bar x$!# ", False, [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]),
    ])
    def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected):
        concepts_map = self._concepts_with_properties()

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        assert res.status == expected_status
        self._check_nodes(sheerka, concepts_map, text, res, expected)

    @pytest.mark.parametrize("text, expected", [
        ("one two", [["one", "two"], ["one two"]])
    ])
    def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(self, text, expected):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)
        assert len(list_of_res) == len(expected)

        for res, expected_nodes in zip(list_of_res, expected):
            assert res.status
            self._check_nodes(sheerka, concepts_map, text, res, expected_nodes)

    def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(self):
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
            "one two x$!# one two": Concept("one two x$!# one two"),
        }

        text = "one two x$!# one two"
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)

        # (expected status, expected nodes), a tuple (name, n) refers to the occurrence after skipping n
        expected = [
            (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one", "two", " x$!# ", ("one two", 1)]),
            (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one two", " x$!# ", ("one two", 1)]),
            (True, ["one two x$!# one two"]),
        ]

        assert len(list_of_res) == len(expected)

        # dedicated loop names, so that the list being iterated is not rebound
        for res, (expected_status, expected_nodes) in zip(list_of_res, expected):
            assert res.status == expected_status
            self._check_nodes(sheerka, concepts_map, text, res, expected_nodes)

    @pytest.mark.parametrize("text", [
        "foo",
        "foo one",
        "x$!#",
        "twenty one",
        "1 + 1",
        "foo x$!#",
        "1 + 1 twenty one",
    ])
    def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text):
        concepts_map = {
            "foo": Concept("foo a").def_prop("a"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }

        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
        assert res.body.body == text

    @pytest.mark.parametrize("text, expected", [
        ("hello foo bar",
         [
             (True, [CNC("hello1", source="hello foo ", a=" foo "), "bar"]),
             (True, [CNC("hello2", source="hello foo ", b=" foo "), "bar"]),
         ]),
    ])
    def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
        concepts_map = {
            "hello1": Concept("hello a").def_prop("a"),
            "hello2": Concept("hello b").def_prop("b"),
            "bar": Concept("bar")
        }

        sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
        list_of_res = parser.parse(context, text)

        assert len(list_of_res) == len(expected)

        # dedicated loop names, so that the parametrized 'expected' is not rebound
        for res, (expected_status, expected_nodes) in zip(list_of_res, expected):
            assert res.status == expected_status
            self._check_nodes(sheerka, concepts_map, text, res, expected_nodes)
|
||||
+19
-17
@@ -4,10 +4,11 @@ import pytest
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
from core.tokenizer import Tokenizer, TokenKind, Token
|
||||
from parsers.BaseNodeParser import cnode, short_cnode
|
||||
from parsers.BnfParser import BnfParser
|
||||
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
|
||||
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
|
||||
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \
|
||||
UnrecognizedTokensNode, cnode, short_cnode, ConceptExpression, ConceptGroupExpression
|
||||
UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression
|
||||
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
@@ -72,15 +73,16 @@ def cprop(concept, prop_name):
|
||||
return concept.compiled[prop_name]
|
||||
|
||||
|
||||
class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
class TestBnfConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
def init(self, concepts, grammar):
|
||||
context = self.get_context()
|
||||
sheerka = self.get_sheerka(singleton=True)
|
||||
context = self.get_context(sheerka)
|
||||
for c in concepts:
|
||||
context.sheerka.add_in_cache(c)
|
||||
context.sheerka.set_id_if_needed(c, False)
|
||||
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
|
||||
return context, parser
|
||||
@@ -602,7 +604,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
grammar = {foo: Optional("one", ConceptExpression("foo"))}
|
||||
|
||||
context = self.get_context()
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo"))
|
||||
|
||||
@@ -612,7 +614,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
context = self.get_context()
|
||||
context.concepts["foo"] = foo
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo"))
|
||||
|
||||
@@ -636,7 +638,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
grammar = {foo: Sequence("twenty", number)}
|
||||
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
|
||||
res = parser.parse(context, "twenty two")
|
||||
@@ -686,7 +688,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
grammar = {foo: ZeroOrMore("one")}
|
||||
context, parser = self.init([foo], grammar)
|
||||
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
|
||||
res = parser.parse(context, "one two")
|
||||
@@ -779,7 +781,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
bar: foo,
|
||||
foo: bar
|
||||
}
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(self.get_context(), grammar)
|
||||
|
||||
assert bar not in parser.concepts_grammars
|
||||
@@ -793,7 +795,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
foo: OrderedChoice(bar, "foo")
|
||||
}
|
||||
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(self.get_context(), grammar)
|
||||
|
||||
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
||||
@@ -824,7 +826,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
bar: foo,
|
||||
foo: Sequence("one", bar, "two")
|
||||
}
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(self.get_context(), grammar)
|
||||
|
||||
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
||||
@@ -838,7 +840,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
bar: foo,
|
||||
foo: Sequence("one", OrderedChoice(bar, "other"), "two")
|
||||
}
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(self.get_context(), grammar)
|
||||
|
||||
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
||||
@@ -851,7 +853,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
grammar = {
|
||||
foo: bar
|
||||
}
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(self.get_context(), grammar)
|
||||
|
||||
assert foo in parser.concepts_grammars
|
||||
@@ -883,7 +885,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
bar = Concept(name="bar")
|
||||
|
||||
grammar = {foo: Sequence("one", "two"), bar: foo}
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
ret = parser.initialize(context, grammar)
|
||||
return_value = ret.body
|
||||
|
||||
@@ -1209,7 +1211,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
context.sheerka.set_id_if_needed(c, False)
|
||||
context.sheerka.add_concept_to_set(context, baz, bar)
|
||||
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
|
||||
encoded = parser.encode_grammar(parser.concepts_grammars)
|
||||
@@ -1260,7 +1262,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
|
||||
# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
|
||||
# }
|
||||
#
|
||||
# parser = ConceptLexerParser()
|
||||
# parser = BnfNodeParser()
|
||||
# parser.register(grammar)
|
||||
#
|
||||
# # res = parser.parse(context, "1")
|
||||
@@ -3,10 +3,11 @@ import pytest
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
|
||||
from parsers.BaseNodeParser import cnode
|
||||
from parsers.BaseParser import UnexpectedTokenErrorNode
|
||||
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
|
||||
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
|
||||
ConceptLexerParser, ConceptExpression, cnode
|
||||
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
|
||||
BnfNodeParser, ConceptExpression
|
||||
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
@@ -155,7 +156,7 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
|
||||
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
|
||||
|
||||
concepts = {bar: bar_definition, foo: foo_definition}
|
||||
concept_parser = ConceptLexerParser()
|
||||
concept_parser = BnfNodeParser()
|
||||
concept_parser.initialize(context, concepts)
|
||||
|
||||
res = concept_parser.parse(context, "twenty two")
|
||||
|
||||
@@ -5,7 +5,8 @@ import pytest
|
||||
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import Token, TokenKind, Tokenizer
|
||||
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
||||
from parsers.BaseNodeParser import SourceCodeNode
|
||||
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
|
||||
from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
from parsers.PythonParser import PythonNode
|
||||
@@ -65,7 +66,7 @@ class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka):
|
||||
@pytest.mark.parametrize("text, interested", [
|
||||
("not parser result", False),
|
||||
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
|
||||
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
|
||||
(ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
|
||||
])
|
||||
def test_not_interested(self, text, interested):
|
||||
context = self.get_context()
|
||||
|
||||
@@ -3,7 +3,7 @@ import ast
|
||||
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
|
||||
from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
|
||||
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptExpression
|
||||
from parsers.BnfNodeParser import OrderedChoice, StrMatch, ConceptExpression
|
||||
from parsers.PythonParser import PythonParser, PythonNode
|
||||
from core.tokenizer import Keywords, Tokenizer, LexerError
|
||||
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode
|
||||
@@ -251,7 +251,7 @@ def concept add one to a as
|
||||
res = parser.parse(context, text)
|
||||
node = res.value.value
|
||||
definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string"))
|
||||
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition)
|
||||
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", None, definition, definition)
|
||||
expected = get_def_concept(name="name", body="__definition[0]", bnf_def=parser_result)
|
||||
|
||||
assert res.status
|
||||
|
||||
@@ -3,7 +3,8 @@ import pytest
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import Tokenizer, TokenKind, Token
|
||||
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, SourceCodeNode
|
||||
from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode
|
||||
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence
|
||||
from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
from parsers.PythonParser import PythonNode
|
||||
|
||||
@@ -11,7 +12,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
|
||||
def get_return_value(context, grammar, expression):
|
||||
parser = ConceptLexerParser()
|
||||
parser = BnfNodeParser()
|
||||
parser.initialize(context, grammar)
|
||||
|
||||
ret_val = parser.parse(context, expression)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import ast
|
||||
import pytest
|
||||
from core.builtin_concepts import ParserResultConcept
|
||||
from core.builtin_concepts import ParserResultConcept, NotForMeConcept
|
||||
from core.tokenizer import Tokenizer, LexerError
|
||||
from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode
|
||||
import core.utils
|
||||
@@ -48,9 +48,11 @@ class TestPythonParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
assert not res.status
|
||||
assert res.who == parser.name
|
||||
assert isinstance(res.value, ParserResultConcept)
|
||||
assert isinstance(res.value.value[0], PythonErrorNode)
|
||||
assert isinstance(res.value.value[0].exception, SyntaxError)
|
||||
assert isinstance(res.value, NotForMeConcept)
|
||||
assert res.value.body == text
|
||||
assert len(res.value.get_prop("reason")) == 1
|
||||
assert isinstance(res.value.get_prop("reason")[0], PythonErrorNode)
|
||||
assert isinstance(res.value.get_prop("reason")[0].exception, SyntaxError)
|
||||
|
||||
@pytest.mark.parametrize("text, error_msg, error_text", [
|
||||
("c::", "Concept identifiers not found", ""),
|
||||
@@ -61,10 +63,13 @@ class TestPythonParser(TestUsingMemoryBasedSheerka):
|
||||
res = parser.parse(self.get_context(), text)
|
||||
|
||||
assert not res.status
|
||||
assert isinstance(res.body, ParserResultConcept)
|
||||
assert isinstance(res.body.body[0], LexerError)
|
||||
assert res.body.body[0].message == error_msg
|
||||
assert res.body.body[0].text == error_text
|
||||
assert isinstance(res.value, NotForMeConcept)
|
||||
assert res.value.body == text
|
||||
|
||||
assert len(res.value.get_prop("reason")) == 1
|
||||
assert isinstance(res.value.get_prop("reason")[0], LexerError)
|
||||
assert res.value.get_prop("reason")[0].message == error_msg
|
||||
assert res.value.get_prop("reason")[0].text == error_text
|
||||
|
||||
def test_i_can_parse_a_concept(self):
|
||||
text = "c:name|key: + 1"
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
import ast
|
||||
|
||||
import pytest
|
||||
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
|
||||
from core.concept import Concept
|
||||
from core.tokenizer import Token, TokenKind, Tokenizer
|
||||
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.MultipleConceptsParser import MultipleConceptsParser
|
||||
from parsers.PythonParser import PythonNode, PythonErrorNode
|
||||
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
|
||||
from parsers.PythonParser import PythonNode
|
||||
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
||||
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
|
||||
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
|
||||
multiple_concepts_parser = MultipleConceptsParser()
|
||||
unrecognized_nodes_parser = UnrecognizedNodeParser()
|
||||
|
||||
|
||||
def ret_val(*args):
|
||||
@@ -28,7 +27,7 @@ def ret_val(*args):
|
||||
result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
|
||||
index += len(tokens)
|
||||
|
||||
return ReturnValueConcept("who", False, ParserResultConcept(parser=multiple_concepts_parser, value=result))
|
||||
return ReturnValueConcept("who", False, ParserResultConcept(parser=unrecognized_nodes_parser, value=result))
|
||||
|
||||
|
||||
def to_str_ast(expression):
|
||||
@@ -40,7 +39,7 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
|
||||
@pytest.mark.parametrize("text, interested", [
|
||||
("not parser result", False),
|
||||
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
|
||||
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
|
||||
(ParserResultConcept(parser=unrecognized_nodes_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
|
||||
])
|
||||
def test_not_interested(self, text, interested):
|
||||
context = self.get_context()
|
||||
@@ -130,9 +129,6 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
|
||||
|
||||
parser = PythonWithConceptsParser()
|
||||
result = parser.parse(context, input_return_value.body)
|
||||
wrapper = result.value
|
||||
return_value = result.value.value
|
||||
|
||||
assert not result.status
|
||||
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
|
||||
assert isinstance(return_value[0], PythonErrorNode)
|
||||
assert context.sheerka.isinstance(result.value, BuiltinConcepts.NOT_FOR_ME)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,383 @@
|
||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
||||
from core.concept import Concept, CC
|
||||
from core.tokenizer import Tokenizer, TokenKind
|
||||
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, scnode, cnode, \
|
||||
utnode, SyaAssociativity, CN, CNC, UTN
|
||||
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
|
||||
|
||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||
from tests.parsers.parsers_utils import compute_expected_array, get_node
|
||||
|
||||
|
||||
def get_input_nodes_from(my_concepts_map, full_expr, *args):
    """
    Build the parser input nodes described by ``args`` for the expression ``full_expr``.

    Every argument is first resolved with ``get_node`` and then converted:

    * ``CC``                        -> the concept itself
    * ``utnode`` / ``UTN``          -> ``UnrecognizedTokensNode`` over the token slice
    * ``CNC`` / ``CN`` / ``cnode``  -> ``ConceptNode`` over the token slice

    Entries found in a node's ``compiled`` mapping are converted recursively and
    stored in the ``compiled`` mapping of the resulting concept.

    :param my_concepts_map: mapping used to look a concept up by ``concept_key``
    :param full_expr: the whole expression; tokenized once, then sliced per node
    :param args: node descriptions, in whatever form ``get_node`` accepts
    :return: the converted nodes, one per argument, in the same order
    :raises NotImplementedError: when a resolved node is of an unsupported type
    """
    full_expr_as_tokens = list(Tokenizer(full_expr))
    tokens_for_get_node = [token.value for token in full_expr_as_tokens if token.type != TokenKind.EOF]

    def _concept_of(n):
        # Use the concept carried by the node when there is one, otherwise
        # create a fresh Concept initialised from the map.
        concept = getattr(n, "concept", None)
        if concept:
            return concept
        return Concept().update_from(my_concepts_map[n.concept_key])

    def _convert_compiled(n, concept):
        # Recursively convert the compiled entries of n into the concept.
        for k, v in n.compiled.items():
            concept.compiled[k] = _get_real_node(v)

    def _get_real_node(n):
        if isinstance(n, CC):
            concept = _concept_of(n)
            _convert_compiled(n, concept)
            return concept

        if isinstance(n, (utnode, UTN)):
            return UnrecognizedTokensNode(n.start, n.end, full_expr_as_tokens[n.start: n.end + 1])

        if isinstance(n, (CNC, CN, cnode)):
            concept = _concept_of(n)
            tokens = full_expr_as_tokens[n.start: n.end + 1]
            # The check must be made on the node being converted (n), not on the
            # top-level node of the enclosing loop: a nested node may have no
            # ``compiled`` attribute even when its ancestor has one.
            if hasattr(n, "compiled"):
                _convert_compiled(n, concept)
            return ConceptNode(concept, n.start, n.end, tokens)

        raise NotImplementedError()

    res = []
    for arg in args:
        node = get_node(my_concepts_map, tokens_for_get_node, arg)
        res.append(_get_real_node(node))

    return res
|
||||
|
||||
|
||||
# Concepts shared by the tests of this module, keyed by a short alias.
# The insertion order matters: init_parser() writes the initialised concepts
# back into this dict by position.
concepts_map = {
    # concept with five properties
    "5params": (
        Concept("5params")
        .def_prop("a")
        .def_prop("b")
        .def_prop("c")
        .def_prop("d")
        .def_prop("e")
    ),

    # binary concepts with properties a and b
    "plus": Concept("a plus b", body="a + b").def_prop("a").def_prop("b"),
    "mult": Concept("a mult b", body="a * b").def_prop("a").def_prop("b"),

    # concepts without properties
    "one": Concept("one", body="1"),
    "two": Concept("two", body="2"),
    "three": Concept("three", body="3"),

    # concept declared through a definition string
    "twenties": Concept(
        "twenties",
        definition="'twenty' (one|two)=unit",
        body="20 + unit",
    ).def_prop("unit"),

    # "hello one" can match either of these two concepts
    "hello_atom": Concept("hello one"),
    "hello_sya": Concept("hello a").def_prop("a"),

    # two concepts that differ only by the name of their property
    "greetings_a": Concept("greetings a").def_prop("a"),
    "greetings_b": Concept("greetings b").def_prop("b"),
}
|
||||
|
||||
|
||||
class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
    """
    Tests for UnrecognizedNodeParser: validate_concept_node() and parse().

    A sheerka instance initialised with ``concepts_map`` is created once in
    setup_class() and reused by every test that calls init_parser() without
    arguments.
    """

    # Shared instance, assigned in setup_class().
    sheerka = None

    @classmethod
    def setup_class(cls):
        """Create the shared sheerka and register the sya definitions of 'mult' and 'plus'."""
        sheerka, context, _ = cls().init_parser(concepts_map, create_new=True)
        cls.sheerka = sheerka
        sheerka.set_sya_def(context, [
            (concepts_map["mult"].id, 20, SyaAssociativity.Right),
            (concepts_map["plus"].id, 10, SyaAssociativity.Right),
        ])

    def init_parser(self, my_concepts_map=None, **kwargs):
        """
        Return ``(sheerka, context, parser)`` for a test.

        :param my_concepts_map: when given (and not empty), its concepts are passed to
            init_concepts() and the dict is updated IN PLACE with the concepts returned;
            otherwise the sheerka created by setup_class() is reused
        :param kwargs: forwarded to init_concepts()
        :return: tuple (sheerka, context, a new UnrecognizedNodeParser)
        """
        if my_concepts_map:
            sheerka, context, *updated_concepts = self.init_concepts(*my_concepts_map.values(), **kwargs)
            # Only values are replaced, so iterating over the keys meanwhile is safe.
            for i, key in enumerate(my_concepts_map):
                my_concepts_map[key] = updated_concepts[i]
        else:
            sheerka = type(self).sheerka
            context = self.get_context(sheerka)

        parser = UnrecognizedNodeParser()
        return sheerka, context, parser

    @staticmethod
    def _single_successful_result(sheerka, results, who):
        """
        Assert that ``results`` holds exactly one successful RETURN_VALUE whose
        ``who`` equals the given value, and return that result.
        """
        assert len(results) == 1
        result = results[0]
        assert sheerka.isinstance(result, BuiltinConcepts.RETURN_VALUE)
        assert result.status
        assert result.who == who
        return result

    def test_i_can_validate_a_valid_concept_node(self):
        """A node built from the single concept 'one' is validated successfully."""
        _, context, parser = self.init_parser()
        node = get_input_nodes_from(concepts_map, "one", "one")[0]

        res = parser.validate_concept_node(context, node)

        assert res.status
        assert res.body.concept == concepts_map["one"]

    def test_i_can_validate_concept_unrecognized_tokens(self):
        """Each of the five compiled properties ends up as one successful return value."""
        sheerka, context, parser = self.init_parser()
        node = get_input_nodes_from(
            concepts_map,
            "5params one two three twenty one 1 + 2 one plus two mult three",
            CNC("5params",
                a=" one ",
                b=" two three ",
                c=" twenty one ",
                d=utnode(12, 18, " 1 + 2 "),
                e=" one plus two mult three"))[0]

        res = parser.validate_concept_node(context, node)

        assert res.status

        concept = res.body.concept
        assert concept == concepts_map["5params"]

        prop_a = self._single_successful_result(sheerka, concept.compiled["a"], "parsers.AtomNode")
        assert prop_a.body.body == [cnode("one", 1, 1, "one")]

        prop_b = self._single_successful_result(sheerka, concept.compiled["b"], "parsers.AtomNode")
        assert prop_b.body.body == [cnode("two", 1, 1, "two"), cnode("three", 3, 3, "three")]

        prop_c = self._single_successful_result(sheerka, concept.compiled["c"], "parsers.BnfNode")
        expected_nodes = compute_expected_array(
            concepts_map,
            " twenty one ",
            [CNC("twenties", source="twenty one", unit="one", one="one")])
        assert prop_c.body.body == expected_nodes

        prop_d = self._single_successful_result(sheerka, concept.compiled["d"], "parsers.Python")
        assert prop_d.body.source == "1 + 2"

        prop_e = self._single_successful_result(sheerka, concept.compiled["e"], "parsers.SyaNode")
        expected_nodes = compute_expected_array(
            concepts_map,
            " one plus two mult three ",
            [CNC("plus", a="one", b=CC("mult", a="two", b="three"))])
        assert prop_e.body.body == expected_nodes

        # # sanity check, I can evaluate the concept
        # evaluated = sheerka.evaluate_concept(self.get_context(sheerka, eval_body=True), concept)
        # assert evaluated.key == concept.key
        # assert evaluated.get_prop("a") ==

    def test_i_can_validate_with_recursion(self):
        """The properties of a concept nested in another concept are validated as well."""
        sheerka, context, parser = self.init_parser()

        node = get_input_nodes_from(
            concepts_map,
            "1 plus 2 mult twenty two",
            CNC("plus",
                a="1 ",
                b=CC("mult", a=" 2 ", b=" twenty two")))[0]

        res = parser.validate_concept_node(context, node)

        assert res.status
        plus = res.body.concept
        assert plus == concepts_map["plus"]
        assert len(plus.compiled["a"]) == 1
        assert plus.compiled["a"][0].status
        assert plus.compiled["a"][0].who == "parsers.Python"
        assert plus.compiled["a"][0].body.source == "1"

        mult = plus.compiled["b"]
        assert mult == concepts_map["mult"]
        assert sheerka.isinstance(mult.compiled["a"][0], BuiltinConcepts.RETURN_VALUE)
        assert mult.compiled["a"][0].status
        assert mult.compiled["a"][0].who == "parsers.Python"
        assert mult.compiled["a"][0].body.source == "2"

        assert sheerka.isinstance(mult.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
        assert mult.compiled["b"][0].status
        assert mult.compiled["b"][0].who == "parsers.BnfNode"
        expected_nodes = compute_expected_array(
            concepts_map,
            " twenty two",
            [CNC("twenties", source="twenty two", unit="two", two="two")])
        assert mult.compiled["b"][0].body.body == expected_nodes

    # def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
    #     sheerka, context, parser = self.init_parser()
    #
    #     node = get_input_nodes_from(
    #         concepts_map,
    #         "one plus 1 + 1",
    #         CNC("plus",
    #             a=UTN("one "),
    #             b=UTN("1 + 1")))[0]
    #
    #     res = UnrecognizedNodeParser().validate_concept_node(context, node)
    #
    #     assert res.status
    #     assert res.body.concept == concepts_map["plus"]
    #     assert res.body.concept.compiled["a"] == concepts_map["one"]
    #     assert len(res.body.concept.compiled["b"]) == 1
    #     assert sheerka.isinstance(res.body.concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
    #     assert res.body.concept.compiled["b"][0].status
    #     assert res.body.concept.compiled["b"][0].who == "parsers.Python"
    #     assert res.body.concept.compiled["b"][0].body.source == "1 + 1"
    #
    #     # # evaluate
    #     # context = self.get_context(sheerka, eval_body=True)
    #     # evaluated = sheerka.evaluate_concept(context, res.body.concept)
    #     # assert evaluated.body == 3

    # def test_i_can_validate_and_evaluate_concept_when_bnf_concept(self):
    #     sheerka, context, parser = self.init_parser()
    #     node = get_concept_node(concepts_map, "one plus twenty one", "plus", "one", "twenty one")
    #
    #     res = UnrecognizedNodeParser().validate_concept_node(context, node)
    #
    #     assert res.status
    #     assert res.body.concept == concepts_map["plus"]
    #     assert res.body.concept.compiled["a"] == concepts_map["one"]
    #     assert len(res.body.concept.compiled["b"]) == 1
    #     assert res.body.concept.compiled["b"][0].status
    #     assert res.body.concept.compiled["b"][0].who == "parsers.BnfNode"
    #
    #     # evaluate
    #     context = self.get_context(sheerka, eval_body=True)
    #     evaluated = sheerka.evaluate_concept(context, res.body.concept)
    #     assert evaluated.body == 22

    def test_i_can_parse_and_evaluate_unrecognized_python_node(self):
        """Unrecognized tokens '1 + 1' are returned as a single source code node."""
        sheerka, context, parser = self.init_parser()

        expression = "1 + 1"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1
        assert actual_nodes[0] == scnode(0, 4, expression)

    def test_i_can_parse_unrecognized_bnf_concept_node(self):
        """Unrecognized tokens 'twenty one' are returned as the 'twenties' concept."""
        sheerka, context, parser = self.init_parser()

        expression = "twenty one"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1
        expected_array = compute_expected_array(
            concepts_map,
            expression, [CNC("twenties", source=expression, unit="one", one="one")])
        assert actual_nodes == expected_array

    def test_i_can_parse_unrecognized_sya_concept_node(self):
        """Unrecognized tokens 'one plus two mult three' are returned as plus(one, mult(two, three))."""
        sheerka, context, parser = self.init_parser()

        expression = "one plus two mult three"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        parser_result = res.body
        actual_nodes = parser_result.body

        assert res.status
        assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
        assert len(actual_nodes) == 1

        expected_array = compute_expected_array(
            concepts_map,
            expression, [CNC("plus",
                             a="one",
                             b=CC("mult", source="two mult three", a="two", b="three"))])
        assert actual_nodes == expected_array

    def test_i_can_parse_sequences(self):
        """A concept node followed by unrecognized tokens yields a sequence of two concept nodes."""
        _, context, parser = self.init_parser()

        expression = "one plus two three"
        sequence = get_input_nodes_from(concepts_map, expression,
                                        CNC("plus", a="one", b="two"),
                                        utnode(5, 6, " three"))
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=sequence)

        res = parser.parse(context, parser_input)
        actual_nodes = res.body.body

        assert res.status

        expected_array = compute_expected_array(
            concepts_map,
            expression, [
                CNC("plus", a="one", b="two"),
                CN("three", start=6, end=6)])
        assert actual_nodes == expected_array

    def test_i_can_parse_when_multiple_atom_and_sya(self):
        """'hello one' matches both 'hello_atom' and 'hello_sya': two results are returned."""
        _, context, parser = self.init_parser()
        expression = "two hello one three"
        nodes = get_input_nodes_from(concepts_map, expression,
                                     "two", UTN("hello one"), "three")
        # The source is the expression the nodes were built from (it used to be
        # an unrelated string left over from another test).
        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        assert len(res) == 2
        assert res[0].status
        assert res[1].status

        actual_nodes0 = res[0].body.body
        expected_0 = compute_expected_array(concepts_map, expression, [
            CN("two", 0, 0),
            CN("hello_atom", source="hello one", start=2, end=4),
            CN("three", 6, 6)])
        assert actual_nodes0 == expected_0

        actual_nodes1 = res[1].body.body
        expected_1 = compute_expected_array(concepts_map, expression, [
            CN("two", 0, 0),
            CNC("hello_sya", source="hello one", start=2, end=4, a="one"),
            CN("three", 6, 6)])

        assert actual_nodes1 == expected_1

    def test_i_can_parse_when_multiple_sya_concepts(self):
        """'greetings two' matches both 'greetings_a' and 'greetings_b': two results are returned."""
        _, context, parser = self.init_parser()
        expression = "greetings two"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

        res = parser.parse(context, parser_input)
        assert len(res) == 2
        assert res[0].status
        assert res[1].status

        actual_nodes0 = res[0].body.body
        expected_0 = compute_expected_array(concepts_map, expression, [
            CNC("greetings_a", source="greetings two", start=0, end=2, a="two")])
        assert actual_nodes0 == expected_0

        actual_nodes1 = res[1].body.body
        expected_1 = compute_expected_array(concepts_map, expression, [
            CNC("greetings_b", source="greetings two", start=0, end=2, b="two")])
        assert actual_nodes1 == expected_1

    def test_i_cannot_parse_when_i_cannot_validate(self):
        """A concept node holding a property that cannot be recognized yields an ERROR."""
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
        expression = "one plus unknown tokens"
        nodes = get_input_nodes_from(concepts_map, expression,
                                     CNC("plus", a="one ", b=" unknown tokens"))

        # NOTE(review): source does not match expression here - presumably irrelevant for this test, confirm
        parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes)
        res = parser.parse(context, parser_input)

        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)

    def test_i_cannot_parse_when_unrecognized(self):
        """Tokens that no parser recognizes are returned unchanged, with a failed status."""
        _, context, parser = self.init_parser(concepts_map, create_new=True)
        expression = "unknown tokens"
        nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))

        # NOTE(review): source does not match expression here - presumably irrelevant for this test, confirm
        parser_input = ParserResultConcept("parsers.xxx", source="six", value=nodes)
        res = parser.parse(context, parser_input)
        actual_nodes = res.body.body

        assert not res.status
        assert actual_nodes == nodes
|
||||
Reference in New Issue
Block a user