I can define and eval BNF definitions
This commit is contained in:
+33
-13
@@ -1,5 +1,4 @@
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from functools import lru_cache
|
|
||||||
|
|
||||||
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
|
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
|
||||||
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
|
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
|
||||||
@@ -40,11 +39,12 @@ class Sheerka(Concept):
|
|||||||
|
|
||||||
#
|
#
|
||||||
# Cache for all concepts BNF
|
# Cache for all concepts BNF
|
||||||
|
#
|
||||||
self.concepts_definitions = {}
|
self.concepts_definitions = {}
|
||||||
|
|
||||||
#
|
#
|
||||||
# cache for concepts grammars
|
# cache for concepts grammars
|
||||||
# a grammar can be seen as a resolved BNF
|
# a grammar is a resolved BNF
|
||||||
self.concepts_grammars = {}
|
self.concepts_grammars = {}
|
||||||
|
|
||||||
# a concept can be instantiated
|
# a concept can be instantiated
|
||||||
@@ -79,14 +79,18 @@ class Sheerka(Concept):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
self.init_logging()
|
self.init_logging()
|
||||||
self.sdp = SheerkaDataProvider(root_folder)
|
|
||||||
|
|
||||||
|
self.sdp = SheerkaDataProvider(root_folder)
|
||||||
if self.sdp.first_time:
|
if self.sdp.first_time:
|
||||||
self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)
|
self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)
|
||||||
|
|
||||||
|
evt_digest = self.sdp.save_event(Event("Initializing Sheerka."))
|
||||||
|
exec_context = ExecutionContext(self.key, evt_digest, self)
|
||||||
|
|
||||||
self.initialize_builtin_concepts()
|
self.initialize_builtin_concepts()
|
||||||
self.initialize_builtin_parsers()
|
self.initialize_builtin_parsers()
|
||||||
self.initialize_builtin_evaluators()
|
self.initialize_builtin_evaluators()
|
||||||
|
self.initialize_concepts_definitions(exec_context)
|
||||||
|
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e)
|
return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e)
|
||||||
@@ -149,19 +153,35 @@ class Sheerka(Concept):
|
|||||||
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
|
||||||
self.evaluators.append(evaluator)
|
self.evaluators.append(evaluator)
|
||||||
|
|
||||||
def logger_filter(self, record: logging.LogRecord):
|
def initialize_concepts_definitions(self, execution_context):
|
||||||
if 'all' in self.loggers:
|
init_log.debug("Initializing concepts definitions")
|
||||||
return True
|
definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
|
||||||
|
|
||||||
ret = True
|
if definitions is None:
|
||||||
if 'init' not in self.loggers and record.name.endswith(".init"):
|
init_log.debug("No BNF defined")
|
||||||
ret = False
|
return
|
||||||
|
|
||||||
return ret
|
lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]()
|
||||||
|
ret_val = lexer_parser.initialize(execution_context, definitions)
|
||||||
|
if not ret_val.status:
|
||||||
|
init_log.error("Failed to initialize concepts definitions " + str(ret_val.body))
|
||||||
|
return
|
||||||
|
|
||||||
|
self.concepts_grammars = lexer_parser.concepts_grammars
|
||||||
|
|
||||||
def init_logging(self):
|
def init_logging(self):
|
||||||
|
def _logger_filter(record: logging.LogRecord):
|
||||||
|
if 'all' in self.loggers:
|
||||||
|
return True
|
||||||
|
|
||||||
|
ret = True
|
||||||
|
if 'init' not in self.loggers and record.name.endswith(".init"):
|
||||||
|
ret = False
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
handler = logging.StreamHandler()
|
handler = logging.StreamHandler()
|
||||||
handler.addFilter(self.logger_filter)
|
handler.addFilter(_logger_filter)
|
||||||
if self.debug:
|
if self.debug:
|
||||||
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
|
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
|
||||||
log_level = logging.DEBUG
|
log_level = logging.DEBUG
|
||||||
@@ -211,7 +231,7 @@ class Sheerka(Concept):
|
|||||||
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
|
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
|
||||||
log.debug(f"Parsing {debug_text}")
|
log.debug(f"Parsing {debug_text}")
|
||||||
for parser in self.parsers.values():
|
for parser in self.parsers.values():
|
||||||
p = parser()
|
p = parser(sheerka=self)
|
||||||
res = p.parse(context, text)
|
res = p.parse(context, text)
|
||||||
if isinstance(res, list):
|
if isinstance(res, list):
|
||||||
result.extend(res)
|
result.extend(res)
|
||||||
@@ -347,7 +367,7 @@ class Sheerka(Concept):
|
|||||||
concepts_definitions[concept] = concept.bnf
|
concepts_definitions[concept] = concept.bnf
|
||||||
|
|
||||||
# check if it's a valid BNF or whether it breaks the known rules
|
# check if it's a valid BNF or whether it breaks the known rules
|
||||||
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy())
|
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](grammars=self.concepts_grammars.copy())
|
||||||
sub_context = context.push(self.name, "Initializing concept definition")
|
sub_context = context.push(self.name, "Initializing concept definition")
|
||||||
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
|
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
|
||||||
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
|
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import importlib
|
import importlib
|
||||||
import inspect
|
import inspect
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import sys
|
|
||||||
|
|
||||||
from core.tokenizer import TokenKind
|
from core.tokenizer import TokenKind
|
||||||
|
|
||||||
|
|||||||
@@ -1,14 +1,13 @@
|
|||||||
from core.ast.nodes import python_to_concept
|
from core.ast.nodes import python_to_concept
|
||||||
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
|
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept
|
||||||
from core.builtin_helpers import get_names
|
from core.builtin_helpers import get_names
|
||||||
from core.concept import Concept
|
from core.concept import Concept
|
||||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||||
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
|
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
|
||||||
from parsers.DefaultParser import DefConceptNode
|
from parsers.DefaultParser import DefConceptNode
|
||||||
import functools
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
|
from parsers.PythonParser import PythonNode
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -23,7 +22,12 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
|
|||||||
self.names = set()
|
self.names = set()
|
||||||
|
|
||||||
def visit_ConceptMatch(self, node):
|
def visit_ConceptMatch(self, node):
|
||||||
self.names.add(node.rule_name or node.concept_name)
|
if node.rule_name:
|
||||||
|
self.names.add(node.rule_name)
|
||||||
|
elif isinstance(node.concept, Concept):
|
||||||
|
self.names.add(node.concept.name)
|
||||||
|
else:
|
||||||
|
self.names.add(node.concept)
|
||||||
|
|
||||||
def visit_all(self, node):
|
def visit_all(self, node):
|
||||||
if node.rule_name:
|
if node.rule_name:
|
||||||
|
|||||||
@@ -4,12 +4,16 @@ from core.concept import Concept, ConceptParts
|
|||||||
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from parsers.BaseParser import BaseParser
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ConceptEvaluator(OneReturnValueEvaluator):
|
class ConceptEvaluator(OneReturnValueEvaluator):
|
||||||
|
"""
|
||||||
|
The concept evaluator is the main class that knows what to do with a concept
|
||||||
|
It verifies the PRE
|
||||||
|
If ok, can execute or not the BODY
|
||||||
|
Then checks the POST conditions
|
||||||
|
"""
|
||||||
NAME = "Concept"
|
NAME = "Concept"
|
||||||
evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
|
evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,92 @@
|
|||||||
|
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
||||||
|
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from parsers.ConceptLexerParser import ConceptNode, TerminalNode, NonTerminalNode, ConceptMatch
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ConceptNodeEvaluator(OneReturnValueEvaluator):
    """
    After a BNF is recognized, generates the concept or the list of concepts.

    The evaluator accepts a successful return value wrapping a ParserResultConcept
    whose value is either a ConceptNode or a non-empty sequence starting with one.
    """

    NAME = "ConceptNode"

    def __init__(self):
        # NOTE(review): the original comment said "more than the ConceptNodeEvaluator",
        # which is this very class - presumably ConceptEvaluator was meant; confirm.
        super().__init__(self.NAME, 60)

    def matches(self, context, return_value):
        """
        Tell whether this evaluator can handle ``return_value``.

        :param context: execution context (not used here)
        :param return_value: the return value to inspect
        :return: True for a successful parser result holding ConceptNode(s)
        """
        if not return_value.status:
            return False

        parser_result = return_value.value
        if not isinstance(parser_result, ParserResultConcept):
            return False

        parsed = parser_result.value
        if isinstance(parsed, ConceptNode):
            return True

        # otherwise only a non-empty iterable whose first item is a ConceptNode matches
        return (hasattr(parsed, "__iter__") and
                len(parsed) > 0 and
                isinstance(parsed[0], ConceptNode))

    def eval(self, context, return_value):
        """
        From a concept node, creates a new concept
        and makes sure that the properties are correctly set

        :param context: execution context, gives access to sheerka
        :param return_value: return value whose parser result holds the node(s)
        :return: a successful return value wrapping the single created concept
        :raises NotImplementedError: when the number of produced concepts is not exactly one
        """
        sheerka = context.sheerka
        nodes = return_value.value.value
        if not hasattr(nodes, "__iter__"):
            nodes = [nodes]

        concepts = []
        for node in nodes:
            concept = sheerka.new(node.concept.key)
            concepts.append(self.update_concept(sheerka, concept, node.underlying))

        if len(concepts) == 1:
            return sheerka.ret(
                self.name,
                True,
                concepts[0],
                parents=[return_value])

        raise NotImplementedError("Not yet")

    def update_concept(self, sheerka, concept, underlying):
        """
        Updates the properties of the concept from the parse tree rooted at ``underlying``.

        :param sheerka: used to instantiate the concepts referenced by ConceptMatch children
        :param concept: the concept to update (modified in place)
        :param underlying: parse tree node (exposes parsing_expression and source)
        :return: the same ``concept`` instance
        """

        def _add_prop(c, prop_name, value):
            """
            Adds a new entry,
            makes a list if the property already exists
            """
            if prop_name not in c.props or c.props[prop_name].value is None:
                c.set_prop(prop_name, value)
                return

            current = c.props[prop_name].value
            if isinstance(current, list):
                # third and later occurrences extend the list instead of nesting it
                c.set_prop(prop_name, [*current, value])
            else:
                c.set_prop(prop_name, [current, value])

        parsing_expression = underlying.parsing_expression

        if parsing_expression.rule_name:
            _add_prop(concept, parsing_expression.rule_name, underlying.source)

        if isinstance(underlying, NonTerminalNode):
            for child in underlying.children:
                if isinstance(child.parsing_expression, ConceptMatch):
                    new_concept = sheerka.new(child.parsing_expression.concept.key)
                    _add_prop(concept, child.parsing_expression.rule_name, new_concept)
                    if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT):
                        # nothing to fill in for a concept that could not be resolved
                        continue
                    self.update_concept(sheerka, new_concept, child.children[0])
                else:
                    # plain grammar node: its named parts belong to the current concept
                    self.update_concept(sheerka, concept, child)

        return concept
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.concept import Concept
|
|
||||||
import core.builtin_helpers
|
import core.builtin_helpers
|
||||||
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
|
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
|
||||||
import logging
|
import logging
|
||||||
@@ -13,6 +12,8 @@ class MultipleSameSuccessEvaluator(AllReturnValuesEvaluator):
|
|||||||
"""
|
"""
|
||||||
Used to filter the responses
|
Used to filter the responses
|
||||||
It has a low priority to let other evaluators try to resolve the errors
|
It has a low priority to let other evaluators try to resolve the errors
|
||||||
|
|
||||||
|
It reduces the responses when several evaluators give the same answer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
NAME = "MultipleSameSuccess"
|
NAME = "MultipleSameSuccess"
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ class OneSuccessEvaluator(AllReturnValuesEvaluator):
|
|||||||
"""
|
"""
|
||||||
Used to filter the responses
|
Used to filter the responses
|
||||||
It has a low priority to let other evaluators try to resolve the errors
|
It has a low priority to let other evaluators try to resolve the errors
|
||||||
|
|
||||||
|
Make sure that there is only one successful answer
|
||||||
"""
|
"""
|
||||||
|
|
||||||
NAME = "OneSuccess"
|
NAME = "OneSuccess"
|
||||||
|
|||||||
@@ -15,6 +15,10 @@ log = logging.getLogger(__name__)
|
|||||||
class PythonEvaluator(OneReturnValueEvaluator):
|
class PythonEvaluator(OneReturnValueEvaluator):
|
||||||
NAME = "Python"
|
NAME = "Python"
|
||||||
|
|
||||||
|
"""
|
||||||
|
Evaluate a Python node, ie, evaluate some Python code
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__(self.NAME, 50)
|
super().__init__(self.NAME, 50)
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ class TooManySuccessEvaluator(AllReturnValuesEvaluator):
|
|||||||
"""
|
"""
|
||||||
Used to filter the responses
|
Used to filter the responses
|
||||||
It has a low priority to let other evaluators try to resolve the errors
|
It has a low priority to let other evaluators try to resolve the errors
|
||||||
|
|
||||||
|
Raises an error when there are several successful answers, with different values
|
||||||
"""
|
"""
|
||||||
|
|
||||||
NAME = "TooManySuccess"
|
NAME = "TooManySuccess"
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import core.utils
|
|||||||
def usage():
    """Print the program banner and the command-line synopsis to stdout."""
    print("Sheerka v0.1\n")
    print("usage:")
    # a space is needed between the program name and the option list
    print(sys.argv[0] + " [-hdl:] command ")
|
||||||
|
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
|
|||||||
@@ -27,6 +27,12 @@ class ErrorNode(Node):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UnexpectedTokenErrorNode(ErrorNode):
    """Error node reporting a token that was not the one the parser expected."""

    message: str  # human readable description of the problem
    expected_tokens: list  # the token kinds that would have been accepted
|
||||||
|
|
||||||
|
|
||||||
class BaseParser:
|
class BaseParser:
|
||||||
PREFIX = "Parsers:"
|
PREFIX = "Parsers:"
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,227 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import core.utils
|
||||||
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
|
from core.sheerka import ExecutionContext
|
||||||
|
from core.tokenizer import Tokenizer, Token, TokenKind
|
||||||
|
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
|
||||||
|
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptMatch, StrMatch
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UnexpectedEndOfFileError(ErrorNode):
    """Error node without any field, reported when the input ends too early."""
|
||||||
|
|
||||||
|
|
||||||
|
class BnfParser:
    """
    Parser used to transform a literal into a ParsingExpression
    example :
    a | b, c -> Sequence(OrderedChoice(a, b) ,c)

    '|' (pipe) is used for OrderedChoice
    ',' (comma) is used for Sequence
    '?' (question mark) is used for Optional
    '*' (star) is used for ZeroOrMore
    '+' (plus) is used for OneOrMore

    NOTE(review): the code below separates the elements of a sequence with
    whitespace and parses sequences inside choices (parse_choice calls
    parse_sequence); no comma handling is visible here - confirm the example
    and the comma rule above against the tokenizer.
    """

    def __init__(self):
        self.has_error = False
        self.error_sink = []
        # NOTE(review): the name says "RegexParser" although the class is BnfParser;
        # left untouched because other code may look the parser up by name - confirm.
        self.name = BaseParser.PREFIX + "RegexParser"

        self.lexer_iter = None
        self._current = None  # token currently under examination
        self.after_current = None  # one token of look-ahead, filled by next_after()
        self.nb_open_par = 0  # number of '(' not closed yet
        self.context = None
        self.source = ""  # concatenation of the values of the consumed tokens
        self.sheerka = None

    def __eq__(self, other):
        # two BnfParser are always considered equal, whatever their state
        return isinstance(other, BnfParser)

    def __hash__(self):
        # must agree with __eq__: every BnfParser compares equal, so they share one hash
        return hash(BnfParser)

    @staticmethod
    def _eof_token():
        """Builds the token used to mark the end of the input."""
        return Token(TokenKind.EOF, "", -1, -1, -1)

    def _skip_blank_tokens(self):
        """
        Moves forward while the current token is a whitespace or a newline.
        Lets StopIteration escape when the lexer is exhausted.
        """
        while self._current.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
            self._current = next(self.lexer_iter)
            self.source += str(self._current.value)

    def reset_parser(self, context, text):
        """
        Prepares the parser for a new input and positions it on the first
        significant token.

        :param context: execution context, gives access to sheerka
        :param text: either a string (stripped, then tokenized) or an iterable of tokens
        """
        self.context = context
        self.sheerka = context.sheerka

        # forget what a previous call to parse() may have left behind
        self.has_error = False
        self.error_sink = []
        self.source = ""

        self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
        self._current = None
        self.after_current = None
        self.nb_open_par = 0

        self.next_token()
        self.eat_white_space()

    def add_error(self, error, next_token=True):
        """
        Records an error.

        :param error: the error node to record
        :param next_token: when True, also moves to the next token
        :return: the error that was given
        """
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        """Returns the current token."""
        return self._current

    def next_token(self, skip_whitespace=False):
        """
        Moves to the next token (the look-ahead token first, when there is one).
        Does nothing once the end of the input is reached.

        :param skip_whitespace: when True, also skips whitespaces and newlines
        """
        if self._current and self._current.type == TokenKind.EOF:
            return

        try:
            self._current = self.after_current or next(self.lexer_iter)
            self.source += str(self._current.value)
            self.after_current = None

            if skip_whitespace:
                self._skip_blank_tokens()
        except StopIteration:
            self._current = self._eof_token()

    def next_after(self):
        """
        Peeks at the token that follows the current one, without consuming it.

        :return: the look-ahead token (an EOF token when the lexer is exhausted)
        """
        if self.after_current is not None:
            return self.after_current

        try:
            self.after_current = next(self.lexer_iter)
        except StopIteration:
            self.after_current = self._eof_token()
        return self.after_current

    def eat_white_space(self):
        """
        Skips whitespaces and newlines.
        When a look-ahead token was read, it first becomes the current token.
        """
        if self.after_current is not None:
            self._current = self.after_current
            self.source += str(self._current.value)
            self.after_current = None

        try:
            self._skip_blank_tokens()
        except StopIteration:
            # same end-of-input marker as next_token(), so that callers
            # can always read token.type
            self._current = self._eof_token()

    def maybe_sequence(self, first, second):
        """
        True when the current token is ``second``,
        or when it is ``first`` immediately followed by ``second``.
        """
        token = self.get_token()
        return token.type == second or (token.type == first and self.next_after().type == second)

    def parse(self, context: ExecutionContext, text):
        """
        Parses ``text`` and wraps the outcome in a return value.

        :param context: execution context
        :param text: string or iterable of tokens
        :return: a return value whose status is False when errors were recorded;
                 the body is then the list of errors, otherwise the parsed tree
        """
        self.reset_parser(context, text)
        tree = self.parse_choice()

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            self.sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=self.source,
                body=self.error_sink if self.has_error else tree,
                try_parsed=tree))

        return ret

    def parse_choice(self):
        """
        Parses sequences separated by '|'.

        :return: the sequence itself when there is no '|', otherwise an OrderedChoice
        """
        elements = [self.parse_sequence()]

        while True:
            # maybe eat the vertical bar
            self.eat_white_space()
            token = self.get_token()
            if token is None or token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)

            elements.append(self.parse_sequence())

        if len(elements) == 1:
            return elements[0]
        return OrderedChoice(*elements)

    def _at_sequence_end(self):
        """
        True when the current sequence cannot continue: end of input, upcoming '|',
        or upcoming ')' while a parenthesis is open.
        """
        token = self.get_token()
        if token is None or token.type == TokenKind.EOF:
            return True
        if self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR):
            return True
        return self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR)

    def parse_sequence(self):
        """
        Parses consecutive expressions.

        :return: the expression itself when it is alone, otherwise a Sequence
        """
        elements = [self.parse_expression_and_modifier()]

        while not self._at_sequence_end():
            self.eat_white_space()
            elements.append(self.parse_expression_and_modifier())

        if len(elements) == 1:
            return elements[0]
        return Sequence(*elements)

    def parse_expression_and_modifier(self):
        """
        Parses an expression and its optional modifier ('?', '*' or '+').

        :return: the expression, wrapped in Optional, ZeroOrMore or OneOrMore
                 when the matching modifier follows it
        """
        expression = self.parse_expression()
        token = self.get_token()

        modifiers = (
            (TokenKind.QMARK, Optional),
            (TokenKind.STAR, ZeroOrMore),
            (TokenKind.PLUS, OneOrMore),
        )
        for kind, wrapper in modifiers:
            if token.type == kind:
                self.next_token()
                return wrapper(expression)

        return expression

    def parse_expression(self):
        """
        Parses a parenthesized choice, an identifier or any other token.

        :return: the inner expression for '( ... )', a ConceptMatch for an identifier,
                 a StrMatch (quotes stripped) for anything else
        """
        token = self.get_token()
        if token.type == TokenKind.EOF:
            # the error is recorded, then the EOF token is handled like any other
            # token by the StrMatch fallback at the end of this method
            self.add_error(UnexpectedEndOfFileError(), False)

        if token.type == TokenKind.LPAR:
            self.nb_open_par += 1
            self.next_token()
            expression = self.parse_choice()
            token = self.get_token()
            if token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
            else:
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
            return expression

        if token.type == TokenKind.IDENTIFIER:
            self.next_token()
            # only the name is kept, the concept is not looked up here
            return ConceptMatch(token.value)

        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
        return ret
|
||||||
+149
-332
@@ -1,5 +1,5 @@
|
|||||||
#####################################################################################################
|
#####################################################################################################
|
||||||
# This part of code is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
|
||||||
# I don't directly use the project, but it helped me figure out
|
# I don't directly use the project, but it helped me figure out
|
||||||
# what to do.
|
# what to do.
|
||||||
# Dejanović I., Milosavljević G., Vaderna R.:
|
# Dejanović I., Milosavljević G., Vaderna R.:
|
||||||
@@ -10,7 +10,6 @@ from dataclasses import field, dataclass
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.concept import Concept
|
from core.concept import Concept
|
||||||
from core.sheerka import ExecutionContext
|
|
||||||
from core.tokenizer import TokenKind, Tokenizer, Token
|
from core.tokenizer import TokenKind, Tokenizer, Token
|
||||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||||
import core.utils
|
import core.utils
|
||||||
@@ -40,6 +39,18 @@ def flatten(iterable):
|
|||||||
class LexerNode(Node):
|
class LexerNode(Node):
|
||||||
start: int
|
start: int
|
||||||
end: int
|
end: int
|
||||||
|
tokens: list = None
|
||||||
|
source: str = None
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
if self.source is None:
|
||||||
|
self.source = BaseParser.get_text_from_tokens(self.tokens)
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if not isinstance(other, LexerNode):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return self.start == other.start and self.end == other.end
|
||||||
|
|
||||||
|
|
||||||
class ConceptNode(LexerNode):
|
class ConceptNode(LexerNode):
|
||||||
@@ -48,17 +59,24 @@ class ConceptNode(LexerNode):
|
|||||||
It represents a recognized concept
|
It represents a recognized concept
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, concept, start, end, tokens=None, source=None, children=None):
|
def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
|
||||||
super().__init__(start, end)
|
super().__init__(start, end, tokens, source)
|
||||||
self.concept = concept
|
self.concept = concept
|
||||||
self.tokens = tokens
|
self.underlying = underlying
|
||||||
self.source = source
|
|
||||||
self.children = children
|
|
||||||
|
|
||||||
if self.source is None:
|
if self.source is None:
|
||||||
self.source = BaseParser.get_text_from_tokens(self.tokens)
|
self.source = BaseParser.get_text_from_tokens(self.tokens)
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
|
if isinstance(other, tuple):
|
||||||
|
if len(other) == 2:
|
||||||
|
return self.concept == other[0] and self.source == other[1]
|
||||||
|
else:
|
||||||
|
return self.concept == other[0] and \
|
||||||
|
self.start == other[1] and \
|
||||||
|
self.end == other[2] and \
|
||||||
|
self.source == other[3]
|
||||||
|
|
||||||
if not super().__eq__(other):
|
if not super().__eq__(other):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -66,10 +84,14 @@ class ConceptNode(LexerNode):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
return self.concept == other.concept and \
|
return self.concept == other.concept and \
|
||||||
self.source == other.source
|
self.source == other.source and \
|
||||||
|
self.underlying == other.underlying
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return hash((self.concept, self.start, self.end, self.source))
|
return hash((self.concept, self.start, self.end, self.source, self.underlying))
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
|
||||||
|
|
||||||
|
|
||||||
class NonTerminalNode(LexerNode):
|
class NonTerminalNode(LexerNode):
|
||||||
@@ -77,8 +99,8 @@ class NonTerminalNode(LexerNode):
|
|||||||
Returned by the ConceptLexerParser
|
Returned by the ConceptLexerParser
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, parsing_expression, start, end, children=None):
|
def __init__(self, parsing_expression, start, end, tokens, children=None):
|
||||||
super().__init__(start, end)
|
super().__init__(start, end, tokens)
|
||||||
self.parsing_expression = parsing_expression
|
self.parsing_expression = parsing_expression
|
||||||
self.children = children
|
self.children = children
|
||||||
|
|
||||||
@@ -90,6 +112,21 @@ class NonTerminalNode(LexerNode):
|
|||||||
sub_names = ""
|
sub_names = ""
|
||||||
return name + sub_names
|
return name + sub_names
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if not super().__eq__(other):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not isinstance(other, NonTerminalNode):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return self.parsing_expression == other.parsing_expression and \
|
||||||
|
self.start == other.start and \
|
||||||
|
self.end == other.end and \
|
||||||
|
self.children == other.children
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash((self.parsing_expression, self.start, self.end, self.children))
|
||||||
|
|
||||||
|
|
||||||
class TerminalNode(LexerNode):
|
class TerminalNode(LexerNode):
|
||||||
"""
|
"""
|
||||||
@@ -97,7 +134,7 @@ class TerminalNode(LexerNode):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, parsing_expression, start, end, value):
|
def __init__(self, parsing_expression, start, end, value):
|
||||||
super().__init__(start, end)
|
super().__init__(start, end, source=value)
|
||||||
self.parsing_expression = parsing_expression
|
self.parsing_expression = parsing_expression
|
||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
@@ -105,23 +142,27 @@ class TerminalNode(LexerNode):
|
|||||||
name = self.parsing_expression.rule_name or ""
|
name = self.parsing_expression.rule_name or ""
|
||||||
return name + f"'{self.value}'"
|
return name + f"'{self.value}'"
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
if not super().__eq__(other):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if not isinstance(other, TerminalNode):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return self.parsing_expression == other.parsing_expression and \
|
||||||
|
self.start == other.start and \
|
||||||
|
self.end == other.end and \
|
||||||
|
self.value == other.value
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash((self.parsing_expression, self.start, self.end, self.value))
|
||||||
|
|
||||||
|
|
||||||
@dataclass()
|
@dataclass()
|
||||||
class GrammarErrorNode(ErrorNode):
|
class GrammarErrorNode(ErrorNode):
|
||||||
message: str
|
message: str
|
||||||
|
|
||||||
|
|
||||||
@dataclass()
|
|
||||||
class UnexpectedTokenErrorNode(ErrorNode):
|
|
||||||
message: str
|
|
||||||
expected_tokens: list
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass()
|
|
||||||
class UnexpectedEndOfFileError(ErrorNode):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass()
|
@dataclass()
|
||||||
class UnknownConceptNode(ErrorNode):
|
class UnknownConceptNode(ErrorNode):
|
||||||
concept_key: str
|
concept_key: str
|
||||||
@@ -175,7 +216,7 @@ class Sequence(ParsingExpression):
|
|||||||
children.append(node)
|
children.append(node)
|
||||||
end_pos = node.end
|
end_pos = node.end
|
||||||
|
|
||||||
return NonTerminalNode(self, init_pos, end_pos, children)
|
return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or [])
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
to_str = ", ".join(repr(n) for n in self.elements)
|
to_str = ", ".join(repr(n) for n in self.elements)
|
||||||
@@ -194,7 +235,7 @@ class OrderedChoice(ParsingExpression):
|
|||||||
for e in self.nodes:
|
for e in self.nodes:
|
||||||
node = e.parse(parser)
|
node = e.parse(parser)
|
||||||
if node:
|
if node:
|
||||||
return NonTerminalNode(self, init_pos, node.end, [node])
|
return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
|
||||||
|
|
||||||
parser.seek(init_pos) # backtrack
|
parser.seek(init_pos) # backtrack
|
||||||
|
|
||||||
@@ -214,13 +255,18 @@ class Optional(ParsingExpression):
|
|||||||
|
|
||||||
def _parse(self, parser):
|
def _parse(self, parser):
|
||||||
init_pos = parser.pos
|
init_pos = parser.pos
|
||||||
selected_node = NonTerminalNode(self, parser.pos, -1, [])
|
selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
|
||||||
|
|
||||||
for e in self.nodes:
|
for e in self.nodes:
|
||||||
node = e.parse(parser)
|
node = e.parse(parser)
|
||||||
if node:
|
if node:
|
||||||
if node.end > selected_node.end:
|
if node.end > selected_node.end:
|
||||||
selected_node = node
|
selected_node = NonTerminalNode(
|
||||||
|
self,
|
||||||
|
node.start,
|
||||||
|
node.end,
|
||||||
|
parser.tokens[node.start: node.end + 1],
|
||||||
|
[node])
|
||||||
|
|
||||||
parser.seek(init_pos) # backtrack
|
parser.seek(init_pos) # backtrack
|
||||||
|
|
||||||
@@ -327,12 +373,12 @@ class ConceptMatch(Match):
|
|||||||
When the grammar is created, it is replaced by the actual concept
|
When the grammar is created, it is replaced by the actual concept
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, concept_name):
|
def __init__(self, concept, rule_name=""):
|
||||||
super(Match, self).__init__()
|
super(Match, self).__init__(rule_name=rule_name)
|
||||||
self.concept_name = concept_name
|
self.concept = concept
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"{self.concept_name}"
|
return f"{self.concept}"
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not super().__eq__(other):
|
if not super().__eq__(other):
|
||||||
@@ -341,32 +387,37 @@ class ConceptMatch(Match):
|
|||||||
if not isinstance(other, ConceptMatch):
|
if not isinstance(other, ConceptMatch):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return self.concept_name == other.concept_name
|
if isinstance(self.concept, Concept):
|
||||||
|
return self.concept.name == other.concept.name
|
||||||
|
|
||||||
class CrossRef:
|
|
||||||
"""
|
|
||||||
During the creation of the model,
|
|
||||||
Creates reference to a concept, as it may not be resolved yet
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, concept):
|
|
||||||
self.concept = concept
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return f"ref({self.concept.key})"
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
|
||||||
if not isinstance(other, CrossRef):
|
|
||||||
return False
|
|
||||||
|
|
||||||
return self.concept == other.concept
|
return self.concept == other.concept
|
||||||
|
|
||||||
|
def _parse(self, parser):
|
||||||
|
to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
|
||||||
|
if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if to_match not in parser.concepts_grammars:
|
||||||
|
return None
|
||||||
|
|
||||||
|
self.concept = to_match # Memoize
|
||||||
|
node = parser.concepts_grammars[to_match].parse(parser)
|
||||||
|
if node is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
|
||||||
|
|
||||||
|
|
||||||
class ConceptLexerParser(BaseParser):
|
class ConceptLexerParser(BaseParser):
|
||||||
def __init__(self, concepts_dict=None):
|
def __init__(self, **kwargs):
|
||||||
super().__init__("ConceptLexer")
|
super().__init__("ConceptLexer")
|
||||||
self.concepts_dict = concepts_dict or {} # dict of concept, grammar
|
if 'grammars' in kwargs:
|
||||||
|
self.concepts_grammars = kwargs.get("grammars")
|
||||||
|
elif 'sheerka' in kwargs:
|
||||||
|
self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
|
||||||
|
else:
|
||||||
|
self.concepts_grammars = {}
|
||||||
|
|
||||||
self.ignore_case = True
|
self.ignore_case = True
|
||||||
|
|
||||||
self.token = None
|
self.token = None
|
||||||
@@ -430,24 +481,23 @@ class ConceptLexerParser(BaseParser):
|
|||||||
self.pos -= 1
|
self.pos -= 1
|
||||||
self.token = self.tokens[self.pos]
|
self.token = self.tokens[self.pos]
|
||||||
|
|
||||||
def initialize(self, context, grammars):
|
def initialize(self, context, concepts_definitions):
|
||||||
"""
|
"""
|
||||||
Adds a bunch of concepts, and how they can be recognized
|
Adds a bunch of concepts, and how they can be recognized
|
||||||
:param context: execution context
|
:param context: execution context
|
||||||
:param grammars: dictionary of concept, concept_definition
|
:param concepts_definitions: dictionary of concept, concept_definition
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.context = context
|
self.context = context
|
||||||
self.sheerka = context.sheerka
|
self.sheerka = context.sheerka
|
||||||
nodes_to_resolve = []
|
|
||||||
concepts_to_resolve = set()
|
concepts_to_resolve = set()
|
||||||
|
|
||||||
# ## Gets the grammars
|
# ## Gets the grammars
|
||||||
for concept, concept_def in grammars.items():
|
for concept, concept_def in concepts_definitions.items():
|
||||||
concept.init_key() # make sure that the key is initialized
|
concept.init_key() # make sure that the key is initialized
|
||||||
grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve)
|
grammar = self.get_model(concept_def, concepts_to_resolve)
|
||||||
self.concepts_dict[concept] = grammar
|
self.concepts_grammars[concept] = grammar
|
||||||
|
|
||||||
if self.has_error:
|
if self.has_error:
|
||||||
return self.sheerka.ret(self.name, False, self.error_sink)
|
return self.sheerka.ret(self.name, False, self.error_sink)
|
||||||
@@ -456,73 +506,68 @@ class ConceptLexerParser(BaseParser):
|
|||||||
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
|
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
|
||||||
for concept in concepts_to_remove:
|
for concept in concepts_to_remove:
|
||||||
concepts_to_resolve.remove(concept)
|
concepts_to_resolve.remove(concept)
|
||||||
del self.concepts_dict[concept]
|
del self.concepts_grammars[concept]
|
||||||
|
|
||||||
# ## Resolves cross references and remove grammar with unresolved references
|
|
||||||
self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)
|
|
||||||
|
|
||||||
if self.has_error:
|
if self.has_error:
|
||||||
return self.sheerka.ret(self.name, False, self.error_sink)
|
return self.sheerka.ret(self.name, False, self.error_sink)
|
||||||
else:
|
else:
|
||||||
return self.sheerka.ret(self.name, True, self.concepts_dict)
|
return self.sheerka.ret(self.name, True, self.concepts_grammars)
|
||||||
|
|
||||||
def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve):
|
def get_concept(self, concept_name):
|
||||||
def get_concept(concept_name):
|
if concept_name in self.context.concepts_cache:
|
||||||
if concept_name in self.context.concepts_cache:
|
return self.context.concepts_cache[concept_name]
|
||||||
return self.context.concepts_cache[concept_name]
|
return self.sheerka.get(concept_name)
|
||||||
return self.sheerka.get(concept_name)
|
|
||||||
|
def get_model(self, concept_def, concepts_to_resolve):
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# inner_get_model must not modify the initial ParsingExpression
|
# inner_get_model must not modify the initial ParsingExpression
|
||||||
# A copy must be created
|
# A copy must be created
|
||||||
def inner_get_model(expression):
|
def inner_get_model(expression):
|
||||||
if isinstance(expression, Concept):
|
if isinstance(expression, Concept):
|
||||||
ret = CrossRef(expression)
|
ret = ConceptMatch(expression, rule_name=expression.name)
|
||||||
concepts_to_resolve.add(concept)
|
concepts_to_resolve.add(expression)
|
||||||
nodes_to_resolve.append(ret)
|
elif isinstance(expression, ConceptMatch):
|
||||||
|
if expression.rule_name is None or expression.rule_name == "":
|
||||||
|
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
|
||||||
|
else expression.concept
|
||||||
|
concepts_to_resolve.add(expression.concept)
|
||||||
|
ret = expression
|
||||||
elif isinstance(expression, str):
|
elif isinstance(expression, str):
|
||||||
ret = StrMatch(expression, ignore_case=self.ignore_case)
|
ret = StrMatch(expression, ignore_case=self.ignore_case)
|
||||||
elif isinstance(expression, StrMatch):
|
elif isinstance(expression, StrMatch):
|
||||||
ret = expression
|
ret = expression
|
||||||
if ret.ignore_case is None:
|
if ret.ignore_case is None:
|
||||||
ret.ignore_case = self.ignore_case
|
ret.ignore_case = self.ignore_case
|
||||||
elif isinstance(expression, ConceptMatch):
|
|
||||||
to_match = get_concept(expression.concept_name)
|
|
||||||
if hasattr(to_match, "__iter__"):
|
|
||||||
ret = self.add_error(TooManyConceptNode(expression.concept_name), False)
|
|
||||||
elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
|
||||||
ret = self.add_error(UnknownConceptNode(expression.concept_name), False)
|
|
||||||
else:
|
|
||||||
ret = CrossRef(to_match)
|
|
||||||
concepts_to_resolve.add(concept)
|
|
||||||
nodes_to_resolve.append(ret)
|
|
||||||
elif isinstance(expression, Sequence) or \
|
elif isinstance(expression, Sequence) or \
|
||||||
isinstance(expression, OrderedChoice) or \
|
isinstance(expression, OrderedChoice) or \
|
||||||
isinstance(expression, Optional):
|
isinstance(expression, Optional):
|
||||||
ret = expression
|
ret = expression
|
||||||
ret.nodes.extend([inner_get_model(e) for e in ret.elements])
|
ret.nodes.extend([inner_get_model(e) for e in ret.elements])
|
||||||
if any((isinstance(x, CrossRef) for x in ret.nodes)):
|
|
||||||
concepts_to_resolve.add(concept)
|
|
||||||
nodes_to_resolve.append(ret)
|
|
||||||
else:
|
else:
|
||||||
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
model = inner_get_model(concept_def)
|
model = inner_get_model(concept_def)
|
||||||
if isinstance(model, CrossRef):
|
|
||||||
concepts_to_resolve.add(concept)
|
|
||||||
|
|
||||||
model.rule_name = concept.key
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
def detect_infinite_recursion(self, concepts_to_resolve):
|
def detect_infinite_recursion(self, concepts_to_resolve):
|
||||||
|
|
||||||
# infinite recursion matcher
|
# infinite recursion matcher
|
||||||
def _is_infinite_recursion(ref_concept, node):
|
def _is_infinite_recursion(ref_concept, node):
|
||||||
if isinstance(node, CrossRef):
|
if isinstance(node, ConceptMatch):
|
||||||
if node.concept == ref_concept:
|
if node.concept == ref_concept:
|
||||||
return True
|
return True
|
||||||
return _is_infinite_recursion(ref_concept, self.concepts_dict[node.concept])
|
|
||||||
|
if isinstance(node.concept, str):
|
||||||
|
to_match = self.get_concept(node.concept)
|
||||||
|
if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
to_match = node.concept
|
||||||
|
|
||||||
|
return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
|
||||||
|
|
||||||
if isinstance(node, OrderedChoice):
|
if isinstance(node, OrderedChoice):
|
||||||
return _is_infinite_recursion(ref_concept, node.nodes[0])
|
return _is_infinite_recursion(ref_concept, node.nodes[0])
|
||||||
@@ -537,32 +582,16 @@ class ConceptLexerParser(BaseParser):
|
|||||||
|
|
||||||
removed_concepts = []
|
removed_concepts = []
|
||||||
for e in concepts_to_resolve:
|
for e in concepts_to_resolve:
|
||||||
to_resolve = self.concepts_dict[e]
|
if isinstance(e, str):
|
||||||
|
e = self.get_concept(e)
|
||||||
|
if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||||
|
continue
|
||||||
|
|
||||||
|
to_resolve = self.concepts_grammars[e]
|
||||||
if _is_infinite_recursion(e, to_resolve):
|
if _is_infinite_recursion(e, to_resolve):
|
||||||
removed_concepts.append(e)
|
removed_concepts.append(e)
|
||||||
return removed_concepts
|
return removed_concepts
|
||||||
|
|
||||||
# Cross-ref resolving
|
|
||||||
def resolve_cross_references(self, concepts_to_resolve, nodes_to_resolve):
|
|
||||||
|
|
||||||
repeat = True
|
|
||||||
while repeat:
|
|
||||||
repeat = False
|
|
||||||
for e in concepts_to_resolve:
|
|
||||||
to_resolve = self.concepts_dict[e]
|
|
||||||
if isinstance(to_resolve, CrossRef):
|
|
||||||
repeat = True
|
|
||||||
self.concepts_dict[e] = self.concepts_dict[to_resolve.concept]
|
|
||||||
|
|
||||||
for e in nodes_to_resolve:
|
|
||||||
if not isinstance(e, ParsingExpression):
|
|
||||||
continue # cases when a concept directly references another concept
|
|
||||||
|
|
||||||
for i, node in enumerate(e.nodes):
|
|
||||||
if isinstance(node, CrossRef):
|
|
||||||
if node.concept in self.concepts_dict:
|
|
||||||
e.nodes[i] = self.concepts_dict[node.concept]
|
|
||||||
|
|
||||||
def parse(self, context, text):
|
def parse(self, context, text):
|
||||||
if text == "":
|
if text == "":
|
||||||
return context.sheerka.ret(
|
return context.sheerka.ret(
|
||||||
@@ -591,13 +620,17 @@ class ConceptLexerParser(BaseParser):
|
|||||||
while True:
|
while True:
|
||||||
init_pos = self.pos
|
init_pos = self.pos
|
||||||
res = []
|
res = []
|
||||||
for concept, grammar in self.concepts_dict.items():
|
for concept, grammar in self.concepts_grammars.items():
|
||||||
self.seek(init_pos)
|
self.seek(init_pos)
|
||||||
node = grammar.parse(self)
|
node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
|
||||||
if node is not None:
|
if node is not None:
|
||||||
concept_node = ConceptNode(concept, node.start, node.end, self.tokens[node.start: node.end + 1])
|
concept_node = ConceptNode(
|
||||||
if hasattr(node, "children"):
|
concept,
|
||||||
concept_node.children = node.children
|
node.start,
|
||||||
|
node.end,
|
||||||
|
self.tokens[node.start: node.end + 1],
|
||||||
|
None,
|
||||||
|
node)
|
||||||
res.append(concept_node)
|
res.append(concept_node)
|
||||||
|
|
||||||
if len(res) == 0: # not recognized
|
if len(res) == 0: # not recognized
|
||||||
@@ -606,9 +639,7 @@ class ConceptLexerParser(BaseParser):
|
|||||||
self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized))
|
self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized))
|
||||||
break
|
break
|
||||||
|
|
||||||
res = self.get_bests(res) # only keep the concept that eat the more tokens
|
res = self.get_bests(res) # only keep the concepts that eat the more tokens
|
||||||
for r in res:
|
|
||||||
r.children = flatten(r.children)
|
|
||||||
concepts_found = core.utils.product(concepts_found, res)
|
concepts_found = core.utils.product(concepts_found, res)
|
||||||
|
|
||||||
# loop
|
# loop
|
||||||
@@ -659,220 +690,6 @@ class ConceptLexerParser(BaseParser):
|
|||||||
return by_end_pos[max(by_end_pos)]
|
return by_end_pos[max(by_end_pos)]
|
||||||
|
|
||||||
|
|
||||||
class RegexParser:
|
|
||||||
"""
|
|
||||||
Parser used to transform litteral into ParsingExpression
|
|
||||||
example :
|
|
||||||
a | b, c -> Sequence(OrderedChoice(a, b) ,c)
|
|
||||||
|
|
||||||
'|' (pipe) is used for OrderedChoice
|
|
||||||
',' (comma) is used for Sequence
|
|
||||||
'?' (question mark) is used for Optional
|
|
||||||
'*' (star) is used for ZeroOrMore
|
|
||||||
'+' (plus) is used for OneOrMore
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.has_error = False
|
|
||||||
self.error_sink = []
|
|
||||||
self.name = BaseParser.PREFIX + "RegexParser"
|
|
||||||
|
|
||||||
self.lexer_iter = None
|
|
||||||
self._current = None
|
|
||||||
self.after_current = None
|
|
||||||
self.nb_open_par = 0
|
|
||||||
self.context = None
|
|
||||||
self.source = ""
|
|
||||||
self.sheerka = None
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
|
||||||
if not isinstance(other, RegexParser):
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def reset_parser(self, context, text):
|
|
||||||
self.context = context
|
|
||||||
self.sheerka = context.sheerka
|
|
||||||
|
|
||||||
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
|
|
||||||
self._current = None
|
|
||||||
self.after_current = None
|
|
||||||
self.nb_open_par = 0
|
|
||||||
|
|
||||||
self.next_token()
|
|
||||||
self.eat_white_space()
|
|
||||||
|
|
||||||
def add_error(self, error, next_token=True):
|
|
||||||
self.has_error = True
|
|
||||||
self.error_sink.append(error)
|
|
||||||
if next_token:
|
|
||||||
self.next_token()
|
|
||||||
return error
|
|
||||||
|
|
||||||
def get_token(self) -> Token:
|
|
||||||
return self._current
|
|
||||||
|
|
||||||
def next_token(self, skip_whitespace=False):
|
|
||||||
if self._current and self._current.type == TokenKind.EOF:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
self._current = self.after_current or next(self.lexer_iter)
|
|
||||||
self.source += str(self._current.value)
|
|
||||||
self.after_current = None
|
|
||||||
|
|
||||||
if skip_whitespace:
|
|
||||||
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
|
|
||||||
self._current = next(self.lexer_iter)
|
|
||||||
self.source += str(self._current.value)
|
|
||||||
except StopIteration:
|
|
||||||
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
|
|
||||||
|
|
||||||
def next_after(self):
|
|
||||||
if self.after_current is not None:
|
|
||||||
return self.after_current
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.after_current = next(self.lexer_iter)
|
|
||||||
# self.source += str(self.after_current.value)
|
|
||||||
return self.after_current
|
|
||||||
except StopIteration:
|
|
||||||
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
|
|
||||||
return self.after_current
|
|
||||||
|
|
||||||
def eat_white_space(self):
|
|
||||||
if self.after_current is not None:
|
|
||||||
self._current = self.after_current
|
|
||||||
self.source += str(self._current.value)
|
|
||||||
self.after_current = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
|
|
||||||
self._current = next(self.lexer_iter)
|
|
||||||
self.source += str(self._current.value)
|
|
||||||
except StopIteration:
|
|
||||||
self._current = None
|
|
||||||
|
|
||||||
def maybe_sequence(self, first, second):
|
|
||||||
token = self.get_token()
|
|
||||||
return token.type == second or token.type == first and self.next_after().type == second
|
|
||||||
|
|
||||||
def parse(self, context: ExecutionContext, text):
|
|
||||||
self.reset_parser(context, text)
|
|
||||||
tree = self.parse_choice()
|
|
||||||
|
|
||||||
ret = self.sheerka.ret(
|
|
||||||
self.name,
|
|
||||||
not self.has_error,
|
|
||||||
self.sheerka.new(
|
|
||||||
BuiltinConcepts.PARSER_RESULT,
|
|
||||||
parser=self,
|
|
||||||
source=self.source,
|
|
||||||
body=self.error_sink if self.has_error else tree,
|
|
||||||
try_parsed=tree))
|
|
||||||
|
|
||||||
return ret
|
|
||||||
|
|
||||||
def parse_choice(self):
|
|
||||||
sequence = self.parse_sequence()
|
|
||||||
|
|
||||||
self.eat_white_space()
|
|
||||||
token = self.get_token()
|
|
||||||
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
|
|
||||||
return sequence
|
|
||||||
|
|
||||||
elements = [sequence]
|
|
||||||
while True:
|
|
||||||
# maybe eat the vertical bar
|
|
||||||
self.eat_white_space()
|
|
||||||
token = self.get_token()
|
|
||||||
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
|
|
||||||
break
|
|
||||||
self.next_token(skip_whitespace=True)
|
|
||||||
|
|
||||||
sequence = self.parse_sequence()
|
|
||||||
elements.append(sequence)
|
|
||||||
|
|
||||||
return OrderedChoice(*elements)
|
|
||||||
|
|
||||||
def parse_sequence(self):
|
|
||||||
expr_and_modifier = self.parse_expression_and_modifier()
|
|
||||||
token = self.get_token()
|
|
||||||
if token is None or token.type == TokenKind.EOF or \
|
|
||||||
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
|
|
||||||
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
|
|
||||||
return expr_and_modifier
|
|
||||||
|
|
||||||
elements = [expr_and_modifier]
|
|
||||||
while True:
|
|
||||||
# maybe eat the comma
|
|
||||||
token = self.get_token()
|
|
||||||
if token is None or token.type == TokenKind.EOF or \
|
|
||||||
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
|
|
||||||
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
|
|
||||||
break
|
|
||||||
self.eat_white_space()
|
|
||||||
|
|
||||||
sequence = self.parse_expression_and_modifier()
|
|
||||||
elements.append(sequence)
|
|
||||||
|
|
||||||
return Sequence(*elements)
|
|
||||||
|
|
||||||
def parse_expression_and_modifier(self):
|
|
||||||
expression = self.parse_expression()
|
|
||||||
|
|
||||||
token = self.get_token()
|
|
||||||
|
|
||||||
if token.type == TokenKind.QMARK:
|
|
||||||
self.next_token()
|
|
||||||
return Optional(expression)
|
|
||||||
|
|
||||||
if token.type == TokenKind.STAR:
|
|
||||||
self.next_token()
|
|
||||||
return ZeroOrMore(expression)
|
|
||||||
|
|
||||||
if token.type == TokenKind.PLUS:
|
|
||||||
self.next_token()
|
|
||||||
return OneOrMore(expression)
|
|
||||||
|
|
||||||
return expression
|
|
||||||
|
|
||||||
def parse_expression(self):
|
|
||||||
token = self.get_token()
|
|
||||||
if token.type == TokenKind.EOF:
|
|
||||||
self.add_error(UnexpectedEndOfFileError(), False)
|
|
||||||
if token.type == TokenKind.LPAR:
|
|
||||||
self.nb_open_par += 1
|
|
||||||
self.next_token()
|
|
||||||
expression = self.parse_choice()
|
|
||||||
token = self.get_token()
|
|
||||||
if token.type == TokenKind.RPAR:
|
|
||||||
self.nb_open_par -= 1
|
|
||||||
self.next_token()
|
|
||||||
return expression
|
|
||||||
else:
|
|
||||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
|
|
||||||
return expression
|
|
||||||
|
|
||||||
if token.type == TokenKind.IDENTIFIER:
|
|
||||||
self.next_token()
|
|
||||||
return ConceptMatch(token.value)
|
|
||||||
# concept = self.sheerka.get(str(token.value))
|
|
||||||
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
|
|
||||||
# self.add_error(CannotResolveConceptNode(str(token.value)))
|
|
||||||
# self.next_token()
|
|
||||||
# return None
|
|
||||||
# else:
|
|
||||||
# self.next_token()
|
|
||||||
# return concept
|
|
||||||
|
|
||||||
ret = StrMatch(core.utils.strip_quotes(token.value))
|
|
||||||
self.next_token()
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
class ParsingExpressionVisitor:
|
class ParsingExpressionVisitor:
|
||||||
"""
|
"""
|
||||||
visit ParsingExpression
|
visit ParsingExpression
|
||||||
|
|||||||
@@ -2,12 +2,12 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserRes
|
|||||||
from core.concept import ConceptParts
|
from core.concept import ConceptParts
|
||||||
import core.builtin_helpers
|
import core.builtin_helpers
|
||||||
import core.utils
|
import core.utils
|
||||||
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
|
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
|
||||||
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from parsers.ConceptLexerParser import RegexParser
|
from parsers.BnfParser import BnfParser
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -206,7 +206,7 @@ class DefaultParser(BaseParser):
|
|||||||
Parse sheerka specific grammar (like def concept)
|
Parse sheerka specific grammar (like def concept)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, **kwargs):
|
||||||
BaseParser.__init__(self, "DefaultParser")
|
BaseParser.__init__(self, "DefaultParser")
|
||||||
self.lexer_iter = None
|
self.lexer_iter = None
|
||||||
self._current = None
|
self._current = None
|
||||||
@@ -427,7 +427,7 @@ class DefaultParser(BaseParser):
|
|||||||
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
|
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
|
||||||
return NotInitializedNode()
|
return NotInitializedNode()
|
||||||
|
|
||||||
regex_parser = RegexParser()
|
regex_parser = BnfParser()
|
||||||
new_context = self.context.push(self.name)
|
new_context = self.context.push(self.name)
|
||||||
parsing_result = regex_parser.parse(new_context, tokens)
|
parsing_result = regex_parser.parse(new_context, tokens)
|
||||||
if not parsing_result.status:
|
if not parsing_result.status:
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ class EmptyStringParser(BaseParser):
|
|||||||
To parse empty or blank strings
|
To parse empty or blank strings
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, **kwargs):
|
||||||
BaseParser.__init__(self, "NullParser")
|
BaseParser.__init__(self, "NullParser")
|
||||||
|
|
||||||
def parse(self, context, text):
|
def parse(self, context, text):
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
||||||
from parsers.BaseParser import BaseParser
|
from parsers.BaseParser import BaseParser
|
||||||
from core.tokenizer import Tokenizer, Keywords, TokenKind
|
from core.tokenizer import Tokenizer, Keywords, TokenKind
|
||||||
from core.concept import Concept, VARIABLE_PREFIX
|
from core.concept import VARIABLE_PREFIX
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@@ -14,7 +14,7 @@ class ExactConceptParser(BaseParser):
|
|||||||
|
|
||||||
MAX_WORDS_SIZE = 10
|
MAX_WORDS_SIZE = 10
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, **kwargs):
|
||||||
BaseParser.__init__(self, "ConceptParser")
|
BaseParser.__init__(self, "ConceptParser")
|
||||||
|
|
||||||
def parse(self, context, text):
|
def parse(self, context, text):
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ from core.builtin_concepts import BuiltinConcepts
|
|||||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import ast
|
import ast
|
||||||
import copy
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@@ -57,10 +56,10 @@ class PythonParser(BaseParser):
|
|||||||
Parse Python scripts
|
Parse Python scripts
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, source="<undef>"):
|
def __init__(self, **kwargs):
|
||||||
|
|
||||||
BaseParser.__init__(self, "PythonParser")
|
BaseParser.__init__(self, "PythonParser")
|
||||||
self.source = source
|
self.source = kwargs.get("source", "<undef>")
|
||||||
|
|
||||||
def parse(self, context, text):
|
def parse(self, context, text):
|
||||||
text = text if isinstance(text, str) else self.get_text_from_tokens(text)
|
text = text if isinstance(text, str) else self.get_text_from_tokens(text)
|
||||||
|
|||||||
+14
-15
@@ -1,5 +1,3 @@
|
|||||||
# from os import path
|
|
||||||
# import os
|
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
@@ -542,7 +540,7 @@ class SheerkaDataProvider:
|
|||||||
self.set_snapshot(new_snapshot)
|
self.set_snapshot(new_snapshot)
|
||||||
return new_snapshot
|
return new_snapshot
|
||||||
|
|
||||||
def get(self, entry, key=None):
|
def get(self, entry, key=None, load_origin=True):
|
||||||
"""
|
"""
|
||||||
Retrieve an element by its key
|
Retrieve an element by its key
|
||||||
:param entry:
|
:param entry:
|
||||||
@@ -560,11 +558,11 @@ class SheerkaDataProvider:
|
|||||||
|
|
||||||
item = state.data[entry] if key is None else state.data[entry][key]
|
item = state.data[entry] if key is None else state.data[entry][key]
|
||||||
if isinstance(item, list):
|
if isinstance(item, list):
|
||||||
return [self.load_ref_if_needed(i)[0] for i in item]
|
return [self.load_ref_if_needed(i, load_origin)[0] for i in item]
|
||||||
|
|
||||||
return self.load_ref_if_needed(item)[0]
|
return self.load_ref_if_needed(item, load_origin)[0]
|
||||||
|
|
||||||
def get_safe(self, entry, key=None):
|
def get_safe(self, entry, key=None, load_origin=True):
|
||||||
"""
|
"""
|
||||||
Retrieve an element by its key. Return None if the element does not exist
|
Retrieve an element by its key. Return None if the element does not exist
|
||||||
:param entry:
|
:param entry:
|
||||||
@@ -582,9 +580,9 @@ class SheerkaDataProvider:
|
|||||||
|
|
||||||
item = state.data[entry] if key is None else state.data[entry][key]
|
item = state.data[entry] if key is None else state.data[entry][key]
|
||||||
if isinstance(item, list):
|
if isinstance(item, list):
|
||||||
return [self.load_ref_if_needed(i)[0] for i in item]
|
return [self.load_ref_if_needed(i, load_origin)[0] for i in item]
|
||||||
|
|
||||||
return self.load_ref_if_needed(item)[0]
|
return self.load_ref_if_needed(item, load_origin)[0]
|
||||||
|
|
||||||
def exists(self, entry, key=None, digest=None):
|
def exists(self, entry, key=None, digest=None):
|
||||||
"""
|
"""
|
||||||
@@ -676,7 +674,7 @@ class SheerkaDataProvider:
|
|||||||
log.debug(f"...digest={digest}.")
|
log.debug(f"...digest={digest}.")
|
||||||
return digest
|
return digest
|
||||||
|
|
||||||
def load_obj(self, digest):
|
def load_obj(self, digest, add_origin=True):
|
||||||
if digest is None:
|
if digest is None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -688,19 +686,20 @@ class SheerkaDataProvider:
|
|||||||
obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
|
obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
|
||||||
|
|
||||||
# set the origin of the object
|
# set the origin of the object
|
||||||
if isinstance(obj, dict):
|
if add_origin:
|
||||||
obj[Serializer.ORIGIN] = digest
|
if isinstance(obj, dict):
|
||||||
elif not isinstance(obj, str):
|
obj[Serializer.ORIGIN] = digest
|
||||||
setattr(obj, Serializer.ORIGIN, digest)
|
elif not isinstance(obj, str):
|
||||||
|
setattr(obj, Serializer.ORIGIN, digest)
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
def load_ref_if_needed(self, obj):
|
def load_ref_if_needed(self, obj, load_origin=True):
|
||||||
if not isinstance(obj, str):
|
if not isinstance(obj, str):
|
||||||
return obj, False
|
return obj, False
|
||||||
if not obj.startswith(SheerkaDataProvider.REF_PREFIX):
|
if not obj.startswith(SheerkaDataProvider.REF_PREFIX):
|
||||||
return obj, False
|
return obj, False
|
||||||
|
|
||||||
resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):])
|
resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin)
|
||||||
if resolved is None:
|
if resolved is None:
|
||||||
return obj, False
|
return obj, False
|
||||||
|
|
||||||
|
|||||||
@@ -8,9 +8,9 @@ from core.sheerka import Sheerka, ExecutionContext
|
|||||||
from core.tokenizer import Tokenizer
|
from core.tokenizer import Tokenizer
|
||||||
from evaluators.AddConceptEvaluator import AddConceptEvaluator
|
from evaluators.AddConceptEvaluator import AddConceptEvaluator
|
||||||
from parsers.BaseParser import BaseParser
|
from parsers.BaseParser import BaseParser
|
||||||
from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch
|
from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptMatch
|
||||||
|
from parsers.BnfParser import BnfParser
|
||||||
from parsers.DefaultParser import DefConceptNode, NameNode
|
from parsers.DefaultParser import DefConceptNode, NameNode
|
||||||
from parsers.ExactConceptParser import ExactConceptParser
|
|
||||||
from parsers.PythonParser import PythonNode, PythonParser
|
from parsers.PythonParser import PythonNode, PythonParser
|
||||||
|
|
||||||
|
|
||||||
@@ -67,7 +67,7 @@ def get_concept_definition(source, parsing_expression):
|
|||||||
status=True,
|
status=True,
|
||||||
value=ParserResultConcept(
|
value=ParserResultConcept(
|
||||||
source=source,
|
source=source,
|
||||||
parser=RegexParser(),
|
parser=BnfParser(),
|
||||||
value=parsing_expression
|
value=parsing_expression
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,138 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from core.concept import Concept
|
||||||
|
from core.sheerka import Sheerka, ExecutionContext
|
||||||
|
from core.tokenizer import Tokenizer, TokenKind
|
||||||
|
from parsers.BaseParser import UnexpectedTokenErrorNode
|
||||||
|
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
|
||||||
|
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
|
||||||
|
ConceptLexerParser, ConceptNode, ConceptMatch
|
||||||
|
|
||||||
|
|
||||||
|
def get_context():
|
||||||
|
sheerka = Sheerka(skip_builtins_in_db=True)
|
||||||
|
sheerka.initialize("mem://")
|
||||||
|
|
||||||
|
return ExecutionContext("sheerka", "xxxx", sheerka)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("expression, expected", [
|
||||||
|
("'str'", StrMatch("str")),
|
||||||
|
("1", StrMatch("1")),
|
||||||
|
(" 1", StrMatch("1")),
|
||||||
|
(",", StrMatch(",")),
|
||||||
|
("'foo'?", Optional(StrMatch("foo"))),
|
||||||
|
("'foo'*", ZeroOrMore(StrMatch("foo"))),
|
||||||
|
("'foo'+", OneOrMore(StrMatch("foo"))),
|
||||||
|
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
|
||||||
|
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
|
||||||
|
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
|
||||||
|
("1 2 | 3 4+", OrderedChoice(
|
||||||
|
Sequence(StrMatch("1"), StrMatch("2")),
|
||||||
|
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
|
||||||
|
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
|
||||||
|
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
|
||||||
|
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
|
||||||
|
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
|
||||||
|
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
|
||||||
|
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
|
||||||
|
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
|
||||||
|
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
|
||||||
|
("(1 )", StrMatch("1")),
|
||||||
|
])
|
||||||
|
def test_i_can_parse_regex(expression, expected):
|
||||||
|
parser = BnfParser()
|
||||||
|
res = parser.parse(get_context(), Tokenizer(expression))
|
||||||
|
|
||||||
|
assert not parser.has_error
|
||||||
|
assert res.status
|
||||||
|
assert res.value.value == expected
|
||||||
|
assert res.value.source == expression
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("expression, error", [
|
||||||
|
("1 ", UnexpectedEndOfFileError()),
|
||||||
|
("1|", UnexpectedEndOfFileError()),
|
||||||
|
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
|
||||||
|
])
|
||||||
|
def test_i_can_detect_errors(expression, error):
|
||||||
|
parser = BnfParser()
|
||||||
|
res = parser.parse(get_context(), Tokenizer(expression))
|
||||||
|
ret_value = res.value.value
|
||||||
|
assert parser.has_error
|
||||||
|
assert not res.status
|
||||||
|
assert ret_value[0] == error
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_parse_regex_with_reference():
|
||||||
|
expression = "foo"
|
||||||
|
parser = BnfParser()
|
||||||
|
res = parser.parse(get_context(), Tokenizer(expression))
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert res.value.value == ConceptMatch("foo")
|
||||||
|
assert res.value.source == expression
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_parse_cross_ref_with_modifier():
|
||||||
|
expression = "foo*"
|
||||||
|
parser = BnfParser()
|
||||||
|
res = parser.parse(get_context(), Tokenizer(expression))
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
|
||||||
|
assert res.value.source == expression
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_parse_sequence_with_cross_ref():
|
||||||
|
expression = "foo 'and' bar+"
|
||||||
|
parser = BnfParser()
|
||||||
|
res = parser.parse(get_context(), Tokenizer(expression))
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
|
||||||
|
assert res.value.source == expression
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_parse_choice_with_cross_ref():
|
||||||
|
foo = Concept("foo")
|
||||||
|
bar = Concept("bar")
|
||||||
|
context = get_context()
|
||||||
|
context.sheerka.add_in_cache(foo)
|
||||||
|
context.sheerka.add_in_cache(bar)
|
||||||
|
|
||||||
|
expression = "foo | bar?"
|
||||||
|
parser = BnfParser()
|
||||||
|
res = parser.parse(context, Tokenizer(expression))
|
||||||
|
|
||||||
|
assert res.status
|
||||||
|
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
|
||||||
|
assert res.value.source == expression
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
|
||||||
|
foo = Concept(name="foo")
|
||||||
|
bar = Concept(name="bar")
|
||||||
|
context = get_context()
|
||||||
|
context.sheerka.add_in_cache(foo)
|
||||||
|
context.sheerka.add_in_cache(bar)
|
||||||
|
|
||||||
|
regex_parser = BnfParser()
|
||||||
|
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
|
||||||
|
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
|
||||||
|
|
||||||
|
concepts = {bar: bar_definition, foo: foo_definition}
|
||||||
|
concept_parser = ConceptLexerParser()
|
||||||
|
concept_parser.initialize(context, concepts)
|
||||||
|
|
||||||
|
res = concept_parser.parse(context, "twenty two")
|
||||||
|
assert res.status
|
||||||
|
assert res.value.body == [(bar, 0, 2, "twenty two")]
|
||||||
|
|
||||||
|
res = concept_parser.parse(context, "thirty one")
|
||||||
|
assert res.status
|
||||||
|
assert res.value.body == [(bar, 0, 2, "thirty one")]
|
||||||
|
|
||||||
|
res = concept_parser.parse(context, "twenty")
|
||||||
|
assert res.status
|
||||||
|
assert res.value.body == [(foo, 0, 0, "twenty")]
|
||||||
@@ -2,10 +2,8 @@ import pytest
|
|||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.concept import Concept
|
from core.concept import Concept
|
||||||
from core.sheerka import Sheerka, ExecutionContext
|
from core.sheerka import Sheerka, ExecutionContext
|
||||||
from core.tokenizer import Tokenizer, TokenKind
|
|
||||||
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
|
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
|
||||||
CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \
|
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch
|
||||||
ParsingExpressionVisitor
|
|
||||||
|
|
||||||
|
|
||||||
class ConceptVisitor(ParsingExpressionVisitor):
|
class ConceptVisitor(ParsingExpressionVisitor):
|
||||||
@@ -13,7 +11,17 @@ class ConceptVisitor(ParsingExpressionVisitor):
|
|||||||
self.concepts = set()
|
self.concepts = set()
|
||||||
|
|
||||||
def visit_ConceptMatch(self, node):
|
def visit_ConceptMatch(self, node):
|
||||||
self.concepts.add(node.concept_name)
|
self.concepts.add(node.concept)
|
||||||
|
|
||||||
|
|
||||||
|
def u(parsing_expression, start, end, children=None):
|
||||||
|
if isinstance(parsing_expression, str):
|
||||||
|
parsing_expression = StrMatch(parsing_expression)
|
||||||
|
|
||||||
|
if isinstance(parsing_expression, StrMatch):
|
||||||
|
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)
|
||||||
|
|
||||||
|
return NonTerminalNode(parsing_expression, start, end, [], children)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("match, text", [
|
@pytest.mark.parametrize("match, text", [
|
||||||
@@ -39,7 +47,7 @@ def test_i_can_match_simple_tokens(match, text):
|
|||||||
|
|
||||||
assert res.status
|
assert res.status
|
||||||
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res.value.value == [ConceptNode(foo, 0, 0, source=text)]
|
assert res.value.value == [ConceptNode(foo, 0, 0, source=text, underlying=u(match, 0, 0))]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_match_multiple_concepts_in_one_input():
|
def test_i_can_match_multiple_concepts_in_one_input():
|
||||||
@@ -55,9 +63,9 @@ def test_i_can_match_multiple_concepts_in_one_input():
|
|||||||
assert res.status
|
assert res.status
|
||||||
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res.value.value == [
|
assert res.value.value == [
|
||||||
ConceptNode(one, 0, 0, source="one"),
|
ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
|
||||||
ConceptNode(two, 2, 2, source="two"),
|
ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)),
|
||||||
ConceptNode(one, 4, 4, source="one"),
|
ConceptNode(one, 4, 4, source="one", underlying=u("one", 4, 4)),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -85,8 +93,8 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
|
|||||||
assert not res.status
|
assert not res.status
|
||||||
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res.value.try_parsed == [
|
assert res.value.try_parsed == [
|
||||||
ConceptNode(one, 0, 0, source="one"),
|
ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
|
||||||
ConceptNode(two, 2, 2, source="two")] # these two were recognized
|
ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2))] # these two were recognized
|
||||||
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
|
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
|
||||||
assert res.value.body[0].body == "three"
|
assert res.value.body[0].body == "three"
|
||||||
|
|
||||||
@@ -102,7 +110,11 @@ def test_i_can_match_sequence():
|
|||||||
|
|
||||||
assert res.status
|
assert res.status
|
||||||
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
|
assert res.value.value == [
|
||||||
|
ConceptNode(foo, 0, 4, source="one two three", underlying=u(concepts[foo], 0, 4, [
|
||||||
|
u("one", 0, 0),
|
||||||
|
u("two", 2, 2),
|
||||||
|
u("three", 4, 4)]))]
|
||||||
|
|
||||||
|
|
||||||
def test_wrong_sequence_is_not_matched():
|
def test_wrong_sequence_is_not_matched():
|
||||||
@@ -116,7 +128,7 @@ def test_wrong_sequence_is_not_matched():
|
|||||||
|
|
||||||
assert not res.status
|
assert not res.status
|
||||||
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")]
|
assert res.value.try_parsed == [(foo, "one two three")]
|
||||||
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
|
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
|
||||||
assert res.value.body[0].body == "one"
|
assert res.value.body[0].body == "one"
|
||||||
|
|
||||||
@@ -149,7 +161,7 @@ def test_i_always_choose_the_longest_match():
|
|||||||
|
|
||||||
assert res.status
|
assert res.status
|
||||||
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")]
|
assert res.value.value == [(foo, "one two three")]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_match_several_sequences():
|
def test_i_can_match_several_sequences():
|
||||||
@@ -166,8 +178,8 @@ def test_i_can_match_several_sequences():
|
|||||||
assert res.status
|
assert res.status
|
||||||
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res.value.value == [
|
assert res.value.value == [
|
||||||
ConceptNode(foo, 0, 4, source="one two three"),
|
(foo, 0, 4, "one two three"),
|
||||||
ConceptNode(bar, 6, 8, source="one two"),
|
(bar, 6, 8, "one two"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -181,12 +193,14 @@ def test_i_can_match_ordered_choice():
|
|||||||
res1 = parser.parse(context, "one")
|
res1 = parser.parse(context, "one")
|
||||||
assert res1.status
|
assert res1.status
|
||||||
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res1.value.body == [ConceptNode(foo, 0, 0, source="one")]
|
assert res1.value.body == [
|
||||||
|
ConceptNode(foo, 0, 0, source="one", underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
|
||||||
|
|
||||||
res2 = parser.parse(context, "two")
|
res2 = parser.parse(context, "two")
|
||||||
assert res2.status
|
assert res2.status
|
||||||
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res2.value.body == [ConceptNode(foo, 0, 0, source="two")]
|
assert res2.value.body == [
|
||||||
|
ConceptNode(foo, 0, 0, source="two", underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))]
|
||||||
|
|
||||||
res3 = parser.parse(context, "three")
|
res3 = parser.parse(context, "three")
|
||||||
assert not res3.status
|
assert not res3.status
|
||||||
@@ -216,12 +230,20 @@ def test_i_can_mix_sequences_and_ordered_choices():
|
|||||||
res1 = parser.parse(context, "twenty one ok")
|
res1 = parser.parse(context, "twenty one ok")
|
||||||
assert res1.status
|
assert res1.status
|
||||||
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok")]
|
assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok",
|
||||||
|
underlying=u(concepts[foo], 0, 4, [
|
||||||
|
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
|
||||||
|
u("one", 2, 2),
|
||||||
|
u("ok", 4, 4)]))]
|
||||||
|
|
||||||
res2 = parser.parse(context, "thirty one ok")
|
res2 = parser.parse(context, "thirty one ok")
|
||||||
assert res2.status
|
assert res2.status
|
||||||
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok")]
|
assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok",
|
||||||
|
underlying=u(concepts[foo], 0, 4, [
|
||||||
|
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
|
||||||
|
u("one", 2, 2),
|
||||||
|
u("ok", 4, 4)]))]
|
||||||
|
|
||||||
res3 = parser.parse(context, "twenty one")
|
res3 = parser.parse(context, "twenty one")
|
||||||
assert not res3.status
|
assert not res3.status
|
||||||
@@ -267,7 +289,8 @@ def test_i_can_parse_optional():
|
|||||||
|
|
||||||
res = parser.parse(context, "one")
|
res = parser.parse(context, "one")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.value == [ConceptNode(foo, 0, 0, source="one")]
|
assert res.value.value == [ConceptNode(foo, 0, 0, source="one",
|
||||||
|
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_sequence_starting_with_optional():
|
def test_i_can_parse_sequence_starting_with_optional():
|
||||||
@@ -280,11 +303,19 @@ def test_i_can_parse_sequence_starting_with_optional():
|
|||||||
|
|
||||||
res = parser.parse(context, "twenty one")
|
res = parser.parse(context, "twenty one")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(foo, 0, 2, source="twenty one")]
|
assert res.value.body == [ConceptNode(
|
||||||
|
foo, 0, 2,
|
||||||
|
source="twenty one",
|
||||||
|
underlying=u(concepts[foo], 0, 2,
|
||||||
|
[
|
||||||
|
u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
|
||||||
|
u("one", 2, 2)]
|
||||||
|
))]
|
||||||
|
|
||||||
res = parser.parse(context, "one")
|
res = parser.parse(context, "one")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(foo, 0, 0, source="one")]
|
assert res.value.body == [ConceptNode(foo, 0, 0, source="one",
|
||||||
|
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_sequence_ending_with_optional():
|
def test_i_can_parse_sequence_ending_with_optional():
|
||||||
@@ -297,11 +328,11 @@ def test_i_can_parse_sequence_ending_with_optional():
|
|||||||
|
|
||||||
res = parser.parse(context, "one two three")
|
res = parser.parse(context, "one two three")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
|
assert res.value.body == [(foo, 0, 4, "one two three")]
|
||||||
|
|
||||||
res = parser.parse(context, "one two")
|
res = parser.parse(context, "one two")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(foo, 0, 2, source="one two")]
|
assert res.value.body == [(foo, 0, 2, "one two")]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_sequence_with_optional_in_between():
|
def test_i_can_parse_sequence_with_optional_in_between():
|
||||||
@@ -314,11 +345,11 @@ def test_i_can_parse_sequence_with_optional_in_between():
|
|||||||
|
|
||||||
res = parser.parse(context, "one two three")
|
res = parser.parse(context, "one two three")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")]
|
assert res.value.body == [(foo, 0, 4, "one two three")]
|
||||||
|
|
||||||
res = parser.parse(context, "one three")
|
res = parser.parse(context, "one three")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(foo, 0, 2, source="one three")]
|
assert res.value.body == [(foo, 0, 2, "one three")]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_use_reference():
|
def test_i_can_use_reference():
|
||||||
@@ -338,11 +369,14 @@ def test_i_can_use_reference():
|
|||||||
|
|
||||||
assert res[0].status
|
assert res[0].status
|
||||||
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]
|
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two",
|
||||||
|
underlying=u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)]))]
|
||||||
|
|
||||||
assert res[1].status
|
assert res[1].status
|
||||||
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]
|
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two",
|
||||||
|
underlying=u(ConceptMatch(foo, rule_name="foo"), 0, 2,
|
||||||
|
[u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)])]))]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_use_context_reference_with_multiple_levels():
|
def test_i_can_use_context_reference_with_multiple_levels():
|
||||||
@@ -364,15 +398,15 @@ def test_i_can_use_context_reference_with_multiple_levels():
|
|||||||
|
|
||||||
assert res[0].status
|
assert res[0].status
|
||||||
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")]
|
assert res[0].value.body == [(foo, 0, 2, "one two")]
|
||||||
|
|
||||||
assert res[1].status
|
assert res[1].status
|
||||||
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")]
|
assert res[1].value.body == [(bar, 0, 2, "one two")]
|
||||||
|
|
||||||
assert res[2].status
|
assert res[2].status
|
||||||
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res[2].value.body == [ConceptNode(baz, 0, 2, source="one two")]
|
assert res[2].value.body == [(baz, 0, 2, "one two")]
|
||||||
|
|
||||||
|
|
||||||
def test_order_is_not_important_when_using_references():
|
def test_order_is_not_important_when_using_references():
|
||||||
@@ -386,8 +420,8 @@ def test_order_is_not_important_when_using_references():
|
|||||||
|
|
||||||
res = parser.parse(context, "one two")
|
res = parser.parse(context, "one two")
|
||||||
assert len(res) == 2
|
assert len(res) == 2
|
||||||
assert res[0].value.body == [ConceptNode(bar, 0, 2, source="one two")]
|
assert res[0].value.body == [(bar, 0, 2, "one two")]
|
||||||
assert res[1].value.body == [ConceptNode(foo, 0, 2, source="one two")]
|
assert res[1].value.body == [(foo, 0, 2, "one two")]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_when_reference():
|
def test_i_can_parse_when_reference():
|
||||||
@@ -401,15 +435,15 @@ def test_i_can_parse_when_reference():
|
|||||||
|
|
||||||
res = parser.parse(context, "twenty two")
|
res = parser.parse(context, "twenty two")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
|
assert res.value.body == [(bar, 0, 2, "twenty two")]
|
||||||
|
|
||||||
res = parser.parse(context, "thirty one")
|
res = parser.parse(context, "thirty one")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
|
assert res.value.body == [(bar, 0, 2, "thirty one")]
|
||||||
|
|
||||||
res = parser.parse(context, "twenty")
|
res = parser.parse(context, "twenty")
|
||||||
assert res.status
|
assert res.status
|
||||||
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
|
assert res.value.body == [(foo, 0, 0, "twenty")]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_detect_duplicates_when_reference():
|
def test_i_can_detect_duplicates_when_reference():
|
||||||
@@ -428,11 +462,11 @@ def test_i_can_detect_duplicates_when_reference():
|
|||||||
assert len(res) == 2
|
assert len(res) == 2
|
||||||
assert res[0].status
|
assert res[0].status
|
||||||
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="twenty")]
|
assert res[0].value.body == [(bar, 0, 0, "twenty")]
|
||||||
|
|
||||||
assert res[1].status
|
assert res[1].status
|
||||||
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
|
||||||
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="twenty")]
|
assert res[1].value.body == [(foo, 0, 0, "twenty")]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_detect_infinite_recursion():
|
def test_i_can_detect_infinite_recursion():
|
||||||
@@ -446,8 +480,8 @@ def test_i_can_detect_infinite_recursion():
|
|||||||
parser = ConceptLexerParser()
|
parser = ConceptLexerParser()
|
||||||
parser.initialize(get_context(), concepts)
|
parser.initialize(get_context(), concepts)
|
||||||
|
|
||||||
assert bar not in parser.concepts_dict
|
assert bar not in parser.concepts_grammars
|
||||||
assert foo not in parser.concepts_dict
|
assert foo not in parser.concepts_grammars
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
|
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
|
||||||
@@ -461,8 +495,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
|
|||||||
parser = ConceptLexerParser()
|
parser = ConceptLexerParser()
|
||||||
parser.initialize(get_context(), concepts)
|
parser.initialize(get_context(), concepts)
|
||||||
|
|
||||||
assert foo not in parser.concepts_dict # removed because of the infinite recursion
|
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
||||||
assert bar not in parser.concepts_dict # removed because of the infinite recursion
|
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
|
||||||
|
|
||||||
# the other way around is possible
|
# the other way around is possible
|
||||||
context = get_context()
|
context = get_context()
|
||||||
@@ -472,15 +506,15 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
|
|||||||
}
|
}
|
||||||
parser = ConceptLexerParser()
|
parser = ConceptLexerParser()
|
||||||
parser.initialize(context, concepts)
|
parser.initialize(context, concepts)
|
||||||
assert foo in parser.concepts_dict
|
assert foo in parser.concepts_grammars
|
||||||
assert bar in parser.concepts_dict
|
assert bar in parser.concepts_grammars
|
||||||
|
|
||||||
res = parser.parse(context, "foo")
|
res = parser.parse(context, "foo")
|
||||||
assert len(res) == 2
|
assert len(res) == 2
|
||||||
assert res[0].status
|
assert res[0].status
|
||||||
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")]
|
assert res[0].value.body == [(bar, 0, 0, "foo")]
|
||||||
assert res[1].status
|
assert res[1].status
|
||||||
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")]
|
assert res[1].value.body == [(foo, 0, 0, "foo")]
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
|
def test_i_can_detect_indirect_infinite_recursion_with_sequence():
|
||||||
@@ -494,8 +528,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
|
|||||||
parser = ConceptLexerParser()
|
parser = ConceptLexerParser()
|
||||||
parser.initialize(get_context(), concepts)
|
parser.initialize(get_context(), concepts)
|
||||||
|
|
||||||
assert foo not in parser.concepts_dict # removed because of the infinite recursion
|
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
||||||
assert bar not in parser.concepts_dict # removed because of the infinite recursion
|
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
|
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
|
||||||
@@ -509,8 +543,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
|
|||||||
parser = ConceptLexerParser()
|
parser = ConceptLexerParser()
|
||||||
parser.initialize(get_context(), concepts)
|
parser.initialize(get_context(), concepts)
|
||||||
|
|
||||||
assert foo not in parser.concepts_dict # removed because of the infinite recursion
|
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
|
||||||
assert bar not in parser.concepts_dict # removed because of the infinite recursion
|
assert bar not in parser.concepts_grammars # removed because of the infinite recursion
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_detect_indirect_infinite_recursion_with_optional():
|
def test_i_can_detect_indirect_infinite_recursion_with_optional():
|
||||||
@@ -518,128 +552,6 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("expression, expected", [
|
|
||||||
("'str'", StrMatch("str")),
|
|
||||||
("1", StrMatch("1")),
|
|
||||||
(" 1", StrMatch("1")),
|
|
||||||
(",", StrMatch(",")),
|
|
||||||
("'foo'?", Optional(StrMatch("foo"))),
|
|
||||||
("'foo'*", ZeroOrMore(StrMatch("foo"))),
|
|
||||||
("'foo'+", OneOrMore(StrMatch("foo"))),
|
|
||||||
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
|
|
||||||
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
|
|
||||||
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
|
|
||||||
("1 2 | 3 4+", OrderedChoice(
|
|
||||||
Sequence(StrMatch("1"), StrMatch("2")),
|
|
||||||
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
|
|
||||||
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
|
|
||||||
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
|
|
||||||
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
|
|
||||||
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
|
|
||||||
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
|
|
||||||
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
|
|
||||||
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
|
|
||||||
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
|
|
||||||
("(1 )", StrMatch("1")),
|
|
||||||
])
|
|
||||||
def test_i_can_parse_regex(expression, expected):
|
|
||||||
parser = RegexParser()
|
|
||||||
res = parser.parse(get_context(), Tokenizer(expression))
|
|
||||||
|
|
||||||
assert not parser.has_error
|
|
||||||
assert res.status
|
|
||||||
assert res.value.value == expected
|
|
||||||
assert res.value.source == expression
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
    "expression, error",
    [
        ("1 ", UnexpectedEndOfFileError()),
        ("1|", UnexpectedEndOfFileError()),
        (
            "(1|)",
            UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR]),
        ),
    ],
)
def test_i_can_detect_errors(expression, error):
    """Parse a malformed expression and check that the first reported error equals `error`."""
    regex_parser = RegexParser()
    outcome = regex_parser.parse(get_context(), Tokenizer(expression))
    # The errors are read before any assertion, exactly as the checks below expect a list.
    reported = outcome.value.value

    assert regex_parser.has_error
    assert not outcome.status
    assert reported[0] == error
|
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_regex_with_reference():
    """A bare identifier in an expression is parsed into a ConceptMatch."""
    source_text = "foo"
    outcome = RegexParser().parse(get_context(), Tokenizer(source_text))

    assert outcome.status
    assert outcome.value.value == ConceptMatch("foo")
    assert outcome.value.source == source_text
|
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_cross_ref_with_modifier():
    """An identifier followed by `*` is parsed into ZeroOrMore wrapping a ConceptMatch."""
    source_text = "foo*"
    outcome = RegexParser().parse(get_context(), Tokenizer(source_text))

    assert outcome.status
    assert outcome.value.value == ZeroOrMore(ConceptMatch("foo"))
    assert outcome.value.source == source_text
|
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_sequence_with_cross_ref():
    """Identifiers and quoted strings can be mixed inside one sequence."""
    source_text = "foo 'and' bar+"
    outcome = RegexParser().parse(get_context(), Tokenizer(source_text))

    assert outcome.status
    assert outcome.value.value == Sequence(
        ConceptMatch("foo"),
        StrMatch("and"),
        OneOrMore(ConceptMatch("bar")),
    )
    assert outcome.value.source == source_text
|
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_parse_choice_with_cross_ref():
    """Identifiers can be used as the alternatives of an ordered choice."""
    known_concepts = (Concept("foo"), Concept("bar"))
    ctx = get_context()
    for concept in known_concepts:
        ctx.sheerka.add_in_cache(concept)

    source_text = "foo | bar?"
    outcome = RegexParser().parse(ctx, Tokenizer(source_text))

    assert outcome.status
    assert outcome.value.value == OrderedChoice(
        ConceptMatch("foo"),
        Optional(ConceptMatch("bar")),
    )
    assert outcome.value.source == source_text
|
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
    """Definitions produced by RegexParser can initialize a ConceptLexerParser that parses text."""
    foo = Concept(name="foo")
    bar = Concept(name="bar")
    ctx = get_context()
    for concept in (foo, bar):
        ctx.sheerka.add_in_cache(concept)

    bnf_parser = RegexParser()
    foo_bnf = bnf_parser.parse(ctx, "'twenty' | 'thirty'").value.value
    bar_bnf = bnf_parser.parse(ctx, "foo ('one' | 'two')").value.value

    # Same insertion order as before: bar first, then foo.
    definitions = {bar: bar_bnf, foo: foo_bnf}
    lexer = ConceptLexerParser()
    lexer.initialize(ctx, definitions)

    # (text to parse, concept expected to be recognized, expected end position)
    scenarios = [
        ("twenty two", bar, 2),
        ("thirty one", bar, 2),
        ("twenty", foo, 0),
    ]
    for text, concept, end in scenarios:
        outcome = lexer.parse(ctx, text)
        assert outcome.status
        assert outcome.value.body == [ConceptNode(concept, 0, end, source=text)]
|
|
||||||
|
|
||||||
|
|
||||||
def test_i_can_visit_parsing_expression():
|
def test_i_can_visit_parsing_expression():
|
||||||
mult = Concept(name="mult")
|
mult = Concept(name="mult")
|
||||||
add = Concept(name="add")
|
add = Concept(name="add")
|
||||||
@@ -650,6 +562,19 @@ def test_i_can_visit_parsing_expression():
|
|||||||
assert sorted(list(visitor.concepts)) == ["add", "mult"]
|
assert sorted(list(visitor.concepts)) == ["add", "mult"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_initialize_rule_names():
    """After initialize(), a definition that is a concept carries that concept's name as rule_name."""
    ctx = get_context()
    foo = Concept(name="foo")
    bar = Concept(name="bar")

    # foo is defined by a plain sequence, bar is defined directly by the foo concept.
    definitions = {foo: Sequence("one", "two"), bar: foo}
    lexer = ConceptLexerParser()
    initialized = lexer.initialize(ctx, definitions).body

    assert initialized[foo].rule_name == ""
    assert initialized[bar].rule_name == "foo"
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
|
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
|
||||||
|
|||||||
@@ -0,0 +1,178 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
|
||||||
|
from core.concept import Concept
|
||||||
|
from core.sheerka import Sheerka, ExecutionContext
|
||||||
|
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator
|
||||||
|
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \
|
||||||
|
StrMatch, Optional, OrderedChoice
|
||||||
|
|
||||||
|
|
||||||
|
def get_context():
    """Return an ExecutionContext bound to a Sheerka instance initialized on "mem://"."""
    engine = Sheerka(skip_builtins_in_db=True)
    engine.initialize("mem://")
    return ExecutionContext("test", "xxx", engine)
|
||||||
|
|
||||||
|
|
||||||
|
def get_return_value(nodes, source):
    """Wrap `nodes` in a successful ReturnValueConcept holding a ConceptLexerParser result."""
    parser_result = ParserResultConcept(
        parser=ConceptLexerParser(),
        source=source,
        value=nodes,
        try_parsed=nodes,
    )
    return ReturnValueConcept("some_name", True, parser_result)
|
||||||
|
|
||||||
|
|
||||||
|
def get_concept_node(context, grammar, expression):
    """Parse `expression` with a ConceptLexerParser initialized from `grammar`; return the first node.

    The parse is asserted to be successful before the node is extracted.
    """
    lexer = ConceptLexerParser()
    lexer.initialize(context, grammar)

    outcome = lexer.parse(context, expression)
    assert outcome.status
    nodes = outcome.value.value
    return nodes[0]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "ret_val, expected",
    [
        # Parser results holding concept nodes: the outcome follows the status flag.
        (ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True),
        (ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True),
        (ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), False),
        (ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), False),
        # Parser results that do not hold concept nodes.
        (ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")), False),
        (ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])), False),
        # Concept nodes that are not wrapped in a ParserResultConcept.
        (ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]), False),
        (ReturnValueConcept("some_name", True, ConceptNode(Concept(), 0, 0)), False),
    ],
)
def test_i_can_match(ret_val, expected):
    """ConceptNodeEvaluator.matches() must return `expected` for the given return value."""
    ctx = get_context()
    evaluator = ConceptNodeEvaluator()
    assert evaluator.matches(ctx, ret_val) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_is_returned_when_list_of_one_concept_node():
    """Evaluating a one-element list of concept nodes yields that node's concept."""
    foo = Concept("foo")
    ctx = get_context()
    ctx.sheerka.add_in_cache(foo)

    evaluator = ConceptNodeEvaluator()
    terminal = TerminalNode(StrMatch("foo"), 0, 0, "foo")
    node = ConceptNode(foo, 0, 0, underlying=terminal)

    wrapped = get_return_value([node], "h")
    outcome = evaluator.eval(ctx, wrapped)

    assert outcome.who == evaluator.name
    assert outcome.status
    assert outcome.value == node.concept
    assert outcome.parents == [wrapped]
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_property_is_correctly_updated_for_str_match():
    """A StrMatch carrying a rule_name fills the property of that name with the matched text."""
    ctx = get_context()

    foo = Concept("foo")
    grammar = {foo: StrMatch("foo", rule_name="variable")}
    node = get_concept_node(ctx, grammar, "foo")

    evaluator = ConceptNodeEvaluator()
    result = evaluator.update_concept(ctx.sheerka, node.concept, node.underlying)

    assert "variable" in result.props
    assert result.props["variable"].value == "foo"
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_property_is_correctly_updated_for_sequence():
    """A named Sequence fills its property with the whole matched text."""
    ctx = get_context()

    foo = Concept("foo")
    node = get_concept_node(ctx, {foo: Sequence("one", "two", rule_name="variable")}, "one two")

    evaluator = ConceptNodeEvaluator()
    result = evaluator.update_concept(ctx.sheerka, node.concept, node.underlying)

    assert "variable" in result.props
    assert result.props["variable"].value == "one two"
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_property_is_updated_for_str_in_sequence():
    """Named StrMatch items inside a named Sequence each get their own property."""
    ctx = get_context()

    foo = Concept("foo")
    first = StrMatch("one", rule_name="s1")
    second = StrMatch("two", rule_name="s2")
    node = get_concept_node(ctx, {foo: Sequence(first, second, rule_name="variable")}, "one two")

    evaluator = ConceptNodeEvaluator()
    result = evaluator.update_concept(ctx.sheerka, node.concept, node.underlying)

    assert result.props["variable"].value == "one two"
    assert result.props["s1"].value == "one"
    assert result.props["s2"].value == "two"
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_property_is_correctly_updated_for_optional():
    """A named Optional that matched fills its property with the text it consumed."""
    ctx = get_context()

    foo = Concept("foo")
    definition = Sequence("one", Optional("two", rule_name="o"), rule_name="variable")
    node = get_concept_node(ctx, {foo: definition}, "one two")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new instance obtained from the concept key.
    result = evaluator.update_concept(ctx.sheerka, ctx.sheerka.new(node.concept.key), node.underlying)

    assert "variable" in result.props
    assert result.props["variable"].value == "one two"
    assert result.props["o"].value == "two"
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_property_is_correctly_updated_when_list_of_properties():
    """Two items sharing one rule_name produce a property whose value is a list of both matches."""
    ctx = get_context()

    foo = Concept("foo")
    definition = Sequence(
        StrMatch("one", rule_name="s"),
        StrMatch("two", rule_name="s"),
        rule_name="variable",
    )
    node = get_concept_node(ctx, {foo: definition}, "one two")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new instance obtained from the concept key.
    result = evaluator.update_concept(ctx.sheerka, ctx.sheerka.new(node.concept.key), node.underlying)

    assert result.props["variable"].value == "one two"
    assert result.props["s"].value == ["one", "two"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_property_is_correctly_updated_when_another_concept():
    """A concept referenced inside a definition becomes a property holding the updated concept."""
    ctx = get_context()

    foo = Concept("foo")
    bar = Concept("bar")
    ctx.sheerka.add_in_cache(foo)
    definitions = {
        foo: Sequence("one", "two", rule_name="variable"),
        bar: Sequence(foo, "three", rule_name="variable"),
    }
    node = get_concept_node(ctx, definitions, "one two three")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new instance obtained from the concept key.
    result = evaluator.update_concept(ctx.sheerka, ctx.sheerka.new(node.concept.key), node.underlying)

    assert result.props["variable"].value == "one two three"
    assert result.props["foo"].value == Concept("foo").set_prop("variable", "one two").init_key()
|
||||||
|
|
||||||
|
|
||||||
|
def test_concept_property_is_correctly_updated_when_concept_recursion():
    """Properties are resolved when a definition refers to its own concept."""
    ctx = get_context()

    number = Concept("number")
    add = Concept("add")
    for concept in (number, add):
        ctx.sheerka.add_in_cache(concept)

    operator = OrderedChoice("plus", "minus", rule_name="op")
    definitions = {
        number: OrderedChoice("one", "two"),
        # `add` appears in its own definition.
        add: Sequence(number, Optional(Sequence(operator, add))),
    }
    node = get_concept_node(ctx, definitions, "one plus two")

    evaluator = ConceptNodeEvaluator()
    # The update is applied to a new instance obtained from the concept key.
    result = evaluator.update_concept(ctx.sheerka, ctx.sheerka.new(node.concept.key), node.underlying)

    assert result.props["number"].value == Concept("number").init_key()
    assert result.props["op"].value == "plus"
    assert result.props["add"].value == Concept("add").set_prop("number", Concept("number").init_key()).init_key()
|
||||||
@@ -2,13 +2,13 @@ import pytest
|
|||||||
import ast
|
import ast
|
||||||
|
|
||||||
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
|
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
|
||||||
from core.concept import Concept
|
|
||||||
from core.sheerka import Sheerka, ExecutionContext
|
from core.sheerka import Sheerka, ExecutionContext
|
||||||
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser
|
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch
|
||||||
from parsers.PythonParser import PythonParser, PythonNode
|
from parsers.PythonParser import PythonParser, PythonNode
|
||||||
from core.tokenizer import Keywords, Tokenizer
|
from core.tokenizer import Keywords, Tokenizer
|
||||||
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
|
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
|
||||||
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
|
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
|
||||||
|
from parsers.BnfParser import BnfParser
|
||||||
|
|
||||||
|
|
||||||
# def nop():
|
# def nop():
|
||||||
@@ -341,7 +341,7 @@ def test_i_can_parse_def_concept_from_regex():
|
|||||||
res = parser.parse(get_context(), text)
|
res = parser.parse(get_context(), text)
|
||||||
node = res.value.value
|
node = res.value.value
|
||||||
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
|
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
|
||||||
parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition)
|
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition)
|
||||||
expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
|
expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
|
||||||
|
|
||||||
assert res.status
|
assert res.status
|
||||||
|
|||||||
+52
-9
@@ -1,12 +1,10 @@
|
|||||||
import ast
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from os import path
|
from os import path
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||||
from core.concept import Concept, PROPERTIES_TO_SERIALIZE
|
from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property
|
||||||
from core.sheerka import Sheerka, ExecutionContext
|
from core.sheerka import Sheerka, ExecutionContext
|
||||||
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
|
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
|
||||||
from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
|
from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
|
||||||
@@ -63,12 +61,12 @@ def test_builtin_concepts_are_initialized():
|
|||||||
|
|
||||||
|
|
||||||
def test_builtin_concepts_can_be_updated():
|
def test_builtin_concepts_can_be_updated():
|
||||||
sheerka = get_sheerka(root_folder, skip_builtins_in_db=False)
|
sheerka = get_sheerka(False, skip_builtins_in_db=False)
|
||||||
loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
|
loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
|
||||||
loaded_sheerka.metadata.desc = "I have a description"
|
loaded_sheerka.metadata.desc = "I have a description"
|
||||||
sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka)
|
sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka)
|
||||||
|
|
||||||
sheerka = get_sheerka(root_folder)
|
sheerka = get_sheerka(False)
|
||||||
loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
|
loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
|
||||||
|
|
||||||
assert loaded_sheerka.metadata.desc == "I have a description"
|
assert loaded_sheerka.metadata.desc == "I have a description"
|
||||||
@@ -593,9 +591,8 @@ def test_i_can_create_concept_with_bnf_definition():
|
|||||||
|
|
||||||
saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
|
saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
|
||||||
expected_bnf = Sequence(
|
expected_bnf = Sequence(
|
||||||
ConceptMatch("a"),
|
ConceptMatch("a", rule_name="a"),
|
||||||
Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))),
|
Optional(Sequence(StrMatch("plus"), ConceptMatch("plus", rule_name="plus"))))
|
||||||
rule_name="plus")
|
|
||||||
assert saved_definitions[saved_concept] == expected_bnf
|
assert saved_definitions[saved_concept] == expected_bnf
|
||||||
|
|
||||||
new_concept = res[0].value.body
|
new_concept = res[0].value.body
|
||||||
@@ -606,7 +603,53 @@ def test_i_can_create_concept_with_bnf_definition():
|
|||||||
assert "plus" in new_concept.props
|
assert "plus" in new_concept.props
|
||||||
|
|
||||||
|
|
||||||
def get_sheerka(root="mem://", skip_builtins_in_db=True):
|
def test_i_can_eval_bnf_definitions():
    """Text matching a concept's BNF definition evaluates to an instance of that concept."""
    engine = get_sheerka()
    concept_a = engine.eval("def concept a from bnf 'one' | 'two'")[0].body.body

    results = engine.eval("one")

    assert len(results) == 1
    first = results[0]
    assert first.status
    assert engine.isinstance(first.value, concept_a)
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_eval_bnf_definitions_with_variables():
    """A concept referenced by another BNF definition ends up as a property of the result."""
    engine = get_sheerka()
    concept_a = engine.eval("def concept a from bnf 'one' | 'two'")[0].body.body
    concept_b = engine.eval("def concept b from bnf a 'three'")[0].body.body

    results = engine.eval("one three")

    assert len(results) == 1
    assert results[0].status
    evaluated = results[0].value

    assert engine.isinstance(evaluated, concept_b)
    assert evaluated.props["a"] == Property("a", concept_a)
|
||||||
|
|
||||||
|
|
||||||
|
def test_i_can_eval_bnf_definitions_from_separate_instances():
    """
    Same scenario as the plain BNF evaluation test,
    but every evaluation runs on a fresh instance from get_sheerka(False),
    to make sure that the BNF definitions are correctly persisted and loaded
    """
    defining_engine = get_sheerka(False)
    concept_a = defining_engine.eval("def concept a from bnf 'one' | 'two'")[0].body.body

    for text in ("one", "two"):
        results = get_sheerka(False).eval(text)
        assert len(results) == 1
        assert results[0].status
        # The instance check is still done with the engine that defined the concept.
        assert defining_engine.isinstance(results[0].value, concept_a)
|
||||||
|
|
||||||
|
|
||||||
|
def get_sheerka(use_dict=True, skip_builtins_in_db=True):
|
||||||
|
root = "mem://" if use_dict else root_folder
|
||||||
sheerka = Sheerka(skip_builtins_in_db)
|
sheerka = Sheerka(skip_builtins_in_db)
|
||||||
sheerka.initialize(root)
|
sheerka.initialize(root)
|
||||||
|
|
||||||
|
|||||||
@@ -754,7 +754,7 @@ def test_i_can_set_using_reference(root):
|
|||||||
".sheerka",
|
".sheerka",
|
||||||
"mem://"
|
"mem://"
|
||||||
])
|
])
|
||||||
def test_i_can_add_reference_of_an_object_with_a_key(root):
|
def test_i_can_add_an_object_with_a_key_as_a_reference(root):
|
||||||
sdp = SheerkaDataProvider(root)
|
sdp = SheerkaDataProvider(root)
|
||||||
obj = ObjDumpJson("my_key", "value1")
|
obj = ObjDumpJson("my_key", "value1")
|
||||||
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
|
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
|
||||||
@@ -777,7 +777,7 @@ def test_i_can_add_reference_of_an_object_with_a_key(root):
|
|||||||
".sheerka",
|
".sheerka",
|
||||||
"mem://"
|
"mem://"
|
||||||
])
|
])
|
||||||
def test_i_can_add_reference_a_dictionary(root):
|
def test_i_can_add_a_dictionary_as_a_reference(root):
|
||||||
sdp = SheerkaDataProvider(root)
|
sdp = SheerkaDataProvider(root)
|
||||||
obj = {"my_key": "value1"}
|
obj = {"my_key": "value1"}
|
||||||
|
|
||||||
@@ -1403,7 +1403,7 @@ def test_i_can_get_an_entry_by_key(root):
|
|||||||
".sheerka",
|
".sheerka",
|
||||||
"mem://"
|
"mem://"
|
||||||
])
|
])
|
||||||
def test_i_can_get_object_save_by_reference(root):
|
def test_i_can_get_object_saved_by_reference(root):
|
||||||
sdp = SheerkaDataProvider(root)
|
sdp = SheerkaDataProvider(root)
|
||||||
obj = ObjDumpJson("my_key", "value1")
|
obj = ObjDumpJson("my_key", "value1")
|
||||||
sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj)))
|
sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj)))
|
||||||
@@ -1687,3 +1687,73 @@ def test_i_can_add_obj_with_same_key_and_get_them_back(root):
|
|||||||
assert len(loaded) == 2
|
assert len(loaded) == 2
|
||||||
assert loaded[0] == obj1
|
assert loaded[0] == obj1
|
||||||
assert loaded[1] == obj2
|
assert loaded[1] == obj2
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_get_safe_dictionary_without_origin(root):
    """get_safe() on a dictionary stored by reference omits the origin entry when load_origin=False."""
    provider = SheerkaDataProvider(root)
    stored = {"my_key": "value1"}

    provider.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(stored)))

    entry, key = provider.add(evt_digest, "entry", stored, use_ref=True)

    # Default load: the dictionary comes back with one extra entry, the origin.
    with_origin = provider.get_safe(entry, key)
    assert len(with_origin) == 2
    assert with_origin["my_key"] == stored["my_key"]
    assert Serializer.ORIGIN in with_origin

    # load_origin=False: only the stored key is left.
    without_origin = provider.get_safe(entry, key, load_origin=False)
    assert len(without_origin) == 1
    assert without_origin["my_key"] == stored["my_key"]
    assert Serializer.ORIGIN not in without_origin
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_get_dictionary_without_origin(root):
    """get() on a dictionary stored by reference omits the origin entry when load_origin=False."""
    provider = SheerkaDataProvider(root)
    stored = {"my_key": "value1"}

    provider.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(stored)))

    entry, key = provider.add(evt_digest, "entry", stored, use_ref=True)

    # Default load: the dictionary comes back with one extra entry, the origin.
    with_origin = provider.get(entry, key)
    assert len(with_origin) == 2
    assert with_origin["my_key"] == stored["my_key"]
    assert Serializer.ORIGIN in with_origin

    # load_origin=False: only the stored key is left.
    without_origin = provider.get(entry, key, load_origin=False)
    assert len(without_origin) == 1
    assert without_origin["my_key"] == stored["my_key"]
    assert Serializer.ORIGIN not in without_origin
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("root", [".sheerka", "mem://"])
def test_i_get_safe_object_without_origin(root):
    """get_safe() on an object stored by reference sets no origin attribute when load_origin=False."""
    provider = SheerkaDataProvider(root)
    stored = ObjDumpJson("my_key", "value1")

    provider.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(stored)))

    entry, key = provider.add(evt_digest, "entry", stored, use_ref=True)

    # Default load: equal to the stored object and carrying the origin attribute.
    with_origin = provider.get_safe(entry, key)
    assert with_origin == stored
    assert hasattr(with_origin, Serializer.ORIGIN)

    # load_origin=False: still equal, but without the origin attribute.
    without_origin = provider.get_safe(entry, key, load_origin=False)
    assert without_origin == stored
    assert not hasattr(without_origin, Serializer.ORIGIN)
|
||||||
|
|||||||
Reference in New Issue
Block a user