I can define and eval BNF definitions

2019-12-17 21:19:44 +01:00
parent c668cc46d2
commit 88cd3162be
25 changed files with 1099 additions and 569 deletions
+28 -8
View File
@@ -1,5 +1,4 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from functools import lru_cache
from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept
from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST
@@ -40,11 +39,12 @@ class Sheerka(Concept):
# #
# Cache for all concepts BNF # Cache for all concepts BNF
#
self.concepts_definitions = {} self.concepts_definitions = {}
# #
# cache for concepts grammars # cache for concepts grammars
# a grammar can be seen as a resolved BNF # a grammar is a resolved BNF
self.concepts_grammars = {} self.concepts_grammars = {}
# a concept can be instantiated # a concept can be instantiated
@@ -79,14 +79,18 @@ class Sheerka(Concept):
try: try:
self.init_logging() self.init_logging()
self.sdp = SheerkaDataProvider(root_folder)
self.sdp = SheerkaDataProvider(root_folder)
if self.sdp.first_time: if self.sdp.first_time:
self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000) self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000)
evt_digest = self.sdp.save_event(Event("Initializing Sheerka."))
exec_context = ExecutionContext(self.key, evt_digest, self)
self.initialize_builtin_concepts() self.initialize_builtin_concepts()
self.initialize_builtin_parsers() self.initialize_builtin_parsers()
self.initialize_builtin_evaluators() self.initialize_builtin_evaluators()
self.initialize_concepts_definitions(exec_context)
except IOError as e: except IOError as e:
return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e) return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e)
@@ -149,7 +153,24 @@ class Sheerka(Concept):
init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'")
self.evaluators.append(evaluator) self.evaluators.append(evaluator)
def logger_filter(self, record: logging.LogRecord): def initialize_concepts_definitions(self, execution_context):
init_log.debug("Initializing concepts definitions")
definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
if definitions is None:
init_log.debug("No BNF defined")
return
lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]()
ret_val = lexer_parser.initialize(execution_context, definitions)
if not ret_val.status:
init_log.error("Failed to initialize concepts definitions " + str(ret_val.body))
return
self.concepts_grammars = lexer_parser.concepts_grammars
def init_logging(self):
def _logger_filter(record: logging.LogRecord):
if 'all' in self.loggers: if 'all' in self.loggers:
return True return True
@@ -159,9 +180,8 @@ class Sheerka(Concept):
return ret return ret
def init_logging(self):
handler = logging.StreamHandler() handler = logging.StreamHandler()
handler.addFilter(self.logger_filter) handler.addFilter(_logger_filter)
if self.debug: if self.debug:
log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s"
log_level = logging.DEBUG log_level = logging.DEBUG
@@ -211,7 +231,7 @@ class Sheerka(Concept):
else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens" else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens"
log.debug(f"Parsing {debug_text}") log.debug(f"Parsing {debug_text}")
for parser in self.parsers.values(): for parser in self.parsers.values():
p = parser() p = parser(sheerka=self)
res = p.parse(context, text) res = p.parse(context, text)
if isinstance(res, list): if isinstance(res, list):
result.extend(res) result.extend(res)
@@ -347,7 +367,7 @@ class Sheerka(Concept):
concepts_definitions[concept] = concept.bnf concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules # check if it's a valid BNF or whether it breaks the known rules
concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy()) concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](grammars=self.concepts_grammars.copy())
sub_context = context.push(self.name, "Initializing concept definition") sub_context = context.push(self.name, "Initializing concept definition")
sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet
init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions) init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions)
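All parser constructors in this commit switch to keyword arguments, so Sheerka.parse can instantiate each registered parser uniformly as parser(sheerka=self), while the definition check above uses an isolated ConceptLexerParser(grammars=...) built on a copy of the grammar cache. A minimal sketch of the three construction modes accepted by ConceptLexerParser.__init__ (shown further down in this diff), assuming the in-memory backend the new tests use:

from core.sheerka import Sheerka
from parsers.ConceptLexerParser import ConceptLexerParser

sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://")

shared = ConceptLexerParser(sheerka=sheerka)                             # borrows sheerka.concepts_grammars
isolated = ConceptLexerParser(grammars=dict(sheerka.concepts_grammars))  # validates against its own copy
empty = ConceptLexerParser()                                             # starts with no grammars at all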
-1
View File
@@ -1,7 +1,6 @@
import importlib import importlib
import inspect import inspect
import pkgutil import pkgutil
import sys
from core.tokenizer import TokenKind from core.tokenizer import TokenKind
+8 -4
View File
@@ -1,14 +1,13 @@
from core.ast.nodes import python_to_concept from core.ast.nodes import python_to_concept
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts from core.builtin_concepts import ParserResultConcept, ReturnValueConcept
from core.builtin_helpers import get_names from core.builtin_helpers import get_names
from core.concept import Concept from core.concept import Concept
from evaluators.BaseEvaluator import OneReturnValueEvaluator from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode from parsers.DefaultParser import DefConceptNode
import functools
import logging import logging
from parsers.PythonParser import PythonGetNamesVisitor, PythonNode from parsers.PythonParser import PythonNode
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -23,7 +22,12 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor):
self.names = set() self.names = set()
def visit_ConceptMatch(self, node): def visit_ConceptMatch(self, node):
self.names.add(node.rule_name or node.concept_name) if node.rule_name:
self.names.add(node.rule_name)
elif isinstance(node.concept, Concept):
self.names.add(node.concept.name)
else:
self.names.add(node.concept)
def visit_all(self, node): def visit_all(self, node):
if node.rule_name: if node.rule_name:
+6 -2
View File
@@ -4,12 +4,16 @@ from core.concept import Concept, ConceptParts
from evaluators.BaseEvaluator import OneReturnValueEvaluator from evaluators.BaseEvaluator import OneReturnValueEvaluator
import logging import logging
from parsers.BaseParser import BaseParser
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
class ConceptEvaluator(OneReturnValueEvaluator): class ConceptEvaluator(OneReturnValueEvaluator):
"""
The concept evaluator is the main class that knows what to do with a concept.
It verifies the PRE conditions,
if they hold, it may or may not execute the BODY,
then it checks the POST conditions.
"""
NAME = "Concept" NAME = "Concept"
evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION] evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION]
+92
View File
@@ -0,0 +1,92 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from evaluators.BaseEvaluator import OneReturnValueEvaluator
import logging
from parsers.ConceptLexerParser import ConceptNode, TerminalNode, NonTerminalNode, ConceptMatch
log = logging.getLogger(__name__)
class ConceptNodeEvaluator(OneReturnValueEvaluator):
"""
After a BNF is recognized, generates the concept or the list of concepts
"""
NAME = "ConceptNode"
def __init__(self):
super().__init__(self.NAME, 60) # higher priority than the PythonEvaluator (50)
def matches(self, context, return_value):
if not return_value.status:
return False
if not isinstance(return_value.value, ParserResultConcept):
return False
return (isinstance(return_value.value.value, ConceptNode) or
(
hasattr(return_value.value.value, "__iter__") and
len(return_value.value.value) > 0 and
isinstance(return_value.value.value[0], ConceptNode)
))
def eval(self, context, return_value):
"""
From a concept node, creates a new concept
and makes sure that the properties are correctly set
"""
sheerka = context.sheerka
nodes = return_value.value.value
if not hasattr(nodes, "__iter__"):
nodes = [nodes]
concepts = []
for node in nodes:
concept = sheerka.new(node.concept.key)
concept = self.update_concept(sheerka, concept, node.underlying)
concepts.append(concept)
if len(concepts) == 1:
return sheerka.ret(
self.name,
True,
concepts[0],
parents=[return_value])
raise NotImplementedError("Not yet")
def update_concept(self, sheerka, concept, underlying):
"""
Updates the properties of the concept
"""
def _add_prop(c, prop_name, value):
"""
Adds a new entry,
makes a list if the property already exists
"""
if prop_name not in c.props or c.props[prop_name].value is None:
c.set_prop(prop_name, value)
else:
new_value = [c.props[prop_name].value, value]
c.set_prop(prop_name, new_value)
parsing_expression = underlying.parsing_expression
if parsing_expression.rule_name:
_add_prop(concept, parsing_expression.rule_name, underlying.source)
if isinstance(underlying, NonTerminalNode):
for child in underlying.children:
if isinstance(child.parsing_expression, ConceptMatch):
new_concept = sheerka.new(child.parsing_expression.concept.key)
_add_prop(concept, child.parsing_expression.rule_name, new_concept)
if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT):
continue
else:
self.update_concept(sheerka, new_concept, child.children[0])
else:
self.update_concept(sheerka, concept, child)
return concept
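The _add_prop helper above implements a small accumulation rule: the first value seen for a rule name is stored directly on the concept, and any further value turns the property into a list. A standalone sketch of that rule (a plain dict stands in for the real Concept.props API, purely for illustration):

def add_prop(props: dict, prop_name: str, value):
    # First value is stored as-is; subsequent values are collected into a list,
    # mirroring _add_prop above (which additionally unwraps the stored property's .value).
    if prop_name not in props or props[prop_name] is None:
        props[prop_name] = value
    else:
        props[prop_name] = [props[prop_name], value]

props = {}
add_prop(props, "digit", "one")
add_prop(props, "digit", "two")
print(props)  # {'digit': ['one', 'two']}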
+2 -1
View File
@@ -1,5 +1,4 @@
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
import core.builtin_helpers import core.builtin_helpers
from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator
import logging import logging
@@ -13,6 +12,8 @@ class MultipleSameSuccessEvaluator(AllReturnValuesEvaluator):
""" """
Used to filter the responses Used to filter the responses
It has a low priority to let other evaluators try to resolve the errors It has a low priority to let other evaluators try to resolve the errors
It reduces the responses when several evaluators give the same answer
""" """
NAME = "MultipleSameSuccess" NAME = "MultipleSameSuccess"
+2
View File
@@ -11,6 +11,8 @@ class OneSuccessEvaluator(AllReturnValuesEvaluator):
""" """
Used to filter the responses Used to filter the responses
It has a low priority to let other evaluators try to resolve the errors It has a low priority to let other evaluators try to resolve the errors
Makes sure that there is only one successful answer
""" """
NAME = "OneSuccess" NAME = "OneSuccess"
+4
View File
@@ -15,6 +15,10 @@ log = logging.getLogger(__name__)
class PythonEvaluator(OneReturnValueEvaluator): class PythonEvaluator(OneReturnValueEvaluator):
NAME = "Python" NAME = "Python"
"""
Evaluates a Python node, i.e., evaluates some Python code
"""
def __init__(self): def __init__(self):
super().__init__(self.NAME, 50) super().__init__(self.NAME, 50)
+2
View File
@@ -12,6 +12,8 @@ class TooManySuccessEvaluator(AllReturnValuesEvaluator):
""" """
Used to filter the responses Used to filter the responses
It has a low priority to let other evaluators try to resolve the errors It has a low priority to let other evaluators try to resolve the errors
Raises an error when there are several successful answers with different values
""" """
NAME = "TooManySuccess" NAME = "TooManySuccess"
+1 -1
View File
@@ -9,7 +9,7 @@ import core.utils
def usage(): def usage():
print("Sheerka v0.1\n") print("Sheerka v0.1\n")
print("usage:") print("usage:")
print(sys.argv[0] + "[-hd] command ") print(sys.argv[0] + "[-hdl:] command ")
def main(argv): def main(argv):
+6
View File
@@ -27,6 +27,12 @@ class ErrorNode(Node):
pass pass
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
expected_tokens: list
class BaseParser: class BaseParser:
PREFIX = "Parsers:" PREFIX = "Parsers:"
+227
View File
@@ -0,0 +1,227 @@
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptMatch, StrMatch
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
pass
class BnfParser:
"""
Parser used to transform a literal definition into a ParsingExpression
example:
a | b c -> OrderedChoice(a, Sequence(b, c))
'|' (pipe) is used for OrderedChoice
whitespace between expressions is used for Sequence (a ',' is matched literally)
'?' (question mark) is used for Optional
'*' (star) is used for ZeroOrMore
'+' (plus) is used for OneOrMore
"""
def __init__(self):
self.has_error = False
self.error_sink = []
self.name = BaseParser.PREFIX + "RegexParser"
self.lexer_iter = None
self._current = None
self.after_current = None
self.nb_open_par = 0
self.context = None
self.source = ""
self.sheerka = None
def __eq__(self, other):
if not isinstance(other, BnfParser):
return False
return True
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.source = ""  # start a fresh source record for this parse
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
self._current = None
self.after_current = None
self.nb_open_par = 0
self.next_token()
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=False):
if self._current and self._current.type == TokenKind.EOF:
return
try:
self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value)
self.after_current = None
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
def next_after(self):
if self.after_current is not None:
return self.after_current
try:
self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value)
return self.after_current
except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
return self.after_current
def eat_white_space(self):
if self.after_current is not None:
self._current = self.after_current
self.source += str(self._current.value)
self.after_current = None
try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = None
def maybe_sequence(self, first, second):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text)
tree = self.parse_choice()
ret = self.sheerka.ret(
self.name,
not self.has_error,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=self.source,
body=self.error_sink if self.has_error else tree,
try_parsed=tree))
return ret
def parse_choice(self):
sequence = self.parse_sequence()
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
while True:
# maybe eat the vertical bar
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
def parse_sequence(self):
expr_and_modifier = self.parse_expression_and_modifier()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
# maybe eat the comma
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_white_space()
sequence = self.parse_expression_and_modifier()
elements.append(sequence)
return Sequence(*elements)
def parse_expression_and_modifier(self):
expression = self.parse_expression()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return expression
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEndOfFileError(), False)
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
return expression
if token.type == TokenKind.IDENTIFIER:
self.next_token()
return ConceptMatch(token.value)
# concept = self.sheerka.get(str(token.value))
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# self.add_error(CannotResolveConceptNode(str(token.value)))
# self.next_token()
# return None
# else:
# self.next_token()
# return concept
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
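A short usage sketch of the new BnfParser, mirroring the assertions in the test_BnfParser.py tests added further down (the context helper is the same one those tests define):

from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer
from parsers.BnfParser import BnfParser
from parsers.ConceptLexerParser import Sequence, StrMatch, OneOrMore, ConceptMatch

def get_context():
    # In-memory Sheerka instance, as in the new tests.
    sheerka = Sheerka(skip_builtins_in_db=True)
    sheerka.initialize("mem://")
    return ExecutionContext("sheerka", "xxxx", sheerka)

parser = BnfParser()
res = parser.parse(get_context(), Tokenizer("foo 'and' bar+"))
assert res.status
# Identifiers become ConceptMatch references, quoted strings become StrMatch terminals.
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))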
+146 -329
View File
@@ -1,5 +1,5 @@
##################################################################################################### #####################################################################################################
# This part of code is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio) # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# I don't directly use the project, but it helped me figure out # I don't directly use the project, but it helped me figure out
# what to do. # what to do.
# Dejanović I., Milosavljević G., Vaderna R.: # Dejanović I., Milosavljević G., Vaderna R.:
@@ -10,7 +10,6 @@ from dataclasses import field, dataclass
from collections import defaultdict from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept from core.concept import Concept
from core.sheerka import ExecutionContext
from core.tokenizer import TokenKind, Tokenizer, Token from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils import core.utils
@@ -40,6 +39,18 @@ def flatten(iterable):
class LexerNode(Node): class LexerNode(Node):
start: int start: int
end: int end: int
tokens: list = None
source: str = None
def __post_init__(self):
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if not isinstance(other, LexerNode):
return False
return self.start == other.start and self.end == other.end
class ConceptNode(LexerNode): class ConceptNode(LexerNode):
@@ -48,17 +59,24 @@ class ConceptNode(LexerNode):
It represents a recognized concept It represents a recognized concept
""" """
def __init__(self, concept, start, end, tokens=None, source=None, children=None): def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
super().__init__(start, end) super().__init__(start, end, tokens, source)
self.concept = concept self.concept = concept
self.tokens = tokens self.underlying = underlying
self.source = source
self.children = children
if self.source is None: if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens) self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other): def __eq__(self, other):
if isinstance(other, tuple):
if len(other) == 2:
return self.concept == other[0] and self.source == other[1]
else:
return self.concept == other[0] and \
self.start == other[1] and \
self.end == other[2] and \
self.source == other[3]
if not super().__eq__(other): if not super().__eq__(other):
return False return False
@@ -66,10 +84,14 @@ class ConceptNode(LexerNode):
return False return False
return self.concept == other.concept and \ return self.concept == other.concept and \
self.source == other.source self.source == other.source and \
self.underlying == other.underlying
def __hash__(self): def __hash__(self):
return hash((self.concept, self.start, self.end, self.source)) return hash((self.concept, self.start, self.end, self.source, self.underlying))
def __repr__(self):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class NonTerminalNode(LexerNode): class NonTerminalNode(LexerNode):
@@ -77,8 +99,8 @@ class NonTerminalNode(LexerNode):
Returned by the ConceptLexerParser Returned by the ConceptLexerParser
""" """
def __init__(self, parsing_expression, start, end, children=None): def __init__(self, parsing_expression, start, end, tokens, children=None):
super().__init__(start, end) super().__init__(start, end, tokens)
self.parsing_expression = parsing_expression self.parsing_expression = parsing_expression
self.children = children self.children = children
@@ -90,6 +112,21 @@ class NonTerminalNode(LexerNode):
sub_names = "" sub_names = ""
return name + sub_names return name + sub_names
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, NonTerminalNode):
return False
return self.parsing_expression == other.parsing_expression and \
self.start == other.start and \
self.end == other.end and \
self.children == other.children
def __hash__(self):
return hash((self.parsing_expression, self.start, self.end, self.children))
class TerminalNode(LexerNode): class TerminalNode(LexerNode):
""" """
@@ -97,7 +134,7 @@ class TerminalNode(LexerNode):
""" """
def __init__(self, parsing_expression, start, end, value): def __init__(self, parsing_expression, start, end, value):
super().__init__(start, end) super().__init__(start, end, source=value)
self.parsing_expression = parsing_expression self.parsing_expression = parsing_expression
self.value = value self.value = value
@@ -105,23 +142,27 @@ class TerminalNode(LexerNode):
name = self.parsing_expression.rule_name or "" name = self.parsing_expression.rule_name or ""
return name + f"'{self.value}'" return name + f"'{self.value}'"
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, TerminalNode):
return False
return self.parsing_expression == other.parsing_expression and \
self.start == other.start and \
self.end == other.end and \
self.value == other.value
def __hash__(self):
return hash((self.parsing_expression, self.start, self.end, self.value))
@dataclass() @dataclass()
class GrammarErrorNode(ErrorNode): class GrammarErrorNode(ErrorNode):
message: str message: str
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
expected_tokens: list
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
pass
@dataclass() @dataclass()
class UnknownConceptNode(ErrorNode): class UnknownConceptNode(ErrorNode):
concept_key: str concept_key: str
@@ -175,7 +216,7 @@ class Sequence(ParsingExpression):
children.append(node) children.append(node)
end_pos = node.end end_pos = node.end
return NonTerminalNode(self, init_pos, end_pos, children) return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or [])
def __repr__(self): def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements) to_str = ", ".join(repr(n) for n in self.elements)
@@ -194,7 +235,7 @@ class OrderedChoice(ParsingExpression):
for e in self.nodes: for e in self.nodes:
node = e.parse(parser) node = e.parse(parser)
if node: if node:
return NonTerminalNode(self, init_pos, node.end, [node]) return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
parser.seek(init_pos) # backtrack parser.seek(init_pos) # backtrack
@@ -214,13 +255,18 @@ class Optional(ParsingExpression):
def _parse(self, parser): def _parse(self, parser):
init_pos = parser.pos init_pos = parser.pos
selected_node = NonTerminalNode(self, parser.pos, -1, []) selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
for e in self.nodes: for e in self.nodes:
node = e.parse(parser) node = e.parse(parser)
if node: if node:
if node.end > selected_node.end: if node.end > selected_node.end:
selected_node = node selected_node = NonTerminalNode(
self,
node.start,
node.end,
parser.tokens[node.start: node.end + 1],
[node])
parser.seek(init_pos) # backtrack parser.seek(init_pos) # backtrack
@@ -327,12 +373,12 @@ class ConceptMatch(Match):
When the grammar is created, it is replaced by the actual concept When the grammar is created, it is replaced by the actual concept
""" """
def __init__(self, concept_name): def __init__(self, concept, rule_name=""):
super(Match, self).__init__() super(Match, self).__init__(rule_name=rule_name)
self.concept_name = concept_name self.concept = concept
def __repr__(self): def __repr__(self):
return f"{self.concept_name}" return f"{self.concept}"
def __eq__(self, other): def __eq__(self, other):
if not super().__eq__(other): if not super().__eq__(other):
@@ -341,32 +387,37 @@ class ConceptMatch(Match):
if not isinstance(other, ConceptMatch): if not isinstance(other, ConceptMatch):
return False return False
return self.concept_name == other.concept_name if isinstance(self.concept, Concept):
return self.concept.name == other.concept.name
class CrossRef:
"""
During the creation of the model,
Creates reference to a concept, as it may not be resolved yet
"""
def __init__(self, concept):
self.concept = concept
def __repr__(self):
return f"ref({self.concept.key})"
def __eq__(self, other):
if not isinstance(other, CrossRef):
return False
return self.concept == other.concept return self.concept == other.concept
def _parse(self, parser):
to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
return None
if to_match not in parser.concepts_grammars:
return None
self.concept = to_match # Memoize
node = parser.concepts_grammars[to_match].parse(parser)
if node is None:
return None
return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
class ConceptLexerParser(BaseParser): class ConceptLexerParser(BaseParser):
def __init__(self, concepts_dict=None): def __init__(self, **kwargs):
super().__init__("ConceptLexer") super().__init__("ConceptLexer")
self.concepts_dict = concepts_dict or {} # dict of concept, grammar if 'grammars' in kwargs:
self.concepts_grammars = kwargs.get("grammars")
elif 'sheerka' in kwargs:
self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
else:
self.concepts_grammars = {}
self.ignore_case = True self.ignore_case = True
self.token = None self.token = None
@@ -430,24 +481,23 @@ class ConceptLexerParser(BaseParser):
self.pos -= 1 self.pos -= 1
self.token = self.tokens[self.pos] self.token = self.tokens[self.pos]
def initialize(self, context, grammars): def initialize(self, context, concepts_definitions):
""" """
Adds a bunch of concepts, and how they can be recognized Adds a bunch of concepts, and how they can be recognized
:param context: execution context :param context: execution context
:param grammars: dictionary of concept, concept_definition :param concepts_definitions: dictionary of concept, concept_definition
:return: :return:
""" """
self.context = context self.context = context
self.sheerka = context.sheerka self.sheerka = context.sheerka
nodes_to_resolve = []
concepts_to_resolve = set() concepts_to_resolve = set()
# ## Gets the grammars # ## Gets the grammars
for concept, concept_def in grammars.items(): for concept, concept_def in concepts_definitions.items():
concept.init_key() # make sure that the key is initialized concept.init_key() # make sure that the key is initialized
grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve) grammar = self.get_model(concept_def, concepts_to_resolve)
self.concepts_dict[concept] = grammar self.concepts_grammars[concept] = grammar
if self.has_error: if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink) return self.sheerka.ret(self.name, False, self.error_sink)
@@ -456,73 +506,68 @@ class ConceptLexerParser(BaseParser):
concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
for concept in concepts_to_remove: for concept in concepts_to_remove:
concepts_to_resolve.remove(concept) concepts_to_resolve.remove(concept)
del self.concepts_dict[concept] del self.concepts_grammars[concept]
# ## Resolves cross references and remove grammar with unresolved references
self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve)
if self.has_error: if self.has_error:
return self.sheerka.ret(self.name, False, self.error_sink) return self.sheerka.ret(self.name, False, self.error_sink)
else: else:
return self.sheerka.ret(self.name, True, self.concepts_dict) return self.sheerka.ret(self.name, True, self.concepts_grammars)
def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve): def get_concept(self, concept_name):
def get_concept(concept_name):
if concept_name in self.context.concepts_cache: if concept_name in self.context.concepts_cache:
return self.context.concepts_cache[concept_name] return self.context.concepts_cache[concept_name]
return self.sheerka.get(concept_name) return self.sheerka.get(concept_name)
def get_model(self, concept_def, concepts_to_resolve):
# TODO # TODO
# inner_get_model must not modify the initial ParsingExpression # inner_get_model must not modify the initial ParsingExpression
# A copy must be created # A copy must be created
def inner_get_model(expression): def inner_get_model(expression):
if isinstance(expression, Concept): if isinstance(expression, Concept):
ret = CrossRef(expression) ret = ConceptMatch(expression, rule_name=expression.name)
concepts_to_resolve.add(concept) concepts_to_resolve.add(expression)
nodes_to_resolve.append(ret) elif isinstance(expression, ConceptMatch):
if expression.rule_name is None or expression.rule_name == "":
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
else expression.concept
concepts_to_resolve.add(expression.concept)
ret = expression
elif isinstance(expression, str): elif isinstance(expression, str):
ret = StrMatch(expression, ignore_case=self.ignore_case) ret = StrMatch(expression, ignore_case=self.ignore_case)
elif isinstance(expression, StrMatch): elif isinstance(expression, StrMatch):
ret = expression ret = expression
if ret.ignore_case is None: if ret.ignore_case is None:
ret.ignore_case = self.ignore_case ret.ignore_case = self.ignore_case
elif isinstance(expression, ConceptMatch):
to_match = get_concept(expression.concept_name)
if hasattr(to_match, "__iter__"):
ret = self.add_error(TooManyConceptNode(expression.concept_name), False)
elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
ret = self.add_error(UnknownConceptNode(expression.concept_name), False)
else:
ret = CrossRef(to_match)
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
elif isinstance(expression, Sequence) or \ elif isinstance(expression, Sequence) or \
isinstance(expression, OrderedChoice) or \ isinstance(expression, OrderedChoice) or \
isinstance(expression, Optional): isinstance(expression, Optional):
ret = expression ret = expression
ret.nodes.extend([inner_get_model(e) for e in ret.elements]) ret.nodes.extend([inner_get_model(e) for e in ret.elements])
if any((isinstance(x, CrossRef) for x in ret.nodes)):
concepts_to_resolve.add(concept)
nodes_to_resolve.append(ret)
else: else:
ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
return ret return ret
model = inner_get_model(concept_def) model = inner_get_model(concept_def)
if isinstance(model, CrossRef):
concepts_to_resolve.add(concept)
model.rule_name = concept.key
return model return model
def detect_infinite_recursion(self, concepts_to_resolve): def detect_infinite_recursion(self, concepts_to_resolve):
# infinite recursion matcher # infinite recursion matcher
def _is_infinite_recursion(ref_concept, node): def _is_infinite_recursion(ref_concept, node):
if isinstance(node, CrossRef): if isinstance(node, ConceptMatch):
if node.concept == ref_concept: if node.concept == ref_concept:
return True return True
return _is_infinite_recursion(ref_concept, self.concepts_dict[node.concept])
if isinstance(node.concept, str):
to_match = self.get_concept(node.concept)
if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
return False
else:
to_match = node.concept
return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
if isinstance(node, OrderedChoice): if isinstance(node, OrderedChoice):
return _is_infinite_recursion(ref_concept, node.nodes[0]) return _is_infinite_recursion(ref_concept, node.nodes[0])
@@ -537,32 +582,16 @@ class ConceptLexerParser(BaseParser):
removed_concepts = [] removed_concepts = []
for e in concepts_to_resolve: for e in concepts_to_resolve:
to_resolve = self.concepts_dict[e] if isinstance(e, str):
e = self.get_concept(e)
if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
continue
to_resolve = self.concepts_grammars[e]
if _is_infinite_recursion(e, to_resolve): if _is_infinite_recursion(e, to_resolve):
removed_concepts.append(e) removed_concepts.append(e)
return removed_concepts return removed_concepts
# Cross-ref resolving
def resolve_cross_references(self, concepts_to_resolve, nodes_to_resolve):
repeat = True
while repeat:
repeat = False
for e in concepts_to_resolve:
to_resolve = self.concepts_dict[e]
if isinstance(to_resolve, CrossRef):
repeat = True
self.concepts_dict[e] = self.concepts_dict[to_resolve.concept]
for e in nodes_to_resolve:
if not isinstance(e, ParsingExpression):
continue # cases when a concept directly references another concept
for i, node in enumerate(e.nodes):
if isinstance(node, CrossRef):
if node.concept in self.concepts_dict:
e.nodes[i] = self.concepts_dict[node.concept]
def parse(self, context, text): def parse(self, context, text):
if text == "": if text == "":
return context.sheerka.ret( return context.sheerka.ret(
@@ -591,13 +620,17 @@ class ConceptLexerParser(BaseParser):
while True: while True:
init_pos = self.pos init_pos = self.pos
res = [] res = []
for concept, grammar in self.concepts_dict.items(): for concept, grammar in self.concepts_grammars.items():
self.seek(init_pos) self.seek(init_pos)
node = grammar.parse(self) node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
if node is not None: if node is not None:
concept_node = ConceptNode(concept, node.start, node.end, self.tokens[node.start: node.end + 1]) concept_node = ConceptNode(
if hasattr(node, "children"): concept,
concept_node.children = node.children node.start,
node.end,
self.tokens[node.start: node.end + 1],
None,
node)
res.append(concept_node) res.append(concept_node)
if len(res) == 0: # not recognized if len(res) == 0: # not recognized
@@ -606,9 +639,7 @@ class ConceptLexerParser(BaseParser):
self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized)) self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized))
break break
res = self.get_bests(res) # only keep the concept that eat the more tokens res = self.get_bests(res) # only keep the concepts that consume the most tokens
for r in res:
r.children = flatten(r.children)
concepts_found = core.utils.product(concepts_found, res) concepts_found = core.utils.product(concepts_found, res)
# loop # loop
@@ -659,220 +690,6 @@ class ConceptLexerParser(BaseParser):
return by_end_pos[max(by_end_pos)] return by_end_pos[max(by_end_pos)]
class RegexParser:
"""
Parser used to transform litteral into ParsingExpression
example :
a | b, c -> Sequence(OrderedChoice(a, b) ,c)
'|' (pipe) is used for OrderedChoice
',' (comma) is used for Sequence
'?' (question mark) is used for Optional
'*' (star) is used for ZeroOrMore
'+' (plus) is used for OneOrMore
"""
def __init__(self):
self.has_error = False
self.error_sink = []
self.name = BaseParser.PREFIX + "RegexParser"
self.lexer_iter = None
self._current = None
self.after_current = None
self.nb_open_par = 0
self.context = None
self.source = ""
self.sheerka = None
def __eq__(self, other):
if not isinstance(other, RegexParser):
return False
return True
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
self._current = None
self.after_current = None
self.nb_open_par = 0
self.next_token()
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self._current
def next_token(self, skip_whitespace=False):
if self._current and self._current.type == TokenKind.EOF:
return
try:
self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value)
self.after_current = None
if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1)
def next_after(self):
if self.after_current is not None:
return self.after_current
try:
self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value)
return self.after_current
except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
return self.after_current
def eat_white_space(self):
if self.after_current is not None:
self._current = self.after_current
self.source += str(self._current.value)
self.after_current = None
try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter)
self.source += str(self._current.value)
except StopIteration:
self._current = None
def maybe_sequence(self, first, second):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text)
tree = self.parse_choice()
ret = self.sheerka.ret(
self.name,
not self.has_error,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=self.source,
body=self.error_sink if self.has_error else tree,
try_parsed=tree))
return ret
def parse_choice(self):
sequence = self.parse_sequence()
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
while True:
# maybe eat the vertical bar
self.eat_white_space()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
def parse_sequence(self):
expr_and_modifier = self.parse_expression_and_modifier()
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
# maybe eat the comma
token = self.get_token()
if token is None or token.type == TokenKind.EOF or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_white_space()
sequence = self.parse_expression_and_modifier()
elements.append(sequence)
return Sequence(*elements)
def parse_expression_and_modifier(self):
expression = self.parse_expression()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return expression
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEndOfFileError(), False)
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR]))
return expression
if token.type == TokenKind.IDENTIFIER:
self.next_token()
return ConceptMatch(token.value)
# concept = self.sheerka.get(str(token.value))
# if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# self.add_error(CannotResolveConceptNode(str(token.value)))
# self.next_token()
# return None
# else:
# self.next_token()
# return concept
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
class ParsingExpressionVisitor: class ParsingExpressionVisitor:
""" """
visit ParsingExpression visit ParsingExpression
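End to end, a definition parsed by BnfParser can be handed to ConceptLexerParser.initialize and then used to recognize concepts in free text. This sketch mirrors test_i_can_use_the_result_of_regex_parsing_to_parse_a_text further down, reusing the same get_context helper shown earlier:

from core.concept import Concept
from parsers.BnfParser import BnfParser
from parsers.ConceptLexerParser import ConceptLexerParser

context = get_context()  # same in-memory helper as in the new tests
foo, bar = Concept(name="foo"), Concept(name="bar")
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)

bnf = BnfParser()
foo_definition = bnf.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = bnf.parse(context, "foo ('one' | 'two')").value.value   # 'foo' is resolved by name later

lexer = ConceptLexerParser()
lexer.initialize(context, {bar: bar_definition, foo: foo_definition})    # builds and caches the grammars
res = lexer.parse(context, "twenty two")
assert res.status
assert res.value.body == [(bar, 0, 2, "twenty two")]   # ConceptNode compares equal to such tuples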
+4 -4
View File
@@ -2,12 +2,12 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserRes
from core.concept import ConceptParts from core.concept import ConceptParts
import core.builtin_helpers import core.builtin_helpers
import core.utils import core.utils
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field from dataclasses import dataclass, field
import logging import logging
from parsers.ConceptLexerParser import RegexParser from parsers.BnfParser import BnfParser
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -206,7 +206,7 @@ class DefaultParser(BaseParser):
Parse sheerka specific grammar (like def concept) Parse sheerka specific grammar (like def concept)
""" """
def __init__(self): def __init__(self, **kwargs):
BaseParser.__init__(self, "DefaultParser") BaseParser.__init__(self, "DefaultParser")
self.lexer_iter = None self.lexer_iter = None
self._current = None self._current = None
@@ -427,7 +427,7 @@ class DefaultParser(BaseParser):
self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False) self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
return NotInitializedNode() return NotInitializedNode()
regex_parser = RegexParser() regex_parser = BnfParser()
new_context = self.context.push(self.name) new_context = self.context.push(self.name)
parsing_result = regex_parser.parse(new_context, tokens) parsing_result = regex_parser.parse(new_context, tokens)
if not parsing_result.status: if not parsing_result.status:
+1 -1
View File
@@ -10,7 +10,7 @@ class EmptyStringParser(BaseParser):
To parse empty or blank strings To parse empty or blank strings
""" """
def __init__(self): def __init__(self, **kwargs):
BaseParser.__init__(self, "NullParser") BaseParser.__init__(self, "NullParser")
def parse(self, context, text): def parse(self, context, text):
+2 -2
View File
@@ -1,7 +1,7 @@
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind from core.tokenizer import Tokenizer, Keywords, TokenKind
from core.concept import Concept, VARIABLE_PREFIX from core.concept import VARIABLE_PREFIX
import logging import logging
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -14,7 +14,7 @@ class ExactConceptParser(BaseParser):
MAX_WORDS_SIZE = 10 MAX_WORDS_SIZE = 10
def __init__(self): def __init__(self, **kwargs):
BaseParser.__init__(self, "ConceptParser") BaseParser.__init__(self, "ConceptParser")
def parse(self, context, text): def parse(self, context, text):
+2 -3
View File
@@ -2,7 +2,6 @@ from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser, Node, ErrorNode from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass from dataclasses import dataclass
import ast import ast
import copy
import logging import logging
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -57,10 +56,10 @@ class PythonParser(BaseParser):
Parse Python scripts Parse Python scripts
""" """
def __init__(self, source="<undef>"): def __init__(self, **kwargs):
BaseParser.__init__(self, "PythonParser") BaseParser.__init__(self, "PythonParser")
self.source = source self.source = kwargs.get("source", "<undef>")
def parse(self, context, text): def parse(self, context, text):
text = text if isinstance(text, str) else self.get_text_from_tokens(text) text = text if isinstance(text, str) else self.get_text_from_tokens(text)
+10 -11
View File
@@ -1,5 +1,3 @@
# from os import path
# import os
from datetime import datetime, date from datetime import datetime, date
import hashlib import hashlib
import json import json
@@ -542,7 +540,7 @@ class SheerkaDataProvider:
self.set_snapshot(new_snapshot) self.set_snapshot(new_snapshot)
return new_snapshot return new_snapshot
def get(self, entry, key=None): def get(self, entry, key=None, load_origin=True):
""" """
Retrieve an element by its key Retrieve an element by its key
:param entry: :param entry:
@@ -560,11 +558,11 @@ class SheerkaDataProvider:
item = state.data[entry] if key is None else state.data[entry][key] item = state.data[entry] if key is None else state.data[entry][key]
if isinstance(item, list): if isinstance(item, list):
return [self.load_ref_if_needed(i)[0] for i in item] return [self.load_ref_if_needed(i, load_origin)[0] for i in item]
return self.load_ref_if_needed(item)[0] return self.load_ref_if_needed(item, load_origin)[0]
def get_safe(self, entry, key=None): def get_safe(self, entry, key=None, load_origin=True):
""" """
Retrieve an element by its key. Return None if the element does not exist Retrieve an element by its key. Return None if the element does not exist
:param entry: :param entry:
@@ -582,9 +580,9 @@ class SheerkaDataProvider:
item = state.data[entry] if key is None else state.data[entry][key] item = state.data[entry] if key is None else state.data[entry][key]
if isinstance(item, list): if isinstance(item, list):
return [self.load_ref_if_needed(i)[0] for i in item] return [self.load_ref_if_needed(i, load_origin)[0] for i in item]
return self.load_ref_if_needed(item)[0] return self.load_ref_if_needed(item, load_origin)[0]
def exists(self, entry, key=None, digest=None): def exists(self, entry, key=None, digest=None):
""" """
@@ -676,7 +674,7 @@ class SheerkaDataProvider:
log.debug(f"...digest={digest}.") log.debug(f"...digest={digest}.")
return digest return digest
def load_obj(self, digest): def load_obj(self, digest, add_origin=True):
if digest is None: if digest is None:
return None return None
@@ -688,19 +686,20 @@ class SheerkaDataProvider:
obj = self.serializer.deserialize(f, SerializerContext(origin=digest)) obj = self.serializer.deserialize(f, SerializerContext(origin=digest))
# set the origin of the object # set the origin of the object
if add_origin:
if isinstance(obj, dict): if isinstance(obj, dict):
obj[Serializer.ORIGIN] = digest obj[Serializer.ORIGIN] = digest
elif not isinstance(obj, str): elif not isinstance(obj, str):
setattr(obj, Serializer.ORIGIN, digest) setattr(obj, Serializer.ORIGIN, digest)
return obj return obj
def load_ref_if_needed(self, obj): def load_ref_if_needed(self, obj, load_origin=True):
if not isinstance(obj, str): if not isinstance(obj, str):
return obj, False return obj, False
if not obj.startswith(SheerkaDataProvider.REF_PREFIX): if not obj.startswith(SheerkaDataProvider.REF_PREFIX):
return obj, False return obj, False
resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):]) resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin)
if resolved is None: if resolved is None:
return obj, False return obj, False
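The new load_origin flag threads from get and get_safe through load_ref_if_needed into load_obj, so callers can skip stamping the Serializer.ORIGIN digest onto deserialized objects. The startup path added in core/sheerka.py uses it to read stored BNF definitions; a minimal sketch of that call, assuming CONCEPTS_DEFINITIONS_ENTRY is the class-level key it refers to (the key itself is not shown in this diff):

from core.sheerka import Sheerka

sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://")

# Fetch without attaching origin digests, as in initialize_concepts_definitions.
definitions = sheerka.sdp.get_safe(Sheerka.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
print(definitions)  # None on a fresh store: no BNF has been defined yet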
+3 -3
View File
@@ -8,9 +8,9 @@ from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer from core.tokenizer import Tokenizer
from evaluators.AddConceptEvaluator import AddConceptEvaluator from evaluators.AddConceptEvaluator import AddConceptEvaluator
from parsers.BaseParser import BaseParser from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptMatch
from parsers.BnfParser import BnfParser
from parsers.DefaultParser import DefConceptNode, NameNode from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.ExactConceptParser import ExactConceptParser
from parsers.PythonParser import PythonNode, PythonParser from parsers.PythonParser import PythonNode, PythonParser
@@ -67,7 +67,7 @@ def get_concept_definition(source, parsing_expression):
status=True, status=True,
value=ParserResultConcept( value=ParserResultConcept(
source=source, source=source,
parser=RegexParser(), parser=BnfParser(),
value=parsing_expression value=parsing_expression
) )
) )
+138
View File
@@ -0,0 +1,138 @@
import pytest
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptNode, ConceptMatch
def get_context():
sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://")
return ExecutionContext("sheerka", "xxxx", sheerka)
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
])
def test_i_can_detect_errors(expression, error):
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
expression = "foo"
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ConceptMatch("foo")
assert res.value.source == expression
def test_i_can_parse_cross_ref_with_modifier():
expression = "foo*"
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
assert res.value.source == expression
def test_i_can_parse_sequence_with_cross_ref():
expression = "foo 'and' bar+"
parser = BnfParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_parse_choice_with_cross_ref():
foo = Concept("foo")
bar = Concept("bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
expression = "foo | bar?"
parser = BnfParser()
res = parser.parse(context, Tokenizer(expression))
assert res.status
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = BnfParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [(bar, 0, 2, "twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [(bar, 0, 2, "thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [(foo, 0, 0, "twenty")]
+97 -172
View File
@@ -2,10 +2,8 @@ import pytest
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
from core.tokenizer import Tokenizer, TokenKind
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \ ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch
ParsingExpressionVisitor
class ConceptVisitor(ParsingExpressionVisitor): class ConceptVisitor(ParsingExpressionVisitor):
@@ -13,7 +11,17 @@ class ConceptVisitor(ParsingExpressionVisitor):
self.concepts = set() self.concepts = set()
def visit_ConceptMatch(self, node): def visit_ConceptMatch(self, node):
self.concepts.add(node.concept_name) self.concepts.add(node.concept)
def u(parsing_expression, start, end, children=None):
if isinstance(parsing_expression, str):
parsing_expression = StrMatch(parsing_expression)
if isinstance(parsing_expression, StrMatch):
return TerminalNode(parsing_expression, start, end, parsing_expression.to_match)
return NonTerminalNode(parsing_expression, start, end, [], children)
@pytest.mark.parametrize("match, text", [ @pytest.mark.parametrize("match, text", [
@@ -39,7 +47,7 @@ def test_i_can_match_simple_tokens(match, text):
assert res.status assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 0, source=text)] assert res.value.value == [ConceptNode(foo, 0, 0, source=text, underlying=u(match, 0, 0))]
def test_i_can_match_multiple_concepts_in_one_input(): def test_i_can_match_multiple_concepts_in_one_input():
@@ -55,9 +63,9 @@ def test_i_can_match_multiple_concepts_in_one_input():
assert res.status assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ assert res.value.value == [
ConceptNode(one, 0, 0, source="one"), ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
ConceptNode(two, 2, 2, source="two"), ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)),
ConceptNode(one, 4, 4, source="one"), ConceptNode(one, 4, 4, source="one", underlying=u("one", 4, 4)),
] ]
@@ -85,8 +93,8 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown():
assert not res.status assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.try_parsed == [ assert res.value.try_parsed == [
ConceptNode(one, 0, 0, source="one"), ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)),
ConceptNode(two, 2, 2, source="two")] # these two were recognized ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2))] # these two were recognized
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body[0].body == "three" assert res.value.body[0].body == "three"
@@ -102,7 +110,11 @@ def test_i_can_match_sequence():
assert res.status assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")] assert res.value.value == [
ConceptNode(foo, 0, 4, source="one two three", underlying=u(concepts[foo], 0, 4, [
u("one", 0, 0),
u("two", 2, 2),
u("three", 4, 4)]))]
def test_wrong_sequence_is_not_matched(): def test_wrong_sequence_is_not_matched():
@@ -116,7 +128,7 @@ def test_wrong_sequence_is_not_matched():
assert not res.status assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")] assert res.value.try_parsed == [(foo, "one two three")]
assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body[0].body == "one" assert res.value.body[0].body == "one"
@@ -149,7 +161,7 @@ def test_i_always_choose_the_longest_match():
assert res.status assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")] assert res.value.value == [(foo, "one two three")]
def test_i_can_match_several_sequences(): def test_i_can_match_several_sequences():
@@ -166,8 +178,8 @@ def test_i_can_match_several_sequences():
assert res.status assert res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT)
assert res.value.value == [ assert res.value.value == [
ConceptNode(foo, 0, 4, source="one two three"), (foo, 0, 4, "one two three"),
ConceptNode(bar, 6, 8, source="one two"), (bar, 6, 8, "one two"),
] ]
@@ -181,12 +193,14 @@ def test_i_can_match_ordered_choice():
res1 = parser.parse(context, "one") res1 = parser.parse(context, "one")
assert res1.status assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [ConceptNode(foo, 0, 0, source="one")] assert res1.value.body == [
ConceptNode(foo, 0, 0, source="one", underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
res2 = parser.parse(context, "two") res2 = parser.parse(context, "two")
assert res2.status assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [ConceptNode(foo, 0, 0, source="two")] assert res2.value.body == [
ConceptNode(foo, 0, 0, source="two", underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))]
res3 = parser.parse(context, "three") res3 = parser.parse(context, "three")
assert not res3.status assert not res3.status
@@ -216,12 +230,20 @@ def test_i_can_mix_sequences_and_ordered_choices():
res1 = parser.parse(context, "twenty one ok") res1 = parser.parse(context, "twenty one ok")
assert res1.status assert res1.status
assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT)
assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok")] assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok",
underlying=u(concepts[foo], 0, 4, [
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]),
u("one", 2, 2),
u("ok", 4, 4)]))]
res2 = parser.parse(context, "thirty one ok") res2 = parser.parse(context, "thirty one ok")
assert res2.status assert res2.status
assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT)
assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok")] assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok",
underlying=u(concepts[foo], 0, 4, [
u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]),
u("one", 2, 2),
u("ok", 4, 4)]))]
res3 = parser.parse(context, "twenty one") res3 = parser.parse(context, "twenty one")
assert not res3.status assert not res3.status
@@ -267,7 +289,8 @@ def test_i_can_parse_optional():
res = parser.parse(context, "one") res = parser.parse(context, "one")
assert res.status assert res.status
assert res.value.value == [ConceptNode(foo, 0, 0, source="one")] assert res.value.value == [ConceptNode(foo, 0, 0, source="one",
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
def test_i_can_parse_sequence_starting_with_optional(): def test_i_can_parse_sequence_starting_with_optional():
@@ -280,11 +303,19 @@ def test_i_can_parse_sequence_starting_with_optional():
res = parser.parse(context, "twenty one") res = parser.parse(context, "twenty one")
assert res.status assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="twenty one")] assert res.value.body == [ConceptNode(
foo, 0, 2,
source="twenty one",
underlying=u(concepts[foo], 0, 2,
[
u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]),
u("one", 2, 2)]
))]
res = parser.parse(context, "one") res = parser.parse(context, "one")
assert res.status assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="one")] assert res.value.body == [ConceptNode(foo, 0, 0, source="one",
underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))]
def test_i_can_parse_sequence_ending_with_optional(): def test_i_can_parse_sequence_ending_with_optional():
@@ -297,11 +328,11 @@ def test_i_can_parse_sequence_ending_with_optional():
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
assert res.status assert res.status
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")] assert res.value.body == [(foo, 0, 4, "one two three")]
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert res.status assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="one two")] assert res.value.body == [(foo, 0, 2, "one two")]
def test_i_can_parse_sequence_with_optional_in_between(): def test_i_can_parse_sequence_with_optional_in_between():
@@ -314,11 +345,11 @@ def test_i_can_parse_sequence_with_optional_in_between():
res = parser.parse(context, "one two three") res = parser.parse(context, "one two three")
assert res.status assert res.status
assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")] assert res.value.body == [(foo, 0, 4, "one two three")]
res = parser.parse(context, "one three") res = parser.parse(context, "one three")
assert res.status assert res.status
assert res.value.body == [ConceptNode(foo, 0, 2, source="one three")] assert res.value.body == [(foo, 0, 2, "one three")]
def test_i_can_use_reference(): def test_i_can_use_reference():
@@ -338,11 +369,14 @@ def test_i_can_use_reference():
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")] assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two",
underlying=u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)]))]
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")] assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two",
underlying=u(ConceptMatch(foo, rule_name="foo"), 0, 2,
[u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)])]))]
def test_i_can_use_context_reference_with_multiple_levels(): def test_i_can_use_context_reference_with_multiple_levels():
@@ -364,15 +398,15 @@ def test_i_can_use_context_reference_with_multiple_levels():
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")] assert res[0].value.body == [(foo, 0, 2, "one two")]
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")] assert res[1].value.body == [(bar, 0, 2, "one two")]
assert res[2].status assert res[2].status
assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT)
assert res[2].value.body == [ConceptNode(baz, 0, 2, source="one two")] assert res[2].value.body == [(baz, 0, 2, "one two")]
def test_order_is_not_important_when_using_references(): def test_order_is_not_important_when_using_references():
@@ -386,8 +420,8 @@ def test_order_is_not_important_when_using_references():
res = parser.parse(context, "one two") res = parser.parse(context, "one two")
assert len(res) == 2 assert len(res) == 2
assert res[0].value.body == [ConceptNode(bar, 0, 2, source="one two")] assert res[0].value.body == [(bar, 0, 2, "one two")]
assert res[1].value.body == [ConceptNode(foo, 0, 2, source="one two")] assert res[1].value.body == [(foo, 0, 2, "one two")]
def test_i_can_parse_when_reference(): def test_i_can_parse_when_reference():
@@ -401,15 +435,15 @@ def test_i_can_parse_when_reference():
res = parser.parse(context, "twenty two") res = parser.parse(context, "twenty two")
assert res.status assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")] assert res.value.body == [(bar, 0, 2, "twenty two")]
res = parser.parse(context, "thirty one") res = parser.parse(context, "thirty one")
assert res.status assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")] assert res.value.body == [(bar, 0, 2, "thirty one")]
res = parser.parse(context, "twenty") res = parser.parse(context, "twenty")
assert res.status assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")] assert res.value.body == [(foo, 0, 0, "twenty")]
def test_i_can_detect_duplicates_when_reference(): def test_i_can_detect_duplicates_when_reference():
@@ -428,11 +462,11 @@ def test_i_can_detect_duplicates_when_reference():
assert len(res) == 2 assert len(res) == 2
assert res[0].status assert res[0].status
assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT)
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="twenty")] assert res[0].value.body == [(bar, 0, 0, "twenty")]
assert res[1].status assert res[1].status
assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT)
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="twenty")] assert res[1].value.body == [(foo, 0, 0, "twenty")]
def test_i_can_detect_infinite_recursion(): def test_i_can_detect_infinite_recursion():
@@ -446,8 +480,8 @@ def test_i_can_detect_infinite_recursion():
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(get_context(), concepts) parser.initialize(get_context(), concepts)
assert bar not in parser.concepts_dict assert bar not in parser.concepts_grammars
assert foo not in parser.concepts_dict assert foo not in parser.concepts_grammars
def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
@@ -461,8 +495,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(get_context(), concepts) parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_grammars # removed because of the infinite recursion
# the other way around is possible # the other way around is possible
context = get_context() context = get_context()
@@ -472,15 +506,15 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice():
} }
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(context, concepts) parser.initialize(context, concepts)
assert foo in parser.concepts_dict assert foo in parser.concepts_grammars
assert bar in parser.concepts_dict assert bar in parser.concepts_grammars
res = parser.parse(context, "foo") res = parser.parse(context, "foo")
assert len(res) == 2 assert len(res) == 2
assert res[0].status assert res[0].status
assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")] assert res[0].value.body == [(bar, 0, 0, "foo")]
assert res[1].status assert res[1].status
assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")] assert res[1].value.body == [(foo, 0, 0, "foo")]
def test_i_can_detect_indirect_infinite_recursion_with_sequence(): def test_i_can_detect_indirect_infinite_recursion_with_sequence():
@@ -494,8 +528,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence():
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(get_context(), concepts) parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_grammars # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(): def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice():
@@ -509,8 +543,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic
parser = ConceptLexerParser() parser = ConceptLexerParser()
parser.initialize(get_context(), concepts) parser.initialize(get_context(), concepts)
assert foo not in parser.concepts_dict # removed because of the infinite recursion assert foo not in parser.concepts_grammars # removed because of the infinite recursion
assert bar not in parser.concepts_dict # removed because of the infinite recursion assert bar not in parser.concepts_grammars # removed because of the infinite recursion
def test_i_can_detect_indirect_infinite_recursion_with_optional(): def test_i_can_detect_indirect_infinite_recursion_with_optional():
@@ -518,128 +552,6 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional():
pass pass
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
])
def test_i_can_parse_regex(expression, expected):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert not parser.has_error
assert res.status
assert res.value.value == expected
assert res.value.source == expression
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEndOfFileError()),
("1|", UnexpectedEndOfFileError()),
("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])),
])
def test_i_can_detect_errors(expression, error):
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
ret_value = res.value.value
assert parser.has_error
assert not res.status
assert ret_value[0] == error
def test_i_can_parse_regex_with_reference():
expression = "foo"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ConceptMatch("foo")
assert res.value.source == expression
def test_i_can_parse_cross_ref_with_modifier():
expression = "foo*"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == ZeroOrMore(ConceptMatch("foo"))
assert res.value.source == expression
def test_i_can_parse_sequence_with_cross_ref():
expression = "foo 'and' bar+"
parser = RegexParser()
res = parser.parse(get_context(), Tokenizer(expression))
assert res.status
assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_parse_choice_with_cross_ref():
foo = Concept("foo")
bar = Concept("bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
expression = "foo | bar?"
parser = RegexParser()
res = parser.parse(context, Tokenizer(expression))
assert res.status
assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar")))
assert res.value.source == expression
def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text():
foo = Concept(name="foo")
bar = Concept(name="bar")
context = get_context()
context.sheerka.add_in_cache(foo)
context.sheerka.add_in_cache(bar)
regex_parser = RegexParser()
foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")]
res = concept_parser.parse(context, "thirty one")
assert res.status
assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")]
res = concept_parser.parse(context, "twenty")
assert res.status
assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")]
def test_i_can_visit_parsing_expression(): def test_i_can_visit_parsing_expression():
mult = Concept(name="mult") mult = Concept(name="mult")
add = Concept(name="add") add = Concept(name="add")
@@ -650,6 +562,19 @@ def test_i_can_visit_parsing_expression():
assert sorted(list(visitor.concepts)) == ["add", "mult"] assert sorted(list(visitor.concepts)) == ["add", "mult"]
def test_i_can_initialize_rule_names():
context = get_context()
foo = Concept(name="foo")
bar = Concept(name="bar")
concepts = {foo: Sequence("one", "two"), bar: foo}
parser = ConceptLexerParser()
ret = parser.initialize(context, concepts)
return_value = ret.body
assert return_value[foo].rule_name == ""
assert return_value[bar].rule_name == "foo"
# #
# def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(): # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties():
+178
@@ -0,0 +1,178 @@
import pytest
from core.builtin_concepts import ReturnValueConcept, ParserResultConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext
from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator
from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \
StrMatch, Optional, OrderedChoice
def get_context():
sheerka = Sheerka(skip_builtins_in_db=True)
sheerka.initialize("mem://")
return ExecutionContext("test", "xxx", sheerka)
def get_return_value(nodes, source):
return ReturnValueConcept(
"some_name",
True,
ParserResultConcept(parser=ConceptLexerParser(),
source=source,
value=nodes,
try_parsed=nodes))
def get_concept_node(context, grammar, expression):
parser = ConceptLexerParser()
parser.initialize(context, grammar)
res = parser.parse(context, expression)
assert res.status
return res.value.value[0]
@pytest.mark.parametrize("ret_val, expected", [
(ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), False),
(ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")), False),
(ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])), False),
(ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]), False),
(ReturnValueConcept("some_name", True, ConceptNode(Concept(), 0, 0)), False),
])
def test_i_can_match(ret_val, expected):
context = get_context()
assert ConceptNodeEvaluator().matches(context, ret_val) == expected
def test_concept_is_returned_when_list_of_one_concept_node():
foo = Concept("foo")
context = get_context()
context.sheerka.add_in_cache(foo)
evaluator = ConceptNodeEvaluator()
node = ConceptNode(foo, 0, 0, underlying=TerminalNode(StrMatch("foo"), 0, 0, "foo"))
ret_val = get_return_value([node], "h")
result = evaluator.eval(context, ret_val)
assert result.who == evaluator.name
assert result.status
assert result.value == node.concept
assert result.parents == [ret_val]
def test_concept_property_is_correctly_updated_for_str_match():
context = get_context()
foo = Concept("foo")
concept_node = get_concept_node(context, {foo: StrMatch("foo", rule_name="variable")}, "foo")
updated = ConceptNodeEvaluator().update_concept(context.sheerka, concept_node.concept, concept_node.underlying)
assert "variable" in updated.props
assert updated.props["variable"].value == "foo"
def test_concept_property_is_correctly_updated_for_sequence():
context = get_context()
foo = Concept("foo")
grammar = {foo: Sequence("one", "two", rule_name="variable")}
concept_node = get_concept_node(context, grammar, "one two")
updated = ConceptNodeEvaluator().update_concept(context.sheerka, concept_node.concept, concept_node.underlying)
assert "variable" in updated.props
assert updated.props["variable"].value == "one two"
def test_concept_property_is_updated_for_str_in_sequence():
context = get_context()
foo = Concept("foo")
grammar = {foo: Sequence(StrMatch("one", rule_name="s1"), StrMatch("two", rule_name="s2"), rule_name="variable")}
concept_node = get_concept_node(context, grammar, "one two")
updated = ConceptNodeEvaluator().update_concept(context.sheerka, concept_node.concept, concept_node.underlying)
assert updated.props["variable"].value == "one two"
assert updated.props["s1"].value == "one"
assert updated.props["s2"].value == "two"
def test_concept_property_is_correctly_updated_for_optional():
context = get_context()
foo = Concept("foo")
grammar = {foo: Sequence("one", Optional("two", rule_name="o"), rule_name="variable")}
concept_node = get_concept_node(context, grammar, "one two")
updated = ConceptNodeEvaluator().update_concept(
context.sheerka,
context.sheerka.new(concept_node.concept.key),
concept_node.underlying)
assert "variable" in updated.props
assert updated.props["variable"].value == "one two"
assert updated.props["o"].value == "two"
def test_concept_property_is_correctly_updated_when_list_of_properties():
context = get_context()
foo = Concept("foo")
grammar = {foo: Sequence(StrMatch("one", rule_name="s"), StrMatch("two", rule_name="s"), rule_name="variable")}
concept_node = get_concept_node(context, grammar, "one two")
updated = ConceptNodeEvaluator().update_concept(
context.sheerka,
context.sheerka.new(concept_node.concept.key),
concept_node.underlying)
assert updated.props["variable"].value == "one two"
assert updated.props["s"].value == ["one", "two"]
def test_concept_property_is_correctly_updated_when_another_concept():
context = get_context()
foo = Concept("foo")
bar = Concept("bar")
context.sheerka.add_in_cache(foo)
grammar = {
foo: Sequence("one", "two", rule_name="variable"),
bar: Sequence(foo, "three", rule_name="variable")}
concept_node = get_concept_node(context, grammar, "one two three")
updated = ConceptNodeEvaluator().update_concept(
context.sheerka,
context.sheerka.new(concept_node.concept.key),
concept_node.underlying)
assert updated.props["variable"].value == "one two three"
assert updated.props["foo"].value == Concept("foo").set_prop("variable", "one two").init_key()
def test_concept_property_is_correctly_updated_when_concept_recursion():
context = get_context()
number = Concept("number")
add = Concept("add")
context.sheerka.add_in_cache(number)
context.sheerka.add_in_cache(add)
grammar = {
number: OrderedChoice("one", "two"),
add: Sequence(number, Optional(Sequence(OrderedChoice("plus", "minus", rule_name="op"), add)))
}
concept_node = get_concept_node(context, grammar, "one plus two")
updated = ConceptNodeEvaluator().update_concept(
context.sheerka,
context.sheerka.new(concept_node.concept.key),
concept_node.underlying)
assert updated.props["number"].value == Concept("number").init_key()
assert updated.props["op"].value == "plus"
assert updated.props["add"].value == Concept("add").set_prop("number", Concept("number").init_key()).init_key()
+3 -3
@@ -2,13 +2,13 @@ import pytest
import ast import ast
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch
from parsers.PythonParser import PythonParser, PythonNode from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer from core.tokenizer import Keywords, Tokenizer
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.BnfParser import BnfParser
# def nop(): # def nop():
@@ -341,7 +341,7 @@ def test_i_can_parse_def_concept_from_regex():
res = parser.parse(get_context(), text) res = parser.parse(get_context(), text)
node = res.value.value node = res.value.value
definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string")) definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition) parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition)
expected = get_concept(name="name", body="__definition[0]", definition=parser_result) expected = get_concept(name="name", body="__definition[0]", definition=parser_result)
assert res.status assert res.status
+52 -9
@@ -1,12 +1,10 @@
import ast
import pytest import pytest
import os import os
from os import path from os import path
import shutil import shutil
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept, PROPERTIES_TO_SERIALIZE from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property
from core.sheerka import Sheerka, ExecutionContext from core.sheerka import Sheerka, ExecutionContext
from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator
from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \ from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \
@@ -63,12 +61,12 @@ def test_builtin_concepts_are_initialized():
def test_builtin_concepts_can_be_updated(): def test_builtin_concepts_can_be_updated():
sheerka = get_sheerka(root_folder, skip_builtins_in_db=False) sheerka = get_sheerka(False, skip_builtins_in_db=False)
loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
loaded_sheerka.metadata.desc = "I have a description" loaded_sheerka.metadata.desc = "I have a description"
sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka) sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka)
sheerka = get_sheerka(root_folder) sheerka = get_sheerka(False)
loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA)
assert loaded_sheerka.metadata.desc == "I have a description" assert loaded_sheerka.metadata.desc == "I have a description"
@@ -593,9 +591,8 @@ def test_i_can_create_concept_with_bnf_definition():
saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY) saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY)
expected_bnf = Sequence( expected_bnf = Sequence(
ConceptMatch("a"), ConceptMatch("a", rule_name="a"),
Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))), Optional(Sequence(StrMatch("plus"), ConceptMatch("plus", rule_name="plus"))))
rule_name="plus")
assert saved_definitions[saved_concept] == expected_bnf assert saved_definitions[saved_concept] == expected_bnf
new_concept = res[0].value.body new_concept = res[0].value.body
@@ -606,7 +603,53 @@ def test_i_can_create_concept_with_bnf_definition():
assert "plus" in new_concept.props assert "plus" in new_concept.props
def get_sheerka(root="mem://", skip_builtins_in_db=True): def test_i_can_eval_bnf_definitions():
sheerka = get_sheerka()
concept_a = sheerka.eval("def concept a from bnf 'one' | 'two'")[0].body.body
res = sheerka.eval("one")
assert len(res) == 1
assert res[0].status
assert sheerka.isinstance(res[0].value, concept_a)
def test_i_can_eval_bnf_definitions_with_variables():
sheerka = get_sheerka()
concept_a = sheerka.eval("def concept a from bnf 'one' | 'two'")[0].body.body
concept_b = sheerka.eval("def concept b from bnf a 'three'")[0].body.body
res = sheerka.eval("one three")
assert len(res) == 1
assert res[0].status
return_value = res[0].value
assert sheerka.isinstance(return_value, concept_b)
assert return_value.props["a"] == Property("a", concept_a)
def test_i_can_eval_bnf_definitions_from_separate_instances():
"""
Same test as before,
but make sure that the BNF definitions are correctly persisted and loaded
"""
sheerka = get_sheerka(False)
concept_a = sheerka.eval("def concept a from bnf 'one' | 'two'")[0].body.body
res = get_sheerka(False).eval("one")
assert len(res) == 1
assert res[0].status
assert sheerka.isinstance(res[0].value, concept_a)
res = get_sheerka(False).eval("two")
assert len(res) == 1
assert res[0].status
assert sheerka.isinstance(res[0].value, concept_a)
def get_sheerka(use_dict=True, skip_builtins_in_db=True):
root = "mem://" if use_dict else root_folder
sheerka = Sheerka(skip_builtins_in_db) sheerka = Sheerka(skip_builtins_in_db)
sheerka.initialize(root) sheerka.initialize(root)
+73 -3
@@ -754,7 +754,7 @@ def test_i_can_set_using_reference(root):
".sheerka", ".sheerka",
"mem://" "mem://"
]) ])
def test_i_can_add_reference_of_an_object_with_a_key(root): def test_i_can_add_an_object_with_a_key_as_a_reference(root):
sdp = SheerkaDataProvider(root) sdp = SheerkaDataProvider(root)
obj = ObjDumpJson("my_key", "value1") obj = ObjDumpJson("my_key", "value1")
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj)) obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
@@ -777,7 +777,7 @@ def test_i_can_add_reference_of_an_object_with_a_key(root):
".sheerka", ".sheerka",
"mem://" "mem://"
]) ])
def test_i_can_add_reference_a_dictionary(root): def test_i_can_add_a_dictionary_as_a_reference(root):
sdp = SheerkaDataProvider(root) sdp = SheerkaDataProvider(root)
obj = {"my_key": "value1"} obj = {"my_key": "value1"}
@@ -1403,7 +1403,7 @@ def test_i_can_get_an_entry_by_key(root):
".sheerka", ".sheerka",
"mem://" "mem://"
]) ])
def test_i_can_get_object_save_by_reference(root): def test_i_can_get_object_saved_by_reference(root):
sdp = SheerkaDataProvider(root) sdp = SheerkaDataProvider(root)
obj = ObjDumpJson("my_key", "value1") obj = ObjDumpJson("my_key", "value1")
sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj))) sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj)))
@@ -1687,3 +1687,73 @@ def test_i_can_add_obj_with_same_key_and_get_them_back(root):
assert len(loaded) == 2 assert len(loaded) == 2
assert loaded[0] == obj1 assert loaded[0] == obj1
assert loaded[1] == obj2 assert loaded[1] == obj2
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
])
def test_i_get_safe_dictionary_without_origin(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key": "value1"}
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get_safe(entry, key)
assert len(from_db) == 2
assert from_db["my_key"] == obj["my_key"]
assert Serializer.ORIGIN in from_db
from_db_no_origin = sdp.get_safe(entry, key, load_origin=False)
assert len(from_db_no_origin) == 1
assert from_db_no_origin["my_key"] == obj["my_key"]
assert Serializer.ORIGIN not in from_db_no_origin
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
])
def test_i_get_dictionary_without_origin(root):
sdp = SheerkaDataProvider(root)
obj = {"my_key": "value1"}
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get(entry, key)
assert len(from_db) == 2
assert from_db["my_key"] == obj["my_key"]
assert Serializer.ORIGIN in from_db
from_db_no_origin = sdp.get(entry, key, load_origin=False)
assert len(from_db_no_origin) == 1
assert from_db_no_origin["my_key"] == obj["my_key"]
assert Serializer.ORIGIN not in from_db_no_origin
@pytest.mark.parametrize("root", [
".sheerka",
"mem://"
])
def test_i_get_safe_object_without_origin(root):
sdp = SheerkaDataProvider(root)
obj = ObjDumpJson("my_key", "value1")
obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj))
sdp.serializer.register(obj_serializer)
entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True)
from_db = sdp.get_safe(entry, key)
assert from_db == obj
assert hasattr(from_db, Serializer.ORIGIN)
from_db_no_origin = sdp.get_safe(entry, key, load_origin=False)
assert from_db_no_origin == obj
assert not hasattr(from_db_no_origin, Serializer.ORIGIN)
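The last group of tests exercises the new load_origin flag on SheerkaDataProvider.get and get_safe: when it is False, the origin marker added by the serializer is dropped from the loaded dictionary or object. A minimal sketch of that behaviour follows; ORIGIN and strip_origin are hypothetical stand-ins, since the real marker handling lives inside the Serializer.

# Minimal sketch of the load_origin behaviour checked above (stand-in names,
# not the real Serializer internals).
import copy

ORIGIN = "__origin__"

def strip_origin(value, load_origin=True):
    """Return the loaded value, dropping the origin marker when load_origin is False."""
    if load_origin:
        return value
    if isinstance(value, dict):
        return {k: v for k, v in value.items() if k != ORIGIN}
    if hasattr(value, ORIGIN):
        value = copy.copy(value)
        delattr(value, ORIGIN)
    return value

stored = {"my_key": "value1", ORIGIN: "event digest of the save"}
print(strip_origin(stored, load_origin=False))  # {'my_key': 'value1'}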