diff --git a/core/sheerka.py b/core/sheerka.py index 0753222..b8a0b6f 100644 --- a/core/sheerka.py +++ b/core/sheerka.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from functools import lru_cache from core.builtin_concepts import BuiltinConcepts, ErrorConcept, ReturnValueConcept from core.concept import Concept, ConceptParts, PROPERTIES_FOR_DIGEST @@ -40,11 +39,12 @@ class Sheerka(Concept): # # Cache for all concepts BNF + # self.concepts_definitions = {} # # cache for concepts grammars - # a grammar can be seen as a resolved BNF + # a grammar is a resolved BNF self.concepts_grammars = {} # a concept can be instantiated @@ -79,14 +79,18 @@ class Sheerka(Concept): try: self.init_logging() - self.sdp = SheerkaDataProvider(root_folder) + self.sdp = SheerkaDataProvider(root_folder) if self.sdp.first_time: self.sdp.set_key(self.USER_CONCEPTS_KEYS, 1000) + evt_digest = self.sdp.save_event(Event("Initializing Sheerka.")) + exec_context = ExecutionContext(self.key, evt_digest, self) + self.initialize_builtin_concepts() self.initialize_builtin_parsers() self.initialize_builtin_evaluators() + self.initialize_concepts_definitions(exec_context) except IOError as e: return ReturnValueConcept(self, False, self.get(BuiltinConcepts.ERROR), e) @@ -149,19 +153,35 @@ class Sheerka(Concept): init_log.debug(f"Adding builtin evaluator '{evaluator.__name__}'") self.evaluators.append(evaluator) - def logger_filter(self, record: logging.LogRecord): - if 'all' in self.loggers: - return True + def initialize_concepts_definitions(self, execution_context): + init_log.debug("Initializing concepts definitions") + definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False) - ret = True - if 'init' not in self.loggers and record.name.endswith(".init"): - ret = False + if definitions is None: + init_log.debug("No BNF defined") + return - return ret + lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS]() + ret_val = lexer_parser.initialize(execution_context, definitions) + if not ret_val.status: + init_log.error("Failed to initialize concepts definitions " + str(ret_val.body)) + return + + self.concepts_grammars = lexer_parser.concepts_grammars def init_logging(self): + def _logger_filter(record: logging.LogRecord): + if 'all' in self.loggers: + return True + + ret = True + if 'init' not in self.loggers and record.name.endswith(".init"): + ret = False + + return ret + handler = logging.StreamHandler() - handler.addFilter(self.logger_filter) + handler.addFilter(_logger_filter) if self.debug: log_format = "%(asctime)s %(name)s [%(levelname)s] %(message)s" log_level = logging.DEBUG @@ -211,7 +231,7 @@ class Sheerka(Concept): else "'" + BaseParser.get_text_from_tokens(text) + "' as tokens" log.debug(f"Parsing {debug_text}") for parser in self.parsers.values(): - p = parser() + p = parser(sheerka=self) res = p.parse(context, text) if isinstance(res, list): result.extend(res) @@ -347,7 +367,7 @@ class Sheerka(Concept): concepts_definitions[concept] = concept.bnf # check if it's a valid BNF or whether it breaks the known rules - concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](self.concepts_grammars.copy()) + concept_lexer_parser = self.parsers[CONCEPT_LEXER_PARSER_CLASS](grammars=self.concepts_grammars.copy()) sub_context = context.push(self.name, "Initializing concept definition") sub_context.concepts_cache[concept.key] = concept # the concept is not in the real cache yet init_ret_value = concept_lexer_parser.initialize(sub_context, concepts_definitions) diff --git a/core/utils.py b/core/utils.py index 3da36f4..67d88ca 100644 --- a/core/utils.py +++ b/core/utils.py @@ -1,7 +1,6 @@ import importlib import inspect import pkgutil -import sys from core.tokenizer import TokenKind diff --git a/evaluators/AddConceptEvaluator.py b/evaluators/AddConceptEvaluator.py index c9014d1..dc19057 100644 --- a/evaluators/AddConceptEvaluator.py +++ b/evaluators/AddConceptEvaluator.py @@ -1,14 +1,13 @@ from core.ast.nodes import python_to_concept -from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts +from core.builtin_concepts import ParserResultConcept, ReturnValueConcept from core.builtin_helpers import get_names from core.concept import Concept from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.ConceptLexerParser import ParsingExpression, ParsingExpressionVisitor from parsers.DefaultParser import DefConceptNode -import functools import logging -from parsers.PythonParser import PythonGetNamesVisitor, PythonNode +from parsers.PythonParser import PythonNode log = logging.getLogger(__name__) @@ -23,7 +22,12 @@ class ConceptOrRuleNameVisitor(ParsingExpressionVisitor): self.names = set() def visit_ConceptMatch(self, node): - self.names.add(node.rule_name or node.concept_name) + if node.rule_name: + self.names.add(node.rule_name) + elif isinstance(node.concept, Concept): + self.names.add(node.concept.name) + else: + self.names.add(node.concept) def visit_all(self, node): if node.rule_name: diff --git a/evaluators/ConceptEvaluator.py b/evaluators/ConceptEvaluator.py index b4371ca..f0721ae 100644 --- a/evaluators/ConceptEvaluator.py +++ b/evaluators/ConceptEvaluator.py @@ -4,12 +4,16 @@ from core.concept import Concept, ConceptParts from evaluators.BaseEvaluator import OneReturnValueEvaluator import logging -from parsers.BaseParser import BaseParser - log = logging.getLogger(__name__) class ConceptEvaluator(OneReturnValueEvaluator): + """ + The concept evaluatuor is the main class that know what to do with a concept + It verifies the PRE + If ok, can execute or not the BODY + Then checks the POST conditions + """ NAME = "Concept" evaluation_steps = [BuiltinConcepts.EVALUATION, BuiltinConcepts.AFTER_EVALUATION] diff --git a/evaluators/ConceptNodeEvaluator.py b/evaluators/ConceptNodeEvaluator.py new file mode 100644 index 0000000..5d3dec0 --- /dev/null +++ b/evaluators/ConceptNodeEvaluator.py @@ -0,0 +1,92 @@ +from core.builtin_concepts import ParserResultConcept, BuiltinConcepts +from evaluators.BaseEvaluator import OneReturnValueEvaluator + +import logging + +from parsers.ConceptLexerParser import ConceptNode, TerminalNode, NonTerminalNode, ConceptMatch + +log = logging.getLogger(__name__) + + +class ConceptNodeEvaluator(OneReturnValueEvaluator): + """ + After a BNF is recognized, generates the concept or the list concepts + """ + + NAME = "ConceptNode" + + def __init__(self): + super().__init__(self.NAME, 60) # more than the ConceptNodeEvaluator + + def matches(self, context, return_value): + if not return_value.status: + return False + if not isinstance(return_value.value, ParserResultConcept): + return False + + return (isinstance(return_value.value.value, ConceptNode) or + ( + hasattr(return_value.value.value, "__iter__") and + len(return_value.value.value) > 0 and + isinstance(return_value.value.value[0], ConceptNode) + )) + + def eval(self, context, return_value): + """ + From a concept node, creates a new concept + and makes sure that the properties are correctly set + """ + sheerka = context.sheerka + nodes = return_value.value.value + if not hasattr(nodes, "__iter__"): + nodes = [nodes] + + concepts = [] + for node in nodes: + concept = sheerka.new(node.concept.key) + concept = self.update_concept(sheerka, concept, node.underlying) + concepts.append(concept) + + if len(concepts) == 1: + return sheerka.ret( + self.name, + True, + concepts[0], + parents=[return_value]) + + raise NotImplementedError("Not yet") + + def update_concept(self, sheerka, concept, underlying): + """ + Updates the property of the concept + """ + + def _add_prop(c, prop_name, value): + """ + Adds a new entry, + makes a list if the property already exists + """ + if prop_name not in c.props or c.props[prop_name].value is None: + c.set_prop(prop_name, value) + else: + new_value = [c.props[prop_name].value, value] + c.set_prop(prop_name, new_value) + + parsing_expression = underlying.parsing_expression + + if parsing_expression.rule_name: + _add_prop(concept, parsing_expression.rule_name, underlying.source) + + if isinstance(underlying, NonTerminalNode): + for child in underlying.children: + if isinstance(child.parsing_expression, ConceptMatch): + new_concept = sheerka.new(child.parsing_expression.concept.key) + _add_prop(concept, child.parsing_expression.rule_name, new_concept) + if sheerka.isinstance(new_concept, BuiltinConcepts.UNKNOWN_CONCEPT): + continue + else: + self.update_concept(sheerka, new_concept, child.children[0]) + else: + self.update_concept(sheerka, concept, child) + + return concept diff --git a/evaluators/MutipleSameSuccessEvaluator.py b/evaluators/MutipleSameSuccessEvaluator.py index b752db2..f6cc898 100644 --- a/evaluators/MutipleSameSuccessEvaluator.py +++ b/evaluators/MutipleSameSuccessEvaluator.py @@ -1,5 +1,4 @@ from core.builtin_concepts import BuiltinConcepts -from core.concept import Concept import core.builtin_helpers from evaluators.BaseEvaluator import AllReturnValuesEvaluator, BaseEvaluator import logging @@ -13,6 +12,8 @@ class MultipleSameSuccessEvaluator(AllReturnValuesEvaluator): """ Used to filter the responses It has a low priority to let other evaluators try to resolve the errors + + It reduces the responses when several evaluators give the same answer """ NAME = "MultipleSameSuccess" diff --git a/evaluators/OneSuccessEvaluator.py b/evaluators/OneSuccessEvaluator.py index 770e290..6304085 100644 --- a/evaluators/OneSuccessEvaluator.py +++ b/evaluators/OneSuccessEvaluator.py @@ -11,6 +11,8 @@ class OneSuccessEvaluator(AllReturnValuesEvaluator): """ Used to filter the responses It has a low priority to let other evaluators try to resolve the errors + + Make sure that there is only one successful answer """ NAME = "OneSuccess" diff --git a/evaluators/PythonEvaluator.py b/evaluators/PythonEvaluator.py index 633f739..0a3930d 100644 --- a/evaluators/PythonEvaluator.py +++ b/evaluators/PythonEvaluator.py @@ -15,6 +15,10 @@ log = logging.getLogger(__name__) class PythonEvaluator(OneReturnValueEvaluator): NAME = "Python" + """ + Evaluate a Python node, ie, evaluate some Python code + """ + def __init__(self): super().__init__(self.NAME, 50) diff --git a/evaluators/TooManySuccessEvaluator.py b/evaluators/TooManySuccessEvaluator.py index 7ae0939..609c152 100644 --- a/evaluators/TooManySuccessEvaluator.py +++ b/evaluators/TooManySuccessEvaluator.py @@ -12,6 +12,8 @@ class TooManySuccessEvaluator(AllReturnValuesEvaluator): """ Used to filter the responses It has a low priority to let other evaluators try to resolve the errors + + Raises an error when that are several successful answers, with different values """ NAME = "TooManySuccess" diff --git a/main.py b/main.py index 3023e9d..c32d1ca 100644 --- a/main.py +++ b/main.py @@ -9,7 +9,7 @@ import core.utils def usage(): print("Sheerka v0.1\n") print("usage:") - print(sys.argv[0] + "[-hd] command ") + print(sys.argv[0] + "[-hdl:] command ") def main(argv): diff --git a/parsers/BaseParser.py b/parsers/BaseParser.py index 4c8693b..b8a0630 100644 --- a/parsers/BaseParser.py +++ b/parsers/BaseParser.py @@ -27,6 +27,12 @@ class ErrorNode(Node): pass +@dataclass() +class UnexpectedTokenErrorNode(ErrorNode): + message: str + expected_tokens: list + + class BaseParser: PREFIX = "Parsers:" diff --git a/parsers/BnfParser.py b/parsers/BnfParser.py new file mode 100644 index 0000000..85d0142 --- /dev/null +++ b/parsers/BnfParser.py @@ -0,0 +1,227 @@ +from dataclasses import dataclass + +import core.utils +from core.builtin_concepts import BuiltinConcepts +from core.sheerka import ExecutionContext +from core.tokenizer import Tokenizer, Token, TokenKind +from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode +from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptMatch, StrMatch + + +@dataclass() +class UnexpectedEndOfFileError(ErrorNode): + pass + + +class BnfParser: + """ + Parser used to transform litteral into ParsingExpression + example : + a | b, c -> Sequence(OrderedChoice(a, b) ,c) + + '|' (pipe) is used for OrderedChoice + ',' (comma) is used for Sequence + '?' (question mark) is used for Optional + '*' (star) is used for ZeroOrMore + '+' (plus) is used for OneOrMore + + """ + + def __init__(self): + self.has_error = False + self.error_sink = [] + self.name = BaseParser.PREFIX + "RegexParser" + + self.lexer_iter = None + self._current = None + self.after_current = None + self.nb_open_par = 0 + self.context = None + self.source = "" + self.sheerka = None + + def __eq__(self, other): + if not isinstance(other, BnfParser): + return False + + return True + + def reset_parser(self, context, text): + self.context = context + self.sheerka = context.sheerka + + self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text) + self._current = None + self.after_current = None + self.nb_open_par = 0 + + self.next_token() + self.eat_white_space() + + def add_error(self, error, next_token=True): + self.has_error = True + self.error_sink.append(error) + if next_token: + self.next_token() + return error + + def get_token(self) -> Token: + return self._current + + def next_token(self, skip_whitespace=False): + if self._current and self._current.type == TokenKind.EOF: + return + + try: + self._current = self.after_current or next(self.lexer_iter) + self.source += str(self._current.value) + self.after_current = None + + if skip_whitespace: + while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: + self._current = next(self.lexer_iter) + self.source += str(self._current.value) + except StopIteration: + self._current = Token(TokenKind.EOF, "", -1, -1, -1) + + def next_after(self): + if self.after_current is not None: + return self.after_current + + try: + self.after_current = next(self.lexer_iter) + # self.source += str(self.after_current.value) + return self.after_current + except StopIteration: + self.after_current = Token(TokenKind.EOF, "", -1, -1, -1) + return self.after_current + + def eat_white_space(self): + if self.after_current is not None: + self._current = self.after_current + self.source += str(self._current.value) + self.after_current = None + + try: + while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: + self._current = next(self.lexer_iter) + self.source += str(self._current.value) + except StopIteration: + self._current = None + + def maybe_sequence(self, first, second): + token = self.get_token() + return token.type == second or token.type == first and self.next_after().type == second + + def parse(self, context: ExecutionContext, text): + self.reset_parser(context, text) + tree = self.parse_choice() + + ret = self.sheerka.ret( + self.name, + not self.has_error, + self.sheerka.new( + BuiltinConcepts.PARSER_RESULT, + parser=self, + source=self.source, + body=self.error_sink if self.has_error else tree, + try_parsed=tree)) + + return ret + + def parse_choice(self): + sequence = self.parse_sequence() + + self.eat_white_space() + token = self.get_token() + if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: + return sequence + + elements = [sequence] + while True: + # maybe eat the vertical bar + self.eat_white_space() + token = self.get_token() + if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: + break + self.next_token(skip_whitespace=True) + + sequence = self.parse_sequence() + elements.append(sequence) + + return OrderedChoice(*elements) + + def parse_sequence(self): + expr_and_modifier = self.parse_expression_and_modifier() + token = self.get_token() + if token is None or token.type == TokenKind.EOF or \ + self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ + self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR): + return expr_and_modifier + + elements = [expr_and_modifier] + while True: + # maybe eat the comma + token = self.get_token() + if token is None or token.type == TokenKind.EOF or \ + self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ + self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR): + break + self.eat_white_space() + + sequence = self.parse_expression_and_modifier() + elements.append(sequence) + + return Sequence(*elements) + + def parse_expression_and_modifier(self): + expression = self.parse_expression() + + token = self.get_token() + + if token.type == TokenKind.QMARK: + self.next_token() + return Optional(expression) + + if token.type == TokenKind.STAR: + self.next_token() + return ZeroOrMore(expression) + + if token.type == TokenKind.PLUS: + self.next_token() + return OneOrMore(expression) + + return expression + + def parse_expression(self): + token = self.get_token() + if token.type == TokenKind.EOF: + self.add_error(UnexpectedEndOfFileError(), False) + if token.type == TokenKind.LPAR: + self.nb_open_par += 1 + self.next_token() + expression = self.parse_choice() + token = self.get_token() + if token.type == TokenKind.RPAR: + self.nb_open_par -= 1 + self.next_token() + return expression + else: + self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR])) + return expression + + if token.type == TokenKind.IDENTIFIER: + self.next_token() + return ConceptMatch(token.value) + # concept = self.sheerka.get(str(token.value)) + # if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): + # self.add_error(CannotResolveConceptNode(str(token.value))) + # self.next_token() + # return None + # else: + # self.next_token() + # return concept + + ret = StrMatch(core.utils.strip_quotes(token.value)) + self.next_token() + return ret \ No newline at end of file diff --git a/parsers/ConceptLexerParser.py b/parsers/ConceptLexerParser.py index cc5442f..f4984d2 100644 --- a/parsers/ConceptLexerParser.py +++ b/parsers/ConceptLexerParser.py @@ -1,5 +1,5 @@ ##################################################################################################### -# This part of code is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio) +# This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio) # I don't directly use the project, but it helped me figure out # what to do. # Dejanović I., Milosavljević G., Vaderna R.: @@ -10,7 +10,6 @@ from dataclasses import field, dataclass from collections import defaultdict from core.builtin_concepts import BuiltinConcepts from core.concept import Concept -from core.sheerka import ExecutionContext from core.tokenizer import TokenKind, Tokenizer, Token from parsers.BaseParser import BaseParser, Node, ErrorNode import core.utils @@ -40,6 +39,18 @@ def flatten(iterable): class LexerNode(Node): start: int end: int + tokens: list = None + source: str = None + + def __post_init__(self): + if self.source is None: + self.source = BaseParser.get_text_from_tokens(self.tokens) + + def __eq__(self, other): + if not isinstance(other, LexerNode): + return False + + return self.start == other.start and self.end == other.end class ConceptNode(LexerNode): @@ -48,17 +59,24 @@ class ConceptNode(LexerNode): It represents a recognized concept """ - def __init__(self, concept, start, end, tokens=None, source=None, children=None): - super().__init__(start, end) + def __init__(self, concept, start, end, tokens=None, source=None, underlying=None): + super().__init__(start, end, tokens, source) self.concept = concept - self.tokens = tokens - self.source = source - self.children = children + self.underlying = underlying if self.source is None: self.source = BaseParser.get_text_from_tokens(self.tokens) def __eq__(self, other): + if isinstance(other, tuple): + if len(other) == 2: + return self.concept == other[0] and self.source == other[1] + else: + return self.concept == other[0] and \ + self.start == other[1] and \ + self.end == other[2] and \ + self.source == other[3] + if not super().__eq__(other): return False @@ -66,10 +84,14 @@ class ConceptNode(LexerNode): return False return self.concept == other.concept and \ - self.source == other.source + self.source == other.source and \ + self.underlying == other.underlying def __hash__(self): - return hash((self.concept, self.start, self.end, self.source)) + return hash((self.concept, self.start, self.end, self.source, self.underlying)) + + def __repr__(self): + return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')" class NonTerminalNode(LexerNode): @@ -77,8 +99,8 @@ class NonTerminalNode(LexerNode): Returned by the ConceptLexerParser """ - def __init__(self, parsing_expression, start, end, children=None): - super().__init__(start, end) + def __init__(self, parsing_expression, start, end, tokens, children=None): + super().__init__(start, end, tokens) self.parsing_expression = parsing_expression self.children = children @@ -90,6 +112,21 @@ class NonTerminalNode(LexerNode): sub_names = "" return name + sub_names + def __eq__(self, other): + if not super().__eq__(other): + return False + + if not isinstance(other, NonTerminalNode): + return False + + return self.parsing_expression == other.parsing_expression and \ + self.start == other.start and \ + self.end == other.end and \ + self.children == other.children + + def __hash__(self): + return hash((self.parsing_expression, self.start, self.end, self.children)) + class TerminalNode(LexerNode): """ @@ -97,7 +134,7 @@ class TerminalNode(LexerNode): """ def __init__(self, parsing_expression, start, end, value): - super().__init__(start, end) + super().__init__(start, end, source=value) self.parsing_expression = parsing_expression self.value = value @@ -105,23 +142,27 @@ class TerminalNode(LexerNode): name = self.parsing_expression.rule_name or "" return name + f"'{self.value}'" + def __eq__(self, other): + if not super().__eq__(other): + return False + + if not isinstance(other, TerminalNode): + return False + + return self.parsing_expression == other.parsing_expression and \ + self.start == other.start and \ + self.end == other.end and \ + self.value == other.value + + def __hash__(self): + return hash((self.parsing_expression, self.start, self.end, self.value)) + @dataclass() class GrammarErrorNode(ErrorNode): message: str -@dataclass() -class UnexpectedTokenErrorNode(ErrorNode): - message: str - expected_tokens: list - - -@dataclass() -class UnexpectedEndOfFileError(ErrorNode): - pass - - @dataclass() class UnknownConceptNode(ErrorNode): concept_key: str @@ -175,7 +216,7 @@ class Sequence(ParsingExpression): children.append(node) end_pos = node.end - return NonTerminalNode(self, init_pos, end_pos, children) + return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children or []) def __repr__(self): to_str = ", ".join(repr(n) for n in self.elements) @@ -194,7 +235,7 @@ class OrderedChoice(ParsingExpression): for e in self.nodes: node = e.parse(parser) if node: - return NonTerminalNode(self, init_pos, node.end, [node]) + return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node]) parser.seek(init_pos) # backtrack @@ -214,13 +255,18 @@ class Optional(ParsingExpression): def _parse(self, parser): init_pos = parser.pos - selected_node = NonTerminalNode(self, parser.pos, -1, []) + selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found for e in self.nodes: node = e.parse(parser) if node: if node.end > selected_node.end: - selected_node = node + selected_node = NonTerminalNode( + self, + node.start, + node.end, + parser.tokens[node.start: node.end + 1], + [node]) parser.seek(init_pos) # backtrack @@ -327,12 +373,12 @@ class ConceptMatch(Match): When the grammar is created, it is replaced by the actual concept """ - def __init__(self, concept_name): - super(Match, self).__init__() - self.concept_name = concept_name + def __init__(self, concept, rule_name=""): + super(Match, self).__init__(rule_name=rule_name) + self.concept = concept def __repr__(self): - return f"{self.concept_name}" + return f"{self.concept}" def __eq__(self, other): if not super().__eq__(other): @@ -341,32 +387,37 @@ class ConceptMatch(Match): if not isinstance(other, ConceptMatch): return False - return self.concept_name == other.concept_name - - -class CrossRef: - """ - During the creation of the model, - Creates reference to a concept, as it may not be resolved yet - """ - - def __init__(self, concept): - self.concept = concept - - def __repr__(self): - return f"ref({self.concept.key})" - - def __eq__(self, other): - if not isinstance(other, CrossRef): - return False + if isinstance(self.concept, Concept): + return self.concept.name == other.concept.name return self.concept == other.concept + def _parse(self, parser): + to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept + if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): + return None + + if to_match not in parser.concepts_grammars: + return None + + self.concept = to_match # Memoize + node = parser.concepts_grammars[to_match].parse(parser) + if node is None: + return None + + return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node]) + class ConceptLexerParser(BaseParser): - def __init__(self, concepts_dict=None): + def __init__(self, **kwargs): super().__init__("ConceptLexer") - self.concepts_dict = concepts_dict or {} # dict of concept, grammar + if 'grammars' in kwargs: + self.concepts_grammars = kwargs.get("grammars") + elif 'sheerka' in kwargs: + self.concepts_grammars = kwargs.get("sheerka").concepts_grammars + else: + self.concepts_grammars = {} + self.ignore_case = True self.token = None @@ -430,24 +481,23 @@ class ConceptLexerParser(BaseParser): self.pos -= 1 self.token = self.tokens[self.pos] - def initialize(self, context, grammars): + def initialize(self, context, concepts_definitions): """ Adds a bunch of concepts, and how they can be recognized :param context: execution context - :param grammars: dictionary of concept, concept_definition + :param concepts_definitions: dictionary of concept, concept_definition :return: """ self.context = context self.sheerka = context.sheerka - nodes_to_resolve = [] concepts_to_resolve = set() # ## Gets the grammars - for concept, concept_def in grammars.items(): + for concept, concept_def in concepts_definitions.items(): concept.init_key() # make sure that the key is initialized - grammar = self.get_model(concept, concept_def, nodes_to_resolve, concepts_to_resolve) - self.concepts_dict[concept] = grammar + grammar = self.get_model(concept_def, concepts_to_resolve) + self.concepts_grammars[concept] = grammar if self.has_error: return self.sheerka.ret(self.name, False, self.error_sink) @@ -456,73 +506,68 @@ class ConceptLexerParser(BaseParser): concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve) for concept in concepts_to_remove: concepts_to_resolve.remove(concept) - del self.concepts_dict[concept] - - # ## Resolves cross references and remove grammar with unresolved references - self.resolve_cross_references(concepts_to_resolve, nodes_to_resolve) + del self.concepts_grammars[concept] if self.has_error: return self.sheerka.ret(self.name, False, self.error_sink) else: - return self.sheerka.ret(self.name, True, self.concepts_dict) + return self.sheerka.ret(self.name, True, self.concepts_grammars) - def get_model(self, concept, concept_def, nodes_to_resolve, concepts_to_resolve): - def get_concept(concept_name): - if concept_name in self.context.concepts_cache: - return self.context.concepts_cache[concept_name] - return self.sheerka.get(concept_name) + def get_concept(self, concept_name): + if concept_name in self.context.concepts_cache: + return self.context.concepts_cache[concept_name] + return self.sheerka.get(concept_name) + + def get_model(self, concept_def, concepts_to_resolve): # TODO # inner_get_model must not modify the initial ParsingExpression # A copy must be created def inner_get_model(expression): if isinstance(expression, Concept): - ret = CrossRef(expression) - concepts_to_resolve.add(concept) - nodes_to_resolve.append(ret) + ret = ConceptMatch(expression, rule_name=expression.name) + concepts_to_resolve.add(expression) + elif isinstance(expression, ConceptMatch): + if expression.rule_name is None or expression.rule_name == "": + expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \ + else expression.concept + concepts_to_resolve.add(expression.concept) + ret = expression elif isinstance(expression, str): ret = StrMatch(expression, ignore_case=self.ignore_case) elif isinstance(expression, StrMatch): ret = expression if ret.ignore_case is None: ret.ignore_case = self.ignore_case - elif isinstance(expression, ConceptMatch): - to_match = get_concept(expression.concept_name) - if hasattr(to_match, "__iter__"): - ret = self.add_error(TooManyConceptNode(expression.concept_name), False) - elif self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): - ret = self.add_error(UnknownConceptNode(expression.concept_name), False) - else: - ret = CrossRef(to_match) - concepts_to_resolve.add(concept) - nodes_to_resolve.append(ret) elif isinstance(expression, Sequence) or \ isinstance(expression, OrderedChoice) or \ isinstance(expression, Optional): ret = expression ret.nodes.extend([inner_get_model(e) for e in ret.elements]) - if any((isinstance(x, CrossRef) for x in ret.nodes)): - concepts_to_resolve.add(concept) - nodes_to_resolve.append(ret) else: ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False) return ret model = inner_get_model(concept_def) - if isinstance(model, CrossRef): - concepts_to_resolve.add(concept) - model.rule_name = concept.key return model def detect_infinite_recursion(self, concepts_to_resolve): # infinite recursion matcher def _is_infinite_recursion(ref_concept, node): - if isinstance(node, CrossRef): + if isinstance(node, ConceptMatch): if node.concept == ref_concept: return True - return _is_infinite_recursion(ref_concept, self.concepts_dict[node.concept]) + + if isinstance(node.concept, str): + to_match = self.get_concept(node.concept) + if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT): + return False + else: + to_match = node.concept + + return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match]) if isinstance(node, OrderedChoice): return _is_infinite_recursion(ref_concept, node.nodes[0]) @@ -537,32 +582,16 @@ class ConceptLexerParser(BaseParser): removed_concepts = [] for e in concepts_to_resolve: - to_resolve = self.concepts_dict[e] + if isinstance(e, str): + e = self.get_concept(e) + if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT): + continue + + to_resolve = self.concepts_grammars[e] if _is_infinite_recursion(e, to_resolve): removed_concepts.append(e) return removed_concepts - # Cross-ref resolving - def resolve_cross_references(self, concepts_to_resolve, nodes_to_resolve): - - repeat = True - while repeat: - repeat = False - for e in concepts_to_resolve: - to_resolve = self.concepts_dict[e] - if isinstance(to_resolve, CrossRef): - repeat = True - self.concepts_dict[e] = self.concepts_dict[to_resolve.concept] - - for e in nodes_to_resolve: - if not isinstance(e, ParsingExpression): - continue # cases when a concept directly references another concept - - for i, node in enumerate(e.nodes): - if isinstance(node, CrossRef): - if node.concept in self.concepts_dict: - e.nodes[i] = self.concepts_dict[node.concept] - def parse(self, context, text): if text == "": return context.sheerka.ret( @@ -591,13 +620,17 @@ class ConceptLexerParser(BaseParser): while True: init_pos = self.pos res = [] - for concept, grammar in self.concepts_dict.items(): + for concept, grammar in self.concepts_grammars.items(): self.seek(init_pos) - node = grammar.parse(self) + node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode if node is not None: - concept_node = ConceptNode(concept, node.start, node.end, self.tokens[node.start: node.end + 1]) - if hasattr(node, "children"): - concept_node.children = node.children + concept_node = ConceptNode( + concept, + node.start, + node.end, + self.tokens[node.start: node.end + 1], + None, + node) res.append(concept_node) if len(res) == 0: # not recognized @@ -606,9 +639,7 @@ class ConceptLexerParser(BaseParser): self.add_error(self.sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=not_recognized)) break - res = self.get_bests(res) # only keep the concept that eat the more tokens - for r in res: - r.children = flatten(r.children) + res = self.get_bests(res) # only keep the concepts that eat the more tokens concepts_found = core.utils.product(concepts_found, res) # loop @@ -659,220 +690,6 @@ class ConceptLexerParser(BaseParser): return by_end_pos[max(by_end_pos)] -class RegexParser: - """ - Parser used to transform litteral into ParsingExpression - example : - a | b, c -> Sequence(OrderedChoice(a, b) ,c) - - '|' (pipe) is used for OrderedChoice - ',' (comma) is used for Sequence - '?' (question mark) is used for Optional - '*' (star) is used for ZeroOrMore - '+' (plus) is used for OneOrMore - - """ - - def __init__(self): - self.has_error = False - self.error_sink = [] - self.name = BaseParser.PREFIX + "RegexParser" - - self.lexer_iter = None - self._current = None - self.after_current = None - self.nb_open_par = 0 - self.context = None - self.source = "" - self.sheerka = None - - def __eq__(self, other): - if not isinstance(other, RegexParser): - return False - - return True - - def reset_parser(self, context, text): - self.context = context - self.sheerka = context.sheerka - - self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text) - self._current = None - self.after_current = None - self.nb_open_par = 0 - - self.next_token() - self.eat_white_space() - - def add_error(self, error, next_token=True): - self.has_error = True - self.error_sink.append(error) - if next_token: - self.next_token() - return error - - def get_token(self) -> Token: - return self._current - - def next_token(self, skip_whitespace=False): - if self._current and self._current.type == TokenKind.EOF: - return - - try: - self._current = self.after_current or next(self.lexer_iter) - self.source += str(self._current.value) - self.after_current = None - - if skip_whitespace: - while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: - self._current = next(self.lexer_iter) - self.source += str(self._current.value) - except StopIteration: - self._current = Token(TokenKind.EOF, "", -1, -1, -1) - - def next_after(self): - if self.after_current is not None: - return self.after_current - - try: - self.after_current = next(self.lexer_iter) - # self.source += str(self.after_current.value) - return self.after_current - except StopIteration: - self.after_current = Token(TokenKind.EOF, "", -1, -1, -1) - return self.after_current - - def eat_white_space(self): - if self.after_current is not None: - self._current = self.after_current - self.source += str(self._current.value) - self.after_current = None - - try: - while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: - self._current = next(self.lexer_iter) - self.source += str(self._current.value) - except StopIteration: - self._current = None - - def maybe_sequence(self, first, second): - token = self.get_token() - return token.type == second or token.type == first and self.next_after().type == second - - def parse(self, context: ExecutionContext, text): - self.reset_parser(context, text) - tree = self.parse_choice() - - ret = self.sheerka.ret( - self.name, - not self.has_error, - self.sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=self.source, - body=self.error_sink if self.has_error else tree, - try_parsed=tree)) - - return ret - - def parse_choice(self): - sequence = self.parse_sequence() - - self.eat_white_space() - token = self.get_token() - if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: - return sequence - - elements = [sequence] - while True: - # maybe eat the vertical bar - self.eat_white_space() - token = self.get_token() - if token is None or token.type == TokenKind.EOF or token.type != TokenKind.VBAR: - break - self.next_token(skip_whitespace=True) - - sequence = self.parse_sequence() - elements.append(sequence) - - return OrderedChoice(*elements) - - def parse_sequence(self): - expr_and_modifier = self.parse_expression_and_modifier() - token = self.get_token() - if token is None or token.type == TokenKind.EOF or \ - self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ - self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR): - return expr_and_modifier - - elements = [expr_and_modifier] - while True: - # maybe eat the comma - token = self.get_token() - if token is None or token.type == TokenKind.EOF or \ - self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \ - self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR): - break - self.eat_white_space() - - sequence = self.parse_expression_and_modifier() - elements.append(sequence) - - return Sequence(*elements) - - def parse_expression_and_modifier(self): - expression = self.parse_expression() - - token = self.get_token() - - if token.type == TokenKind.QMARK: - self.next_token() - return Optional(expression) - - if token.type == TokenKind.STAR: - self.next_token() - return ZeroOrMore(expression) - - if token.type == TokenKind.PLUS: - self.next_token() - return OneOrMore(expression) - - return expression - - def parse_expression(self): - token = self.get_token() - if token.type == TokenKind.EOF: - self.add_error(UnexpectedEndOfFileError(), False) - if token.type == TokenKind.LPAR: - self.nb_open_par += 1 - self.next_token() - expression = self.parse_choice() - token = self.get_token() - if token.type == TokenKind.RPAR: - self.nb_open_par -= 1 - self.next_token() - return expression - else: - self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token.type}'", [TokenKind.RPAR])) - return expression - - if token.type == TokenKind.IDENTIFIER: - self.next_token() - return ConceptMatch(token.value) - # concept = self.sheerka.get(str(token.value)) - # if hasattr(concept, "__iter__") or self.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT): - # self.add_error(CannotResolveConceptNode(str(token.value))) - # self.next_token() - # return None - # else: - # self.next_token() - # return concept - - ret = StrMatch(core.utils.strip_quotes(token.value)) - self.next_token() - return ret - - class ParsingExpressionVisitor: """ visit ParsingExpression diff --git a/parsers/DefaultParser.py b/parsers/DefaultParser.py index 1ca9a75..3f5c90c 100644 --- a/parsers/DefaultParser.py +++ b/parsers/DefaultParser.py @@ -2,12 +2,12 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserRes from core.concept import ConceptParts import core.builtin_helpers import core.utils -from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode +from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode from core.tokenizer import Tokenizer, TokenKind, Token, Keywords from dataclasses import dataclass, field import logging -from parsers.ConceptLexerParser import RegexParser +from parsers.BnfParser import BnfParser log = logging.getLogger(__name__) @@ -206,7 +206,7 @@ class DefaultParser(BaseParser): Parse sheerka specific grammar (like def concept) """ - def __init__(self): + def __init__(self, **kwargs): BaseParser.__init__(self, "DefaultParser") self.lexer_iter = None self._current = None @@ -427,7 +427,7 @@ class DefaultParser(BaseParser): self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False) return NotInitializedNode() - regex_parser = RegexParser() + regex_parser = BnfParser() new_context = self.context.push(self.name) parsing_result = regex_parser.parse(new_context, tokens) if not parsing_result.status: diff --git a/parsers/EmptyStringParser.py b/parsers/EmptyStringParser.py index a2d9fac..410c67a 100644 --- a/parsers/EmptyStringParser.py +++ b/parsers/EmptyStringParser.py @@ -10,7 +10,7 @@ class EmptyStringParser(BaseParser): To parse empty or blank strings """ - def __init__(self): + def __init__(self, **kwargs): BaseParser.__init__(self, "NullParser") def parse(self, context, text): diff --git a/parsers/ExactConceptParser.py b/parsers/ExactConceptParser.py index a84c0ab..6c12bb4 100644 --- a/parsers/ExactConceptParser.py +++ b/parsers/ExactConceptParser.py @@ -1,7 +1,7 @@ from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts from parsers.BaseParser import BaseParser from core.tokenizer import Tokenizer, Keywords, TokenKind -from core.concept import Concept, VARIABLE_PREFIX +from core.concept import VARIABLE_PREFIX import logging log = logging.getLogger(__name__) @@ -14,7 +14,7 @@ class ExactConceptParser(BaseParser): MAX_WORDS_SIZE = 10 - def __init__(self): + def __init__(self, **kwargs): BaseParser.__init__(self, "ConceptParser") def parse(self, context, text): diff --git a/parsers/PythonParser.py b/parsers/PythonParser.py index 7becfef..819d44c 100644 --- a/parsers/PythonParser.py +++ b/parsers/PythonParser.py @@ -2,7 +2,6 @@ from core.builtin_concepts import BuiltinConcepts from parsers.BaseParser import BaseParser, Node, ErrorNode from dataclasses import dataclass import ast -import copy import logging log = logging.getLogger(__name__) @@ -57,10 +56,10 @@ class PythonParser(BaseParser): Parse Python scripts """ - def __init__(self, source=""): + def __init__(self, **kwargs): BaseParser.__init__(self, "PythonParser") - self.source = source + self.source = kwargs.get("source", "") def parse(self, context, text): text = text if isinstance(text, str) else self.get_text_from_tokens(text) diff --git a/sdp/sheerkaDataProvider.py b/sdp/sheerkaDataProvider.py index 200d500..2905333 100644 --- a/sdp/sheerkaDataProvider.py +++ b/sdp/sheerkaDataProvider.py @@ -1,5 +1,3 @@ -# from os import path -# import os from datetime import datetime, date import hashlib import json @@ -542,7 +540,7 @@ class SheerkaDataProvider: self.set_snapshot(new_snapshot) return new_snapshot - def get(self, entry, key=None): + def get(self, entry, key=None, load_origin=True): """ Retrieve an element by its key :param entry: @@ -560,11 +558,11 @@ class SheerkaDataProvider: item = state.data[entry] if key is None else state.data[entry][key] if isinstance(item, list): - return [self.load_ref_if_needed(i)[0] for i in item] + return [self.load_ref_if_needed(i, load_origin)[0] for i in item] - return self.load_ref_if_needed(item)[0] + return self.load_ref_if_needed(item, load_origin)[0] - def get_safe(self, entry, key=None): + def get_safe(self, entry, key=None, load_origin=True): """ Retrieve an element by its key. Return None if the element does not exist :param entry: @@ -582,9 +580,9 @@ class SheerkaDataProvider: item = state.data[entry] if key is None else state.data[entry][key] if isinstance(item, list): - return [self.load_ref_if_needed(i)[0] for i in item] + return [self.load_ref_if_needed(i, load_origin)[0] for i in item] - return self.load_ref_if_needed(item)[0] + return self.load_ref_if_needed(item, load_origin)[0] def exists(self, entry, key=None, digest=None): """ @@ -676,7 +674,7 @@ class SheerkaDataProvider: log.debug(f"...digest={digest}.") return digest - def load_obj(self, digest): + def load_obj(self, digest, add_origin=True): if digest is None: return None @@ -688,19 +686,20 @@ class SheerkaDataProvider: obj = self.serializer.deserialize(f, SerializerContext(origin=digest)) # set the origin of the object - if isinstance(obj, dict): - obj[Serializer.ORIGIN] = digest - elif not isinstance(obj, str): - setattr(obj, Serializer.ORIGIN, digest) + if add_origin: + if isinstance(obj, dict): + obj[Serializer.ORIGIN] = digest + elif not isinstance(obj, str): + setattr(obj, Serializer.ORIGIN, digest) return obj - def load_ref_if_needed(self, obj): + def load_ref_if_needed(self, obj, load_origin=True): if not isinstance(obj, str): return obj, False if not obj.startswith(SheerkaDataProvider.REF_PREFIX): return obj, False - resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):]) + resolved = self.load_obj(obj[len(SheerkaDataProvider.REF_PREFIX):], load_origin) if resolved is None: return obj, False diff --git a/tests/test_AddConceptEvaluator.py b/tests/test_AddConceptEvaluator.py index 18ec7f0..e03faee 100644 --- a/tests/test_AddConceptEvaluator.py +++ b/tests/test_AddConceptEvaluator.py @@ -8,9 +8,9 @@ from core.sheerka import Sheerka, ExecutionContext from core.tokenizer import Tokenizer from evaluators.AddConceptEvaluator import AddConceptEvaluator from parsers.BaseParser import BaseParser -from parsers.ConceptLexerParser import Sequence, RegexParser, StrMatch, ZeroOrMore, ConceptMatch +from parsers.ConceptLexerParser import Sequence, StrMatch, ZeroOrMore, ConceptMatch +from parsers.BnfParser import BnfParser from parsers.DefaultParser import DefConceptNode, NameNode -from parsers.ExactConceptParser import ExactConceptParser from parsers.PythonParser import PythonNode, PythonParser @@ -67,7 +67,7 @@ def get_concept_definition(source, parsing_expression): status=True, value=ParserResultConcept( source=source, - parser=RegexParser(), + parser=BnfParser(), value=parsing_expression ) ) diff --git a/tests/test_BnfParser.py b/tests/test_BnfParser.py new file mode 100644 index 0000000..b2ce301 --- /dev/null +++ b/tests/test_BnfParser.py @@ -0,0 +1,138 @@ +import pytest + +from core.concept import Concept +from core.sheerka import Sheerka, ExecutionContext +from core.tokenizer import Tokenizer, TokenKind +from parsers.BaseParser import UnexpectedTokenErrorNode +from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError +from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \ + ConceptLexerParser, ConceptNode, ConceptMatch + + +def get_context(): + sheerka = Sheerka(skip_builtins_in_db=True) + sheerka.initialize("mem://") + + return ExecutionContext("sheerka", "xxxx", sheerka) + + +@pytest.mark.parametrize("expression, expected", [ + ("'str'", StrMatch("str")), + ("1", StrMatch("1")), + (" 1", StrMatch("1")), + (",", StrMatch(",")), + ("'foo'?", Optional(StrMatch("foo"))), + ("'foo'*", ZeroOrMore(StrMatch("foo"))), + ("'foo'+", OneOrMore(StrMatch("foo"))), + ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), + ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), + ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))), + ("1 2 | 3 4+", OrderedChoice( + Sequence(StrMatch("1"), StrMatch("2")), + Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))), + ("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))), + ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))), + ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))), + ("1 *", Sequence(StrMatch("1"), StrMatch("*"))), + ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))), + ("1 +", Sequence(StrMatch("1"), StrMatch("+"))), + ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))), + ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))), + ("(1 )", StrMatch("1")), +]) +def test_i_can_parse_regex(expression, expected): + parser = BnfParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert not parser.has_error + assert res.status + assert res.value.value == expected + assert res.value.source == expression + + +@pytest.mark.parametrize("expression, error", [ + ("1 ", UnexpectedEndOfFileError()), + ("1|", UnexpectedEndOfFileError()), + ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])), +]) +def test_i_can_detect_errors(expression, error): + parser = BnfParser() + res = parser.parse(get_context(), Tokenizer(expression)) + ret_value = res.value.value + assert parser.has_error + assert not res.status + assert ret_value[0] == error + + +def test_i_can_parse_regex_with_reference(): + expression = "foo" + parser = BnfParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert res.status + assert res.value.value == ConceptMatch("foo") + assert res.value.source == expression + + +def test_i_can_parse_cross_ref_with_modifier(): + expression = "foo*" + parser = BnfParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert res.status + assert res.value.value == ZeroOrMore(ConceptMatch("foo")) + assert res.value.source == expression + + +def test_i_can_parse_sequence_with_cross_ref(): + expression = "foo 'and' bar+" + parser = BnfParser() + res = parser.parse(get_context(), Tokenizer(expression)) + + assert res.status + assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar"))) + assert res.value.source == expression + + +def test_i_can_parse_choice_with_cross_ref(): + foo = Concept("foo") + bar = Concept("bar") + context = get_context() + context.sheerka.add_in_cache(foo) + context.sheerka.add_in_cache(bar) + + expression = "foo | bar?" + parser = BnfParser() + res = parser.parse(context, Tokenizer(expression)) + + assert res.status + assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar"))) + assert res.value.source == expression + + +def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(): + foo = Concept(name="foo") + bar = Concept(name="bar") + context = get_context() + context.sheerka.add_in_cache(foo) + context.sheerka.add_in_cache(bar) + + regex_parser = BnfParser() + foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value + bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value + + concepts = {bar: bar_definition, foo: foo_definition} + concept_parser = ConceptLexerParser() + concept_parser.initialize(context, concepts) + + res = concept_parser.parse(context, "twenty two") + assert res.status + assert res.value.body == [(bar, 0, 2, "twenty two")] + + res = concept_parser.parse(context, "thirty one") + assert res.status + assert res.value.body == [(bar, 0, 2, "thirty one")] + + res = concept_parser.parse(context, "twenty") + assert res.status + assert res.value.body == [(foo, 0, 0, "twenty")] diff --git a/tests/test_ConceptLexerParser.py b/tests/test_ConceptLexerParser.py index 7a783c5..72b398c 100644 --- a/tests/test_ConceptLexerParser.py +++ b/tests/test_ConceptLexerParser.py @@ -2,10 +2,8 @@ import pytest from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext -from core.tokenizer import Tokenizer, TokenKind from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \ - CrossRef, RegexParser, ZeroOrMore, OneOrMore, UnexpectedEndOfFileError, UnexpectedTokenErrorNode, ConceptMatch, \ - ParsingExpressionVisitor + ParsingExpressionVisitor, TerminalNode, NonTerminalNode, LexerNode, ConceptMatch class ConceptVisitor(ParsingExpressionVisitor): @@ -13,7 +11,17 @@ class ConceptVisitor(ParsingExpressionVisitor): self.concepts = set() def visit_ConceptMatch(self, node): - self.concepts.add(node.concept_name) + self.concepts.add(node.concept) + + +def u(parsing_expression, start, end, children=None): + if isinstance(parsing_expression, str): + parsing_expression = StrMatch(parsing_expression) + + if isinstance(parsing_expression, StrMatch): + return TerminalNode(parsing_expression, start, end, parsing_expression.to_match) + + return NonTerminalNode(parsing_expression, start, end, [], children) @pytest.mark.parametrize("match, text", [ @@ -39,7 +47,7 @@ def test_i_can_match_simple_tokens(match, text): assert res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ConceptNode(foo, 0, 0, source=text)] + assert res.value.value == [ConceptNode(foo, 0, 0, source=text, underlying=u(match, 0, 0))] def test_i_can_match_multiple_concepts_in_one_input(): @@ -55,9 +63,9 @@ def test_i_can_match_multiple_concepts_in_one_input(): assert res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert res.value.value == [ - ConceptNode(one, 0, 0, source="one"), - ConceptNode(two, 2, 2, source="two"), - ConceptNode(one, 4, 4, source="one"), + ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)), + ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2)), + ConceptNode(one, 4, 4, source="one", underlying=u("one", 4, 4)), ] @@ -85,8 +93,8 @@ def test_i_cannot_match_when_part_of_the_input_is_unknown(): assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert res.value.try_parsed == [ - ConceptNode(one, 0, 0, source="one"), - ConceptNode(two, 2, 2, source="two")] # these two were recognized + ConceptNode(one, 0, 0, source="one", underlying=u("one", 0, 0)), + ConceptNode(two, 2, 2, source="two", underlying=u("two", 2, 2))] # these two were recognized assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) assert res.value.body[0].body == "three" @@ -102,7 +110,11 @@ def test_i_can_match_sequence(): assert res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")] + assert res.value.value == [ + ConceptNode(foo, 0, 4, source="one two three", underlying=u(concepts[foo], 0, 4, [ + u("one", 0, 0), + u("two", 2, 2), + u("three", 4, 4)]))] def test_wrong_sequence_is_not_matched(): @@ -116,7 +128,7 @@ def test_wrong_sequence_is_not_matched(): assert not res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.try_parsed == [ConceptNode(foo, 0, 4, source="one two three")] + assert res.value.try_parsed == [(foo, "one two three")] assert context.sheerka.isinstance(res.value.body[0], BuiltinConcepts.UNKNOWN_CONCEPT) assert res.value.body[0].body == "one" @@ -149,7 +161,7 @@ def test_i_always_choose_the_longest_match(): assert res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) - assert res.value.value == [ConceptNode(foo, 0, 4, source="one two three")] + assert res.value.value == [(foo, "one two three")] def test_i_can_match_several_sequences(): @@ -166,8 +178,8 @@ def test_i_can_match_several_sequences(): assert res.status assert context.sheerka.isinstance(res.value, BuiltinConcepts.PARSER_RESULT) assert res.value.value == [ - ConceptNode(foo, 0, 4, source="one two three"), - ConceptNode(bar, 6, 8, source="one two"), + (foo, 0, 4, "one two three"), + (bar, 6, 8, "one two"), ] @@ -181,12 +193,14 @@ def test_i_can_match_ordered_choice(): res1 = parser.parse(context, "one") assert res1.status assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) - assert res1.value.body == [ConceptNode(foo, 0, 0, source="one")] + assert res1.value.body == [ + ConceptNode(foo, 0, 0, source="one", underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))] res2 = parser.parse(context, "two") assert res2.status assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res2.value.body == [ConceptNode(foo, 0, 0, source="two")] + assert res2.value.body == [ + ConceptNode(foo, 0, 0, source="two", underlying=u(concepts[foo], 0, 0, [u("two", 0, 0)]))] res3 = parser.parse(context, "three") assert not res3.status @@ -216,12 +230,20 @@ def test_i_can_mix_sequences_and_ordered_choices(): res1 = parser.parse(context, "twenty one ok") assert res1.status assert context.sheerka.isinstance(res1.value, BuiltinConcepts.PARSER_RESULT) - assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok")] + assert res1.value.body == [ConceptNode(foo, 0, 4, source="twenty one ok", + underlying=u(concepts[foo], 0, 4, [ + u(OrderedChoice("twenty", "thirty"), 0, 0, [u("twenty", 0, 0)]), + u("one", 2, 2), + u("ok", 4, 4)]))] res2 = parser.parse(context, "thirty one ok") assert res2.status assert context.sheerka.isinstance(res2.value, BuiltinConcepts.PARSER_RESULT) - assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok")] + assert res2.value.body == [ConceptNode(foo, 0, 4, source="thirty one ok", + underlying=u(concepts[foo], 0, 4, [ + u(OrderedChoice("twenty", "thirty"), 0, 0, [u("thirty", 0, 0)]), + u("one", 2, 2), + u("ok", 4, 4)]))] res3 = parser.parse(context, "twenty one") assert not res3.status @@ -267,7 +289,8 @@ def test_i_can_parse_optional(): res = parser.parse(context, "one") assert res.status - assert res.value.value == [ConceptNode(foo, 0, 0, source="one")] + assert res.value.value == [ConceptNode(foo, 0, 0, source="one", + underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))] def test_i_can_parse_sequence_starting_with_optional(): @@ -280,11 +303,19 @@ def test_i_can_parse_sequence_starting_with_optional(): res = parser.parse(context, "twenty one") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 2, source="twenty one")] + assert res.value.body == [ConceptNode( + foo, 0, 2, + source="twenty one", + underlying=u(concepts[foo], 0, 2, + [ + u(Optional("twenty"), 0, 0, [u("twenty", 0, 0)]), + u("one", 2, 2)] + ))] res = parser.parse(context, "one") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 0, source="one")] + assert res.value.body == [ConceptNode(foo, 0, 0, source="one", + underlying=u(concepts[foo], 0, 0, [u("one", 0, 0)]))] def test_i_can_parse_sequence_ending_with_optional(): @@ -297,11 +328,11 @@ def test_i_can_parse_sequence_ending_with_optional(): res = parser.parse(context, "one two three") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")] + assert res.value.body == [(foo, 0, 4, "one two three")] res = parser.parse(context, "one two") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 2, source="one two")] + assert res.value.body == [(foo, 0, 2, "one two")] def test_i_can_parse_sequence_with_optional_in_between(): @@ -314,11 +345,11 @@ def test_i_can_parse_sequence_with_optional_in_between(): res = parser.parse(context, "one two three") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 4, source="one two three")] + assert res.value.body == [(foo, 0, 4, "one two three")] res = parser.parse(context, "one three") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 2, source="one three")] + assert res.value.body == [(foo, 0, 2, "one three")] def test_i_can_use_reference(): @@ -338,11 +369,14 @@ def test_i_can_use_reference(): assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")] + assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two", + underlying=u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)]))] assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")] + assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two", + underlying=u(ConceptMatch(foo, rule_name="foo"), 0, 2, + [u(concepts[foo], 0, 2, [u("one", 0, 0), u("two", 2, 2)])]))] def test_i_can_use_context_reference_with_multiple_levels(): @@ -364,15 +398,15 @@ def test_i_can_use_context_reference_with_multiple_levels(): assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ConceptNode(foo, 0, 2, source="one two")] + assert res[0].value.body == [(foo, 0, 2, "one two")] assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [ConceptNode(bar, 0, 2, source="one two")] + assert res[1].value.body == [(bar, 0, 2, "one two")] assert res[2].status assert context.sheerka.isinstance(res[2].value, BuiltinConcepts.PARSER_RESULT) - assert res[2].value.body == [ConceptNode(baz, 0, 2, source="one two")] + assert res[2].value.body == [(baz, 0, 2, "one two")] def test_order_is_not_important_when_using_references(): @@ -386,8 +420,8 @@ def test_order_is_not_important_when_using_references(): res = parser.parse(context, "one two") assert len(res) == 2 - assert res[0].value.body == [ConceptNode(bar, 0, 2, source="one two")] - assert res[1].value.body == [ConceptNode(foo, 0, 2, source="one two")] + assert res[0].value.body == [(bar, 0, 2, "one two")] + assert res[1].value.body == [(foo, 0, 2, "one two")] def test_i_can_parse_when_reference(): @@ -401,15 +435,15 @@ def test_i_can_parse_when_reference(): res = parser.parse(context, "twenty two") assert res.status - assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")] + assert res.value.body == [(bar, 0, 2, "twenty two")] res = parser.parse(context, "thirty one") assert res.status - assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")] + assert res.value.body == [(bar, 0, 2, "thirty one")] res = parser.parse(context, "twenty") assert res.status - assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")] + assert res.value.body == [(foo, 0, 0, "twenty")] def test_i_can_detect_duplicates_when_reference(): @@ -428,11 +462,11 @@ def test_i_can_detect_duplicates_when_reference(): assert len(res) == 2 assert res[0].status assert context.sheerka.isinstance(res[0].value, BuiltinConcepts.PARSER_RESULT) - assert res[0].value.body == [ConceptNode(bar, 0, 0, source="twenty")] + assert res[0].value.body == [(bar, 0, 0, "twenty")] assert res[1].status assert context.sheerka.isinstance(res[1].value, BuiltinConcepts.PARSER_RESULT) - assert res[1].value.body == [ConceptNode(foo, 0, 0, source="twenty")] + assert res[1].value.body == [(foo, 0, 0, "twenty")] def test_i_can_detect_infinite_recursion(): @@ -446,8 +480,8 @@ def test_i_can_detect_infinite_recursion(): parser = ConceptLexerParser() parser.initialize(get_context(), concepts) - assert bar not in parser.concepts_dict - assert foo not in parser.concepts_dict + assert bar not in parser.concepts_grammars + assert foo not in parser.concepts_grammars def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): @@ -461,8 +495,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): parser = ConceptLexerParser() parser.initialize(get_context(), concepts) - assert foo not in parser.concepts_dict # removed because of the infinite recursion - assert bar not in parser.concepts_dict # removed because of the infinite recursion + assert foo not in parser.concepts_grammars # removed because of the infinite recursion + assert bar not in parser.concepts_grammars # removed because of the infinite recursion # the other way around is possible context = get_context() @@ -472,15 +506,15 @@ def test_i_can_detect_indirect_infinite_recursion_with_ordered_choice(): } parser = ConceptLexerParser() parser.initialize(context, concepts) - assert foo in parser.concepts_dict - assert bar in parser.concepts_dict + assert foo in parser.concepts_grammars + assert bar in parser.concepts_grammars res = parser.parse(context, "foo") assert len(res) == 2 assert res[0].status - assert res[0].value.body == [ConceptNode(bar, 0, 0, source="foo")] + assert res[0].value.body == [(bar, 0, 0, "foo")] assert res[1].status - assert res[1].value.body == [ConceptNode(foo, 0, 0, source="foo")] + assert res[1].value.body == [(foo, 0, 0, "foo")] def test_i_can_detect_indirect_infinite_recursion_with_sequence(): @@ -494,8 +528,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence(): parser = ConceptLexerParser() parser.initialize(get_context(), concepts) - assert foo not in parser.concepts_dict # removed because of the infinite recursion - assert bar not in parser.concepts_dict # removed because of the infinite recursion + assert foo not in parser.concepts_grammars # removed because of the infinite recursion + assert bar not in parser.concepts_grammars # removed because of the infinite recursion def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choice(): @@ -509,8 +543,8 @@ def test_i_can_detect_indirect_infinite_recursion_with_sequence_or_ordered_choic parser = ConceptLexerParser() parser.initialize(get_context(), concepts) - assert foo not in parser.concepts_dict # removed because of the infinite recursion - assert bar not in parser.concepts_dict # removed because of the infinite recursion + assert foo not in parser.concepts_grammars # removed because of the infinite recursion + assert bar not in parser.concepts_grammars # removed because of the infinite recursion def test_i_can_detect_indirect_infinite_recursion_with_optional(): @@ -518,128 +552,6 @@ def test_i_can_detect_indirect_infinite_recursion_with_optional(): pass -@pytest.mark.parametrize("expression, expected", [ - ("'str'", StrMatch("str")), - ("1", StrMatch("1")), - (" 1", StrMatch("1")), - (",", StrMatch(",")), - ("'foo'?", Optional(StrMatch("foo"))), - ("'foo'*", ZeroOrMore(StrMatch("foo"))), - ("'foo'+", OneOrMore(StrMatch("foo"))), - ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), - ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))), - ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))), - ("1 2 | 3 4+", OrderedChoice( - Sequence(StrMatch("1"), StrMatch("2")), - Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))), - ("1 (2 | 3) 4+", Sequence(StrMatch("1"), OrderedChoice(StrMatch("2"), StrMatch("3")), OneOrMore(StrMatch("4")))), - ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))), - ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))), - ("1 *", Sequence(StrMatch("1"), StrMatch("*"))), - ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))), - ("1 +", Sequence(StrMatch("1"), StrMatch("+"))), - ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))), - ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))), - ("(1 )", StrMatch("1")), -]) -def test_i_can_parse_regex(expression, expected): - parser = RegexParser() - res = parser.parse(get_context(), Tokenizer(expression)) - - assert not parser.has_error - assert res.status - assert res.value.value == expected - assert res.value.source == expression - - -@pytest.mark.parametrize("expression, error", [ - ("1 ", UnexpectedEndOfFileError()), - ("1|", UnexpectedEndOfFileError()), - ("(1|)", UnexpectedTokenErrorNode("Unexpected token 'TokenKind.EOF'", [TokenKind.RPAR])), -]) -def test_i_can_detect_errors(expression, error): - parser = RegexParser() - res = parser.parse(get_context(), Tokenizer(expression)) - ret_value = res.value.value - assert parser.has_error - assert not res.status - assert ret_value[0] == error - - -def test_i_can_parse_regex_with_reference(): - expression = "foo" - parser = RegexParser() - res = parser.parse(get_context(), Tokenizer(expression)) - - assert res.status - assert res.value.value == ConceptMatch("foo") - assert res.value.source == expression - - -def test_i_can_parse_cross_ref_with_modifier(): - expression = "foo*" - parser = RegexParser() - res = parser.parse(get_context(), Tokenizer(expression)) - - assert res.status - assert res.value.value == ZeroOrMore(ConceptMatch("foo")) - assert res.value.source == expression - - -def test_i_can_parse_sequence_with_cross_ref(): - expression = "foo 'and' bar+" - parser = RegexParser() - res = parser.parse(get_context(), Tokenizer(expression)) - - assert res.status - assert res.value.value == Sequence(ConceptMatch("foo"), StrMatch("and"), OneOrMore(ConceptMatch("bar"))) - assert res.value.source == expression - - -def test_i_can_parse_choice_with_cross_ref(): - foo = Concept("foo") - bar = Concept("bar") - context = get_context() - context.sheerka.add_in_cache(foo) - context.sheerka.add_in_cache(bar) - - expression = "foo | bar?" - parser = RegexParser() - res = parser.parse(context, Tokenizer(expression)) - - assert res.status - assert res.value.value == OrderedChoice(ConceptMatch("foo"), Optional(ConceptMatch("bar"))) - assert res.value.source == expression - - -def test_i_can_use_the_result_of_regex_parsing_to_parse_a_text(): - foo = Concept(name="foo") - bar = Concept(name="bar") - context = get_context() - context.sheerka.add_in_cache(foo) - context.sheerka.add_in_cache(bar) - - regex_parser = RegexParser() - foo_definition = regex_parser.parse(context, "'twenty' | 'thirty'").value.value - bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value - - concepts = {bar: bar_definition, foo: foo_definition} - concept_parser = ConceptLexerParser() - concept_parser.initialize(context, concepts) - - res = concept_parser.parse(context, "twenty two") - assert res.status - assert res.value.body == [ConceptNode(bar, 0, 2, source="twenty two")] - - res = concept_parser.parse(context, "thirty one") - assert res.status - assert res.value.body == [ConceptNode(bar, 0, 2, source="thirty one")] - - res = concept_parser.parse(context, "twenty") - assert res.status - assert res.value.body == [ConceptNode(foo, 0, 0, source="twenty")] - - def test_i_can_visit_parsing_expression(): mult = Concept(name="mult") add = Concept(name="add") @@ -650,6 +562,19 @@ def test_i_can_visit_parsing_expression(): assert sorted(list(visitor.concepts)) == ["add", "mult"] +def test_i_can_initialize_rule_names(): + context = get_context() + foo = Concept(name="foo") + bar = Concept(name="bar") + + concepts = {foo: Sequence("one", "two"), bar: foo} + parser = ConceptLexerParser() + ret = parser.initialize(context, concepts) + return_value = ret.body + + assert return_value[foo].rule_name == "" + assert return_value[bar].rule_name == "foo" + # # def test_i_can_parse_basic_arithmetic_operations_and_resolve_properties(): diff --git a/tests/test_ConceptNodeEvaluator.py b/tests/test_ConceptNodeEvaluator.py new file mode 100644 index 0000000..368c5ad --- /dev/null +++ b/tests/test_ConceptNodeEvaluator.py @@ -0,0 +1,178 @@ +import pytest + +from core.builtin_concepts import ReturnValueConcept, ParserResultConcept +from core.concept import Concept +from core.sheerka import Sheerka, ExecutionContext +from evaluators.ConceptNodeEvaluator import ConceptNodeEvaluator +from parsers.ConceptLexerParser import ConceptNode, ConceptLexerParser, NonTerminalNode, Sequence, TerminalNode, \ + StrMatch, Optional, OrderedChoice + + +def get_context(): + sheerka = Sheerka(skip_builtins_in_db=True) + sheerka.initialize("mem://") + return ExecutionContext("test", "xxx", sheerka) + + +def get_return_value(nodes, source): + return ReturnValueConcept( + "some_name", + True, + ParserResultConcept(parser=ConceptLexerParser(), + source=source, + value=nodes, + try_parsed=nodes)) + + +def get_concept_node(context, grammar, expression): + parser = ConceptLexerParser() + parser.initialize(context, grammar) + + res = parser.parse(context, expression) + assert res.status + return res.value.value[0] + + +@pytest.mark.parametrize("ret_val, expected", [ + (ReturnValueConcept("some_name", True, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), True), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), True), + (ReturnValueConcept("some_name", False, ParserResultConcept(value=[ConceptNode(Concept(), 0, 0)])), False), + (ReturnValueConcept("some_name", False, ParserResultConcept(value=ConceptNode(Concept(), 0, 0))), False), + (ReturnValueConcept("some_name", True, ParserResultConcept(value="Not a concept node")), False), + (ReturnValueConcept("some_name", True, ParserResultConcept(value=["Not a concept node"])), False), + (ReturnValueConcept("some_name", True, [ConceptNode(Concept(), 0, 0)]), False), + (ReturnValueConcept("some_name", True, ConceptNode(Concept(), 0, 0)), False), +]) +def test_i_can_match(ret_val, expected): + context = get_context() + assert ConceptNodeEvaluator().matches(context, ret_val) == expected + + +def test_concept_is_returned_when_list_of_one_concept_node(): + foo = Concept("foo") + context = get_context() + context.sheerka.add_in_cache(foo) + + evaluator = ConceptNodeEvaluator() + node = ConceptNode(foo, 0, 0, underlying=TerminalNode(StrMatch("foo"), 0, 0, "foo")) + + ret_val = get_return_value([node], "h") + result = evaluator.eval(context, ret_val) + + assert result.who == evaluator.name + assert result.status + assert result.value == node.concept + assert result.parents == [ret_val] + + +def test_concept_property_is_correctly_updated_for_str_match(): + context = get_context() + + foo = Concept("foo") + concept_node = get_concept_node(context, {foo: StrMatch("foo", rule_name="variable")}, "foo") + updated = ConceptNodeEvaluator().update_concept(context.sheerka, concept_node.concept, concept_node.underlying) + + assert "variable" in updated.props + assert updated.props["variable"].value == "foo" + + +def test_concept_property_is_correctly_updated_for_sequence(): + context = get_context() + + foo = Concept("foo") + grammar = {foo: Sequence("one", "two", rule_name="variable")} + concept_node = get_concept_node(context, grammar, "one two") + updated = ConceptNodeEvaluator().update_concept(context.sheerka, concept_node.concept, concept_node.underlying) + + assert "variable" in updated.props + assert updated.props["variable"].value == "one two" + + +def test_concept_property_is_updated_for_str_in_sequence(): + context = get_context() + + foo = Concept("foo") + grammar = {foo: Sequence(StrMatch("one", rule_name="s1"), StrMatch("two", rule_name="s2"), rule_name="variable")} + concept_node = get_concept_node(context, grammar, "one two") + + updated = ConceptNodeEvaluator().update_concept(context.sheerka, concept_node.concept, concept_node.underlying) + + assert updated.props["variable"].value == "one two" + assert updated.props["s1"].value == "one" + assert updated.props["s2"].value == "two" + + +def test_concept_property_is_correctly_updated_for_optional(): + context = get_context() + + foo = Concept("foo") + grammar = {foo: Sequence("one", Optional("two", rule_name="o"), rule_name="variable")} + concept_node = get_concept_node(context, grammar, "one two") + + updated = ConceptNodeEvaluator().update_concept( + context.sheerka, + context.sheerka.new(concept_node.concept.key), + concept_node.underlying) + + assert "variable" in updated.props + assert updated.props["variable"].value == "one two" + assert updated.props["o"].value == "two" + + +def test_concept_property_is_correctly_updated_when_list_of_properties(): + context = get_context() + + foo = Concept("foo") + grammar = {foo: Sequence(StrMatch("one", rule_name="s"), StrMatch("two", rule_name="s"), rule_name="variable")} + concept_node = get_concept_node(context, grammar, "one two") + + updated = ConceptNodeEvaluator().update_concept( + context.sheerka, + context.sheerka.new(concept_node.concept.key), + concept_node.underlying) + + assert updated.props["variable"].value == "one two" + assert updated.props["s"].value == ["one", "two"] + + +def test_concept_property_is_correctly_updated_when_another_concept(): + context = get_context() + + foo = Concept("foo") + bar = Concept("bar") + context.sheerka.add_in_cache(foo) + grammar = { + foo: Sequence("one", "two", rule_name="variable"), + bar: Sequence(foo, "three", rule_name="variable")} + concept_node = get_concept_node(context, grammar, "one two three") + + updated = ConceptNodeEvaluator().update_concept( + context.sheerka, + context.sheerka.new(concept_node.concept.key), + concept_node.underlying) + + assert updated.props["variable"].value == "one two three" + assert updated.props["foo"].value == Concept("foo").set_prop("variable", "one two").init_key() + + +def test_concept_property_is_correctly_updated_when_concept_recursion(): + context = get_context() + + number = Concept("number") + add = Concept("add") + context.sheerka.add_in_cache(number) + context.sheerka.add_in_cache(add) + grammar = { + number: OrderedChoice("one", "two"), + add: Sequence(number, Optional(Sequence(OrderedChoice("plus", "minus", rule_name="op"), add))) + } + concept_node = get_concept_node(context, grammar, "one plus two") + + updated = ConceptNodeEvaluator().update_concept( + context.sheerka, + context.sheerka.new(concept_node.concept.key), + concept_node.underlying) + + assert updated.props["number"].value == Concept("number").init_key() + assert updated.props["op"].value == "plus" + assert updated.props["add"].value == Concept("add").set_prop("number", Concept("number").init_key()).init_key() diff --git a/tests/test_DefaultParser.py b/tests/test_DefaultParser.py index 75c8d88..4735fc1 100644 --- a/tests/test_DefaultParser.py +++ b/tests/test_DefaultParser.py @@ -2,13 +2,13 @@ import pytest import ast from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept -from core.concept import Concept from core.sheerka import Sheerka, ExecutionContext -from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch, RegexParser +from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptMatch from parsers.PythonParser import PythonParser, PythonNode from core.tokenizer import Keywords, Tokenizer from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode +from parsers.BnfParser import BnfParser # def nop(): @@ -341,7 +341,7 @@ def test_i_can_parse_def_concept_from_regex(): res = parser.parse(get_context(), text) node = res.value.value definition = OrderedChoice(ConceptMatch("a_concept"), StrMatch("a_string")) - parser_result = ParserResultConcept(RegexParser(), "a_concept | 'a_string'", definition, definition) + parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition) expected = get_concept(name="name", body="__definition[0]", definition=parser_result) assert res.status diff --git a/tests/test_sheerka.py b/tests/test_sheerka.py index 5df5488..d5a913a 100644 --- a/tests/test_sheerka.py +++ b/tests/test_sheerka.py @@ -1,12 +1,10 @@ -import ast - import pytest import os from os import path import shutil from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept -from core.concept import Concept, PROPERTIES_TO_SERIALIZE +from core.concept import Concept, PROPERTIES_TO_SERIALIZE, Property from core.sheerka import Sheerka, ExecutionContext from evaluators.MutipleSameSuccessEvaluator import MultipleSameSuccessEvaluator from parsers.ConceptLexerParser import Sequence, ZeroOrMore, StrMatch, OrderedChoice, Optional, ConceptMatch, \ @@ -63,12 +61,12 @@ def test_builtin_concepts_are_initialized(): def test_builtin_concepts_can_be_updated(): - sheerka = get_sheerka(root_folder, skip_builtins_in_db=False) + sheerka = get_sheerka(False, skip_builtins_in_db=False) loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) loaded_sheerka.metadata.desc = "I have a description" sheerka.sdp.modify("Test", sheerka.CONCEPTS_ENTRY, loaded_sheerka.key, loaded_sheerka) - sheerka = get_sheerka(root_folder) + sheerka = get_sheerka(False) loaded_sheerka = sheerka.get(BuiltinConcepts.SHEERKA) assert loaded_sheerka.metadata.desc == "I have a description" @@ -593,9 +591,8 @@ def test_i_can_create_concept_with_bnf_definition(): saved_definitions = sheerka.sdp.get_safe(sheerka.CONCEPTS_DEFINITIONS_ENTRY) expected_bnf = Sequence( - ConceptMatch("a"), - Optional(Sequence(StrMatch("plus"), ConceptMatch("plus"))), - rule_name="plus") + ConceptMatch("a", rule_name="a"), + Optional(Sequence(StrMatch("plus"), ConceptMatch("plus", rule_name="plus")))) assert saved_definitions[saved_concept] == expected_bnf new_concept = res[0].value.body @@ -606,7 +603,53 @@ def test_i_can_create_concept_with_bnf_definition(): assert "plus" in new_concept.props -def get_sheerka(root="mem://", skip_builtins_in_db=True): +def test_i_can_eval_bnf_definitions(): + sheerka = get_sheerka() + concept_a = sheerka.eval("def concept a from bnf 'one' | 'two'")[0].body.body + + res = sheerka.eval("one") + + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, concept_a) + + +def test_i_can_eval_bnf_definitions_with_variables(): + sheerka = get_sheerka() + concept_a = sheerka.eval("def concept a from bnf 'one' | 'two'")[0].body.body + concept_b = sheerka.eval("def concept b from bnf a 'three'")[0].body.body + + res = sheerka.eval("one three") + + assert len(res) == 1 + assert res[0].status + return_value = res[0].value + + assert sheerka.isinstance(return_value, concept_b) + assert return_value.props["a"] == Property("a", concept_a) + + +def test_i_can_eval_bnf_definitions_from_separate_instances(): + """ + Same test then before, + but make sure that the BNF are correctly persisted and loaded + """ + sheerka = get_sheerka(False) + concept_a = sheerka.eval("def concept a from bnf 'one' | 'two'")[0].body.body + + res = get_sheerka(False).eval("one") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, concept_a) + + res = get_sheerka(False).eval("two") + assert len(res) == 1 + assert res[0].status + assert sheerka.isinstance(res[0].value, concept_a) + + +def get_sheerka(use_dict=True, skip_builtins_in_db=True): + root = "mem://" if use_dict else root_folder sheerka = Sheerka(skip_builtins_in_db) sheerka.initialize(root) diff --git a/tests/test_sheerkaDataProvider.py b/tests/test_sheerkaDataProvider.py index 08c1b9e..f04c696 100644 --- a/tests/test_sheerkaDataProvider.py +++ b/tests/test_sheerkaDataProvider.py @@ -754,7 +754,7 @@ def test_i_can_set_using_reference(root): ".sheerka", "mem://" ]) -def test_i_can_add_reference_of_an_object_with_a_key(root): +def test_i_can_add_an_object_with_a_key_as_a_reference(root): sdp = SheerkaDataProvider(root) obj = ObjDumpJson("my_key", "value1") obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj)) @@ -777,7 +777,7 @@ def test_i_can_add_reference_of_an_object_with_a_key(root): ".sheerka", "mem://" ]) -def test_i_can_add_reference_a_dictionary(root): +def test_i_can_add_a_dictionary_as_a_reference(root): sdp = SheerkaDataProvider(root) obj = {"my_key": "value1"} @@ -1403,7 +1403,7 @@ def test_i_can_get_an_entry_by_key(root): ".sheerka", "mem://" ]) -def test_i_can_get_object_save_by_reference(root): +def test_i_can_get_object_saved_by_reference(root): sdp = SheerkaDataProvider(root) obj = ObjDumpJson("my_key", "value1") sdp.serializer.register(ObjectSerializer(core.utils.get_full_qualified_name(obj))) @@ -1687,3 +1687,73 @@ def test_i_can_add_obj_with_same_key_and_get_them_back(root): assert len(loaded) == 2 assert loaded[0] == obj1 assert loaded[1] == obj2 + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_get_safe_dictionary_without_origin(root): + sdp = SheerkaDataProvider(root) + obj = {"my_key": "value1"} + + obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj)) + sdp.serializer.register(obj_serializer) + + entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True) + from_db = sdp.get_safe(entry, key) + + assert len(from_db) == 2 + assert from_db["my_key"] == obj["my_key"] + assert Serializer.ORIGIN in from_db + + from_db_no_origin = sdp.get_safe(entry, key, load_origin=False) + assert len(from_db_no_origin) == 1 + assert from_db_no_origin["my_key"] == obj["my_key"] + assert Serializer.ORIGIN not in from_db_no_origin + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_get_dictionary_without_origin(root): + sdp = SheerkaDataProvider(root) + obj = {"my_key": "value1"} + + obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj)) + sdp.serializer.register(obj_serializer) + + entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True) + from_db = sdp.get(entry, key) + + assert len(from_db) == 2 + assert from_db["my_key"] == obj["my_key"] + assert Serializer.ORIGIN in from_db + + from_db_no_origin = sdp.get(entry, key, load_origin=False) + assert len(from_db_no_origin) == 1 + assert from_db_no_origin["my_key"] == obj["my_key"] + assert Serializer.ORIGIN not in from_db_no_origin + + +@pytest.mark.parametrize("root", [ + ".sheerka", + "mem://" +]) +def test_i_get_safe_object_without_origin(root): + sdp = SheerkaDataProvider(root) + obj = ObjDumpJson("my_key", "value1") + + obj_serializer = ObjectSerializer(core.utils.get_full_qualified_name(obj)) + sdp.serializer.register(obj_serializer) + + entry, key = sdp.add(evt_digest, "entry", obj, use_ref=True) + from_db = sdp.get_safe(entry, key) + + assert from_db == obj + assert hasattr(from_db, Serializer.ORIGIN) + + from_db_no_origin = sdp.get_safe(entry, key, load_origin=False) + assert from_db_no_origin == obj + assert not hasattr(from_db_no_origin, Serializer.ORIGIN)