From e8f2705dcf7601b434ead188a9566ee24a018fcb Mon Sep 17 00:00:00 2001 From: Kodjo Sossouvi Date: Fri, 2 Oct 2020 04:45:47 +0200 Subject: [PATCH] Refactored Parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser into DefConceptParser --- src/core/builtin_helpers.py | 2 +- src/core/sheerka/Sheerka.py | 2 +- src/core/sheerka/services/SheerkaExecute.py | 43 +- src/core/tokenizer.py | 4 +- src/evaluators/AddConceptEvaluator.py | 2 +- src/evaluators/AddConceptInSetEvaluator.py | 2 +- src/parsers/BaseCustomGrammarParser.py | 250 +++++++++ src/parsers/BaseNodeParser.py | 16 +- src/parsers/BaseParser.py | 54 +- src/parsers/DefConceptParser.py | 274 ++++++++++ src/parsers/DefaultParser.py | 509 ----------------- src/parsers/ExpressionParser.py | 9 +- src/parsers/FormatRuleParser.py | 132 +++++ src/parsers/FunctionParser.py | 21 +- src/printer/SheerkaPrinter.py | 4 +- tests/BaseTest.py | 2 +- tests/core/test_ParserInput.py | 80 ++- tests/core/test_sheerka.py | 2 +- tests/core/test_utils.py | 10 +- tests/evaluators/test_AddConceptEvaluator.py | 2 +- .../test_AddConceptInSetEvaluator.py | 2 +- tests/parsers/test_BaseCustomGrammarParser.py | 227 ++++++++ tests/parsers/test_BnfParser.py | 2 +- ...aultParser.py => test_DefConceptParser.py} | 511 ++++++++++-------- tests/parsers/test_ExpressionParser.py | 42 +- tests/parsers/test_FormatRuleParser.py | 71 +++ tests/parsers/test_FunctionParser.py | 2 + tests/sheerkapickle/test_sheerka_handlers.py | 6 +- 28 files changed, 1411 insertions(+), 872 deletions(-) create mode 100644 src/parsers/BaseCustomGrammarParser.py create mode 100644 src/parsers/DefConceptParser.py delete mode 100644 src/parsers/DefaultParser.py create mode 100644 src/parsers/FormatRuleParser.py create mode 100644 tests/parsers/test_BaseCustomGrammarParser.py rename tests/parsers/{test_DefaultParser.py => test_DefConceptParser.py} (73%) create mode 100644 tests/parsers/test_FormatRuleParser.py diff --git a/src/core/builtin_helpers.py 
b/src/core/builtin_helpers.py index b450b88..fec7131 100644 --- a/src/core/builtin_helpers.py +++ b/src/core/builtin_helpers.py @@ -287,7 +287,7 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun """ Try to recognize concepts or code from source using the given parsers :param context: - :param source: + :param source: ParserInput if possible :param parsers: :param who: who is asking the parsing ? :param prop: Extra info, when parsing a property diff --git a/src/core/sheerka/Sheerka.py b/src/core/sheerka/Sheerka.py index a34b4bb..4c15f87 100644 --- a/src/core/sheerka/Sheerka.py +++ b/src/core/sheerka/Sheerka.py @@ -335,7 +335,7 @@ class Sheerka(Concept): """ core.utils.import_module_and_sub_module("parsers") base_class = core.utils.get_class("parsers.BaseParser.BaseParser") - modules_to_skip = ["parsers.BaseNodeParser"] + modules_to_skip = ["parsers.BaseNodeParser", "parsers.BaseCustomGrammarParser"] temp_result = {} for parser in core.utils.get_sub_classes("parsers", base_class): diff --git a/src/core/sheerka/services/SheerkaExecute.py b/src/core/sheerka/services/SheerkaExecute.py index f1b28db..59a76cb 100644 --- a/src/core/sheerka/services/SheerkaExecute.py +++ b/src/core/sheerka/services/SheerkaExecute.py @@ -15,11 +15,29 @@ class ParserInput: def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True): self.text = text self.tokens = tokens or None - self.length = len(tokens) if tokens else None + if self.tokens: + # make sure tokens ends with EOF token + # and do not modify the original token list + if len(self.tokens) == 0: + self.tokens = [Token(TokenKind.EOF, "", 0, 1, 1)] + + elif (last_token := self.tokens[-1]).type != TokenKind.EOF: + self.tokens = self.tokens + [Token(TokenKind.EOF, + "", + last_token.index + 1, + last_token.line, + last_token.column + 1)] + + self.length = None # to be computed in reset() self.yield_oef = yield_oef self.start = start or 0 - self.end = end + 1 if end else None + if end: + 
self.original_end = end + 1 + self.end = self.original_end + else: + self.original_end = self.end = None + self.sub_text = None self.sub_tokens = None @@ -32,15 +50,19 @@ class ParserInput: from_tokens = "from_tokens" if self.from_tokens else "" return f"ParserInput({from_tokens}'{self.text}')" - def reset(self, yield_oef=True): + def reset(self, yield_oef=None): + if yield_oef is None: + yield_oef = self.yield_oef + + # make sure tokens is correctly initialized if self.tokens is None: - self.tokens = list(Tokenizer(self.text)) - self.length = len(self.tokens) + self.tokens = list(Tokenizer(self.text, yield_eof=True)) - if self.end is None: - self.end = self.length + if self.original_end is None: + self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1 + else: + self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens - self.yield_oef = yield_oef self.pos = self.start - 1 self.token = None return self @@ -70,13 +92,10 @@ class ParserInput: self.pos += 1 if self.pos >= self.end: - if self.yield_oef: - self.token = Token(TokenKind.EOF, "", -1, -1, -1) return False self.token = self.tokens[self.pos] - - if self.token.type == TokenKind.EOF and not self.yield_oef: + if self.token.type == TokenKind.EOF: return False if skip_whitespace: diff --git a/src/core/tokenizer.py b/src/core/tokenizer.py index e8d7b36..d471ca7 100644 --- a/src/core/tokenizer.py +++ b/src/core/tokenizer.py @@ -68,7 +68,7 @@ class Token: if self.type == TokenKind.IDENTIFIER: value = str(self.value) elif self.type == TokenKind.WHITESPACE: - value = "" if self.value[0] == "\t" else "" + value = "" if self.value == "" else "" if self.value[0] == "\t" else "" elif self.type == TokenKind.NEWLINE: value = "" elif self.type == TokenKind.EOF: @@ -148,6 +148,8 @@ class Keywords(Enum): POST = "post" ISA = "isa" RET = "ret" + WHEN = "when" + PRINT = "print" class Tokenizer: diff --git a/src/evaluators/AddConceptEvaluator.py b/src/evaluators/AddConceptEvaluator.py index 
7862a6f..c7091d1 100644 --- a/src/evaluators/AddConceptEvaluator.py +++ b/src/evaluators/AddConceptEvaluator.py @@ -8,7 +8,7 @@ from core.tokenizer import TokenKind, Tokenizer from evaluators.BaseEvaluator import OneReturnValueEvaluator from parsers.BaseParser import NotInitializedNode from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor -from parsers.DefaultParser import DefConceptNode, NameNode +from parsers.DefConceptParser import DefConceptNode, NameNode from parsers.PythonParser import PythonNode diff --git a/src/evaluators/AddConceptInSetEvaluator.py b/src/evaluators/AddConceptInSetEvaluator.py index 36c5a1a..41ebf68 100644 --- a/src/evaluators/AddConceptInSetEvaluator.py +++ b/src/evaluators/AddConceptInSetEvaluator.py @@ -2,7 +2,7 @@ import core.builtin_helpers from core.builtin_concepts import ParserResultConcept, BuiltinConcepts from core.sheerka.services.SheerkaExecute import SheerkaExecute from evaluators.BaseEvaluator import OneReturnValueEvaluator -from parsers.DefaultParser import IsaConceptNode +from parsers.DefConceptParser import IsaConceptNode ALL_STEPS = [ BuiltinConcepts.BEFORE_PARSING, diff --git a/src/parsers/BaseCustomGrammarParser.py b/src/parsers/BaseCustomGrammarParser.py new file mode 100644 index 0000000..779d9d6 --- /dev/null +++ b/src/parsers/BaseCustomGrammarParser.py @@ -0,0 +1,250 @@ +from dataclasses import dataclass, field + +import core.utils +from core.tokenizer import Keywords, TokenKind, Tokenizer +from parsers.BaseParser import BaseParser, Node, ErrorNode, UnexpectedEofNode, UnexpectedTokenErrorNode + + +@dataclass() +class CustomGrammarParserNode(Node): + """ + Base node for all default parser nodes + """ + tokens: list = field(compare=False, repr=False) + + +@dataclass() +class SyntaxErrorNode(CustomGrammarParserNode, ErrorNode): + """ + The input is recognized, but there is a syntax error + """ + message: str + + def __eq__(self, other): + if id(self) == id(other): + return True + + if not 
isinstance(other, SyntaxErrorNode): + return False + + if self.message != other.message: + return False + + if other.tokens is not None and self.tokens != other.tokens: + return False + + return True + + def __hash__(self): + return hash(self.message) + + +@dataclass() +class KeywordNotFound(CustomGrammarParserNode, ErrorNode): + keywords: list + + def __eq__(self, other): + if id(self) == id(other): + return True + + if not isinstance(other, KeywordNotFound): + return False + + if self.keywords != other.keywords: + return False + + if other.tokens is not None and self.tokens != other.tokens: + return False + + return True + + def __hash__(self): + return hash(self.keywords) + + +class BaseCustomGrammarParser(BaseParser): + """ + Base class for sheerka specific grammars + """ + + DEFAULT_TAB_SIZE = 4 + + def __init__(self, name, priority: int, enabled=True): + super().__init__(name, priority, enabled=enabled) + + @staticmethod + def skip_white_spaces(tokens): + i = 0 + while i < len(tokens) and tokens[i].type == TokenKind.WHITESPACE: + i += 1 + + return i + + def get_body(self, tokens): + """ + Get the body of a keyword definition + It manages colon body, but the colon must be stripped first + :param tokens: + :return: + """ + + def get_tab_size(default_tab_size, text): + return sum([1 if isinstance(c, str) else default_tab_size for c in text]) + + pos = self.skip_white_spaces(tokens) + + if len(tokens) - pos < 3: + self.add_error(SyntaxErrorNode(tokens, "Body is empty or too short.")) + return None + + if tokens[pos].type != TokenKind.NEWLINE: + self.add_error(UnexpectedTokenErrorNode("New line not found.", tokens[pos], [TokenKind.NEWLINE])) + return None + pos += 1 + + if tokens[pos].type != TokenKind.WHITESPACE: + self.add_error(UnexpectedTokenErrorNode("Indentation not found.", tokens[pos], [TokenKind.WHITESPACE])) + return None + + indent_size = get_tab_size(self.DEFAULT_TAB_SIZE, tokens[pos].value) + pos += 1 + + i = pos + while i < len(tokens) - 1: + if 
tokens[i].type == TokenKind.NEWLINE: + if tokens[i + 1].type != TokenKind.WHITESPACE: + self.add_error(UnexpectedTokenErrorNode("Indentation not found.", + tokens[i + 1], + [TokenKind.WHITESPACE])) + return None + + if get_tab_size(self.DEFAULT_TAB_SIZE, tokens[i + 1].value) < indent_size: + self.add_error(SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")) + return None + + tokens[i + 1] = tokens[i + 1].clone() + tokens[i + 1].value = " " * (get_tab_size(self.DEFAULT_TAB_SIZE, tokens[i + 1].value) - indent_size) + i += 1 + + return tokens[pos:] + + def get_parts(self, keywords, expected_first_token=None): + """ + Reads Parser Input and groups the tokens by keywords + ex: + tokens = Tokenizer("as a b c pre u v w where x y z") + keywords = ["as", "pre", "where"] + assert get_parts(keywords) == { + Keyword("as"): [Token("a"), Token(), Token("b"), Token(), Token("c"), Token()], + Keyword("pre"): [Token("u"), Token(), Token("v"), Token(), Token("w"), Token()], + Keyword("where"): [Token("x"), Token(), Token("y"), Token(), Token("z"), Token()]} + + * The order of appearance of the keywords is not important + "as w pre y where z" and "where z pre y as w" will produce the same dictionary + + * I can use double quote to protect keyword + where "x y" will produce the entry Keyword("where"): [Token("x"), Token(), Token("y"), Token()] + where 'x y' will produce the entry Keyword("where"): [Token("'x y'")] + + :param keywords: + :param expected_first_token: it must be a KeyW + :return: dictionary + """ + + def new_part(t, cma, p): + """ + + :param t: token + :param cma: colon_mode_activated + :param p: previous token + :return: + """ + if t.value not in keywords: + return False + + if not cma or not p: + return True + + return p.line != t.line + + if self.parser_input.token is None: + self.add_error(KeywordNotFound([], keywords)) + return None + + if self.parser_input.token.type == TokenKind.WHITESPACE: + self.parser_input.next_token() + + token = self.parser_input.token 
+ if expected_first_token and token.value != expected_first_token.value: + self.add_error(UnexpectedTokenErrorNode(f"'{expected_first_token.value}' keyword not found.", + token, + [expected_first_token])) + return None + + if token.value not in keywords: + self.add_error(KeywordNotFound([token], keywords)) + return None + + colon_mode_activated = False # if activate, use keyword + colon to start a new keyword definition + previous_token = None + + res = {} + + # More explanations on colon_mode_activated + # You can use the pattern + # def concept as: + # xxx + # yyy + # ... + # + # It allows to readability and usage of other keywords inside the bloc# + # Example + # def concept give the the date as: + # from datetime import date # I can use the 'from' keyword !!! + # return date.today() + # + # Note that I can choose to use colon or not + # + # def concept in x days as: + # from datetime import date + # return date.today() - x + # where x > 0 + # + # is a valid declaration + + # loop thru the tokens, and put them in the correct tokens_found_by_parts entry + while True: + if new_part(token, colon_mode_activated, previous_token): + keyword = Keywords(token.value) + if keyword in res: + # a part is defined more than once + self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations.")) + break + + res[keyword] = [token] # to keep track of when it starts + colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON + if not self.parser_input.next_token(): + self.add_error(UnexpectedEofNode(f"While parsing keyword '{keyword.value}'.")) + break + else: + res[keyword].append(token) + if not self.parser_input.next_token(skip_whitespace=False): + break + + previous_token = token + token = self.parser_input.token + + # Post process the result if needed + for k, v in res.items(): + stripped = core.utils.strip_tokens(v[1:]) + + # manage colon first, to sure that what is protected by the quotes remains protected + if len(stripped) > 
0 and stripped[0].type == TokenKind.COLON: + body = self.get_body(stripped[1:]) + if body: + res[k] = v[0:1] + body + # replace double quoted strings by their content + elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"': + res[k] = v[0:1] + list(Tokenizer(stripped[0].strip_quote, yield_eof=False)) + + return res diff --git a/src/parsers/BaseNodeParser.py b/src/parsers/BaseNodeParser.py index 4269f95..b25f0fc 100644 --- a/src/parsers/BaseNodeParser.py +++ b/src/parsers/BaseNodeParser.py @@ -6,8 +6,7 @@ from typing import Set import core.utils from core.builtin_concepts import BuiltinConcepts from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts -from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import TokenKind, LexerError, Token +from core.tokenizer import TokenKind, Token from parsers.BaseParser import Node, BaseParser, ErrorNode DEBUG_COMPILED = True @@ -718,7 +717,7 @@ class BaseNodeParser(BaseParser): """ def __init__(self, name, priority, **kwargs): - super().__init__(name, priority) + super().__init__(name, priority, yield_eof=True) if 'sheerka' in kwargs: sheerka = kwargs.get("sheerka") self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword @@ -745,17 +744,6 @@ class BaseNodeParser(BaseParser): concepts_by_first_keyword = self.get_concepts_by_first_token(context, concepts).body self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body - def reset_parser(self, context, parser_input: ParserInput): - self.context = context - self.sheerka = context.sheerka - self.parser_input = parser_input - try: - self.parser_input.reset(False) - except LexerError as e: - self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) - return False - return True - def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False): """ Tries to find if there are concepts 
that match the value of the token diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index 66410c2..357e9c0 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -1,7 +1,7 @@ import logging from dataclasses import dataclass +from typing import Union -import core.utils from core.builtin_concepts import BuiltinConcepts, ParserResultConcept from core.concept import Concept from core.sheerka.ExecutionContext import ExecutionContext @@ -57,7 +57,7 @@ class ErrorNode(Node): @dataclass() class UnexpectedTokenErrorNode(ErrorNode): message: str - token: Token + token: Union[Token, str] expected_tokens: list def __eq__(self, other): @@ -70,31 +70,25 @@ class UnexpectedTokenErrorNode(ErrorNode): if self.message != other.message: return False - if self.token.type != other.token.type or self.token.value != other.token.value: + to_compare = self.token.repr_value if isinstance(other.token, str) else self.token + if to_compare != other.token: return False - if len(self.expected_tokens) != len(other.expected_tokens): - return False - - for i, t in enumerate(self.expected_tokens): - if t != other.expected_tokens[i]: - return False - - return True + return self.expected_tokens == other.expected_tokens def __hash__(self): return hash((self.message, self.token, self.expected_tokens)) @dataclass() -class UnexpectedEof(ErrorNode): +class UnexpectedEofNode(ErrorNode): message: str class BaseParser: PREFIX = "parsers." - def __init__(self, name, priority: int, enabled=True): + def __init__(self, name, priority: int, enabled=True, yield_eof=False): self.log = get_logger("parsers." + self.__class__.__name__) self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__) self.verbose_log = get_logger("verbose." 
+ self.PREFIX + self.__class__.__name__) @@ -107,6 +101,7 @@ class BaseParser: self.context: ExecutionContext = None self.sheerka = None self.parser_input: ParserInput = None + self.yield_eof = yield_eof def __eq__(self, other): if not isinstance(other, self.__class__): @@ -126,10 +121,9 @@ class BaseParser: self.error_sink.clear() try: - self.parser_input.reset(False) - self.parser_input.next_token() + self.parser_input.reset(self.yield_eof) except LexerError as e: - self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False) + self.add_error(e, False) return False return True @@ -165,12 +159,12 @@ class BaseParser: value = context.return_value_to_str(r) context.log(f" Recognized '{value}'", self.name) - def get_return_value_body(self, sheerka, source, tree, try_parse): + def get_return_value_body(self, sheerka, source, parsed, try_parse): """ All parsers must return their result in a standard way :param sheerka: :param source: - :param tree: + :param parsed: :param try_parse: :return: """ @@ -178,17 +172,19 @@ class BaseParser: return self.error_sink[0] if self.has_error: - return sheerka.new( - BuiltinConcepts.ERROR, - body=self.error_sink - ) + if parsed is None: + return sheerka.new(BuiltinConcepts.NOT_FOR_ME, + body=source, + reason=self.error_sink) + else: + return sheerka.new(BuiltinConcepts.ERROR, + body=self.error_sink) - return sheerka.new( - BuiltinConcepts.PARSER_RESULT, - parser=self, - source=source, - body=tree, - try_parsed=try_parse) + return sheerka.new(BuiltinConcepts.PARSER_RESULT, + parser=self, + source=source, + body=parsed, + try_parsed=try_parse) @staticmethod def get_input_as_lexer_nodes(parser_input, expected_parser=None): @@ -242,7 +238,7 @@ class BaseParser: tokens = [tokens] switcher = { - # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), + # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value), } if custom_switcher: diff --git a/src/parsers/DefConceptParser.py b/src/parsers/DefConceptParser.py new 
file mode 100644 index 0000000..ef854ae --- /dev/null +++ b/src/parsers/DefConceptParser.py @@ -0,0 +1,274 @@ +from dataclasses import dataclass, field + +import core.builtin_helpers +import core.utils +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept +from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF +from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute +from core.tokenizer import TokenKind, Keywords +from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode +from parsers.BaseParser import Node, ErrorNode, NotInitializedNode, UnexpectedTokenErrorNode +from parsers.BnfParser import BnfParser + + +class ParsingException(Exception): + def __init__(self, error): + self.error = error + + +@dataclass() +class DefaultParserNode(Node): + """ + Base node for all default parser nodes + """ + tokens: list = field(compare=False, repr=False) + + +@dataclass() +class DefaultParserErrorNode(DefaultParserNode, ErrorNode): + pass + + +@dataclass() +class CannotHandleErrorNode(DefaultParserErrorNode): + """ + The input is not recognized + """ + text: str + + +@dataclass() +class NameNode(DefaultParserNode): + + def get_name(self): + name = "" + first = True + for token in self.tokens: + if token.type == TokenKind.EOF: + break + if token.type == TokenKind.WHITESPACE: + continue + if not first: + name += " " + + name += token.value[1:-1] if token.type == TokenKind.STRING else str(token.value) + first = False + + return name + + def __repr__(self): + return self.get_name() + + def __eq__(self, other): + if not isinstance(other, NameNode): + return False + + return self.get_name() == other.get_name() + + def __hash__(self): + return hash(self.get_name()) + + +@dataclass() +class DefConceptNode(DefaultParserNode): + name: NameNode = NotInitializedNode() + where: ReturnValueConcept = NotInitializedNode() + pre: ReturnValueConcept = NotInitializedNode() + post: 
ReturnValueConcept = NotInitializedNode() + body: ReturnValueConcept = NotInitializedNode() + ret: ReturnValueConcept = NotInitializedNode() + definition: ReturnValueConcept = NotInitializedNode() + definition_type: str = None + + def get_asts(self): + asts = {} + for part_key in ConceptParts: + prop_value = getattr(self, part_key.value) + if isinstance(prop_value, ReturnValueConcept) and \ + isinstance(prop_value.body, ParserResultConcept) and \ + hasattr(prop_value.body.body, "ast_"): + asts[part_key] = prop_value + return asts + + +@dataclass() +class IsaConceptNode(DefaultParserNode): + concept: NameNode = NotInitializedNode() + set: NameNode = NotInitializedNode() + + +class DefConceptParser(BaseCustomGrammarParser): + """ + Parse sheerka specific grammar (like def concept) + """ + + KEYWORDS = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST, Keywords.RET] + KEYWORDS_VALUES = [k.value for k in KEYWORDS] + + def __init__(self, **kwargs): + BaseCustomGrammarParser.__init__(self, "DefConcept", 60) + + def parse(self, context, parser_input: ParserInput): + # default parser can only manage string text + if parser_input.from_tokens: + ret = context.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) + self.log_result(context, parser_input, ret) + return ret + + context.log(f"Parsing '{parser_input}' with FunctionParser", self.name) + sheerka = context.sheerka + + if parser_input.is_empty(): + return sheerka.ret(self.name, + False, + sheerka.new(BuiltinConcepts.IS_EMPTY)) + + if not self.reset_parser(context, parser_input): + return self.sheerka.ret(self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + + self.parser_input.next_token() + node = self.parse_def_concept() + + body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node) + ret = sheerka.ret(self.name, not self.has_error, body) + + self.log_result(context, 
parser_input.as_text(), ret) + return ret + + def parse_def_concept(self): + """ + def concept name [where xxx] [pre xxx] [post xxx] [as xxx] + """ + token = self.parser_input.token + if token.value != Keywords.DEF.value: + self.add_error(UnexpectedTokenErrorNode("'def' keyword not found.", token, [Keywords.DEF])) + return None + + self.context.log("Keyword DEF found.", self.name) + keywords_found = [token] + self.parser_input.next_token() + + # ## the definition of a concept consists of several parts + # Keywords.CONCEPT to get the name of the concept + # Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept + # Keywords.AS to get the body + # Keywords.WHERE to get the conditions to recognize for the variables + # Keywords.PRE to know if the conditions to evaluate the concept + # Keywords.POST to apply or verify once the concept is executed + # Keywords.RET to transform the concept into another concept + parts = self.get_parts(self.KEYWORDS_VALUES, expected_first_token=Keywords.CONCEPT) + if parts is None: + return None + + keywords_found.extend([t[0] for t in parts.values()]) # keep track of all keywords found + node = DefConceptNode(keywords_found) + # if first_token.type == TokenKind.EOF: + # return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT])) + + # get the name + node.name = self.get_concept_name(parts[Keywords.CONCEPT]) + + # get definition + node.definition_type, node.definition = self.get_concept_definition(node, parts) + + # get the bodies + node.body = self.get_ast(Keywords.AS, parts) + node.where = self.get_ast(Keywords.WHERE, parts) + node.pre = self.get_ast(Keywords.PRE, parts) + node.post = self.get_ast(Keywords.POST, parts) + node.ret = self.get_ast(Keywords.RET, parts) + + return node + + def get_concept_name(self, tokens): + name_tokens = core.utils.strip_tokens(tokens[1:]) + if len(name_tokens) == 0: + self.add_error(SyntaxErrorNode([], "Name is mandatory")) + 
return None + + for token in name_tokens: + if token.type == TokenKind.NEWLINE: + self.add_error(SyntaxErrorNode([token], "Newline are not allowed in name.")) + return None + + name_node = NameNode(name_tokens) # skip the first token + return name_node + + def get_concept_definition(self, current_concept_def, parts): + if Keywords.FROM not in parts: + return None, NotInitializedNode() + + tokens = parts[Keywords.FROM] + if len(tokens) == 1: + self.add_error(SyntaxErrorNode([], f"Empty '{tokens[0].value}' declaration."), False) + return None, NotInitializedNode() + + if tokens[1].value == Keywords.BNF.value: + return self.get_concept_bnf_definition(current_concept_def, core.utils.strip_tokens(tokens[2:])) + + return self.get_concept_simple_definition(core.utils.strip_tokens(tokens[0:])) + + def get_concept_bnf_definition(self, current_concept_def, tokens): + if len(tokens) == 0: + self.add_error(SyntaxErrorNode([], "Empty 'bnf' declaration"), False) + return None, NotInitializedNode() + + if tokens[0].type == TokenKind.COLON: + tokens = self.get_body(tokens[1:]) + + bnf_regex_parser = BnfParser() + desc = f"Resolving BNF {current_concept_def.definition}" + with self.context.push(BuiltinConcepts.INIT_BNF, + current_concept_def, + who=self.name, + obj=current_concept_def, + desc=desc) as sub_context: + parsing_result = bnf_regex_parser.parse(sub_context, tokens) + sub_context.add_values(return_values=parsing_result) + + if not parsing_result.status: + self.add_error(parsing_result.value) + return None, NotInitializedNode() + + return DEFINITION_TYPE_BNF, parsing_result + + def get_concept_simple_definition(self, tokens): + + start = 2 if tokens[1].value == Keywords.DEF.value else 1 + tokens = core.utils.strip_tokens(tokens[start:]) + if len(tokens) == 0: + self.add_error(SyntaxErrorNode([], f"Empty 'from' declaration."), False) + return None, NotInitializedNode() + + if tokens[0].type == TokenKind.COLON: + tokens = self.get_body(tokens[1:]) + + return 
DEFINITION_TYPE_DEF, NameNode(tokens) + + def get_ast(self, keyword, parts): + if keyword not in parts: + return NotInitializedNode() + + tokens = parts[keyword] + if len(tokens) == 1: + self.add_error(SyntaxErrorNode(tokens, f"Empty '{tokens[0].value}' declaration.")) + return None + + source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens[1:]) + parsed = core.builtin_helpers.parse_unrecognized(self.context, + source, + parsers="all", + who=self.name, + prop=keyword, + filter_func=core.builtin_helpers.expect_one) + + if not parsed.status: + self.add_error(parsed.value) + return None + + return parsed diff --git a/src/parsers/DefaultParser.py b/src/parsers/DefaultParser.py deleted file mode 100644 index 798c70b..0000000 --- a/src/parsers/DefaultParser.py +++ /dev/null @@ -1,509 +0,0 @@ -from dataclasses import dataclass, field - -import core.builtin_helpers -import core.utils -from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept -from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF -from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute -from core.tokenizer import Tokenizer, TokenKind, Keywords -from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode -from parsers.BnfParser import BnfParser - - -class ParsingException(Exception): - def __init__(self, error): - self.error = error - - -@dataclass() -class DefaultParserNode(Node): - """ - Base node for all default parser nodes - """ - tokens: list = field(compare=False, repr=False) - - -@dataclass() -class DefaultParserErrorNode(DefaultParserNode, ErrorNode): - pass - - -@dataclass() -class UnexpectedTokenErrorNode(DefaultParserErrorNode): - message: str - expected_tokens: list - - -@dataclass() -class SyntaxErrorNode(DefaultParserErrorNode): - """ - The input is recognized, but there is a syntax error - """ - message: str - - -@dataclass() -class 
CannotHandleErrorNode(DefaultParserErrorNode): - """ - The input is not recognized - """ - text: str - - -@dataclass() -class NameNode(DefaultParserNode): - - def get_name(self): - name = "" - first = True - for token in self.tokens: - if token.type == TokenKind.EOF: - break - if token.type == TokenKind.WHITESPACE: - continue - if not first: - name += " " - - name += token.value[1:-1] if token.type == TokenKind.STRING else str(token.value) - first = False - - return name - - def __repr__(self): - return self.get_name() - - def __eq__(self, other): - if not isinstance(other, NameNode): - return False - - return self.get_name() == other.get_name() - - def __hash__(self): - return hash(self.get_name()) - - -@dataclass() -class DefConceptNode(DefaultParserNode): - name: NameNode = NotInitializedNode() - where: ReturnValueConcept = NotInitializedNode() - pre: ReturnValueConcept = NotInitializedNode() - post: ReturnValueConcept = NotInitializedNode() - body: ReturnValueConcept = NotInitializedNode() - ret: ReturnValueConcept = NotInitializedNode() - definition: ReturnValueConcept = NotInitializedNode() - definition_type: str = None - - def get_asts(self): - asts = {} - for part_key in ConceptParts: - prop_value = getattr(self, part_key.value) - if isinstance(prop_value, ReturnValueConcept) and \ - isinstance(prop_value.body, ParserResultConcept) and \ - hasattr(prop_value.body.body, "ast_"): - asts[part_key] = prop_value - return asts - - -@dataclass() -class IsaConceptNode(DefaultParserNode): - concept: NameNode = NotInitializedNode() - set: NameNode = NotInitializedNode() - - -class DefaultParser(BaseParser): - """ - Parse sheerka specific grammar (like def concept) - """ - - def __init__(self, **kwargs): - BaseParser.__init__(self, "Default", 60) - - @staticmethod - def fix_indentation(tokens): - """ - In the following example - def concept add one to a as: - def func(x): - return x+1 - func(a) - indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must 
be fixed to avoid a python syntax error - :param tokens: - :return: - """ - if len(tokens) == 0: - return tokens - - tokens = tokens.copy() # do not modify ParserInput.tokens - - if tokens[0].type != TokenKind.COLON: - return tokens - - if len(tokens) < 3: - raise ParsingException(UnexpectedTokenErrorNode(tokens[0:2], - "Unexpected end of file", - [TokenKind.NEWLINE])) - pos = DefaultParser.eat_white_space(tokens, 1) - if tokens[pos].type != TokenKind.NEWLINE: - raise ParsingException(UnexpectedTokenErrorNode([tokens[pos]], - "Unexpected token after colon", - [TokenKind.NEWLINE])) - pos += 1 - - if tokens[pos].type != TokenKind.WHITESPACE: - raise ParsingException(SyntaxErrorNode([tokens[pos]], - "Indentation not found.")) - indent_size = len(tokens[pos].value) - pos += 1 - - # now fix the other indentations - # KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant, - # as I now have an editor in interactive mode - i = pos - while i < len(tokens) - 1: - if tokens[i].type == TokenKind.NEWLINE: - if tokens[i + 1].type != TokenKind.WHITESPACE: - return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE]) - - if len(tokens[i + 1].value) < indent_size: - return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.") - - tokens[i + 1] = tokens[i + 1].clone() - tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size) - i += 1 - - return tokens[pos:] - - @staticmethod - def eat_white_space(tokens, index): - if index >= len(tokens): - return index - - while index < len(tokens) and tokens[index].type == TokenKind.WHITESPACE: - index += 1 - - return index - - def reset_parser(self, context, parser_input): - self.context = context - self.sheerka = context.sheerka - self.parser_input = parser_input - self.parser_input.reset() - self.parser_input.next_token() - - def parse(self, context, parser_input: ParserInput): - # default parser can only manage string text - if parser_input.from_tokens: - ret = context.sheerka.ret( 
- self.name, - False, - context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) - self.log_result(context, parser_input, ret) - return ret - - try: - self.reset_parser(context, parser_input) - tree = self.parse_statement() - except core.tokenizer.LexerError as e: - return self.sheerka.ret( - self.name, - False, - context.sheerka.new(BuiltinConcepts.ERROR, body=[e])) - - # If a error is found it must be sent to error_sink - # tree must contain what was recognized - - if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode): - body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink) - else: - body = self.get_return_value_body(context.sheerka, parser_input.as_text(), tree, tree) - - ret = self.sheerka.ret( - self.name, - not self.has_error, - body) - - self.log_result(context, parser_input.as_text(), ret) - return ret - - def parse_statement(self): - token = self.parser_input.token - if token.value == Keywords.DEF.value: - self.parser_input.next_token() - self.context.log("Keyword DEF found.", self.name) - return self.parse_def_concept(token) - - return self.add_error(CannotHandleErrorNode([token], "")) - - def parse_def_concept(self, def_token): - """ - def concept name [where xxx] [pre xxx] [post xxx] [as xxx] - """ - - # init - keywords_tokens = [def_token] - concept_found = DefConceptNode(keywords_tokens) - - # ## - # ## the definition of a concept consists of several parts - # ## Keywords.CONCEPT to get the name of the concept - # ## Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept - # ## Keywords.AS to get the body - # ## Keywords.WHERE to get the conditions to recognize for the variables - # ## Keywords.PRE to know if the conditions to evaluate the concept - # ## Keywords.POST to apply or verify once the concept is executed - - # Regroup the tokens by parts - first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens) - - if first_token.type == 
TokenKind.EOF: - return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT])) - - # get the name - concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts) - - # get the definition - def_type, def_value = self.get_concept_definition(concept_found, tokens_found_by_parts) - concept_found.definition_type = def_type - concept_found.definition = def_value - - # get the ASTs for the remaining parts - asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts) - concept_found.where = asts_found_by_parts[Keywords.WHERE] - concept_found.pre = asts_found_by_parts[Keywords.PRE] - concept_found.post = asts_found_by_parts[Keywords.POST] - concept_found.body = asts_found_by_parts[Keywords.AS] - concept_found.ret = asts_found_by_parts[Keywords.RET] - - return concept_found - - def regroup_tokens_by_parts(self, keywords_tokens): - - def new_part(t, cma, p): - """ - - :param t: token - :param cma: concept_mode_activated - :param p: previous token - :return: - """ - if not t.value in def_concept_parts: - return False - - if not cma or not p: - return True - - return p.line != t.line - - def_concept_parts = [Keywords.CONCEPT.value, - Keywords.FROM.value, - Keywords.AS.value, - Keywords.WHERE.value, - Keywords.PRE.value, - Keywords.POST.value, - Keywords.RET.value] - - # tokens found, when trying to recognize the parts - tokens_found_by_parts = { - Keywords.CONCEPT: [], - Keywords.FROM: None, - Keywords.AS: None, - Keywords.WHERE: None, - Keywords.PRE: None, - Keywords.POST: None, - Keywords.RET: None, - } - current_part = Keywords.CONCEPT - token = self.parser_input.token - first_token = token - colon_mode_activated = False # if activate, use keyword + colon to start a new keyword definition - previous_token = None - - # more explanation on colon_mode_activated - # You can use the pattern - # def concept as: - # xxx - # yyy - # ... 
- # - # It allows to readability and usage of other keywords inside the bloc# - # Example - # def concept give the the date as: - # from datetime import date - # return date.today() - # - # 'from datetime' will not be considered as a keyword because it's lead by a tab - # whereas in - # def concept in x days as: - # from datetime import date - # return date.today() - x - # where x > 0 - # - # where will be recognized as the keyword because it is the first word of the line - - # loop thru the tokens, and put them in the correct tokens_found_by_parts entry - while token.type != TokenKind.EOF: - if new_part(token, colon_mode_activated, previous_token): - keywords_tokens.append(token) # keep track of the keywords - keyword = Keywords(token.value) - if tokens_found_by_parts[keyword]: - # a part is defined more than once - self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations.")) - tokens_found_by_parts[current_part].append(token) # adds the token again - else: - tokens_found_by_parts[keyword] = [token] - current_part = keyword - colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON - - self.parser_input.next_token() - else: - tokens_found_by_parts[current_part].append(token) - self.parser_input.next_token(False) - - previous_token = token - token = self.parser_input.token - - return first_token, tokens_found_by_parts - - def get_concept_name(self, first_token, tokens_found_by_parts): - name_first_token_index = 1 - token = self.parser_input.token - if first_token.value != Keywords.CONCEPT.value: - self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT])) - name_first_token_index = 0 - - name_tokens = tokens_found_by_parts[Keywords.CONCEPT] - if len(name_tokens) == name_first_token_index: - self.add_error(SyntaxErrorNode([], "Name is mandatory")) - - if name_tokens[-1].type == TokenKind.NEWLINE: - name_tokens = name_tokens[:-1] # strip trailing newlines - - if TokenKind.NEWLINE in 
[t.type for t in name_tokens]: - self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name.")) - - tokens = name_tokens[name_first_token_index:] - stripped = core.utils.strip_tokens(tokens) - if len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"': - tokens = list(Tokenizer(stripped[0].strip_quote, yield_eof=False)) - - name_node = NameNode(tokens) # skip the first token - return name_node - - def get_concept_definition(self, current_concept_def, tokens_found_by_parts): - if tokens_found_by_parts[Keywords.FROM] is None: - return None, NotInitializedNode() - - definition_tokens = tokens_found_by_parts[Keywords.FROM] - if len(definition_tokens) == 1: - self.add_error(SyntaxErrorNode([], "Empty declaration"), False) - return None, NotInitializedNode() - - if definition_tokens[1].value == Keywords.BNF.value: - return self.get_concept_bnf_definition(current_concept_def, definition_tokens) - - return self.get_concept_simple_definition(definition_tokens) - - def get_concept_bnf_definition(self, current_concept_def, definition_tokens): - try: - tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[2:])) - except ParsingException as ex: - self.add_error(ex.error) - return None, NotInitializedNode() - - if len(tokens) == 0: - self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False) - return None, NotInitializedNode() - - bnf_regex_parser = BnfParser() - desc = f"Resolving BNF {current_concept_def.definition}" - with self.context.push(BuiltinConcepts.INIT_BNF, - current_concept_def, - who=self.name, - obj=current_concept_def, - desc=desc) as sub_context: - parsing_result = bnf_regex_parser.parse(sub_context, tokens) - sub_context.add_values(return_values=parsing_result) - - if not parsing_result.status: - self.add_error(parsing_result.value) - return None, NotInitializedNode() - - return DEFINITION_TYPE_BNF, parsing_result - - def 
get_concept_simple_definition(self, definition_tokens): - start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1 - try: - tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[start:])) - except ParsingException as ex: - self.add_error(ex.error) - return None, NotInitializedNode() - - if len(tokens) == 0: - self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False) - return None, NotInitializedNode() - - return DEFINITION_TYPE_DEF, NameNode(tokens) - - def get_concept_parts(self, tokens_found_by_parts): - asts_found_by_parts = { - Keywords.AS: NotInitializedNode(), - Keywords.WHERE: NotInitializedNode(), - Keywords.PRE: NotInitializedNode(), - Keywords.POST: NotInitializedNode(), - Keywords.RET: NotInitializedNode() - } - - for keyword in tokens_found_by_parts: - if keyword == Keywords.CONCEPT or keyword == Keywords.FROM: - continue # already done - - tokens = tokens_found_by_parts[keyword] - if tokens is None: - continue # nothing to do - - if len(tokens) == 1: # check for empty declarations - self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False) - continue - - try: - tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations - except ParsingException as ex: - self.add_error(ex.error) - continue - - # ask the other parsers if they recognize the tokens - source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens) - parsed = core.builtin_helpers.parse_unrecognized(self.context, - source, - parsers="all", - who=self.name, - prop=keyword, - filter_func=core.builtin_helpers.expect_one) - - if not parsed.status: - self.add_error(parsed.value) - continue - - asts_found_by_parts[keyword] = parsed - - # - # with self.context.push(BuiltinConcepts.PARSING, keyword, who=self.name, desc=f"Parsing {keyword}") as sub_context: - # parser_input = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens) - # to_parse = self.sheerka.ret( - # 
sub_context.who, - # True, - # self.sheerka.new(BuiltinConcepts.USER_INPUT, body=parser_input)) - # steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING] - # if keyword in (Keywords.WHERE, Keywords.PRE): - # sub_context.protected_hints.add(BuiltinConcepts.EVAL_QUESTION_REQUESTED) - # parsed = self.sheerka.execute(sub_context, to_parse, steps) - # parsing_result = core.builtin_helpers.expect_one(sub_context, parsed) - # sub_context.add_values(return_values=parsing_result) - # - # if not parsing_result.status: - # self.add_error(parsing_result.value) - # continue - # - # asts_found_by_parts[keyword] = parsing_result - - return asts_found_by_parts diff --git a/src/parsers/ExpressionParser.py b/src/parsers/ExpressionParser.py index ed2d617..92a3744 100644 --- a/src/parsers/ExpressionParser.py +++ b/src/parsers/ExpressionParser.py @@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts from core.concept import Concept from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import LexerError, TokenKind, Token -from parsers.BaseParser import Node, BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, ErrorNode +from parsers.BaseParser import Node, BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, ErrorNode class ExprNode(Node): @@ -189,7 +189,7 @@ class ExpressionParser(BaseParser): """ def __init__(self, **kwargs): - super().__init__("Expression", 50, False) + super().__init__("Expression", 50, False, yield_eof=True) def parse(self, context, parser_input: ParserInput): """ @@ -215,6 +215,7 @@ class ExpressionParser(BaseParser): False, context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + self.parser_input.next_token() tree = self.parse_or() token = self.parser_input.token if token and token.type != TokenKind.EOF: @@ -240,7 +241,7 @@ class ExpressionParser(BaseParser): self.parser_input.next_token() expr = self.parse_and() if expr is None: - self.add_error(UnexpectedEof("When 
parsing 'or'")) + self.add_error(UnexpectedEofNode("When parsing 'or'")) return OrNode(*parts) parts.append(expr) token = self.parser_input.token @@ -258,7 +259,7 @@ class ExpressionParser(BaseParser): self.parser_input.next_token() expr = self.parse_names() if expr is None: - self.add_error(UnexpectedEof("When parsing 'and'")) + self.add_error(UnexpectedEofNode("When parsing 'and'")) return AndNode(*parts) parts.append(expr) token = self.parser_input.token diff --git a/src/parsers/FormatRuleParser.py b/src/parsers/FormatRuleParser.py new file mode 100644 index 0000000..bdf17db --- /dev/null +++ b/src/parsers/FormatRuleParser.py @@ -0,0 +1,132 @@ +from dataclasses import dataclass + +from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept +from core.builtin_helpers import parse_unrecognized, expect_one +from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute +from core.tokenizer import Keywords +from core.utils import strip_tokens +from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, KeywordNotFound +from parsers.BaseParser import BaseParser, Node + + +@dataclass +class FormatAstNode: + pass + + +@dataclass +class FormatAstRawText(FormatAstNode): + text: str + + +@dataclass +class FormatRuleNode(Node): + tokens: dict + rule: ReturnValueConcept = None + format_ast: FormatAstNode = None + + +class FormatRuleParser(BaseCustomGrammarParser): + """ + Class that will parse formatting rules definitions + when xxx print yyy + where xxx will be evaluated in the context of BuiltinConcepts.EVAL_QUESTION_REQUESTED + and yyy is a internal way to describe a format (yet another one) + """ + + KEYWORDS = [Keywords.WHEN, Keywords.PRINT] + KEYWORDS_VALUES = [k.value for k in KEYWORDS] + + def __init__(self, **kwargs): + BaseCustomGrammarParser.__init__(self, "FormatRule", 60) + + def parse(self, context, parser_input: ParserInput): + """ + + :param context: + :param parser_input: + :return: + """ + + if not isinstance(parser_input, 
ParserInput): + return None + + if parser_input.from_tokens: + ret = context.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input)) + self.log_result(context, parser_input, ret) + return ret + + context.log(f"Parsing '{parser_input}' with FunctionParser", self.name) + sheerka = context.sheerka + + if parser_input.is_empty(): + return sheerka.ret(self.name, + False, + sheerka.new(BuiltinConcepts.IS_EMPTY)) + + if not self.reset_parser(context, parser_input): + return self.sheerka.ret(self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + + self.parser_input.next_token() + rule = self.parse_rule() + body = self.get_return_value_body(sheerka, parser_input.as_text(), rule, rule) + ret = sheerka.ret(self.name, not self.has_error, body) + + self.log_result(context, parser_input.as_text(), ret) + return ret + + def parse_rule(self): + parts = self.get_parts(self.KEYWORDS_VALUES) + if parts is None: + return None + + node = FormatRuleNode(parts) + try: + res = self.get_when(parts[Keywords.WHEN]) + if res is None: + return node + node.rule = res + + parsed = self.get_print(parts[Keywords.PRINT]) + if parsed is None: + return node + node.format_ast = parsed + except KeyError as e: + self.add_error(KeywordNotFound([], [e.args[0].value])) + return None + + return node + + def get_when(self, tokens): + """ + Validate the when part of the rule. 
+ :param tokens: + :return: + """ + source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, strip_tokens(tokens[1:])) + parsed = parse_unrecognized(self.context, + source, + parsers="all", + who=self.name, + prop=Keywords.WHEN, + filter_func=expect_one) + + if not parsed.status: + self.add_error(parsed.value) + return None + + return parsed + + def get_print(self, tokens): + """ + Validate the print part + :param tokens: + :return: + """ + source = BaseParser.get_text_from_tokens(strip_tokens(tokens[1:])) + return FormatAstRawText(source) diff --git a/src/parsers/FunctionParser.py b/src/parsers/FunctionParser.py index a8ab86f..70e6ba9 100644 --- a/src/parsers/FunctionParser.py +++ b/src/parsers/FunctionParser.py @@ -7,7 +7,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind, Token from core.utils import get_n_clones from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode -from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, Node +from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, Node from parsers.PythonWithConceptsParser import PythonWithConceptsParser # No need to check for Python code as the source code node will resolve to python code anyway @@ -143,7 +143,7 @@ class FunctionParser(BaseParser): so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]] :param kwargs: """ - super().__init__("Function", 55, True) + super().__init__("Function", 55) self.sep = sep self.longest_concepts_only = longest_concepts_only self.record_errors = True @@ -179,6 +179,7 @@ class FunctionParser(BaseParser): False, context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + self.parser_input.next_token() node = self.parse_function() if self.parser_input.next_token(): @@ -219,7 +220,7 @@ class FunctionParser(BaseParser): return None if not 
self.parser_input.next_token(): - self.add_error(UnexpectedEof(f"Unexpected EOF while parsing left parenthesis")) + self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing left parenthesis")) return None token = self.parser_input.token @@ -231,7 +232,7 @@ class FunctionParser(BaseParser): start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2]) if not self.parser_input.next_token(): - self.add_error(UnexpectedEof(f"Unexpected EOF after left parenthesis")) + self.add_error(UnexpectedEofNode(f"Unexpected EOF after left parenthesis")) return FunctionNode(start_node, None, None) params = self.parse_parameters() @@ -239,7 +240,7 @@ class FunctionParser(BaseParser): return FunctionNode(start_node, None, params) token = self.parser_input.token - if token.type != TokenKind.RPAR: + if not token or token.type != TokenKind.RPAR: self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found", token, [TokenKind.RPAR])) @@ -261,7 +262,7 @@ class FunctionParser(BaseParser): token = self.parser_input.token if token.type == TokenKind.EOF: - self.add_error(UnexpectedEof(f"Unexpected EOF while parsing parameters")) + self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing parameters")) return None if token.type == TokenKind.RPAR: @@ -269,10 +270,12 @@ class FunctionParser(BaseParser): if token.value == self.sep: sep_pos = self.parser_input.pos - self.parser_input.next_token() + has_next = self.parser_input.next_token() # it's before add_sep() to capture trailing whitespace function_parameter.add_sep(sep_pos, self.parser_input.pos - 1, self.parser_input.tokens[sep_pos: self.parser_input.pos]) + if not has_next: + break return nodes @@ -292,8 +295,8 @@ class FunctionParser(BaseParser): tokens = [] while True: token = self.parser_input.token - # if token is None: - # break + if token is None: + break if token.value == self.sep or token.type == TokenKind.RPAR: break diff --git a/src/printer/SheerkaPrinter.py 
b/src/printer/SheerkaPrinter.py index 94f376d..bf3c35a 100644 --- a/src/printer/SheerkaPrinter.py +++ b/src/printer/SheerkaPrinter.py @@ -29,8 +29,8 @@ class SheerkaPrinter: def __init__(self, sheerka): self.sheerka = sheerka self.formatter = Formatter() - self.custom_concepts_printers = None - self.reset() + self.custom_concepts_printers = {} + # self.reset() def reset(self): self.custom_concepts_printers = { diff --git a/tests/BaseTest.py b/tests/BaseTest.py index bf112ee..71bc07a 100644 --- a/tests/BaseTest.py +++ b/tests/BaseTest.py @@ -27,7 +27,7 @@ class BaseTest: where="isinstance(a, int) and isinstance(b, int)\n", pre="isinstance(a, int) and isinstance(b, int)\n", post="isinstance(res, int)\n", - body="def func(x,y):\n return x+y\nfunc(a,b)\n", + body="def func(x,y):\n return x+y\nfunc(a,b)", desc="specific description") concept.def_var("a", "value1") concept.def_var("b", "value2") diff --git a/tests/core/test_ParserInput.py b/tests/core/test_ParserInput.py index a33b6da..c3b7782 100644 --- a/tests/core/test_ParserInput.py +++ b/tests/core/test_ParserInput.py @@ -1,6 +1,6 @@ import pytest from core.sheerka.services.SheerkaExecute import ParserInput -from core.tokenizer import Tokenizer +from core.tokenizer import Tokenizer, TokenKind @pytest.mark.parametrize("text, start, end, expected", [ @@ -14,38 +14,86 @@ def test_i_can_use_parser_input(text, start, end, expected): assert parser_input.as_text() == expected -def test_i_can_get_the_next_token(): - parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'").reset() +def test_i_can_get_the_next_token_when_yield_eof_is_activated(): + parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=True).reset() + res = [] + parser_input.next_token() + while True: + res.append(f"{parser_input.token.repr_value}") + if parser_input.token.type == TokenKind.EOF: + break + parser_input.next_token() + + expected = ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'", ''] + + assert res 
== expected + + +def test_i_can_get_the_next_token_when_yield_eof_is_deactivated(): + parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=False).reset() res = [] while parser_input.next_token(): - res.append(f"{parser_input.token.str_value}") + res.append(f"{parser_input.token.repr_value}") - assert res == ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'", ''] + expected = ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'"] + assert res == expected + + +def test_i_can_get_the_next_token_when_start_and_end_are_provided(): parser_input = ParserInput("def concept a concept name from bnf 'xyz' as 'xyz'", start=4, end=9).reset() res = [] while parser_input.next_token(skip_whitespace=False): - res.append(f"{parser_input.token.str_value}") + res.append(f"{parser_input.token.repr_value}") - assert res == ['a', ' ', 'concept', ' ', 'name', ' '] + assert res == ['a', '', 'concept', '', 'name', ''] -def test_i_can_get_the_next_token_when_initialised_with_tokens(): - tokens = list(Tokenizer(" def concept a as 'xyz' ")) - parser_input = ParserInput(" def concept a as 'xyz' ", tokens).reset() +def test_i_can_get_next_token_when_yield_eof_is_false(): + parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=False).reset() res = [] while parser_input.next_token(): - res.append(f"{parser_input.token.str_value}") + res.append(f"{parser_input.token.repr_value}") - assert res == ['def', 'concept', 'a', 'as', "'xyz'", ''] + assert res == ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'"] - tokens = list(Tokenizer(" def concept a as 'xyz' ", yield_eof=False)) - parser_input = ParserInput(" def concept a as 'xyz' ", tokens).reset() + +def test_i_can_override_yield_oef_within_reset(): + parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=False).reset(yield_oef=True) + res = [] + parser_input.next_token() + while True: + res.append(f"{parser_input.token.repr_value}") + if 
parser_input.token.type == TokenKind.EOF: + break + parser_input.next_token() + + assert res == ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'", ""] + assert not parser_input.yield_oef + + +@pytest.mark.parametrize("list_has_eof, parser_has_eof, reset_has_eof", [ + (True, True, True), + (True, False, True), + (False, True, True), + (False, False, True), + (True, True, False), + (True, False, False), + (False, True, False), + (False, False, False), +]) +def test_i_can_get_the_next_token_when_initialised_with_tokens(list_has_eof, parser_has_eof, reset_has_eof): + tokens = list(Tokenizer(" def concept a as 'xyz' ", yield_eof=list_has_eof)) + parser_input = ParserInput(" def concept a as 'xyz' ", tokens, yield_oef=parser_has_eof).reset() + parser_input.reset(reset_has_eof) res = [] while parser_input.next_token(): - res.append(f"{parser_input.token.str_value}") + res.append(f"{parser_input.token.repr_value}") - assert res == ['def', 'concept', 'a', 'as', "'xyz'"] + expected = ['def', 'concept', 'a', 'as', "'xyz'"] + if reset_has_eof: + expected.append("") + assert res == expected def test_i_can_parse_twice(): diff --git a/tests/core/test_sheerka.py b/tests/core/test_sheerka.py index f63c711..43c8e84 100644 --- a/tests/core/test_sheerka.py +++ b/tests/core/test_sheerka.py @@ -28,7 +28,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka): sheerka = self.get_sheerka() # test existence of some parser (not all) - assert "parsers.DefaultParser.DefaultParser" in sheerka.parsers + assert "parsers.DefConceptParser.DefConceptParser" in sheerka.parsers assert "parsers.BnfNodeParser.BnfNodeParser" in sheerka.parsers assert "parsers.SyaNodeParser.SyaNodeParser" in sheerka.parsers assert "parsers.AtomNodeParser.AtomNodeParser" in sheerka.parsers diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py index 58f423d..9c253d0 100644 --- a/tests/core/test_utils.py +++ b/tests/core/test_utils.py @@ -55,15 +55,15 @@ def 
test_i_can_get_base_classes(): # example of classes that should be in the result base_parser = core.utils.get_class("parsers.BaseParser.BaseParser") - default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser") + def_concept_parser = core.utils.get_class("parsers.DefConceptParser.DefConceptParser") exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser") python_parser = core.utils.get_class("parsers.PythonParser.PythonParser") node = core.utils.get_class("parsers.BaseParser.Node") - def_concept_node = core.utils.get_class("parsers.DefaultParser.DefConceptNode") + def_concept_node = core.utils.get_class("parsers.DefConceptParser.DefConceptNode") python_node = core.utils.get_class("parsers.PythonParser.PythonNode") assert base_parser in classes - assert default_parser in classes + assert def_concept_parser in classes assert exact_concept_parser in classes assert python_parser in classes assert node in classes @@ -76,13 +76,13 @@ def test_i_can_get_sub_classes(): # example of classes that should be (or not) in the result base_parser = core.utils.get_class("parsers.BaseParser.BaseParser") - default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser") + def_concept_parser = core.utils.get_class("parsers.DefConceptParser.DefConceptParser") exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser") python_parser = core.utils.get_class("parsers.PythonParser.PythonParser") bnf_node_parser = core.utils.get_class("parsers.BnfNodeParser.BnfNodeParser") assert base_parser not in sub_classes - assert default_parser in sub_classes + assert def_concept_parser in sub_classes assert exact_concept_parser in sub_classes assert python_parser in sub_classes assert bnf_node_parser in sub_classes diff --git a/tests/evaluators/test_AddConceptEvaluator.py b/tests/evaluators/test_AddConceptEvaluator.py index 98e6c64..6df27a0 100644 --- a/tests/evaluators/test_AddConceptEvaluator.py +++ 
b/tests/evaluators/test_AddConceptEvaluator.py @@ -8,7 +8,7 @@ from evaluators.AddConceptEvaluator import AddConceptEvaluator from parsers.BaseParser import BaseParser from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression from parsers.BnfParser import BnfParser -from parsers.DefaultParser import DefConceptNode, NameNode +from parsers.DefConceptParser import DefConceptNode, NameNode from parsers.PythonParser import PythonNode, PythonParser from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka diff --git a/tests/evaluators/test_AddConceptInSetEvaluator.py b/tests/evaluators/test_AddConceptInSetEvaluator.py index e61e0c2..47afb48 100644 --- a/tests/evaluators/test_AddConceptInSetEvaluator.py +++ b/tests/evaluators/test_AddConceptInSetEvaluator.py @@ -3,7 +3,7 @@ from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, Built from core.concept import Concept from core.tokenizer import Tokenizer from evaluators.AddConceptInSetEvaluator import AddConceptInSetEvaluator -from parsers.DefaultParser import IsaConceptNode, NameNode +from parsers.DefConceptParser import IsaConceptNode, NameNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka diff --git a/tests/parsers/test_BaseCustomGrammarParser.py b/tests/parsers/test_BaseCustomGrammarParser.py new file mode 100644 index 0000000..bbabb6e --- /dev/null +++ b/tests/parsers/test_BaseCustomGrammarParser.py @@ -0,0 +1,227 @@ +import pytest +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import Keywords, Tokenizer, TokenKind +from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound +from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + + +class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka): + + @staticmethod + def compare_results(actual, expected, 
compare_str=False): + resolved_expected = {} + for k, v in expected.items(): + tokens = list(Tokenizer(v, yield_eof=False)) + resolved_expected[k] = [tokens[0]] + tokens[2:] + + def get_better_representation(value): + better_repr = {} + for k, tokens in value.items(): + value = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]]) + better_repr[k] = [tokens[0].repr_value, value] + return better_repr + + actual_to_compare = get_better_representation(actual) + expected_to_compare = get_better_representation(resolved_expected) + + assert actual_to_compare == expected_to_compare + + def init_parser(self, text): + sheerka, context = self.init_concepts() + + parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0) + + parser.reset_parser(context, ParserInput(text)) + parser.parser_input.next_token(False) # do not skip starting whitespaces + + return sheerka, context, parser + + @pytest.mark.parametrize("text, expected", [ + ("when xxx yyy", {Keywords.WHEN: "when xxx yyy"}), + ("when uuu vvv print xxx yyy", {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}), + ("print xxx yyy when uuu vvv", {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}), + (" when xxx", {Keywords.WHEN: "when xxx"}), + ]) + def test_i_can_get_parts(self, text, expected): + sheerka, context, parser = self.init_parser(text) + + res = parser.get_parts(["when", "print"]) + self.compare_results(res, expected) + + def test_i_can_get_parts_when_multilines(self): + text = """when +def func(x): + return x+1 +func(a) +""" + expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"} + sheerka, context, parser = self.init_parser(text) + + res = parser.get_parts(["when"]) + self.compare_results(res, expected) + + @pytest.mark.parametrize("text", [ + "", + "no keyword", + "anything before when xxx print yyy", + ]) + def test_i_cannot_get_parts_when_no_keyword_found(self, text): + sheerka, context, parser = self.init_parser(text) + + 
assert parser.get_parts(["when", "print"]) is None + assert len(parser.error_sink) == 1 + assert isinstance(parser.error_sink[0], KeywordNotFound) + assert parser.error_sink[0].keywords == ['when', 'print'] + + def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self): + sheerka, context, parser = self.init_parser("when xxx print yyy") + + assert parser.get_parts(["when", "print"], Keywords.PRINT) is None + assert parser.error_sink == [UnexpectedTokenErrorNode(f"'print' keyword not found.", + "when", + [Keywords.PRINT])] + + def test_i_can_detect_when_a_keyword_appears_several_times(self): + sheerka, context, parser = self.init_parser("print hello when True print True") + + parser.get_parts(["print"]) + assert len(parser.error_sink) == 1 + assert isinstance(parser.error_sink[0], SyntaxErrorNode) + assert parser.error_sink[0].message == "Too many 'print' declarations." + + @pytest.mark.parametrize("text", [ + "print", + "print ", + "when xxx print", + "when xxx print ", + ]) + def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text): + sheerka, context, parser = self.init_parser(text) + + assert parser.get_parts(["print", "when"]) is not None + assert len(parser.error_sink) == 1 + assert isinstance(parser.error_sink[0], UnexpectedEofNode) + assert parser.error_sink[0].message == "While parsing keyword 'print'." + + def test_i_can_double_quoted_strings_are_expanded(self): + """ + When inside a double quote, the double quote is removed and its content it used as is. 
+ It allows usage of keywords within parts + :return: + """ + sheerka, context, parser = self.init_parser('print "when can be used" when True') + expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"} + + res = parser.get_parts(["print", "when"]) + self.compare_results(res, expected) + + def test_single_quoted_strings_are_not_expanded(self): + sheerka, context, parser = self.init_parser("print 'when can be used' when True") + expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"} + + res = parser.get_parts(["print", "when"]) + self.compare_results(res, expected) + + def test_i_can_manage_colon(self): + text = """when: + xxx + when + print +print: + xxx: + when + print + yyy +""" + sheerka, context, parser = self.init_parser(text) + expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"} + + res = parser.get_parts(["print", "when"]) + self.compare_results(res, expected, compare_str=True) + + def test_indentation_is_normalized_when_using_colon(self): + text = """print: + xxx: + when + print + yyy + """ + sheerka, context, parser = self.init_parser(text) + expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"} + + res = parser.get_parts(["print", "when"]) + self.compare_results(res, expected, compare_str=True) + + def test_i_can_mix_parts_with_colon_and_parts_without_colon(self): + text = """when: + xxx + when + print +print xxx""" + sheerka, context, parser = self.init_parser(text) + expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"} + + res = parser.get_parts(["print", "when"]) + self.compare_results(res, expected, compare_str=True) + + @pytest.mark.parametrize("text", [ + "when:\nx x", + "when: \nx x", + ]) + def test_i_cannot_manage_colon_when_tab_is_missing(self, text): + sheerka, context, parser = self.init_parser(text) + + assert parser.get_parts(["when"]) + assert parser.error_sink == 
[UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])] + + @pytest.mark.parametrize("text", [ + "", + "\n", + " \n", + "x", # less than two characters + "\n\t" + ]) + def test_i_cannot_get_body_when_body_is_too_short(self, text): + sheerka, context, parser = self.init_parser("") + + assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None + assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")] + + def test_a_new_line_is_expected_when_get_body(self): + sheerka, context, parser = self.init_parser("") + + assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None + assert parser.error_sink == [UnexpectedTokenErrorNode("New line not found.", "not", [TokenKind.NEWLINE])] + + @pytest.mark.parametrize("text", [ + "\nx x", + " \nx x", + ]) + def test_tab_is_mandatory_after_new_line_when_get_body(self, text): + sheerka, context, parser = self.init_parser("") + + assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None + assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])] + + def test_i_can_detect_missing_tab_when_get_body(self): + text = "\n\txxx\n\tyyy\nzzz" + + sheerka, context, parser = self.init_parser("") + assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None + assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "zzz", [TokenKind.WHITESPACE])] + + def test_i_can_detect_invalid_indentation_when_get_body(self): + sheerka, context, parser = self.init_parser("") + assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None + assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")] + + def test_i_can_get_body(self): + sheerka, context, parser = self.init_parser("") + res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False))) + expected = list(Tokenizer("xxx\n yyyy", yield_eof=False)) + expected[2].value = "" 
+ + assert [t.repr_value for t in res] == [t.repr_value for t in expected] + assert parser.error_sink == [] diff --git a/tests/parsers/test_BnfParser.py b/tests/parsers/test_BnfParser.py index 0ce235f..c632cc8 100644 --- a/tests/parsers/test_BnfParser.py +++ b/tests/parsers/test_BnfParser.py @@ -33,7 +33,7 @@ def update_concepts_ids(sheerka, parsing_expression): update_concepts_ids(sheerka, pe) -eof_token = Token(TokenKind.EOF, "", 0, 0, 0) +eof_token = "" class TestBnfParser(TestUsingMemoryBasedSheerka): diff --git a/tests/parsers/test_DefaultParser.py b/tests/parsers/test_DefConceptParser.py similarity index 73% rename from tests/parsers/test_DefaultParser.py rename to tests/parsers/test_DefConceptParser.py index 4b0730f..6c14edd 100644 --- a/tests/parsers/test_DefaultParser.py +++ b/tests/parsers/test_DefConceptParser.py @@ -7,10 +7,11 @@ from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Keywords, Tokenizer, LexerError from parsers.BaseNodeParser import SCWC +from parsers.BaseParser import NotInitializedNode, UnexpectedEofNode from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence from parsers.BnfParser import BnfParser -from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode -from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode +from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode +from parsers.DefConceptParser import UnexpectedTokenErrorNode, DefConceptNode from parsers.FunctionParser import FunctionParser from parsers.PythonParser import PythonParser, PythonNode @@ -48,7 +49,7 @@ def get_concept_part(part): if isinstance(part, str): node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval")) return ReturnValueConcept( - who="parsers.Default", + who="parsers.DefConcept", status=True, value=ParserResultConcept( 
source=part, @@ -59,7 +60,7 @@ def get_concept_part(part): # node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval")) nodes = compute_expected_array({}, part.source, [SCWC(part.first, part.last, *part.content)]) return ReturnValueConcept( - who="parsers.Default", + who="parsers.DefConcept", status=True, value=ParserResultConcept( source=part.source, @@ -70,7 +71,7 @@ def get_concept_part(part): if isinstance(part, PN): node = PythonNode(part.source.strip(), ast.parse(part.source.strip(), mode=part.mode)) return ReturnValueConcept( - who="parsers.Default", + who="parsers.DefConcept", status=True, value=ParserResultConcept( source=part.source, @@ -79,7 +80,7 @@ def get_concept_part(part): if isinstance(part, PythonNode): return ReturnValueConcept( - who="parsers.Default", + who="parsers.DefConcept", status=True, value=ParserResultConcept( source=part.source, @@ -110,13 +111,26 @@ class FN: content: list -class TestDefaultParser(TestUsingMemoryBasedSheerka): +class TestDefConceptParser(TestUsingMemoryBasedSheerka): def init_parser(self, *concepts): sheerka, context, *updated = self.init_concepts(*concepts, singleton=True) - parser = DefaultParser() + parser = DefConceptParser() return sheerka, context, parser, *updated + @pytest.mark.parametrize("text, error", [ + ("concept", UnexpectedTokenErrorNode("'def' keyword not found.", "concept", [Keywords.DEF])), + ("hello word", UnexpectedTokenErrorNode("'def' keyword not found.", "hello", [Keywords.DEF])), + ("def hello", UnexpectedTokenErrorNode("'concept' keyword not found.", "hello", [Keywords.CONCEPT])), + ]) + def test_i_can_detect_not_for_me(self, text, error): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME) + assert res.value.reason == [error] + @pytest.mark.parametrize("text, expected", [ ("def concept hello", get_def_concept(name="hello")), 
("def concept hello ", get_def_concept(name="hello")), @@ -124,13 +138,11 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka): ("def concept a+b", get_def_concept(name="a + b")), ("def concept 'a+b'+c", get_def_concept(name="'a+b' + c")), ("def concept 'as if'", get_def_concept(name="'as if'")), - ("def concept 'as' if", get_def_concept(name="'as if'")), - ("def concept hello as 'hello'", get_def_concept(name="hello", body="'hello'")), - ("def concept hello as 1", get_def_concept(name="hello", body="1")), - ("def concept hello as 1 + 1", get_def_concept(name="hello", body="1 + 1")), + ("def concept 'as' if", get_def_concept(name="'as' if")), + ('def concept "as if"', get_def_concept(name="as if")), ]) - def test_i_can_parse_def_concept(self, text, expected): - sheerka, context, parser = self.init_parser() + def test_i_can_parse_def_concept_name(self, text, expected): + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) node = res.value.value @@ -140,6 +152,113 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka): assert isinstance(res.value, ParserResultConcept) assert node == expected + def test_name_is_mandatory(self): + text = "def concept as 'hello'" + + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + return_value = res.value + + assert not res.status + assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) + assert isinstance(return_value.body[0], SyntaxErrorNode) + assert return_value.body[0].message == "Name is mandatory" + + @pytest.mark.parametrize("text", [ + "def concept hello\nmy friend", + "def concept hello \nmy friend", + "def concept hello\n my friend", + "def concept hello \n my friend", + "def concept hello from hello\nmy friend", + "def concept hello from def hello\nmy friend", + "def concept hello from bnf hello\nmy friend", + "def concept hello from:\n\thello\nmy friend", + "def concept hello from def:\n\thello\nmy 
friend", + "def concept hello from bnf:\n\thello\nmy friend", + ]) + def test_new_line_is_not_allowed_in_the_name(self, text): + text = "def concept hello \n my friend as 'hello'" + + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + return_value = res.value + + assert not res.status + assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) + assert return_value.body == [SyntaxErrorNode(None, "Newline are not allowed in name.")] + + def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self): + text = "def hello as a where b pre c post d" + + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + return_value = res.value + + assert not res.status + assert sheerka.isinstance(return_value, BuiltinConcepts.NOT_FOR_ME) + assert isinstance(return_value.reason[0], UnexpectedTokenErrorNode) + assert return_value.reason[0].message == "'concept' keyword not found." + assert return_value.reason[0].expected_tokens == [Keywords.CONCEPT] + assert return_value.reason[0].token.value == "hello" + + def test_i_can_detect_empty_declaration(self): + sheerka, context, parser, *concepts = self.init_parser() + text = "def concept foo as where True" + res = parser.parse(context, ParserInput(text)) + error = res.body.body[0] + + assert not res.status + assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR) + assert isinstance(error, SyntaxErrorNode) + assert error.message == "Empty 'as' declaration." 
+ + def test_empty_parts_are_not_initialized(self): + sheerka, context, parser, *concepts = self.init_parser() + text = "def concept foo" + res = parser.parse(context, ParserInput(text)) + parser_result = res.body + node = res.body.body + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert isinstance(node, DefConceptNode) + assert node.body == NotInitializedNode() + assert node.where == NotInitializedNode() + assert node.pre == NotInitializedNode() + assert node.post == NotInitializedNode() + assert node.ret == NotInitializedNode() + + @pytest.mark.parametrize("part", [ + "as", + "pre", + "post", + "ret", + "where" + ]) + def test_i_can_parse_def_concept_parts(self, part): + sheerka, context, parser, *concepts = self.init_parser() + text = "def concept foo " + part + " True" + res = parser.parse(context, ParserInput(text)) + node = res.value.value + + assert res.status + assert res.who == parser.name + assert res.value.source == text + assert isinstance(res.value, ParserResultConcept) + + part_mapping = "body" if part == "as" else part + args = {part_mapping: get_concept_part("True")} + expected = get_def_concept("foo", **args) + assert node == expected + + def test_i_can_detect_error_in_declaration(self): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput("def concept hello where 1+")) + return_value = res.value + + assert not res.status + assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS) + def test_i_can_parse_complex_def_concept_statement(self): text = """def concept a mult b where a,b @@ -148,7 +267,7 @@ post isinstance(res, a) as res = a * b ret a if isinstance(a, Concept) else self """ - sheerka, context, parser = self.init_parser() + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) return_value = res.value expected_concept = get_def_concept( @@ -177,7 +296,7 @@ func(a) body=PN("def 
func(x):\n return x+1\nfunc(a)\n", "exec") ) - sheerka, context, parser = self.init_parser() + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) return_value = res.value @@ -199,7 +318,7 @@ def concept add one to a as: ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec")) ) - sheerka, context, parser = self.init_parser() + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) return_value = res.value @@ -208,156 +327,17 @@ def concept add one to a as: assert return_value.value == expected_concept @pytest.mark.parametrize("text", [ - "def concept foo as:\npass", - "def concept foo where:\npass", - "def concept foo pre:\npass", - "def concept foo post:\npass", - "def concept foo from:\nanother definition", - "def concept foo from def:\nanother definition", - "def concept foo from bnf:\n'another' 'definition'", + "def concept name from bnf", + "def concept name from bnf ", + "def concept name from bnf as True", ]) - def test_indentation_is_mandatory_after_a_colon(self, text): - sheerka, context, parser = self.init_parser() + def test_i_cannot_parse_empty_bnf_definition(self, text): + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) - return_value = res.value - + error = res.body assert not res.status - assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) - assert isinstance(return_value.body[0], SyntaxErrorNode) - assert return_value.body[0].message == "Indentation not found." 
- - @pytest.mark.parametrize("text", [ - "def concept plus from:\n\ta plus b", - "def concept plus from def:\n\ta plus b", - - # space before the colon - "def concept plus from :\n\ta plus b", - "def concept plus from def :\n\ta plus b", - - # space after the colon - "def concept plus from: \n\ta plus b", - "def concept plus from def: \n\ta plus b", - ]) - def test_i_can_use_colon_and_definition_together(self, text): - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - defined_concept = res.body.body - defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens] - - assert res.status - assert defined_concept.definition_type == DEFINITION_TYPE_DEF - assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)] - - @pytest.mark.parametrize("text", [ - "def concept plus from bnf:\n\t'a' 'plus' 'b'", - "def concept plus from bnf :\n\t'a' 'plus' 'b'", - "def concept plus from bnf: \n\t'a' 'plus' 'b'", - ]) - def test_i_can_use_colon_and_bnf_definition_together(self, text): - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - defined_concept = res.body.body - - assert res.status - assert defined_concept.definition.status - assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b")) - - def test_i_can_use_colon_to_protect_keyword(self): - text = """ -def concept today as: - from datetime import date - today = date.today() -from: - give me the date ! 
-""" - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - defined_concept = res.body.body - defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens] - - assert res.status - assert defined_concept.definition_type == DEFINITION_TYPE_DEF - assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)] - assert defined_concept.body.status - - def test_i_can_use_colon_to_protect_keyword_2(self): - text = """ -def concept today as: - from datetime import date - today = date.today() -from give me the date ! -""" - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - defined_concept = res.body.body - defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens] - - assert res.status - assert defined_concept.definition_type == DEFINITION_TYPE_DEF - assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)] - assert defined_concept.body.status - - def test_name_is_mandatory(self): - text = "def concept as 'hello'" - - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - return_value = res.value - - assert not res.status - assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) - assert isinstance(return_value.body[0], SyntaxErrorNode) - assert return_value.body[0].message == "Name is mandatory" - - def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self): - text = "def hello as a where b pre c post d" - - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - return_value = res.value - - assert not res.status - assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) - assert isinstance(return_value.body[0], UnexpectedTokenErrorNode) - assert return_value.body[0].message == "Syntax error." 
- assert return_value.body[0].expected_tokens == [Keywords.CONCEPT] - - @pytest.mark.parametrize("text", [ - "def concept hello where 1+", - "def concept hello pre 1+", - "def concept hello post 1+", - "def concept hello as 1+" - ]) - def test_i_can_detect_error_in_declaration(self, text): - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - return_value = res.value - - assert not res.status - assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS) - - @pytest.mark.parametrize("text", [ - "def concept hello\nmy friend", - "def concept hello \nmy friend", - "def concept hello\n my friend", - "def concept hello \n my friend", - "def concept hello from hello\nmy friend", - "def concept hello from def hello\nmy friend", - "def concept hello from bnf hello\nmy friend", - "def concept hello from:\n\thello\nmy friend", - "def concept hello from def:\n\thello\nmy friend", - "def concept hello from bnf:\n\thello\nmy friend", - ]) - def test_new_line_is_not_allowed_in_the_name(self, text): - text = "def concept hello \n my friend as 'hello'" - - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - return_value = res.value - - assert not res.status - assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR) - assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")] + assert sheerka.isinstance(error, BuiltinConcepts.ERROR) + assert error.body == [SyntaxErrorNode([], "Empty 'bnf' declaration")] def test_i_can_parse_def_concept_from_bnf(self): text = "def concept name from bnf a_concept | 'a_string' as __definition[0]" @@ -383,35 +363,40 @@ from give me the date ! 
assert not parser.has_error @pytest.mark.parametrize("text", [ - 'def concept "def concept x"', - 'def concept "def concept x" as x', + "def concept plus from bnf:\n\t'a' 'plus' 'b'", + "def concept plus from bnf :\n\t'a' 'plus' 'b'", + "def concept plus from bnf: \n\t'a' 'plus' 'b'", ]) - def test_i_can_use_double_quotes_to_protect_keywords(self, text): - sheerka, context, parser = self.init_parser() + def test_i_can_use_colon_and_bnf_definition_together(self, text): + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) - concept_defined = res.value.value + defined_concept = res.body.body assert res.status - assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False)) + assert defined_concept.definition.status + assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b")) - @pytest.mark.parametrize("text", [ - "def concept name from bnf as here is my body", - "def concept name from def as here is my body", - "def concept name from as here is my body" + @pytest.mark.parametrize("text, error", [ + ("def concept name from def as True", SyntaxErrorNode([], "Empty 'from' declaration.")), + ("def concept name from def", SyntaxErrorNode([], "Empty 'from' declaration.")), + ("def concept name from def ", SyntaxErrorNode([], "Empty 'from' declaration.")), + ("def concept name from as True", SyntaxErrorNode([], "Empty 'from' declaration.")), + ("def concept name from", UnexpectedEofNode("While parsing keyword 'from'.")), + ("def concept name from ", UnexpectedEofNode("While parsing keyword 'from'.")), ]) - def test_i_can_detect_empty_bnf_declaration(self, text): - sheerka, context, parser = self.init_parser() + def test_i_can_detect_empty_def_declaration(self, text, error): + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) assert not res.status assert sheerka.isinstance(res.value, 
BuiltinConcepts.ERROR) - assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration") + assert res.value.body[0] == error @pytest.mark.parametrize("text", [ "def concept addition from a plus b as a + b", "def concept addition from def a plus b as a + b"]) def test_i_can_def_concept_from_definition(self, text): - sheerka, context, parser = self.init_parser() + sheerka, context, parser, *concepts = self.init_parser() res = parser.parse(context, ParserInput(text)) expected = get_def_concept("addition", definition="a plus b", body="a + b") node = res.value.value @@ -422,6 +407,114 @@ from give me the date ! assert isinstance(res.value, ParserResultConcept) assert node == expected + @pytest.mark.parametrize("text", [ + "def concept plus from:\n\ta plus b", + "def concept plus from def:\n\ta plus b", + + # space before the colon + "def concept plus from :\n\ta plus b", + "def concept plus from def :\n\ta plus b", + + # space after the colon + "def concept plus from: \n\ta plus b", + "def concept plus from def: \n\ta plus b", + ]) + def test_i_can_use_colon_and_definition_together(self, text): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + defined_concept = res.body.body + defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens] + + assert res.status + assert defined_concept.definition_type == DEFINITION_TYPE_DEF + assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)] + + def test_i_can_use_colon_to_protect_keyword(self): + text = """ +def concept today as: + from datetime import date + today = date.today() +from: + give me the date ! 
+""" + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + defined_concept = res.body.body + defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens] + + assert res.status + assert defined_concept.definition_type == DEFINITION_TYPE_DEF + assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)] + assert defined_concept.body.status + + def test_i_can_use_colon_to_protect_keyword_2(self): + text = """ +def concept today as: + from datetime import date + today = date.today() +from give me the date ! +""" + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + defined_concept = res.body.body + defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens] + + assert res.status + assert defined_concept.definition_type == DEFINITION_TYPE_DEF + assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)] + assert defined_concept.body.status + + @pytest.mark.parametrize("text", [ + "def", + "def concept_name" + ]) + def test_i_cannot_parse_invalid_entries(self, text): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode) + + @pytest.mark.parametrize("text, error_msg, error_text", [ + ("'name", "Missing Trailing quote", "'name"), + ("foo isa 'name", "Missing Trailing quote", "'name"), + ("def concept 'name", "Missing Trailing quote", "'name"), + ("def concept name as 'body", "Missing Trailing quote", "'body"), + ("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"), + ("def concept c::", "Concept identifiers not found", ""), + ]) + def test_i_cannot_parse_when_tokenizer_fails(self, text, 
error_msg, error_text): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) + assert isinstance(res.body.body[0], LexerError) + assert res.body.body[0].message == error_msg + assert res.body.body[0].text == error_text + + def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self): + text = "def concept name from bnf unknown" + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + + assert not res.status + assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT) + assert res.value.body == ("key", "unknown") + + @pytest.mark.parametrize("text", [ + 'def concept "def concept x"', + 'def concept "def concept x" as x', + ]) + def test_i_can_use_double_quotes_to_protect_keywords(self, text): + sheerka, context, parser, *concepts = self.init_parser() + res = parser.parse(context, ParserInput(text)) + concept_defined = res.value.value + + assert res.status + assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False)) + def test_i_can_parse_when_ambiguity_in_where_pre_clause(self): sheerka, context, parser, *concepts = self.init_parser( Concept("x is a y", pre="in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)"), @@ -430,7 +523,7 @@ from give me the date ! text = "def concept foo x y where x is a y" res = parser.parse(context, ParserInput(text)) - expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default", + expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept", parser="parsers.ExactConcept") expected = get_def_concept("foo x y", where=expected_body) node = res.value.value @@ -443,7 +536,7 @@ from give me the date ! 
text = "def concept foo x y pre x is a y" res = parser.parse(context, ParserInput(text)) - expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default", + expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept", parser="parsers.ExactConcept") expected = get_def_concept("foo x y", pre=expected_body) node = res.value.value @@ -454,63 +547,5 @@ from give me the date ! assert isinstance(res.value, ParserResultConcept) assert node == expected - def test_i_can_detect_not_for_me(self): - text = "hello world" - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME) - assert isinstance(res.value.body[0], CannotHandleErrorNode) - @pytest.mark.parametrize("text", [ - "def", - "def concept_name" - ]) - def test_i_cannot_parse_invalid_entries(self, text): - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) - assert isinstance(res.body.body[0], UnexpectedTokenErrorNode) - - @pytest.mark.parametrize("text", [ - "concept", - "isa number", - "name isa", - ]) - def test_i_cannot_parse_not_for_me_entries(self, text): - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) - assert isinstance(res.body.body[0], CannotHandleErrorNode) - - @pytest.mark.parametrize("text, error_msg, error_text", [ - ("'name", "Missing Trailing quote", "'name"), - ("foo isa 'name", "Missing Trailing quote", "'name"), - ("def concept 'name", "Missing Trailing quote", "'name"), - ("def concept name as 'body", "Missing Trailing quote", "'body"), - ("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"), - 
("def concept c::", "Concept identifiers not found", ""), - ]) - def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text): - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - - assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) - assert isinstance(res.body.body[0], LexerError) - assert res.body.body[0].message == error_msg - assert res.body.body[0].text == error_text - - def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self): - text = "def concept name from bnf unknown" - sheerka, context, parser = self.init_parser() - res = parser.parse(context, ParserInput(text)) - - assert not res.status - assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT) - assert res.value.body == ("key", "unknown") diff --git a/tests/parsers/test_ExpressionParser.py b/tests/parsers/test_ExpressionParser.py index 99eb555..bba65f8 100644 --- a/tests/parsers/test_ExpressionParser.py +++ b/tests/parsers/test_ExpressionParser.py @@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept from core.concept import Concept from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import Tokenizer, TokenKind -from parsers.BaseParser import UnexpectedEof, UnexpectedTokenErrorNode +from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode from parsers.ExpressionParser import PropertyEqualsNode, PropertyEqualsSequenceNode, PropertyContainsNode, AndNode, \ OrNode, NotNode, LambdaNode, IsaNode, NameExprNode, ExpressionParser, LeftPartNotFoundError, TrueifyVisitor @@ -33,14 +33,14 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): @pytest.mark.parametrize("expression, expected", [ ("one complicated expression", n("one complicated expression")), - ("function_call(a,b,c)", n("function_call(a,b,c)")), - ("one expression or another expression", OrNode(n("one expression"), n("another 
expression"))), - ("one expression and another expression", AndNode(n("one expression"), n("another expression"))), - ("one or two or three", OrNode(n("one"), n("two"), n("three"))), - ("one and two and three", AndNode(n("one"), n("two"), n("three"))), - ("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))), - ("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))), - ("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))), + # ("function_call(a,b,c)", n("function_call(a,b,c)")), + # ("one expression or another expression", OrNode(n("one expression"), n("another expression"))), + # ("one expression and another expression", AndNode(n("one expression"), n("another expression"))), + # ("one or two or three", OrNode(n("one"), n("two"), n("three"))), + # ("one and two and three", AndNode(n("one"), n("two"), n("three"))), + # ("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))), + # ("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))), + # ("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))), ]) def test_i_can_parse_expression(self, expression, expected): sheerka, context, parser = self.init_parser() @@ -54,12 +54,12 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): assert expressions == expected @pytest.mark.parametrize("expression, expected_errors", [ - ("one or", [UnexpectedEof("When parsing 'or'")]), - ("one and", [UnexpectedEof("When parsing 'and'")]), + ("one or", [UnexpectedEofNode("When parsing 'or'")]), + ("one and", [UnexpectedEofNode("When parsing 'and'")]), ("and one", [LeftPartNotFoundError()]), ("or one", [LeftPartNotFoundError()]), - ("or", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'or'")]), - ("and", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'and'")]), + ("or", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 'or'")]), + ("and", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 
'and'")]), ]) def test_i_can_detect_error(self, expression, expected_errors): sheerka, context, parser = self.init_parser() @@ -74,17 +74,17 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka): res = parser.parse(context, ParserInput("(")) assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) - assert isinstance(res.body.body[0], UnexpectedTokenErrorNode) - assert res.body.body[0].token.type == TokenKind.EOF - assert res.body.body[0].expected_tokens == [TokenKind.RPAR] + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode) + assert res.body.reason[0].token.type == TokenKind.EOF + assert res.body.reason[0].expected_tokens == [TokenKind.RPAR] res = parser.parse(context, ParserInput(")")) assert not res.status - assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) - assert isinstance(res.body.body[0], UnexpectedTokenErrorNode) - assert res.body.body[0].token.type == TokenKind.RPAR - assert res.body.body[0].expected_tokens == [] + assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME) + assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode) + assert res.body.reason[0].token.type == TokenKind.RPAR + assert res.body.reason[0].expected_tokens == [] res = parser.parse(context, ParserInput("one and two)")) assert not res.status diff --git a/tests/parsers/test_FormatRuleParser.py b/tests/parsers/test_FormatRuleParser.py new file mode 100644 index 0000000..1158515 --- /dev/null +++ b/tests/parsers/test_FormatRuleParser.py @@ -0,0 +1,71 @@ +import pytest +from core.builtin_concepts import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import ParserInput +from parsers.BaseCustomGrammarParser import KeywordNotFound +from parsers.FormatRuleParser import FormatRuleParser, FormatAstRawText, FormatRuleNode + +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka + +cmap = {} + + +class 
TestFormatRuleParser(TestUsingMemoryBasedSheerka): + sheerka = None + + @classmethod + def setup_class(cls): + t = cls() + cls.sheerka, context, _ = t.init_parser(cmap) + + def init_parser(self, concepts_map=None): + if concepts_map is not None: + sheerka, context, *concepts = self.init_concepts(*concepts_map.values(), create_new=True) + else: + sheerka = TestFormatRuleParser.sheerka + context = self.get_context(sheerka) + + parser = FormatRuleParser() + return sheerka, context, parser + + def test_i_can_detect_empty_expression(self): + sheerka, context, parser = self.init_parser() + res = parser.parse(context, ParserInput("")) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) + + def test_input_must_be_a_parser_input(self): + sheerka, context, parser = self.init_parser() + parser.parse(context, "not a parser input") is None + + def test_i_can_parse_a_simple_rule(self): + sheerka, context, parser = self.init_parser() + + text = "when isinstance(last_value(), Concept) print hello world!" 
+ res = parser.parse(context, ParserInput(text)) + parser_result = res.body + format_rule = res.body.body + rule = format_rule.rule + format_ast = format_rule.format_ast + + assert res.status + assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) + assert isinstance(format_rule, FormatRuleNode) + + assert sheerka.isinstance(rule, BuiltinConcepts.RETURN_VALUE) + assert format_ast == FormatAstRawText("hello world!") + + @pytest.mark.parametrize("text, error", [ + ("hello world", [KeywordNotFound(None, keywords=['when', 'print'])]), + ("when True", [KeywordNotFound([], keywords=['print'])]), + ("print True", [KeywordNotFound([], keywords=['when'])]), + ]) + def test_cannot_parse_when_not_for_me(self, text, error): + sheerka, context, parser = self.init_parser() + + res = parser.parse(context, ParserInput(text)) + not_for_me = res.body + + assert not res.status + assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME) + assert not_for_me.reason == error diff --git a/tests/parsers/test_FunctionParser.py b/tests/parsers/test_FunctionParser.py index 76fb95e..e79085d 100644 --- a/tests/parsers/test_FunctionParser.py +++ b/tests/parsers/test_FunctionParser.py @@ -70,6 +70,8 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka): sheerka, context, parser = self.init_parser() parser.reset_parser(context, ParserInput(expression)) + parser.parser_input.next_token() + res = parser.parse_function() assert res == expected diff --git a/tests/sheerkapickle/test_sheerka_handlers.py b/tests/sheerkapickle/test_sheerka_handlers.py index e4d7740..80ee9df 100644 --- a/tests/sheerkapickle/test_sheerka_handlers.py +++ b/tests/sheerkapickle/test_sheerka_handlers.py @@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts from core.sheerka.ExecutionContext import ExecutionContext from core.tokenizer import Tokenizer from evaluators.ConceptEvaluator import ConceptEvaluator -from parsers.DefaultParser import DefaultParser +from parsers.DefConceptParser 
import DefConceptParser from sdp.sheerkaDataProvider import Event from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -267,10 +267,10 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka): decoded = sheerkapickle.decode(sheerka, to_string) assert decoded == sheerka.ret("c:1001:", True, 10) - ret_val = sheerka.ret(DefaultParser(), True, 10) + ret_val = sheerka.ret(DefConceptParser(), True, 10) to_string = sheerkapickle.encode(sheerka, ret_val) decoded = sheerkapickle.decode(sheerka, to_string) - assert decoded == sheerka.ret("parsers.Default", True, 10) + assert decoded == sheerka.ret("parsers.DefConcept", True, 10) ret_val = sheerka.ret(ConceptEvaluator(), True, 10) to_string = sheerkapickle.encode(sheerka, ret_val)