Refactored parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser to DefConceptParser

This commit is contained in:
2020-10-02 04:45:47 +02:00
parent d100b7e8b3
commit e8f2705dcf
28 changed files with 1411 additions and 872 deletions
+1 -1
View File
@@ -287,7 +287,7 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
"""
Try to recognize concepts or code from source using the given parsers
:param context:
:param source:
:param source: ParserInput if possible
:param parsers:
:param who: who is asking the parsing ?
:param prop: Extra info, when parsing a property
+1 -1
View File
@@ -335,7 +335,7 @@ class Sheerka(Concept):
"""
core.utils.import_module_and_sub_module("parsers")
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
modules_to_skip = ["parsers.BaseNodeParser"]
modules_to_skip = ["parsers.BaseNodeParser", "parsers.BaseCustomGrammarParser"]
temp_result = {}
for parser in core.utils.get_sub_classes("parsers", base_class):
+31 -12
View File
@@ -15,11 +15,29 @@ class ParserInput:
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
self.text = text
self.tokens = tokens or None
self.length = len(tokens) if tokens else None
if self.tokens:
# make sure tokens ends with EOF token
# and do not modify the original token list
if len(self.tokens) == 0:
self.tokens = [Token(TokenKind.EOF, "", 0, 1, 1)]
elif (last_token := self.tokens[-1]).type != TokenKind.EOF:
self.tokens = self.tokens + [Token(TokenKind.EOF,
"",
last_token.index + 1,
last_token.line,
last_token.column + 1)]
self.length = None # to be computed in reset()
self.yield_oef = yield_oef
self.start = start or 0
self.end = end + 1 if end else None
if end:
self.original_end = end + 1
self.end = self.original_end
else:
self.original_end = self.end = None
self.sub_text = None
self.sub_tokens = None
@@ -32,15 +50,19 @@ class ParserInput:
from_tokens = "from_tokens" if self.from_tokens else ""
return f"ParserInput({from_tokens}'{self.text}')"
def reset(self, yield_oef=True):
def reset(self, yield_oef=None):
if yield_oef is None:
yield_oef = self.yield_oef
# make sure tokens is correctly initialized
if self.tokens is None:
self.tokens = list(Tokenizer(self.text))
self.length = len(self.tokens)
self.tokens = list(Tokenizer(self.text, yield_eof=True))
if self.end is None:
self.end = self.length
if self.original_end is None:
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
else:
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
self.yield_oef = yield_oef
self.pos = self.start - 1
self.token = None
return self
@@ -70,13 +92,10 @@ class ParserInput:
self.pos += 1
if self.pos >= self.end:
if self.yield_oef:
self.token = Token(TokenKind.EOF, "", -1, -1, -1)
return False
self.token = self.tokens[self.pos]
if self.token.type == TokenKind.EOF and not self.yield_oef:
if self.token.type == TokenKind.EOF:
return False
if skip_whitespace:
+3 -1
View File
@@ -68,7 +68,7 @@ class Token:
if self.type == TokenKind.IDENTIFIER:
value = str(self.value)
elif self.type == TokenKind.WHITESPACE:
value = "<tab>" if self.value[0] == "\t" else "<ws>"
value = "<ws!>" if self.value == "" else "<tab>" if self.value[0] == "\t" else "<ws>"
elif self.type == TokenKind.NEWLINE:
value = "<nl>"
elif self.type == TokenKind.EOF:
@@ -148,6 +148,8 @@ class Keywords(Enum):
POST = "post"
ISA = "isa"
RET = "ret"
WHEN = "when"
PRINT = "print"
class Tokenizer:
+1 -1
View File
@@ -8,7 +8,7 @@ from core.tokenizer import TokenKind, Tokenizer
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.BaseParser import NotInitializedNode
from parsers.BnfNodeParser import ParsingExpression, ParsingExpressionVisitor
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.DefConceptParser import DefConceptNode, NameNode
from parsers.PythonParser import PythonNode
+1 -1
View File
@@ -2,7 +2,7 @@ import core.builtin_helpers
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.DefaultParser import IsaConceptNode
from parsers.DefConceptParser import IsaConceptNode
ALL_STEPS = [
BuiltinConcepts.BEFORE_PARSING,
+250
View File
@@ -0,0 +1,250 @@
from dataclasses import dataclass, field
import core.utils
from core.tokenizer import Keywords, TokenKind, Tokenizer
from parsers.BaseParser import BaseParser, Node, ErrorNode, UnexpectedEofNode, UnexpectedTokenErrorNode
@dataclass()
class CustomGrammarParserNode(Node):
    """
    Base node for all custom grammar parser nodes
    """
    # Raw token list this node was built from; excluded from equality
    # checks and from the generated repr.
    tokens: list = field(compare=False, repr=False)
@dataclass()
class SyntaxErrorNode(CustomGrammarParserNode, ErrorNode):
    """
    Error node for input that is recognized but syntactically invalid.

    Equality is driven by the message; the token list is only compared
    when the other node actually carries one (tokens=None on the other
    side acts as a wildcard, which keeps expected-error fixtures short).
    """
    message: str

    def __eq__(self, other):
        if self is other:
            return True
        if not isinstance(other, SyntaxErrorNode):
            return False
        return (self.message == other.message
                and (other.tokens is None or self.tokens == other.tokens))

    def __hash__(self):
        # Consistent with __eq__: tokens never influence the hash.
        return hash(self.message)
@dataclass()
class KeywordNotFound(CustomGrammarParserNode, ErrorNode):
    """
    Error node emitted when none of the expected keywords is found.

    Equality mirrors SyntaxErrorNode: the token list is only compared
    when the other node actually carries one (tokens=None is a wildcard).
    """
    # The keywords that were expected at this point in the input.
    keywords: list

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if not isinstance(other, KeywordNotFound):
            return False
        if self.keywords != other.keywords:
            return False
        if other.tokens is not None and self.tokens != other.tokens:
            return False
        return True

    def __hash__(self):
        # BUG FIX: self.keywords is a list and lists are unhashable, so
        # hash(self.keywords) raised TypeError whenever a node was hashed
        # (e.g. put in a set/dict); hash an immutable tuple copy instead.
        return hash(tuple(self.keywords))
class BaseCustomGrammarParser(BaseParser):
    """
    Base class for sheerka specific grammars (e.g. the 'def concept' syntax).

    Provides the shared token-level helpers: whitespace skipping, extraction
    of colon-introduced indented bodies, and grouping of a token stream into
    keyword-delimited parts.
    """
    # Number of columns a tab character counts for when measuring indentation.
    DEFAULT_TAB_SIZE = 4

    def __init__(self, name, priority: int, enabled=True):
        super().__init__(name, priority, enabled=enabled)

    @staticmethod
    def skip_white_spaces(tokens):
        """Return the index of the first non-whitespace token (len(tokens) if none)."""
        i = 0
        while i < len(tokens) and tokens[i].type == TokenKind.WHITESPACE:
            i += 1
        return i

    def get_body(self, tokens):
        """
        Get the body of a keyword definition.
        It manages colon body, but the colon must be stripped first.
        Expects NEWLINE then WHITESPACE (the reference indentation), then
        re-indents every following line relative to that first indentation.
        Errors are reported through add_error.
        :param tokens:
        :return: the re-indented body tokens, or None on error
        """
        def get_tab_size(default_tab_size, text):
            # Width of an indentation string: a tab counts for
            # default_tab_size columns, any other character for one.
            # BUG FIX: was `1 if isinstance(c, str) else default_tab_size`,
            # which always yields 1 (iterating a str produces str items),
            # so tabs were never expanded and the result was just len(text).
            return sum(default_tab_size if c == "\t" else 1 for c in text)
        pos = self.skip_white_spaces(tokens)
        if len(tokens) - pos < 3:
            self.add_error(SyntaxErrorNode(tokens, "Body is empty or too short."))
            return None
        if tokens[pos].type != TokenKind.NEWLINE:
            self.add_error(UnexpectedTokenErrorNode("New line not found.", tokens[pos], [TokenKind.NEWLINE]))
            return None
        pos += 1
        if tokens[pos].type != TokenKind.WHITESPACE:
            self.add_error(UnexpectedTokenErrorNode("Indentation not found.", tokens[pos], [TokenKind.WHITESPACE]))
            return None
        indent_size = get_tab_size(self.DEFAULT_TAB_SIZE, tokens[pos].value)
        pos += 1
        i = pos
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    self.add_error(UnexpectedTokenErrorNode("Indentation not found.",
                                                            tokens[i + 1],
                                                            [TokenKind.WHITESPACE]))
                    return None
                if get_tab_size(self.DEFAULT_TAB_SIZE, tokens[i + 1].value) < indent_size:
                    self.add_error(SyntaxErrorNode([tokens[i + 1]], "Invalid indentation."))
                    return None
                # clone before mutating: do not modify the caller's token objects
                tokens[i + 1] = tokens[i + 1].clone()
                tokens[i + 1].value = " " * (get_tab_size(self.DEFAULT_TAB_SIZE, tokens[i + 1].value) - indent_size)
            i += 1
        return tokens[pos:]

    def get_parts(self, keywords, expected_first_token=None):
        """
        Reads Parser Input and groups the tokens by keywords
        ex:
            tokens = Tokenizer("as a b c pre u v w where x y z")
            keywords = ["as", "pre", "where"]
            assert get_parts(keywords) == {
                Keyword("as"): [Token("a"), Token(<ws>), Token("b"), Token(<ws>), Token("c"), Token(<ws>)],
                Keyword("pre"): [Token("u"), Token(<ws>), Token("v"), Token(<ws>), Token("w"), Token(<ws>)],
                Keyword("where"): [Token("x"), Token(<ws>), Token("y"), Token(<ws>), Token("z"), Token(<ws>)]}
        * The order of appearance of the keywords is not important
          "as w pre y where z" and "where z pre y as w" will produce the same dictionary
        * I can use double quote to protect keyword
          where "x y" will produce the entry Keyword("where"): [Token("x"), Token(<ws>), Token("y"), Token(<ws>)]
          where 'x y' will produce the entry Keyword("where"): [Token("'x y'")]
        :param keywords: list of keyword values (strings) that split the input
        :param expected_first_token: must be a Keywords member; when given, the
            input must start with this keyword
        :return: dictionary {Keywords member: [tokens]}, or None on error
        """
        def new_part(t, cma, p):
            """
            Does token t open a new keyword part?
            :param t: token
            :param cma: colon_mode_activated
            :param p: previous token
            :return:
            """
            if t.value not in keywords:
                return False
            if not cma or not p:
                return True
            # in colon mode a keyword only opens a new part at the start of a line
            return p.line != t.line
        if self.parser_input.token is None:
            self.add_error(KeywordNotFound([], keywords))
            return None
        if self.parser_input.token.type == TokenKind.WHITESPACE:
            self.parser_input.next_token()
        token = self.parser_input.token
        if expected_first_token and token.value != expected_first_token.value:
            self.add_error(UnexpectedTokenErrorNode(f"'{expected_first_token.value}' keyword not found.",
                                                    token,
                                                    [expected_first_token]))
            return None
        if token.value not in keywords:
            self.add_error(KeywordNotFound([token], keywords))
            return None
        colon_mode_activated = False  # if activated, use keyword + colon to start a new keyword definition
        previous_token = None
        res = {}
        # More explanations on colon_mode_activated
        # You can use the pattern
        #     def concept <name> as:
        #     <tab> xxx
        #     <tab> yyy
        #     ...
        #
        # It improves readability and allows usage of other keywords inside the block.
        # Example
        #     def concept give the the date as:
        #         from datetime import date   # I can use the 'from' keyword !!!
        #         return date.today()
        #
        # Note that I can choose to use colon or not
        #
        #     def concept in x days as:
        #         from datetime import date
        #         return date.today() - x
        #     where x > 0
        #
        # is a valid declaration
        # loop thru the tokens, and put them in the correct parts entry
        while True:
            if new_part(token, colon_mode_activated, previous_token):
                keyword = Keywords(token.value)
                if keyword in res:
                    # a part is defined more than once
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    break
                res[keyword] = [token]  # to keep track of where it starts
                colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON
                if not self.parser_input.next_token():
                    self.add_error(UnexpectedEofNode(f"While parsing keyword '{keyword.value}'."))
                    break
            else:
                # `keyword` is always bound here: the pre-loop check guarantees
                # that the very first token is a keyword
                res[keyword].append(token)
                if not self.parser_input.next_token(skip_whitespace=False):
                    break
            previous_token = token
            token = self.parser_input.token
        # Post process the result if needed
        for k, v in res.items():
            stripped = core.utils.strip_tokens(v[1:])
            # manage colon first, to be sure that what is protected by the quotes remains protected
            if len(stripped) > 0 and stripped[0].type == TokenKind.COLON:
                body = self.get_body(stripped[1:])
                if body:
                    res[k] = v[0:1] + body
            # replace double quoted strings by their content
            elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"':
                res[k] = v[0:1] + list(Tokenizer(stripped[0].strip_quote, yield_eof=False))
        return res
+2 -14
View File
@@ -6,8 +6,7 @@ from typing import Set
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, LexerError, Token
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True
@@ -718,7 +717,7 @@ class BaseNodeParser(BaseParser):
"""
def __init__(self, name, priority, **kwargs):
super().__init__(name, priority)
super().__init__(name, priority, yield_eof=True)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword
@@ -745,17 +744,6 @@ class BaseNodeParser(BaseParser):
concepts_by_first_keyword = self.get_concepts_by_first_token(context, concepts).body
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
def reset_parser(self, context, parser_input: ParserInput):
self.context = context
self.sheerka = context.sheerka
self.parser_input = parser_input
try:
self.parser_input.reset(False)
except LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
return True
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
"""
Tries to find if there are concepts that match the value of the token
+21 -25
View File
@@ -1,7 +1,7 @@
import logging
from dataclasses import dataclass
from typing import Union
import core.utils
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.sheerka.ExecutionContext import ExecutionContext
@@ -57,7 +57,7 @@ class ErrorNode(Node):
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
token: Token
token: Union[Token, str]
expected_tokens: list
def __eq__(self, other):
@@ -70,31 +70,25 @@ class UnexpectedTokenErrorNode(ErrorNode):
if self.message != other.message:
return False
if self.token.type != other.token.type or self.token.value != other.token.value:
to_compare = self.token.repr_value if isinstance(other.token, str) else self.token
if to_compare != other.token:
return False
if len(self.expected_tokens) != len(other.expected_tokens):
return False
for i, t in enumerate(self.expected_tokens):
if t != other.expected_tokens[i]:
return False
return True
return self.expected_tokens == other.expected_tokens
def __hash__(self):
return hash((self.message, self.token, self.expected_tokens))
@dataclass()
class UnexpectedEof(ErrorNode):
class UnexpectedEofNode(ErrorNode):
message: str
class BaseParser:
PREFIX = "parsers."
def __init__(self, name, priority: int, enabled=True):
def __init__(self, name, priority: int, enabled=True, yield_eof=False):
self.log = get_logger("parsers." + self.__class__.__name__)
self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)
@@ -107,6 +101,7 @@ class BaseParser:
self.context: ExecutionContext = None
self.sheerka = None
self.parser_input: ParserInput = None
self.yield_eof = yield_eof
def __eq__(self, other):
if not isinstance(other, self.__class__):
@@ -126,10 +121,9 @@ class BaseParser:
self.error_sink.clear()
try:
self.parser_input.reset(False)
self.parser_input.next_token()
self.parser_input.reset(self.yield_eof)
except LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
self.add_error(e, False)
return False
return True
@@ -165,12 +159,12 @@ class BaseParser:
value = context.return_value_to_str(r)
context.log(f" Recognized '{value}'", self.name)
def get_return_value_body(self, sheerka, source, tree, try_parse):
def get_return_value_body(self, sheerka, source, parsed, try_parse):
"""
All parsers must return their result in a standard way
:param sheerka:
:param source:
:param tree:
:param parsed:
:param try_parse:
:return:
"""
@@ -178,16 +172,18 @@ class BaseParser:
return self.error_sink[0]
if self.has_error:
return sheerka.new(
BuiltinConcepts.ERROR,
body=self.error_sink
)
if parsed is None:
return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=source,
reason=self.error_sink)
else:
return sheerka.new(BuiltinConcepts.ERROR,
body=self.error_sink)
return sheerka.new(
BuiltinConcepts.PARSER_RESULT,
return sheerka.new(BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=tree,
body=parsed,
try_parsed=try_parse)
@staticmethod
+274
View File
@@ -0,0 +1,274 @@
from dataclasses import dataclass, field
import core.builtin_helpers
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import TokenKind, Keywords
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode
from parsers.BaseParser import Node, ErrorNode, NotInitializedNode, UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser
class ParsingException(Exception):
    """Raised internally to abort parsing; carries the error node that caused it."""
    def __init__(self, error):
        # The error node describing the failure.
        self.error = error
@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    # Raw token list this node was built from; excluded from equality
    # checks and from the generated repr.
    tokens: list = field(compare=False, repr=False)
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    """Marker base class for all error nodes produced by this parser."""
    pass
@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    # The raw input text that could not be handled.
    text: str
@dataclass()
class NameNode(DefaultParserNode):
    """
    A concept name built from a token stream.

    Identity (repr, equality, hash) is entirely derived from the
    normalized name string, not from the raw token list.
    """
    def get_name(self):
        """Join the meaningful tokens into a space-separated name.

        Whitespace tokens are dropped, the EOF token (and anything after
        it) is ignored, and string tokens contribute their unquoted value.
        """
        words = []
        for tok in self.tokens:
            if tok.type == TokenKind.EOF:
                break
            if tok.type == TokenKind.WHITESPACE:
                continue
            if tok.type == TokenKind.STRING:
                words.append(tok.value[1:-1])
            else:
                words.append(str(tok.value))
        return " ".join(words)

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        # Two NameNodes are equal iff they normalize to the same name.
        return isinstance(other, NameNode) and self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())
@dataclass()
class DefConceptNode(DefaultParserNode):
    """
    Parsed 'def concept' declaration: the name plus the optional
    where/pre/post/as/ret parts and the optional 'from' definition.
    """
    # NOTE(review): each default below is a single NotInitializedNode instance
    # shared by every DefConceptNode (evaluated once at class creation) —
    # harmless if the sentinel is never mutated in place; confirm.
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    ret: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()
    definition_type: str = None
    def get_asts(self):
        """
        Collect the parts of this concept that carry a compiled AST.
        :return: dict {ConceptParts member: ReturnValueConcept} for every part
                 whose parser-result body exposes an 'ast_' attribute
        """
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if isinstance(prop_value, ReturnValueConcept) and \
                    isinstance(prop_value.body, ParserResultConcept) and \
                    hasattr(prop_value.body.body, "ast_"):
                asts[part_key] = prop_value
        return asts
@dataclass()
class IsaConceptNode(DefaultParserNode):
    # presumably models a "<concept> isa <set>" declaration — confirm against the isa evaluator
    concept: NameNode = NotInitializedNode()
    set: NameNode = NotInitializedNode()
class DefConceptParser(BaseCustomGrammarParser):
    """
    Parse sheerka specific grammar (like def concept).

    Grammar outline:
        def concept <name> [from [bnf|def] xxx] [as xxx] [where xxx]
                           [pre xxx] [post xxx] [ret xxx]
    """
    KEYWORDS = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST, Keywords.RET]
    KEYWORDS_VALUES = [k.value for k in KEYWORDS]

    def __init__(self, **kwargs):
        # kwargs are accepted (parsers are instantiated generically) but unused
        BaseCustomGrammarParser.__init__(self, "DefConcept", 60)

    def parse(self, context, parser_input: ParserInput):
        """
        Entry point: try to parse a 'def concept' declaration from parser_input.
        :return: a sheerka return value (PARSER_RESULT, NOT_FOR_ME, IS_EMPTY or ERROR)
        """
        # this parser can only manage string text
        if parser_input.from_tokens:
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
            self.log_result(context, parser_input, ret)
            return ret
        # BUG FIX: the log message wrongly referenced 'FunctionParser'
        context.log(f"Parsing '{parser_input}' with DefConceptParser", self.name)
        sheerka = context.sheerka
        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.IS_EMPTY))
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(self.name,
                                    False,
                                    context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        self.parser_input.next_token()
        node = self.parse_def_concept()
        body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
        ret = sheerka.ret(self.name, not self.has_error, body)
        self.log_result(context, parser_input.as_text(), ret)
        return ret

    def parse_def_concept(self):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        :return: a DefConceptNode, or None on error (errors go to the error sink)
        """
        token = self.parser_input.token
        if token.value != Keywords.DEF.value:
            self.add_error(UnexpectedTokenErrorNode("'def' keyword not found.", token, [Keywords.DEF]))
            return None
        self.context.log("Keyword DEF found.", self.name)
        keywords_found = [token]
        self.parser_input.next_token()
        # ## the definition of a concept consists of several parts
        #    Keywords.CONCEPT to get the name of the concept
        #    Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept
        #    Keywords.AS to get the body
        #    Keywords.WHERE to get the conditions to recognize for the variables
        #    Keywords.PRE to get the conditions required to evaluate the concept
        #    Keywords.POST to apply or verify once the concept is executed
        #    Keywords.RET to transform the concept into another concept
        parts = self.get_parts(self.KEYWORDS_VALUES, expected_first_token=Keywords.CONCEPT)
        if parts is None:
            return None
        keywords_found.extend([t[0] for t in parts.values()])  # keep track of all keywords found
        node = DefConceptNode(keywords_found)
        # get the name
        node.name = self.get_concept_name(parts[Keywords.CONCEPT])
        # get definition
        node.definition_type, node.definition = self.get_concept_definition(node, parts)
        # get the bodies
        node.body = self.get_ast(Keywords.AS, parts)
        node.where = self.get_ast(Keywords.WHERE, parts)
        node.pre = self.get_ast(Keywords.PRE, parts)
        node.post = self.get_ast(Keywords.POST, parts)
        node.ret = self.get_ast(Keywords.RET, parts)
        return node

    def get_concept_name(self, tokens):
        """
        Build the NameNode from the tokens of the CONCEPT part.
        :param tokens: CONCEPT part tokens; tokens[0] is the 'concept' keyword itself
        :return: a NameNode, or None on error
        """
        name_tokens = core.utils.strip_tokens(tokens[1:])  # skip the 'concept' keyword
        if len(name_tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Name is mandatory"))
            return None
        for token in name_tokens:
            if token.type == TokenKind.NEWLINE:
                self.add_error(SyntaxErrorNode([token], "Newline are not allowed in name."))
                return None
        name_node = NameNode(name_tokens)
        return name_node

    def get_concept_definition(self, current_concept_def, parts):
        """
        Extract the FROM part (plain or bnf definition).
        :return: (definition_type, definition); (None, NotInitializedNode()) when absent or invalid
        """
        if Keywords.FROM not in parts:
            return None, NotInitializedNode()
        tokens = parts[Keywords.FROM]
        if len(tokens) == 1:
            # only the 'from' keyword itself is present
            self.add_error(SyntaxErrorNode([], f"Empty '{tokens[0].value}' declaration."), False)
            return None, NotInitializedNode()
        if tokens[1].value == Keywords.BNF.value:
            return self.get_concept_bnf_definition(current_concept_def, core.utils.strip_tokens(tokens[2:]))
        return self.get_concept_simple_definition(core.utils.strip_tokens(tokens[0:]))

    def get_concept_bnf_definition(self, current_concept_def, tokens):
        """
        Parse a 'from bnf ...' definition with the BnfParser.
        :return: (DEFINITION_TYPE_BNF, parsing result), or (None, NotInitializedNode()) on error
        """
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Empty 'bnf' declaration"), False)
            return None, NotInitializedNode()
        if tokens[0].type == TokenKind.COLON:
            tokens = self.get_body(tokens[1:])
        bnf_regex_parser = BnfParser()
        desc = f"Resolving BNF {current_concept_def.definition}"
        with self.context.push(BuiltinConcepts.INIT_BNF,
                              current_concept_def,
                              who=self.name,
                              obj=current_concept_def,
                              desc=desc) as sub_context:
            parsing_result = bnf_regex_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)
            if not parsing_result.status:
                self.add_error(parsing_result.value)
                return None, NotInitializedNode()
        return DEFINITION_TYPE_BNF, parsing_result

    def get_concept_simple_definition(self, tokens):
        """
        Parse a 'from [def] <name>' definition.
        :return: (DEFINITION_TYPE_DEF, NameNode), or (None, NotInitializedNode()) on error
        """
        # skip 'from' and an optional leading 'def'
        start = 2 if tokens[1].value == Keywords.DEF.value else 1
        tokens = core.utils.strip_tokens(tokens[start:])
        if len(tokens) == 0:
            # (stray f-prefix removed: the string has no placeholder)
            self.add_error(SyntaxErrorNode([], "Empty 'from' declaration."), False)
            return None, NotInitializedNode()
        if tokens[0].type == TokenKind.COLON:
            tokens = self.get_body(tokens[1:])
        return DEFINITION_TYPE_DEF, NameNode(tokens)

    def get_ast(self, keyword, parts):
        """
        Parse the tokens of one part (as/where/pre/post/ret) with the other parsers.
        :param keyword: the Keywords member identifying the part
        :param parts: dictionary produced by get_parts
        :return: the parsing result, NotInitializedNode() when the part is absent,
                 or None on error
        """
        if keyword not in parts:
            return NotInitializedNode()
        tokens = parts[keyword]
        if len(tokens) == 1:
            self.add_error(SyntaxErrorNode(tokens, f"Empty '{tokens[0].value}' declaration."))
            return None
        source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens[1:])
        parsed = core.builtin_helpers.parse_unrecognized(self.context,
                                                        source,
                                                        parsers="all",
                                                        who=self.name,
                                                        prop=keyword,
                                                        filter_func=core.builtin_helpers.expect_one)
        if not parsed.status:
            self.add_error(parsed.value)
            return None
        return parsed
-509
View File
@@ -1,509 +0,0 @@
from dataclasses import dataclass, field
import core.builtin_helpers
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import Tokenizer, TokenKind, Keywords
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from parsers.BnfParser import BnfParser
class ParsingException(Exception):
def __init__(self, error):
self.error = error
@dataclass()
class DefaultParserNode(Node):
"""
Base node for all default parser nodes
"""
tokens: list = field(compare=False, repr=False)
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
pass
@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
message: str
expected_tokens: list
@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
"""
The input is recognized, but there is a syntax error
"""
message: str
@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
"""
The input is not recognized
"""
text: str
@dataclass()
class NameNode(DefaultParserNode):
def get_name(self):
name = ""
first = True
for token in self.tokens:
if token.type == TokenKind.EOF:
break
if token.type == TokenKind.WHITESPACE:
continue
if not first:
name += " "
name += token.value[1:-1] if token.type == TokenKind.STRING else str(token.value)
first = False
return name
def __repr__(self):
return self.get_name()
def __eq__(self, other):
if not isinstance(other, NameNode):
return False
return self.get_name() == other.get_name()
def __hash__(self):
return hash(self.get_name())
@dataclass()
class DefConceptNode(DefaultParserNode):
name: NameNode = NotInitializedNode()
where: ReturnValueConcept = NotInitializedNode()
pre: ReturnValueConcept = NotInitializedNode()
post: ReturnValueConcept = NotInitializedNode()
body: ReturnValueConcept = NotInitializedNode()
ret: ReturnValueConcept = NotInitializedNode()
definition: ReturnValueConcept = NotInitializedNode()
definition_type: str = None
def get_asts(self):
asts = {}
for part_key in ConceptParts:
prop_value = getattr(self, part_key.value)
if isinstance(prop_value, ReturnValueConcept) and \
isinstance(prop_value.body, ParserResultConcept) and \
hasattr(prop_value.body.body, "ast_"):
asts[part_key] = prop_value
return asts
@dataclass()
class IsaConceptNode(DefaultParserNode):
concept: NameNode = NotInitializedNode()
set: NameNode = NotInitializedNode()
class DefaultParser(BaseParser):
"""
Parse sheerka specific grammar (like def concept)
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "Default", 60)
@staticmethod
def fix_indentation(tokens):
"""
In the following example
def concept add one to a as:
def func(x):
return x+1
func(a)
indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error
:param tokens:
:return:
"""
if len(tokens) == 0:
return tokens
tokens = tokens.copy() # do not modify ParserInput.tokens
if tokens[0].type != TokenKind.COLON:
return tokens
if len(tokens) < 3:
raise ParsingException(UnexpectedTokenErrorNode(tokens[0:2],
"Unexpected end of file",
[TokenKind.NEWLINE]))
pos = DefaultParser.eat_white_space(tokens, 1)
if tokens[pos].type != TokenKind.NEWLINE:
raise ParsingException(UnexpectedTokenErrorNode([tokens[pos]],
"Unexpected token after colon",
[TokenKind.NEWLINE]))
pos += 1
if tokens[pos].type != TokenKind.WHITESPACE:
raise ParsingException(SyntaxErrorNode([tokens[pos]],
"Indentation not found."))
indent_size = len(tokens[pos].value)
pos += 1
# now fix the other indentations
# KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant,
# as I now have an editor in interactive mode
i = pos
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
if tokens[i + 1].type != TokenKind.WHITESPACE:
return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
if len(tokens[i + 1].value) < indent_size:
return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
tokens[i + 1] = tokens[i + 1].clone()
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
i += 1
return tokens[pos:]
@staticmethod
def eat_white_space(tokens, index):
if index >= len(tokens):
return index
while index < len(tokens) and tokens[index].type == TokenKind.WHITESPACE:
index += 1
return index
def reset_parser(self, context, parser_input):
self.context = context
self.sheerka = context.sheerka
self.parser_input = parser_input
self.parser_input.reset()
self.parser_input.next_token()
def parse(self, context, parser_input: ParserInput):
# default parser can only manage string text
if parser_input.from_tokens:
ret = context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
try:
self.reset_parser(context, parser_input)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))
# If a error is found it must be sent to error_sink
# tree must contain what was recognized
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.get_return_value_body(context.sheerka, parser_input.as_text(), tree, tree)
ret = self.sheerka.ret(
self.name,
not self.has_error,
body)
self.log_result(context, parser_input.as_text(), ret)
return ret
def parse_statement(self):
token = self.parser_input.token
if token.value == Keywords.DEF.value:
self.parser_input.next_token()
self.context.log("Keyword DEF found.", self.name)
return self.parse_def_concept(token)
return self.add_error(CannotHandleErrorNode([token], ""))
def parse_def_concept(self, def_token):
"""
def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
"""
# init
keywords_tokens = [def_token]
concept_found = DefConceptNode(keywords_tokens)
# ##
# ## the definition of a concept consists of several parts
# ## Keywords.CONCEPT to get the name of the concept
# ## Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept
# ## Keywords.AS to get the body
# ## Keywords.WHERE to get the conditions to recognize for the variables
# ## Keywords.PRE to know if the conditions to evaluate the concept
# ## Keywords.POST to apply or verify once the concept is executed
# Regroup the tokens by parts
first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)
if first_token.type == TokenKind.EOF:
return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))
# get the name
concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)
# get the definition
def_type, def_value = self.get_concept_definition(concept_found, tokens_found_by_parts)
concept_found.definition_type = def_type
concept_found.definition = def_value
# get the ASTs for the remaining parts
asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
concept_found.where = asts_found_by_parts[Keywords.WHERE]
concept_found.pre = asts_found_by_parts[Keywords.PRE]
concept_found.post = asts_found_by_parts[Keywords.POST]
concept_found.body = asts_found_by_parts[Keywords.AS]
concept_found.ret = asts_found_by_parts[Keywords.RET]
return concept_found
def regroup_tokens_by_parts(self, keywords_tokens):
def new_part(t, cma, p):
"""
:param t: token
:param cma: concept_mode_activated
:param p: previous token
:return:
"""
if not t.value in def_concept_parts:
return False
if not cma or not p:
return True
return p.line != t.line
def_concept_parts = [Keywords.CONCEPT.value,
Keywords.FROM.value,
Keywords.AS.value,
Keywords.WHERE.value,
Keywords.PRE.value,
Keywords.POST.value,
Keywords.RET.value]
# tokens found, when trying to recognize the parts
tokens_found_by_parts = {
Keywords.CONCEPT: [],
Keywords.FROM: None,
Keywords.AS: None,
Keywords.WHERE: None,
Keywords.PRE: None,
Keywords.POST: None,
Keywords.RET: None,
}
current_part = Keywords.CONCEPT
token = self.parser_input.token
first_token = token
colon_mode_activated = False # if activate, use keyword + colon to start a new keyword definition
previous_token = None
# more explanation on colon_mode_activated
# You can use the pattern
# def concept <name> as:
# <tab> xxx
# <tab> yyy
# ...
#
# It allows to readability and usage of other keywords inside the bloc#
# Example
# def concept give the the date as:
# from datetime import date
# return date.today()
#
# 'from datetime' will not be considered as a keyword because it's lead by a tab
# whereas in
# def concept in x days as:
# from datetime import date
# return date.today() - x
# where x > 0
#
# where will be recognized as the keyword because it is the first word of the line
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry
while token.type != TokenKind.EOF:
if new_part(token, colon_mode_activated, previous_token):
keywords_tokens.append(token) # keep track of the keywords
keyword = Keywords(token.value)
if tokens_found_by_parts[keyword]:
# a part is defined more than once
self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
tokens_found_by_parts[current_part].append(token) # adds the token again
else:
tokens_found_by_parts[keyword] = [token]
current_part = keyword
colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON
self.parser_input.next_token()
else:
tokens_found_by_parts[current_part].append(token)
self.parser_input.next_token(False)
previous_token = token
token = self.parser_input.token
return first_token, tokens_found_by_parts
def get_concept_name(self, first_token, tokens_found_by_parts):
    """
    Build the NameNode for the concept being defined.

    :param first_token: first token of the declaration; expected to be the
        'concept' keyword (the 'def' keyword was consumed by the caller)
    :param tokens_found_by_parts: tokens grouped by declaration part
    :return: a NameNode built from the tokens following the 'concept' keyword
    """
    name_first_token_index = 1
    token = self.parser_input.token
    if first_token.value != Keywords.CONCEPT.value:
        self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
        name_first_token_index = 0  # no 'concept' keyword to skip over
    name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
    if len(name_tokens) == name_first_token_index:
        self.add_error(SyntaxErrorNode([], "Name is mandatory"))
    # guard against an empty token list before peeking at the last token
    # (fixes an IndexError when the name part is completely empty)
    if name_tokens and name_tokens[-1].type == TokenKind.NEWLINE:
        name_tokens = name_tokens[:-1]  # strip trailing newlines
    if TokenKind.NEWLINE in [t.type for t in name_tokens]:
        self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))
    tokens = name_tokens[name_first_token_index:]
    stripped = core.utils.strip_tokens(tokens)
    # a name that is a single double-quoted string is expanded: its content
    # is re-tokenized so keywords can be used verbatim inside the name
    if len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"':
        tokens = list(Tokenizer(stripped[0].strip_quote, yield_eof=False))
    return NameNode(tokens)
def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
    """
    Resolve the 'from' part of a declaration, dispatching to the BNF or
    the simple definition path.

    :param current_concept_def: the concept being defined
    :param tokens_found_by_parts: tokens grouped by declaration part
    :return: (definition type, definition node) — (None, NotInitializedNode())
        when the part is absent or empty
    """
    definition_tokens = tokens_found_by_parts[Keywords.FROM]
    if definition_tokens is None:
        # no 'from' part was declared at all
        return None, NotInitializedNode()
    if len(definition_tokens) == 1:
        # only the keyword itself is present
        self.add_error(SyntaxErrorNode([], "Empty declaration"), False)
        return None, NotInitializedNode()
    if definition_tokens[1].value != Keywords.BNF.value:
        return self.get_concept_simple_definition(definition_tokens)
    return self.get_concept_bnf_definition(current_concept_def, definition_tokens)
def get_concept_bnf_definition(self, current_concept_def, definition_tokens):
    """
    Resolve a 'from bnf ...' definition by running the BNF parser inside a
    dedicated sub-context.

    :param current_concept_def: the concept being defined
    :param definition_tokens: tokens of the 'from' part (keyword, 'bnf', body...)
    :return: (DEFINITION_TYPE_BNF, parsing result) on success,
        (None, NotInitializedNode()) on any error
    """
    try:
        # drop the 'from bnf' tokens, then normalize multi-line indentation
        tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[2:]))
    except ParsingException as ex:
        self.add_error(ex.error)
        return None, NotInitializedNode()
    if len(tokens) == 0:
        self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
        return None, NotInitializedNode()
    bnf_regex_parser = BnfParser()
    desc = f"Resolving BNF {current_concept_def.definition}"
    # run the BNF parsing inside a pushed sub-context so the result is
    # recorded against the INIT_BNF step of the current concept
    with self.context.push(BuiltinConcepts.INIT_BNF,
                           current_concept_def,
                           who=self.name,
                           obj=current_concept_def,
                           desc=desc) as sub_context:
        parsing_result = bnf_regex_parser.parse(sub_context, tokens)
        sub_context.add_values(return_values=parsing_result)
    if not parsing_result.status:
        self.add_error(parsing_result.value)
        return None, NotInitializedNode()
    return DEFINITION_TYPE_BNF, parsing_result
def get_concept_simple_definition(self, definition_tokens):
    """
    Resolve a plain (non-BNF) 'from ...' definition.

    :param definition_tokens: tokens of the 'from' part
    :return: (DEFINITION_TYPE_DEF, NameNode) on success,
        (None, NotInitializedNode()) on any error
    """
    # an optional 'def' keyword may follow 'from'; skip it when present
    start = 1
    if definition_tokens[1].value == Keywords.DEF.value:
        start = 2
    try:
        body_tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[start:]))
    except ParsingException as ex:
        self.add_error(ex.error)
        return None, NotInitializedNode()
    if not body_tokens:
        self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False)
        return None, NotInitializedNode()
    return DEFINITION_TYPE_DEF, NameNode(body_tokens)
def get_concept_parts(self, tokens_found_by_parts):
    """
    Parse the remaining declaration parts (as/where/pre/post/ret) by
    delegating each part's tokens to the other registered parsers.

    :param tokens_found_by_parts: tokens grouped by declaration part
    :return: dict mapping each part keyword to its parsed result;
        entries stay NotInitializedNode() when a part is absent or failed
    """
    asts_found_by_parts = {
        Keywords.AS: NotInitializedNode(),
        Keywords.WHERE: NotInitializedNode(),
        Keywords.PRE: NotInitializedNode(),
        Keywords.POST: NotInitializedNode(),
        Keywords.RET: NotInitializedNode()
    }
    for keyword, tokens in tokens_found_by_parts.items():
        if keyword in (Keywords.CONCEPT, Keywords.FROM):
            continue  # name and definition are handled separately
        if tokens is None:
            continue  # part not declared: nothing to do
        if len(tokens) == 1:  # only the keyword itself: empty declaration
            self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
            continue
        try:
            tokens = self.fix_indentation(tokens[1:])  # manage multi-lines declarations
        except ParsingException as ex:
            self.add_error(ex.error)
            continue
        # ask the other parsers if they recognize the tokens
        source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens)
        parsed = core.builtin_helpers.parse_unrecognized(self.context,
                                                        source,
                                                        parsers="all",
                                                        who=self.name,
                                                        prop=keyword,
                                                        filter_func=core.builtin_helpers.expect_one)
        if not parsed.status:
            self.add_error(parsed.value)
            continue
        asts_found_by_parts[keyword] = parsed
    return asts_found_by_parts
+5 -4
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import LexerError, TokenKind, Token
from parsers.BaseParser import Node, BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, ErrorNode
from parsers.BaseParser import Node, BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, ErrorNode
class ExprNode(Node):
@@ -189,7 +189,7 @@ class ExpressionParser(BaseParser):
"""
def __init__(self, **kwargs):
super().__init__("Expression", 50, False)
super().__init__("Expression", 50, False, yield_eof=True)
def parse(self, context, parser_input: ParserInput):
"""
@@ -215,6 +215,7 @@ class ExpressionParser(BaseParser):
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
self.parser_input.next_token()
tree = self.parse_or()
token = self.parser_input.token
if token and token.type != TokenKind.EOF:
@@ -240,7 +241,7 @@ class ExpressionParser(BaseParser):
self.parser_input.next_token()
expr = self.parse_and()
if expr is None:
self.add_error(UnexpectedEof("When parsing 'or'"))
self.add_error(UnexpectedEofNode("When parsing 'or'"))
return OrNode(*parts)
parts.append(expr)
token = self.parser_input.token
@@ -258,7 +259,7 @@ class ExpressionParser(BaseParser):
self.parser_input.next_token()
expr = self.parse_names()
if expr is None:
self.add_error(UnexpectedEof("When parsing 'and'"))
self.add_error(UnexpectedEofNode("When parsing 'and'"))
return AndNode(*parts)
parts.append(expr)
token = self.parser_input.token
+132
View File
@@ -0,0 +1,132 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.builtin_helpers import parse_unrecognized, expect_one
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import Keywords
from core.utils import strip_tokens
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, KeywordNotFound
from parsers.BaseParser import BaseParser, Node
@dataclass
class FormatAstNode:
    """Base class for the nodes of the internal format-description AST."""
    pass
@dataclass
class FormatAstRawText(FormatAstNode):
    """Format AST leaf holding raw text used as-is."""
    text: str  # the raw text of the 'print' part
@dataclass
class FormatRuleNode(Node):
    """Result node for a parsed 'when ... print ...' formatting rule."""
    tokens: dict  # tokens grouped by keyword part, as returned by get_parts()
    rule: ReturnValueConcept = None  # parsed result of the 'when' part
    format_ast: FormatAstNode = None  # parsed result of the 'print' part
class FormatRuleParser(BaseCustomGrammarParser):
    """
    Class that will parse formatting rules definitions

    when xxx print yyy

    where xxx will be evaluated in the context of BuiltinConcepts.EVAL_QUESTION_REQUESTED
    and yyy is an internal way to describe a format (yet another one)
    """
    KEYWORDS = [Keywords.WHEN, Keywords.PRINT]
    KEYWORDS_VALUES = [k.value for k in KEYWORDS]

    def __init__(self, **kwargs):
        BaseCustomGrammarParser.__init__(self, "FormatRule", 60)

    def parse(self, context, parser_input: ParserInput):
        """
        Parse a 'when ... print ...' formatting rule.

        :param context: execution context
        :param parser_input: ParserInput holding the text to parse
        :return: a sheerka return value wrapping the parsed rule on success;
            NOT_FOR_ME / IS_EMPTY / ERROR concepts otherwise, or None when
            the input is not a ParserInput at all
        """
        if not isinstance(parser_input, ParserInput):
            return None
        if parser_input.from_tokens:
            # token-based inputs come from other parsers: not our job
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
            self.log_result(context, parser_input, ret)
            return ret

        # fixed: the log message used to say 'FunctionParser' (copy/paste error)
        context.log(f"Parsing '{parser_input}' with FormatRuleParser", self.name)
        sheerka = context.sheerka
        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.IS_EMPTY))
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(self.name,
                                    False,
                                    context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        self.parser_input.next_token()
        rule = self.parse_rule()
        body = self.get_return_value_body(sheerka, parser_input.as_text(), rule, rule)
        ret = sheerka.ret(self.name, not self.has_error, body)
        self.log_result(context, parser_input.as_text(), ret)
        return ret

    def parse_rule(self):
        """
        Split the input into its 'when'/'print' parts and resolve both.

        :return: a FormatRuleNode (possibly partially filled when a sub-part
            failed to parse), or None when the parts could not be found
        """
        parts = self.get_parts(self.KEYWORDS_VALUES)
        if parts is None:
            return None
        node = FormatRuleNode(parts)
        try:
            res = self.get_when(parts[Keywords.WHEN])
            if res is None:
                return node
            node.rule = res

            parsed = self.get_print(parts[Keywords.PRINT])
            if parsed is None:
                return node
            node.format_ast = parsed
        except KeyError as e:
            # a mandatory keyword part is missing from the parts dict
            self.add_error(KeywordNotFound([], [e.args[0].value]))
            return None
        return node

    def get_when(self, tokens):
        """
        Validate the when part of the rule.
        :param tokens: tokens of the part, the first one being the 'when' keyword
        :return: the parsed result, or None on error
        """
        source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, strip_tokens(tokens[1:]))
        parsed = parse_unrecognized(self.context,
                                    source,
                                    parsers="all",
                                    who=self.name,
                                    prop=Keywords.WHEN,
                                    filter_func=expect_one)
        if not parsed.status:
            self.add_error(parsed.value)
            return None
        return parsed

    def get_print(self, tokens):
        """
        Validate the print part
        :param tokens: tokens of the part, the first one being the 'print' keyword
        :return: a FormatAstRawText wrapping the part's raw text
        """
        source = BaseParser.get_text_from_tokens(strip_tokens(tokens[1:]))
        return FormatAstRawText(source)
+12 -9
View File
@@ -7,7 +7,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, Node
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, Node
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
# No need to check for Python code as the source code node will resolve to python code anyway
@@ -143,7 +143,7 @@ class FunctionParser(BaseParser):
so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
:param kwargs:
"""
super().__init__("Function", 55, True)
super().__init__("Function", 55)
self.sep = sep
self.longest_concepts_only = longest_concepts_only
self.record_errors = True
@@ -179,6 +179,7 @@ class FunctionParser(BaseParser):
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
self.parser_input.next_token()
node = self.parse_function()
if self.parser_input.next_token():
@@ -219,7 +220,7 @@ class FunctionParser(BaseParser):
return None
if not self.parser_input.next_token():
self.add_error(UnexpectedEof(f"Unexpected EOF while parsing left parenthesis"))
self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing left parenthesis"))
return None
token = self.parser_input.token
@@ -231,7 +232,7 @@ class FunctionParser(BaseParser):
start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2])
if not self.parser_input.next_token():
self.add_error(UnexpectedEof(f"Unexpected EOF after left parenthesis"))
self.add_error(UnexpectedEofNode(f"Unexpected EOF after left parenthesis"))
return FunctionNode(start_node, None, None)
params = self.parse_parameters()
@@ -239,7 +240,7 @@ class FunctionParser(BaseParser):
return FunctionNode(start_node, None, params)
token = self.parser_input.token
if token.type != TokenKind.RPAR:
if not token or token.type != TokenKind.RPAR:
self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found",
token,
[TokenKind.RPAR]))
@@ -261,7 +262,7 @@ class FunctionParser(BaseParser):
token = self.parser_input.token
if token.type == TokenKind.EOF:
self.add_error(UnexpectedEof(f"Unexpected EOF while parsing parameters"))
self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing parameters"))
return None
if token.type == TokenKind.RPAR:
@@ -269,10 +270,12 @@ class FunctionParser(BaseParser):
if token.value == self.sep:
sep_pos = self.parser_input.pos
self.parser_input.next_token()
has_next = self.parser_input.next_token() # it's before add_sep() to capture trailing whitespace
function_parameter.add_sep(sep_pos,
self.parser_input.pos - 1,
self.parser_input.tokens[sep_pos: self.parser_input.pos])
if not has_next:
break
return nodes
@@ -292,8 +295,8 @@ class FunctionParser(BaseParser):
tokens = []
while True:
token = self.parser_input.token
# if token is None:
# break
if token is None:
break
if token.value == self.sep or token.type == TokenKind.RPAR:
break
+2 -2
View File
@@ -29,8 +29,8 @@ class SheerkaPrinter:
def __init__(self, sheerka):
self.sheerka = sheerka
self.formatter = Formatter()
self.custom_concepts_printers = None
self.reset()
self.custom_concepts_printers = {}
# self.reset()
def reset(self):
self.custom_concepts_printers = {
+1 -1
View File
@@ -27,7 +27,7 @@ class BaseTest:
where="isinstance(a, int) and isinstance(b, int)\n",
pre="isinstance(a, int) and isinstance(b, int)\n",
post="isinstance(res, int)\n",
body="def func(x,y):\n return x+y\nfunc(a,b)\n",
body="def func(x,y):\n return x+y\nfunc(a,b)",
desc="specific description")
concept.def_var("a", "value1")
concept.def_var("b", "value2")
+64 -16
View File
@@ -1,6 +1,6 @@
import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer
from core.tokenizer import Tokenizer, TokenKind
@pytest.mark.parametrize("text, start, end, expected", [
@@ -14,38 +14,86 @@ def test_i_can_use_parser_input(text, start, end, expected):
assert parser_input.as_text() == expected
def test_i_can_get_the_next_token():
parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'").reset()
def test_i_can_get_the_next_token_when_yield_eof_is_activated():
parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=True).reset()
res = []
parser_input.next_token()
while True:
res.append(f"{parser_input.token.repr_value}")
if parser_input.token.type == TokenKind.EOF:
break
parser_input.next_token()
expected = ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'", '<EOF>']
assert res == expected
def test_i_can_get_the_next_token_when_yield_eof_is_deactivated():
parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=False).reset()
res = []
while parser_input.next_token():
res.append(f"{parser_input.token.str_value}")
res.append(f"{parser_input.token.repr_value}")
assert res == ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'", '']
expected = ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'"]
assert res == expected
def test_i_can_get_the_next_token_when_start_and_end_are_provided():
parser_input = ParserInput("def concept a concept name from bnf 'xyz' as 'xyz'", start=4, end=9).reset()
res = []
while parser_input.next_token(skip_whitespace=False):
res.append(f"{parser_input.token.str_value}")
res.append(f"{parser_input.token.repr_value}")
assert res == ['a', ' ', 'concept', ' ', 'name', ' ']
assert res == ['a', '<ws>', 'concept', '<ws>', 'name', '<ws>']
def test_i_can_get_the_next_token_when_initialised_with_tokens():
tokens = list(Tokenizer(" def concept a as 'xyz' "))
parser_input = ParserInput(" def concept a as 'xyz' ", tokens).reset()
def test_i_can_get_next_token_when_yield_eof_is_false():
parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=False).reset()
res = []
while parser_input.next_token():
res.append(f"{parser_input.token.str_value}")
res.append(f"{parser_input.token.repr_value}")
assert res == ['def', 'concept', 'a', 'as', "'xyz'", '']
assert res == ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'"]
tokens = list(Tokenizer(" def concept a as 'xyz' ", yield_eof=False))
parser_input = ParserInput(" def concept a as 'xyz' ", tokens).reset()
def test_i_can_override_yield_oef_within_reset():
parser_input = ParserInput("def concept a from bnf 'xyz' as 'xyz'", yield_oef=False).reset(yield_oef=True)
res = []
parser_input.next_token()
while True:
res.append(f"{parser_input.token.repr_value}")
if parser_input.token.type == TokenKind.EOF:
break
parser_input.next_token()
assert res == ['def', 'concept', 'a', 'from', 'bnf', "'xyz'", 'as', "'xyz'", "<EOF>"]
assert not parser_input.yield_oef
@pytest.mark.parametrize("list_has_eof, parser_has_eof, reset_has_eof", [
(True, True, True),
(True, False, True),
(False, True, True),
(False, False, True),
(True, True, False),
(True, False, False),
(False, True, False),
(False, False, False),
])
def test_i_can_get_the_next_token_when_initialised_with_tokens(list_has_eof, parser_has_eof, reset_has_eof):
tokens = list(Tokenizer(" def concept a as 'xyz' ", yield_eof=list_has_eof))
parser_input = ParserInput(" def concept a as 'xyz' ", tokens, yield_oef=parser_has_eof).reset()
parser_input.reset(reset_has_eof)
res = []
while parser_input.next_token():
res.append(f"{parser_input.token.str_value}")
res.append(f"{parser_input.token.repr_value}")
assert res == ['def', 'concept', 'a', 'as', "'xyz'"]
expected = ['def', 'concept', 'a', 'as', "'xyz'"]
if reset_has_eof:
expected.append("<EOF>")
assert res == expected
def test_i_can_parse_twice():
+1 -1
View File
@@ -28,7 +28,7 @@ class TestSheerkaUsingMemoryBasedSheerka(TestUsingMemoryBasedSheerka):
sheerka = self.get_sheerka()
# test existence of some parser (not all)
assert "parsers.DefaultParser.DefaultParser" in sheerka.parsers
assert "parsers.DefConceptParser.DefConceptParser" in sheerka.parsers
assert "parsers.BnfNodeParser.BnfNodeParser" in sheerka.parsers
assert "parsers.SyaNodeParser.SyaNodeParser" in sheerka.parsers
assert "parsers.AtomNodeParser.AtomNodeParser" in sheerka.parsers
+5 -5
View File
@@ -55,15 +55,15 @@ def test_i_can_get_base_classes():
# example of classes that should be in the result
base_parser = core.utils.get_class("parsers.BaseParser.BaseParser")
default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser")
def_concept_parser = core.utils.get_class("parsers.DefConceptParser.DefConceptParser")
exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")
python_parser = core.utils.get_class("parsers.PythonParser.PythonParser")
node = core.utils.get_class("parsers.BaseParser.Node")
def_concept_node = core.utils.get_class("parsers.DefaultParser.DefConceptNode")
def_concept_node = core.utils.get_class("parsers.DefConceptParser.DefConceptNode")
python_node = core.utils.get_class("parsers.PythonParser.PythonNode")
assert base_parser in classes
assert default_parser in classes
assert def_concept_parser in classes
assert exact_concept_parser in classes
assert python_parser in classes
assert node in classes
@@ -76,13 +76,13 @@ def test_i_can_get_sub_classes():
# example of classes that should be (or not) in the result
base_parser = core.utils.get_class("parsers.BaseParser.BaseParser")
default_parser = core.utils.get_class("parsers.DefaultParser.DefaultParser")
def_concept_parser = core.utils.get_class("parsers.DefConceptParser.DefConceptParser")
exact_concept_parser = core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser")
python_parser = core.utils.get_class("parsers.PythonParser.PythonParser")
bnf_node_parser = core.utils.get_class("parsers.BnfNodeParser.BnfNodeParser")
assert base_parser not in sub_classes
assert default_parser in sub_classes
assert def_concept_parser in sub_classes
assert exact_concept_parser in sub_classes
assert python_parser in sub_classes
assert bnf_node_parser in sub_classes
+1 -1
View File
@@ -8,7 +8,7 @@ from evaluators.AddConceptEvaluator import AddConceptEvaluator
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import Sequence, StrMatch, ZeroOrMore, ConceptExpression
from parsers.BnfParser import BnfParser
from parsers.DefaultParser import DefConceptNode, NameNode
from parsers.DefConceptParser import DefConceptNode, NameNode
from parsers.PythonParser import PythonNode, PythonParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -3,7 +3,7 @@ from core.builtin_concepts import ReturnValueConcept, ParserResultConcept, Built
from core.concept import Concept
from core.tokenizer import Tokenizer
from evaluators.AddConceptInSetEvaluator import AddConceptInSetEvaluator
from parsers.DefaultParser import IsaConceptNode, NameNode
from parsers.DefConceptParser import IsaConceptNode, NameNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -0,0 +1,227 @@
import pytest
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, TokenKind
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode, KeywordNotFound
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
class TestBaseCustomGrammarParser(TestUsingMemoryBasedSheerka):
@staticmethod
def compare_results(actual, expected, compare_str=False):
resolved_expected = {}
for k, v in expected.items():
tokens = list(Tokenizer(v, yield_eof=False))
resolved_expected[k] = [tokens[0]] + tokens[2:]
def get_better_representation(value):
better_repr = {}
for k, tokens in value.items():
value = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
better_repr[k] = [tokens[0].repr_value, value]
return better_repr
actual_to_compare = get_better_representation(actual)
expected_to_compare = get_better_representation(resolved_expected)
assert actual_to_compare == expected_to_compare
def init_parser(self, text):
sheerka, context = self.init_concepts()
parser = BaseCustomGrammarParser("TestBaseCustomLanguageParser", 0)
parser.reset_parser(context, ParserInput(text))
parser.parser_input.next_token(False) # do not skip starting whitespaces
return sheerka, context, parser
@pytest.mark.parametrize("text, expected", [
("when xxx yyy", {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
(" when xxx", {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(self, text, expected):
sheerka, context, parser = self.init_parser(text)
res = parser.get_parts(["when", "print"])
self.compare_results(res, expected)
def test_i_can_get_parts_when_multilines(self):
text = """when
def func(x):
return x+1
func(a)
"""
expected = {Keywords.WHEN: "when def func(x):\n\treturn x+1\nfunc(a)\n"}
sheerka, context, parser = self.init_parser(text)
res = parser.get_parts(["when"])
self.compare_results(res, expected)
@pytest.mark.parametrize("text", [
"",
"no keyword",
"anything before when xxx print yyy",
])
def test_i_cannot_get_parts_when_no_keyword_found(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when", "print"]) is None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], KeywordNotFound)
assert parser.error_sink[0].keywords == ['when', 'print']
def test_i_cannot_get_part_when_the_first_expected_token_is_incorrect(self):
sheerka, context, parser = self.init_parser("when xxx print yyy")
assert parser.get_parts(["when", "print"], Keywords.PRINT) is None
assert parser.error_sink == [UnexpectedTokenErrorNode(f"'print' keyword not found.",
"when",
[Keywords.PRINT])]
def test_i_can_detect_when_a_keyword_appears_several_times(self):
sheerka, context, parser = self.init_parser("print hello when True print True")
parser.get_parts(["print"])
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], SyntaxErrorNode)
assert parser.error_sink[0].message == "Too many 'print' declarations."
@pytest.mark.parametrize("text", [
"print",
"print ",
"when xxx print",
"when xxx print ",
])
def test_i_can_detect_incorrect_end_of_file_after_keyword(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["print", "when"]) is not None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], UnexpectedEofNode)
assert parser.error_sink[0].message == "While parsing keyword 'print'."
def test_i_can_double_quoted_strings_are_expanded(self):
"""
When inside a double quote, the double quote is removed and its content it used as is.
It allows usage of keywords withing parts
:return:
"""
sheerka, context, parser = self.init_parser('print "when can be used" when True')
expected = {Keywords.PRINT: "print when can be used", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
def test_single_quoted_strings_are_not_expanded(self):
sheerka, context, parser = self.init_parser("print 'when can be used' when True")
expected = {Keywords.PRINT: "print 'when can be used' ", Keywords.WHEN: "when True"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected)
def test_i_can_manage_colon(self):
text = """when:
xxx
when
print
print:
xxx:
when
print
yyy
"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy", Keywords.WHEN: "when xxx\nwhen\nprint"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
def test_indentation_is_normalized_when_using_colon(self):
text = """print:
xxx:
when
print
yyy
"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx:\n when\n print\nyyy"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
def test_i_can_mix_parts_with_colon_and_parts_without_colon(self):
text = """when:
xxx
when
print
print xxx"""
sheerka, context, parser = self.init_parser(text)
expected = {Keywords.PRINT: "print xxx", Keywords.WHEN: "when xxx\nwhen\nprint"}
res = parser.get_parts(["print", "when"])
self.compare_results(res, expected, compare_str=True)
@pytest.mark.parametrize("text", [
"when:\nx x",
"when: \nx x",
])
def test_i_cannot_manage_colon_when_tab_is_missing(self, text):
sheerka, context, parser = self.init_parser(text)
assert parser.get_parts(["when"])
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
@pytest.mark.parametrize("text", [
"",
"\n",
" \n",
"x", # less than two characters
"\n\t"
])
def test_i_cannot_get_body_when_body_is_too_short(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Body is empty or too short.")]
def test_a_new_line_is_expected_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("not a newline", yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("New line not found.", "not", [TokenKind.NEWLINE])]
@pytest.mark.parametrize("text", [
"\nx x",
" \nx x",
])
def test_tab_is_mandatory_after_new_line_when_get_body(self, text):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "x", [TokenKind.WHITESPACE])]
def test_i_can_detect_missing_tab_when_get_body(self):
text = "\n\txxx\n\tyyy\nzzz"
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer(text, yield_eof=False))) is None
assert parser.error_sink == [UnexpectedTokenErrorNode("Indentation not found.", "zzz", [TokenKind.WHITESPACE])]
def test_i_can_detect_invalid_indentation_when_get_body(self):
sheerka, context, parser = self.init_parser("")
assert parser.get_body(list(Tokenizer("\n\t\txxx\n\tyyy", yield_eof=False))) is None
assert parser.error_sink == [SyntaxErrorNode(None, "Invalid indentation.")]
def test_i_can_get_body(self):
sheerka, context, parser = self.init_parser("")
res = parser.get_body(list(Tokenizer("\n\txxx\n\tyyyy", yield_eof=False)))
expected = list(Tokenizer("xxx\n yyyy", yield_eof=False))
expected[2].value = ""
assert [t.repr_value for t in res] == [t.repr_value for t in expected]
assert parser.error_sink == []
+1 -1
View File
@@ -33,7 +33,7 @@ def update_concepts_ids(sheerka, parsing_expression):
update_concepts_ids(sheerka, pe)
eof_token = Token(TokenKind.EOF, "", 0, 0, 0)
eof_token = "<EOF>"
class TestBnfParser(TestUsingMemoryBasedSheerka):
@@ -7,10 +7,11 @@ from core.concept import DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF, Concept, CV
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.BaseNodeParser import SCWC
from parsers.BaseParser import NotInitializedNode, UnexpectedEofNode
from parsers.BnfNodeParser import OrderedChoice, ConceptExpression, StrMatch, Sequence
from parsers.BnfParser import BnfParser
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode
from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.DefConceptParser import DefConceptParser, NameNode, SyntaxErrorNode
from parsers.DefConceptParser import UnexpectedTokenErrorNode, DefConceptNode
from parsers.FunctionParser import FunctionParser
from parsers.PythonParser import PythonParser, PythonNode
@@ -48,7 +49,7 @@ def get_concept_part(part):
if isinstance(part, str):
node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval"))
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part,
@@ -59,7 +60,7 @@ def get_concept_part(part):
# node = PythonNode(part.strip(), ast.parse(part.strip(), mode="eval"))
nodes = compute_expected_array({}, part.source, [SCWC(part.first, part.last, *part.content)])
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -70,7 +71,7 @@ def get_concept_part(part):
if isinstance(part, PN):
node = PythonNode(part.source.strip(), ast.parse(part.source.strip(), mode=part.mode))
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -79,7 +80,7 @@ def get_concept_part(part):
if isinstance(part, PythonNode):
return ReturnValueConcept(
who="parsers.Default",
who="parsers.DefConcept",
status=True,
value=ParserResultConcept(
source=part.source,
@@ -110,13 +111,26 @@ class FN:
content: list
class TestDefaultParser(TestUsingMemoryBasedSheerka):
class TestDefConceptParser(TestUsingMemoryBasedSheerka):
def init_parser(self, *concepts):
sheerka, context, *updated = self.init_concepts(*concepts, singleton=True)
parser = DefaultParser()
parser = DefConceptParser()
return sheerka, context, parser, *updated
@pytest.mark.parametrize("text, error", [
("concept", UnexpectedTokenErrorNode("'def' keyword not found.", "concept", [Keywords.DEF])),
("hello word", UnexpectedTokenErrorNode("'def' keyword not found.", "hello", [Keywords.DEF])),
("def hello", UnexpectedTokenErrorNode("'concept' keyword not found.", "hello", [Keywords.CONCEPT])),
])
def test_i_can_detect_not_for_me(self, text, error):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME)
assert res.value.reason == [error]
@pytest.mark.parametrize("text, expected", [
("def concept hello", get_def_concept(name="hello")),
("def concept hello ", get_def_concept(name="hello")),
@@ -124,13 +138,11 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka):
("def concept a+b", get_def_concept(name="a + b")),
("def concept 'a+b'+c", get_def_concept(name="'a+b' + c")),
("def concept 'as if'", get_def_concept(name="'as if'")),
("def concept 'as' if", get_def_concept(name="'as if'")),
("def concept hello as 'hello'", get_def_concept(name="hello", body="'hello'")),
("def concept hello as 1", get_def_concept(name="hello", body="1")),
("def concept hello as 1 + 1", get_def_concept(name="hello", body="1 + 1")),
("def concept 'as' if", get_def_concept(name="'as' if")),
('def concept "as if"', get_def_concept(name="as if")),
])
def test_i_can_parse_def_concept(self, text, expected):
sheerka, context, parser = self.init_parser()
def test_i_can_parse_def_concept_name(self, text, expected):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
node = res.value.value
@@ -140,6 +152,113 @@ class TestDefaultParser(TestUsingMemoryBasedSheerka):
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_name_is_mandatory(self):
text = "def concept as 'hello'"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Name is mandatory"
@pytest.mark.parametrize("text", [
"def concept hello\nmy friend",
"def concept hello \nmy friend",
"def concept hello\n my friend",
"def concept hello \n my friend",
"def concept hello from hello\nmy friend",
"def concept hello from def hello\nmy friend",
"def concept hello from bnf hello\nmy friend",
"def concept hello from:\n\thello\nmy friend",
"def concept hello from def:\n\thello\nmy friend",
"def concept hello from bnf:\n\thello\nmy friend",
])
def test_new_line_is_not_allowed_in_the_name(self, text):
text = "def concept hello \n my friend as 'hello'"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode(None, "Newline are not allowed in name.")]
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(return_value.reason[0], UnexpectedTokenErrorNode)
assert return_value.reason[0].message == "'concept' keyword not found."
assert return_value.reason[0].expected_tokens == [Keywords.CONCEPT]
assert return_value.reason[0].token.value == "hello"
def test_i_can_detect_empty_declaration(self):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo as where True"
res = parser.parse(context, ParserInput(text))
error = res.body.body[0]
assert not res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert isinstance(error, SyntaxErrorNode)
assert error.message == "Empty 'as' declaration."
def test_empty_parts_are_not_initialized(self):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo"
res = parser.parse(context, ParserInput(text))
parser_result = res.body
node = res.body.body
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert isinstance(node, DefConceptNode)
assert node.body == NotInitializedNode()
assert node.where == NotInitializedNode()
assert node.pre == NotInitializedNode()
assert node.post == NotInitializedNode()
assert node.ret == NotInitializedNode()
@pytest.mark.parametrize("part", [
"as",
"pre",
"post",
"ret",
"where"
])
def test_i_can_parse_def_concept_parts(self, part):
sheerka, context, parser, *concepts = self.init_parser()
text = "def concept foo " + part + " True"
res = parser.parse(context, ParserInput(text))
node = res.value.value
assert res.status
assert res.who == parser.name
assert res.value.source == text
assert isinstance(res.value, ParserResultConcept)
part_mapping = "body" if part == "as" else part
args = {part_mapping: get_concept_part("True")}
expected = get_def_concept("foo", **args)
assert node == expected
def test_i_can_detect_error_in_declaration(self):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput("def concept hello where 1+"))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
def test_i_can_parse_complex_def_concept_statement(self):
text = """def concept a mult b
where a,b
@@ -148,7 +267,7 @@ post isinstance(res, a)
as res = a * b
ret a if isinstance(a, Concept) else self
"""
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
expected_concept = get_def_concept(
@@ -177,7 +296,7 @@ func(a)
body=PN("def func(x):\n return x+1\nfunc(a)\n", "exec")
)
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
@@ -199,7 +318,7 @@ def concept add one to a as:
ast.parse("def func(x):\n return x+1\nfunc(a)", mode="exec"))
)
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
@@ -208,156 +327,17 @@ def concept add one to a as:
assert return_value.value == expected_concept
@pytest.mark.parametrize("text", [
"def concept foo as:\npass",
"def concept foo where:\npass",
"def concept foo pre:\npass",
"def concept foo post:\npass",
"def concept foo from:\nanother definition",
"def concept foo from def:\nanother definition",
"def concept foo from bnf:\n'another' 'definition'",
"def concept name from bnf",
"def concept name from bnf ",
"def concept name from bnf as True",
])
def test_indentation_is_mandatory_after_a_colon(self, text):
sheerka, context, parser = self.init_parser()
def test_i_cannot_parse_empty_bnf_definition(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
error = res.body
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Indentation not found."
@pytest.mark.parametrize("text", [
"def concept plus from:\n\ta plus b",
"def concept plus from def:\n\ta plus b",
# space before the colon
"def concept plus from :\n\ta plus b",
"def concept plus from def :\n\ta plus b",
# space after the colon
"def concept plus from: \n\ta plus b",
"def concept plus from def: \n\ta plus b",
])
def test_i_can_use_colon_and_definition_together(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)]
@pytest.mark.parametrize("text", [
"def concept plus from bnf:\n\t'a' 'plus' 'b'",
"def concept plus from bnf :\n\t'a' 'plus' 'b'",
"def concept plus from bnf: \n\t'a' 'plus' 'b'",
])
def test_i_can_use_colon_and_bnf_definition_together(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
assert res.status
assert defined_concept.definition.status
assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b"))
def test_i_can_use_colon_to_protect_keyword(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_i_can_use_colon_to_protect_keyword_2(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from give me the date !
"""
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_name_is_mandatory(self):
text = "def concept as 'hello'"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Name is mandatory"
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], UnexpectedTokenErrorNode)
assert return_value.body[0].message == "Syntax error."
assert return_value.body[0].expected_tokens == [Keywords.CONCEPT]
@pytest.mark.parametrize("text", [
"def concept hello where 1+",
"def concept hello pre 1+",
"def concept hello post 1+",
"def concept hello as 1+"
])
def test_i_can_detect_error_in_declaration(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.TOO_MANY_ERRORS)
@pytest.mark.parametrize("text", [
"def concept hello\nmy friend",
"def concept hello \nmy friend",
"def concept hello\n my friend",
"def concept hello \n my friend",
"def concept hello from hello\nmy friend",
"def concept hello from def hello\nmy friend",
"def concept hello from bnf hello\nmy friend",
"def concept hello from:\n\thello\nmy friend",
"def concept hello from def:\n\thello\nmy friend",
"def concept hello from bnf:\n\thello\nmy friend",
])
def test_new_line_is_not_allowed_in_the_name(self, text):
text = "def concept hello \n my friend as 'hello'"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")]
assert sheerka.isinstance(error, BuiltinConcepts.ERROR)
assert error.body == [SyntaxErrorNode([], "Empty 'bnf' declaration")]
def test_i_can_parse_def_concept_from_bnf(self):
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
@@ -383,35 +363,40 @@ from give me the date !
assert not parser.has_error
@pytest.mark.parametrize("text", [
'def concept "def concept x"',
'def concept "def concept x" as x',
"def concept plus from bnf:\n\t'a' 'plus' 'b'",
"def concept plus from bnf :\n\t'a' 'plus' 'b'",
"def concept plus from bnf: \n\t'a' 'plus' 'b'",
])
def test_i_can_use_double_quotes_to_protect_keywords(self, text):
sheerka, context, parser = self.init_parser()
def test_i_can_use_colon_and_bnf_definition_together(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
concept_defined = res.value.value
defined_concept = res.body.body
assert res.status
assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False))
assert defined_concept.definition.status
assert defined_concept.definition.body.body == Sequence(StrMatch("a"), StrMatch("plus"), StrMatch("b"))
@pytest.mark.parametrize("text", [
"def concept name from bnf as here is my body",
"def concept name from def as here is my body",
"def concept name from as here is my body"
@pytest.mark.parametrize("text, error", [
("def concept name from def as True", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from def", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from def ", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from as True", SyntaxErrorNode([], "Empty 'from' declaration.")),
("def concept name from", UnexpectedEofNode("While parsing keyword 'from'.")),
("def concept name from ", UnexpectedEofNode("While parsing keyword 'from'.")),
])
def test_i_can_detect_empty_bnf_declaration(self, text):
sheerka, context, parser = self.init_parser()
def test_i_can_detect_empty_def_declaration(self, text, error):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration")
assert res.value.body[0] == error
@pytest.mark.parametrize("text", [
"def concept addition from a plus b as a + b",
"def concept addition from def a plus b as a + b"])
def test_i_can_def_concept_from_definition(self, text):
sheerka, context, parser = self.init_parser()
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
expected = get_def_concept("addition", definition="a plus b", body="a + b")
node = res.value.value
@@ -422,6 +407,114 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept)
assert node == expected
@pytest.mark.parametrize("text", [
"def concept plus from:\n\ta plus b",
"def concept plus from def:\n\ta plus b",
# space before the colon
"def concept plus from :\n\ta plus b",
"def concept plus from def :\n\ta plus b",
# space after the colon
"def concept plus from: \n\ta plus b",
"def concept plus from def: \n\ta plus b",
])
def test_i_can_use_colon_and_definition_together(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("a plus b", yield_eof=False)]
def test_i_can_use_colon_to_protect_keyword(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
def test_i_can_use_colon_to_protect_keyword_2(self):
text = """
def concept today as:
from datetime import date
today = date.today()
from give me the date !
"""
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
defined_concept = res.body.body
defined_concept_tokens = [t.repr_value for t in defined_concept.definition.tokens]
assert res.status
assert defined_concept.definition_type == DEFINITION_TYPE_DEF
assert defined_concept_tokens == [t.repr_value for t in Tokenizer("give me the date !", yield_eof=False)]
assert defined_concept.body.status
@pytest.mark.parametrize("text", [
"def",
"def concept_name"
])
def test_i_cannot_parse_invalid_entries(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [
("'name", "Missing Trailing quote", "'name"),
("foo isa 'name", "Missing Trailing quote", "'name"),
("def concept 'name", "Missing Trailing quote", "'name"),
("def concept name as 'body", "Missing Trailing quote", "'body"),
("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"),
("def concept c::", "Concept identifiers not found", ""),
])
def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
text = "def concept name from bnf unknown"
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ("key", "unknown")
@pytest.mark.parametrize("text", [
'def concept "def concept x"',
'def concept "def concept x" as x',
])
def test_i_can_use_double_quotes_to_protect_keywords(self, text):
sheerka, context, parser, *concepts = self.init_parser()
res = parser.parse(context, ParserInput(text))
concept_defined = res.value.value
assert res.status
assert concept_defined.name.tokens == list(Tokenizer("def concept x", yield_eof=False))
def test_i_can_parse_when_ambiguity_in_where_pre_clause(self):
sheerka, context, parser, *concepts = self.init_parser(
Concept("x is a y", pre="in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)"),
@@ -430,7 +523,7 @@ from give me the date !
text = "def concept foo x y where x is a y"
res = parser.parse(context, ParserInput(text))
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default",
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept",
parser="parsers.ExactConcept")
expected = get_def_concept("foo x y", where=expected_body)
node = res.value.value
@@ -443,7 +536,7 @@ from give me the date !
text = "def concept foo x y pre x is a y"
res = parser.parse(context, ParserInput(text))
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.Default",
expected_body = self.pretval(CV(concepts[0], pre=True), source="x is a y", who="parsers.DefConcept",
parser="parsers.ExactConcept")
expected = get_def_concept("foo x y", pre=expected_body)
node = res.value.value
@@ -454,63 +547,5 @@ from give me the date !
assert isinstance(res.value, ParserResultConcept)
assert node == expected
def test_i_can_detect_not_for_me(self):
text = "hello world"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.value.body[0], CannotHandleErrorNode)
@pytest.mark.parametrize("text", [
"def",
"def concept_name"
])
def test_i_cannot_parse_invalid_entries(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text", [
"concept",
"isa number",
"name isa",
])
def test_i_cannot_parse_not_for_me_entries(self, text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.body[0], CannotHandleErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [
("'name", "Missing Trailing quote", "'name"),
("foo isa 'name", "Missing Trailing quote", "'name"),
("def concept 'name", "Missing Trailing quote", "'name"),
("def concept name as 'body", "Missing Trailing quote", "'body"),
("def concept name from bnf 'expression", "Missing Trailing quote", "'expression"),
("def concept c::", "Concept identifiers not found", ""),
])
def test_i_cannot_parse_when_tokenizer_fails(self, text, error_msg, error_text):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
def test_i_cannot_parse_bnf_definition_referencing_unknown_concept(self):
text = "def concept name from bnf unknown"
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
assert not res.status
assert context.sheerka.isinstance(res.value, BuiltinConcepts.UNKNOWN_CONCEPT)
assert res.value.body == ("key", "unknown")
+21 -21
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseParser import UnexpectedEof, UnexpectedTokenErrorNode
from parsers.BaseParser import UnexpectedEofNode, UnexpectedTokenErrorNode
from parsers.ExpressionParser import PropertyEqualsNode, PropertyEqualsSequenceNode, PropertyContainsNode, AndNode, \
OrNode, NotNode, LambdaNode, IsaNode, NameExprNode, ExpressionParser, LeftPartNotFoundError, TrueifyVisitor
@@ -33,14 +33,14 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("expression, expected", [
("one complicated expression", n("one complicated expression")),
("function_call(a,b,c)", n("function_call(a,b,c)")),
("one expression or another expression", OrNode(n("one expression"), n("another expression"))),
("one expression and another expression", AndNode(n("one expression"), n("another expression"))),
("one or two or three", OrNode(n("one"), n("two"), n("three"))),
("one and two and three", AndNode(n("one"), n("two"), n("three"))),
("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))),
("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))),
("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))),
# ("function_call(a,b,c)", n("function_call(a,b,c)")),
# ("one expression or another expression", OrNode(n("one expression"), n("another expression"))),
# ("one expression and another expression", AndNode(n("one expression"), n("another expression"))),
# ("one or two or three", OrNode(n("one"), n("two"), n("three"))),
# ("one and two and three", AndNode(n("one"), n("two"), n("three"))),
# ("one or two and three", OrNode(n("one"), AndNode(n("two"), n("three")))),
# ("one and two or three", OrNode(AndNode(n("one"), n("two")), n("three"))),
# ("one and (two or three)", AndNode(n("one"), OrNode(n("two"), n("three")))),
])
def test_i_can_parse_expression(self, expression, expected):
sheerka, context, parser = self.init_parser()
@@ -54,12 +54,12 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
assert expressions == expected
@pytest.mark.parametrize("expression, expected_errors", [
("one or", [UnexpectedEof("When parsing 'or'")]),
("one and", [UnexpectedEof("When parsing 'and'")]),
("one or", [UnexpectedEofNode("When parsing 'or'")]),
("one and", [UnexpectedEofNode("When parsing 'and'")]),
("and one", [LeftPartNotFoundError()]),
("or one", [LeftPartNotFoundError()]),
("or", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'or'")]),
("and", [LeftPartNotFoundError(), UnexpectedEof("When parsing 'and'")]),
("or", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 'or'")]),
("and", [LeftPartNotFoundError(), UnexpectedEofNode("When parsing 'and'")]),
])
def test_i_can_detect_error(self, expression, expected_errors):
sheerka, context, parser = self.init_parser()
@@ -74,17 +74,17 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
res = parser.parse(context, ParserInput("("))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
assert res.body.body[0].token.type == TokenKind.EOF
assert res.body.body[0].expected_tokens == [TokenKind.RPAR]
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
assert res.body.reason[0].token.type == TokenKind.EOF
assert res.body.reason[0].expected_tokens == [TokenKind.RPAR]
res = parser.parse(context, ParserInput(")"))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
assert res.body.body[0].token.type == TokenKind.RPAR
assert res.body.body[0].expected_tokens == []
assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
assert isinstance(res.body.reason[0], UnexpectedTokenErrorNode)
assert res.body.reason[0].token.type == TokenKind.RPAR
assert res.body.reason[0].expected_tokens == []
res = parser.parse(context, ParserInput("one and two)"))
assert not res.status
+71
View File
@@ -0,0 +1,71 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseCustomGrammarParser import KeywordNotFound
from parsers.FormatRuleParser import FormatRuleParser, FormatAstRawText, FormatRuleNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
cmap = {}
class TestFormatRuleParser(TestUsingMemoryBasedSheerka):
sheerka = None
@classmethod
def setup_class(cls):
t = cls()
cls.sheerka, context, _ = t.init_parser(cmap)
def init_parser(self, concepts_map=None):
if concepts_map is not None:
sheerka, context, *concepts = self.init_concepts(*concepts_map.values(), create_new=True)
else:
sheerka = TestFormatRuleParser.sheerka
context = self.get_context(sheerka)
parser = FormatRuleParser()
return sheerka, context, parser
def test_i_can_detect_empty_expression(self):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(""))
assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
def test_input_must_be_a_parser_input(self):
sheerka, context, parser = self.init_parser()
parser.parse(context, "not a parser input") is None
def test_i_can_parse_a_simple_rule(self):
sheerka, context, parser = self.init_parser()
text = "when isinstance(last_value(), Concept) print hello world!"
res = parser.parse(context, ParserInput(text))
parser_result = res.body
format_rule = res.body.body
rule = format_rule.rule
format_ast = format_rule.format_ast
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert isinstance(format_rule, FormatRuleNode)
assert sheerka.isinstance(rule, BuiltinConcepts.RETURN_VALUE)
assert format_ast == FormatAstRawText("hello world!")
@pytest.mark.parametrize("text, error", [
("hello world", [KeywordNotFound(None, keywords=['when', 'print'])]),
("when True", [KeywordNotFound([], keywords=['print'])]),
("print True", [KeywordNotFound([], keywords=['when'])]),
])
def test_cannot_parse_when_not_for_me(self, text, error):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
not_for_me = res.body
assert not res.status
assert sheerka.isinstance(not_for_me, BuiltinConcepts.NOT_FOR_ME)
assert not_for_me.reason == error
+2
View File
@@ -70,6 +70,8 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka):
sheerka, context, parser = self.init_parser()
parser.reset_parser(context, ParserInput(expression))
parser.parser_input.next_token()
res = parser.parse_function()
assert res == expected
+3 -3
View File
@@ -4,7 +4,7 @@ from core.concept import Concept, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import Tokenizer
from evaluators.ConceptEvaluator import ConceptEvaluator
from parsers.DefaultParser import DefaultParser
from parsers.DefConceptParser import DefConceptParser
from sdp.sheerkaDataProvider import Event
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -267,10 +267,10 @@ class TestSheerkaPickleHandler(TestUsingMemoryBasedSheerka):
decoded = sheerkapickle.decode(sheerka, to_string)
assert decoded == sheerka.ret("c:1001:", True, 10)
ret_val = sheerka.ret(DefaultParser(), True, 10)
ret_val = sheerka.ret(DefConceptParser(), True, 10)
to_string = sheerkapickle.encode(sheerka, ret_val)
decoded = sheerkapickle.decode(sheerka, to_string)
assert decoded == sheerka.ret("parsers.Default", True, 10)
assert decoded == sheerka.ret("parsers.DefConcept", True, 10)
ret_val = sheerka.ret(ConceptEvaluator(), True, 10)
to_string = sheerkapickle.encode(sheerka, ret_val)