Fixed #9: I can parse 'def concept'

2023-06-11 09:45:44 +02:00
parent 62391f786e
commit ba397b0b72
22 changed files with 3043 additions and 93 deletions
+1
@@ -363,3 +363,4 @@ def dict_product(a, b):
res.append(items)
return res
-1
@@ -80,7 +80,6 @@ class Concept:
def __init__(self, metadata: ConceptMetadata):
self._metadata: ConceptMetadata = metadata
self._bnf = None # compiled bnf expression
self._runtime_info = ConceptRuntimeInfo() # runtime settings for the concept
self._all_attrs = None
+149
@@ -0,0 +1,149 @@
from common.global_symbols import NotInit
from core.ExecutionContext import ContextActions, ExecutionContext
from core.ReturnValue import ReturnValue
from core.concept import DefinitionType
from core.error import ErrorContext, SheerkaException
from evaluators.base_evaluator import EvaluatorEvalResult, EvaluatorMatchResult, OneReturnValueEvaluator
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.ConceptDefinitionParser import ConceptDefinition
from parsers.parser_utils import ParsingException
from parsers.tokenizer import TokenKind, Tokenizer
NAMES_TOKEN_TYPES = {TokenKind.IDENTIFIER,
TokenKind.STRING}
CONCEPT_PARTS_TO_USE = [
"body",
"where",
"pre",
"post",
"ret",
]
class DefConceptEvaluator(OneReturnValueEvaluator):
"""
This evaluator creates the definition of a new concept
after it is recognized by the RecognizeDefConcept parser
"""
NAME = "DefConcept"
def __init__(self):
super().__init__(self.NAME, ContextActions.EVALUATION, 50)
def matches(self, context: ExecutionContext,
return_value: ReturnValue) -> EvaluatorMatchResult:
return EvaluatorMatchResult(return_value.status and isinstance(return_value.value, ConceptDefinition))
def eval(self, context: ExecutionContext,
evaluation_context: object,
return_value: ReturnValue) -> EvaluatorEvalResult:
try:
concept_def = return_value.value
variables = self._get_variables(context, concept_def)
parameters = None
if concept_def.definition_type == DefinitionType.BNF:
self._validate_bnf(context, concept_def)
ret_val = context.sheerka.define_new_concept(context,
concept_def.name,
False,
False,
concept_def.body,
concept_def.where,
concept_def.pre,
concept_def.post,
concept_def.ret,
concept_def.definition,
concept_def.definition_type,
concept_def.auto_eval,
variables=variables,
parameters=parameters)
if ret_val.status:
new = ReturnValue(self.NAME, True, ret_val.value, parents=[return_value])
return EvaluatorEvalResult([new], [return_value])
else:
error_ret_val = ReturnValue(self.NAME, False, ret_val.value, [return_value])
return EvaluatorEvalResult([error_ret_val], [])
except ParsingException as ex:
error_context = ErrorContext(self.NAME, context, ex.error)
error_ret_val = ReturnValue(self.NAME, False, error_context, [return_value])
return EvaluatorEvalResult([error_ret_val], [])
def _get_variables(self, context: ExecutionContext, concept_def: ConceptDefinition):
variables_found = set() # plain names (str); (name, value) tuples only come from def_var below
definition = concept_def.definition or concept_def.name
possible_vars_from_name = self._get_possible_vars_from_def(context, definition)
possible_vars_from_name_as_set = set(possible_vars_from_name)
for part in CONCEPT_PARTS_TO_USE:
# if these possible variables are also referenced in other parts of the definition, they may be variables
part_value = getattr(concept_def, part)
if part_value == "":
continue
possible_vars_from_part = self._get_possible_vars_from_part(context, part_value)
variables_found.update(possible_vars_from_name_as_set & possible_vars_from_part)
# add variables declared with def_var
if concept_def.def_var:
variables_found.update(concept_def.def_var)
with_default_value = [v if isinstance(v, tuple) else (v, NotInit) for v in variables_found]
# keep variables in their order of appearance in the definition
sorted_vars = []
for possible_var in possible_vars_from_name:
for found in with_default_value:
if possible_var == found[0]:
sorted_vars.append(found)
return sorted_vars
@staticmethod
def _get_possible_vars_from_def(context, definition):
"""
Extract candidate variable names from the definition: every identifier
or string token that is not a known concept name.
:return: list of names, in order of appearance
"""
names = (str(t.value) for t in Tokenizer(definition) if t.type in NAMES_TOKEN_TYPES)
possible_vars = filter(lambda x: not context.sheerka.is_a_concept_name(x), names)
return list(possible_vars)
@staticmethod
def _get_possible_vars_from_part(context, part):
"""
Extract candidate variable names from one part of the definition
(body, where, pre, post or ret).
:return: set of names
"""
# not the final implementation
# In the final impl,
# we first need to check if the part is a concept call (rather than a concept name)
names = (str(t.value) for t in Tokenizer(part) if t.type in NAMES_TOKEN_TYPES)
possible_vars = filter(lambda x: not context.sheerka.is_a_concept_name(x), names)
return set(possible_vars)
@staticmethod
def _validate_bnf(context, concept_def):
parser = BnfDefinitionParser(context, concept_def.definition, concept_def.name)
parser.parse()
if parser.error_sink:
raise ParsingException(parser.error_sink[0])
return True
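For orientation: the variable-inference rule in _get_variables boils down to a set intersection, i.e. identifiers from the concept name that are not known concept names become variables when they are referenced again in another part. A minimal standalone sketch of that rule, with plain lists standing in for the Tokenizer and the concept registry (all names here are illustrative, not Sheerka API):

known_concepts = {"inc"}                     # stand-in for is_a_concept_name()
name_tokens = ["inc", "a"]                   # tokens of the name "inc a"
parts = {"ret": ["a"], "where": []}          # tokens of the other parts

candidates = [t for t in name_tokens if t not in known_concepts]
variables = [c for c in candidates
             if any(c in tokens for tokens in parts.values())]
assert variables == ["a"]                    # 'a' is inferred as a variable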
+40
@@ -0,0 +1,40 @@
from core.BuiltinConcepts import BuiltinConcepts
from core.ExecutionContext import ContextActions, ExecutionContext
from core.ReturnValue import ReturnValue
from core.error import ErrorContext
from evaluators.base_evaluator import EvaluatorEvalResult, EvaluatorMatchResult, OneReturnValueEvaluator
from parsers.ConceptDefinitionParser import ConceptDefinitionParser
class RecognizeDefConcept(OneReturnValueEvaluator):
"""
Recognizes the input 'def concept <name> [as <body>] [where <where>] [pre <pre>] [ret <ret>]'
"""
NAME = "RecognizeDefConcept"
def __init__(self):
super().__init__(self.NAME, ContextActions.PARSING, 90)
def matches(self, context: ExecutionContext, return_value: ReturnValue) -> EvaluatorMatchResult:
return EvaluatorMatchResult(return_value.status and
context.sheerka.isinstance(return_value.value, BuiltinConcepts.PARSER_INPUT) and
return_value.value.body.original_text.startswith("def concept"))
def eval(self, context: ExecutionContext,
evaluation_context: object,
return_value: ReturnValue) -> EvaluatorEvalResult:
parser_input = return_value.value.body
parser_input.reset()
parser_input.seek(2) # eat 'def '
parser = ConceptDefinitionParser(True)
node = parser.parse(parser_input)
if parser.error_sink:
error_context = ErrorContext(self.NAME, context, parser.error_sink)
error_ret_val = ReturnValue(self.NAME, False, error_context, [return_value])
return EvaluatorEvalResult([error_ret_val], [])
else:
new = ReturnValue(self.NAME, True, node, parents=[return_value])
return EvaluatorEvalResult([new], [return_value])
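In short, RecognizeDefConcept turns a raw 'def concept ...' input into a ConceptDefinition return value, which DefConceptEvaluator then consumes. The expected shapes, mirrored from the tests further down (comments only, since running this needs a full Sheerka context):

# eval on "def concept one as 1" -> res.new[0].status is True
#                                   res.new[0].value is a ConceptDefinition
# eval on "def concept"          -> res.new[0].status is False
#                                   res.new[0].value is an ErrorContext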
+212
@@ -0,0 +1,212 @@
from dataclasses import dataclass
from common.global_symbols import NotFound
from core.error import ErrorObj
from parsers.parser_utils import SimpleParser, UnexpectedEof, UnexpectedToken
from parsers.peg_parser import ConceptExpression, OneOrMore, Optional, OrderedChoice, RegExMatch, Sequence, StrMatch, \
VariableExpression, ZeroOrMore
from parsers.tokenizer import TokenKind, Tokenizer
@dataclass
class UnknownConcept(ErrorObj):
concept_id: str
concept_name: str
def get_error_msg(self) -> str:
return f"Cannot find concept defined by id='{self.concept_id}' and/or name '{self.concept_name}'"
class BnfDefinitionParser(SimpleParser):
"""
Parser used to transform a literal BNF definition into a ParsingExpression
example:
a | b c -> OrderedChoice(a, Sequence(b, c))
'|' (pipe) is used for OrderedChoice
' ' space is used for Sequence
'?' (question mark) is used for Optional
'*' (star) is used for ZeroOrMore
'+' (plus) is used for OneOrMore
"""
def __init__(self, context, text, concept_name=None):
super().__init__(text, skip_whitespace_default_behaviour=False)
self.context = context
self.concept_name = concept_name # name of the concept currently being constructed
self.nb_open_par = 0
self.next_token(skip_whitespace=True)
def maybe_sequence(self, first, second):
return self.token.type == second or \
self.token.type == first and self.check_next_token().type == second
def parse(self):
tree = self._parse_choice()
if self.token.type != TokenKind.EOF:
self.add_error(UnexpectedToken(self.token, TokenKind.EOF))
return None if self.error_sink else tree
def _parse_choice(self):
"""
a | b | c
<choice> := <sequence> ( '|' <sequence> )*
"""
sequence = self._parse_sequence()
self.eat_whitespace()
if self.token.type != TokenKind.VBAR:
return sequence
elements = [sequence]
while True:
# maybe eat the vertical bar
self.eat_whitespace()
if self.token is None or self.token.type != TokenKind.VBAR:
break
self.next_token(skip_whitespace=True)
sequence = self._parse_sequence()
elements.append(sequence)
return self._eat_rule_name_if_needed(OrderedChoice(*elements))
def _parse_sequence(self):
"""
a b c
"""
expr_and_modifier = self._parse_modifier()
if self.token.type == TokenKind.EOF or \
self.token.type == TokenKind.EQUALS or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
return expr_and_modifier
elements = [expr_and_modifier]
while True:
if self.token is None or \
self.token.type == TokenKind.EOF or \
self.token.type == TokenKind.EQUALS or \
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
break
self.eat_whitespace()
sequence = self._parse_modifier()
elements.append(sequence)
return self._eat_rule_name_if_needed(Sequence(*elements))
def _parse_modifier(self):
"""
a? | a* | a+
"""
expression = self._parse_expression()
if self.token.type == TokenKind.QMARK:
self.next_token()
return self._eat_rule_name_if_needed(Optional(expression))
if self.token.type == TokenKind.STAR:
self.next_token()
return self._eat_rule_name_if_needed(ZeroOrMore(expression))
if self.token.type == TokenKind.PLUS:
self.next_token()
return self._eat_rule_name_if_needed(OneOrMore(expression))
return expression
def _parse_expression(self):
if self.token.type == TokenKind.EOF:
self.add_error(UnexpectedEof("lpar | concept | ident | string | regex", self.token))
if self.token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expr = self._parse_choice()
if self.token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return self._eat_rule_name_if_needed(expr)
else:
self.add_error(UnexpectedToken(self.token, TokenKind.RPAR))
return expr
if self.token.type == TokenKind.CONCEPT:
concept_name, concept_id = self.token.value
metadata = self.context.sheerka.get_by_id(concept_id) if concept_id \
else self.context.sheerka.get_by_name(concept_name)
if metadata is NotFound:
self.add_error(UnknownConcept(concept_id, concept_name))
self.next_token()
return None
expr = ConceptExpression(metadata.id, rule_name=metadata.name)
self.next_token()
return self._eat_rule_name_if_needed(expr)
if self.token.type == TokenKind.IDENTIFIER:
concept_name = self.token.str_value
if concept_name == self.concept_name:
# recursive construction, the concept id is not known yet
expr = ConceptExpression(None, rule_name=concept_name)
elif (metadata := self.context.sheerka.get_by_name(concept_name)) is NotFound:
# unknown concept, it's a variable definition
expr = VariableExpression(concept_name)
else:
expr = ConceptExpression(metadata.id, rule_name=concept_name)
self.next_token()
return self._eat_rule_name_if_needed(expr)
if self.token.type == TokenKind.STRING:
tokens = list(Tokenizer(self.token.strip_quote, yield_eof=False))
if len(tokens) == 1:
self.next_token()
return self._eat_rule_name_if_needed(StrMatch(tokens[0].str_value))
else:
elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens]
elements[-1].skip_whitespace = True
ret = Sequence(*elements)
self.next_token()
return self._eat_rule_name_if_needed(ret)
if self.token.type == TokenKind.REGEX:
ret = RegExMatch(self.token.strip_quote)
self.next_token()
return self._eat_rule_name_if_needed(ret)
ret = StrMatch(self.token.strip_quote)
self.next_token()
return self._eat_rule_name_if_needed(ret)
def _eat_rule_name_if_needed(self, expression):
if self.token.type == TokenKind.EQUALS:
self.next_token() # eat equals
if self.token.type != TokenKind.IDENTIFIER:
return self.add_error(UnexpectedToken(self.token, TokenKind.IDENTIFIER))
expression.rule_name = self.token.value
self.next_token()
return expression
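The descent order (_parse_choice -> _parse_sequence -> _parse_modifier -> _parse_expression) gives the usual precedence: postfix modifiers bind tightest, then juxtaposition (sequence), then '|'. A hand-worked derivation under that precedence, using the expression classes as they appear in the tests below:

# 'a | b c?'
#   _parse_choice splits on '|': 'a' and 'b c?'
#   _parse_sequence on 'b c?' -> Sequence(b, Optional(c))
#   result -> OrderedChoice(a, Sequence(b, Optional(c)))
# i.e. '?' applies to 'c' only, and sequence binds tighter than choice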
+219
@@ -0,0 +1,219 @@
import re
from dataclasses import dataclass
from common.global_symbols import NotInit
from core.concept import DefinitionType
from parsers.ParserInput import ParserInput
from parsers.parser_utils import ParsingError, ParsingException, SimpleParser, UnexpectedEof, UnexpectedToken, \
get_text_from_tokens, \
parse_parts, \
strip_tokens
from parsers.tokenizer import Keywords, TokenKind
var_with_value_regex = re.compile("(\\w+)\\s*=\\s*(\\d+)")
DEF_CONCEPT_PARTS = [
Keywords.CONCEPT,
Keywords.FROM,
Keywords.AS,
Keywords.WHERE,
Keywords.PRE,
Keywords.POST,
Keywords.RET,
Keywords.AUTO_EVAL,
Keywords.DEF_VAR
]
@dataclass
class ConceptDefinition:
name: str
definition_type: str = None
definition: str = ""
body: str = ""
where: str = ""
pre: str = ""
post: str = ""
ret: str = ""
auto_eval: bool = False
def_var: list = None
class VariableParser(SimpleParser):
"""
Simple parser for variable declarations
expected forms:
a b
a, b
a = value
"""
def parse(self):
res = []
while self.next_token():
if self.token.type == TokenKind.COMMA:
continue
var_name = self.token.value
if self.check_next_token().type == TokenKind.EQUALS:
self.next_token()
self.next_token()
var_value = int(self.token.value) if self.token.type == TokenKind.NUMBER else self.token.value
res.append((var_name, var_value))
else:
res.append((var_name, NotInit))
return res
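A hedged note on the return shapes, inferred from the code path above and from the def_var tests later in this commit (NotInit marks 'declared without a default value'):

# VariableParser("a b").parse()    -> [("a", NotInit), ("b", NotInit)]
# VariableParser("a, b").parse()   -> [("a", NotInit), ("b", NotInit)]
# VariableParser("a = 10").parse() -> [("a", 10)]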
class ConceptDefinitionParser:
def __init__(self, start_already_recognized=False):
self.error_sink = []
self.start_already_recognized = start_already_recognized
self.custom_str = {TokenKind.STRING: lambda t: t.value[1:-1] if t.value[0] == '"' else t.value}
def add_error(self, error):
self.error_sink.append(error)
def parse(self, parser_input: ParserInput) -> ConceptDefinition | None:
if not self.start_already_recognized:
# When called by the evaluator, 'def concept' is already recognized
# So there is no need to do it again
if not parser_input.next_token():
self.add_error(UnexpectedEof(Keywords.DEF, None))
return None
token = parser_input.token
if not (token.type == TokenKind.IDENTIFIER and token.value == Keywords.DEF):
self.add_error(UnexpectedToken(token, Keywords.DEF))
return None
parser_input.next_token()
parts = parse_parts(parser_input,
self.error_sink,
DEF_CONCEPT_PARTS,
first_token=DEF_CONCEPT_PARTS[0],
allow_multiple=[DEF_CONCEPT_PARTS[-1]])
if parts is None:
assert self.error_sink
return None
try:
name = self._get_concept_name(parts)
def_type, definition = self._get_concept_definition(parts)
body = self._get_part(Keywords.AS, parts)
where = self._get_part(Keywords.WHERE, parts)
pre = self._get_part(Keywords.PRE, parts)
post = self._get_part(Keywords.POST, parts)
ret = self._get_part(Keywords.RET, parts)
auto_eval = self._get_concept_auto_eval(parts)
def_var = self._get_concept_variables(parts)
return ConceptDefinition(name=name,
definition_type=def_type,
definition=definition,
body=body,
where=where,
pre=pre,
post=post,
ret=ret,
auto_eval=auto_eval,
def_var=def_var)
except ParsingException as ex:
self.add_error(ex.error)
return None
def _get_concept_name(self, parts):
tokens = parts[Keywords.CONCEPT]
name_tokens = strip_tokens(tokens[1:])
if len(name_tokens) == 0:
raise ParsingException(ParsingError([], "Name is mandatory."))
for token in name_tokens:
if token.type == TokenKind.NEWLINE:
raise ParsingException(ParsingError([token], "Newlines are not allowed in name."))
# normalize the name of the concept
no_whitespace = [t for t in name_tokens if t.type != TokenKind.WHITESPACE]
return " ".join(self.custom_str.get(token.type, lambda t: t.str_value)(token) for token in no_whitespace)
@staticmethod
def _get_concept_definition(parts):
if Keywords.FROM not in parts:
return None, ""
tokens = parts[Keywords.FROM]
if len(tokens) == 1:
raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.FROM}' declaration."))
if tokens[1].value == Keywords.BNF:
to_use = strip_tokens(tokens[2:])
definition_type = DefinitionType.BNF
elif tokens[1].value == Keywords.DEF:
to_use = strip_tokens(tokens[2:])
definition_type = DefinitionType.DEFAULT
else:
to_use = strip_tokens(tokens[1:])
definition_type = DefinitionType.DEFAULT
if len(to_use) == 0:
raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.FROM}' declaration."))
return definition_type, get_text_from_tokens(to_use)
@staticmethod
def _get_concept_auto_eval(parts):
if Keywords.AUTO_EVAL not in parts:
return False
tokens = parts[Keywords.AUTO_EVAL]
if len(tokens) == 1:
raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.AUTO_EVAL}' declaration."))
if len(tokens) > 2 or tokens[1].type != TokenKind.IDENTIFIER:
raise ParsingException(ParsingError(tokens[1:], f"Invalid 'auto_eval' declaration"))
auto_eval_value = tokens[1].value.lower()
if auto_eval_value == "true":
return True
elif auto_eval_value == "false":
return False
raise ParsingException(ParsingError(tokens[1],
f"Invalid 'auto_eval' declaration ({auto_eval_value} is not recognized)"))
@staticmethod
def _get_concept_variables(parts):
if Keywords.DEF_VAR not in parts:
return None
tokens = parts[Keywords.DEF_VAR]
if len(tokens) == 1:
raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.DEF_VAR}' declaration."))
res = []
for part in [t.strip_quote for t in tokens[1:]]:
res.extend(VariableParser(part).parse())
return res
@staticmethod
def _get_part(part, parts):
if part not in parts:
return ""
tokens = parts[part]
to_use = strip_tokens(tokens[1:])
if len(to_use) == 0:
raise ParsingException(ParsingError(tokens[0], f"Empty '{part}' declaration."))
return get_text_from_tokens(to_use)
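End to end, the parser maps one 'def concept' declaration onto the ConceptDefinition fields; a sketch mirrored from test_i_can_manage_all_parts below (shown as comments, since it needs a real ParserInput):

# pi = ParserInput("def concept foo as my body where my where clause ret my return value")
# pi.init()
# ConceptDefinitionParser().parse(pi)
# -> ConceptDefinition(name="foo", body="my body",
#                      where="my where clause", ret="my return value")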
+70 -5
@@ -1,18 +1,23 @@
from common.utils import get_text_from_tokens
from parsers.tokenizer import Tokenizer
from parsers.tokenizer import Token, TokenKind, Tokenizer
class ParserInput:
def __init__(self, text, yield_oef=True):
def __init__(self, text):
self.original_text = text
self.yield_oef = yield_oef
self.all_tokens = None
self.exception = None
self.all_tokens: list = None
self.exception: Exception = None
self.pos: int = None
self.end: int = None
self.token: Token = None
def init(self) -> bool:
try:
# the EOF token is always forced here; next_token() reports False once it is reached
self.all_tokens = list(Tokenizer(self.original_text, yield_eof=True))
self.pos = -1
self.end = len(self.all_tokens)
return True
except Exception as ex:
self.all_tokens = None
@@ -25,5 +30,65 @@ class ParserInput:
return get_text_from_tokens(self.all_tokens, custom_switcher, tracker)
def reset(self):
if self.exception:
raise self.exception
self.pos = -1
def next_token(self, skip_whitespace=True) -> bool:
self.pos += 1
if self.pos >= self.end:
self.token = self.all_tokens[-1]
return False
self.token = self.all_tokens[self.pos]
if skip_whitespace:
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.pos += 1
if self.pos >= self.end:
self.token = self.all_tokens[-1]
return False
self.token = self.all_tokens[self.pos]
if self.token.type == TokenKind.EOF:
return False
return True
def check_next_token(self, skip_whitespace=True):
"""
Returns the token after the current one
Never returns None (returns the EOF token instead)
"""
my_pos = self.pos + 1
if my_pos >= self.end:
return self.all_tokens[-1]
if skip_whitespace:
while self.all_tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
my_pos += 1
if my_pos >= self.end:
return self.all_tokens[-1]
return self.all_tokens[my_pos]
def seek(self, pos):
"""
Move the token offset to position pos
:param pos:
:return: True if pos is a valid position, False otherwise
"""
if pos < 0 or pos >= self.end:
self.token = None
return False
self.pos = pos
self.token = self.all_tokens[self.pos]
return True
def __repr__(self):
return f"ParserInput('{self.original_text}', len={len(self.all_tokens)})"
+438
@@ -0,0 +1,438 @@
from dataclasses import dataclass
from core.error import ErrorObj, SheerkaException
from parsers.tokenizer import Token, TokenKind, Tokenizer
DEFAULT_TAB_SIZE = 4
class ParsingException(SheerkaException):
"""
Generic exception for the parsers
It wraps the underlying error object
"""
def __init__(self, error: ErrorObj):
self.error: ErrorObj = error
def get_error_msg(self) -> str:
return self.error.get_error_msg()
@dataclass()
class ParsingError(ErrorObj):
"""
The input is recognized, but there is a syntax error
"""
tokens: list
message: str
def __eq__(self, other):
if id(self) == id(other):
return True
if not isinstance(other, ParsingError):
return False
if self.message != other.message:
return False
if other.tokens is not None and self.tokens != other.tokens:
return False
return True
def __hash__(self):
return hash(self.message)
def get_error_msg(self) -> str:
return f"Syntax error: {self.message}"
@dataclass
class UnexpectedToken(ErrorObj):
token: Token
expected: TokenKind
def get_error_msg(self) -> str:
return f"Unexpected token {self.token} found while expected {self.expected}"
@dataclass()
class KeywordNotFound(ErrorObj):
keywords: list
def __eq__(self, other):
if id(self) == id(other):
return True
if not isinstance(other, KeywordNotFound):
return False
if self.keywords != other.keywords:
return False
return True
def __hash__(self):
return hash(tuple(self.keywords))
def get_error_msg(self) -> str:
return f"Expected keyword(s) {self.keywords} are not found."
@dataclass()
class UnexpectedEof(ErrorObj):
keyword: str
last_token: Token | None
def get_error_msg(self):
if self.last_token:
return f"Unexpected eof while parsing keyword '{self.keyword}' at index {self.last_token.index}"
else:
return f"Unexpected eof while parsing keyword '{self.keyword}'"
def strip_tokens(tokens, strip_eof=False):
"""
Remove the leading and trailing whitespace and newline tokens
"""
if tokens is None:
return None
start = 0
length = len(tokens)
while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
start += 1
if start == length:
return []
end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
if strip_eof \
else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
end = length - 1
while end > 0 and tokens[end].type in end_tokens:
end -= 1
return tokens[start: end + 1]
def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
"""
Create the source code from the list of tokens
:param tokens: list of tokens
:param custom_switcher: to override the behaviour (the return value) of some token
:param tracker: keep track of the original token value when custom switched
:return:
"""
if tokens is None:
return ""
res = ""
if not hasattr(tokens, "__iter__"):
tokens = [tokens]
switcher = {
}
if custom_switcher:
switcher.update(custom_switcher)
for token in tokens:
value = switcher.get(token.type, lambda t: t.str_value)(token)
res += value
if tracker is not None and custom_switcher and token.type in custom_switcher:
tracker[value] = token
return res
def parse_parts(parser_input, error_sink, keywords, first_token=None, strip=False, allow_multiple=None):
"""
Reads Parser Input and groups the tokens by keywords
ex:
tokens = Tokenizer("as a b c pre u v w where x y z")
keywords = ["as", "pre", "where"]
assert parse_parts(keywords) == {
Keyword("as"): [Token("as"), Token("a"), Token(<ws>), Token("b"), Token(<ws>), Token("c"), Token(<ws>)],
Keyword("pre"): [Token("pre"), Token("u"), Token(<ws>), Token("v"), Token(<ws>), Token("w"), Token(<ws>)],
Keyword("where"): [Token("where"), Token("x"), Token(<ws>), Token("y"), Token(<ws>), Token("z"), Token(<ws>)]}
* The order of appearance of the keywords is not important
"as w pre y where z" and "where z pre y as w" will produce the same dictionary
* I can use double quotes to protect a keyword
where "x y" will produce the entry Keyword("where"): [Token("x"), Token(<ws>), Token("y"), Token(<ws>)]
where 'x y' will produce the entry Keyword("where"): [Token("'x y'")]
* If a keyword does not appear in allow_multiple, it will be recognized only once
tokens = Tokenizer("def concept x is a concept")
keywords = ["concept"], allow_multiple={}
assert parse_parts(keywords) == {
Keyword("concept"): [Token("x"), Token(<ws>), Token("is"), Token(<ws>), Token("concept")]}
* If the keyword appears in allow_multiple, it can be parsed several times
in this case, in the result, one token will represent one occurrence of the keyword (whitespace is discarded)
tokens = Tokenizer("def_var var1 def_var var2")
keywords = ["def_var"], allow_multiple={"def_var"}
assert parse_parts(keywords) == {
Keyword("def_var"): [Token("def_var"), Token("var1"), Token("var2")]}
Long declarations are transformed into a single string token
tokens = Tokenizer("def_var a very long declaration")
keywords = ["def_var"], allow_multiple={"def_var"}
assert parse_parts(keywords) == {
Keyword("def_var"): [Token("def_var"), Token("'a very long declaration'")]}
:param parser_input:
:param error_sink:
:param keywords:
:param first_token: it must be a Keyword
:param strip: if True, the returned tokens will be trimmed
:param allow_multiple: set indicating the keywords that may appear several times
:return: dictionary
"""
if allow_multiple is None:
allow_multiple = set()
def new_part(_token, _colon_mode_activated, _previous, _already_found):
"""
:param _token: current token
:param _colon_mode_activated: colon_mode_activated
:param _previous: previous token
:param _already_found: keywords that are already found
:return:
"""
if _token.value not in keywords:
# not even a keyword!
return False
if _token.value in _already_found and _token.value not in allow_multiple:
# keywords are recognized only once
return False
if not _colon_mode_activated or not _previous:
return True
return _previous.line != _token.line
def manage_buffer(_res, _keyword, _buffer):
stripped = strip_tokens(_buffer)
# manage colon first, to be sure that what is protected by the quotes remains protected
if len(stripped) > 0 and stripped[0].type == TokenKind.COLON:
body = _get_body(stripped[1:], error_sink)
if body:
_res[_keyword].extend(body)
# only add one token per occurrence when the keyword is in allow_multiple
elif _keyword in allow_multiple:
if len(stripped) > 1:
buffer_as_str_token = Token(TokenKind.STRING,
"'" + get_text_from_tokens(stripped) + "'",
stripped[0].index,
stripped[0].line,
stripped[0].column)
_res[_keyword].append(buffer_as_str_token)
else:
_res[_keyword].append(stripped[0])
# replace double-quoted strings by their content
elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"':
_res[_keyword].extend(list(Tokenizer(stripped[0].strip_quote, yield_eof=False)))
elif strip:
_res[_keyword].extend(stripped)
else:
_res[_keyword].extend(_buffer)
if parser_input.token is None:
error_sink.append(KeywordNotFound(keywords))
return None
if parser_input.token.type == TokenKind.WHITESPACE:
parser_input.next_token()
token = parser_input.token
if first_token and token.value != first_token:
error_sink.append(UnexpectedToken(token, first_token))
return None
if token.value not in keywords:
error_sink.append(KeywordNotFound(keywords))
return None
colon_mode_activated = False # if activated, use keyword + colon to start a new keyword definition
previous_token = None
res = {}
keywords_found = set()
keyword = None
buffer = []
# More explanations on colon_mode_activated
# You can use the pattern
# def concept <name> as:
# <tab> xxx
# <tab> yyy
# ...
#
# It allows more readability
# It also permits the usage of other keywords inside the block
# Example
# def concept give the date as:
# from datetime import date # I can use the 'from' keyword !!!
# return date.today()
#
# Note that I can choose to use colon or not
#
# def concept in x days as:
# from datetime import date
# return date.today() - x
# where x > 0
#
# is a valid declaration (there is no colon for the where clause)
# loop through the tokens, and put them in the correct entry of the result dictionary
while True:
if new_part(token, colon_mode_activated, previous_token, keywords_found):
# manage the previous part
if buffer:
manage_buffer(res, keyword, buffer)
buffer.clear()
keyword = token.value
if keyword not in res:
res[keyword] = [token] # to keep track of when it starts
keywords_found.add(token.value)
colon_mode_activated = parser_input.check_next_token().type == TokenKind.COLON
if not parser_input.next_token():
error_sink.append(UnexpectedEof(keyword, token))
break
else:
buffer.append(token)
if not parser_input.next_token(skip_whitespace=False):
break
previous_token = token
token = parser_input.token
# do not forget to flush the buffer
if buffer:
manage_buffer(res, keyword, buffer)
return res
def _skip_whitespaces(tokens):
i = 0
while i < len(tokens) and tokens[i].type == TokenKind.WHITESPACE:
i += 1
return i
def _get_body(tokens, error_sink):
"""
Get the body of a keyword definition
It manages the colon (block) form, but the leading colon must be stripped first
:param tokens: tokens of the part, with the leading colon already removed
:return: the de-indented body tokens, or None on error
"""
def get_tab_size(default_tab_size, text):
# a tab counts as default_tab_size columns, any other character as one
return sum(default_tab_size if c == "\t" else 1 for c in text)
pos = _skip_whitespaces(tokens)
if len(tokens) - pos < 3:
error_sink.append(ParsingError(tokens, "Body is empty or too short."))
return None
if tokens[pos].type != TokenKind.NEWLINE:
error_sink.append(UnexpectedToken(tokens[pos], TokenKind.NEWLINE))
return None
pos += 1
if tokens[pos].type != TokenKind.WHITESPACE:
error_sink.append(UnexpectedToken(tokens[pos], TokenKind.WHITESPACE))
return None
indent_size = get_tab_size(DEFAULT_TAB_SIZE, tokens[pos].value)
pos += 1
i = pos
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
if tokens[i + 1].type != TokenKind.WHITESPACE:
error_sink.append(UnexpectedToken(tokens[i + 1], TokenKind.WHITESPACE))
return None
if get_tab_size(DEFAULT_TAB_SIZE, tokens[i + 1].value) < indent_size:
error_sink.append(ParsingError([tokens[i + 1]], "Invalid indentation."))
return None
tokens[i + 1] = tokens[i + 1].clone()
tokens[i + 1].value = " " * (get_tab_size(DEFAULT_TAB_SIZE, tokens[i + 1].value) - indent_size)
i += 1
return tokens[pos:]
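_get_body strips one level of indentation, using the first indented line as the reference width (tabs expand to DEFAULT_TAB_SIZE columns). A worked example matching the indentation tests below:

# as:
#     def func(x):
#         return x+1
#     func(a)
#
# reference indent = 4, so the body re-renders as
#   "def func(x):\n    return x+1\nfunc(a)"
# a line indented less than the reference is reported as "Invalid indentation"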
class SimpleParser:
def __init__(self, text, skip_whitespace_default_behaviour=True):
self.error_sink = []
self.token: Token = None
self.source: str = ""
self.skip_whitespace = skip_whitespace_default_behaviour
self._iter_tokens = iter(Tokenizer(text))
self._look_ahead = None
def add_error(self, error):
self.error_sink.append(error)
def next_token(self, skip_whitespace=None):
skip_whitespace = self.skip_whitespace if skip_whitespace is None else skip_whitespace
if self.token and self.token.type == TokenKind.EOF:
return False
self.token = self._look_ahead or next(self._iter_tokens)
self.source += self.token.str_value
self._look_ahead = None
if skip_whitespace:
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.token = next(self._iter_tokens)
self.source += self.token.str_value
return self.token.type != TokenKind.EOF
def check_next_token(self, skip_whitespace=None):
skip_whitespace = self.skip_whitespace if skip_whitespace is None else skip_whitespace
if self._look_ahead:
return self._look_ahead
self._look_ahead = next(self._iter_tokens)
if skip_whitespace:
while self._look_ahead.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self._look_ahead = next(self._iter_tokens)
return self._look_ahead
def eat_whitespace(self):
if self._look_ahead is not None:
self.token = self._look_ahead
self.source += self.token.str_value
self._look_ahead = None
while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
self.token = next(self._iter_tokens)
self.source += self.token.str_value
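SimpleParser keeps a single look-ahead token: check_next_token() peeks without consuming, and the next call to next_token() consumes the cached token first. A hedged mini-trace:

# p = SimpleParser("a b")
# p.next_token()           # True,  p.token.value == "a"
# p.check_next_token()     # peeks "b" (whitespace skipped), cursor unchanged
# p.next_token()           # True,  p.token.value == "b" (cached look-ahead)
# p.next_token()           # False, p.token is now the EOF token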
File diff suppressed because it is too large
+28 -46
@@ -5,6 +5,25 @@ from common.global_symbols import VARIABLE_PREFIX
from common.utils import str_concept
class Keywords:
DEF = "def"
CONCEPT = "concept"
RULE = "rule"
FROM = "from"
BNF = "bnf"
AS = "as"
WHERE = "where"
PRE = "pre"
POST = "post"
ISA = "isa"
RET = "ret"
WHEN = "when"
PRINT = "print"
THEN = "then"
AUTO_EVAL = "auto_eval"
DEF_VAR = "def_var"
class TokenKind(Enum):
EOF = "eof"
WHITESPACE = "whitespace"
@@ -138,31 +157,12 @@ class LexerError(Exception):
column: int
class Keywords(Enum):
DEF = "def"
CONCEPT = "concept"
RULE = "rule"
FROM = "from"
BNF = "bnf"
AS = "as"
WHERE = "where"
PRE = "pre"
POST = "post"
ISA = "isa"
RET = "ret"
WHEN = "when"
PRINT = "print"
THEN = "then"
AUTO_EVAL = "auto_eval"
DEF_VAR = "def_var"
class Tokenizer:
"""
Class that can iterate on the tokens
"""
def __init__(self, text, yield_eof=True, parse_word=False):
def __init__(self, text, yield_eof=True, parse_word=False, parse_quote=False):
self.text = text
self.text_len = len(text)
self.column = 1
@@ -170,6 +170,7 @@ class Tokenizer:
self.i = 0
self.yield_eof = yield_eof
self.parse_word = parse_word
self.parse_quote = parse_quote
def __iter__(self):
@@ -385,7 +386,7 @@ class Tokenizer:
yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
self.i += len(number)
self.column += len(number)
elif c == "'" and self.i > 0 and self.text[self.i - 1] != " ":
elif self.parse_quote and c == "'" and self.i > 0 and self.text[self.i - 1] != " ":
yield Token(TokenKind.QUOTE, "'", self.i, self.line, self.column)
self.i += 1
self.column += 1
@@ -542,28 +543,9 @@ class Tokenizer:
return result
def strip_tokens(tokens, strip_eof=False):
"""
Remove the starting and trailing spaces and newline
"""
if tokens is None:
return None
start = 0
length = len(tokens)
while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
start += 1
if start == length:
return []
end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
if strip_eof \
else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
end = length - 1
while end > 0 and tokens[end].type in end_tokens:
end -= 1
return tokens[start: end + 1]
def remove_positions(self):
for token in self:
token.line = 1
token.column = 1
token.index = 0
yield token
+19 -15
@@ -11,8 +11,9 @@ from core.BuiltinConcepts import BuiltinConcepts
from core.ExecutionContext import ExecutionContext
from core.ReturnValue import ReturnValue
from core.concept import Concept, ConceptDefaultPropsAttrs, ConceptMetadata, DefinitionType
from core.error import ErrorContext, SheerkaException
from parsers.tokenizer import TokenKind, Tokenizer, strip_tokens
from core.error import ErrorContext, ErrorObj
from parsers.parser_utils import strip_tokens
from parsers.tokenizer import TokenKind, Tokenizer
from services.BaseService import BaseService
PROPERTIES_FOR_DIGEST = ("name", "key",
@@ -22,26 +23,25 @@ PROPERTIES_FOR_DIGEST = ("name", "key",
"desc", "bound_body", "autouse", "props", "variables", "parameters")
class ConceptAlreadyDefined(SheerkaException):
def __init__(self, concept: ConceptMetadata, already_defined_id: str):
self.concept = concept
self.already_defined_id = already_defined_id
@dataclass
class ConceptAlreadyDefined(ErrorObj):
concept: ConceptMetadata
already_defined_id: str
def get_error_msg(self) -> str:
return f"Concept {self.concept.name}, is already defined (id={self.already_defined_id})"
@dataclass
class InvalidBnf(SheerkaException):
def __init__(self, bnf: str):
self.bnf = bnf
class InvalidBnf(ErrorObj):
bnf: str
def get_error_msg(self) -> str:
return f"Invalid bnf '{self.bnf}'"
@dataclass
class FirstItemError(SheerkaException):
class FirstItemError(ErrorObj):
pass
@@ -78,6 +78,7 @@ class ConceptManager(BaseService):
self.sheerka.bind_service_method(self.NAME, self.get_by_name, False)
self.sheerka.bind_service_method(self.NAME, self.get_by_id, False)
self.sheerka.bind_service_method(self.NAME, self.get_by_key, False)
self.sheerka.bind_service_method(self.NAME, self.is_a_concept_name, False)
register_concept_cache = self.sheerka.om.register_concept_cache
@@ -108,11 +109,11 @@ class ConceptManager(BaseService):
_(7, BuiltinConcepts.INVALID_CONCEPT, desc="invalid concept", variables=("concept_id", "reason"))
_(8, BuiltinConcepts.EVALUATION_ERROR, desc="evaluation error", variables=("concept", "reason"))
self.init_log.debug('%s builtin concepts created',
len(self.sheerka.om.current_cache_manager().concept_caches))
def define_new_concept(self, context: ExecutionContext,
def define_new_concept(self,
context: ExecutionContext,
name: str,
is_builtin: bool = False, # is the concept defined by Sheerka itself
is_unique: bool = False, # is the concept a singleton
@@ -263,7 +264,7 @@ class ConceptManager(BaseService):
Returns a concept metadata, using its name
:param key:
:type key:
:return:
:return: NotFound if not found
:rtype:
"""
return self.sheerka.om.get(self.CONCEPTS_BY_NAME_ENTRY, key)
@@ -273,7 +274,7 @@ class ConceptManager(BaseService):
Returns a concept metadata, using its id
:param concept_id:
:type concept_id:
:return:
:return: NotFound if not found
:rtype:
"""
return self.sheerka.om.get(self.CONCEPTS_BY_ID_ENTRY, concept_id)
@@ -283,7 +284,7 @@ class ConceptManager(BaseService):
Returns a concept metadata, using its key
:param key:
:type key:
:return:
:return: NotFound if not found
:rtype:
"""
return self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, key)
@@ -291,6 +292,9 @@ class ConceptManager(BaseService):
def get_all_concepts(self):
return list(sorted(self.sheerka.om.list(self.CONCEPTS_BY_ID_ENTRY), key=lambda item: int(item.id)))
def is_a_concept_name(self, name):
return self.sheerka.om.exists(self.CONCEPTS_BY_NAME_ENTRY, name)
@staticmethod
def compute_metadata_digest(metadata: ConceptMetadata):
"""
+11
@@ -4,7 +4,10 @@ from os import path
import pytest
from core.BuiltinConcepts import BuiltinConcepts
from core.Sheerka import Sheerka
from helpers import _rv
from parsers.ParserInput import ParserInput
from sdp.sheerkaDataProvider import SheerkaDataProvider
@@ -32,6 +35,14 @@ class BaseTest:
return SheerkaDataProvider("mem://", name="test")
class BaseParserTest(BaseTest):
@staticmethod
def get_parser_input(context, command):
pi = ParserInput(command)
pi.init()
return _rv(context.sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
class UsingFileBasedSheerka(BaseTest):
TESTS_ROOT_DIRECTORY = path.abspath("../build/tests")
SHEERKA_ROOT_DIR = os.path.join(TESTS_ROOT_DIRECTORY, ".sheerka")
+7 -2
@@ -1,10 +1,11 @@
from dataclasses import dataclass
from enum import Enum
import pytest
from common.utils import decode_enum, dict_product, get_class, get_text_from_tokens, str_concept, to_dict, unstr_concept
from helpers import get_concept
from parsers.tokenizer import Keywords, Token, TokenKind, Tokenizer
from parsers.tokenizer import Token, TokenKind, Tokenizer
@dataclass
@@ -28,6 +29,10 @@ class Obj2:
prop2: object
class MyEnum(Enum):
CONCEPT = "concept"
def get_tokens(lst):
res = []
for e in lst:
@@ -106,7 +111,7 @@ def test_i_can_str_concept():
("xxx", None),
("xxx.", None),
("xxx.yyy", None),
("parsers.tokenizer.Keywords.CONCEPT", Keywords.CONCEPT),
("tests.common.test_utils.MyEnum.CONCEPT", MyEnum.CONCEPT),
])
def test_i_can_decode_enum(text, expected):
actual = decode_enum(text)
@@ -0,0 +1,94 @@
import pytest
from base import BaseTest
from common.global_symbols import NotInit
from conftest import NewOntology
from core.BuiltinConcepts import BuiltinConcepts
from evaluators.DefConceptEvaluator import DefConceptEvaluator
from evaluators.RecognizeDefConcept import RecognizeDefConcept
from helpers import _rv, _rvf, get_concepts
from parsers.ConceptDefinitionParser import ConceptDefinition
from parsers.ParserInput import ParserInput
from parsers.parser_utils import UnexpectedEof
def get_ret_val_from(context, command):
pi = ParserInput(command)
pi.init()
parser_start = _rv(context.sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
ret = RecognizeDefConcept().eval(context, None, parser_start)
return ret.new[0]
class TestDefConceptEvaluator(BaseTest):
@pytest.fixture()
def evaluator(self, sheerka):
return sheerka.evaluators[DefConceptEvaluator.NAME]
def test_i_can_match(self, sheerka, context, evaluator):
ret_val = _rv(ConceptDefinition(name="foo"))
assert evaluator.matches(context, ret_val).status is True
ret_val = _rv("Not a ConceptDefinition class")
assert evaluator.matches(context, ret_val).status is False
ret_val = _rvf(ConceptDefinition(name="foo")) # status is false
assert evaluator.matches(context, ret_val).status is False
def test_i_can_add_a_new_concept(self, context, evaluator):
ret_val_input = get_ret_val_from(context, "def concept foo")
res = evaluator.eval(context, None, ret_val_input)
assert len(res.new) == 1
assert res.new[0].status
assert context.sheerka.isinstance(res.new[0].value, BuiltinConcepts.NEW_CONCEPT)
assert res.eaten == [ret_val_input]
def test_i_cannot_add_when_definition_validation_fails(self, context, evaluator):
ret_val_input = get_ret_val_from(context, "def concept foo from bnf a |") # only one '|' is required
res = evaluator.eval(context, None, ret_val_input)
assert len(res.new) == 1
assert not res.new[0].status
assert isinstance(res.new[0].value.value, UnexpectedEof)
assert res.eaten == []
@pytest.mark.parametrize("concept_def, expected", [
(ConceptDefinition(name="inc a", ret="a"), [("a", NotInit)]),
(ConceptDefinition(name="inc a", where="isinstance(a, int)"), [("a", NotInit)]),
(ConceptDefinition(name="inc a", def_var=[("a", 10)]), [("a", 10)]),
(ConceptDefinition(name="inc a", def_var=["a"]), [("a", NotInit)]),
(ConceptDefinition(name="a + b", where="a is an int", ret="b"), [("a", NotInit), ("b", NotInit)]),
(ConceptDefinition(name="b + a", where="a is an int", ret="b"), [("b", NotInit), ("a", NotInit)]),
])
def test_i_can_get_variables(self, context, evaluator, concept_def, expected):
assert evaluator._get_variables(context, concept_def) == expected
def test_concept_name_is_not_considered_as_variable(self, context, evaluator):
with NewOntology(context, "test_concept_name_is_not_considered_as_variable"):
get_concepts(context, "one", use_sheerka=True)
concept_def = ConceptDefinition(name="add one + a", where="one is an int")
assert evaluator._get_variables(context, concept_def) == []
@pytest.mark.parametrize("concept_def, expected", [
("def concept add a b where a and b", [("a", NotInit), ("b", NotInit)]),
("def concept add a b where a ret b", [("a", NotInit), ("b", NotInit)]),
("def concept add a b where xxx a and yyy b", [("a", NotInit), ("b", NotInit)]),
("def concept add b a where xxx a and yyy b", [("b", NotInit), ("a", NotInit)]),
("def concept add a b def_var a,b", [("a", NotInit), ("b", NotInit)]),
("def concept add a b def_var a b", [("a", NotInit), ("b", NotInit)]),
("def concept add a b def_var a def_var b", [("a", NotInit), ("b", NotInit)]),
("def concept add a b def_var a=10 def_var b", [("a", 10), ("b", NotInit)]),
("def concept add a b def_var a='hello' def_var b", [("a", "'hello'"), ("b", NotInit)]),
])
def test_i_can_add_a_new_concept_with_variables(self, context, evaluator, concept_def, expected):
with NewOntology(context, "test_i_can_add_a_new_concept_with_variables"):
ret_val_input = get_ret_val_from(context, concept_def)
res = evaluator.eval(context, None, ret_val_input)
assert len(res.new) == 1
assert res.new[0].status
new_concept = res.new[0].value
assert context.sheerka.isinstance(new_concept, BuiltinConcepts.NEW_CONCEPT)
assert new_concept.body.variables == expected
+5 -12
@@ -1,6 +1,6 @@
import pytest
from base import BaseTest
from base import BaseParserTest
from core.BuiltinConcepts import BuiltinConcepts
from core.error import ErrorContext
from evaluators.PythonParser import PythonParser
@@ -8,7 +8,7 @@ from helpers import _rv, _rvf
from parsers.ParserInput import ParserInput
class TestPythonParser(BaseTest):
class TestPythonParser(BaseParserTest):
@pytest.fixture()
def evaluator(self, sheerka):
return sheerka.evaluators[PythonParser.NAME]
@@ -28,9 +28,7 @@ class TestPythonParser(BaseTest):
"a = 20"
])
def test_i_can_parse_python(self, sheerka, context, evaluator, text):
pi = ParserInput(text)
pi.init()
start = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
start = self.get_parser_input(context, text)
res = evaluator.eval(context, None, start)
@@ -43,9 +41,7 @@ class TestPythonParser(BaseTest):
def test_invalid_python_are_rejected(self, sheerka, context, evaluator):
text = "1 + "
pi = ParserInput(text)
pi.init()
start = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
start = self.get_parser_input(context, text)
res = evaluator.eval(context, None, start)
@@ -57,9 +53,7 @@ class TestPythonParser(BaseTest):
assert ret_val.parents == [start]
def test_i_can_detect_concepts(self, sheerka, context, evaluator):
pi = ParserInput("c:one: + c:two:")
pi.init()
start = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
start = self.get_parser_input(context, "c:one: + c:two:")
res = evaluator.eval(context, None, start)
@@ -72,4 +66,3 @@ class TestPythonParser(BaseTest):
assert len(ret_val.value.pf.namespace) == 2
assert ret_val.value.pf.namespace["__C__KEY_one__ID_00None00__C__"].value == ("one", None)
assert ret_val.value.pf.namespace["__C__KEY_two__ID_00None00__C__"].value == ("two", None)
@@ -0,0 +1,51 @@
import pytest
from base import BaseParserTest
from core.BuiltinConcepts import BuiltinConcepts
from core.error import ErrorContext
from evaluators.RecognizeDefConcept import RecognizeDefConcept
from helpers import _rv, _rvf
from parsers.ConceptDefinitionParser import ConceptDefinition
from parsers.ParserInput import ParserInput
class TestRecognizeDefConcept(BaseParserTest):
@pytest.fixture()
def evaluator(self, sheerka):
return sheerka.evaluators[RecognizeDefConcept.NAME]
def test_i_can_match(self, sheerka, context, evaluator):
ret_val = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("def concept")))
assert evaluator.matches(context, ret_val).status is True
ret_val = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("def")))
assert evaluator.matches(context, ret_val).status is False
ret_val = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("other text")))
assert evaluator.matches(context, ret_val).status is False
ret_val = _rv(sheerka.newn(BuiltinConcepts.UNKNOWN_CONCEPT)) # it responds to PARSER_INPUT only
assert evaluator.matches(context, ret_val).status is False
ret_val = _rvf(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("def concept"))) # status is false
assert evaluator.matches(context, ret_val).status is False
def test_i_can_recognize_a_def_concept(self, context, evaluator):
ret_val_input = self.get_parser_input(context, "def concept one as 1")
res = evaluator.eval(context, None, ret_val_input)
assert len(res.new) == 1
assert res.new[0].status
assert isinstance(res.new[0].value, ConceptDefinition)
assert res.eaten == [ret_val_input]
def test_i_can_manage_when_def_concept_fails(self, context, evaluator):
ret_val_input = self.get_parser_input(context, "def concept")
res = evaluator.eval(context, None, ret_val_input)
assert len(res.new) == 1
assert not res.new[0].status
assert isinstance(res.new[0].value, ErrorContext)
assert res.eaten == []
+167
@@ -0,0 +1,167 @@
import pytest
from common.utils import unstr_concept
from conftest import NewOntology
from helpers import get_concepts
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.parser_utils import UnexpectedEof, UnexpectedToken
from parsers.peg_parser import ConceptExpression, OneOrMore, Optional, OrderedChoice, RegExMatch, Sequence, StrMatch, \
VariableExpression, ZeroOrMore
def _cexp(concept_str, rule_name=None):
concept_name, concept_id = unstr_concept(concept_str)
return ConceptExpression(concept_id, rule_name or concept_name)
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("r'str'", RegExMatch("str")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(
StrMatch("1"),
OrderedChoice(StrMatch("2"), StrMatch("3")),
OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("r'str'=var", RegExMatch("str", rule_name="var")),
("r'foo'?=var", Optional(RegExMatch("foo"), rule_name="var")),
("(r'foo'?)=var", Optional(RegExMatch("foo"), rule_name="var")),
("r'foo'*=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'*)=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'+=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'+)=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'=var?", Optional(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)?", Optional(RegExMatch("foo", rule_name="var"))),
("r'foo'=var*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("r'foo'=var+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'*=x 'b'+=y)=z", Sequence(
ZeroOrMore(StrMatch("a"), rule_name="x"),
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
("'--filter'",
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
])
def test_i_can_parse_simple_bnf_definition(context, expression, expected):
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res == expected
assert not parser.error_sink
assert parser.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", _cexp("c:foo#1001:")),
("foo*", ZeroOrMore(_cexp("c:foo#1001:"))),
("foo 'and' bar+", Sequence(_cexp("c:foo#1001:"), StrMatch("and"), OneOrMore(_cexp("c:bar#1002:")))),
("foo | bar?", OrderedChoice(_cexp("c:foo#1001:"), Optional(_cexp("c:bar#1002:")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), _cexp("c:var#1003:"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), _cexp("c:var#1003:"))),
("foo=f", _cexp("c:foo#1001:", "f")),
("foo=f 'constant'", Sequence(_cexp("c:foo#1001:", "f"), StrMatch("constant"))),
("def 'concept'", Sequence(_cexp("c:def#1004:"), StrMatch("concept"))),
("c:foo:", _cexp("c:foo#1001:")),
("c:#1001:", _cexp("c:foo#1001:")),
])
def test_i_can_parse_bnf_definition_with_concepts(context, expression, expected):
with NewOntology(context, "test_i_can_parse_bnf_definition_with_concept"):
get_concepts(context, "foo", "bar", "var", "def", use_sheerka=True)
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res == expected
assert not parser.error_sink
assert parser.source == expression
@pytest.mark.parametrize("expression, expected", [
("x", VariableExpression("x")),
("x bar", Sequence(VariableExpression("x"), _cexp("c:bar#1001:"))),
("bar x", Sequence(_cexp("c:bar#1001:"), VariableExpression("x"))),
("x 'and' bar", Sequence(VariableExpression("x"), StrMatch("and"), _cexp("c:bar#1001:"))),
("x | bar", OrderedChoice(VariableExpression("x"), _cexp("c:bar#1001:"))),
("x*", ZeroOrMore(VariableExpression("x"))),
("x+", OneOrMore(VariableExpression("x"))),
("'str' = x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
("'str''='x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
("foo=x", VariableExpression("x")),
])
def test_i_can_parse_bnf_definition_with_variables(context, expression, expected):
with NewOntology(context, "test_i_can_parse_bnf_definition_with_variables"):
get_concepts(context, "bar", use_sheerka=True)
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res == expected
assert not parser.error_sink
assert parser.source == expression
def test_i_can_parse_when_the_concept_is_still_under_creation(context):
# I want to parse something like
# def concept add from bnf add | mult
# 'add' is used while being under construction
# 'add' must not be detected as a variable
parser = BnfDefinitionParser(context, "add | 'mult'", concept_name="add")
res = parser.parse()
assert res == OrderedChoice(_cexp("c:add:"), StrMatch("mult"))
assert not parser.error_sink
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEof),
("1|", UnexpectedEof),
("(1|)", UnexpectedToken),
("1=", UnexpectedToken),
])
def test_i_can_detect_errors(context, expression, error):
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res is None
assert len(parser.error_sink) > 0
assert isinstance(parser.error_sink[0], error)
+280
@@ -0,0 +1,280 @@
import pytest
from common.global_symbols import NotInit
from core.concept import DefinitionType
from parsers.ConceptDefinitionParser import ConceptDefinition, ConceptDefinitionParser
from parsers.ParserInput import ParserInput
from parsers.parser_utils import ParsingError, UnexpectedEof, UnexpectedToken
from parsers.tokenizer import Keywords, Token, TokenKind
def get_parser_input(text):
pi = ParserInput(text)
assert pi.init()
return pi
class TestConceptDefinitionParser:
@pytest.fixture()
def parser(self, sheerka):
return ConceptDefinitionParser()
@pytest.mark.parametrize("text", [
"",
" "])
def test_i_can_detect_empty_input(self, parser, text):
pi = get_parser_input(text)
res = parser.parse(pi)
assert res is None
assert parser.error_sink == [UnexpectedEof(Keywords.DEF, None)]
def test_must_start_with_def_keyword(self, parser):
pi = get_parser_input("hello")
res = parser.parse(pi)
assert res is None
assert parser.error_sink == [UnexpectedToken(Token(TokenKind.IDENTIFIER, "hello", 0, 1, 1), Keywords.DEF)]
@pytest.mark.parametrize("text, expected", [
("def concept hello", ConceptDefinition(name="hello")),
("def concept hello ", ConceptDefinition(name="hello")),
("def concept a + b", ConceptDefinition(name="a + b")),
("def concept a+b", ConceptDefinition(name="a + b")),
("def concept 'a+b'+c", ConceptDefinition(name="'a+b' + c")),
('def concept "a+b"+c', ConceptDefinition(name="a+b + c")),
('def concept "as if"', ConceptDefinition(name="as if")),
("def concept 'as if'", ConceptDefinition(name="'as if'")),
("def concept 'as' \"if\"", ConceptDefinition(name="'as' if")),
('def concept \'as\' "if"', ConceptDefinition(name="'as' if")),
])
def test_i_can_parse_def_concept_name(self, parser, text, expected):
pi = get_parser_input(text)
actual = parser.parse(pi)
assert actual == expected
def test_concept_name_is_mandatory(self, parser):
pi = get_parser_input("def concept as foo")
actual = parser.parse(pi)
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], ParsingError)
assert parser.error_sink[0].message == "Name is mandatory."
assert actual is None
def test_new_line_is_not_allowed_in_concept_name(self, parser):
pi = get_parser_input("def concept complicated \n name as foo")
actual = parser.parse(pi)
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], ParsingError)
assert parser.error_sink[0].message == "Newlines are not allowed in name."
assert actual is None
@pytest.mark.parametrize("text, part", [
("def concept foo as where True", "as"),
("def concept foo where as 1 + 1", "where"),
("def concept foo pre as 1 + 1", "pre"),
("def concept foo post as 1 + 1", "post"),
("def concept foo ret as 1 + 1", "ret"),
])
def test_empty_declarations_are_not_allowed(self, parser, text, part):
pi = get_parser_input(text)
actual = parser.parse(pi)
assert actual is None
assert len(parser.error_sink) == 1
assert isinstance(parser.error_sink[0], ParsingError)
assert parser.error_sink[0].message == f"Empty '{part}' declaration."
def test_empty_parts_are_not_initialized(self, parser):
pi = get_parser_input("def concept foo")
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.body == ""
assert actual.where == ""
assert actual.pre == ""
assert actual.post == ""
assert actual.ret == ""
def test_i_can_manage_all_parts(self, parser):
concept_def = "def concept foo"
concept_def += " where my where clause"
concept_def += " pre my pre clause"
concept_def += " as my body"
concept_def += " ret my return value"
concept_def += " post my post condition"
pi = get_parser_input(concept_def)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.body == "my body"
assert actual.where == "my where clause"
assert actual.pre == "my pre clause"
assert actual.post == "my post condition"
assert actual.ret == "my return value"
@pytest.mark.parametrize("body", [
"c:#1001: is an int",
"c:one: is an int",
"'one' is an int",
'"one" is an in',
])
def test_i_can_manage_special_tokens_in_part(self, parser, body):
text = f"def concept foo as {body}"
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.body == body
@pytest.mark.parametrize("text, expected_type, expected_definition, ", [
("def concept foo from def 'hello world'", DefinitionType.DEFAULT, "'hello world'"),
("def concept foo from 'hello world'", DefinitionType.DEFAULT, "'hello world'"),
("def concept foo from bnf my bnf definition", DefinitionType.BNF, "my bnf definition"),
])
def test_i_can_set_concept_definition(self, parser, text, expected_type, expected_definition):
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.definition_type == expected_type
assert actual.definition == expected_definition
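The three cases above encode a small dispatch: a leading 'bnf' selects DefinitionType.BNF, a leading 'def' is consumed as an explicit marker for the default type, and anything else is the definition itself with the default type. A hypothetical word-level sketch of that mapping (the real parser works on tokens, not split words):

from core.concept import DefinitionType

# Hypothetical sketch of the 'from' clause dispatch implied above:
def parse_from_clause(text):
    words = text.split()
    if words and words[0] == "bnf":
        return DefinitionType.BNF, " ".join(words[1:])
    if words and words[0] == "def":  # explicit marker for the default type
        return DefinitionType.DEFAULT, " ".join(words[1:])
    return DefinitionType.DEFAULT, " ".join(words)

# parse_from_clause("bnf my bnf definition") -> (DefinitionType.BNF, "my bnf definition")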
@pytest.mark.parametrize("text", [
"def concept foo from where True",
"def concept foo from bnf where True",
"def concept foo from def where True",
"def concept foo from bnf",
"def concept foo from def ",
])
def test_empty_definitions_are_not_allowed(self, parser, text):
pi = get_parser_input(text)
actual = parser.parse(pi)
assert actual is None
assert parser.error_sink[0].message == "Empty 'from' declaration."
def test_i_can_parse_multiline_definition(self, parser):
text = """
def concept add one to a as
def func(x):
return x+1
func(a)
"""
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.body == "def func(x):\n return x+1\nfunc(a)"
def test_i_can_parse_indentation_mode(self, parser):
text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
"""
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.body == "def func(x):\n return x+1\nfunc(a)"
def test_i_can_detect_invalid_indentation(self, parser):
text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
"""
pi = get_parser_input(text)
actual = parser.parse(pi)
assert actual is None
assert len(parser.error_sink) > 0
def test_i_can_use_colon_to_protect_keywords(self, parser):
text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.body == "from datetime import date\ntoday = date.today()"
assert actual.definition == "give me the date !"
def test_i_can_parse_bnf_concept_with_regex(self, parser):
text = "def concept sha512 from bnf number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.definition == "number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"
@pytest.mark.parametrize("text, expected", [
("def concept foo auto_eval True", True),
("def concept foo auto_eval true", True),
("def concept foo auto_eval False", False),
("def concept foo auto_eval false", False),
])
def test_i_can_parse_auto_eval(self, parser, text, expected):
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.auto_eval == expected
def test_auto_eval_is_set_to_false_by_default(self, parser):
pi = get_parser_input("def concept foo")
actual = parser.parse(pi)
assert actual.auto_eval is False
def test_empty_auto_eval_is_not_allowed(self, parser):
pi = get_parser_input("def concept foo auto_eval as 1")
actual = parser.parse(pi)
assert actual is None
assert parser.error_sink[0].message == "Empty 'auto_eval' declaration."
def test_i_cannot_parse_wrong_value(self, parser):
pi = get_parser_input("def concept foo auto_eval wrong_value")
actual = parser.parse(pi)
assert actual is None
assert parser.error_sink[0].message == "Invalid 'auto_eval' declaration (wrong_value is not recognized)"
@pytest.mark.parametrize("text, expected", [
("def concept foo def_var var", [("var", NotInit)]),
("def concept foo def_var var1 def_var var2", [("var1", NotInit), ("var2", NotInit)]),
("def concept foo def_var var1 var2", [("var1", NotInit), ("var2", NotInit)]),
("def concept foo def_var var1, var2", [("var1", NotInit), ("var2", NotInit)]),
("def concept foo def_var var1=10", [("var1", 10)]),
("def concept foo def_var var1 = 10", [("var1", 10)]),
("def concept foo def_var var1 = 'hello'", [("var1", "'hello'")]),
("def concept foo def_var var1 = hello", [("var1", "hello")]),
("def concept foo def_var var1, var2 = 10", [("var1", NotInit), ("var2", 10)]),
("def concept foo def_var var1='hello', var2 = 10", [("var1", "'hello'"), ("var2", 10)]),
("def concept foo def_var var1='hello' var2 = 10", [("var1", "'hello'"), ("var2", 10)]),
])
def test_i_can_parse_variable_definitions(self, parser, text, expected):
pi = get_parser_input(text)
actual = parser.parse(pi)
assert isinstance(actual, ConceptDefinition)
assert actual.def_var == expected
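The cases above describe a small variable-list grammar: names separated by commas or whitespace, each optionally followed by '= value', with unassigned names defaulting to NotInit and integer literals converted. A regex approximation of a single def_var clause (the actual parser works on tokens, and also accepts repeated def_var keywords):

import re
from common.global_symbols import NotInit

# Regex approximation of the def_var list grammar shown above:
def parse_def_var(text):
    result = []
    for m in re.finditer(r"(\w+)\s*(?:=\s*('[^']*'|\S+))?", text):
        name, value = m.group(1), m.group(2)
        if value is None:
            result.append((name, NotInit))
        elif value.isdigit():
            result.append((name, int(value)))
        else:
            result.append((name, value))
    return result

# parse_def_var("var1='hello', var2 = 10") -> [("var1", "'hello'"), ("var2", 10)]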
def test_empty_def_var_is_not_allowed(self, parser):
pi = get_parser_input("def concept foo def_var as 1")
actual = parser.parse(pi)
assert actual is None
assert parser.error_sink[0].message == "Empty 'def_var' declaration."
+128
View File
@@ -8,6 +8,8 @@ def test_i_can_parser_input():
parser_input = ParserInput("def concept a")
assert parser_input.init() is True
assert parser_input.exception is None
assert parser_input.pos == -1
assert parser_input.end == 6
def test_i_can_detect_errors():
@@ -38,3 +40,129 @@ def test_i_must_call_init_before_call_as_text():
parser_input.as_text()
assert ex.value.args[0] == "You must call init() first !"
def test_i_can_get_next_token():
parser_input = ParserInput("def concept a")
parser_input.init()
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "def"
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "concept"
assert parser_input.next_token(skip_whitespace=False) is True
assert parser_input.token.type == TokenKind.WHITESPACE
assert parser_input.token.value == " "
assert parser_input.next_token(skip_whitespace=False) is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "a"
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
def test_next_after_eof_is_eof():
parser_input = ParserInput("hi")
parser_input.init()
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "hi"
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
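Returning False at EOF and then staying at EOF makes the standard consumption loop safe with no end-of-input special case. Typical usage, given the behavior asserted above:

from parsers.ParserInput import ParserInput

# The EOF-idempotent next_token() lets the loop terminate cleanly:
parser_input = ParserInput("def concept a")
parser_input.init()
while parser_input.next_token():
    print(parser_input.token.type, parser_input.token.value)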
def test_i_can_manage_blank_input():
parser_input = ParserInput(" ")
parser_input.init()
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
def test_i_can_manage_blank_input_when_skip_whitespace_is_false():
parser_input = ParserInput(" ")
parser_input.init()
assert parser_input.next_token(skip_whitespace=False) is True
assert parser_input.token.type == TokenKind.WHITESPACE
assert parser_input.token.value == " "
assert parser_input.next_token(skip_whitespace=False) is False
assert parser_input.token.type == TokenKind.EOF
def test_i_can_reset():
parser_input = ParserInput("hello world ")
parser_input.init()
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "hello"
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "world"
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
parser_input.reset()
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "hello"
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "world"
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
def test_i_can_parse_when_input_ends_by_white_space():
parser_input = ParserInput("hello world ")
parser_input.init()
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "hello"
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "world"
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
def test_i_can_parse_when_input_starts_by_white_space():
parser_input = ParserInput(" hello world")
parser_input.init()
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "hello"
assert parser_input.next_token() is True
assert parser_input.token.type == TokenKind.IDENTIFIER
assert parser_input.token.value == "world"
assert parser_input.next_token() is False
assert parser_input.token.type == TokenKind.EOF
+95
View File
@@ -0,0 +1,95 @@
import pytest
from parsers.ParserInput import ParserInput
from parsers.parser_utils import parse_parts, strip_tokens
from parsers.tokenizer import Keywords, Tokenizer
def compare_results(actual, expected, compare_str=False):
resolved_expected = {}
for k, v in expected.items():
if isinstance(v, str):
# case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}
tokens = list(Tokenizer(v, yield_eof=False))
resolved_expected[k] = [tokens[0]] + tokens[2:]  # keep the keyword, drop the single whitespace token after it
else:
# case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
resolved_expected[k] = v
def get_better_representation(value):
better_repr = {}
for k, tokens in value.items():
# avoid shadowing the 'value' parameter while joining the token texts
joined = "".join(t.str_value if compare_str else t.repr_value for t in tokens[1:])
better_repr[k] = [tokens[0].repr_value, joined]
return better_repr
# it's easier to compare two lists of strings
actual_to_compare = get_better_representation(actual)
expected_to_compare = get_better_representation(resolved_expected)
assert actual_to_compare == expected_to_compare
def get_tokens(lst):
"""
Returns a list of Tokens, for a list of item
:param lst:
:type lst:
:return:
:rtype:
"""
return list(Tokenizer(lst, yield_eof=False).remove_positions())
@pytest.mark.parametrize("input_as_list, expected_as_list", [
([" "], []),
([" ", "one"], ["one"]),
(["one", " "], ["one"]),
([" ", "one", " "], ["one"]),
(["\n", "one"], ["one"]),
(["one", "\n"], ["one"]),
(["\n", "one", "\n"], ["one"]),
([" ", "\n", "one"], ["one"]),
(["one", " ", "\n"], ["one"]),
([" ", "\n", "one", " ", "\n"], ["one"]),
(["\n", " ", "one"], ["one"]),
(["one", "\n", " "], ["one"]),
(["\n", " ", "one", "\n", " "], ["one"]),
([" ", "\n", " ", "one"], ["one"]),
(["one", " ", "\n", " "], ["one"]),
([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),
(["\n", " ", "\n", "one"], ["one"]),
(["one", "\n", " ", "\n"], ["one"]),
(["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),
])
def test_i_can_strip(input_as_list, expected_as_list):
actual = strip_tokens(get_tokens(input_as_list)) # KSI 20201007 Why not use Tokenizer ?!! For perf ?
expected = get_tokens(expected_as_list)
assert actual == expected
@pytest.mark.parametrize("text, strip, expected", [
("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", False, {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", False, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
(" when xxx", False, {Keywords.WHEN: "when xxx"}),
("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
(" when xxx", True, {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(text, strip, expected):
parser_input = ParserInput(text)
parser_input.init()
parser_input.next_token()
error_sink = []
res = parse_parts(parser_input, error_sink, ["when", "print"], strip=strip)
compare_results(res, expected)
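What parse_parts does, in short: it walks the token stream and opens a new section at each recognized keyword, so every part owns its keyword token plus everything up to the next keyword. A rough sketch over plain words (hypothetical; the real function consumes ParserInput tokens and reports problems to error_sink):

# Rough word-level sketch of the sectioning that parse_parts performs:
def split_by_keywords(words, keywords, strip=False):
    parts, current = {}, None
    for word in words:
        if word in keywords:
            current = word
            parts[current] = [word]
        elif current is not None:
            parts[current].append(word)
    if strip:
        parts = {k: [w for w in v if w.strip()] for k, v in parts.items()}
    return parts

# split_by_keywords("print xxx yyy when uuu vvv".split(), ["when", "print"])
# -> {"print": ["print", "xxx", "yyy"], "when": ["when", "uuu", "vvv"]}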
+12 -5
View File
@@ -5,7 +5,7 @@ from parsers.tokenizer import LexerError, Token, TokenKind, Tokenizer
def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
source += "$£€!_identifier°~_^\\`==#__var__10r/regex\nregex/r:xxx#1:**//%that's"
source += "$£€!_identifier°~_^\\`==#__var__10r/regex\nregex/r:xxx#1:**//%"
tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -61,11 +61,8 @@ def test_i_can_tokenize():
assert tokens[51] == Token(TokenKind.STARSTAR, "**", 143, 7, 15)
assert tokens[52] == Token(TokenKind.SLASHSLASH, "//", 145, 7, 17)
assert tokens[53] == Token(TokenKind.PERCENT, "%", 147, 7, 19)
assert tokens[54] == Token(TokenKind.IDENTIFIER, "that", 148, 7, 20)
assert tokens[55] == Token(TokenKind.QUOTE, "'", 152, 7, 24)
assert tokens[56] == Token(TokenKind.IDENTIFIER, "s", 153, 7, 25)
assert tokens[57] == Token(TokenKind.EOF, '', 154, 7, 26)
assert tokens[54] == Token(TokenKind.EOF, '', 148, 7, 20)
@pytest.mark.parametrize("text, expected", [
@@ -209,3 +206,13 @@ def test_i_can_parse_regex_token(text, expected):
assert tokens[0].str_value == "r" + expected
assert tokens[0].repr_value == "r" + expected
assert tokens[0].strip_quote == expected[1:-1]
@pytest.mark.parametrize("text, parse_quote, expected", [
("a='hello'", False, ["a", "=", "'hello'"]),
("a='hello'", True, ["a", "=", "'", "hello", "'"]),
("a= 'hello'", True, ["a", "=", " ", "'hello'"]),
])
def test_i_can_choose_to_parse_quote(text, parse_quote, expected):
tokens = list(Tokenizer(text, parse_quote=parse_quote, yield_eof=False))
assert [t.value for t in tokens] == expected
+12 -7
View File
@@ -1,17 +1,22 @@
import logging
from enum import Enum
import pytest
from base import BaseTest
from common.global_symbols import NoFirstToken, NotFound, NotInit, Removed
from helpers import get_concept, get_concepts
from helpers import get_concepts
from ontologies.SheerkaOntologyManager import SheerkaOntologyManager
from parsers.tokenizer import Keywords
from sheerkapickle import tags
from sheerkapickle.sheerkaplicker import SheerkaPickler
from sheerkapickle.sheerkaunpickler import SheerkaUnpickler
class MyEnum(Enum):
DEF = "def"
WHERE = "where"
class Obj:
def __init__(self, a, b, c):
self.a = a
@@ -47,7 +52,7 @@ class TestSheerkaPickler(BaseTest):
([1, [3.14, "a string"]], [1, [3.14, "a string"]]),
([1, (3.14, "a string")], [1, {tags.TUPLE: [3.14, "a string"]}]),
([], []),
(Keywords.DEF, {tags.ENUM: 'parsers.tokenizer.Keywords.DEF'}),
(MyEnum.DEF, {tags.ENUM: 'tests.sheerkapickle.test_SheerkaPickler.MyEnum.DEF'}),
])
def test_i_can_flatten_and_restore_primitives(self, sheerka, obj, expected):
flatten = SheerkaPickler(sheerka).flatten(obj)
@@ -128,7 +133,7 @@ class TestSheerkaPickler(BaseTest):
@pytest.mark.parametrize("obj, expected", [
({None: "a"}, {'null': "a"}),
({Keywords.DEF: "a"}, {'parsers.tokenizer.Keywords.DEF': 'a'}),
({MyEnum.DEF: "a"}, {'tests.sheerkapickle.test_SheerkaPickler.MyEnum.DEF': 'a'}),
({(1, 2): "a"}, {(1, 2): "a"}),
])
def test_i_can_manage_specific_keys_in_dictionaries(self, sheerka, obj, expected):
@@ -151,15 +156,15 @@ class TestSheerkaPickler(BaseTest):
def test_i_can_manage_references(self, sheerka):
foo = Obj("foo", "bar", "baz")
obj = [Keywords.DEF, foo, Keywords.WHERE, Keywords.DEF, foo]
obj = [MyEnum.DEF, foo, MyEnum.WHERE, MyEnum.DEF, foo]
flatten = SheerkaPickler(sheerka).flatten(obj)
assert flatten == [{'_sheerka/enum': 'parsers.tokenizer.Keywords.DEF'},
assert flatten == [{'_sheerka/enum': 'tests.sheerkapickle.test_SheerkaPickler.MyEnum.DEF'},
{'_sheerka/obj': 'tests.sheerkapickle.test_SheerkaPickler.Obj',
'a': 'foo',
'b': 'bar',
'c': 'baz'},
{'_sheerka/enum': 'parsers.tokenizer.Keywords.WHERE'},
{'_sheerka/enum': 'tests.sheerkapickle.test_SheerkaPickler.MyEnum.WHERE'},
{'_sheerka/id': 0},
{'_sheerka/id': 1}]
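The reference test shows the pickler's memoization: an object's first occurrence is flattened in full, later occurrences collapse to an id reference, and ids follow first-appearance order (DEF is 0, foo is 1, WHERE would be 2). A hypothetical sketch of that scheme:

# Hypothetical sketch of flatten-with-memo; flatten_one stands in for the
# real per-object flattening (enums, plain objects, primitives).
def flatten_list(items, flatten_one):
    memo, out = {}, []
    for obj in items:
        key = id(obj)
        if key in memo:
            out.append({"_sheerka/id": memo[key]})  # back-reference
        else:
            memo[key] = len(memo)  # ids follow first-appearance order
            out.append(flatten_one(obj))
    return out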