Fixed #9: I can parse 'def concept'
@@ -363,3 +363,4 @@ def dict_product(a, b):
        res.append(items)

    return res

@@ -80,7 +80,6 @@ class Concept:
    def __init__(self, metadata: ConceptMetadata):

        self._metadata: ConceptMetadata = metadata
        self._bnf = None  # compiled bnf expression
        self._runtime_info = ConceptRuntimeInfo()  # runtime settings for the concept
        self._all_attrs = None

@@ -0,0 +1,149 @@
from common.global_symbols import NotInit
from core.ExecutionContext import ContextActions, ExecutionContext
from core.ReturnValue import ReturnValue
from core.concept import DefinitionType
from core.error import ErrorContext, SheerkaException
from evaluators.base_evaluator import EvaluatorEvalResult, EvaluatorMatchResult, OneReturnValueEvaluator
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.ConceptDefinitionParser import ConceptDefinition
from parsers.parser_utils import ParsingException
from parsers.tokenizer import TokenKind, Tokenizer

NAMES_TOKEN_TYPES = {TokenKind.IDENTIFIER,
                     TokenKind.STRING}
CONCEPT_PARTS_TO_USE = [
    "body",
    "where",
    "pre",
    "post",
    "ret",
]


class DefConceptEvaluator(OneReturnValueEvaluator):
    """
    This evaluator creates the definition of a new concept
    after it is recognized by the RecognizeDefConcept parser
    """

    NAME = "DefConcept"

    def __init__(self):
        super().__init__(self.NAME, ContextActions.EVALUATION, 50)

    def matches(self, context: ExecutionContext,
                return_value: ReturnValue) -> EvaluatorMatchResult:
        return EvaluatorMatchResult(return_value.status and isinstance(return_value.value, ConceptDefinition))

    def eval(self, context: ExecutionContext,
             evaluation_context: object,
             return_value: ReturnValue) -> EvaluatorEvalResult:

        try:
            concept_def = return_value.value
            variables = self._get_variables(context, concept_def)
            parameters = None

            if concept_def.definition_type == DefinitionType.BNF:
                self._validate_bnf(context, concept_def)

            ret_val = context.sheerka.define_new_concept(context,
                                                         concept_def.name,
                                                         False,
                                                         False,
                                                         concept_def.body,
                                                         concept_def.where,
                                                         concept_def.pre,
                                                         concept_def.post,
                                                         concept_def.ret,
                                                         concept_def.definition,
                                                         concept_def.definition_type,
                                                         concept_def.auto_eval,
                                                         variables=variables,
                                                         parameters=parameters)

            if ret_val.status:
                new = ReturnValue(self.NAME, True, ret_val.value, parents=[return_value])
                return EvaluatorEvalResult([new], [return_value])
            else:
                error_ret_val = ReturnValue(self.NAME, False, ret_val.value, [return_value])
                return EvaluatorEvalResult([error_ret_val], [])

        except ParsingException as ex:
            error_context = ErrorContext(self.NAME, context, ex.error)
            error_ret_val = ReturnValue(self.NAME, False, error_context, [return_value])
            return EvaluatorEvalResult([error_ret_val], [])

    def _get_variables(self, context: ExecutionContext, concept_def: ConceptDefinition):
        variables_found = set()  # names only at this point; def_var may add (name, value) tuples later
        definition = concept_def.definition or concept_def.name
        possible_vars_from_name = self._get_possible_vars_from_def(context, definition)

        possible_vars_from_name_as_set = set(possible_vars_from_name)
        for part in CONCEPT_PARTS_TO_USE:
            # if these possible variables are referenced in other parts of the definition, they may be variables
            part_value = getattr(concept_def, part)
            if part_value == "":
                continue

            possible_vars_from_part = self._get_possible_vars_from_part(context, part_value)
            variables_found.update(possible_vars_from_name_as_set & possible_vars_from_part)

        # add variables from def_var
        if concept_def.def_var:
            variables_found.update(concept_def.def_var)

        with_default_value = [v if isinstance(v, tuple) else (v, NotInit) for v in variables_found]

        # variables are sorted in order of appearance in the name
        sorted_vars = []
        for possible_var in possible_vars_from_name:
            for found in with_default_value:
                if possible_var == found[0]:
                    sorted_vars.append(found)

        return sorted_vars

    @staticmethod
    def _get_possible_vars_from_def(context, definition):
        """
        :param context: the execution context
        :param definition: the text of the concept definition
        :return: list of names that are not already concept names
        """
        names = (str(t.value) for t in Tokenizer(definition) if t.type in NAMES_TOKEN_TYPES)
        possible_vars = filter(lambda x: not context.sheerka.is_a_concept_name(x), names)

        return list(possible_vars)

    @staticmethod
    def _get_possible_vars_from_part(context, part):
        """
        :param context: the execution context
        :param part: the text of one part of the declaration
        :return: set of names that are not already concept names
        """
        # not the final implementation
        # In the final impl,
        # we first need to check if the part is a concept call (rather than a concept name)
        names = (str(t.value) for t in Tokenizer(part) if t.type in NAMES_TOKEN_TYPES)
        possible_vars = filter(lambda x: not context.sheerka.is_a_concept_name(x), names)

        return set(possible_vars)

    @staticmethod
    def _validate_bnf(context, definition):
        parser = BnfDefinitionParser(context, definition.definition, definition.name)
        parser.parse()
        if parser.error_sink:
            raise ParsingException(parser.error_sink[0])

        return True
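
For orientation, the detection rule implemented by `_get_variables` can be read off the code and the tests added later in this commit; a sketch (assumes a configured `context`):

```python
# A name from the definition becomes a variable when (1) it is not already a
# concept name and (2) it is referenced again in one of the other parts
# ("body", "where", "pre", "post", "ret"); def_var entries are added directly.
concept_def = ConceptDefinition(name="a + b", where="a is an int", ret="b")
DefConceptEvaluator()._get_variables(context, concept_def)
# -> [("a", NotInit), ("b", NotInit)]   (ordered as they appear in the name)
```
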
@@ -0,0 +1,40 @@
from core.BuiltinConcepts import BuiltinConcepts
from core.ExecutionContext import ContextActions, ExecutionContext
from core.ReturnValue import ReturnValue
from core.error import ErrorContext
from evaluators.base_evaluator import EvaluatorEvalResult, EvaluatorMatchResult, OneReturnValueEvaluator
from parsers.ConceptDefinitionParser import ConceptDefinitionParser


class RecognizeDefConcept(OneReturnValueEvaluator):
    """
    Class that recognizes the input 'def concept <name> [as <body>] [where <where>] [pre <pre>] [ret <ret>]'
    """
    NAME = "RecognizeDefConcept"

    def __init__(self):
        super().__init__(self.NAME, ContextActions.PARSING, 90)

    def matches(self, context: ExecutionContext, return_value: ReturnValue) -> EvaluatorMatchResult:
        return EvaluatorMatchResult(return_value.status and
                                    context.sheerka.isinstance(return_value.value, BuiltinConcepts.PARSER_INPUT) and
                                    return_value.value.body.original_text.startswith("def concept"))

    def eval(self, context: ExecutionContext,
             evaluation_context: object,
             return_value: ReturnValue) -> EvaluatorEvalResult:
        parser_input = return_value.value.body
        parser_input.reset()
        parser_input.seek(2)  # eat 'def '

        parser = ConceptDefinitionParser(True)
        node = parser.parse(parser_input)

        if parser.error_sink:
            error_context = ErrorContext(self.NAME, context, parser.error_sink)
            error_ret_val = ReturnValue(self.NAME, False, error_context, [return_value])
            return EvaluatorEvalResult([error_ret_val], [])

        else:
            new = ReturnValue(self.NAME, True, node, parents=[return_value])
            return EvaluatorEvalResult([new], [return_value])
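
Together, the two evaluators form the pipeline this commit introduces: RecognizeDefConcept (PARSING, priority 90) turns matching input into a ConceptDefinition, and DefConceptEvaluator (EVALUATION, priority 50) turns that definition into a concept. A sketch mirroring the test helper further down (`_rv` and `context` come from the test fixtures):

```python
pi = ParserInput("def concept one as 1")
pi.init()
start = _rv(context.sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))

recognized = RecognizeDefConcept().eval(context, None, start).new[0]
# recognized.value is a ConceptDefinition; DefConceptEvaluator.matches() is now
# True for it, and its eval() performs the actual define_new_concept call.
```
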
@@ -0,0 +1,212 @@
from dataclasses import dataclass

from common.global_symbols import NotFound
from core.error import ErrorObj
from parsers.parser_utils import SimpleParser, UnexpectedEof, UnexpectedToken
from parsers.peg_parser import ConceptExpression, OneOrMore, Optional, OrderedChoice, RegExMatch, Sequence, StrMatch, \
    VariableExpression, ZeroOrMore
from parsers.tokenizer import TokenKind, Tokenizer


@dataclass
class UnknownConcept(ErrorObj):
    concept_id: str
    concept_name: str

    def get_error_msg(self) -> str:
        return f"Cannot find concept defined by id='{self.concept_id}' and/or name '{self.concept_name}'"


class BnfDefinitionParser(SimpleParser):
    """
    Parser used to transform a literal into a ParsingExpression
    example:
        a | b c -> Sequence(OrderedChoice(a, b), c)

    '|' (pipe) is used for OrderedChoice
    ' ' (space) is used for Sequence
    '?' (question mark) is used for Optional
    '*' (star) is used for ZeroOrMore
    '+' (plus) is used for OneOrMore
    """

    def __init__(self, context, text, concept_name=None):
        super().__init__(text, skip_whitespace_default_behaviour=False)
        self.context = context
        self.concept_name = concept_name  # name of the concept currently being constructed

        self.nb_open_par = 0
        self.next_token(skip_whitespace=True)

    def maybe_sequence(self, first, second):
        return self.token.type == second or \
            self.token.type == first and self.check_next_token().type == second

    def parse(self):
        tree = self._parse_choice()

        if self.token.type != TokenKind.EOF:
            self.add_error(UnexpectedToken(self.token, TokenKind.EOF))

        return None if self.error_sink else tree

    def _parse_choice(self):
        """
        a | b | c

        <choice> := <sequence> ( '|' <sequence> )*
        """
        sequence = self._parse_sequence()

        self.eat_whitespace()

        if self.token.type != TokenKind.VBAR:
            return sequence

        elements = [sequence]
        while True:
            # maybe eat the vertical bar
            self.eat_whitespace()
            if self.token is None or self.token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)

            sequence = self._parse_sequence()
            elements.append(sequence)

        return self._eat_rule_name_if_needed(OrderedChoice(*elements))

    def _parse_sequence(self):
        """
        a b c
        """
        expr_and_modifier = self._parse_modifier()
        if self.token.type == TokenKind.EOF or \
                self.token.type == TokenKind.EQUALS or \
                self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
            return expr_and_modifier

        elements = [expr_and_modifier]
        while True:
            if self.token is None or \
                    self.token.type == TokenKind.EOF or \
                    self.token.type == TokenKind.EQUALS or \
                    self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                    self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
                break
            self.eat_whitespace()

            sequence = self._parse_modifier()
            elements.append(sequence)

        return self._eat_rule_name_if_needed(Sequence(*elements))

    def _parse_modifier(self):
        """
        a? | a* | a+
        """
        expression = self._parse_expression()

        if self.token.type == TokenKind.QMARK:
            self.next_token()
            return self._eat_rule_name_if_needed(Optional(expression))

        if self.token.type == TokenKind.STAR:
            self.next_token()
            return self._eat_rule_name_if_needed(ZeroOrMore(expression))

        if self.token.type == TokenKind.PLUS:
            self.next_token()
            return self._eat_rule_name_if_needed(OneOrMore(expression))

        return expression

    def _parse_expression(self):
        if self.token.type == TokenKind.EOF:
            self.add_error(UnexpectedEof("lpar | concept | ident | string | regex", self.token))

        if self.token.type == TokenKind.LPAR:
            self.nb_open_par += 1
            self.next_token()
            expr = self._parse_choice()
            if self.token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
                return self._eat_rule_name_if_needed(expr)
            else:
                self.add_error(UnexpectedToken(self.token, TokenKind.RPAR))
                return expr

        if self.token.type == TokenKind.CONCEPT:
            concept_name, concept_id = self.token.value
            metadata = self.context.sheerka.get_by_id(concept_id) if concept_id \
                else self.context.sheerka.get_by_name(concept_name)

            if metadata is NotFound:
                self.add_error(UnknownConcept(concept_id, concept_name))
                self.next_token()
                return None

            expr = ConceptExpression(metadata.id, rule_name=metadata.name)
            self.next_token()
            return self._eat_rule_name_if_needed(expr)

        if self.token.type == TokenKind.IDENTIFIER:

            concept_name = self.token.str_value

            if concept_name == self.concept_name:
                # recursive construction, the concept id is not known yet
                expr = ConceptExpression(None, rule_name=concept_name)

            elif (metadata := self.context.sheerka.get_by_name(concept_name)) is NotFound:
                # unknown concept, it's a variable definition
                expr = VariableExpression(concept_name)

            else:
                expr = ConceptExpression(metadata.id, rule_name=concept_name)

            self.next_token()
            return self._eat_rule_name_if_needed(expr)

        if self.token.type == TokenKind.STRING:
            tokens = list(Tokenizer(self.token.strip_quote, yield_eof=False))
            if len(tokens) == 1:
                self.next_token()
                return self._eat_rule_name_if_needed(StrMatch(tokens[0].str_value))

            else:
                elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens]
                elements[-1].skip_white_space = True
                ret = Sequence(*elements)
                self.next_token()
                return self._eat_rule_name_if_needed(ret)

        if self.token.type == TokenKind.REGEX:
            ret = RegExMatch(self.token.strip_quote)
            self.next_token()
            return self._eat_rule_name_if_needed(ret)

        ret = StrMatch(self.token.strip_quote)
        self.next_token()
        return self._eat_rule_name_if_needed(ret)

    def _eat_rule_name_if_needed(self, expression):

        if self.token.type == TokenKind.EQUALS:
            self.next_token()  # eat equals

            if self.token.type != TokenKind.IDENTIFIER:
                return self.add_error(UnexpectedToken(self.token, TokenKind.IDENTIFIER))

            expression.rule_name = self.token.value
            self.next_token()

        return expression
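
Combining the operator table from the docstring with the expectations in the new BnfDefinitionParser tests, the transformation looks like this (a sketch; `context` must expose the sheerka concept lookup):

```python
parser = BnfDefinitionParser(context, "'hello' name ('!' | '.')+", concept_name="greeting")
tree = parser.parse()
# roughly: Sequence(StrMatch("hello"),
#                   VariableExpression("name"),    # unknown identifier -> variable
#                   OneOrMore(OrderedChoice(StrMatch("!"), StrMatch("."))))
# parse() returns None instead if error_sink is not empty.
```
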
@@ -0,0 +1,219 @@
import re
from dataclasses import dataclass

from common.global_symbols import NotInit
from core.concept import DefinitionType
from parsers.ParserInput import ParserInput
from parsers.parser_utils import ParsingError, ParsingException, SimpleParser, UnexpectedEof, UnexpectedToken, \
    get_text_from_tokens, \
    parse_parts, \
    strip_tokens
from parsers.tokenizer import Keywords, TokenKind

var_with_value_regex = re.compile(r"(\w+)\s*=\s*(\d+)")

DEF_CONCEPT_PARTS = [
    Keywords.CONCEPT,
    Keywords.FROM,
    Keywords.AS,
    Keywords.WHERE,
    Keywords.PRE,
    Keywords.POST,
    Keywords.RET,
    Keywords.AUTO_EVAL,
    Keywords.DEF_VAR
]


@dataclass
class ConceptDefinition:
    name: str
    definition_type: str = None
    definition: str = ""
    body: str = ""
    where: str = ""
    pre: str = ""
    post: str = ""
    ret: str = ""
    auto_eval: bool = False
    def_var: list = None


class VariableParser(SimpleParser):
    """
    Simple parser to parse the definitions of variables
    expected forms:
        a b
        a, b
        a = value
    """

    def parse(self):
        res = []
        while self.next_token():
            if self.token.type == TokenKind.COMMA:
                continue

            var_name = self.token.value
            if self.check_next_token().type == TokenKind.EQUALS:
                self.next_token()
                self.next_token()
                var_value = int(self.token.value) if self.token.type == TokenKind.NUMBER else self.token.value
                res.append((var_name, var_value))
            else:
                res.append((var_name, NotInit))

        return res


class ConceptDefinitionParser:

    def __init__(self, start_already_recognized=False):
        self.error_sink = []
        self.start_already_recognized = start_already_recognized
        self.custom_str = {TokenKind.STRING: lambda t: t.value[1:-1] if t.value[0] == '"' else t.value}

    def add_error(self, error):
        self.error_sink.append(error)

    def parse(self, parser_input: ParserInput) -> ConceptDefinition | None:
        if not self.start_already_recognized:
            # When called by the evaluator, 'def concept' is already recognized,
            # so there is no need to do it again
            if not parser_input.next_token():
                self.add_error(UnexpectedEof(Keywords.DEF, None))
                return None

            token = parser_input.token
            if not (token.type == TokenKind.IDENTIFIER and token.value == Keywords.DEF):
                self.add_error(UnexpectedToken(token, Keywords.DEF))
                return None

            parser_input.next_token()

        parts = parse_parts(parser_input,
                            self.error_sink,
                            DEF_CONCEPT_PARTS,
                            first_token=DEF_CONCEPT_PARTS[0],
                            allow_multiple=[DEF_CONCEPT_PARTS[-1]])

        if parts is None:
            assert self.error_sink
            return None

        try:
            name = self._get_concept_name(parts)

            def_type, definition = self._get_concept_definition(parts)

            body = self._get_part(Keywords.AS, parts)
            where = self._get_part(Keywords.WHERE, parts)
            pre = self._get_part(Keywords.PRE, parts)
            post = self._get_part(Keywords.POST, parts)
            ret = self._get_part(Keywords.RET, parts)

            auto_eval = self._get_concept_auto_eval(parts)
            def_var = self._get_concept_variables(parts)

            return ConceptDefinition(name=name,
                                     definition_type=def_type,
                                     definition=definition,
                                     body=body,
                                     where=where,
                                     pre=pre,
                                     post=post,
                                     ret=ret,
                                     auto_eval=auto_eval,
                                     def_var=def_var)
        except ParsingException as ex:
            self.add_error(ex.error)
            return None

    def _get_concept_name(self, parts):
        tokens = parts[Keywords.CONCEPT]
        name_tokens = strip_tokens(tokens[1:])

        if len(name_tokens) == 0:
            raise ParsingException(ParsingError([], "Name is mandatory."))

        for token in name_tokens:
            if token.type == TokenKind.NEWLINE:
                raise ParsingException(ParsingError([token], "Newlines are not allowed in name."))

        # normalize the name of the concept
        no_whitespace = [t for t in name_tokens if t.type != TokenKind.WHITESPACE]
        return " ".join(self.custom_str.get(token.type, lambda t: t.str_value)(token) for token in no_whitespace)

    @staticmethod
    def _get_concept_definition(parts):
        if Keywords.FROM not in parts:
            return None, ""

        tokens = parts[Keywords.FROM]

        if len(tokens) == 1:
            raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.FROM}' declaration."))

        if tokens[1].value == Keywords.BNF:
            to_use = strip_tokens(tokens[2:])
            definition_type = DefinitionType.BNF
        elif tokens[1].value == Keywords.DEF:
            to_use = strip_tokens(tokens[2:])
            definition_type = DefinitionType.DEFAULT
        else:
            to_use = strip_tokens(tokens[1:])
            definition_type = DefinitionType.DEFAULT

        if len(to_use) == 0:
            raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.FROM}' declaration."))

        return definition_type, get_text_from_tokens(to_use)

    @staticmethod
    def _get_concept_auto_eval(parts):
        if Keywords.AUTO_EVAL not in parts:
            return False

        tokens = parts[Keywords.AUTO_EVAL]
        if len(tokens) == 1:
            raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.AUTO_EVAL}' declaration."))

        if len(tokens) > 2 or tokens[1].type != TokenKind.IDENTIFIER:
            raise ParsingException(ParsingError(tokens[1:], "Invalid 'auto_eval' declaration"))

        auto_eval_value = tokens[1].value.lower()
        if auto_eval_value == "true":
            return True
        elif auto_eval_value == "false":
            return False

        raise ParsingException(ParsingError(tokens[1],
                                            f"Invalid 'auto_eval' declaration ({auto_eval_value} is not recognized)"))

    @staticmethod
    def _get_concept_variables(parts):
        if Keywords.DEF_VAR not in parts:
            return None

        tokens = parts[Keywords.DEF_VAR]
        if len(tokens) == 1:
            raise ParsingException(ParsingError(tokens[0], f"Empty '{Keywords.DEF_VAR}' declaration."))

        res = []
        for part in [t.strip_quote for t in tokens[1:]]:
            res.extend(VariableParser(part).parse())

        return res

    @staticmethod
    def _get_part(part, parts):
        if part not in parts:
            return ""

        tokens = parts[part]
        to_use = strip_tokens(tokens[1:])

        if len(to_use) == 0:
            raise ParsingException(ParsingError(tokens[0], f"Empty '{part}' declaration."))

        return get_text_from_tokens(to_use)
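
End to end, one declaration line becomes one ConceptDefinition record; a minimal sketch (field values inferred from the parser above and the tests below):

```python
pi = ParserInput("def concept inc a as a + 1 where isinstance(a, int) ret a")
pi.init()

definition = ConceptDefinitionParser().parse(pi)
# definition ~ ConceptDefinition(name="inc a", body="a + 1",
#                                where="isinstance(a, int)", ret="a")
```
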
@@ -1,18 +1,23 @@
from common.utils import get_text_from_tokens
-from parsers.tokenizer import Tokenizer
+from parsers.tokenizer import Token, TokenKind, Tokenizer


class ParserInput:
-    def __init__(self, text, yield_oef=True):
+    def __init__(self, text):
        self.original_text = text
-        self.yield_oef = yield_oef
-        self.all_tokens = None
-        self.exception = None
+        self.all_tokens: list = None
+        self.exception: Exception = None
+        self.pos: int = None
+        self.end: int = None
+        self.token: Token = None

    def init(self) -> bool:
        try:
            # the EOF token is forced here, but it will not be yielded unless asked for
            self.all_tokens = list(Tokenizer(self.original_text, yield_eof=True))
            self.pos = -1
            self.end = len(self.all_tokens)
            return True
        except Exception as ex:
            self.all_tokens = None

@@ -25,5 +30,65 @@ class ParserInput:

        return get_text_from_tokens(self.all_tokens, custom_switcher, tracker)

    def reset(self):
        if self.exception:
            raise self.exception

        self.pos = -1

    def next_token(self, skip_whitespace=True) -> bool:
        self.pos += 1

        if self.pos >= self.end:
            self.token = self.all_tokens[-1]
            return False

        self.token = self.all_tokens[self.pos]

        if skip_whitespace:
            while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                self.pos += 1
                if self.pos >= self.end:
                    self.token = self.all_tokens[-1]
                    return False

                self.token = self.all_tokens[self.pos]

        if self.token.type == TokenKind.EOF:
            return False

        return True

    def check_next_token(self, skip_whitespace=True):
        """
        Returns the token after the current one
        Never returns None (returns the EOF token instead)
        """
        my_pos = self.pos + 1
        if my_pos >= self.end:
            return self.all_tokens[-1]

        if skip_whitespace:
            while self.all_tokens[my_pos].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                my_pos += 1
                if my_pos >= self.end:
                    return self.all_tokens[-1]

        return self.all_tokens[my_pos]

    def seek(self, pos):
        """
        Move the token offset to position pos
        :param pos: the new token offset
        :return: True if pos is a valid position, False otherwise
        """
        if pos < 0 or pos >= self.end:
            self.token = None
            return False

        self.pos = pos
        self.token = self.all_tokens[self.pos]
        return True

    def __repr__(self):
        return f"ParserInput('{self.original_text}', len={len(self.all_tokens)})"
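
A short usage sketch of the new navigation methods (this is exactly what RecognizeDefConcept relies on):

```python
pi = ParserInput("def concept foo")
pi.init()

pi.next_token()        # True; pi.token is the 'def' identifier
pi.check_next_token()  # peeks at 'concept' without moving pi.pos
pi.reset()             # back to the start (re-raises a stored tokenizer error, if any)
pi.seek(2)             # jump straight past "def ", as RecognizeDefConcept.eval does
```
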
@@ -0,0 +1,438 @@
from dataclasses import dataclass

from core.error import ErrorObj, SheerkaException
from parsers.tokenizer import Token, TokenKind, Tokenizer

DEFAULT_TAB_SIZE = 4


class ParsingException(SheerkaException):
    """
    Generic exception for the parsers
    It wraps the underlying error object
    """

    def __init__(self, error: ErrorObj):
        self.error: ErrorObj = error

    def get_error_msg(self) -> str:
        return self.error.get_error_msg()


@dataclass()
class ParsingError(ErrorObj):
    """
    The input is recognized, but there is a syntax error
    """
    tokens: list
    message: str

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, ParsingError):
            return False

        if self.message != other.message:
            return False

        if other.tokens is not None and self.tokens != other.tokens:
            return False

        return True

    def __hash__(self):
        return hash(self.message)

    def get_error_msg(self) -> str:
        return f"Syntax error: {self.message}"


@dataclass
class UnexpectedToken(ErrorObj):
    token: Token
    expected: TokenKind

    def get_error_msg(self) -> str:
        return f"Unexpected token {self.token} found while expecting {self.expected}"


@dataclass()
class KeywordNotFound(ErrorObj):
    keywords: list

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, KeywordNotFound):
            return False

        if self.keywords != other.keywords:
            return False

        return True

    def __hash__(self):
        # keywords is a list, so hash a tuple of it
        return hash(tuple(self.keywords))

    def get_error_msg(self) -> str:
        return f"Expected keyword(s) {self.keywords} were not found."


@dataclass()
class UnexpectedEof(ErrorObj):
    keyword: str
    last_token: Token | None

    def get_error_msg(self):
        if self.last_token:
            return f"Unexpected eof while parsing keyword '{self.keyword}' at index {self.last_token.index}"
        else:
            return f"Unexpected eof while parsing keyword '{self.keyword}'"


def strip_tokens(tokens, strip_eof=False):
    """
    Remove the starting and trailing spaces and newlines
    """
    if tokens is None:
        return None

    start = 0
    length = len(tokens)
    while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
        start += 1

    if start == length:
        return []

    end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
        if strip_eof \
        else (TokenKind.WHITESPACE, TokenKind.NEWLINE)

    end = length - 1
    while end > 0 and tokens[end].type in end_tokens:
        end -= 1

    return tokens[start: end + 1]


def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
    """
    Recreate the source text from the list of tokens
    :param tokens: list of tokens
    :param custom_switcher: to override the behaviour (the returned value) of some token kinds
    :param tracker: keeps track of the original token when its value is custom switched
    :return: the reconstructed text
    """
    if tokens is None:
        return ""
    res = ""

    if not hasattr(tokens, "__iter__"):
        tokens = [tokens]

    switcher = {}

    if custom_switcher:
        switcher.update(custom_switcher)

    for token in tokens:
        value = switcher.get(token.type, lambda t: t.str_value)(token)
        res += value
        if tracker is not None and custom_switcher and token.type in custom_switcher:
            tracker[value] = token
    return res


def parse_parts(parser_input, error_sink, keywords, first_token=None, strip=False, allow_multiple=None):
    """
    Reads the parser input and groups the tokens by keywords
    ex:
        tokens = Tokenizer("as a b c pre u v w where x y z")
        keywords = ["as", "pre", "where"]
        assert parse_parts(keywords) == {
            Keyword("as"): [Token("as"), Token("a"), Token(<ws>), Token("b"), Token(<ws>), Token("c"), Token(<ws>)],
            Keyword("pre"): [Token("pre"), Token("u"), Token(<ws>), Token("v"), Token(<ws>), Token("w"), Token(<ws>)],
            Keyword("where"): [Token("where"), Token("x"), Token(<ws>), Token("y"), Token(<ws>), Token("z"), Token(<ws>)]}

    * The order of appearance of the keywords is not important
      "as w pre y where z" and "where z pre y as w" will produce the same dictionary

    * I can use double quotes to protect a keyword
      where "x y" will produce the entry Keyword("where"): [Token("x"), Token(<ws>), Token("y"), Token(<ws>)]
      where 'x y' will produce the entry Keyword("where"): [Token("'x y'")]

    * If a keyword does not appear in allow_multiple, it will be recognized only once
        tokens = Tokenizer("def concept x is a concept")
        keywords = ["concept"], allow_multiple={}
        assert parse_parts(keywords) == {
            Keyword("concept"): [Token("x"), Token(<ws>), Token("is"), Token(<ws>), Token("concept")]}

    * If the keyword appears in allow_multiple, it can be parsed several times
      in this case, in the result, one token represents one occurrence of the keyword (whitespace is discarded)
        tokens = Tokenizer("def_var var1 def_var var2")
        keywords = ["def_var"], allow_multiple={"def_var"}
        assert parse_parts(keywords) == {
            Keyword("def_var"): [Token("def_var"), Token("var1"), Token("var2")]}

        Long declarations are transformed into a single string token
        tokens = Tokenizer("def_var a very long declaration")
        keywords = ["def_var"], allow_multiple={"def_var"}
        assert parse_parts(keywords) == {
            Keyword("def_var"): [Token("def_var"), Token("'a very long declaration'")]}

    :param parser_input:
    :param error_sink:
    :param keywords:
    :param first_token: it must be a keyword
    :param strip: if True, the returned tokens will be trimmed
    :param allow_multiple: set indicating the keywords that may appear several times
    :return: dictionary mapping each keyword to its tokens
    """

    if allow_multiple is None:
        allow_multiple = set()

    def new_part(_token, _colon_mode_activated, _previous, _already_found):
        """
        :param _token: current token
        :param _colon_mode_activated: whether colon mode is active
        :param _previous: previous token
        :param _already_found: keywords that have already been found
        """
        if _token.value not in keywords:
            # not even a keyword!
            return False

        if _token.value in _already_found and _token.value not in allow_multiple:
            # keywords are recognized only once
            return False

        if not _colon_mode_activated or not _previous:
            return True

        return _previous.line != _token.line

    def manage_buffer(_res, _keyword, _buffer):
        stripped = strip_tokens(_buffer)

        # manage the colon first, to make sure that what is protected by the quotes remains protected
        if len(stripped) > 0 and stripped[0].type == TokenKind.COLON:
            body = _get_body(stripped[1:], error_sink)
            if body:
                _res[_keyword].extend(body)

        # only add one token per occurrence when the keyword allows multiple occurrences
        elif _keyword in allow_multiple:
            if len(stripped) > 1:
                buffer_as_str_token = Token(TokenKind.STRING,
                                            "'" + get_text_from_tokens(stripped) + "'",
                                            stripped[0].index,
                                            stripped[0].line,
                                            stripped[0].column)
                _res[_keyword].append(buffer_as_str_token)
            else:
                _res[_keyword].append(stripped[0])

        # replace double-quoted strings by their content
        elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"':
            _res[_keyword].extend(list(Tokenizer(stripped[0].strip_quote, yield_eof=False)))

        elif strip:
            _res[_keyword].extend(stripped)

        else:
            _res[_keyword].extend(_buffer)

    if parser_input.token is None:
        error_sink.append(KeywordNotFound(keywords))
        return None

    if parser_input.token.type == TokenKind.WHITESPACE:
        parser_input.next_token()

    token = parser_input.token
    if first_token and token.value != first_token:
        error_sink.append(UnexpectedToken(token, first_token))
        return None

    if token.value not in keywords:
        error_sink.append(KeywordNotFound(keywords))
        return None

    colon_mode_activated = False  # if activated, use keyword + colon to start a new keyword definition
    previous_token = None

    res = {}
    keywords_found = set()
    keyword = None
    buffer = []

    # More explanations on colon_mode_activated
    # You can use the pattern
    #     def concept <name> as:
    #     <tab> xxx
    #     <tab> yyy
    #     ...
    #
    # It allows more readability
    # It also permits the usage of other keywords inside the block
    # Example
    #     def concept give the date as:
    #         from datetime import date  # I can use the 'from' keyword !!!
    #         return date.today()
    #
    # Note that I can choose to use the colon or not
    #
    #     def concept in x days as:
    #         from datetime import date
    #         return date.today() - x
    #     where x > 0
    #
    # is a valid declaration (there is no colon for the where clause)

    # loop through the tokens and put them in the correct part entry of res
    while True:
        if new_part(token, colon_mode_activated, previous_token, keywords_found):
            # manage the previous part
            if buffer:
                manage_buffer(res, keyword, buffer)
                buffer.clear()

            keyword = token.value
            if keyword not in res:
                res[keyword] = [token]  # to keep track of where it starts

            keywords_found.add(token.value)
            colon_mode_activated = parser_input.check_next_token().type == TokenKind.COLON
            if not parser_input.next_token():
                error_sink.append(UnexpectedEof(keyword, token))
                break
        else:
            buffer.append(token)
            if not parser_input.next_token(skip_whitespace=False):
                break

        previous_token = token
        token = parser_input.token

    # do not forget to flush the buffer
    if buffer:
        manage_buffer(res, keyword, buffer)

    return res


def _skip_whitespaces(tokens):
    i = 0
    while i < len(tokens) and tokens[i].type == TokenKind.WHITESPACE:
        i += 1

    return i


def _get_body(tokens, error_sink):
    """
    Get the body of a keyword definition
    It manages a colon body, but the colon must be stripped first
    :param tokens:
    :return:
    """

    def get_tab_size(default_tab_size, text):
        # a tab counts as default_tab_size columns, any other character as one
        return sum(default_tab_size if c == "\t" else 1 for c in text)

    pos = _skip_whitespaces(tokens)

    if len(tokens) - pos < 3:
        error_sink.append(ParsingError(tokens, "Body is empty or too short."))
        return None

    if tokens[pos].type != TokenKind.NEWLINE:
        error_sink.append(UnexpectedToken(tokens[pos], TokenKind.NEWLINE))
        return None
    pos += 1

    if tokens[pos].type != TokenKind.WHITESPACE:
        error_sink.append(UnexpectedToken(tokens[pos], TokenKind.WHITESPACE))
        return None

    indent_size = get_tab_size(DEFAULT_TAB_SIZE, tokens[pos].value)
    pos += 1

    i = pos
    while i < len(tokens) - 1:
        if tokens[i].type == TokenKind.NEWLINE:
            if tokens[i + 1].type != TokenKind.WHITESPACE:
                error_sink.append(UnexpectedToken(tokens[i + 1], TokenKind.WHITESPACE))
                return None

            if get_tab_size(DEFAULT_TAB_SIZE, tokens[i + 1].value) < indent_size:
                error_sink.append(ParsingError([tokens[i + 1]], "Invalid indentation."))
                return None

            tokens[i + 1] = tokens[i + 1].clone()
            tokens[i + 1].value = " " * (get_tab_size(DEFAULT_TAB_SIZE, tokens[i + 1].value) - indent_size)
        i += 1

    return tokens[pos:]


class SimpleParser:
    def __init__(self, text, skip_whitespace_default_behaviour=True):
        self.error_sink = []
        self.token: Token = None
        self.source: str = ""
        self.skip_whitespace = skip_whitespace_default_behaviour

        self._iter_tokens = iter(Tokenizer(text))
        self._look_ahead = None

    def add_error(self, error):
        self.error_sink.append(error)

    def next_token(self, skip_whitespace=None):
        skip_whitespace = self.skip_whitespace if skip_whitespace is None else skip_whitespace

        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.token = self._look_ahead or next(self._iter_tokens)
        self.source += self.token.str_value
        self._look_ahead = None

        if skip_whitespace:
            while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                self.token = next(self._iter_tokens)
                self.source += self.token.str_value

        return self.token.type != TokenKind.EOF

    def check_next_token(self, skip_whitespace=None):
        skip_whitespace = self.skip_whitespace if skip_whitespace is None else skip_whitespace

        if self._look_ahead:
            return self._look_ahead

        self._look_ahead = next(self._iter_tokens)
        if skip_whitespace:
            while self._look_ahead.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                self._look_ahead = next(self._iter_tokens)

        return self._look_ahead

    def eat_whitespace(self):
        if self._look_ahead is not None:
            self.token = self._look_ahead
            self.source += self.token.str_value
            self._look_ahead = None

        while self.token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
            self.token = next(self._iter_tokens)
            self.source += self.token.str_value
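
As a concrete instance of the grouping described in the parse_parts docstring (a sketch; the parser input must already be positioned on the first keyword, as ConceptDefinitionParser.parse arranges):

```python
pi = ParserInput("concept greet me as hello where 1")
pi.init()
pi.next_token()  # position on 'concept'

errors = []
parts = parse_parts(pi, errors, ["concept", "as", "where"], first_token="concept")
# parts["concept"] -> tokens of "concept greet me"
# parts["as"]      -> tokens of "as hello"
# parts["where"]   -> tokens of "where 1"
```
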
File diff suppressed because it is too large (+28 −46)

@@ -5,6 +5,25 @@ from common.global_symbols import VARIABLE_PREFIX
from common.utils import str_concept


class Keywords:
    DEF = "def"
    CONCEPT = "concept"
    RULE = "rule"
    FROM = "from"
    BNF = "bnf"
    AS = "as"
    WHERE = "where"
    PRE = "pre"
    POST = "post"
    ISA = "isa"
    RET = "ret"
    WHEN = "when"
    PRINT = "print"
    THEN = "then"
    AUTO_EVAL = "auto_eval"
    DEF_VAR = "def_var"


class TokenKind(Enum):
    EOF = "eof"
    WHITESPACE = "whitespace"

@@ -138,31 +157,12 @@ class LexerError(Exception):
    column: int


-class Keywords(Enum):
-    DEF = "def"
-    CONCEPT = "concept"
-    RULE = "rule"
-    FROM = "from"
-    BNF = "bnf"
-    AS = "as"
-    WHERE = "where"
-    PRE = "pre"
-    POST = "post"
-    ISA = "isa"
-    RET = "ret"
-    WHEN = "when"
-    PRINT = "print"
-    THEN = "then"
-    AUTO_EVAL = "auto_eval"
-    DEF_VAR = "def_var"
-
-
class Tokenizer:
    """
    Class that can iterate on the tokens
    """

-    def __init__(self, text, yield_eof=True, parse_word=False):
+    def __init__(self, text, yield_eof=True, parse_word=False, parse_quote=False):
        self.text = text
        self.text_len = len(text)
        self.column = 1

@@ -170,6 +170,7 @@ class Tokenizer:
        self.i = 0
        self.yield_eof = yield_eof
        self.parse_word = parse_word
+        self.parse_quote = parse_quote

    def __iter__(self):

@@ -385,7 +386,7 @@ class Tokenizer:
            yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
            self.i += len(number)
            self.column += len(number)
-        elif c == "'" and self.i > 0 and self.text[self.i - 1] != " ":
+        elif self.parse_quote and c == "'" and self.i > 0 and self.text[self.i - 1] != " ":
            yield Token(TokenKind.QUOTE, "'", self.i, self.line, self.column)
            self.i += 1
            self.column += 1
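
The new parse_quote flag gates the branch above: an apostrophe glued to the previous word is only emitted as a QUOTE token when the caller opts in. A sketch of the intended effect:

```python
tokens = list(Tokenizer("it's", parse_quote=True))
# tokens now contain Token(TokenKind.QUOTE, "'") at the apostrophe;
# with the default parse_quote=False the apostrophe is handled as before.
```
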
@@ -542,28 +543,9 @@ class Tokenizer:

        return result


-def strip_tokens(tokens, strip_eof=False):
-    """
-    Remove the starting and trailing spaces and newline
-    """
-    if tokens is None:
-        return None
-
-    start = 0
-    length = len(tokens)
-    while start < length and tokens[start].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
-        start += 1
-
-    if start == length:
-        return []
-
-    end_tokens = (TokenKind.WHITESPACE, TokenKind.NEWLINE, TokenKind.EOF) \
-        if strip_eof \
-        else (TokenKind.WHITESPACE, TokenKind.NEWLINE)
-
-    end = length - 1
-    while end > 0 and tokens[end].type in end_tokens:
-        end -= 1
-
-    return tokens[start: end + 1]
+    def remove_positions(self):
+        for token in self:
+            token.line = 1
+            token.column = 1
+            token.index = 0
+            yield token
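
remove_positions yields every token with its location reset, which makes token streams comparable regardless of layout; a plausible use (not shown in this commit):

```python
normalized = list(Tokenizer("def   concept").remove_positions())
# every yielded token has line == 1, column == 1, index == 0
```
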
@@ -11,8 +11,9 @@ from core.BuiltinConcepts import BuiltinConcepts
from core.ExecutionContext import ExecutionContext
from core.ReturnValue import ReturnValue
from core.concept import Concept, ConceptDefaultPropsAttrs, ConceptMetadata, DefinitionType
-from core.error import ErrorContext, SheerkaException
-from parsers.tokenizer import TokenKind, Tokenizer, strip_tokens
+from core.error import ErrorContext, ErrorObj
+from parsers.parser_utils import strip_tokens
+from parsers.tokenizer import TokenKind, Tokenizer
from services.BaseService import BaseService

PROPERTIES_FOR_DIGEST = ("name", "key",

@@ -22,26 +23,25 @@ PROPERTIES_FOR_DIGEST = ("name", "key",
                         "desc", "bound_body", "autouse", "props", "variables", "parameters")


-class ConceptAlreadyDefined(SheerkaException):
-    def __init__(self, concept: ConceptMetadata, already_defined_id: str):
-        self.concept = concept
-        self.already_defined_id = already_defined_id
+@dataclass
+class ConceptAlreadyDefined(ErrorObj):
+    concept: ConceptMetadata
+    already_defined_id: str

    def get_error_msg(self) -> str:
        return f"Concept {self.concept.name} is already defined (id={self.already_defined_id})"


-class InvalidBnf(SheerkaException):
-    def __init__(self, bnf: str):
-        self.bnf = bnf
+@dataclass
+class InvalidBnf(ErrorObj):
+    bnf: str

    def get_error_msg(self) -> str:
        return f"Invalid bnf '{self.bnf}'"


-class FirstItemError(SheerkaException):
+@dataclass
+class FirstItemError(ErrorObj):
    pass

@@ -78,6 +78,7 @@ class ConceptManager(BaseService):
        self.sheerka.bind_service_method(self.NAME, self.get_by_name, False)
        self.sheerka.bind_service_method(self.NAME, self.get_by_id, False)
        self.sheerka.bind_service_method(self.NAME, self.get_by_key, False)
+        self.sheerka.bind_service_method(self.NAME, self.is_a_concept_name, False)

        register_concept_cache = self.sheerka.om.register_concept_cache

@@ -108,11 +109,11 @@ class ConceptManager(BaseService):
        _(7, BuiltinConcepts.INVALID_CONCEPT, desc="invalid concept", variables=("concept_id", "reason"))
        _(8, BuiltinConcepts.EVALUATION_ERROR, desc="evaluation error", variables=("concept", "reason"))

        self.init_log.debug('%s builtin concepts created',
                            len(self.sheerka.om.current_cache_manager().concept_caches))

-    def define_new_concept(self, context: ExecutionContext,
+    def define_new_concept(self,
+                           context: ExecutionContext,
                           name: str,
                           is_builtin: bool = False,  # is the concept defined by Sheerka itself
                           is_unique: bool = False,  # is the concept a singleton

@@ -263,7 +264,7 @@ class ConceptManager(BaseService):
        Returns a concept metadata, using its name
        :param key:
        :type key:
-        :return:
+        :return: NotFound if not found
        :rtype:
        """
        return self.sheerka.om.get(self.CONCEPTS_BY_NAME_ENTRY, key)

@@ -273,7 +274,7 @@ class ConceptManager(BaseService):
        Returns a concept metadata, using its id
        :param concept_id:
        :type concept_id:
-        :return:
+        :return: NotFound if not found
        :rtype:
        """
        return self.sheerka.om.get(self.CONCEPTS_BY_ID_ENTRY, concept_id)

@@ -283,7 +284,7 @@ class ConceptManager(BaseService):
        Returns a concept metadata, using its key
        :param key:
        :type key:
-        :return:
+        :return: NotFound if not found
        :rtype:
        """
        return self.sheerka.om.get(self.CONCEPTS_BY_KEY_ENTRY, key)

@@ -291,6 +292,9 @@ class ConceptManager(BaseService):
    def get_all_concepts(self):
        return list(sorted(self.sheerka.om.list(self.CONCEPTS_BY_ID_ENTRY), key=lambda item: int(item.id)))

+    def is_a_concept_name(self, name):
+        return self.sheerka.om.exists(self.CONCEPTS_BY_NAME_ENTRY, name)

    @staticmethod
    def compute_metadata_digest(metadata: ConceptMetadata):
        """

@@ -4,7 +4,10 @@ from os import path

import pytest

+from core.BuiltinConcepts import BuiltinConcepts
from core.Sheerka import Sheerka
+from helpers import _rv
+from parsers.ParserInput import ParserInput
from sdp.sheerkaDataProvider import SheerkaDataProvider

@@ -32,6 +35,14 @@ class BaseTest:
        return SheerkaDataProvider("mem://", name="test")


+class BaseParserTest(BaseTest):
+    @staticmethod
+    def get_parser_input(context, command):
+        pi = ParserInput(command)
+        pi.init()
+        return _rv(context.sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))


class UsingFileBasedSheerka(BaseTest):
    TESTS_ROOT_DIRECTORY = path.abspath("../build/tests")
    SHEERKA_ROOT_DIR = os.path.join(TESTS_ROOT_DIRECTORY, ".sheerka")

@@ -1,10 +1,11 @@
from dataclasses import dataclass
+from enum import Enum

import pytest

from common.utils import decode_enum, dict_product, get_class, get_text_from_tokens, str_concept, to_dict, unstr_concept
from helpers import get_concept
-from parsers.tokenizer import Keywords, Token, TokenKind, Tokenizer
+from parsers.tokenizer import Token, TokenKind, Tokenizer


@dataclass

@@ -28,6 +29,10 @@ class Obj2:
    prop2: object


+class MyEnum(Enum):
+    CONCEPT = "concept"


def get_tokens(lst):
    res = []
    for e in lst:

@@ -106,7 +111,7 @@ def test_i_can_str_concept():
    ("xxx", None),
    ("xxx.", None),
    ("xxx.yyy", None),
-    ("parsers.tokenizer.Keywords.CONCEPT", Keywords.CONCEPT),
+    ("tests.common.test_utils.MyEnum.CONCEPT", MyEnum.CONCEPT),
])
def test_i_can_decode_enum(text, expected):
    actual = decode_enum(text)

@@ -0,0 +1,94 @@
import pytest

from base import BaseTest
from common.global_symbols import NotInit
from conftest import NewOntology
from core.BuiltinConcepts import BuiltinConcepts
from evaluators.DefConceptEvaluator import DefConceptEvaluator
from evaluators.RecognizeDefConcept import RecognizeDefConcept
from helpers import _rv, _rvf, get_concepts
from parsers.ConceptDefinitionParser import ConceptDefinition
from parsers.ParserInput import ParserInput
from parsers.parser_utils import UnexpectedEof


def get_ret_val_from(context, command):
    pi = ParserInput(command)
    pi.init()
    parser_start = _rv(context.sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
    ret = RecognizeDefConcept().eval(context, None, parser_start)
    return ret.new[0]


class TestDefConceptEvaluator(BaseTest):
    @pytest.fixture()
    def evaluator(self, sheerka):
        return sheerka.evaluators[DefConceptEvaluator.NAME]

    def test_i_can_match(self, sheerka, context, evaluator):
        ret_val = _rv(ConceptDefinition(name="foo"))
        assert evaluator.matches(context, ret_val).status is True

        ret_val = _rv("Not a ConceptDefinition class")
        assert evaluator.matches(context, ret_val).status is False

        ret_val = _rvf(ConceptDefinition(name="foo"))  # status is false
        assert evaluator.matches(context, ret_val).status is False

    def test_i_can_add_a_new_concept(self, context, evaluator):
        ret_val_input = get_ret_val_from(context, "def concept foo")
        res = evaluator.eval(context, None, ret_val_input)

        assert len(res.new) == 1
        assert res.new[0].status
        assert context.sheerka.isinstance(res.new[0].value, BuiltinConcepts.NEW_CONCEPT)
        assert res.eaten == [ret_val_input]

    def test_i_cannot_add_when_definition_validation_fails(self, context, evaluator):
        ret_val_input = get_ret_val_from(context, "def concept foo from bnf a |")  # dangling '|': an alternative is required after it
        res = evaluator.eval(context, None, ret_val_input)

        assert len(res.new) == 1
        assert not res.new[0].status
        assert isinstance(res.new[0].value.value, UnexpectedEof)
        assert res.eaten == []

    @pytest.mark.parametrize("concept_def, expected", [
        (ConceptDefinition(name="inc a", ret="a"), [("a", NotInit)]),
        (ConceptDefinition(name="inc a", where="isinstance(a, int)"), [("a", NotInit)]),
        (ConceptDefinition(name="inc a", def_var=[("a", 10)]), [("a", 10)]),
        (ConceptDefinition(name="inc a", def_var=["a"]), [("a", NotInit)]),
        (ConceptDefinition(name="a + b", where="a is an int", ret="b"), [("a", NotInit), ("b", NotInit)]),
        (ConceptDefinition(name="b + a", where="a is an int", ret="b"), [("b", NotInit), ("a", NotInit)]),
    ])
    def test_i_can_get_variables(self, context, evaluator, concept_def, expected):
        assert evaluator._get_variables(context, concept_def) == expected

    def test_concept_name_is_not_considered_as_variable(self, context, evaluator):
        with NewOntology(context, "test_concept_name_is_not_considered_as_variable"):
            get_concepts(context, "one", use_sheerka=True)

            concept_def = ConceptDefinition(name="add one + a", where="one is an int")
            assert evaluator._get_variables(context, concept_def) == []

    @pytest.mark.parametrize("concept_def, expected", [
        ("def concept add a b where a and b", [("a", NotInit), ("b", NotInit)]),
        ("def concept add a b where a ret b", [("a", NotInit), ("b", NotInit)]),
        ("def concept add a b where xxx a and yyy b", [("a", NotInit), ("b", NotInit)]),
        ("def concept add b a where xxx a and yyy b", [("b", NotInit), ("a", NotInit)]),
        ("def concept add a b def_var a,b", [("a", NotInit), ("b", NotInit)]),
        ("def concept add a b def_var a b", [("a", NotInit), ("b", NotInit)]),
        ("def concept add a b def_var a def_var b", [("a", NotInit), ("b", NotInit)]),
        ("def concept add a b def_var a=10 def_var b", [("a", 10), ("b", NotInit)]),
        ("def concept add a b def_var a='hello' def_var b", [("a", "'hello'"), ("b", NotInit)]),
    ])
    def test_i_can_add_a_new_concept_with_variables(self, context, evaluator, concept_def, expected):
        with NewOntology(context, "test_i_can_add_a_new_concept_with_variables"):
            ret_val_input = get_ret_val_from(context, concept_def)
            res = evaluator.eval(context, None, ret_val_input)

            assert len(res.new) == 1
            assert res.new[0].status
            new_concept = res.new[0].value
            assert context.sheerka.isinstance(new_concept, BuiltinConcepts.NEW_CONCEPT)
            assert new_concept.body.variables == expected

@@ -1,6 +1,6 @@
import pytest

-from base import BaseTest
+from base import BaseParserTest
from core.BuiltinConcepts import BuiltinConcepts
from core.error import ErrorContext
from evaluators.PythonParser import PythonParser

@@ -8,7 +8,7 @@ from helpers import _rv, _rvf
from parsers.ParserInput import ParserInput


-class TestPythonParser(BaseTest):
+class TestPythonParser(BaseParserTest):
    @pytest.fixture()
    def evaluator(self, sheerka):
        return sheerka.evaluators[PythonParser.NAME]

@@ -28,9 +28,7 @@ class TestPythonParser(BaseTest):
        "a = 20"
    ])
    def test_i_can_parse_python(self, sheerka, context, evaluator, text):
-        pi = ParserInput(text)
-        pi.init()
-        start = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
+        start = self.get_parser_input(context, text)

        res = evaluator.eval(context, None, start)

@@ -43,9 +41,7 @@ class TestPythonParser(BaseTest):

    def test_invalid_python_are_rejected(self, sheerka, context, evaluator):
        text = "1 + "
-        pi = ParserInput(text)
-        pi.init()
-        start = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
+        start = self.get_parser_input(context, text)

        res = evaluator.eval(context, None, start)

@@ -57,9 +53,7 @@ class TestPythonParser(BaseTest):
        assert ret_val.parents == [start]

    def test_i_can_detect_concepts(self, sheerka, context, evaluator):
-        pi = ParserInput("c:one: + c:two:")
-        pi.init()
-        start = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=pi))
+        start = self.get_parser_input(context, "c:one: + c:two:")

        res = evaluator.eval(context, None, start)

@@ -72,4 +66,3 @@ class TestPythonParser(BaseTest):
        assert len(ret_val.value.pf.namespace) == 2
        assert ret_val.value.pf.namespace["__C__KEY_one__ID_00None00__C__"].value == ("one", None)
        assert ret_val.value.pf.namespace["__C__KEY_two__ID_00None00__C__"].value == ("two", None)

@@ -0,0 +1,51 @@
import pytest

from base import BaseParserTest
from core.BuiltinConcepts import BuiltinConcepts
from core.error import ErrorContext
from evaluators.RecognizeDefConcept import RecognizeDefConcept
from helpers import _rv, _rvf
from parsers.ConceptDefinitionParser import ConceptDefinition
from parsers.ParserInput import ParserInput


class TestRecognizeDefConcept(BaseParserTest):
    @pytest.fixture()
    def evaluator(self, sheerka):
        return sheerka.evaluators[RecognizeDefConcept.NAME]

    def test_i_can_match(self, sheerka, context, evaluator):
        ret_val = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("def concept")))
        assert evaluator.matches(context, ret_val).status is True

        ret_val = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("def")))
        assert evaluator.matches(context, ret_val).status is False

        ret_val = _rv(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("other text")))
        assert evaluator.matches(context, ret_val).status is False

        ret_val = _rv(sheerka.newn(BuiltinConcepts.UNKNOWN_CONCEPT))  # it responds to PARSER_INPUT only
        assert evaluator.matches(context, ret_val).status is False

        ret_val = _rvf(sheerka.newn(BuiltinConcepts.PARSER_INPUT, pi=ParserInput("def concept")))  # status is false
        assert evaluator.matches(context, ret_val).status is False

    def test_i_can_recognize_a_def_concept(self, context, evaluator):
        ret_val_input = self.get_parser_input(context, "def concept one as 1")
        res = evaluator.eval(context, None, ret_val_input)

        assert len(res.new) == 1
        assert res.new[0].status
        assert isinstance(res.new[0].value, ConceptDefinition)

        assert res.eaten == [ret_val_input]

    def test_i_can_manage_when_def_concept_fails(self, context, evaluator):
        ret_val_input = self.get_parser_input(context, "def concept")
        res = evaluator.eval(context, None, ret_val_input)

        assert len(res.new) == 1
        assert not res.new[0].status
        assert isinstance(res.new[0].value, ErrorContext)

        assert res.eaten == []
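# The two tests above pin down the evaluator's result contract; a minimal
# sketch of that contract (illustration only, not part of this commit):
#
#     res = evaluator.eval(context, None, ret_val_input)
#     if res.new[0].status:
#         assert res.eaten == [ret_val_input]   # success: the input is consumed
#     else:
#         assert res.eaten == []                # failure: the input is left alone
#         assert isinstance(res.new[0].value, ErrorContext)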
@@ -0,0 +1,167 @@
import pytest

from common.utils import unstr_concept
from conftest import NewOntology
from helpers import get_concepts
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.parser_utils import UnexpectedEof, UnexpectedToken
from parsers.peg_parser import ConceptExpression, OneOrMore, Optional, OrderedChoice, RegExMatch, Sequence, StrMatch, \
    VariableExpression, ZeroOrMore


def _cexp(concept_str, rule_name=None):
    concept_name, concept_id = unstr_concept(concept_str)
    return ConceptExpression(concept_id, rule_name or concept_name)


@pytest.mark.parametrize("expression, expected", [
    ("'str'", StrMatch("str")),
    ("1", StrMatch("1")),
    (" 1", StrMatch("1")),
    (",", StrMatch(",")),
    ("r'str'", RegExMatch("str")),
    ("'foo'?", Optional(StrMatch("foo"))),
    ("'foo'*", ZeroOrMore(StrMatch("foo"))),
    ("'foo'+", OneOrMore(StrMatch("foo"))),
    ("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
    ("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
    ("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
    ("1 2 | 3 4+", OrderedChoice(
        Sequence(StrMatch("1"), StrMatch("2")),
        Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
    ("1 (2 | 3) 4+", Sequence(
        StrMatch("1"),
        OrderedChoice(StrMatch("2"), StrMatch("3")),
        OneOrMore(StrMatch("4")))),
    ("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
    ("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
    ("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
    ("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
    ("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
    ("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
    ("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
    ("(1 )", StrMatch("1")),
    ("'str'=var", StrMatch("str", rule_name="var")),
    ("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
    ("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
    ("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
    ("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
    ("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
    ("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
    ("r'str'=var", RegExMatch("str", rule_name="var")),
    ("r'foo'?=var", Optional(RegExMatch("foo"), rule_name="var")),
    ("(r'foo'?)=var", Optional(RegExMatch("foo"), rule_name="var")),
    ("r'foo'*=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
    ("(r'foo'*)=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
    ("r'foo'+=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
    ("(r'foo'+)=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
    ("r'foo'=var?", Optional(RegExMatch("foo", rule_name="var"))),
    ("(r'foo'=var)?", Optional(RegExMatch("foo", rule_name="var"))),
    ("r'foo'=var*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
    ("(r'foo'=var)*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
    ("r'foo'=var+", OneOrMore(RegExMatch("foo", rule_name="var"))),
    ("(r'foo'=var)+", OneOrMore(RegExMatch("foo", rule_name="var"))),
    ("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
    ("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
    ("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
    ("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
    ("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
    ("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
    ("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
    ("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
    ("('a'*=x 'b'+=y)=z", Sequence(
        ZeroOrMore(StrMatch("a"), rule_name="x"),
        OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
    ("'--filter'",
     Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
])
def test_i_can_parse_simple_bnf_definition(context, expression, expected):
    parser = BnfDefinitionParser(context, expression)
    res = parser.parse()

    assert res == expected
    assert not parser.error_sink
    assert parser.source == expression
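# Precedence as pinned down by the cases above (read off the expected trees,
# not from separate grammar documentation): the postfix operators ? * + bind
# tightest, sequence binds tighter than |, parentheses group, and =var names
# the expression it follows.  Hence "1 2 | 3 4+" parses as
# OrderedChoice(Sequence(1, 2), Sequence(3, OneOrMore(4))), while a postfix
# operator separated by a space, as in "1 *", is just another literal in a
# Sequence.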
@pytest.mark.parametrize("expression, expected", [
    ("foo", _cexp("c:foo#1001:")),
    ("foo*", ZeroOrMore(_cexp("c:foo#1001:"))),
    ("foo 'and' bar+", Sequence(_cexp("c:foo#1001:"), StrMatch("and"), OneOrMore(_cexp("c:bar#1002:")))),
    ("foo | bar?", OrderedChoice(_cexp("c:foo#1001:"), Optional(_cexp("c:bar#1002:")))),
    ("'str' = var", Sequence(StrMatch("str"), StrMatch("="), _cexp("c:var#1003:"))),
    ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), _cexp("c:var#1003:"))),
    ("foo=f", _cexp("c:foo#1001:", "f")),
    ("foo=f 'constant'", Sequence(_cexp("c:foo#1001:", "f"), StrMatch("constant"))),
    ("def 'concept'", Sequence(_cexp("c:def#1004:"), StrMatch("concept"))),
    ("c:foo:", _cexp("c:foo#1001:")),
    ("c:#1001:", _cexp("c:foo#1001:")),
])
def test_i_can_parse_bnf_definition_with_concepts(context, expression, expected):
    with NewOntology(context, "test_i_can_parse_bnf_definition_with_concepts"):
        get_concepts(context, "foo", "bar", "var", "def", use_sheerka=True)

        parser = BnfDefinitionParser(context, expression)
        res = parser.parse()

        assert res == expected
        assert not parser.error_sink
        assert parser.source == expression
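# The last two cases show that a concept reference may be written by name
# ("c:foo:") or by id ("c:#1001:") and that the parser resolves either form to
# the same ConceptExpression; the ids 1001..1004 are assumed here to be
# assigned in registration order by get_concepts.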
@pytest.mark.parametrize("expression, expected", [
    ("x", VariableExpression("x")),
    ("x bar", Sequence(VariableExpression("x"), _cexp("c:bar#1001:"))),
    ("bar x", Sequence(_cexp("c:bar#1001:"), VariableExpression("x"))),
    ("x 'and' bar", Sequence(VariableExpression("x"), StrMatch("and"), _cexp("c:bar#1001:"))),
    ("x | bar", OrderedChoice(VariableExpression("x"), _cexp("c:bar#1001:"))),
    ("x*", ZeroOrMore(VariableExpression("x"))),
    ("x+", OneOrMore(VariableExpression("x"))),
    ("'str' = x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
    ("'str''='x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
    ("foo=x", VariableExpression("x")),
])
def test_i_can_parse_bnf_definition_with_variables(context, expression, expected):
    with NewOntology(context, "test_i_can_parse_bnf_definition_with_variables"):
        get_concepts(context, "bar", use_sheerka=True)

        parser = BnfDefinitionParser(context, expression)
        res = parser.parse()

        assert res == expected
        assert not parser.error_sink
        assert parser.source == expression


def test_i_can_parse_when_the_concept_is_still_under_creation(context):
    # I want to parse something like
    #     def concept add from bnf add | mult
    # where 'add' is used while it is still under construction:
    # 'add' must not be detected as a variable.
    parser = BnfDefinitionParser(context, "add | 'mult'", concept_name="add")
    res = parser.parse()

    assert res == OrderedChoice(_cexp("c:add:"), StrMatch("mult"))
    assert not parser.error_sink


@pytest.mark.parametrize("expression, error", [
    ("1 ", UnexpectedEof),
    ("1|", UnexpectedEof),
    ("(1|)", UnexpectedToken),
    ("1=", UnexpectedToken),
])
def test_i_can_detect_errors(context, expression, error):
    parser = BnfDefinitionParser(context, expression)
    res = parser.parse()

    assert res is None
    assert len(parser.error_sink) > 0
    assert isinstance(parser.error_sink[0], error)
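# Failure mode shared by these parsers, as exercised above: parse() returns
# None instead of raising, and the details accumulate in parser.error_sink.
# A usage sketch (illustration only, not part of this commit):
#
#     parser = BnfDefinitionParser(context, "1|")
#     if parser.parse() is None:
#         print(parser.error_sink[0])   # here an UnexpectedEof instance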
@@ -0,0 +1,280 @@
import pytest

from common.global_symbols import NotInit
from core.concept import DefinitionType
from parsers.ConceptDefinitionParser import ConceptDefinition, ConceptDefinitionParser
from parsers.ParserInput import ParserInput
from parsers.parser_utils import ParsingError, UnexpectedEof, UnexpectedToken
from parsers.tokenizer import Keywords, Token, TokenKind


def get_parser_input(text):
    pi = ParserInput(text)
    assert pi.init()

    return pi


class TestConceptDefinitionParser:
    @pytest.fixture()
    def parser(self, sheerka):
        return ConceptDefinitionParser()

    @pytest.mark.parametrize("text", [
        "",
        " "])
    def test_i_can_detect_empty_input(self, parser, text):
        pi = get_parser_input(text)
        res = parser.parse(pi)

        assert res is None
        assert parser.error_sink == [UnexpectedEof(Keywords.DEF, None)]

    def test_must_start_with_def_keyword(self, parser):
        pi = get_parser_input("hello")

        res = parser.parse(pi)
        assert res is None
        assert parser.error_sink == [UnexpectedToken(Token(TokenKind.IDENTIFIER, "hello", 0, 1, 1), Keywords.DEF)]

    @pytest.mark.parametrize("text, expected", [
        ("def concept hello", ConceptDefinition(name="hello")),
        ("def concept hello ", ConceptDefinition(name="hello")),
        ("def concept a + b", ConceptDefinition(name="a + b")),
        ("def concept a+b", ConceptDefinition(name="a + b")),
        ("def concept 'a+b'+c", ConceptDefinition(name="'a+b' + c")),
        ('def concept "a+b"+c', ConceptDefinition(name="a+b + c")),
        ('def concept "as if"', ConceptDefinition(name="as if")),
        ("def concept 'as if'", ConceptDefinition(name="'as if'")),
        ("def concept 'as' \"if\"", ConceptDefinition(name="'as' if")),
        ('def concept \'as\' "if"', ConceptDefinition(name="'as' if")),
    ])
    def test_i_can_parse_def_concept_name(self, parser, text, expected):
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert actual == expected

    def test_concept_name_is_mandatory(self, parser):
        pi = get_parser_input("def concept as foo")
        actual = parser.parse(pi)

        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], ParsingError)
        assert parser.error_sink[0].message == "Name is mandatory."
        assert actual is None

    def test_new_line_is_not_allowed_in_concept_name(self, parser):
        pi = get_parser_input("def concept complicated \n name as foo")
        actual = parser.parse(pi)

        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], ParsingError)
        assert parser.error_sink[0].message == "Newlines are not allowed in name."
        assert actual is None

    @pytest.mark.parametrize("text, part", [
        ("def concept foo as where True", "as"),
        ("def concept foo where as 1 + 1", "where"),
        ("def concept foo pre as 1 + 1", "pre"),
        ("def concept foo post as 1 + 1", "post"),
        ("def concept foo ret as 1 + 1", "ret"),
    ])
    def test_empty_declarations_are_not_allowed(self, parser, text, part):
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert actual is None
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], ParsingError)
        assert parser.error_sink[0].message == f"Empty '{part}' declaration."

    def test_empty_parts_are_not_initialized(self, parser):
        pi = get_parser_input("def concept foo")
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.body == ""
        assert actual.where == ""
        assert actual.pre == ""
        assert actual.post == ""
        assert actual.ret == ""

    def test_i_can_manage_all_parts(self, parser):
        concept_def = "def concept foo"
        concept_def += " where my where clause"
        concept_def += " pre my pre clause"
        concept_def += " as my body"
        concept_def += " ret my return value"
        concept_def += " post my post condition"
        pi = get_parser_input(concept_def)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "my body"
        assert actual.where == "my where clause"
        assert actual.pre == "my pre clause"
        assert actual.post == "my post condition"
        assert actual.ret == "my return value"

    @pytest.mark.parametrize("body", [
        "c:#1001: is an int",
        "c:one: is an int",
        "'one' is an int",
        '"one" is an int',
    ])
    def test_i_can_manage_special_tokens_in_part(self, parser, body):
        text = f"def concept foo as {body}"
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.body == body

    @pytest.mark.parametrize("text, expected_type, expected_definition", [
        ("def concept foo from def 'hello world'", DefinitionType.DEFAULT, "'hello world'"),
        ("def concept foo from 'hello world'", DefinitionType.DEFAULT, "'hello world'"),
        ("def concept foo from bnf my bnf definition", DefinitionType.BNF, "my bnf definition"),
    ])
    def test_i_can_set_concept_definition(self, parser, text, expected_type, expected_definition):
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.definition_type == expected_type
        assert actual.definition == expected_definition

    @pytest.mark.parametrize("text", [
        "def concept foo from where True",
        "def concept foo from bnf where True",
        "def concept foo from def where True",
        "def concept foo from bnf",
        "def concept foo from def ",
    ])
    def test_empty_definitions_are_not_allowed(self, parser, text):
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert actual is None
        assert parser.error_sink[0].message == "Empty 'from' declaration."

    def test_i_can_parse_multiline_definition(self, parser):
        text = """
def concept add one to a as
def func(x):
    return x+1
func(a)
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "def func(x):\n    return x+1\nfunc(a)"

    def test_i_can_parse_indentation_mode(self, parser):
        text = """
def concept add one to a as:
    def func(x):
        return x+1
    func(a)
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "def func(x):\n    return x+1\nfunc(a)"

    def test_i_can_detect_invalid_indentation(self, parser):
        text = """
def concept add one to a as:
    def func(x):
        return x+1
  func(a)
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert actual is None
        assert len(parser.error_sink) > 0

    def test_i_can_use_colon_to_protect_keywords(self, parser):
        text = """
def concept today as:
    from datetime import date
    today = date.today()
from:
    give me the date !
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "from datetime import date\ntoday = date.today()"
        assert actual.definition == "give me the date !"
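# Reading of the two layouts (inferred from the three tests above, not from
# documentation): in flat mode every part keyword (as, where, from, ...) splits
# the input, so a body containing "from" would be cut short; in colon mode
# "as:" opens an indented block that only a dedent closes, which is why the
# datetime import survives inside the body here and "from:" can still start
# the definition part afterwards.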

    def test_i_can_parse_bnf_concept_with_regex(self, parser):
        text = "def concept sha512 from bnf number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.definition == "number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"

    @pytest.mark.parametrize("text, expected", [
        ("def concept foo auto_eval True", True),
        ("def concept foo auto_eval true", True),
        ("def concept foo auto_eval False", False),
        ("def concept foo auto_eval false", False),
    ])
    def test_i_can_parse_auto_eval(self, parser, text, expected):
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.auto_eval == expected

    def test_auto_eval_is_set_to_false_by_default(self, parser):
        pi = get_parser_input("def concept foo")
        actual = parser.parse(pi)

        assert actual.auto_eval is False

    def test_empty_auto_eval_is_not_allowed(self, parser):
        pi = get_parser_input("def concept foo auto_eval as 1")
        actual = parser.parse(pi)

        assert actual is None
        assert parser.error_sink[0].message == "Empty 'auto_eval' declaration."

    def test_i_cannot_parse_wrong_value(self, parser):
        pi = get_parser_input("def concept foo auto_eval wrong_value")
        actual = parser.parse(pi)

        assert actual is None
        assert parser.error_sink[0].message == "Invalid 'auto_eval' declaration (wrong_value is not recognized)"

    @pytest.mark.parametrize("text, expected", [
        ("def concept foo def_var var", [("var", NotInit)]),
        ("def concept foo def_var var1 def_var var2", [("var1", NotInit), ("var2", NotInit)]),
        ("def concept foo def_var var1 var2", [("var1", NotInit), ("var2", NotInit)]),
        ("def concept foo def_var var1, var2", [("var1", NotInit), ("var2", NotInit)]),
        ("def concept foo def_var var1=10", [("var1", 10)]),
        ("def concept foo def_var var1 = 10", [("var1", 10)]),
        ("def concept foo def_var var1 = 'hello'", [("var1", "'hello'")]),
        ("def concept foo def_var var1 = hello", [("var1", "hello")]),
        ("def concept foo def_var var1, var2 = 10", [("var1", NotInit), ("var2", 10)]),
        ("def concept foo def_var var1='hello', var2 = 10", [("var1", "'hello'"), ("var2", 10)]),
        ("def concept foo def_var var1='hello' var2 = 10", [("var1", "'hello'"), ("var2", 10)]),
    ])
    def test_i_can_parse_variable_definitions(self, parser, text, expected):
        pi = get_parser_input(text)
        actual = parser.parse(pi)

        assert isinstance(actual, ConceptDefinition)
        assert actual.def_var == expected

    def test_empty_def_var_is_not_allowed(self, parser):
        pi = get_parser_input("def concept foo def_var as 1")
        actual = parser.parse(pi)

        assert actual is None
        assert parser.error_sink[0].message == "Empty 'def_var' declaration."
@@ -8,6 +8,8 @@ def test_i_can_parser_input():
    parser_input = ParserInput("def concept a")
    assert parser_input.init() is True
    assert parser_input.exception is None
    assert parser_input.pos == -1
    assert parser_input.end == 6


def test_i_can_detect_errors():
@@ -38,3 +40,129 @@ def test_i_must_call_init_before_call_as_text():
        parser_input.as_text()

    assert ex.value.args[0] == "You must call init() first !"


def test_i_can_get_next_token():
    parser_input = ParserInput("def concept a")
    parser_input.init()

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "def"

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "concept"

    assert parser_input.next_token(skip_whitespace=False) is True
    assert parser_input.token.type == TokenKind.WHITESPACE
    assert parser_input.token.value == " "

    assert parser_input.next_token(skip_whitespace=False) is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "a"

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF


def test_next_after_eof_is_eof():
    parser_input = ParserInput("hi")
    parser_input.init()

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "hi"

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF


def test_i_can_manage_blank_input():
    parser_input = ParserInput(" ")
    parser_input.init()

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF


def test_i_can_manage_blank_input_when_skip_whitespace_is_false():
    parser_input = ParserInput(" ")
    parser_input.init()

    assert parser_input.next_token(skip_whitespace=False) is True
    assert parser_input.token.type == TokenKind.WHITESPACE
    assert parser_input.token.value == " "

    assert parser_input.next_token(skip_whitespace=False) is False
    assert parser_input.token.type == TokenKind.EOF


def test_i_can_reset():
    parser_input = ParserInput("hello world ")
    parser_input.init()

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "hello"

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "world"

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF

    parser_input.reset()

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "hello"

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "world"

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF


def test_i_can_parse_when_input_ends_with_white_space():
    parser_input = ParserInput("hello world ")
    parser_input.init()

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "hello"

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "world"

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF


def test_i_can_parse_when_input_starts_with_white_space():
    parser_input = ParserInput(" hello world")
    parser_input.init()

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "hello"

    assert parser_input.next_token() is True
    assert parser_input.token.type == TokenKind.IDENTIFIER
    assert parser_input.token.value == "world"

    assert parser_input.next_token() is False
    assert parser_input.token.type == TokenKind.EOF
@@ -0,0 +1,95 @@
import pytest

from parsers.ParserInput import ParserInput
from parsers.parser_utils import parse_parts, strip_tokens
from parsers.tokenizer import Keywords, Tokenizer


def compare_results(actual, expected, compare_str=False):
    resolved_expected = {}
    for k, v in expected.items():
        if isinstance(v, str):
            # case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}
            tokens = list(Tokenizer(v, yield_eof=False))
            resolved_expected[k] = [tokens[0]] + tokens[2:]
        else:
            # case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
            resolved_expected[k] = v

    def get_better_representation(value):
        better_repr = {}
        for k, tokens in value.items():
            joined = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
            better_repr[k] = [tokens[0].repr_value, joined]
        return better_repr

    # it's easier to compare two lists of strings
    actual_to_compare = get_better_representation(actual)
    expected_to_compare = get_better_representation(resolved_expected)

    assert actual_to_compare == expected_to_compare


def get_tokens(lst):
    """
    Returns a list of Tokens for a list of items.

    :param lst: the items to tokenize
    :type lst: list
    :return: the tokens, stripped of their position information
    :rtype: list
    """
    return list(Tokenizer(lst, yield_eof=False).remove_positions())


@pytest.mark.parametrize("input_as_list, expected_as_list", [
    ([" "], []),
    ([" ", "one"], ["one"]),
    (["one", " "], ["one"]),
    ([" ", "one", " "], ["one"]),

    (["\n", "one"], ["one"]),
    (["one", "\n"], ["one"]),
    (["\n", "one", "\n"], ["one"]),

    ([" ", "\n", "one"], ["one"]),
    (["one", " ", "\n"], ["one"]),
    ([" ", "\n", "one", " ", "\n"], ["one"]),

    (["\n", " ", "one"], ["one"]),
    (["one", "\n", " "], ["one"]),
    (["\n", " ", "one", "\n", " "], ["one"]),

    ([" ", "\n", " ", "one"], ["one"]),
    (["one", " ", "\n", " "], ["one"]),
    ([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),

    (["\n", " ", "\n", "one"], ["one"]),
    (["one", "\n", " ", "\n"], ["one"]),
    (["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),

])
def test_i_can_strip(input_as_list, expected_as_list):
    actual = strip_tokens(get_tokens(input_as_list))  # KSI 20201007 Why not use Tokenizer ?!! For perf ?
    expected = get_tokens(expected_as_list)
    assert actual == expected


@pytest.mark.parametrize("text, strip, expected", [
    ("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", False, {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", False, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
    (" when xxx", False, {Keywords.WHEN: "when xxx"}),

    ("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
    ("when uuu vvv print xxx yyy", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    ("print xxx yyy when uuu vvv", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
    (" when xxx", True, {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(text, strip, expected):
    parser_input = ParserInput(text)
    parser_input.init()
    parser_input.next_token()
    error_sink = []

    res = parse_parts(parser_input, error_sink, ["when", "print"], strip=strip)
    compare_results(res, expected)
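# Shape of the parse_parts result implied by compare_results above (a hedged
# reconstruction, not an API reference): a dict mapping each keyword found in
# the input to its run of tokens, the first token being the keyword itself,
# roughly:
#
#     parse_parts(pi, sink, ["when", "print"], strip=True)
#     # -> {Keywords.WHEN: <tokens of "when uuu vvv">,
#     #     Keywords.PRINT: <tokens of "print xxx yyy">}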
@@ -5,7 +5,7 @@ from parsers.tokenizer import LexerError, Token, TokenKind, Tokenizer

def test_i_can_tokenize():
    source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
    source += "$£€!_identifier°~_^\\`==#__var__10r/regex\nregex/r:xxx#1:**//%that's"
    source += "$£€!_identifier°~_^\\`==#__var__10r/regex\nregex/r:xxx#1:**//%"
    tokens = list(Tokenizer(source))
    assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
    assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -61,11 +61,8 @@ def test_i_can_tokenize():
    assert tokens[51] == Token(TokenKind.STARSTAR, "**", 143, 7, 15)
    assert tokens[52] == Token(TokenKind.SLASHSLASH, "//", 145, 7, 17)
    assert tokens[53] == Token(TokenKind.PERCENT, "%", 147, 7, 19)
    assert tokens[54] == Token(TokenKind.IDENTIFIER, "that", 148, 7, 20)
    assert tokens[55] == Token(TokenKind.QUOTE, "'", 152, 7, 24)
    assert tokens[56] == Token(TokenKind.IDENTIFIER, "s", 153, 7, 25)

    assert tokens[57] == Token(TokenKind.EOF, '', 154, 7, 26)
    assert tokens[54] == Token(TokenKind.EOF, '', 148, 7, 20)
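# Why the indices shift in this hunk: dropping the trailing "that's" from the
# source removes the IDENTIFIER/QUOTE/IDENTIFIER tokens 54-56, so EOF moves up
# from index 57 (offset 154) to index 54 (offset 148).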

@pytest.mark.parametrize("text, expected", [
@@ -209,3 +206,13 @@ def test_i_can_parse_regex_token(text, expected):
    assert tokens[0].str_value == "r" + expected
    assert tokens[0].repr_value == "r" + expected
    assert tokens[0].strip_quote == expected[1:-1]


@pytest.mark.parametrize("text, parse_quote, expected", [
    ("a='hello'", False, ["a", "=", "'hello'"]),
    ("a='hello'", True, ["a", "=", "'", "hello", "'"]),
    ("a= 'hello'", True, ["a", "=", " ", "'hello'"]),
])
def test_i_can_choose_to_parse_quote(text, parse_quote, expected):
    tokens = list(Tokenizer(text, parse_quote=parse_quote, yield_eof=False))
    assert [t.value for t in tokens] == expected

@@ -1,17 +1,22 @@
import logging
from enum import Enum

import pytest

from base import BaseTest
from common.global_symbols import NoFirstToken, NotFound, NotInit, Removed
from helpers import get_concept, get_concepts
from helpers import get_concepts
from ontologies.SheerkaOntologyManager import SheerkaOntologyManager
from parsers.tokenizer import Keywords
from sheerkapickle import tags
from sheerkapickle.sheerkaplicker import SheerkaPickler
from sheerkapickle.sheerkaunpickler import SheerkaUnpickler


class MyEnum(Enum):
    DEF = "def"
    WHERE = "where"


class Obj:
    def __init__(self, a, b, c):
        self.a = a
@@ -47,7 +52,7 @@ class TestSheerkaPickler(BaseTest):
        ([1, [3.14, "a string"]], [1, [3.14, "a string"]]),
        ([1, (3.14, "a string")], [1, {tags.TUPLE: [3.14, "a string"]}]),
        ([], []),
        (Keywords.DEF, {tags.ENUM: 'parsers.tokenizer.Keywords.DEF'}),
        (MyEnum.DEF, {tags.ENUM: 'tests.sheerkapickle.test_SheerkaPickler.MyEnum.DEF'}),
    ])
    def test_i_can_flatten_and_restore_primitives(self, sheerka, obj, expected):
        flatten = SheerkaPickler(sheerka).flatten(obj)
@@ -128,7 +133,7 @@ class TestSheerkaPickler(BaseTest):

    @pytest.mark.parametrize("obj, expected", [
        ({None: "a"}, {'null': "a"}),
        ({Keywords.DEF: "a"}, {'parsers.tokenizer.Keywords.DEF': 'a'}),
        ({MyEnum.DEF: "a"}, {'tests.sheerkapickle.test_SheerkaPickler.MyEnum.DEF': 'a'}),
        ({(1, 2): "a"}, {(1, 2): "a"}),
    ])
    def test_i_can_manage_specific_keys_in_dictionaries(self, sheerka, obj, expected):
@@ -151,15 +156,15 @@ class TestSheerkaPickler(BaseTest):

    def test_i_can_manage_references(self, sheerka):
        foo = Obj("foo", "bar", "baz")
        obj = [Keywords.DEF, foo, Keywords.WHERE, Keywords.DEF, foo]
        obj = [MyEnum.DEF, foo, MyEnum.WHERE, MyEnum.DEF, foo]
        flatten = SheerkaPickler(sheerka).flatten(obj)

        assert flatten == [{'_sheerka/enum': 'parsers.tokenizer.Keywords.DEF'},
        assert flatten == [{'_sheerka/enum': 'tests.sheerkapickle.test_SheerkaPickler.MyEnum.DEF'},
                           {'_sheerka/obj': 'tests.sheerkapickle.test_SheerkaPickler.Obj',
                            'a': 'foo',
                            'b': 'bar',
                            'c': 'baz'},
                           {'_sheerka/enum': 'parsers.tokenizer.Keywords.WHERE'},
                           {'_sheerka/enum': 'tests.sheerkapickle.test_SheerkaPickler.MyEnum.WHERE'},
                           {'_sheerka/id': 0},
                           {'_sheerka/id': 1}]
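# Reference handling, as the expected flatten shows: the first occurrence of a
# value is flattened in full and later occurrences collapse to positional
# back-references, {'_sheerka/id': 0} for the repeated MyEnum.DEF and
# {'_sheerka/id': 1} for the repeated foo instance.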