# 661 lines, 20 KiB, Python
from dataclasses import dataclass
|
|
from typing import List, Union
|
|
|
|
from core.builtin_concepts_ids import BuiltinConcepts
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
from core.tokenizer import Token, TokenKind, Tokenizer, LexerError
|
|
from core.utils import tokens_are_matching, get_text_from_tokens
|
|
from parsers.BaseNodeParser import UnrecognizedTokensNode
|
|
from parsers.BaseParser import Node, ParsingError, BaseParser, ErrorSink, UnexpectedTokenParsingError
|
|
|
|
|
|
class ComparisonType:
    """String identifiers for the comparison operators used by ComparisonNode."""

    EQUALS = "EQ"
    # NOTE: NOT_EQUAlS (lowercase 'l') is a historical typo kept for backward
    # compatibility with existing callers; prefer NOT_EQUALS in new code.
    NOT_EQUAlS = "NOT_EQ"
    NOT_EQUALS = "NOT_EQ"  # correctly spelled alias for NOT_EQUAlS
    LESS_THAN = "LT"
    LESS_THAN_OR_EQUALS = "LTE"
    GREATER_THAN = "GT"
    GREATER_THAN_OR_EQUALS = "GTE"
    IN = "IN"
    NOT_IN = "NOT_IN"
@dataclass()
class LeftPartNotFoundError(ParsingError):
    """
    Raised when an expression begins with a binary operator ('and' / 'or'),
    so there is no left-hand operand to combine.
    """
@dataclass()
class ParenthesisMismatchError(ParsingError):
    """Signals an unbalanced parenthesis; `token` is the offending '(' or ')'."""

    token: Token
class ExprNode(Node):
    """
    Base class for all expression-tree nodes.

    Subclasses are expected to override eval().
    """

    def __init__(self, start: int, end: int, tokens: List[Token]):
        self.start = start
        self.end = end
        self.tokens = tokens
        self.source = None  # lazily rendered by get_source()

    def eval(self, obj):
        # Default: a bare node evaluates to True.
        return True

    def __eq__(self, other):
        if not isinstance(other, ExprNode):
            return False

        same_span = self.start == other.start and self.end == other.end
        if not same_span:
            return False

        # Token lists are only compared when the other node carries them.
        return other.tokens is None or other.tokens == self.tokens

    def __hash__(self):
        return hash((self.start, self.end))

    def get_source(self):
        # Render and cache the textual form of this node's tokens.
        if self.source is None:
            self.source = get_text_from_tokens(self.tokens)

        return self.source
class NameExprNode(ExprNode):
    """Leaf node holding the raw text of a run of tokens."""

    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        # Concatenated textual value of the underlying tokens.
        self.value = "".join([t.str_value for t in self.tokens])

    def eval(self, obj):
        return self.value

    def get_value(self):
        return self.value

    def __repr__(self):
        return f"NameExprNode(start={self.start}, end={self.end}, '{self.value}')"

    def __str__(self):
        return self.value

    def __eq__(self, other):
        if not isinstance(other, NameExprNode):
            return False

        return super().__eq__(other)

    def __hash__(self):
        return super().__hash__()

    def to_unrecognized(self):
        """
        UnrecognizedTokensNode with all tokens
        """
        return UnrecognizedTokensNode(self.start, self.end, self.tokens).fix_source()

    def to_str_unrecognized(self):
        """
        UnrecognizedTokensNode with one token, which is a string token of all the tokens
        """
        # BUG FIX: the original called self.str_value(), which does not exist on
        # this class (tokens have a .str_value attribute; the node exposes
        # .value), raising AttributeError at runtime.
        token = Token(TokenKind.STRING,
                      "'" + self.value + "'",
                      self.tokens[0].index,
                      self.tokens[0].line,
                      self.tokens[0].column)
        return UnrecognizedTokensNode(self.start, self.end, [token]).fix_source()
class AndNode(ExprNode):
    """Logical conjunction of one or more sub-expressions."""

    def __init__(self, start, end, tokens, *parts: ExprNode):
        super().__init__(start, end, tokens)
        self.parts = parts

    def eval(self, obj):
        # FIX: the original used short-circuit `and` for the first two parts
        # but bitwise `&=` for the rest (inconsistent, and wrong for non-bool
        # values), and crashed with IndexError when only one part was present.
        # Use short-circuit `and` uniformly.
        res = self.parts[0].eval(obj)
        for part in self.parts[1:]:
            res = res and part.eval(obj)
        return res

    def __repr__(self):
        return f"AndNode(start={self.start}, end={self.end}, " + ", ".join([repr(p) for p in self.parts]) + ")"

    def __str__(self):
        return " and ".join([str(p) for p in self.parts])

    def __eq__(self, other):
        if not isinstance(other, AndNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        # Token lists are only compared when the other node carries them.
        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.parts == other.parts

    def __hash__(self):
        # self.parts is a tuple (collected via *parts), so it is hashable as
        # long as the contained nodes are.
        return hash((self.start, self.end, self.parts))
class OrNode(ExprNode):
    """Logical disjunction of one or more sub-expressions."""

    def __init__(self, start, end, tokens, *parts: ExprNode):
        super().__init__(start, end, tokens)
        self.parts = parts

    def eval(self, obj):
        # FIX: the original used short-circuit `or` for the first two parts
        # but bitwise `|=` for the rest (inconsistent, and wrong for non-bool
        # values), and crashed with IndexError when only one part was present.
        # Use short-circuit `or` uniformly.
        res = self.parts[0].eval(obj)
        for part in self.parts[1:]:
            res = res or part.eval(obj)
        return res

    def __repr__(self):
        return f"OrNode(start={self.start}, end={self.end}, " + ", ".join([repr(p) for p in self.parts]) + ")"

    def __str__(self):
        return " or ".join([str(p) for p in self.parts])

    def __eq__(self, other):
        if not isinstance(other, OrNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        # Token lists are only compared when the other node carries them.
        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.parts == other.parts

    def __hash__(self):
        # self.parts is a tuple (collected via *parts), so it is hashable as
        # long as the contained nodes are.
        return hash((self.start, self.end, self.parts))
class NotNode(ExprNode):
    """Logical negation of a single wrapped sub-expression."""

    def __init__(self, start, end, tokens, node: ExprNode):
        super().__init__(start, end, tokens)
        self.node = node

    def eval(self, obj):
        return not self.node.eval(obj)

    def get_value(self):
        # Delegates to the wrapped node.
        return self.node.get_value()

    def __repr__(self):
        return f"NotNode(start={self.start}, end={self.end}, {self.node!r})"

    def __str__(self):
        return f"not {self.node}"

    def __eq__(self, other):
        if not isinstance(other, NotNode):
            return False

        same_span = self.start == other.start and self.end == other.end
        if not same_span:
            return False

        # Token lists are only compared when the other node carries them.
        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.node == other.node

    def __hash__(self):
        return hash((self.start, self.end, self.node))
class ParenthesisNode(ExprNode):
    """
    Wraps an expression that appeared inside parentheses.

    Exists only to remember the parenthesis boundaries; evaluation is
    delegated to the wrapped node by callers that unwrap it.
    """

    def __init__(self, start, end, tokens, node: ExprNode):
        super().__init__(start, end, tokens)
        self.node = node

    def __eq__(self, other):
        if not isinstance(other, ParenthesisNode):
            return False

        same_span = self.start == other.start and self.end == other.end
        if not same_span:
            return False

        # Token lists are only compared when the other node carries them.
        if other.tokens is not None and other.tokens != self.tokens:
            return False

        return self.node == other.node

    def __hash__(self):
        return hash((self.start, self.end, self.node))

    def __repr__(self):
        return f"ParenthesisNode(start={self.start}, end={self.end}, node={self.node!r})"

    def __str__(self):
        return f"({self.node})"
class VariableNode(ExprNode):
    """A variable reference with optional dotted attributes, e.g. ``a.b.c``."""

    def __init__(self, start, end, tokens, name, *attributes):
        super().__init__(start, end, tokens)
        self.name = name.strip()
        self.attributes = [attr.strip() for attr in attributes]
        # Pre-joined dotted attribute path, or None when there are no attributes.
        if len(self.attributes) > 0:
            self.attributes_str = ".".join(self.attributes)
        else:
            self.attributes_str = None

    def __eq__(self, other):
        if self is other:
            return True

        if not isinstance(other, VariableNode):
            return False

        return self.name == other.name and self.attributes == other.attributes

    def __hash__(self):
        # FIX: self.attributes is a list, and the original
        # hash((self.name, self.attributes)) raised TypeError (lists are
        # unhashable).  Convert to a tuple first.
        return hash((self.name, tuple(self.attributes)))

    def __repr__(self):
        prefix = f"VariableNode(start={self.start}, end={self.end}, '{self.name}"
        if len(self.attributes) > 0:
            return prefix + "." + ".".join(self.attributes) + "')"
        else:
            return prefix + "')"

    def __str__(self):
        if self.attributes:
            return self.name + "." + ".".join(self.attributes)
        else:
            return self.name

    def unpack(self):
        """Return the full dotted path as a flat list: [name, attr1, ...]."""
        return [self.name] + self.attributes
class ComparisonNode(ExprNode):
    """Binary comparison: ``left <comp> right`` with comp a ComparisonType id."""

    def __init__(self, start, end, tokens, comp: str, left: ExprNode, right: ExprNode):
        super().__init__(start, end, tokens)
        self.comp = comp
        self.left = left
        self.right = right

    def __eq__(self, other):
        if self is other:
            return True

        if not isinstance(other, ComparisonNode):
            return False

        return (self.comp == other.comp and
                self.left == other.left and
                self.right == other.right)

    def __hash__(self):
        return hash((self.comp, self.left, self.right))

    def __repr__(self):
        return f"ComparisonNode(start={self.start}, end={self.end}, {self.left!r} {self.comp} {self.right!r})"

    def __str__(self):
        return f"{self.left} {self.comp} {self.right}"

    @staticmethod
    def rebuild_source(left, op, right):
        """Render ``left op right`` back into expression-source syntax."""
        # String operands are rendered quoted.
        if isinstance(right, str):
            right = f"'{right}'"

        # Simple infix operators share one template.
        symbols = {
            ComparisonType.EQUALS: "==",
            ComparisonType.NOT_EQUAlS: "!=",
            ComparisonType.LESS_THAN: "<",
            ComparisonType.LESS_THAN_OR_EQUALS: "<=",
            ComparisonType.GREATER_THAN: ">",
            ComparisonType.GREATER_THAN_OR_EQUALS: ">=",
        }
        if op in symbols:
            return f"{left} {symbols[op]} {right}"

        # Membership operators wrap the right-hand side in parentheses.
        if op == ComparisonType.IN:
            return f"{left} in ({right})"

        if op == ComparisonType.NOT_IN:
            return f"{left} not in ({right})"

        # Unknown operator: None, matching the original fall-through behavior.
        return None
@dataclass()
class FunctionParameter:
    """
    Result of parsing a single function parameter.
    """

    value: NameExprNode  # the parsed parameter value
    separator: NameExprNode = None  # value and position of the trailing separator, if any

    def add_sep(self, start, end, tokens):
        """Record the separator that followed this parameter."""
        self.separator = NameExprNode(start, end, tokens)

    def value_to_unrecognized(self):
        """Convert the parsed value into an UnrecognizedTokensNode."""
        node = self.value
        return UnrecognizedTokensNode(node.start, node.end, node.tokens).fix_source()

    def separator_to_unrecognized(self):
        """Convert the separator into an UnrecognizedTokensNode, or None if absent."""
        sep = self.separator
        if sep is None:
            return None
        return UnrecognizedTokensNode(sep.start, sep.end, sep.tokens).fix_source()
class FunctionNode(ExprNode):
    """A parsed function-call expression: leading/trailing names plus parameters."""

    def __init__(self, start, end, tokens,
                 first: NameExprNode, last: NameExprNode, parameters: Union[None, List[FunctionParameter]]):
        super().__init__(start, end, tokens)
        self.first = first
        self.last = last
        self.parameters = parameters

    def __eq__(self, other):
        if self is other:
            return True

        if not isinstance(other, FunctionNode):
            return False

        return (self.first == other.first and
                self.last == other.last and
                self.parameters == other.parameters)

    def __hash__(self):
        # FIX: self.parameters is a list (or None) and lists are unhashable;
        # the original hash((..., self.parameters)) raised TypeError.  Convert
        # to a tuple, keeping None as-is.  NOTE(review): this still requires
        # the contained FunctionParameter objects to be hashable — confirm.
        params = tuple(self.parameters) if self.parameters is not None else None
        return hash((self.first, self.last, params))

    def __repr__(self):
        return f"FunctionNode(start={self.start}, end={self.end}, {self.first!r} {self.last} {self.parameters!r})"

    def __str__(self):
        return f"{self.first} {self.parameters} {self.last}"
class BaseExpressionParser(BaseParser):
    """
    Shared driver for expression parsers: resets the token stream, delegates
    parsing to parse_input() (implemented by subclasses), and wraps the result
    and any collected errors into a sheerka return value.
    """

    def reset_parser_input(self, parser_input: ParserInput, error_sink):
        """Clear previous errors and rewind the input; return False on lexer failure."""
        try:
            error_sink.clear()
            parser_input.reset(self.yield_eof)
        except LexerError as e:
            # Tokenization failed: record the lexer error and abort.
            error_sink.add_error(e)
            return False

        # Prime the first token so parsing can start immediately.
        parser_input.next_token()
        return True

    def parse(self, context, parser_input: ParserInput):
        """
        Parse the whole input and return a sheerka result.

        :param context: parsing context (provides logging and the sheerka service)
        :param parser_input: the tokenized input to parse
        :return: context.sheerka.ret(...) describing success/failure, or None
                 when parser_input is not a ParserInput
        """

        if not isinstance(parser_input, ParserInput):
            return None

        context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name)
        sheerka = context.sheerka

        # Empty input is reported as IS_EMPTY rather than as a parse error.
        if parser_input.is_empty():
            return context.sheerka.ret(self.name,
                                       False,
                                       sheerka.new(BuiltinConcepts.IS_EMPTY))

        error_sink = ErrorSink()
        if not self.reset_parser_input(parser_input, error_sink):
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=error_sink.sink))

        node = self.parse_input(context, parser_input, error_sink)

        # Anything left unconsumed after parsing is an error; a stray ')' gets
        # a dedicated parenthesis-mismatch error.
        token = parser_input.token
        if token and token.type != TokenKind.EOF:
            if token.type == TokenKind.RPAR:
                error_sink.add_error(ParenthesisMismatchError(token))
            else:
                error_sink.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.EOF]))

        # A top-level parenthesis wrapper is transparent: unwrap it.
        if isinstance(node, ParenthesisNode):
            node = node.node

        value = self.get_return_value_body(context.sheerka,
                                           parser_input.as_text(),
                                           node,
                                           node,
                                           error_sink.sink)

        ret = context.sheerka.ret(self.name,
                                  not error_sink.has_error,
                                  value)

        return ret

    def parse_input(self, context, parser_input, error_sink):
        # Must be provided by concrete parsers.
        raise NotImplementedError

    def parse_tokens_stop_condition(self, token, parser_input):
        # Must be provided by concrete parsers.
        raise NotImplementedError

    def parse_tokens(self, context, parser_input, error_sink):
        """
        Consume tokens up to EOF, an unbalanced ')' or the subclass-defined
        stop condition, and return the resulting node.  Delegates the consumed
        span to self.expr_parser when one is configured, otherwise wraps it in
        a NameExprNode.
        """
        def stop():
            # Stop at EOF, or — only while outside any nested parenthesis —
            # at a closing ')' or when the subclass stop condition fires.
            return token.type == TokenKind.EOF or \
                   paren_count == 0 and (token.type == TokenKind.RPAR or
                                         self.parse_tokens_stop_condition(token, parser_input))

        token = parser_input.token
        if token.type == TokenKind.EOF:
            return None

        # Leading '(' : parse the inner expression and demand a matching ')'.
        if token.type == TokenKind.LPAR:
            last_paren = token
            start = parser_input.pos
            parser_input.next_token()
            expr = self.parse_input(context, parser_input, error_sink)
            token = parser_input.token
            if token.type != TokenKind.RPAR:
                # No closing ')': report the opening one and return what we got.
                error_sink.add_error(ParenthesisMismatchError(last_paren))
                return expr
            end = parser_input.pos
            parser_input.next_token()
            return ParenthesisNode(start, end, None, expr)

        paren_count = 0
        last_paren = None
        start = parser_input.pos
        end = parser_input.pos
        last_is_whitespace = False
        while not stop():
            last_is_whitespace = token.type == TokenKind.WHITESPACE
            end += 1
            if token.type == TokenKind.LPAR:
                last_paren = token
                paren_count += 1
            if token.type == TokenKind.RPAR:
                paren_count -= 1
            parser_input.next_token(False)
            token = parser_input.token

        # Exclude a trailing whitespace token from the consumed span.
        if last_is_whitespace:
            end -= 1

        if start == end:
            # Nothing was consumed: the expression started with an operator
            # (unless we are simply looking at a closing parenthesis).
            if token.type != TokenKind.RPAR:
                error_sink.add_error(LeftPartNotFoundError())
            return None

        if paren_count != 0:
            # Unbalanced '(' somewhere inside the span.
            error_sink.add_error(ParenthesisMismatchError(last_paren))
            return None

        if self.expr_parser:
            # Re-parse the consumed token span with the nested expression parser.
            # NOTE(review): 'yield_oef' looks like a typo for 'yield_eof' —
            # confirm against the ParserInput constructor signature.
            new_parsing_input = ParserInput(
                None,
                tokens=parser_input.tokens,
                length=parser_input.length,
                start=start,
                end=end - 1,
                yield_oef=False).reset()
            new_parsing_input.next_token()
            return self.expr_parser.parse_input(context, new_parsing_input, error_sink)
        else:
            return NameExprNode(start, end - 1, parser_input.tokens[start:end])
class ExpressionVisitor:
    """
    Pythonic visitor dispatch for ExprNode trees: visit() routes to a
    visit_<ClassName>() method when one is defined, otherwise to
    generic_visit().
    """

    def visit(self, expr_node):
        handler = getattr(self,
                          "visit_" + type(expr_node).__name__,
                          self.generic_visit)
        return handler(expr_node)

    def generic_visit(self, expr_node):
        """Fallback: recurse into every ExprNode found among the node's attributes."""
        for value in expr_node.__dict__.values():
            if isinstance(value, (list, tuple)):
                for item in value:
                    if isinstance(item, ExprNode):
                        self.visit(item)
            elif isinstance(value, ExprNode):
                self.visit(value)
class TrueifyVisitor(ExpressionVisitor):
    """
    Rewrites an expression tree, replacing every NameExprNode that mentions a
    variable listed in `to_trueify` with the literal True.  A node that also
    mentions a variable listed in `to_skip` is left untouched.
    """

    def __init__(self, to_trueify, to_skip):
        self.to_trueify = to_trueify
        self.to_skip = to_skip

    def visit_AndNode(self, expr_node):
        rewritten = [self.visit(p) for p in expr_node.parts]
        return AndNode(expr_node.start, expr_node.end, expr_node.tokens, *rewritten)

    def visit_OrNode(self, expr_node):
        rewritten = [self.visit(p) for p in expr_node.parts]
        return OrNode(expr_node.start, expr_node.end, expr_node.tokens, *rewritten)

    def visit_NameExprNode(self, expr_node):
        make_true = False
        for tok in expr_node.tokens:
            if tok.type == TokenKind.IDENTIFIER:
                # A skip-listed identifier wins over any trueify-listed one.
                if tok.value in self.to_skip:
                    return expr_node
                if tok.value in self.to_trueify:
                    make_true = True

        if not make_true:
            return expr_node

        # Synthesize a node whose single token is the identifier "True".
        return NameExprNode(expr_node.start,
                            expr_node.end,
                            [Token(TokenKind.IDENTIFIER, "True", -1, -1, -1)])
# Pre-tokenized reference expressions used by IsAQuestionVisitor to detect
# question-related conditions via token matching.
is_question_tokens = list(Tokenizer("is_question()"))

eval_question_requested_in_context = list(Tokenizer("context.in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED)"))
class IsAQuestionVisitor(ExpressionVisitor):
    """
    Visits an expression and decides whether it tests for "is a question".

    Returns True when is_question() or
    context.in_context(BuiltinConcepts.EVAL_QUESTION_REQUESTED) is found,
    False when such a test is definitively negated, and None when
    undetermined.
    """

    def visit_NameExprNode(self, expr_node):
        # A leaf matches when its tokens equal one of the reference expressions.
        if tokens_are_matching(expr_node.tokens, is_question_tokens) or \
                tokens_are_matching(expr_node.tokens, eval_question_requested_in_context):
            return True
        return None

    def visit_AndNode(self, expr_node):
        """
        AND   | True  | False | None
        ------+-------+-------+----------
        False | False | False | False
        True  | True  | False | True
        None  | True  | False | None
        """
        # FIX: the original was order-dependent and contradicted the table
        # above — a None part visited after a True part clobbered the result
        # (e.g. parts [None, True] returned None instead of True).  Implement
        # the table directly: any False wins, otherwise any True wins,
        # otherwise None.
        saw_true = False
        for part in expr_node.parts:
            visited = self.visit(part)
            if visited is False:
                return False
            if visited is True:
                saw_true = True

        return True if saw_true else None

    def visit_OrNode(self, expr_node):
        """
        OR    | True  | False | None
        ------+-------+-------+----------
        True  | True  | True  | True
        False | True  | False | False
        None  | True  | False | None
        """
        # FIX: same order-dependence as visit_AndNode.  Per the table: any
        # True wins, otherwise any False wins, otherwise None.
        saw_false = False
        for part in expr_node.parts:
            visited = self.visit(part)
            if visited is True:
                return True
            if visited is False:
                saw_false = True

        return False if saw_false else None

    def visit_NotNode(self, expr_node):
        """
              | NOT
        ------+-------
        False | True
        True  | False
        None  | None
        """
        visited = self.visit(expr_node.node)
        return None if visited is None else not visited

    def is_a_question(self, expr_node):
        """Return True only when the visit definitively found a question test."""
        res = self.visit(expr_node)
        return isinstance(res, bool) and res