diff --git a/src/core/utils.py b/src/core/utils.py index a770d09..f63d72d 100644 --- a/src/core/utils.py +++ b/src/core/utils.py @@ -545,16 +545,21 @@ def decode_concept(text, wrapper="C"): return None, None -def tokens_index(tokens, sub_tokens, skip=0): +def tokens_index(tokens, sub_tokens, skip=0, start_from_end=False): """ Index of the sub tokens in tokens :param tokens: tokens :param sub_tokens: sub tokens to search :param skip: number of found to skip + :param start_from_end: start by the end :return: """ expected = [token.value for token in sub_tokens if token.type != TokenKind.EOF] - for i in range(0, len(tokens) - len(expected) + 1): + indexes = range(0, len(tokens) - len(expected) + 1) + if start_from_end: + indexes = reversed(indexes) + + for i in indexes: for j in range(len(expected)): if tokens[i + j].value != expected[j]: break diff --git a/src/parsers/BaseParser.py b/src/parsers/BaseParser.py index b429f6f..876ed74 100644 --- a/src/parsers/BaseParser.py +++ b/src/parsers/BaseParser.py @@ -232,3 +232,9 @@ class BaseParser: @staticmethod def get_name(name): return BaseParser.PREFIX + name + + +class BaseExprParser(BaseParser): + + def parse_input(self): + raise NotImplementedError diff --git a/src/parsers/ExpressionParser.py b/src/parsers/ExpressionParser.py new file mode 100644 index 0000000..fa203d1 --- /dev/null +++ b/src/parsers/ExpressionParser.py @@ -0,0 +1,72 @@ +from core.builtin_concepts_ids import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import ParserInput +from parsers.BaseParser import BaseExprParser +from parsers.FunctionParser import FunctionParser +from parsers.LogicalOperatorParser import LogicalOperatorParser +from parsers.RelationalOperatorParser import RelationalOperatorParser +from parsers.expressions import ParenthesisNode, NameExprNode + + +class ExpressionParser(BaseExprParser): + """ + Parses xxx (== | > | < | >= | <= | != | in | not in) yyy + Nothing else + """ + + NAME = "Expression" + + def __init__(self, **kwargs): + super().__init__(ExpressionParser.NAME, 60, False, yield_eof=True) + self.logical_parser = LogicalOperatorParser() + self.relational_parser = RelationalOperatorParser() + self.function_parser = FunctionParser() + + def parse(self, context, parser_input: ParserInput): + """ + :param context: + :param parser_input: + :return: + """ + + if not isinstance(parser_input, ParserInput): + return None + + context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name) + sheerka = context.sheerka + + if parser_input.is_empty(): + return context.sheerka.ret(self.name, + False, + sheerka.new(BuiltinConcepts.IS_EMPTY)) + + if not self.reset_parser(context, parser_input): + return self.sheerka.ret( + self.name, + False, + context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) + + self.parser_input.next_token() + + node = self.parse_input() + if isinstance(node, ParenthesisNode): + node = node.node + + value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), node, node) + + ret = self.sheerka.ret( + self.name, + not self.has_error, + value) + + return ret + + def parse_input(self): + pos = self.parser_input.pos + for parser in []: # [self.logical_parser, self.relational_parser, self.function_parser]: + self.parser_input.seek(pos) # reset position + if parser.reset_parser(self.context, self.parser_input): + res = parser.parse_input() + if res and not parser.has_error: + return res + + return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens()) diff --git a/src/parsers/FunctionParser.py b/src/parsers/FunctionParser.py index 9de73ee..6c222cb 100644 --- a/src/parsers/FunctionParser.py +++ b/src/parsers/FunctionParser.py @@ -6,14 +6,14 @@ from core.concept import Concept from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind from core.utils import get_n_clones -from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode -from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, Node +from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode +from parsers.BaseParser import UnexpectedTokenParsingError, UnexpectedEofParsingError, Node, BaseExprParser from parsers.BnfNodeParser import BnfNodeParser from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.RuleParser import RuleParser from parsers.SequenceNodeParser import SequenceNodeParser from parsers.SyaNodeParser import SyaNodeParser -from parsers.expressions import NameExprNode +from parsers.expressions import NameExprNode, FunctionNode, FunctionParameter PARSERS = [RuleParser.NAME, SequenceNodeParser.NAME, @@ -26,34 +26,7 @@ class FunctionParserNode(Node): pass -@dataclass() -class FunctionParameter: - """ - class the represent result of the parameter parsing - """ - value: NameExprNode # value parsed - separator: NameExprNode = None # holds the value and the position of the separator - - def add_sep(self, start, end, tokens): - self.separator = NameExprNode(start, end, tokens) - - def value_to_unrecognized(self): - return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source() - - def separator_to_unrecognized(self): - if self.separator is None: - return None - return UnrecognizedTokensNode(self.separator.start, self.separator.end, self.separator.tokens).fix_source() - - -@dataclass -class FunctionNode(FunctionParserNode): - first: NameExprNode # beginning of the function (it should represent the name of the function) - last: NameExprNode # last part of the function (it should be the trailing parenthesis) - parameters: list - - -class FunctionParser(BaseParser): +class FunctionParser(BaseExprParser): """ The parser will be used to parse func(x, y, z) where x, y and z can be source code, concepts or other functions @@ -144,6 +117,9 @@ class FunctionParser(BaseParser): return res[0] if len(res) == 1 else res + def parse_input(self): + return self.parse_function() + def parse_function(self): start = self.parser_input.pos @@ -168,20 +144,23 @@ class FunctionParser(BaseParser): start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2]) if not self.parser_input.next_token(): self.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis")) - return FunctionNode(start_node, None, None) + return FunctionNode(start, start + 1, [], start_node, None, None) params = self.parse_parameters() if self.has_error: - return FunctionNode(start_node, None, params) + return FunctionNode(start, self.parser_input.pos, [], start_node, None, params) token = self.parser_input.token if not token or token.type != TokenKind.RPAR: self.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found", token, [TokenKind.RPAR])) - return FunctionNode(start_node, None, params) + return FunctionNode(start, self.parser_input.pos, [], start_node, None, params) - return FunctionNode(start_node, + return FunctionNode(start, + self.parser_input.pos, + self.parser_input.tokens[start:self.parser_input.pos + 1], + start_node, NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]), params) diff --git a/src/parsers/LogicalOperatorParser.py b/src/parsers/LogicalOperatorParser.py index 1ae22b5..1feeb81 100644 --- a/src/parsers/LogicalOperatorParser.py +++ b/src/parsers/LogicalOperatorParser.py @@ -7,7 +7,7 @@ from core.sheerka.services.sheerka_service import FailedToCompileError from core.tokenizer import TokenKind, Tokenizer, Keywords from core.utils import get_text_from_tokens from parsers.BaseNodeParser import UnrecognizedTokensNode -from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError +from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, BaseExprParser from parsers.PythonWithConceptsParser import PythonWithConceptsParser from parsers.expressions import ParenthesisNode, OrNode, AndNode, NotNode, LeftPartNotFoundError, \ ParenthesisMismatchError, NameExprNode, ExprNode, VariableNode, ComparisonNode @@ -60,7 +60,7 @@ class ReteConditionsEmitter: return [AndConditions(conditions)] -class LogicalOperatorParser(BaseParser): +class LogicalOperatorParser(BaseExprParser): """ will parser logic expression like not (a and b or c) @@ -93,7 +93,7 @@ class LogicalOperatorParser(BaseParser): if not isinstance(parser_input, ParserInput): return None - context.log(f"Parsing '{parser_input}' with LogicalOperatorParser", self.name) + context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name) sheerka = context.sheerka if parser_input.is_empty(): @@ -108,7 +108,7 @@ class LogicalOperatorParser(BaseParser): context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) self.parser_input.next_token() - tree = self.parse_or() + tree = self.parse_input() token = self.parser_input.token if token and token.type != TokenKind.EOF: self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [])) @@ -124,6 +124,9 @@ class LogicalOperatorParser(BaseParser): return ret + def parse_input(self): + return self.parse_or() + def parse_or(self): start = self.parser_input.pos expr = self.parse_and() diff --git a/src/parsers/RelationalOperatorParser.py b/src/parsers/RelationalOperatorParser.py index d8519f2..2b64282 100644 --- a/src/parsers/RelationalOperatorParser.py +++ b/src/parsers/RelationalOperatorParser.py @@ -4,19 +4,21 @@ from core.builtin_concepts_ids import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute from core.tokenizer import TokenKind, Token from core.utils import get_text_from_tokens -from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError +from parsers.BaseParser import UnexpectedTokenParsingError, BaseExprParser from parsers.expressions import ComparisonNode, ParenthesisMismatchError, NameExprNode, ComparisonType, VariableNode, \ ParenthesisNode, LeftPartNotFoundError -class RelationalOperatorParser(BaseParser): +class RelationalOperatorParser(BaseExprParser): """ Parses xxx (== | > | < | >= | <= | != | in | not in) yyy Nothing else """ + NAME = "RelationalOperator" + def __init__(self, **kwargs): - super().__init__("Expression", 60, False, yield_eof=True) + super().__init__(self.NAME, 60, False, yield_eof=True) def parse(self, context, parser_input: Union[ParserInput, List[Token]]): """ @@ -31,7 +33,7 @@ class RelationalOperatorParser(BaseParser): elif not isinstance(parser_input, ParserInput): return None - context.log(f"Parsing '{parser_input}' with ComparisonExpressionParser", self.name) + context.log(f"Parsing '{parser_input}' with {self.NAME}Parser", self.name) sheerka = context.sheerka if parser_input.is_empty(): @@ -47,7 +49,7 @@ class RelationalOperatorParser(BaseParser): self.parser_input.next_token() - node = self.parse_compare() + node = self.parse_input() if isinstance(node, ParenthesisNode): node = node.node @@ -60,6 +62,9 @@ class RelationalOperatorParser(BaseParser): return ret + def parse_input(self): + return self.parse_compare() + def parse_compare(self): start = self.parser_input.pos left = self.parse_names() diff --git a/src/parsers/expressions.py b/src/parsers/expressions.py index d2c5be6..890f2bd 100644 --- a/src/parsers/expressions.py +++ b/src/parsers/expressions.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import List, Tuple +from typing import List, Tuple, Union from core.tokenizer import Token, TokenKind, Tokenizer from core.utils import tokens_are_matching @@ -63,7 +63,6 @@ class ExprNode(Node): class NameExprNode(ExprNode): def __init__(self, start, end, tokens): super().__init__(start, end, tokens) - self.tokens = tokens self.value = "".join([t.str_value for t in self.tokens]) def eval(self, obj): @@ -224,8 +223,8 @@ class ParenthesisNode(ExprNode): if self.start != other.start or self.end != other.end: return False - # if other.tokens is not None and other.tokens != self.tokens: - # return False + if other.tokens is not None and other.tokens != self.tokens: + return False return self.node == other.node @@ -302,6 +301,33 @@ class ComparisonNode(ExprNode): return f"{self.left} {self.comp} {self.right}" +@dataclass() +class FunctionParameter: + """ + class the represent result of the parameter parsing + """ + value: NameExprNode # value parsed + separator: NameExprNode = None # holds the value and the position of the separator + + def add_sep(self, start, end, tokens): + self.separator = NameExprNode(start, end, tokens) + + def value_to_unrecognized(self): + return UnrecognizedTokensNode(self.value.start, self.value.end, self.value.tokens).fix_source() + + def separator_to_unrecognized(self): + if self.separator is None: + return None + return UnrecognizedTokensNode(self.separator.start, self.separator.end, self.separator.tokens).fix_source() + + +@dataclass +class FunctionNode(ExprNode): + first: NameExprNode # beginning of the function (it should represent the name of the function) + last: NameExprNode # last part of the function (it should be the trailing parenthesis) + parameters: Union[None, List[FunctionParameter]] + + class ExpressionVisitor: """ Pyhtonic implementation of visitors for ExprNode diff --git a/tests/parsers/parsers_utils.py b/tests/parsers/parsers_utils.py index 0d01124..a180ec8 100644 --- a/tests/parsers/parsers_utils.py +++ b/tests/parsers/parsers_utils.py @@ -14,7 +14,7 @@ from parsers.FunctionParser import FunctionNode from parsers.PythonParser import PythonNode from parsers.SyaNodeParser import SyaConceptParserHelper from parsers.expressions import NameExprNode, AndNode, OrNode, NotNode, VariableNode, ComparisonNode, ComparisonType, \ - ParenthesisNode + FunctionParameter from sheerkarete.common import V from sheerkarete.conditions import Condition, AndConditions @@ -972,8 +972,13 @@ def get_expr_node_from_test_node(full_text, test_node): return start, end def get_pos_from_source(source): + if isinstance(source, tuple): + source, to_skip = source[0], source[1] + else: + to_skip = 0 + source_as_node = list(Tokenizer(source, yield_eof=False)) - start = tokens_index(full_text_as_tokens, source_as_node) + start = tokens_index(full_text_as_tokens, source_as_node, skip=to_skip) end = start + len(source_as_node) - 1 return start, end @@ -1017,11 +1022,31 @@ def get_expr_node_from_test_node(full_text, test_node): return ComparisonNode(start, end, full_text_as_tokens[start: end + 1], node_type, left_node, right_node) - if isinstance(node, PAREN): - value_as_tokens = list(Tokenizer(node.source, yield_eof=False)) - start = tokens_index(full_text_as_tokens, value_as_tokens, 0) - end = start + len(value_as_tokens) - 1 - return ParenthesisNode(start, end, value_as_tokens, get_expr_node(node.node)) + if isinstance(node, FN): + start, end = get_pos_from_source(node.first) + first = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + start, end = get_pos_from_source(node.last) + last = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + parameters = [] + for param_value, sep in node.parameters: + if isinstance(param_value, str): + start, end = get_pos_from_source(param_value) + param_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + else: + param_as_expr_node = get_expr_node(param_value) + + if sep: + sep_tokens = Tokenizer(sep, yield_eof=False) + start = param_as_expr_node.end + 1 + end = start + len(list(sep_tokens)) - 1 + sep_as_expr_node = NameExprNode(start, end, full_text_as_tokens[start: end + 1]) + else: + sep_as_expr_node = None + + parameters.append(FunctionParameter(param_as_expr_node, sep_as_expr_node)) + + start, end = first.start, last.end + return FunctionNode(start, end, full_text_as_tokens[start: end + 1], first, last, parameters) return get_expr_node(test_node) diff --git a/tests/parsers/test_ExpressionParser.py b/tests/parsers/test_ExpressionParser.py new file mode 100644 index 0000000..a92445a --- /dev/null +++ b/tests/parsers/test_ExpressionParser.py @@ -0,0 +1,51 @@ +import pytest + +from core.builtin_concepts_ids import BuiltinConcepts +from core.sheerka.services.SheerkaExecute import ParserInput +from core.tokenizer import Tokenizer +from parsers.ExpressionParser import ExpressionParser +from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka +from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR + + +class TestExpressionParser(TestUsingMemoryBasedSheerka): + def init_parser(self): + sheerka, context = self.init_concepts() + parser = ExpressionParser() + return sheerka, context, parser + + def input_parser_with_source(self, source): + sheerka, context, parser = self.init_parser() + parser.reset_parser(context, ParserInput(source)) + return sheerka, context, parser + + def test_i_can_detect_empty_expression(self): + sheerka, context, parser = self.init_parser() + res = parser.parse(context, ParserInput("")) + + assert not res.status + assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) + + @pytest.mark.parametrize("expression, expected", [ + ("something that i do not recognize", EXPR("something that i do not recognize")), + ]) + def test_i_can_parse_input(self, expression, expected): + sheerka, context, parser = self.input_parser_with_source(expression) + expected = get_expr_node_from_test_node(expression, expected) + + parsed = parser.parse_input() + + assert not parser.has_error + assert parsed == expected + + def test_i_can_parse_sub_tokens(self): + sheerka, context, parser = self.init_parser() + + expression = "do not care var.attr do not care either" + parser_input = ParserInput("text", Tokenizer(expression, yield_eof=False), start=6, end=8) + + parser.reset_parser(context, parser_input) + parsed = parser.parse_input() + + assert not parser.has_error + assert parsed == get_expr_node_from_test_node(expression, [VAR("var.attr")]) diff --git a/tests/parsers/test_FunctionParser.py b/tests/parsers/test_FunctionParser.py index 070d3cc..1f72d08 100644 --- a/tests/parsers/test_FunctionParser.py +++ b/tests/parsers/test_FunctionParser.py @@ -6,7 +6,8 @@ from core.sheerka.services.SheerkaExecute import ParserInput from parsers.FunctionParser import FunctionParser from parsers.PythonParser import PythonErrorNode from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka -from tests.parsers.parsers_utils import compute_expected_array, SCN, SCWC, CN, UTN, CNC, RN, FN, get_test_obj +from tests.parsers.parsers_utils import compute_expected_array, SCN, SCWC, CN, UTN, CNC, RN, FN, get_test_obj, \ + get_expr_node_from_test_node cmap = { "one": Concept("one"), @@ -64,12 +65,12 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka): ("concept(one)", FN("concept(", ")", ["one"])), ("func(one)", FN("func(", ")", ["one"])), ("func(a long two, 'three', ;:$*)", FN("func(", ")", ["a long two, ", "'three', ", ";:$*"])), - ("func(func1(one), two, func2(func3(), func4(three)))", FN("func(", ")", [ + ("func(func1(one), two, func2(func3(), func4(three)))", FN("func(", (")", 4), [ (FN("func1(", ")", ["one"]), ", "), "two, ", - (FN("func2(", ")", [ - (FN("func3(", ")", []), ", "), - (FN("func4(", ")", ["three"]), None), + (FN("func2(", (")", 3), [ + (FN("func3(", (")", 1), []), ", "), + (FN("func4(", (")", 2), ["three"]), None), ]), None) ])), ]) @@ -80,8 +81,8 @@ class TestFunctionParser(TestUsingMemoryBasedSheerka): parser.parser_input.next_token() res = parser.parse_function() - transformed_res = get_test_obj(res, expected) - assert transformed_res == expected + expected = get_expr_node_from_test_node(expression, expected) + assert res == expected def test_i_can_parse_function_when_rule(self): sheerka, context, parser = self.init_parser() diff --git a/tests/parsers/test_RelationalOperatorParser.py b/tests/parsers/test_RelationalOperatorParser.py index 6e63b5e..3e0cbd4 100644 --- a/tests/parsers/test_RelationalOperatorParser.py +++ b/tests/parsers/test_RelationalOperatorParser.py @@ -3,6 +3,7 @@ import pytest from core.builtin_concepts_ids import BuiltinConcepts from core.sheerka.services.SheerkaExecute import ParserInput from core.tokenizer import TokenKind, Tokenizer +from parsers.BaseParser import UnexpectedTokenParsingError from parsers.RelationalOperatorParser import RelationalOperatorParser from parsers.expressions import ParenthesisMismatchError from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka @@ -94,9 +95,7 @@ class TestRelationalOperatorParser(TestUsingMemoryBasedSheerka): assert not res.status assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR) - assert isinstance(res.body.body[0], UnexpectedTokenError) - - + assert isinstance(res.body.body[0], UnexpectedTokenParsingError) def test_i_can_parse_tokens_rather_than_parser_input(self): sheerka, context, parser = self.init_parser()