diff --git a/src/parsers/ExpressionParser.py b/src/parsers/ExpressionParser.py
index fa203d1..70d6668 100644
--- a/src/parsers/ExpressionParser.py
+++ b/src/parsers/ExpressionParser.py
@@ -1,22 +1,23 @@
 from core.builtin_concepts_ids import BuiltinConcepts
 from core.sheerka.services.SheerkaExecute import ParserInput
+from core.tokenizer import TokenKind
+from core.utils import get_text_from_tokens
 from parsers.BaseParser import BaseExprParser
 from parsers.FunctionParser import FunctionParser
 from parsers.LogicalOperatorParser import LogicalOperatorParser
 from parsers.RelationalOperatorParser import RelationalOperatorParser
-from parsers.expressions import ParenthesisNode, NameExprNode
+from parsers.expressions import ParenthesisNode, NameExprNode, VariableNode
 
 
 class ExpressionParser(BaseExprParser):
     """
-    Parses xxx (== | > | < | >= | <= | != | in | not in) yyy
-    Nothing else
+    Parses expressions
     """
     NAME = "Expression"
 
     def __init__(self, **kwargs):
-        super().__init__(ExpressionParser.NAME, 60, False, yield_eof=True)
+        super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False)
         self.logical_parser = LogicalOperatorParser()
         self.relational_parser = RelationalOperatorParser()
         self.function_parser = FunctionParser()
 
@@ -62,11 +63,38 @@ class ExpressionParser(BaseExprParser):
     def parse_input(self):
         pos = self.parser_input.pos
-        for parser in []: # [self.logical_parser, self.relational_parser, self.function_parser]:
+        for parser in []:  # [self.logical_parser, self.relational_parser, self.function_parser]:
             self.parser_input.seek(pos)  # reset position
             if parser.reset_parser(self.context, self.parser_input):
                 res = parser.parse_input()
                 if res and not parser.has_error:
                     return res
 
-        return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens())
+        # try to recognize a VariableNode
+        dots_found = []
+        for i, token in enumerate(self.parser_input.as_tokens()):
+            if token.type == TokenKind.DOT:
+                dots_found.append(i)
+                continue
+
+            if not (token.type == TokenKind.WHITESPACE or
+                    token.type == TokenKind.IDENTIFIER and token.value.isidentifier()):
+                return NameExprNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens())
+
+        if len(dots_found) == 0:
+            return VariableNode(self.parser_input.start, self.parser_input.end, self.parser_input.as_tokens(), self.parser_input.as_text())
+
+        parts = []
+        current_dot_pos = 0  # dots_found holds indices local to as_tokens(), so slice locally from 0
+        for dot_found in dots_found:
+            parts.append(get_text_from_tokens(self.parser_input.as_tokens()[current_dot_pos: dot_found]))
+            current_dot_pos = dot_found + 1
+
+        # do not forget the trailing part
+        parts.append(get_text_from_tokens(self.parser_input.as_tokens()[current_dot_pos:]))
+
+        return VariableNode(self.parser_input.start,
+                            self.parser_input.end,
+                            self.parser_input.as_tokens(),
+                            parts[0],
+                            *parts[1:])
 
diff --git a/src/parsers/expressions.py b/src/parsers/expressions.py
index 890f2bd..1b70455 100644
--- a/src/parsers/expressions.py
+++ b/src/parsers/expressions.py
@@ -72,7 +72,7 @@
         return self.value
 
     def __repr__(self):
-        return f"NameExprNode('{self.value}')"
+        return f"NameExprNode(start={self.start}, end={self.end}, '{self.value}')"
 
     def __str__(self):
         return self.value
diff --git a/tests/parsers/test_ExpressionParser.py b/tests/parsers/test_ExpressionParser.py
index a92445a..08db492 100644
--- a/tests/parsers/test_ExpressionParser.py
+++ b/tests/parsers/test_ExpressionParser.py
@@ -4,6 +4,7 @@ from core.builtin_concepts_ids import BuiltinConcepts
 from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import Tokenizer
 from parsers.ExpressionParser import ExpressionParser
+from parsers.expressions import VariableNode
 from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
 from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR
 
@@ -17,6 +18,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
     def input_parser_with_source(self, source):
         sheerka, context, parser = self.init_parser()
         parser.reset_parser(context, ParserInput(source))
+        parser.parser_input.next_token()
         return sheerka, context, parser
 
     def test_i_can_detect_empty_expression(self):
@@ -27,7 +29,9 @@
         assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
 
     @pytest.mark.parametrize("expression, expected", [
-        ("something that i do not recognize", EXPR("something that i do not recognize")),
+        ("var1 + var 2", EXPR("var1 + var 2")),
+        ("variable", VAR("variable")),
+        ("var.attr", VAR("var.attr")),
     ])
     def test_i_can_parse_input(self, expression, expected):
         sheerka, context, parser = self.input_parser_with_source(expression)
@@ -38,14 +42,27 @@
         parsed = parser.parse_input()
 
         assert not parser.has_error
         assert parsed == expected
 
+    @pytest.mark.parametrize("expression", [
+        "var.attr1.attr2",
+        "var . attr1 . attr2",
+    ])
+    def test_i_can_parse_variable(self, expression):
+        sheerka, context, parser = self.input_parser_with_source(expression)
+        parsed = parser.parse_input()
+
+        assert not parser.has_error
+        assert isinstance(parsed, VariableNode)
+        assert parsed.name == "var"
+        assert parsed.attributes == ["attr1", "attr2"]
+
     def test_i_can_parse_sub_tokens(self):
         sheerka, context, parser = self.init_parser()
-        expression = "do not care var.attr do not care either"
-        parser_input = ParserInput("text", Tokenizer(expression, yield_eof=False), start=6, end=8)
+        expression = "do not care var1 + var2 do not care either"
+        parser_input = ParserInput("text", list(Tokenizer(expression, yield_eof=False)), start=6, end=10)
         parser.reset_parser(context, parser_input)
         parsed = parser.parse_input()
 
         assert not parser.has_error
-        assert parsed == get_expr_node_from_test_node(expression, [VAR("var.attr")])
+        assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))