Working on #48 : Simple version of ExpressionParser

This commit is contained in:
2021-03-10 15:58:05 +01:00
parent 966a1ed814
commit 998ea160be
3 changed files with 56 additions and 11 deletions
+34 -6
View File
@@ -1,22 +1,23 @@
from core.builtin_concepts_ids import BuiltinConcepts from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import get_text_from_tokens
from parsers.BaseParser import BaseExprParser from parsers.BaseParser import BaseExprParser
from parsers.FunctionParser import FunctionParser from parsers.FunctionParser import FunctionParser
from parsers.LogicalOperatorParser import LogicalOperatorParser from parsers.LogicalOperatorParser import LogicalOperatorParser
from parsers.RelationalOperatorParser import RelationalOperatorParser from parsers.RelationalOperatorParser import RelationalOperatorParser
from parsers.expressions import ParenthesisNode, NameExprNode from parsers.expressions import ParenthesisNode, NameExprNode, VariableNode
class ExpressionParser(BaseExprParser): class ExpressionParser(BaseExprParser):
""" """
Parses xxx (== | > | < | >= | <= | != | in | not in) yyy Parses expressions
Nothing else
""" """
NAME = "Expression" NAME = "Expression"
def __init__(self, **kwargs):
    """
    Set up the expression parser together with the sub-parsers it owns.

    Keyword arguments are accepted but are not used by this constructor.
    """
    super().__init__(ExpressionParser.NAME, 60, False, yield_eof=False)

    # Sub-parsers are built once here, in the same order as before.
    (self.logical_parser,
     self.relational_parser,
     self.function_parser) = (LogicalOperatorParser(),
                              RelationalOperatorParser(),
                              FunctionParser())
@@ -62,11 +63,38 @@ class ExpressionParser(BaseExprParser):
def parse_input(self):
    """
    Parse the current parser input into an expression node.

    When every token is a dot, whitespace or a valid identifier, the input
    is treated as a variable access (``var`` or ``var.attr1.attr2``) and a
    VariableNode is returned. Any other token makes the whole input a
    NameExprNode.

    :return: a VariableNode or a NameExprNode covering the input tokens
    """
    pos = self.parser_input.pos

    # Delegation to the sub-parsers is currently switched off: the candidate
    # list is deliberately empty, so this loop body never executes.
    for parser in []:  # [self.logical_parser, self.relational_parser, self.function_parser]:
        self.parser_input.seek(pos)  # reset position
        if parser.reset_parser(self.context, self.parser_input):
            res = parser.parse_input()
            if res and not parser.has_error:
                return res

    # try to recognize a VariableNode
    # NOTE(review): assumes as_tokens() returns exactly the tokens this node
    # spans (start..end inclusive) as a sliceable list - confirm.
    tokens = self.parser_input.as_tokens()
    dots_found = []
    for i, token in enumerate(tokens):
        if token.type == TokenKind.DOT:
            dots_found.append(i)
            continue

        is_whitespace = token.type == TokenKind.WHITESPACE
        is_identifier = token.type == TokenKind.IDENTIFIER and token.value.isidentifier()
        if not (is_whitespace or is_identifier):
            # anything else means this is not a plain variable access
            return NameExprNode(self.parser_input.start, self.parser_input.end, tokens)

    if not dots_found:
        # NOTE(review): this branch uses `pos` as the node start while the
        # other returns use parser_input.start - confirm they always agree.
        return VariableNode(pos, self.parser_input.end, tokens, self.parser_input.as_text())

    # The dot positions index into `tokens`, so the parts must be sliced from
    # that same list. Slicing parser_input.tokens with these indices is only
    # right when the parsed window starts at token 0.
    parts = []
    segment_start = 0
    for dot_index in dots_found:
        parts.append(get_text_from_tokens(tokens[segment_start:dot_index]))
        segment_start = dot_index + 1

    # do not forget the trailing part
    parts.append(get_text_from_tokens(tokens[segment_start:]))

    return VariableNode(self.parser_input.start,
                        self.parser_input.end,
                        tokens,
                        parts[0],
                        *parts[1:])
+1 -1
View File
@@ -72,7 +72,7 @@ class NameExprNode(ExprNode):
return self.value return self.value
def __repr__(self):
    """Return a debug representation showing the node's span and its value."""
    description = f"NameExprNode(start={self.start}, end={self.end}, '{self.value}')"
    return description
def __str__(self):
    """Return the node's value as its string form."""
    return self.value
+21 -4
View File
@@ -4,6 +4,7 @@ from core.builtin_concepts_ids import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer from core.tokenizer import Tokenizer
from parsers.ExpressionParser import ExpressionParser from parsers.ExpressionParser import ExpressionParser
from parsers.expressions import VariableNode
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR from tests.parsers.parsers_utils import get_expr_node_from_test_node, VAR, EXPR
@@ -17,6 +18,7 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
def input_parser_with_source(self, source):
    """
    Build a parser that is reset on ``source`` and advanced by one token.

    :param source: text handed to ParserInput
    :return: the (sheerka, context, parser) triple from init_parser()
    """
    sheerka, context, parser = self.init_parser()

    parser_input = ParserInput(source)
    parser.reset_parser(context, parser_input)

    # advance once so that parse_input() starts from the first token
    parser.parser_input.next_token()
    return sheerka, context, parser
def test_i_can_detect_empty_expression(self): def test_i_can_detect_empty_expression(self):
@@ -27,7 +29,9 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)
@pytest.mark.parametrize("expression, expected", [ @pytest.mark.parametrize("expression, expected", [
("something that i do not recognize", EXPR("something that i do not recognize")), ("var1 + var 2", EXPR("var1 + var 2")),
("variable", VAR("variable")),
("var.attr", VAR("var.attr")),
]) ])
def test_i_can_parse_input(self, expression, expected): def test_i_can_parse_input(self, expression, expected):
sheerka, context, parser = self.input_parser_with_source(expression) sheerka, context, parser = self.input_parser_with_source(expression)
@@ -38,14 +42,27 @@ class TestExpressionParser(TestUsingMemoryBasedSheerka):
assert not parser.has_error assert not parser.has_error
assert parsed == expected assert parsed == expected
@pytest.mark.parametrize("expression", [
    "var.attr1.attr2",
    "var . attr1 . attr2",
])
def test_i_can_parse_variable(self, expression):
    """A dotted identifier chain, with or without spaces around the dots, yields a VariableNode."""
    _sheerka, _context, parser = self.input_parser_with_source(expression)

    node = parser.parse_input()

    assert not parser.has_error
    assert isinstance(node, VariableNode)
    assert node.name == "var"
    assert node.attributes == ["attr1", "attr2"]
def test_i_can_parse_sub_tokens(self):
    """Parsing a ParserInput limited to tokens 6..10 yields the expression for that window."""
    sheerka, context, parser = self.init_parser()
    expression = "do not care var1 + var2 do not care either"

    # tokens 6..10 of the expression are: var1, whitespace, +, whitespace, var2
    all_tokens = list(Tokenizer(expression, yield_eof=False))
    windowed_input = ParserInput("text", all_tokens, start=6, end=10)
    parser.reset_parser(context, windowed_input)

    parsed = parser.parse_input()

    assert not parser.has_error
    assert parsed == get_expr_node_from_test_node(expression, EXPR("var1 + var2"))