Fixed #9: I can parse 'def concept'

This commit is contained in:
2023-06-11 09:45:44 +02:00
parent 62391f786e
commit ba397b0b72
22 changed files with 3043 additions and 93 deletions
+167
View File
@@ -0,0 +1,167 @@
import pytest
from common.utils import unstr_concept
from conftest import NewOntology
from helpers import get_concepts
from parsers.BnfDefinitionParser import BnfDefinitionParser
from parsers.parser_utils import UnexpectedEof, UnexpectedToken
from parsers.peg_parser import ConceptExpression, OneOrMore, Optional, OrderedChoice, RegExMatch, Sequence, StrMatch, \
VariableExpression, ZeroOrMore
def _cexp(concept_str, rule_name=None):
    """Build a ConceptExpression from a 'c:name#id:' string.

    The rule name defaults to the concept's own name when none is given.
    """
    name, concept_id = unstr_concept(concept_str)
    return ConceptExpression(concept_id, rule_name if rule_name else name)
@pytest.mark.parametrize("expression, expected", [
("'str'", StrMatch("str")),
("1", StrMatch("1")),
(" 1", StrMatch("1")),
(",", StrMatch(",")),
("r'str'", RegExMatch("str")),
("'foo'?", Optional(StrMatch("foo"))),
("'foo'*", ZeroOrMore(StrMatch("foo"))),
("'foo'+", OneOrMore(StrMatch("foo"))),
("1 | 2 | 3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1|2|3", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"))),
("1'|' 2 '|' 3", Sequence(StrMatch("1"), StrMatch("|"), StrMatch("2"), StrMatch("|"), StrMatch("3"))),
("1 2 'foo'", Sequence(StrMatch("1"), StrMatch("2"), StrMatch("foo"))),
("1 2 | 3 4+", OrderedChoice(
Sequence(StrMatch("1"), StrMatch("2")),
Sequence(StrMatch("3"), OneOrMore(StrMatch("4"))))),
("1 (2 | 3) 4+", Sequence(
StrMatch("1"),
OrderedChoice(StrMatch("2"), StrMatch("3")),
OneOrMore(StrMatch("4")))),
("(1|2)+", OneOrMore(OrderedChoice(StrMatch("1"), StrMatch("2")))),
("(1 2)+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")))),
("1 *", Sequence(StrMatch("1"), StrMatch("*"))),
("1 ?", Sequence(StrMatch("1"), StrMatch("?"))),
("1 +", Sequence(StrMatch("1"), StrMatch("+"))),
("(1|*) +", Sequence(OrderedChoice(StrMatch("1"), StrMatch("*")), StrMatch("+"))),
("1, :&", Sequence(StrMatch("1"), StrMatch(","), StrMatch(":"), StrMatch("&"))),
("(1 )", StrMatch("1")),
("'str'=var", StrMatch("str", rule_name="var")),
("'foo'?=var", Optional(StrMatch("foo"), rule_name="var")),
("('foo'?)=var", Optional(StrMatch("foo"), rule_name="var")),
("'foo'*=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("('foo'*)=var", ZeroOrMore(StrMatch("foo"), rule_name="var")),
("'foo'+=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("('foo'+)=var", OneOrMore(StrMatch("foo"), rule_name="var")),
("'foo'=var?", Optional(StrMatch("foo", rule_name="var"))),
("('foo'=var)?", Optional(StrMatch("foo", rule_name="var"))),
("'foo'=var*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)*", ZeroOrMore(StrMatch("foo", rule_name="var"))),
("'foo'=var+", OneOrMore(StrMatch("foo", rule_name="var"))),
("('foo'=var)+", OneOrMore(StrMatch("foo", rule_name="var"))),
("r'str'=var", RegExMatch("str", rule_name="var")),
("r'foo'?=var", Optional(RegExMatch("foo"), rule_name="var")),
("(r'foo'?)=var", Optional(RegExMatch("foo"), rule_name="var")),
("r'foo'*=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'*)=var", ZeroOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'+=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("(r'foo'+)=var", OneOrMore(RegExMatch("foo"), rule_name="var")),
("r'foo'=var?", Optional(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)?", Optional(RegExMatch("foo", rule_name="var"))),
("r'foo'=var*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)*", ZeroOrMore(RegExMatch("foo", rule_name="var"))),
("r'foo'=var+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(r'foo'=var)+", OneOrMore(RegExMatch("foo", rule_name="var"))),
("(1 | 2 | 3)=var", OrderedChoice(StrMatch("1"), StrMatch("2"), StrMatch("3"), rule_name="var")),
("(1 2)=var", Sequence(StrMatch("1"), StrMatch("2"), rule_name="var")),
("(1 2)+=var", OneOrMore(Sequence(StrMatch("1"), StrMatch("2")), rule_name="var")),
("(1 2)=var+", OneOrMore(Sequence(StrMatch("1"), StrMatch("2"), rule_name="var"))),
("(1=a 2=b)=c", Sequence(StrMatch("1", rule_name="a"), StrMatch("2", rule_name="b"), rule_name="c")),
("(1*=a)", ZeroOrMore(StrMatch("1"), rule_name="a")),
("'a'* 'b'+", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'* 'b'+)", Sequence(ZeroOrMore(StrMatch("a")), OneOrMore(StrMatch("b")))),
("('a'*=x 'b'+=y)=z", Sequence(
ZeroOrMore(StrMatch("a"), rule_name="x"),
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
("'--filter'",
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
])
def test_i_can_parse_simple_bnf_definition(context, expression, expected):
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res == expected
assert not parser.error_sink
assert parser.source == expression
@pytest.mark.parametrize("expression, expected", [
("foo", _cexp("c:foo#1001:")),
("foo*", ZeroOrMore(_cexp("c:foo#1001:"))),
("foo 'and' bar+", Sequence(_cexp("c:foo#1001:"), StrMatch("and"), OneOrMore(_cexp("c:bar#1002:")))),
("foo | bar?", OrderedChoice(_cexp("c:foo#1001:"), Optional(_cexp("c:bar#1002:")))),
("'str' = var", Sequence(StrMatch("str"), StrMatch("="), _cexp("c:var#1003:"))),
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), _cexp("c:var#1003:"))),
("foo=f", _cexp("c:foo#1001:", "f")),
("foo=f 'constant'", Sequence(_cexp("c:foo#1001:", "f"), StrMatch("constant"))),
("def 'concept'", Sequence(_cexp("c:def#1004:"), StrMatch("concept"))),
("c:foo:", _cexp("c:foo#1001:")),
("c:#1001:", _cexp("c:foo#1001:")),
])
def test_i_can_parse_bnf_definition_with_concepts(context, expression, expected):
with NewOntology(context, "test_i_can_parse_bnf_definition_with_concept"):
get_concepts(context, "foo", "bar", "var", "def", use_sheerka=True)
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res == expected
assert not parser.error_sink
assert parser.source == expression
@pytest.mark.parametrize("expression, expected", [
("x", VariableExpression("x")),
("x bar", Sequence(VariableExpression("x"), _cexp("c:bar#1001:"))),
("bar x", Sequence(_cexp("c:bar#1001:"), VariableExpression("x"))),
("x 'and' bar", Sequence(VariableExpression("x"), StrMatch("and"), _cexp("c:bar#1001:"))),
("x | bar", OrderedChoice(VariableExpression("x"), _cexp("c:bar#1001:"))),
("x*", ZeroOrMore(VariableExpression("x"))),
("x+", OneOrMore(VariableExpression("x"))),
("'str' = x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
("'str''='x", Sequence(StrMatch("str"), StrMatch("="), VariableExpression("x"))),
("foo=x", VariableExpression("x")),
])
def test_i_can_parse_bnf_definition_with_variables(context, expression, expected):
with NewOntology(context, "test_i_can_parse_bnf_definition_with_variables"):
get_concepts(context, "bar", use_sheerka=True)
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res == expected
assert not parser.error_sink
assert parser.source == expression
def test_i_can_parse_when_the_concept_is_still_under_creation(context):
    """While parsing something like 'def concept add from bnf add | mult',
    the name 'add' is still under construction: it must resolve to the
    concept being defined, not be detected as a variable."""
    bnf_parser = BnfDefinitionParser(context, "add | 'mult'", concept_name="add")
    assert bnf_parser.parse() == OrderedChoice(_cexp("c:add:"), StrMatch("mult"))
    assert not bnf_parser.error_sink
@pytest.mark.parametrize("expression, error", [
("1 ", UnexpectedEof),
("1|", UnexpectedEof),
("(1|)", UnexpectedToken),
("1=", UnexpectedToken),
])
def test_i_can_detect_errors(context, expression, error):
parser = BnfDefinitionParser(context, expression)
res = parser.parse()
assert res is None
assert len(parser.error_sink) > 0
assert isinstance(parser.error_sink[0], error)
+280
View File
@@ -0,0 +1,280 @@
import pytest
from common.global_symbols import NotInit
from core.concept import DefinitionType
from parsers.ConceptDefinitionParser import ConceptDefinition, ConceptDefinitionParser
from parsers.ParserInput import ParserInput
from parsers.parser_utils import ParsingError, UnexpectedEof, UnexpectedToken
from parsers.tokenizer import Keywords, Token, TokenKind
def get_parser_input(text):
    """Return a ParserInput for *text*, asserting that init() succeeds."""
    parser_input = ParserInput(text)
    assert parser_input.init()
    return parser_input
class TestRecognizeDefConcept:
    """Tests for ConceptDefinitionParser: recognizing 'def concept ...'
    statements, their parts (as/where/pre/post/ret), definitions
    ('from' / 'from bnf' / 'from def'), auto_eval flags and def_var
    declarations."""

    @pytest.fixture()
    def parser(self, sheerka):
        # A fresh parser per test; 'sheerka' is the project environment fixture.
        return ConceptDefinitionParser()

    @pytest.mark.parametrize("text", [
        "",
        " "])
    def test_i_can_detect_empty_input(self, parser, text):
        pi = get_parser_input(text)
        res = parser.parse(pi)
        assert res is None
        assert parser.error_sink == [UnexpectedEof(Keywords.DEF, None)]

    def test_must_start_with_def_keyword(self, parser):
        pi = get_parser_input("hello")
        res = parser.parse(pi)
        assert res is None
        assert parser.error_sink == [UnexpectedToken(Token(TokenKind.IDENTIFIER, "hello", 0, 1, 1), Keywords.DEF)]

    @pytest.mark.parametrize("text, expected", [
        ("def concept hello", ConceptDefinition(name="hello")),
        ("def concept hello ", ConceptDefinition(name="hello")),
        ("def concept a + b", ConceptDefinition(name="a + b")),
        ("def concept a+b", ConceptDefinition(name="a + b")),
        ("def concept 'a+b'+c", ConceptDefinition(name="'a+b' + c")),
        ('def concept "a+b"+c', ConceptDefinition(name="a+b + c")),
        ('def concept "as if"', ConceptDefinition(name="as if")),
        ("def concept 'as if'", ConceptDefinition(name="'as if'")),
        ("def concept 'as' \"if\"", ConceptDefinition(name="'as' if")),
        ('def concept \'as\' "if"', ConceptDefinition(name="'as' if")),
    ])
    def test_i_can_parse_def_concept_name(self, parser, text, expected):
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert actual == expected

    def test_concept_name_is_mandatory(self, parser):
        pi = get_parser_input("def concept as foo")
        actual = parser.parse(pi)
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], ParsingError)
        assert parser.error_sink[0].message == "Name is mandatory."
        assert actual is None

    def test_new_line_is_not_allowed_in_concept_name(self, parser):
        pi = get_parser_input("def concept complicated \n name as foo")
        actual = parser.parse(pi)
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], ParsingError)
        assert parser.error_sink[0].message == "Newlines are not allowed in name."
        assert actual is None

    @pytest.mark.parametrize("text, part", [
        ("def concept foo as where True", "as"),
        ("def concept foo where as 1 + 1", "where"),
        ("def concept foo pre as 1 + 1", "pre"),
        ("def concept foo post as 1 + 1", "post"),
        ("def concept foo ret as 1 + 1", "ret"),
    ])
    def test_empty_declarations_are_not_allowed(self, parser, text, part):
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert actual is None
        assert len(parser.error_sink) == 1
        assert isinstance(parser.error_sink[0], ParsingError)
        assert parser.error_sink[0].message == f"Empty '{part}' declaration."

    def test_empty_parts_are_not_initialized(self, parser):
        pi = get_parser_input("def concept foo")
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        # Fixed: compare strings with '==', not 'is' — string identity
        # relies on CPython literal interning and emits a SyntaxWarning
        # on Python 3.8+.
        assert actual.body == ""
        assert actual.where == ""
        assert actual.pre == ""
        assert actual.post == ""
        assert actual.ret == ""

    def test_i_can_manage_all_parts(self, parser):
        concept_def = "def concept foo"
        concept_def += " where my where clause"
        concept_def += " pre my pre clause"
        concept_def += " as my body"
        concept_def += " ret my return value"
        concept_def += " post my post condition"
        pi = get_parser_input(concept_def)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "my body"
        assert actual.where == "my where clause"
        assert actual.pre == "my pre clause"
        assert actual.post == "my post condition"
        assert actual.ret == "my return value"

    @pytest.mark.parametrize("body", [
        "c:#1001: is an int",
        "c:one: is an int",
        "'one' is an int",
        '"one" is an in',
    ])
    def test_i_can_manage_special_tokens_in_part(self, parser, body):
        text = f"def concept foo as {body}"
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.body == body

    @pytest.mark.parametrize("text, expected_type, expected_definition, ", [
        ("def concept foo from def 'hello world'", DefinitionType.DEFAULT, "'hello world'"),
        ("def concept foo from 'hello world'", DefinitionType.DEFAULT, "'hello world'"),
        ("def concept foo from bnf my bnf definition", DefinitionType.BNF, "my bnf definition"),
    ])
    def test_i_can_set_concept_definition(self, parser, text, expected_type, expected_definition):
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.definition_type == expected_type
        assert actual.definition == expected_definition

    @pytest.mark.parametrize("text", [
        "def concept foo from where True",
        "def concept foo from bnf where True",
        "def concept foo from def where True",
        "def concept foo from bnf",
        "def concept foo from def ",
    ])
    def test_empy_definition_are_not_allowed(self, parser, text):
        # NOTE(review): 'empy' looks like a typo for 'empty'; kept as-is to
        # avoid renaming a collected test.
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert actual is None
        assert parser.error_sink[0].message == "Empty 'from' declaration."

    def test_i_can_parse_multiline_definition(self, parser):
        # NOTE(review): leading whitespace inside this literal and the
        # expected body below may have been lost in extraction — verify
        # against version control before relying on exact indentation.
        text = """
def concept add one to a as
def func(x):
return x+1
func(a)
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "def func(x):\n return x+1\nfunc(a)"

    def test_i_can_parse_indention_mode(self, parser):
        # NOTE(review): same extraction caveat as above for in-literal indentation.
        text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "def func(x):\n return x+1\nfunc(a)"

    def test_i_can_detect_invalid_indentation(self, parser):
        # NOTE(review): the invalid indentation this test depends on lives
        # inside the literal and was likely flattened by extraction — verify.
        text = """
def concept add one to a as:
def func(x):
return x+1
func(a)
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert actual is None
        assert len(parser.error_sink) > 0

    def test_i_can_can_use_colon_to_protect_keywords(self, parser):
        text = """
def concept today as:
from datetime import date
today = date.today()
from:
give me the date !
"""
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.body == "from datetime import date\ntoday = date.today()"
        assert actual.definition == "give me the date !"

    def test_i_can_parse_bnf_concept_with_regex(self, parser):
        text = "def concept sha512 from bnf number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.definition == "number | r'[a-f0-9]+' | (number r'[a-f0-9]+')+"

    @pytest.mark.parametrize("text, expected", [
        ("def concept foo auto_eval True", True),
        ("def concept foo auto_eval true", True),
        ("def concept foo auto_eval False", False),
        ("def concept foo auto_eval false", False),
    ])
    def test_i_can_parse_auto_eval(self, parser, text, expected):
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.auto_eval == expected

    def test_auto_eval_is_set_to_false_by_default(self, parser):
        pi = get_parser_input("def concept foo")
        actual = parser.parse(pi)
        assert actual.auto_eval is False

    def test_empty_auto_eval_is_not_allowed(self, parser):
        pi = get_parser_input("def concept foo auto_eval as 1")
        actual = parser.parse(pi)
        assert actual is None
        assert parser.error_sink[0].message == "Empty 'auto_eval' declaration."

    def test_i_cannot_parse_wrong_value(self, parser):
        pi = get_parser_input("def concept foo auto_eval wrong_value")
        actual = parser.parse(pi)
        assert actual is None
        assert parser.error_sink[0].message == "Invalid 'auto_eval' declaration (wrong_value is not recognized)"

    @pytest.mark.parametrize("text, expected", [
        ("def concept foo def_var var", [("var", NotInit)]),
        ("def concept foo def_var var1 def_var var2", [("var1", NotInit), ("var2", NotInit)]),
        ("def concept foo def_var var1 var2", [("var1", NotInit), ("var2", NotInit)]),
        ("def concept foo def_var var1, var2", [("var1", NotInit), ("var2", NotInit)]),
        ("def concept foo def_var var1=10", [("var1", 10)]),
        ("def concept foo def_var var1 = 10", [("var1", 10)]),
        ("def concept foo def_var var1 = 'hello'", [("var1", "'hello'")]),
        ("def concept foo def_var var1 = hello", [("var1", "hello")]),
        ("def concept foo def_var var1, var2 = 10", [("var1", NotInit), ("var2", 10)]),
        ("def concept foo def_var var1='hello', var2 = 10", [("var1", "'hello'"), ("var2", 10)]),
        ("def concept foo def_var var1='hello' var2 = 10", [("var1", "'hello'"), ("var2", 10)]),
    ])
    def test_i_can_parse_variable_definitions(self, parser, text, expected):
        pi = get_parser_input(text)
        actual = parser.parse(pi)
        assert isinstance(actual, ConceptDefinition)
        assert actual.def_var == expected

    def test_empty_def_var_is_not_allowed(self, parser):
        pi = get_parser_input("def concept foo def_var as 1")
        actual = parser.parse(pi)
        assert actual is None
        assert parser.error_sink[0].message == "Empty 'def_var' declaration."
+128
View File
@@ -8,6 +8,8 @@ def test_i_can_parser_input():
parser_input = ParserInput("def concept a")
assert parser_input.init() is True
assert parser_input.exception is None
assert parser_input.pos == -1
assert parser_input.end == 6
def test_i_can_detect_errors():
@@ -38,3 +40,129 @@ def test_i_must_call_init_before_call_as_text():
parser_input.as_text()
assert ex.value.args[0] == "You must call init() first !"
def test_i_can_get_next_token():
    """Tokens come out in order; whitespace is skipped unless requested."""
    pi = ParserInput("def concept a")
    pi.init()

    def check(expected_more, expected_type, expected_value=None, **kwargs):
        # One next_token() step: return flag, token type, optional value.
        assert pi.next_token(**kwargs) is expected_more
        assert pi.token.type == expected_type
        if expected_value is not None:
            assert pi.token.value == expected_value

    check(True, TokenKind.IDENTIFIER, "def")
    check(True, TokenKind.IDENTIFIER, "concept")
    check(True, TokenKind.WHITESPACE, " ", skip_whitespace=False)
    check(True, TokenKind.IDENTIFIER, "a", skip_whitespace=False)
    check(False, TokenKind.EOF)
def test_next_after_eof_is_eof():
    """Once EOF is reached, every further next_token() keeps yielding EOF."""
    pi = ParserInput("hi")
    pi.init()
    assert pi.next_token() is True
    assert (pi.token.type, pi.token.value) == (TokenKind.IDENTIFIER, "hi")
    for _ in range(3):
        assert pi.next_token() is False
        assert pi.token.type == TokenKind.EOF
def test_i_can_manage_blank_input():
    """A whitespace-only input tokenizes straight to EOF by default."""
    pi = ParserInput(" ")
    pi.init()
    assert pi.next_token() is False
    assert pi.token.type == TokenKind.EOF
def test_i_can_manage_blank_input_when_skip_whitespace_is_false():
    """With skip_whitespace=False, the whitespace itself is a token, then EOF."""
    pi = ParserInput(" ")
    pi.init()
    assert pi.next_token(skip_whitespace=False) is True
    assert (pi.token.type, pi.token.value) == (TokenKind.WHITESPACE, " ")
    assert pi.next_token(skip_whitespace=False) is False
    assert pi.token.type == TokenKind.EOF
def test_i_can_reset():
    """reset() rewinds the input so the token stream can be replayed."""
    pi = ParserInput("hello world ")
    pi.init()

    def scan_all():
        # Full pass: two identifiers then EOF.
        for word in ("hello", "world"):
            assert pi.next_token() is True
            assert (pi.token.type, pi.token.value) == (TokenKind.IDENTIFIER, word)
        assert pi.next_token() is False
        assert pi.token.type == TokenKind.EOF

    scan_all()
    # One extra call past EOF, as in the original scenario.
    assert pi.next_token() is False
    assert pi.token.type == TokenKind.EOF
    pi.reset()
    scan_all()
def test_i_can_parse_when_input_ends_by_white_space():
    """Trailing whitespace is absorbed before EOF is reported."""
    pi = ParserInput("hello world ")
    pi.init()
    for word in ("hello", "world"):
        assert pi.next_token() is True
        assert (pi.token.type, pi.token.value) == (TokenKind.IDENTIFIER, word)
    assert pi.next_token() is False
    assert pi.token.type == TokenKind.EOF
def test_i_can_parse_when_input_starts_by_white_space():
    """Leading whitespace is skipped before the first token."""
    pi = ParserInput(" hello world")
    pi.init()
    for word in ("hello", "world"):
        assert pi.next_token() is True
        assert (pi.token.type, pi.token.value) == (TokenKind.IDENTIFIER, word)
    assert pi.next_token() is False
    assert pi.token.type == TokenKind.EOF
+95
View File
@@ -0,0 +1,95 @@
import pytest
from parsers.ParserInput import ParserInput
from parsers.parser_utils import parse_parts, strip_tokens
from parsers.tokenizer import Keywords, Tokenizer
def compare_results(actual, expected, compare_str=False):
    """Compare the mapping produced by parse_parts with an expected mapping.

    Values in *expected* may be given either as a raw string — which is
    tokenized, dropping the whitespace token right after the keyword — or
    directly as a token list.  Both sides are reduced to a
    [keyword_repr, joined_payload] form so pytest shows readable diffs.

    :param actual: mapping of keyword -> token list produced by the parser
    :param expected: mapping of keyword -> str or token list
    :param compare_str: join tokens via str_value instead of repr_value
    """
    resolved_expected = {}
    for k, v in expected.items():
        if isinstance(v, str):
            # case like {Keywords.DEF_VAR: "def_var var1 def_var var2"}
            tokens = list(Tokenizer(v, yield_eof=False))
            # drop the whitespace token between the keyword and its payload
            resolved_expected[k] = [tokens[0]] + tokens[2:]
        else:
            # case like {Keywords.DEF_VAR: get_tokens(["def_var", "var1", "var2"])}
            resolved_expected[k] = v

    def get_better_representation(value):
        better_repr = {}
        # Fixed: the original rebound 'value' inside this loop, shadowing
        # the function parameter; use a distinct name for the joined text.
        for k, tokens in value.items():
            joined = "".join([t.str_value if compare_str else t.repr_value for t in tokens[1:]])
            better_repr[k] = [tokens[0].repr_value, joined]
        return better_repr

    # it's easier to compare two lists of strings
    actual_to_compare = get_better_representation(actual)
    expected_to_compare = get_better_representation(resolved_expected)
    assert actual_to_compare == expected_to_compare
def get_tokens(lst):
    """Tokenize *lst* and return the tokens with their positions stripped.

    :param lst: input handed to Tokenizer — a string, or a list of string
        fragments as used by the tests below
    :return: list of position-less Token objects (EOF excluded)
    """
    return list(Tokenizer(lst, yield_eof=False).remove_positions())
@pytest.mark.parametrize("input_as_list, expected_as_list", [
([" "], []),
([" ", "one"], ["one"]),
(["one", " "], ["one"]),
([" ", "one", " "], ["one"]),
(["\n", "one"], ["one"]),
(["one", "\n"], ["one"]),
(["\n", "one", "\n"], ["one"]),
([" ", "\n", "one"], ["one"]),
(["one", " ", "\n"], ["one"]),
([" ", "\n", "one", " ", "\n"], ["one"]),
(["\n", " ", "one"], ["one"]),
(["one", "\n", " "], ["one"]),
(["\n", " ", "one", "\n", " "], ["one"]),
([" ", "\n", " ", "one"], ["one"]),
(["one", " ", "\n", " "], ["one"]),
([" ", "\n", " ", "one", " ", "\n", " "], ["one"]),
(["\n", " ", "\n", "one"], ["one"]),
(["one", "\n", " ", "\n"], ["one"]),
(["\n", " ", "\n", "one", "\n", " ", "\n"], ["one"]),
])
def test_i_can_strip(input_as_list, expected_as_list):
actual = strip_tokens(get_tokens(input_as_list)) # KSI 20201007 Why not use Tokenizer ?!! For perf ?
expected = get_tokens(expected_as_list)
assert actual == expected
@pytest.mark.parametrize("text, strip, expected", [
("when xxx yyy", False, {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", False, {Keywords.WHEN: "when uuu vvv ", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", False, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy "}),
(" when xxx", False, {Keywords.WHEN: "when xxx"}),
("when xxx yyy", True, {Keywords.WHEN: "when xxx yyy"}),
("when uuu vvv print xxx yyy", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
("print xxx yyy when uuu vvv", True, {Keywords.WHEN: "when uuu vvv", Keywords.PRINT: "print xxx yyy"}),
(" when xxx", True, {Keywords.WHEN: "when xxx"}),
])
def test_i_can_get_parts(text, strip, expected):
parser_input = ParserInput(text)
parser_input.init()
parser_input.next_token()
error_sink = []
res = parse_parts(parser_input, error_sink, ["when", "print"], strip=strip)
compare_results(res, expected)
+12 -5
View File
@@ -5,7 +5,7 @@ from parsers.tokenizer import LexerError, Token, TokenKind, Tokenizer
def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&<>c:name:"
source += "$£€!_identifier°~_^\\`==#__var__10r/regex\nregex/r:xxx#1:**//%that's"
source += "$£€!_identifier°~_^\\`==#__var__10r/regex\nregex/r:xxx#1:**//%"
tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -61,11 +61,8 @@ def test_i_can_tokenize():
assert tokens[51] == Token(TokenKind.STARSTAR, "**", 143, 7, 15)
assert tokens[52] == Token(TokenKind.SLASHSLASH, "//", 145, 7, 17)
assert tokens[53] == Token(TokenKind.PERCENT, "%", 147, 7, 19)
assert tokens[54] == Token(TokenKind.IDENTIFIER, "that", 148, 7, 20)
assert tokens[55] == Token(TokenKind.QUOTE, "'", 152, 7, 24)
assert tokens[56] == Token(TokenKind.IDENTIFIER, "s", 153, 7, 25)
assert tokens[57] == Token(TokenKind.EOF, '', 154, 7, 26)
assert tokens[54] == Token(TokenKind.EOF, '', 148, 7, 20)
@pytest.mark.parametrize("text, expected", [
@@ -209,3 +206,13 @@ def test_i_can_parse_regex_token(text, expected):
assert tokens[0].str_value == "r" + expected
assert tokens[0].repr_value == "r" + expected
assert tokens[0].strip_quote == expected[1:-1]
@pytest.mark.parametrize("text, parse_quote, expected", [
("a='hello'", False, ["a", "=", "'hello'"]),
("a='hello'", True, ["a", "=", "'", "hello", "'"]),
("a= 'hello'", True, ["a", "=", " ", "'hello'"]),
])
def test_i_can_choose_to_parse_quote(text, parse_quote, expected):
tokens = list(Tokenizer(text, parse_quote=parse_quote, yield_eof=False))
assert [t.value for t in tokens] == expected