Fixed first token recognition when creating bnf concepts
This commit is contained in:
@@ -348,6 +348,9 @@ class AtomNodeParser(BaseNodeParser):
|
||||
return valid_parser_helpers
|
||||
|
||||
def parse(self, context, parser_input: ParserInput):
|
||||
if not isinstance(parser_input, ParserInput):
|
||||
return None
|
||||
|
||||
if parser_input.is_empty():
|
||||
return context.sheerka.ret(
|
||||
self.name,
|
||||
|
||||
@@ -162,11 +162,17 @@ class BaseParser:
|
||||
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
|
||||
return self.error_sink[0]
|
||||
|
||||
if self.has_error:
|
||||
return sheerka.new(
|
||||
BuiltinConcepts.ERROR,
|
||||
body=self.error_sink
|
||||
)
|
||||
|
||||
return sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=source,
|
||||
body=self.error_sink if self.has_error else tree,
|
||||
body=tree,
|
||||
try_parsed=try_parse)
|
||||
|
||||
def get_input_as_text(self, parser_input, custom_switcher=None, tracker=None):
|
||||
|
||||
@@ -391,10 +391,11 @@ class StrMatch(Match):
|
||||
Matches a literal
|
||||
"""
|
||||
|
||||
def __init__(self, to_match, rule_name="", ignore_case=True):
|
||||
def __init__(self, to_match, rule_name="", ignore_case=True, skip_whitespace=True):
    """
    Build a matcher for a literal.

    :param to_match: the literal to match
    :param rule_name: forwarded to the parent initializer
    :param ignore_case: kept on the instance as ``ignore_case``
    :param skip_whitespace: kept on the instance as ``skip_white_space``
        (the attribute spelling differs from the parameter name)
    """
    # NOTE(review): super(Match, self) starts the MRO lookup *after* Match, so
    # Match.__init__ itself is bypassed - confirm this is intentional.
    super(Match, self).__init__(rule_name=rule_name)
    self.skip_white_space = skip_whitespace
    self.ignore_case = ignore_case
    self.to_match = to_match
|
||||
|
||||
def __repr__(self):
|
||||
return self.add_rule_name_if_needed(f"'{self.to_match}'")
|
||||
@@ -415,7 +416,7 @@ class StrMatch(Match):
|
||||
|
||||
if m:
|
||||
node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value)
|
||||
parser_helper.next_token()
|
||||
parser_helper.next_token(self.skip_white_space)
|
||||
return node
|
||||
|
||||
return None
|
||||
@@ -469,6 +470,8 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
|
||||
return self.STOP
|
||||
|
||||
def visit_StrMatch(self, pe):
    """
    Register the literal of a StrMatch expression as a first token.

    :param pe: expression exposing a ``to_match`` attribute
    :return: ``self.STOP`` after the literal was registered, None when
        ``to_match`` is falsy (nothing is registered in that case)
    """
    literal = pe.to_match
    if literal:
        self.add_first_token(literal)
        return self.STOP

    # Empty literal: contributes no first token and does not signal STOP.
    return None
|
||||
|
||||
@@ -947,6 +950,9 @@ class BnfNodeParser(BaseNodeParser):
|
||||
:return:
|
||||
"""
|
||||
|
||||
if not isinstance(parser_input, ParserInput):
|
||||
return None
|
||||
|
||||
context.log(f"Parsing '{parser_input}' with BnfNode", self.name)
|
||||
sheerka = context.sheerka
|
||||
|
||||
|
||||
@@ -266,6 +266,17 @@ class BnfParser(BaseParser):
|
||||
expr.rule_name = concept.name
|
||||
return self.eat_rule_name_if_needed(expr)
|
||||
|
||||
if token.type == TokenKind.STRING:
|
||||
self.next_token()
|
||||
tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
|
||||
if len(tokens) == 1:
|
||||
return self.eat_rule_name_if_needed(StrMatch(tokens[0].str_value))
|
||||
else:
|
||||
elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens]
|
||||
elements[-1].skip_white_space = True
|
||||
ret = Sequence(*elements)
|
||||
return self.eat_rule_name_if_needed(ret)
|
||||
|
||||
ret = StrMatch(core.utils.strip_quotes(token.value))
|
||||
self.next_token()
|
||||
return self.eat_rule_name_if_needed(ret)
|
||||
|
||||
@@ -313,7 +313,6 @@ class InFixToPostFix:
|
||||
"""
|
||||
return len(self.stack) > 0 and isinstance(self.stack[-1], type)
|
||||
|
||||
|
||||
def _make_source_code_with_concept(self, start, rpar_token, end):
|
||||
"""
|
||||
|
||||
@@ -1068,6 +1067,10 @@ class SyaNodeParser(BaseNodeParser):
|
||||
:param parser_input:
|
||||
:return:
|
||||
"""
|
||||
|
||||
if not isinstance(parser_input, ParserInput):
|
||||
return None
|
||||
|
||||
if parser_input.is_empty():
|
||||
return context.sheerka.ret(
|
||||
self.name,
|
||||
|
||||
@@ -4,7 +4,6 @@ from logging import Logger
|
||||
import core.utils
|
||||
from core.concept import Concept
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
|
||||
from sheerkapickle import utils, tags, handlers
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import pytest
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF
|
||||
from core.sheerka.Sheerka import Sheerka
|
||||
@@ -156,6 +157,31 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka):
|
||||
assert res.status
|
||||
assert sheerka.get_by_name(concept.name) == concept # it's not a list, ie the entry is not duplicated
|
||||
|
||||
def test_i_can_get_first_token_when_not_a_letter(self):
    """A concept named '--filter a' must be listed under the "-" first-keyword entry."""
    sheerka = self.get_sheerka(cache_only=False)
    context = self.get_context(sheerka)
    concept = Concept("--filter a").def_var("a")

    creation = sheerka.create_new_concept(context, concept)
    assert creation.status

    # Both the raw and the resolved first-keyword caches must reference the concept.
    cache = sheerka.cache_manager
    for entry in (sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
                  sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY):
        assert cache.get(entry, "-") == [concept.id]
|
||||
|
||||
@pytest.mark.parametrize("expression", [
    "--'filter' ('one' | 'two') ",
    "'--filter' ('one' | 'two') ",
])
def test_i_can_get_first_token_when_bnf_concept_and_not_a_letter(self, expression):
    """A bnf concept whose definition starts with '--' must be listed under the "-" entry."""
    foo = Concept("foo", definition=expression)
    sheerka, context, bnf_concept = self.init_concepts(foo, create_new=True)

    # Both the raw and the resolved first-keyword caches must reference the concept.
    cache = sheerka.cache_manager
    for entry in (sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
                  sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY):
        assert cache.get(entry, "-") == [bnf_concept.id]
|
||||
|
||||
|
||||
class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka):
|
||||
def test_i_can_add_several_concepts(self):
|
||||
|
||||
@@ -40,6 +40,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
|
||||
(Optional(StrMatch("foo")), {"foo": ["1002"]}),
|
||||
(ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}),
|
||||
(OneOrMore(StrMatch("foo")), {"foo": ["1002"]}),
|
||||
(StrMatch("--filter"), {"--filter": ["1002"]}), # add both entries
|
||||
])
|
||||
def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected):
|
||||
sheerka = self.get_sheerka()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import pytest
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC
|
||||
from core.concept import Concept, ConceptParts, DoNotResolve
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput
|
||||
from parsers.BaseNodeParser import CNC, UTN, CN
|
||||
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
|
||||
@@ -30,6 +30,8 @@ cmap = {
|
||||
# sequence of keywords using def definition
|
||||
# "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF),
|
||||
# "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"),
|
||||
|
||||
"filter": Concept("filter", definition="'--filter' (one | two)")
|
||||
}
|
||||
|
||||
|
||||
@@ -125,7 +127,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert res.body.reason == BuiltinConcepts.IS_EMPTY
|
||||
|
||||
@pytest.mark.parametrize("expr, text", [
|
||||
# (StrMatch("foo"), "foo"),
|
||||
(StrMatch("foo"), "foo"),
|
||||
(StrMatch("'foo'"), "'foo'"),
|
||||
(StrMatch("1"), "1"),
|
||||
(StrMatch("3.14"), "3.14"),
|
||||
@@ -171,6 +173,33 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
expected = [CNC("foo", source=text)]
|
||||
self.validate_get_concepts_sequences(my_map, text, expected)
|
||||
|
||||
def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self):
    """
    Two dashes declared with skip_whitespace=False followed by "filter":
    "--filter" is fully recognized, "- - filter" is not.
    """
    dashes_then_filter = Sequence(
        StrMatch("-", skip_whitespace=False),
        StrMatch("-", skip_whitespace=False),
        "filter",
    )
    my_map = {"filter": self.bnf_concept("filter", dashes_then_filter)}

    sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)
    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)

    # Dashes glued together: the whole input maps to the concept.
    glued = "--filter"
    expected_array = compute_expected_array(my_map, glued, [CN("filter", source="--filter")])

    parser.reset_parser(context, ParserInput(glued))
    first_helper = parser.get_concepts_sequences()[0]
    assert first_helper.sequence == expected_array
    assert not first_helper.has_unrecognized

    # Dashes separated by spaces: something is left unrecognized.
    spaced = "- - filter"
    parser.reset_parser(context, ParserInput(spaced))
    first_helper = parser.get_concepts_sequences()[0]
    assert first_helper.has_unrecognized
|
||||
|
||||
def test_i_can_match_multiple_sequences(self):
|
||||
my_map = {
|
||||
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))),
|
||||
@@ -760,6 +789,20 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
|
||||
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
|
||||
assert concepts_nodes == expected_array
|
||||
|
||||
def test_i_can_parse_filter(self):
    """Parsing "--filter one" yields a PARSER_RESULT holding the expected "filter" concept node."""
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    expression = "--filter one"

    res = parser.parse(context, ParserInput(expression))
    expected_array = compute_expected_array(
        cmap,
        expression,
        [CNC("filter", source="--filter one", one="one")],
    )
    parser_result = res.value
    concepts_nodes = parser_result.value

    # Strict comparison kept on purpose: status must equal True, not merely be truthy.
    assert res.status == True
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
|
||||
|
||||
# @pytest.mark.parametrize("parser_input, expected", [
|
||||
# ("one", [
|
||||
|
||||
@@ -84,6 +84,8 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
|
||||
("('a'*=x 'b'+=y)=z", Sequence(
|
||||
ZeroOrMore(StrMatch("a"), rule_name="x"),
|
||||
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
|
||||
("'--filter'",
|
||||
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
|
||||
])
|
||||
def test_i_can_parse_regex(self, expression, expected):
|
||||
sheerka, context, parser = self.init_parser()
|
||||
@@ -141,8 +143,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
|
||||
sheerka, context, parser = self.init_parser()
|
||||
|
||||
res = parser.parse(context, Tokenizer(expression))
|
||||
ret_value = res.value.value
|
||||
ret_value = res.body.body
|
||||
assert parser.has_error
|
||||
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
|
||||
assert not res.status
|
||||
assert ret_value[0] == error
|
||||
|
||||
|
||||
@@ -187,9 +187,9 @@ func(a)
|
||||
return_value = res.value
|
||||
|
||||
assert not res.status
|
||||
assert isinstance(return_value, ParserResultConcept)
|
||||
assert isinstance(return_value.value[0], SyntaxErrorNode)
|
||||
assert return_value.value[0].message == "Indentation not found."
|
||||
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
|
||||
assert isinstance(return_value.body[0], SyntaxErrorNode)
|
||||
assert return_value.body[0].message == "Indentation not found."
|
||||
|
||||
def test_indentation_is_not_allowed_if_the_colon_is_missing(self):
|
||||
text = """
|
||||
@@ -213,24 +213,22 @@ def concept add one to a as
|
||||
return_value = res.value
|
||||
|
||||
assert not res.status
|
||||
assert isinstance(return_value, ParserResultConcept)
|
||||
assert isinstance(return_value.value[0], SyntaxErrorNode)
|
||||
assert return_value.value[0].message == "Name is mandatory"
|
||||
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
|
||||
assert isinstance(return_value.body[0], SyntaxErrorNode)
|
||||
assert return_value.body[0].message == "Name is mandatory"
|
||||
|
||||
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
|
||||
text = "def hello as a where b pre c post d"
|
||||
|
||||
expected_concept = get_def_concept(name="hello", body="a ", where="b ", pre="c ", post="d")
|
||||
sheerka, context, parser = self.init_parser()
|
||||
res = parser.parse(context, ParserInput(text))
|
||||
return_value = res.value
|
||||
|
||||
assert not res.status
|
||||
assert isinstance(return_value, ParserResultConcept)
|
||||
assert isinstance(return_value.value[0], UnexpectedTokenErrorNode)
|
||||
assert return_value.value[0].message == "Syntax error."
|
||||
assert return_value.value[0].expected_tokens == [Keywords.CONCEPT]
|
||||
assert return_value.try_parsed == expected_concept
|
||||
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
|
||||
assert isinstance(return_value.body[0], UnexpectedTokenErrorNode)
|
||||
assert return_value.body[0].message == "Syntax error."
|
||||
assert return_value.body[0].expected_tokens == [Keywords.CONCEPT]
|
||||
|
||||
@pytest.mark.parametrize("text", [
|
||||
"def concept hello where 1+",
|
||||
@@ -254,7 +252,8 @@ def concept add one to a as
|
||||
return_value = res.value
|
||||
|
||||
assert not res.status
|
||||
assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")]
|
||||
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
|
||||
assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")]
|
||||
|
||||
def test_i_can_parse_def_concept_from_bnf(self):
|
||||
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
|
||||
@@ -289,7 +288,8 @@ def concept add one to a as
|
||||
res = parser.parse(context, ParserInput(text))
|
||||
|
||||
assert not res.status
|
||||
assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration")
|
||||
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
|
||||
assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration")
|
||||
|
||||
@pytest.mark.parametrize("text", [
|
||||
"def concept addition from a plus b as a + b",
|
||||
@@ -341,7 +341,7 @@ def concept add one to a as
|
||||
res = parser.parse(context, ParserInput(text))
|
||||
|
||||
assert not res.status
|
||||
assert isinstance(res.body, ParserResultConcept)
|
||||
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
|
||||
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
|
||||
|
||||
@pytest.mark.parametrize("text, error_msg, error_text", [
|
||||
|
||||
Reference in New Issue
Block a user