Fixed first token recognition when creating bnf concepts

This commit is contained in:
2020-05-29 08:52:06 +02:00
parent 479461c0a4
commit c498b394e3
11 changed files with 125 additions and 24 deletions
+3
View File
@@ -348,6 +348,9 @@ class AtomNodeParser(BaseNodeParser):
return valid_parser_helpers
def parse(self, context, parser_input: ParserInput):
if not isinstance(parser_input, ParserInput):
return None
if parser_input.is_empty():
return context.sheerka.ret(
self.name,
+7 -1
View File
@@ -162,11 +162,17 @@ class BaseParser:
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
return self.error_sink[0]
if self.has_error:
return sheerka.new(
BuiltinConcepts.ERROR,
body=self.error_sink
)
return sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=source,
body=self.error_sink if self.has_error else tree,
body=tree,
try_parsed=try_parse)
def get_input_as_text(self, parser_input, custom_switcher=None, tracker=None):
+8 -2
View File
@@ -391,10 +391,11 @@ class StrMatch(Match):
Matches a literal
"""
def __init__(self, to_match, rule_name="", ignore_case=True):
def __init__(self, to_match, rule_name="", ignore_case=True, skip_whitespace=True):
# Build a matcher for the literal `to_match`.
#   to_match        -- the literal text this expression matches.
#   rule_name       -- forwarded to the parent initializer.
#   ignore_case     -- stored on the instance as given.
#   skip_whitespace -- stored as `skip_white_space` (note: the attribute is spelled
#                      differently from the parameter); it is the value handed to
#                      parser_helper.next_token() after a successful match.
# NOTE(review): super(Match, self) starts the MRO lookup *after* Match, so
# Match.__init__ itself is not the method invoked here -- confirm this is
# intentional and not a slip for super(StrMatch, self).
super(Match, self).__init__(rule_name=rule_name)
self.to_match = to_match
self.ignore_case = ignore_case
self.skip_white_space = skip_whitespace
def __repr__(self):
# Show the literal wrapped in single quotes; the quoted text is passed through
# add_rule_name_if_needed (defined elsewhere), which presumably decorates it with
# the rule name when one is set -- verify against that helper.
return self.add_rule_name_if_needed(f"'{self.to_match}'")
@@ -415,7 +416,7 @@ class StrMatch(Match):
if m:
node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value)
parser_helper.next_token()
parser_helper.next_token(self.skip_white_space)
return node
return None
@@ -469,6 +470,8 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
return self.STOP
def visit_StrMatch(self, pe):
    """Record the literal of a StrMatch expression as a first token.

    :param pe: the expression being visited; only its ``to_match`` attribute is read.
    :return: ``self.STOP`` once the literal has been recorded, or ``None`` when
        the literal is empty (nothing is recorded in that case).
    """
    literal = pe.to_match
    if literal:
        self.add_first_token(literal)
        return self.STOP
    # An empty literal contributes no first token.
    return None
@@ -947,6 +950,9 @@ class BnfNodeParser(BaseNodeParser):
:return:
"""
if not isinstance(parser_input, ParserInput):
return None
context.log(f"Parsing '{parser_input}' with BnfNode", self.name)
sheerka = context.sheerka
+11
View File
@@ -266,6 +266,17 @@ class BnfParser(BaseParser):
expr.rule_name = concept.name
return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.STRING:
self.next_token()
tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
if len(tokens) == 1:
return self.eat_rule_name_if_needed(StrMatch(tokens[0].str_value))
else:
elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens]
elements[-1].skip_white_space = True
ret = Sequence(*elements)
return self.eat_rule_name_if_needed(ret)
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return self.eat_rule_name_if_needed(ret)
+4 -1
View File
@@ -313,7 +313,6 @@ class InFixToPostFix:
"""
return len(self.stack) > 0 and isinstance(self.stack[-1], type)
def _make_source_code_with_concept(self, start, rpar_token, end):
"""
@@ -1068,6 +1067,10 @@ class SyaNodeParser(BaseNodeParser):
:param parser_input:
:return:
"""
if not isinstance(parser_input, ParserInput):
return None
if parser_input.is_empty():
return context.sheerka.ret(
self.name,
-1
View File
@@ -4,7 +4,6 @@ from logging import Logger
import core.utils
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from sheerkapickle import utils, tags, handlers
@@ -1,3 +1,4 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF
from core.sheerka.Sheerka import Sheerka
@@ -156,6 +157,31 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka):
assert res.status
assert sheerka.get_by_name(concept.name) == concept # it's not a list, ie the entry is not duplicated
def test_i_can_get_first_token_when_not_a_letter(self):
    """A concept named "--filter a" must be retrievable under the "-" first-keyword key."""
    sheerka = self.get_sheerka(cache_only=False)
    context = self.get_context(sheerka)
    concept = Concept("--filter a").def_var("a")

    creation_result = sheerka.create_new_concept(context, concept)
    assert creation_result.status

    # I can get by the first entry
    expected_ids = [concept.id]
    first_keyword_entries = (
        sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
        sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
    )
    for entry in first_keyword_entries:
        assert sheerka.cache_manager.get(entry, "-") == expected_ids
@pytest.mark.parametrize("expression", [
    "--'filter' ('one' | 'two') ",
    "'--filter' ('one' | 'two') ",
])
def test_i_can_get_first_token_when_bnf_concept_and_not_a_letter(self, expression):
    """A BNF-defined concept whose definition starts with '-' is indexed under "-".

    Both spellings are exercised: the dashes outside the quoted literal and
    the dashes inside it.
    """
    bnf_definition = Concept("foo", definition=expression)
    sheerka, context, bnf_concept = self.init_concepts(bnf_definition, create_new=True)

    # I can get by the first entry
    expected_ids = [bnf_concept.id]
    first_keyword_entries = (
        sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
        sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY,
    )
    for entry in first_keyword_entries:
        assert sheerka.cache_manager.get(entry, "-") == expected_ids
class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka):
def test_i_can_add_several_concepts(self):
+1
View File
@@ -40,6 +40,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
(Optional(StrMatch("foo")), {"foo": ["1002"]}),
(ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}),
(OneOrMore(StrMatch("foo")), {"foo": ["1002"]}),
(StrMatch("--filter"), {"--filter": ["1002"]}), # add both entries
])
def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected):
sheerka = self.get_sheerka()
+45 -2
View File
@@ -1,6 +1,6 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC
from core.concept import Concept, ConceptParts, DoNotResolve
from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
@@ -30,6 +30,8 @@ cmap = {
# sequence of keywords using def definition
# "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF),
# "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"),
"filter": Concept("filter", definition="'--filter' (one | two)")
}
@@ -125,7 +127,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert res.body.reason == BuiltinConcepts.IS_EMPTY
@pytest.mark.parametrize("expr, text", [
# (StrMatch("foo"), "foo"),
(StrMatch("foo"), "foo"),
(StrMatch("'foo'"), "'foo'"),
(StrMatch("1"), "1"),
(StrMatch("3.14"), "3.14"),
@@ -171,6 +173,33 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
expected = [CNC("foo", source=text)]
self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self):
    """Two StrMatch("-", skip_whitespace=False) followed by "filter".

    The contiguous text "--filter" is recognized in full, while the spaced
    text "- - filter" leaves unrecognized input.
    """
    filter_expression = Sequence(
        StrMatch("-", skip_whitespace=False),
        StrMatch("-", skip_whitespace=False),
        "filter",
    )
    my_map = {"filter": self.bnf_concept("filter", filter_expression)}

    sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)
    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)

    # contiguous dashes: the whole text is recognized
    text = "--filter"
    expected_array = compute_expected_array(my_map, text, [CN("filter", source="--filter")])
    parser.reset_parser(context, ParserInput(text))
    first_helper = parser.get_concepts_sequences()[0]
    assert first_helper.sequence == expected_array
    assert not first_helper.has_unrecognized

    # but I cannot parse
    parser.reset_parser(context, ParserInput("- - filter"))
    assert parser.get_concepts_sequences()[0].has_unrecognized
def test_i_can_match_multiple_sequences(self):
my_map = {
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))),
@@ -760,6 +789,20 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
def test_i_can_parse_filter(self):
    """Parsing "--filter one" yields the "filter" concept with one="one".

    The parser is initialised from sheerka, so the "filter" entry of the
    module-level ``cmap`` is the concept expected to match.
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)

    expression = "--filter one"
    expected = [CNC("filter", source="--filter one", one="one")]

    res = parser.parse(context, ParserInput(expression))
    expected_array = compute_expected_array(cmap, expression, expected)
    parser_result = res.value
    concepts_nodes = parser_result.value

    # Truthiness check, consistent with the other status assertions in the
    # test suite (comparing to True with == is discouraged by PEP 8).
    assert res.status
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
    assert concepts_nodes == expected_array
# @pytest.mark.parametrize("parser_input, expected", [
# ("one", [
+4 -1
View File
@@ -84,6 +84,8 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("('a'*=x 'b'+=y)=z", Sequence(
ZeroOrMore(StrMatch("a"), rule_name="x"),
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
("'--filter'",
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
])
def test_i_can_parse_regex(self, expression, expected):
sheerka, context, parser = self.init_parser()
@@ -141,8 +143,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer(expression))
ret_value = res.value.value
ret_value = res.body.body
assert parser.has_error
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert not res.status
assert ret_value[0] == error
+15 -15
View File
@@ -187,9 +187,9 @@ func(a)
return_value = res.value
assert not res.status
assert isinstance(return_value, ParserResultConcept)
assert isinstance(return_value.value[0], SyntaxErrorNode)
assert return_value.value[0].message == "Indentation not found."
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Indentation not found."
def test_indentation_is_not_allowed_if_the_colon_is_missing(self):
text = """
@@ -213,24 +213,22 @@ def concept add one to a as
return_value = res.value
assert not res.status
assert isinstance(return_value, ParserResultConcept)
assert isinstance(return_value.value[0], SyntaxErrorNode)
assert return_value.value[0].message == "Name is mandatory"
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.body[0].message == "Name is mandatory"
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d"
expected_concept = get_def_concept(name="hello", body="a ", where="b ", pre="c ", post="d")
sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text))
return_value = res.value
assert not res.status
assert isinstance(return_value, ParserResultConcept)
assert isinstance(return_value.value[0], UnexpectedTokenErrorNode)
assert return_value.value[0].message == "Syntax error."
assert return_value.value[0].expected_tokens == [Keywords.CONCEPT]
assert return_value.try_parsed == expected_concept
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.body[0], UnexpectedTokenErrorNode)
assert return_value.body[0].message == "Syntax error."
assert return_value.body[0].expected_tokens == [Keywords.CONCEPT]
@pytest.mark.parametrize("text", [
"def concept hello where 1+",
@@ -254,7 +252,8 @@ def concept add one to a as
return_value = res.value
assert not res.status
assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")]
assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")]
def test_i_can_parse_def_concept_from_bnf(self):
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
@@ -289,7 +288,8 @@ def concept add one to a as
res = parser.parse(context, ParserInput(text))
assert not res.status
assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration")
assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration")
@pytest.mark.parametrize("text", [
"def concept addition from a plus b as a + b",
@@ -341,7 +341,7 @@ def concept add one to a as
res = parser.parse(context, ParserInput(text))
assert not res.status
assert isinstance(res.body, ParserResultConcept)
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [