Fixed first token recognition when creating bnf concepts

This commit is contained in:
2020-05-29 08:52:06 +02:00
parent 479461c0a4
commit c498b394e3
11 changed files with 125 additions and 24 deletions
+3
View File
@@ -348,6 +348,9 @@ class AtomNodeParser(BaseNodeParser):
return valid_parser_helpers return valid_parser_helpers
def parse(self, context, parser_input: ParserInput): def parse(self, context, parser_input: ParserInput):
if not isinstance(parser_input, ParserInput):
return None
if parser_input.is_empty(): if parser_input.is_empty():
return context.sheerka.ret( return context.sheerka.ret(
self.name, self.name,
+7 -1
View File
@@ -162,11 +162,17 @@ class BaseParser:
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept): if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
return self.error_sink[0] return self.error_sink[0]
if self.has_error:
return sheerka.new(
BuiltinConcepts.ERROR,
body=self.error_sink
)
return sheerka.new( return sheerka.new(
BuiltinConcepts.PARSER_RESULT, BuiltinConcepts.PARSER_RESULT,
parser=self, parser=self,
source=source, source=source,
body=self.error_sink if self.has_error else tree, body=tree,
try_parsed=try_parse) try_parsed=try_parse)
def get_input_as_text(self, parser_input, custom_switcher=None, tracker=None): def get_input_as_text(self, parser_input, custom_switcher=None, tracker=None):
+8 -2
View File
@@ -391,10 +391,11 @@ class StrMatch(Match):
Matches a literal Matches a literal
""" """
def __init__(self, to_match, rule_name="", ignore_case=True, skip_whitespace=True):
    """
    Build a matcher for a literal.

    :param to_match: the literal, stored as ``to_match``
    :param rule_name: forwarded to the parent initializer
    :param ignore_case: stored as ``ignore_case``
    :param skip_whitespace: stored as ``skip_white_space`` (note the attribute
        is spelled differently from the parameter)
    """
    # NOTE(review): super(Match, self) starts the lookup *after* Match, so
    # Match.__init__ itself is bypassed - confirm this is intended.
    super(Match, self).__init__(rule_name=rule_name)
    self.skip_white_space = skip_whitespace
    self.ignore_case = ignore_case
    self.to_match = to_match
def __repr__(self): def __repr__(self):
return self.add_rule_name_if_needed(f"'{self.to_match}'") return self.add_rule_name_if_needed(f"'{self.to_match}'")
@@ -415,7 +416,7 @@ class StrMatch(Match):
if m: if m:
node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value) node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value)
parser_helper.next_token() parser_helper.next_token(self.skip_white_space)
return node return node
return None return None
@@ -469,6 +470,8 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
return self.STOP return self.STOP
def visit_StrMatch(self, pe):
    """
    Register the literal of a StrMatch as a first token.

    :param pe: expression exposing a ``to_match`` attribute
    :return: ``self.STOP`` once the literal has been registered; ``None`` when
        the literal is empty (nothing is registered in that case)
    """
    literal = pe.to_match
    if literal:
        self.add_first_token(literal)
        return self.STOP
    return None
@@ -947,6 +950,9 @@ class BnfNodeParser(BaseNodeParser):
:return: :return:
""" """
if not isinstance(parser_input, ParserInput):
return None
context.log(f"Parsing '{parser_input}' with BnfNode", self.name) context.log(f"Parsing '{parser_input}' with BnfNode", self.name)
sheerka = context.sheerka sheerka = context.sheerka
+11
View File
@@ -266,6 +266,17 @@ class BnfParser(BaseParser):
expr.rule_name = concept.name expr.rule_name = concept.name
return self.eat_rule_name_if_needed(expr) return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.STRING:
self.next_token()
tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
if len(tokens) == 1:
return self.eat_rule_name_if_needed(StrMatch(tokens[0].str_value))
else:
elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens]
elements[-1].skip_white_space = True
ret = Sequence(*elements)
return self.eat_rule_name_if_needed(ret)
ret = StrMatch(core.utils.strip_quotes(token.value)) ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token() self.next_token()
return self.eat_rule_name_if_needed(ret) return self.eat_rule_name_if_needed(ret)
+4 -1
View File
@@ -313,7 +313,6 @@ class InFixToPostFix:
""" """
return len(self.stack) > 0 and isinstance(self.stack[-1], type) return len(self.stack) > 0 and isinstance(self.stack[-1], type)
def _make_source_code_with_concept(self, start, rpar_token, end): def _make_source_code_with_concept(self, start, rpar_token, end):
""" """
@@ -1068,6 +1067,10 @@ class SyaNodeParser(BaseNodeParser):
:param parser_input: :param parser_input:
:return: :return:
""" """
if not isinstance(parser_input, ParserInput):
return None
if parser_input.is_empty(): if parser_input.is_empty():
return context.sheerka.ret( return context.sheerka.ret(
self.name, self.name,
-1
View File
@@ -4,7 +4,6 @@ from logging import Logger
import core.utils import core.utils
from core.concept import Concept from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from sheerkapickle import utils, tags, handlers from sheerkapickle import utils, tags, handlers
@@ -1,3 +1,4 @@
import pytest
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF from core.concept import PROPERTIES_TO_SERIALIZE, Concept, DEFINITION_TYPE_DEF
from core.sheerka.Sheerka import Sheerka from core.sheerka.Sheerka import Sheerka
@@ -156,6 +157,31 @@ class TestSheerkaCreateNewConcept(TestUsingMemoryBasedSheerka):
assert res.status assert res.status
assert sheerka.get_by_name(concept.name) == concept # it's not a list, ie the entry is not duplicated assert sheerka.get_by_name(concept.name) == concept # it's not a list, ie the entry is not duplicated
def test_i_can_get_first_token_when_not_a_letter(self):
    """
    A concept named "--filter a" must be retrievable from both
    first-keyword caches under the key "-".
    """
    sheerka = self.get_sheerka(cache_only=False)
    context = self.get_context(sheerka)
    concept = Concept("--filter a").def_var("a")

    creation = sheerka.create_new_concept(context, concept)
    assert creation.status

    # Same lookup on the plain and on the resolved first-keyword entries
    lookup = sheerka.cache_manager.get
    assert lookup(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id]
    assert lookup(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [concept.id]
@pytest.mark.parametrize("expression", [
    "--'filter' ('one' | 'two') ",
    "'--filter' ('one' | 'two') ",
])
def test_i_can_get_first_token_when_bnf_concept_and_not_a_letter(self, expression):
    """
    A concept created from a definition starting with dashes (quoted or not)
    must be retrievable from both first-keyword caches under the key "-".
    """
    definition = Concept("foo", definition=expression)
    sheerka, context, bnf_concept = self.init_concepts(definition, create_new=True)

    # Same lookup on the plain and on the resolved first-keyword entries
    lookup = sheerka.cache_manager.get
    assert lookup(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id]
    assert lookup(sheerka.RESOLVED_CONCEPTS_BY_FIRST_KEYWORD_ENTRY, "-") == [bnf_concept.id]
class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka): class TestSheerkaCreateNewConceptFileBased(TestUsingFileBasedSheerka):
def test_i_can_add_several_concepts(self): def test_i_can_add_several_concepts(self):
+1
View File
@@ -40,6 +40,7 @@ class TestBaseNodeParser(TestUsingMemoryBasedSheerka):
(Optional(StrMatch("foo")), {"foo": ["1002"]}), (Optional(StrMatch("foo")), {"foo": ["1002"]}),
(ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}), (ZeroOrMore(StrMatch("foo")), {"foo": ["1002"]}),
(OneOrMore(StrMatch("foo")), {"foo": ["1002"]}), (OneOrMore(StrMatch("foo")), {"foo": ["1002"]}),
(StrMatch("--filter"), {"--filter": ["1002"]}), # add both entries
]) ])
def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected): def test_i_can_get_concepts_by_first_keyword_with_bnf(self, bnf, expected):
sheerka = self.get_sheerka() sheerka = self.get_sheerka()
+45 -2
View File
@@ -1,6 +1,6 @@
import pytest import pytest
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC from core.concept import Concept, ConceptParts, DoNotResolve
from core.sheerka.services.SheerkaExecute import ParserInput from core.sheerka.services.SheerkaExecute import ParserInput
from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
@@ -30,6 +30,8 @@ cmap = {
# sequence of keywords using def definition # sequence of keywords using def definition
# "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF), # "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF),
# "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"), # "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"),
"filter": Concept("filter", definition="'--filter' (one | two)")
} }
@@ -125,7 +127,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert res.body.reason == BuiltinConcepts.IS_EMPTY assert res.body.reason == BuiltinConcepts.IS_EMPTY
@pytest.mark.parametrize("expr, text", [ @pytest.mark.parametrize("expr, text", [
# (StrMatch("foo"), "foo"), (StrMatch("foo"), "foo"),
(StrMatch("'foo'"), "'foo'"), (StrMatch("'foo'"), "'foo'"),
(StrMatch("1"), "1"), (StrMatch("1"), "1"),
(StrMatch("3.14"), "3.14"), (StrMatch("3.14"), "3.14"),
@@ -171,6 +173,33 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
expected = [CNC("foo", source=text)] expected = [CNC("foo", source=text)]
self.validate_get_concepts_sequences(my_map, text, expected) self.validate_get_concepts_sequences(my_map, text, expected)
def test_i_can_use_skip_whitespace_when_mixing_sequence_and_strmatch(self):
    """
    Two "-" literals declared with skip_whitespace=False followed by "filter":
    "--filter" is fully recognized whereas "- - filter" is not.
    """
    dashes_then_filter = Sequence(
        StrMatch("-", skip_whitespace=False),
        StrMatch("-", skip_whitespace=False),
        "filter")
    my_map = {"filter": self.bnf_concept("filter", dashes_then_filter)}
    sheerka, context, *updated = self.init_concepts(*my_map.values(), create_new=False, singleton=True)

    parser = BnfNodeParser()
    parser.init_from_concepts(context, updated)

    def first_helper(source):
        # Reset the parser on `source` and return the first parser helper
        parser.reset_parser(context, ParserInput(source))
        return parser.get_concepts_sequences()[0]

    # contiguous dashes
    contiguous = "--filter"
    expected_array = compute_expected_array(my_map, contiguous, [CN("filter", source="--filter")])
    helper = first_helper(contiguous)
    assert helper.sequence == expected_array
    assert not helper.has_unrecognized

    # dashes separated by spaces: the helper reports unrecognized input
    assert first_helper("- - filter").has_unrecognized
def test_i_can_match_multiple_sequences(self): def test_i_can_match_multiple_sequences(self):
my_map = { my_map = {
"foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))), "foo": self.bnf_concept("foo", Sequence(StrMatch("one"), StrMatch("two"), StrMatch("three"))),
@@ -760,6 +789,20 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array assert concepts_nodes == expected_array
def test_i_can_parse_filter(self):
    """
    Parse "--filter one" with a parser initialised from sheerka and check
    that the result is a PARSER_RESULT holding the expected "filter" node
    (expected nodes are computed from cmap).
    """
    sheerka, context, parser = self.init_parser(init_from_sheerka=True)
    expression = "--filter one"
    expected = [CNC("filter", source="--filter one", one="one")]

    res = parser.parse(context, ParserInput(expression))
    # Check the status first (plain truthiness, like the sibling tests) so a
    # failed parse is reported as such before the payload is dereferenced.
    assert res.status

    parser_result = res.value
    assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)

    expected_array = compute_expected_array(cmap, expression, expected)
    concepts_nodes = parser_result.value
    assert concepts_nodes == expected_array
# @pytest.mark.parametrize("parser_input, expected", [ # @pytest.mark.parametrize("parser_input, expected", [
# ("one", [ # ("one", [
+4 -1
View File
@@ -84,6 +84,8 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("('a'*=x 'b'+=y)=z", Sequence( ("('a'*=x 'b'+=y)=z", Sequence(
ZeroOrMore(StrMatch("a"), rule_name="x"), ZeroOrMore(StrMatch("a"), rule_name="x"),
OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")), OneOrMore(StrMatch("b"), rule_name="y"), rule_name="z")),
("'--filter'",
Sequence(StrMatch("-", skip_whitespace=False), StrMatch("-", skip_whitespace=False), StrMatch("filter")))
]) ])
def test_i_can_parse_regex(self, expression, expected): def test_i_can_parse_regex(self, expression, expected):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
@@ -141,8 +143,9 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.parse(context, Tokenizer(expression)) res = parser.parse(context, Tokenizer(expression))
ret_value = res.value.value ret_value = res.body.body
assert parser.has_error assert parser.has_error
assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert not res.status assert not res.status
assert ret_value[0] == error assert ret_value[0] == error
+15 -15
View File
@@ -187,9 +187,9 @@ func(a)
return_value = res.value return_value = res.value
assert not res.status assert not res.status
assert isinstance(return_value, ParserResultConcept) assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.value[0], SyntaxErrorNode) assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.value[0].message == "Indentation not found." assert return_value.body[0].message == "Indentation not found."
def test_indentation_is_not_allowed_if_the_colon_is_missing(self): def test_indentation_is_not_allowed_if_the_colon_is_missing(self):
text = """ text = """
@@ -213,24 +213,22 @@ def concept add one to a as
return_value = res.value return_value = res.value
assert not res.status assert not res.status
assert isinstance(return_value, ParserResultConcept) assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.value[0], SyntaxErrorNode) assert isinstance(return_value.body[0], SyntaxErrorNode)
assert return_value.value[0].message == "Name is mandatory" assert return_value.body[0].message == "Name is mandatory"
def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self): def test_concept_keyword_is_mandatory_but_the_concept_is_recognized(self):
text = "def hello as a where b pre c post d" text = "def hello as a where b pre c post d"
expected_concept = get_def_concept(name="hello", body="a ", where="b ", pre="c ", post="d")
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
res = parser.parse(context, ParserInput(text)) res = parser.parse(context, ParserInput(text))
return_value = res.value return_value = res.value
assert not res.status assert not res.status
assert isinstance(return_value, ParserResultConcept) assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert isinstance(return_value.value[0], UnexpectedTokenErrorNode) assert isinstance(return_value.body[0], UnexpectedTokenErrorNode)
assert return_value.value[0].message == "Syntax error." assert return_value.body[0].message == "Syntax error."
assert return_value.value[0].expected_tokens == [Keywords.CONCEPT] assert return_value.body[0].expected_tokens == [Keywords.CONCEPT]
assert return_value.try_parsed == expected_concept
@pytest.mark.parametrize("text", [ @pytest.mark.parametrize("text", [
"def concept hello where 1+", "def concept hello where 1+",
@@ -254,7 +252,8 @@ def concept add one to a as
return_value = res.value return_value = res.value
assert not res.status assert not res.status
assert return_value.value == [SyntaxErrorNode([], "Newline are not allowed in name.")] assert sheerka.isinstance(return_value, BuiltinConcepts.ERROR)
assert return_value.body == [SyntaxErrorNode([], "Newline are not allowed in name.")]
def test_i_can_parse_def_concept_from_bnf(self): def test_i_can_parse_def_concept_from_bnf(self):
text = "def concept name from bnf a_concept | 'a_string' as __definition[0]" text = "def concept name from bnf a_concept | 'a_string' as __definition[0]"
@@ -289,7 +288,8 @@ def concept add one to a as
res = parser.parse(context, ParserInput(text)) res = parser.parse(context, ParserInput(text))
assert not res.status assert not res.status
assert res.value.value[0] == SyntaxErrorNode([], "Empty declaration") assert sheerka.isinstance(res.value, BuiltinConcepts.ERROR)
assert res.value.body[0] == SyntaxErrorNode([], "Empty declaration")
@pytest.mark.parametrize("text", [ @pytest.mark.parametrize("text", [
"def concept addition from a plus b as a + b", "def concept addition from a plus b as a + b",
@@ -341,7 +341,7 @@ def concept add one to a as
res = parser.parse(context, ParserInput(text)) res = parser.parse(context, ParserInput(text))
assert not res.status assert not res.status
assert isinstance(res.body, ParserResultConcept) assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
assert isinstance(res.body.body[0], UnexpectedTokenErrorNode) assert isinstance(res.body.body[0], UnexpectedTokenErrorNode)
@pytest.mark.parametrize("text, error_msg, error_text", [ @pytest.mark.parametrize("text, error_msg, error_text", [