I can now use keywords in concept definitions and parsing

This commit is contained in:
2020-05-22 15:46:04 +02:00
parent 37d3d16e21
commit 3ce6ce2a76
14 changed files with 127 additions and 45 deletions
+1 -1
View File
@@ -246,7 +246,7 @@ class Concept:
key += VARIABLE_PREFIX + str(variables.index(token.value)) key += VARIABLE_PREFIX + str(variables.index(token.value))
else: else:
# value = token.value[1:-1] if token.type == TokenKind.STRING else token.value # value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += token.value key += token.value.value if token.type == TokenKind.KEYWORD else token.value
first = False first = False
self.metadata.key = key self.metadata.key = key
+18 -6
View File
@@ -59,6 +59,7 @@ class Token:
line: int line: int
column: int column: int
_strip_quote: str = field(default=None, repr=False, compare=False, hash=None)
_str_value: str = field(default=None, repr=False, compare=False, hash=None) _str_value: str = field(default=None, repr=False, compare=False, hash=None)
def __repr__(self): def __repr__(self):
@@ -75,23 +76,34 @@ class Token:
return f"Token({value})" return f"Token({value})"
@property
def strip_quote(self):
if self._strip_quote:
return self._strip_quote
self._strip_quote = self._to_str(True)
return self._strip_quote
@property @property
def str_value(self): def str_value(self):
if self._str_value: if self._str_value:
return self._str_value return self._str_value
if self.type == TokenKind.STRING: self._str_value = self._to_str(False)
self._str_value = self.value[1:-1]
elif self.type == TokenKind.KEYWORD:
self._str_value = self.value.value
else:
self._str_value = str(self.value)
return self._str_value return self._str_value
@staticmethod @staticmethod
def is_whitespace(token): def is_whitespace(token):
return token and token.type == TokenKind.WHITESPACE return token and token.type == TokenKind.WHITESPACE
def _to_str(self, strip_quote):
if strip_quote and self.type == TokenKind.STRING:
return self.value[1:-1]
elif self.type == TokenKind.KEYWORD:
return self.value.value
else:
return str(self.value)
@dataclass() @dataclass()
class LexerError(Exception): class LexerError(Exception):
+2 -2
View File
@@ -92,7 +92,7 @@ class AtomConceptParserHelper:
self.debug.append(token) self.debug.append(token)
if self.expected_tokens[0] != token.str_value: if self.expected_tokens[0] != token.strip_quote:
self.errors.append(UnexpectedTokenErrorNode( self.errors.append(UnexpectedTokenErrorNode(
f"Found '{token}' while expecting '{self.expected_tokens[0]}'", f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
token, token,
@@ -120,7 +120,7 @@ class AtomConceptParserHelper:
forked.eat_concept(concept, pos) forked.eat_concept(concept, pos)
concept_node = ConceptNode(concept, pos, pos) concept_node = ConceptNode(concept, pos, pos)
expected = [t.str_value for t in Tokenizer(concept.name)][1:-1] expected = [t.strip_quote for t in Tokenizer(concept.name)][1:-1]
if not expected: if not expected:
# the concept is already matched # the concept is already matched
+2 -2
View File
@@ -6,7 +6,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token from core.tokenizer import TokenKind, LexerError, Token, Keywords
from parsers.BaseParser import Node, BaseParser, ErrorNode from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True DEBUG_COMPILED = True
@@ -440,7 +440,7 @@ class CN(HelperWithPos):
self.concept = concept if isinstance(concept, Concept) else None self.concept = concept if isinstance(concept, Concept) else None
def fix_source(self, str_tokens): def fix_source(self, str_tokens):
self.source = "".join(str_tokens) self.source = "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens])
return self return self
def __eq__(self, other): def __eq__(self, other):
+4 -4
View File
@@ -400,11 +400,11 @@ class StrMatch(Match):
def _parse(self, parser_helper): def _parse(self, parser_helper):
token = parser_helper.get_token() token = parser_helper.get_token()
m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \ m = token.str_value.lower() == self.to_match.lower() if self.ignore_case \
else token.value == self.to_match else token.strip_quote == self.to_match
if m: if m:
node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.value) node = TerminalNode(self, parser_helper.pos, parser_helper.pos, token.str_value)
parser_helper.next_token() parser_helper.next_token()
return node return node
@@ -882,7 +882,7 @@ class BnfNodeParser(BaseNodeParser):
else: else:
# regular concepts # regular concepts
tokens = Tokenizer(concept.name) tokens = Tokenizer(concept.name)
nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]] nodes = [StrMatch(token.strip_quote) for token in list(tokens)[:-1]]
pe = inner_resolve(nodes[0] if len(nodes) == 1 else Sequence(nodes), inner_already_seen) pe = inner_resolve(nodes[0] if len(nodes) == 1 else Sequence(nodes), inner_already_seen)
if not isinstance(pe, ParsingExpression): if not isinstance(pe, ParsingExpression):
+8 -7
View File
@@ -74,13 +74,13 @@ class BnfParser(BaseParser):
try: try:
self._current = self.after_current or next(self.lexer_iter) self._current = self.after_current or next(self.lexer_iter)
self.source += str(self._current.value) self.source += self._current.str_value
self.after_current = None self.after_current = None
if skip_whitespace: if skip_whitespace:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter) self._current = next(self.lexer_iter)
self.source += str(self._current.value) self.source += self._current.str_value
except StopIteration: except StopIteration:
self._current = Token(TokenKind.EOF, "", -1, -1, -1) self._current = Token(TokenKind.EOF, "", -1, -1, -1)
@@ -90,7 +90,7 @@ class BnfParser(BaseParser):
try: try:
self.after_current = next(self.lexer_iter) self.after_current = next(self.lexer_iter)
# self.source += str(self.after_current.value) # self.source += self.after_current.str_value
return self.after_current return self.after_current
except StopIteration: except StopIteration:
self.after_current = Token(TokenKind.EOF, "", -1, -1, -1) self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
@@ -99,13 +99,13 @@ class BnfParser(BaseParser):
def eat_white_space(self): def eat_white_space(self):
if self.after_current is not None: if self.after_current is not None:
self._current = self.after_current self._current = self.after_current
self.source += str(self._current.value) self.source += self._current.str_value
self.after_current = None self.after_current = None
try: try:
while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE: while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
self._current = next(self.lexer_iter) self._current = next(self.lexer_iter)
self.source += str(self._current.value) self.source += self._current.str_value
except StopIteration: except StopIteration:
self._current = None self._current = None
@@ -239,10 +239,10 @@ class BnfParser(BaseParser):
# else ConceptExpression(concept) # else ConceptExpression(concept)
return self.eat_rule_name_if_needed(expr) return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.IDENTIFIER: if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD):
self.next_token() self.next_token()
concept_name = str(token.value) concept_name = token.str_value
# we are trying to match against a concept which is still under construction ! # we are trying to match against a concept which is still under construction !
# (for example of recursive bnf definition) # (for example of recursive bnf definition)
@@ -283,3 +283,4 @@ class BnfParser(BaseParser):
expression.rule_name = token.value expression.rule_name = token.value
self.next_token() self.next_token()
return expression return expression
+6 -15
View File
@@ -131,15 +131,15 @@ class SyaConceptParserHelper:
# True if the next token is the one that is expected # True if the next token is the one that is expected
# Or if the next token is a whitespace and the expected one is the one after # Or if the next token is a whitespace and the expected one is the one after
# (whitespace are sometimes not mandatory) # (whitespace are sometimes not mandatory)
return token.str_value == self.expected[0].str_value or \ return token.strip_quote == self.expected[0].strip_quote or \
self.expected[0].type == TokenKind.WHITESPACE and token.str_value == self.expected[1].str_value self.expected[0].type == TokenKind.WHITESPACE and token.strip_quote == self.expected[1].strip_quote
def is_expected(self, token): def is_expected(self, token):
if self.is_matched() or token.type == TokenKind.WHITESPACE: if self.is_matched() or token.type == TokenKind.WHITESPACE:
return False return False
for expected in self.expected: for expected in self.expected:
if expected.type != TokenKind.VAR_DEF and expected.str_value == token.str_value: if expected.type != TokenKind.VAR_DEF and expected.strip_quote == token.strip_quote:
return True return True
return False return False
@@ -154,7 +154,7 @@ class SyaConceptParserHelper:
:return: :return:
""" """
# No check, as it is used only after is_expected() or is_next() # No check, as it is used only after is_expected() or is_next()
while self.expected[0].str_value != until_token.str_value: while self.expected[0].strip_quote != until_token.strip_quote:
del self.expected[0] del self.expected[0]
del self.expected[0] del self.expected[0]
@@ -193,15 +193,6 @@ class SyaConceptParserHelper:
self.concept = self.concept.concept self.concept = self.concept.concept
return self return self
# @staticmethod
# def _get_token_value(token):
# if token.type == TokenKind.STRING:
# return token.value[1:-1]
# elif token.type == TokenKind.KEYWORD:
# return token.value.value
# else:
# return token.value
def clone(self): def clone(self):
clone = SyaConceptParserHelper(self.concept, self.start, self.end) clone = SyaConceptParserHelper(self.concept, self.start, self.end)
clone.expected = self.expected[:] clone.expected = self.expected[:]
@@ -296,7 +287,7 @@ class InFixToPostFix:
if item.expected[0].type == TokenKind.VAR_DEF: if item.expected[0].type == TokenKind.VAR_DEF:
item.error = "Not enough suffix parameters" item.error = "Not enough suffix parameters"
else: else:
item.error = f"token '{item.expected[0].str_value}' not found" item.error = f"token '{item.expected[0].strip_quote}' not found"
if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1: if isinstance(item, SyaConceptParserHelper) and item.potential_pos != -1:
self.out.insert(item.potential_pos, item) self.out.insert(item.potential_pos, item)
@@ -431,7 +422,7 @@ class InFixToPostFix:
while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF: while len(current_concept.expected) > 0 and current_concept.expected[0].type == TokenKind.VAR_DEF:
# eat everything that was expected # eat everything that was expected
if len(self.parameters_list) == 0: if len(self.parameters_list) == 0:
current_concept.error = f"Failed to match parameter '{current_concept.expected[0].str_value}'" current_concept.error = f"Failed to match parameter '{current_concept.expected[0].strip_quote}'"
return return
del self.parameters_list[0] del self.parameters_list[0]
del current_concept.expected[0] del current_concept.expected[0]
+1
View File
@@ -19,6 +19,7 @@ from core.concept import Concept, ConceptParts, DEFINITION_TYPE_DEF
("a | b", ["a", "b"], "__var__0 | __var__1"), ("a | b", ["a", "b"], "__var__0 | __var__1"),
("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"), ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"),
("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"), ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"),
("def concept a", ["a"], "def concept __var__0"),
]) ])
def test_i_can_compute_the_key(name, variables, expected): def test_i_can_compute_the_key(name, variables, expected):
concept = Concept(name) concept = Concept(name)
+1 -1
View File
@@ -154,7 +154,7 @@ def get_node(
if concept_found: if concept_found:
concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests concept_found = Concept().update_from(concept_found) # make a copy when massively used in tests
if sya and len(concept_found.metadata.variables) > 0 and not is_bnf: if sya and len(concept_found.metadata.variables) > 0 and not is_bnf:
return SyaConceptParserHelper(concept_found, start) return SyaConceptParserHelper(concept_found, start, start + length - 1)
elif init_empty_body: elif init_empty_body:
node = CNC(concept_found, start, start + length - 1, source=sub_expr, exclude_body=exclude_body) node = CNC(concept_found, start, start + length - 1, source=sub_expr, exclude_body=exclude_body)
init_body(node, concept_found, sub_expr) init_body(node, concept_found, sub_expr)
+18
View File
@@ -215,6 +215,24 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
expected_array = compute_expected_array(concepts_map, "one", ["one"]) expected_array = compute_expected_array(concepts_map, "one", ["one"])
assert lexer_nodes == expected_array assert lexer_nodes == expected_array
def test_i_can_parse_concepts_with_keyword(self):
concepts_map = {
"a special concept": Concept("a special concept"),
"isa": Concept("isa"),
}
sheerka, context, parser = self.init_parser(concepts_map)
res = parser.parse(context, "a special concept")
lexer_nodes = res.body.body
expected_array = compute_expected_array(concepts_map, "a special concept", ["a special concept"])
assert lexer_nodes == expected_array
res = parser.parse(context, "isa")
lexer_nodes = res.body.body
expected_array = compute_expected_array(concepts_map, "isa", ["isa"])
assert lexer_nodes == expected_array
@pytest.mark.parametrize("text", [ @pytest.mark.parametrize("text", [
"foo", "foo",
f"foo one", f"foo one",
+34 -6
View File
@@ -1,6 +1,6 @@
import pytest import pytest
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve from core.concept import Concept, ConceptParts, DoNotResolve, DEFINITION_TYPE_DEF, CC
from parsers.BaseNodeParser import CNC, UTN, CN from parsers.BaseNodeParser import CNC, UTN, CN
from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \ from parsers.BnfNodeParser import BnfNodeParser, StrMatch, TerminalNode, NonTerminalNode, Sequence, OrderedChoice, \
Optional, ZeroOrMore, OneOrMore, ConceptExpression Optional, ZeroOrMore, OneOrMore, ConceptExpression
@@ -18,7 +18,17 @@ cmap = {
'one or more three': Concept("one or more three", definition="three+"), 'one or more three': Concept("one or more three", definition="three+"),
'two or four': Concept("two or four", definition="two | 'four'"), 'two or four': Concept("two or four", definition="two | 'four'"),
"twenties": Concept("twenties", definition="'twenty' c:two or four:=unit"), "twenties": Concept("twenties", definition="'twenty' c:two or four:=unit"),
"one or more plus": Concept("one or more plus", definition="c:a plus b:+"), "one or more plus": Concept("one or more plus", definition="c:a plus b:+"), # TODO
# testing keywords
"def_only": Concept("def"),
"def number": Concept("def number", definition="def (one|two)=number"),
# sequence of keywords using bnf definition
# "def_concept_bnf": Concept("def_concept_bnf", definition="'def' 'concept'"),
# "def concept_bnf number": Concept("def number", definition="def_concept_bnf (one|two)=number"),
# sequence of keywords using def definition
# "def_concept_def": Concept("def_concept_def", definition="def concept", definition_type=DEFINITION_TYPE_DEF),
# "def concept_def number": Concept("def number", definition="def_concept_def (one|two|three)=number"),
} }
@@ -40,12 +50,13 @@ def u(parsing_expression, start, end, children=None):
return NonTerminalNode(parsing_expression, start, end, [], children) return NonTerminalNode(parsing_expression, start, end, [], children)
def compute_expected_array(my_concepts_map, expression, expected): def compute_expected_array(my_concepts_map, expression, expected, exclude_body=False):
return tests.parsers.parsers_utils.compute_expected_array( return tests.parsers.parsers_utils.compute_expected_array(
my_concepts_map, my_concepts_map,
expression, expression,
expected, expected,
init_empty_body=True) init_empty_body=True,
exclude_body=exclude_body)
class TestBnfNodeParser(TestUsingMemoryBasedSheerka): class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
@@ -136,7 +147,7 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert res.body.reason == BuiltinConcepts.IS_EMPTY assert res.body.reason == BuiltinConcepts.IS_EMPTY
@pytest.mark.parametrize("expr, text", [ @pytest.mark.parametrize("expr, text", [
(StrMatch("foo"), "foo"), # (StrMatch("foo"), "foo"),
(StrMatch("'foo'"), "'foo'"), (StrMatch("'foo'"), "'foo'"),
(StrMatch("1"), "1"), (StrMatch("1"), "1"),
(StrMatch("3.14"), "3.14"), (StrMatch("3.14"), "3.14"),
@@ -741,7 +752,6 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
("twenty four", True, [CN("twenties", source="twenty four")]), ("twenty four", True, [CN("twenties", source="twenty four")]),
("twenty one", False, [UTN("twenty "), CN("bnf one", source="one")]), ("twenty one", False, [UTN("twenty "), CN("bnf one", source="one")]),
("twenty two + 1", True, [CN("twenties", source="twenty two"), " + 1"]), ("twenty two + 1", True, [CN("twenties", source="twenty two"), " + 1"]),
]) ])
def test_i_can_parse(self, parser_input, expected_status, expected): def test_i_can_parse(self, parser_input, expected_status, expected):
sheerka, context, parser = self.init_parser(init_from_sheerka=True) sheerka, context, parser = self.init_parser(init_from_sheerka=True)
@@ -755,6 +765,24 @@ class TestBnfNodeParser(TestUsingMemoryBasedSheerka):
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT) assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array assert concepts_nodes == expected_array
def test_i_can_parse_when_keyword(self):
sheerka, context, parser = self.init_parser(init_from_sheerka=True)
parser_input = "def one"
expected = [CNC("def number", source="def one", number="one", one="one")]
res = parser.parse(context, parser_input)
expected_array = compute_expected_array(cmap, parser_input, expected)
expected_array[0].compiled["def"] = cmap["def_only"]
parser_result = res.value
concepts_nodes = res.value.value
assert res.status
assert sheerka.isinstance(parser_result, BuiltinConcepts.PARSER_RESULT)
assert concepts_nodes == expected_array
# @pytest.mark.parametrize("parser_input, expected", [ # @pytest.mark.parametrize("parser_input, expected", [
# ("one", [ # ("one", [
# (True, [CNC("bnf_one", source="one", one="one", body="one")]), # (True, [CNC("bnf_one", source="one", one="one", body="one")]),
+2 -1
View File
@@ -103,9 +103,10 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))), ("'str''='var", Sequence(StrMatch("str"), StrMatch("="), c("var"))),
("foo=f", c("foo", "f")), ("foo=f", c("foo", "f")),
("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))), ("foo=f 'constant'", Sequence(c("foo", "f"), StrMatch("constant"))),
("def 'concept'", Sequence(c("def"), StrMatch("concept"))),
]) ])
def test_i_can_parse_regex_with_concept(self, expression, expected): def test_i_can_parse_regex_with_concept(self, expression, expected):
sheerka, context, parser, foo, bar, var = self.init_parser("foo", "bar", "var") sheerka, context, parser, foo, bar, var, _def = self.init_parser("foo", "bar", "var", "def")
res = parser.parse(context, Tokenizer(expression)) res = parser.parse(context, Tokenizer(expression))
+26
View File
@@ -161,6 +161,32 @@ class TestExactConceptParser(TestUsingMemoryBasedSheerka):
assert concept_found.metadata.need_validation assert concept_found.metadata.need_validation
assert not concept_found.metadata.is_evaluated assert not concept_found.metadata.is_evaluated
def test_i_can_parse_when_expression_contains_keyword(self):
sheerka, context, isa, def_concept = self.init_concepts(
Concept("c is a concept").def_var("c"),
Concept("def concept a").def_var("a"),
)
source = "z is a concept"
results = ExactConceptParser().parse(context, source)
concept_found = results[0].value.value
assert len(results) == 1
assert results[0].status
assert concept_found == CMV(isa, c="z")
assert concept_found.metadata.need_validation
assert not concept_found.metadata.is_evaluated
source = "def concept z"
results = ExactConceptParser().parse(context, source)
concept_found = results[0].value.value
assert len(results) == 1
assert results[0].status
assert concept_found == CMV(def_concept, a="z")
assert concept_found.metadata.need_validation
assert not concept_found.metadata.is_evaluated
def test_i_can_manage_unknown_concept(self): def test_i_can_manage_unknown_concept(self):
context = self.get_context(self.get_sheerka(singleton=True)) context = self.get_context(self.get_sheerka(singleton=True))
source = "def concept hello" # this is not a concept by itself source = "def concept hello" # this is not a concept by itself
+4
View File
@@ -34,6 +34,7 @@ cmap = {
"foo bar": Concept("foo bar(a)").def_var("a"), "foo bar": Concept("foo bar(a)").def_var("a"),
"long infixed": Concept("a long infixed b").def_var("a").def_var("b"), "long infixed": Concept("a long infixed b").def_var("a").def_var("b"),
"twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"), "twenties": Concept("twenties", definition="'twenty' (one|two)=unit").def_var("unit"),
"is a concept": Concept("c is a concept").def_var("c"),
} }
@@ -1013,6 +1014,8 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
]), ]),
("function(suffixed x$!#)", False, [ ("function(suffixed x$!#)", False, [
SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]), SCWC("function(", ")", CNC("suffixed", 2, 7, a="x$!#"))]),
("one is a concept", True, [CNC("is a concept", c="one")]),
("a is a concept", False, [CNC("is a concept", c=UTN("a"))]),
]) ])
def test_i_can_parse_when_one_result(self, text, expected_status, expected_result): def test_i_can_parse_when_one_result(self, text, expected_status, expected_result):
sheerka, context, parser = self.init_parser() sheerka, context, parser = self.init_parser()
@@ -1142,3 +1145,4 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
assert not res.status assert not res.status
assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY) assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)