Enhanced AtomNode parsing by name

This commit is contained in:
2020-05-18 08:25:29 +02:00
parent 08e3086820
commit d080cbb05a
6 changed files with 105 additions and 56 deletions
+27 -19
View File
@@ -2,8 +2,9 @@ from dataclasses import dataclass
from core import builtin_helpers from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import DEFINITION_TYPE_BNF from core.concept import DEFINITION_TYPE_BNF, Concept
from core.tokenizer import Tokenizer from core.tokenizer import Tokenizer
from core.utils import strip_tokens
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
@@ -196,23 +197,6 @@ class AtomConceptParserHelper:
clone.has_unrecognized = self.has_unrecognized clone.has_unrecognized = self.has_unrecognized
return clone return clone
# def _get_lexer_nodes_from_unrecognized(self):
# """
# Use the source of self.unrecognized_tokens to find concepts or source code
# :return:
# """
#
# res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
# only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
#
# if not only_parsers_results.status:
# return None
#
# return builtin_helpers.get_lexer_nodes(
# only_parsers_results.body.body,
# self.unrecognized_tokens.start,
# self.unrecognized_tokens.tokens)
class AtomNodeParser(BaseNodeParser): class AtomNodeParser(BaseNodeParser):
""" """
@@ -314,6 +298,26 @@ class AtomNodeParser(BaseNodeParser):
return concept_parser_helpers return concept_parser_helpers
def get_by_name(self, parser_input):
    """
    Try to recognize the whole parser input as the name of a concept.

    :param parser_input: input given to the parser; it is converted to text
        with get_input_as_text before the lookup
    :return: None when sheerka does not know the result of the lookup,
        otherwise a list holding one AtomConceptParserHelper per concept
        found, each with a single ConceptNode in its sequence
    """
    text = self.get_input_as_text(parser_input)
    found = self.sheerka.get_by_name(text.strip())
    if not self.sheerka.is_known(found):
        return None

    # the lookup yields either one Concept or a collection of them
    if isinstance(found, Concept):
        found = [found]

    first, last = self.get_tokens_boundaries(self.tokens)
    helpers = []
    for candidate in found:
        helper = AtomConceptParserHelper(None)
        # the node keeps the text as it was received (not stripped)
        node = ConceptNode(candidate, first, last, strip_tokens(self.tokens, True), text)
        helper.sequence.append(node)
        helpers.append(helper)
    return helpers
def get_valid(self, concept_parser_helpers): def get_valid(self, concept_parser_helpers):
valid_parser_helpers = [] # be careful, it will be a list of list valid_parser_helpers = [] # be careful, it will be a list of list
for parser_helper in concept_parser_helpers: for parser_helper in concept_parser_helpers:
@@ -351,7 +355,11 @@ class AtomNodeParser(BaseNodeParser):
False, False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)) context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
parser_helpers = self.get_valid(self.get_concepts_sequences()) sequences = self.get_concepts_sequences()
if by_name := self.get_by_name(parser_input):
sequences.extend(by_name)
parser_helpers = self.get_valid(sequences)
if len(parser_helpers): if len(parser_helpers):
ret = [] ret = []
+34 -5
View File
@@ -153,17 +153,17 @@ class BaseParser:
return parser_input return parser_input
def get_input_as_tokens(self, parser_input): def get_input_as_tokens(self, parser_input, strip_eof=False):
if isinstance(parser_input, list): if isinstance(parser_input, list):
return self.add_eof_if_needed(parser_input) return self.manage_eof(parser_input, strip_eof)
if isinstance(parser_input, ParserResultConcept): if isinstance(parser_input, ParserResultConcept):
if parser_input.tokens: if parser_input.tokens:
return self.add_eof_if_needed(parser_input.tokens) return self.manage_eof(parser_input.tokens, strip_eof)
else: else:
return Tokenizer(parser_input.source) return Tokenizer(parser_input.source)
return Tokenizer(parser_input) return Tokenizer(parser_input, yield_eof=not strip_eof)
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None): def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
if not isinstance(parser_input, ParserResultConcept): if not isinstance(parser_input, ParserResultConcept):
@@ -183,7 +183,12 @@ class BaseParser:
return parser_input.value return parser_input.value
@staticmethod @staticmethod
def add_eof_if_needed(lst): def manage_eof(lst, strip_eof):
if strip_eof:
if len(lst) and lst[-1].type == TokenKind.EOF:
lst.pop()
return lst
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF: if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
lst.append(Token(TokenKind.EOF, "", -1, -1, -1)) lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
return lst return lst
@@ -210,6 +215,30 @@ class BaseParser:
res += value res += value
return res return res
@staticmethod
def get_tokens_boundaries(tokens):
    """
    Return the indexes of the first and the last valid token.

    A valid token is a token that is neither a whitespace nor an EOF.
    At most one leading whitespace token is skipped; trailing whitespace
    and EOF tokens are all skipped.

    :param tokens: list of tokens (objects exposing a ``type``), or None
    :return: None when tokens is None, otherwise a ``(start, end)`` tuple;
        ``(0, 0)`` when tokens is empty or contains no valid token
    """
    if tokens is None:
        return None

    if not tokens:
        return 0, 0

    if tokens[0].type == TokenKind.EOF:
        return 0, 0

    start = 1 if tokens[0].type == TokenKind.WHITESPACE else 0
    end = len(tokens) - 1
    # The scan must stop at start: without this bound, an input made only of
    # whitespace/EOF tokens walks past index 0, wraps around through the
    # negative indexes and finally raises IndexError.
    while end >= start and tokens[end].type in (TokenKind.WHITESPACE, TokenKind.EOF):
        end -= 1

    if end < start:
        # nothing but whitespace/EOF: same answer as for an empty input
        return 0, 0

    return start, end
class BaseTokenizerIterParser(BaseParser): class BaseTokenizerIterParser(BaseParser):
+4 -8
View File
@@ -192,18 +192,16 @@ as:
assert sheerka.isinstance(res[0].value, BuiltinConcepts.NOP) assert sheerka.isinstance(res[0].value, BuiltinConcepts.NOP)
def test_i_can_recognize_concept_with_variable(self): def test_i_can_recognize_concept_with_variable(self):
sheerka = self.get_sheerka() sheerka, context, concept_foo, concept_hello = self.init_concepts(
concept_hello = Concept(name="hello a").def_var("a") "foo",
concept_foo = Concept(name="foo") Concept(name="hello a").def_var("a"),
sheerka.add_in_cache(concept_hello) create_new=True)
sheerka.add_in_cache(concept_foo)
res = sheerka.evaluate_user_input("hello foo") res = sheerka.evaluate_user_input("hello foo")
return_value = res[0].value return_value = res[0].value
assert len(res) == 1 assert len(res) == 1
assert res[0].status assert res[0].status
assert sheerka.isinstance(return_value, concept_hello) assert sheerka.isinstance(return_value, concept_hello)
assert return_value.metadata.variables[0] == ('a', "foo")
# sanity check # sanity check
evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value) evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value)
@@ -864,12 +862,10 @@ as:
sheerka = self.init_scenario(definitions) sheerka = self.init_scenario(definitions)
res = sheerka.evaluate_user_input("eval mult") res = sheerka.evaluate_user_input("eval mult")
assert res[0].status assert res[0].status
assert isinstance(res[0].body, Concept) assert isinstance(res[0].body, Concept)
# res = sheerka.evaluate_user_input("eval a mult b") # res = sheerka.evaluate_user_input("eval a mult b")
#
# assert res[0].status # assert res[0].status
# assert isinstance(res[0].body, Concept) # assert isinstance(res[0].body, Concept)
+12 -12
View File
@@ -2,7 +2,7 @@ import pytest
from core.builtin_concepts import BuiltinConcepts from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_DEF from core.concept import Concept, DEFINITION_TYPE_DEF
from parsers.AtomNodeParser import AtomNodeParser from parsers.AtomNodeParser import AtomNodeParser
from parsers.BaseNodeParser import cnode, utnode, CNC, SCN from parsers.BaseNodeParser import cnode, utnode, CNC, SCN, CN
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array from tests.parsers.parsers_utils import compute_expected_array
@@ -35,16 +35,16 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
("foo", ["foo"]), ("foo", ["foo"]),
("foo bar", ["foo", "bar"]), ("foo bar", ["foo", "bar"]),
("foo bar twenties", ["foo", "bar", "twenties"]), ("foo bar twenties", ["foo", "bar", "twenties"]),
# ("plus", ["plus"]), ("a plus b", [CN("plus", 0, 4)]),
# ("++", ["++"]), ("mult", [CN("mult", 0, 0, "mult")]),
# ("a++ foo", ["++", "foo"]),
]) ])
def test_i_can_parse_simple_sequences(self, text, expected): def test_i_can_parse_simple_sequences(self, text, expected):
concepts_map = { concepts_map = {
"foo": Concept("foo"), "foo": Concept("foo"),
"bar": Concept("bar"), "bar": Concept("bar"),
"plus": Concept("a plus b").def_var("a").def_var("b"), "plus": Concept("a plus b").def_var("a").def_var("b"),
"++": Concept("++", definition="a++", definition_type=DEFINITION_TYPE_DEF).def_var("a"), "mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var(
"b"),
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
} }
@@ -286,19 +286,19 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
assert lexer_nodes == expected_array assert lexer_nodes == expected_array
@pytest.mark.parametrize("text, expected_is_evaluated", [ @pytest.mark.parametrize("text, expected_is_evaluated", [
("foo", False), ("foo", False),
("bar", False ), ("bar", False),
("twenties", True), ("twenties", True),
("plus", True), ("a plus b", True),
# ("plus", ["plus"]), ("mult", True),
# ("++", ["++"]),
# ("a++ foo", ["++", "foo"]),
]) ])
def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated): def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated):
concepts_map = { concepts_map = {
"foo": Concept("foo"), "foo": Concept("foo"),
"bar": Concept("bar", body="'bar'"), "bar": Concept("bar", body="'bar'"),
"plus": Concept("plus", definition="a plus b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var("b"), "plus": Concept("a plus b").def_var("a").def_var("b"),
"mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var(
"b"),
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"), "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
} }
+16
View File
@@ -65,3 +65,19 @@ def test_i_can_test_split_iter_parser_indexes():
assert res[5] == Token(TokenKind.LPAR, "(", 20, 2, 12) assert res[5] == Token(TokenKind.LPAR, "(", 20, 2, 12)
assert res[6] == Token(TokenKind.RPAR, ")", 21, 2, 13) assert res[6] == Token(TokenKind.RPAR, ")", 21, 2, 13)
assert res[7] == Token(TokenKind.COMMA, ",", 22, 2, 14) assert res[7] == Token(TokenKind.COMMA, ",", 22, 2, 14)
@pytest.mark.parametrize("tokens, expected", [
    (None, None),
    ([], (0, 0)),
    (list(Tokenizer("")), (0, 0)),
    (list(Tokenizer("", yield_eof=False)), (0, 0)),
    (list(Tokenizer(" a")), (1, 1)),
    (list(Tokenizer(" a", yield_eof=False)), (1, 1)),
    (list(Tokenizer("a ")), (0, 0)),
    (list(Tokenizer("a ", yield_eof=False)), (0, 0)),
    (list(Tokenizer(" a ")), (1, 1)),
    (list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
])
def test_i_can_get_tokens_boundaries(tokens, expected):
    """
    BaseParser.get_tokens_boundaries must return ``expected`` for ``tokens``.

    The cases cover None, an empty list, an empty source, and a single "a"
    with leading and/or trailing whitespace. Every source is tokenized twice:
    with the Tokenizer defaults and with yield_eof=False (presumably without
    the trailing EOF token - confirm against Tokenizer).
    """
    assert BaseParser.get_tokens_boundaries(tokens) == expected
+12 -12
View File
@@ -632,18 +632,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
# I can't manage source code functions :-( # I can't manage source code functions :-(
# ("function(one plus three) minus two", []), # ("function(one plus three) minus two", []),
("(one plus two) ", ["one", "two", "plus"]), # ("(one plus two) ", ["one", "two", "plus"]),
("(one prefixed) ", ["one", "prefixed"]), # ("(one prefixed) ", ["one", "prefixed"]),
("(suffixed one) ", ["one", "suffixed"]), # ("(suffixed one) ", ["one", "suffixed"]),
("(one ? two : three)", ["one", "two", "three", "?"]), # ("(one ? two : three)", ["one", "two", "three", "?"]),
("square(square(one))", ["one", ("square", 1), "square"]), # ("square(square(one))", ["one", ("square", 1), "square"]),
("square ( square ( one ) )", ["one", ("square", 1), "square"]), # ("square ( square ( one ) )", ["one", ("square", 1), "square"]),
#
("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]), # ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]), # ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]), # ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
#
("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]), # ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]), ("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]), ("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),