Enhanced AtomNode parsing by name
This commit is contained in:
@@ -2,8 +2,9 @@ from dataclasses import dataclass
|
|||||||
|
|
||||||
from core import builtin_helpers
|
from core import builtin_helpers
|
||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.concept import DEFINITION_TYPE_BNF
|
from core.concept import DEFINITION_TYPE_BNF, Concept
|
||||||
from core.tokenizer import Tokenizer
|
from core.tokenizer import Tokenizer
|
||||||
|
from core.utils import strip_tokens
|
||||||
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
||||||
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
|
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
|
||||||
|
|
||||||
@@ -196,23 +197,6 @@ class AtomConceptParserHelper:
|
|||||||
clone.has_unrecognized = self.has_unrecognized
|
clone.has_unrecognized = self.has_unrecognized
|
||||||
return clone
|
return clone
|
||||||
|
|
||||||
# def _get_lexer_nodes_from_unrecognized(self):
|
|
||||||
# """
|
|
||||||
# Use the source of self.unrecognized_tokens to find concepts or source code
|
|
||||||
# :return:
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
|
|
||||||
# only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
|
|
||||||
#
|
|
||||||
# if not only_parsers_results.status:
|
|
||||||
# return None
|
|
||||||
#
|
|
||||||
# return builtin_helpers.get_lexer_nodes(
|
|
||||||
# only_parsers_results.body.body,
|
|
||||||
# self.unrecognized_tokens.start,
|
|
||||||
# self.unrecognized_tokens.tokens)
|
|
||||||
|
|
||||||
|
|
||||||
class AtomNodeParser(BaseNodeParser):
|
class AtomNodeParser(BaseNodeParser):
|
||||||
"""
|
"""
|
||||||
@@ -314,6 +298,26 @@ class AtomNodeParser(BaseNodeParser):
|
|||||||
|
|
||||||
return concept_parser_helpers
|
return concept_parser_helpers
|
||||||
|
|
||||||
|
def get_by_name(self, parser_input):
|
||||||
|
"""
|
||||||
|
Try to recognize the full parser input as a concept name
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
source = self.get_input_as_text(parser_input)
|
||||||
|
concepts = self.sheerka.get_by_name(source.strip())
|
||||||
|
if not self.sheerka.is_known(concepts):
|
||||||
|
return None
|
||||||
|
|
||||||
|
concepts = [concepts] if isinstance(concepts, Concept) else concepts
|
||||||
|
res = []
|
||||||
|
start, end = self.get_tokens_boundaries(self.tokens)
|
||||||
|
for concept in concepts:
|
||||||
|
parser_helper = AtomConceptParserHelper(None)
|
||||||
|
parser_helper.sequence.append(ConceptNode(concept, start, end, strip_tokens(self.tokens, True), source))
|
||||||
|
res.append(parser_helper)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
def get_valid(self, concept_parser_helpers):
|
def get_valid(self, concept_parser_helpers):
|
||||||
valid_parser_helpers = [] # be careful, it will be a list of list
|
valid_parser_helpers = [] # be careful, it will be a list of list
|
||||||
for parser_helper in concept_parser_helpers:
|
for parser_helper in concept_parser_helpers:
|
||||||
@@ -351,7 +355,11 @@ class AtomNodeParser(BaseNodeParser):
|
|||||||
False,
|
False,
|
||||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||||
|
|
||||||
parser_helpers = self.get_valid(self.get_concepts_sequences())
|
sequences = self.get_concepts_sequences()
|
||||||
|
if by_name := self.get_by_name(parser_input):
|
||||||
|
sequences.extend(by_name)
|
||||||
|
|
||||||
|
parser_helpers = self.get_valid(sequences)
|
||||||
|
|
||||||
if len(parser_helpers):
|
if len(parser_helpers):
|
||||||
ret = []
|
ret = []
|
||||||
|
|||||||
@@ -153,17 +153,17 @@ class BaseParser:
|
|||||||
|
|
||||||
return parser_input
|
return parser_input
|
||||||
|
|
||||||
def get_input_as_tokens(self, parser_input):
|
def get_input_as_tokens(self, parser_input, strip_eof=False):
|
||||||
if isinstance(parser_input, list):
|
if isinstance(parser_input, list):
|
||||||
return self.add_eof_if_needed(parser_input)
|
return self.manage_eof(parser_input, strip_eof)
|
||||||
|
|
||||||
if isinstance(parser_input, ParserResultConcept):
|
if isinstance(parser_input, ParserResultConcept):
|
||||||
if parser_input.tokens:
|
if parser_input.tokens:
|
||||||
return self.add_eof_if_needed(parser_input.tokens)
|
return self.manage_eof(parser_input.tokens, strip_eof)
|
||||||
else:
|
else:
|
||||||
return Tokenizer(parser_input.source)
|
return Tokenizer(parser_input.source)
|
||||||
|
|
||||||
return Tokenizer(parser_input)
|
return Tokenizer(parser_input, yield_eof=not strip_eof)
|
||||||
|
|
||||||
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
|
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
|
||||||
if not isinstance(parser_input, ParserResultConcept):
|
if not isinstance(parser_input, ParserResultConcept):
|
||||||
@@ -183,7 +183,12 @@ class BaseParser:
|
|||||||
return parser_input.value
|
return parser_input.value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_eof_if_needed(lst):
|
def manage_eof(lst, strip_eof):
|
||||||
|
if strip_eof:
|
||||||
|
if len(lst) and lst[-1].type == TokenKind.EOF:
|
||||||
|
lst.pop()
|
||||||
|
return lst
|
||||||
|
|
||||||
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
|
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
|
||||||
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
|
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
|
||||||
return lst
|
return lst
|
||||||
@@ -210,6 +215,30 @@ class BaseParser:
|
|||||||
res += value
|
res += value
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_tokens_boundaries(tokens):
|
||||||
|
"""
|
||||||
|
Returns the first and the last valid index of the tokens
|
||||||
|
a valid index is the index of a token that is neither a whitespace nor an EOF
|
||||||
|
:param tokens:
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
if tokens is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if len(tokens) == 0:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
if tokens[0].type == TokenKind.EOF:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
|
start = 1 if tokens[0].type == TokenKind.WHITESPACE else 0
|
||||||
|
end = len(tokens) - 1
|
||||||
|
while tokens[end].type in (TokenKind.WHITESPACE, TokenKind.EOF):
|
||||||
|
end -= 1
|
||||||
|
|
||||||
|
return start, end
|
||||||
|
|
||||||
|
|
||||||
class BaseTokenizerIterParser(BaseParser):
|
class BaseTokenizerIterParser(BaseParser):
|
||||||
|
|
||||||
|
|||||||
@@ -192,18 +192,16 @@ as:
|
|||||||
assert sheerka.isinstance(res[0].value, BuiltinConcepts.NOP)
|
assert sheerka.isinstance(res[0].value, BuiltinConcepts.NOP)
|
||||||
|
|
||||||
def test_i_can_recognize_concept_with_variable(self):
|
def test_i_can_recognize_concept_with_variable(self):
|
||||||
sheerka = self.get_sheerka()
|
sheerka, context, concept_foo, concept_hello = self.init_concepts(
|
||||||
concept_hello = Concept(name="hello a").def_var("a")
|
"foo",
|
||||||
concept_foo = Concept(name="foo")
|
Concept(name="hello a").def_var("a"),
|
||||||
sheerka.add_in_cache(concept_hello)
|
create_new=True)
|
||||||
sheerka.add_in_cache(concept_foo)
|
|
||||||
|
|
||||||
res = sheerka.evaluate_user_input("hello foo")
|
res = sheerka.evaluate_user_input("hello foo")
|
||||||
return_value = res[0].value
|
return_value = res[0].value
|
||||||
assert len(res) == 1
|
assert len(res) == 1
|
||||||
assert res[0].status
|
assert res[0].status
|
||||||
assert sheerka.isinstance(return_value, concept_hello)
|
assert sheerka.isinstance(return_value, concept_hello)
|
||||||
assert return_value.metadata.variables[0] == ('a', "foo")
|
|
||||||
|
|
||||||
# sanity check
|
# sanity check
|
||||||
evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value)
|
evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value)
|
||||||
@@ -864,12 +862,10 @@ as:
|
|||||||
sheerka = self.init_scenario(definitions)
|
sheerka = self.init_scenario(definitions)
|
||||||
|
|
||||||
res = sheerka.evaluate_user_input("eval mult")
|
res = sheerka.evaluate_user_input("eval mult")
|
||||||
|
|
||||||
assert res[0].status
|
assert res[0].status
|
||||||
assert isinstance(res[0].body, Concept)
|
assert isinstance(res[0].body, Concept)
|
||||||
|
|
||||||
# res = sheerka.evaluate_user_input("eval a mult b")
|
# res = sheerka.evaluate_user_input("eval a mult b")
|
||||||
#
|
|
||||||
# assert res[0].status
|
# assert res[0].status
|
||||||
# assert isinstance(res[0].body, Concept)
|
# assert isinstance(res[0].body, Concept)
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import pytest
|
|||||||
from core.builtin_concepts import BuiltinConcepts
|
from core.builtin_concepts import BuiltinConcepts
|
||||||
from core.concept import Concept, DEFINITION_TYPE_DEF
|
from core.concept import Concept, DEFINITION_TYPE_DEF
|
||||||
from parsers.AtomNodeParser import AtomNodeParser
|
from parsers.AtomNodeParser import AtomNodeParser
|
||||||
from parsers.BaseNodeParser import cnode, utnode, CNC, SCN
|
from parsers.BaseNodeParser import cnode, utnode, CNC, SCN, CN
|
||||||
|
|
||||||
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
|
||||||
from tests.parsers.parsers_utils import compute_expected_array
|
from tests.parsers.parsers_utils import compute_expected_array
|
||||||
@@ -35,16 +35,16 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
|
|||||||
("foo", ["foo"]),
|
("foo", ["foo"]),
|
||||||
("foo bar", ["foo", "bar"]),
|
("foo bar", ["foo", "bar"]),
|
||||||
("foo bar twenties", ["foo", "bar", "twenties"]),
|
("foo bar twenties", ["foo", "bar", "twenties"]),
|
||||||
# ("plus", ["plus"]),
|
("a plus b", [CN("plus", 0, 4)]),
|
||||||
# ("++", ["++"]),
|
("mult", [CN("mult", 0, 0, "mult")]),
|
||||||
# ("a++ foo", ["++", "foo"]),
|
|
||||||
])
|
])
|
||||||
def test_i_can_parse_simple_sequences(self, text, expected):
|
def test_i_can_parse_simple_sequences(self, text, expected):
|
||||||
concepts_map = {
|
concepts_map = {
|
||||||
"foo": Concept("foo"),
|
"foo": Concept("foo"),
|
||||||
"bar": Concept("bar"),
|
"bar": Concept("bar"),
|
||||||
"plus": Concept("a plus b").def_var("a").def_var("b"),
|
"plus": Concept("a plus b").def_var("a").def_var("b"),
|
||||||
"++": Concept("++", definition="a++", definition_type=DEFINITION_TYPE_DEF).def_var("a"),
|
"mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var(
|
||||||
|
"b"),
|
||||||
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
|
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,18 +287,18 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
|
|||||||
|
|
||||||
@pytest.mark.parametrize("text, expected_is_evaluated", [
|
@pytest.mark.parametrize("text, expected_is_evaluated", [
|
||||||
("foo", False),
|
("foo", False),
|
||||||
("bar", False ),
|
("bar", False),
|
||||||
("twenties", True),
|
("twenties", True),
|
||||||
("plus", True),
|
("a plus b", True),
|
||||||
# ("plus", ["plus"]),
|
("mult", True),
|
||||||
# ("++", ["++"]),
|
|
||||||
# ("a++ foo", ["++", "foo"]),
|
|
||||||
])
|
])
|
||||||
def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated):
|
def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated):
|
||||||
concepts_map = {
|
concepts_map = {
|
||||||
"foo": Concept("foo"),
|
"foo": Concept("foo"),
|
||||||
"bar": Concept("bar", body="'bar'"),
|
"bar": Concept("bar", body="'bar'"),
|
||||||
"plus": Concept("plus", definition="a plus b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var("b"),
|
"plus": Concept("a plus b").def_var("a").def_var("b"),
|
||||||
|
"mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var(
|
||||||
|
"b"),
|
||||||
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
|
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -65,3 +65,19 @@ def test_i_can_test_split_iter_parser_indexes():
|
|||||||
assert res[5] == Token(TokenKind.LPAR, "(", 20, 2, 12)
|
assert res[5] == Token(TokenKind.LPAR, "(", 20, 2, 12)
|
||||||
assert res[6] == Token(TokenKind.RPAR, ")", 21, 2, 13)
|
assert res[6] == Token(TokenKind.RPAR, ")", 21, 2, 13)
|
||||||
assert res[7] == Token(TokenKind.COMMA, ",", 22, 2, 14)
|
assert res[7] == Token(TokenKind.COMMA, ",", 22, 2, 14)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("tokens, expected", [
|
||||||
|
(None, None),
|
||||||
|
([], (0, 0)),
|
||||||
|
(list(Tokenizer("")), (0, 0)),
|
||||||
|
(list(Tokenizer("", yield_eof=False)), (0, 0)),
|
||||||
|
(list(Tokenizer(" a")), (1, 1)),
|
||||||
|
(list(Tokenizer(" a", yield_eof=False)), (1, 1)),
|
||||||
|
(list(Tokenizer("a ")), (0, 0)),
|
||||||
|
(list(Tokenizer("a ", yield_eof=False)), (0, 0)),
|
||||||
|
(list(Tokenizer(" a ")), (1, 1)),
|
||||||
|
(list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
|
||||||
|
])
|
||||||
|
def test_i_can_get_tokens_boundaries(tokens, expected):
|
||||||
|
assert BaseParser.get_tokens_boundaries(tokens) == expected
|
||||||
|
|||||||
@@ -632,18 +632,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
|
|||||||
# I can't manage source code functions :-(
|
# I can't manage source code functions :-(
|
||||||
# ("function(one plus three) minus two", []),
|
# ("function(one plus three) minus two", []),
|
||||||
|
|
||||||
("(one plus two) ", ["one", "two", "plus"]),
|
# ("(one plus two) ", ["one", "two", "plus"]),
|
||||||
("(one prefixed) ", ["one", "prefixed"]),
|
# ("(one prefixed) ", ["one", "prefixed"]),
|
||||||
("(suffixed one) ", ["one", "suffixed"]),
|
# ("(suffixed one) ", ["one", "suffixed"]),
|
||||||
("(one ? two : three)", ["one", "two", "three", "?"]),
|
# ("(one ? two : three)", ["one", "two", "three", "?"]),
|
||||||
("square(square(one))", ["one", ("square", 1), "square"]),
|
# ("square(square(one))", ["one", ("square", 1), "square"]),
|
||||||
("square ( square ( one ) )", ["one", ("square", 1), "square"]),
|
# ("square ( square ( one ) )", ["one", ("square", 1), "square"]),
|
||||||
|
#
|
||||||
("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]),
|
# ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]),
|
||||||
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
|
# ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
|
||||||
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
|
# ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
|
||||||
|
#
|
||||||
("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
# ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
||||||
("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
|
||||||
("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
|
("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user