Enhanced AtomNode parsing by name

This commit is contained in:
2020-05-18 08:25:29 +02:00
parent 08e3086820
commit d080cbb05a
6 changed files with 105 additions and 56 deletions
+27 -19
View File
@@ -2,8 +2,9 @@ from dataclasses import dataclass
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import DEFINITION_TYPE_BNF
from core.concept import DEFINITION_TYPE_BNF, Concept
from core.tokenizer import Tokenizer
from core.utils import strip_tokens
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
@@ -196,23 +197,6 @@ class AtomConceptParserHelper:
clone.has_unrecognized = self.has_unrecognized
return clone
# def _get_lexer_nodes_from_unrecognized(self):
# """
# Use the source of self.unrecognized_tokens to find concepts or source code
# :return:
# """
#
# res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
# only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
#
# if not only_parsers_results.status:
# return None
#
# return builtin_helpers.get_lexer_nodes(
# only_parsers_results.body.body,
# self.unrecognized_tokens.start,
# self.unrecognized_tokens.tokens)
class AtomNodeParser(BaseNodeParser):
"""
@@ -314,6 +298,26 @@ class AtomNodeParser(BaseNodeParser):
return concept_parser_helpers
def get_by_name(self, parser_input):
    """
    Try to recognize the full parser input as a concept name.

    :param parser_input: raw parser input (tokens, text or parser result)
    :return: a list of AtomConceptParserHelper, one per matching concept,
        or None when the sheerka does not know the name
    """
    source = self.get_input_as_text(parser_input)
    concepts = self.sheerka.get_by_name(source.strip())
    if not self.sheerka.is_known(concepts):
        return None
    # get_by_name may return a single Concept or a collection; normalize to a list.
    concepts = [concepts] if isinstance(concepts, Concept) else concepts
    res = []
    # Boundaries of the meaningful tokens (whitespace/EOF stripped at both ends).
    start, end = self.get_tokens_boundaries(self.tokens)
    for concept in concepts:
        # One helper per candidate concept; its sequence holds a single ConceptNode
        # spanning the whole input.
        parser_helper = AtomConceptParserHelper(None)
        parser_helper.sequence.append(ConceptNode(concept, start, end, strip_tokens(self.tokens, True), source))
        res.append(parser_helper)
    return res
def get_valid(self, concept_parser_helpers):
valid_parser_helpers = [] # be careful, it will be a list of list
for parser_helper in concept_parser_helpers:
@@ -351,7 +355,11 @@ class AtomNodeParser(BaseNodeParser):
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
parser_helpers = self.get_valid(self.get_concepts_sequences())
sequences = self.get_concepts_sequences()
if by_name := self.get_by_name(parser_input):
sequences.extend(by_name)
parser_helpers = self.get_valid(sequences)
if len(parser_helpers):
ret = []
+34 -5
View File
@@ -153,17 +153,17 @@ class BaseParser:
return parser_input
def get_input_as_tokens(self, parser_input):
def get_input_as_tokens(self, parser_input, strip_eof=False):
if isinstance(parser_input, list):
return self.add_eof_if_needed(parser_input)
return self.manage_eof(parser_input, strip_eof)
if isinstance(parser_input, ParserResultConcept):
if parser_input.tokens:
return self.add_eof_if_needed(parser_input.tokens)
return self.manage_eof(parser_input.tokens, strip_eof)
else:
return Tokenizer(parser_input.source)
return Tokenizer(parser_input)
return Tokenizer(parser_input, yield_eof=not strip_eof)
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
if not isinstance(parser_input, ParserResultConcept):
@@ -183,7 +183,12 @@ class BaseParser:
return parser_input.value
@staticmethod
def manage_eof(lst, strip_eof):
    """
    Normalize the EOF marker at the end of a token list.

    Replaces the former add_eof_if_needed: the stale duplicate signature
    left by the rename has been removed.

    :param lst: list of Token objects; mutated in place
    :param strip_eof: when True, remove a trailing EOF token if present;
        when False, append an EOF token if the list does not end with one
    :return: the (possibly mutated) input list
    """
    if strip_eof:
        # Drop the trailing EOF, if any, and return early.
        if len(lst) and lst[-1].type == TokenKind.EOF:
            lst.pop()
        return lst
    # Ensure the list ends with an EOF token (synthetic positions are -1).
    if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
        lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
    return lst
@@ -210,6 +215,30 @@ class BaseParser:
res += value
return res
@staticmethod
def get_tokens_boundaries(tokens):
    """
    Return the first and the last valid index of the tokens.

    A valid index is the index of a token that is neither a whitespace
    nor an EOF token.

    :param tokens: list of Token objects, or None
    :return: (start, end) tuple of indexes, or None when tokens is None
    """
    if tokens is None:
        return None
    # Empty input or an immediate EOF: no meaningful tokens at all.
    if len(tokens) == 0 or tokens[0].type == TokenKind.EOF:
        return 0, 0
    trivia = (TokenKind.WHITESPACE, TokenKind.EOF)
    # Skip a single leading whitespace token.
    start = 1 if tokens[0].type == TokenKind.WHITESPACE else 0
    end = len(tokens) - 1
    # Walk back over trailing trivia. The `end > 0` guard prevents the
    # index from going negative (and wrapping around, ultimately raising
    # IndexError) when the input is made of whitespace/EOF tokens only.
    while end > 0 and tokens[end].type in trivia:
        end -= 1
    # Whitespace-only input: behave like empty input.
    if end < start:
        return 0, 0
    return start, end
class BaseTokenizerIterParser(BaseParser):
+4 -8
View File
@@ -192,18 +192,16 @@ as:
assert sheerka.isinstance(res[0].value, BuiltinConcepts.NOP)
def test_i_can_recognize_concept_with_variable(self):
sheerka = self.get_sheerka()
concept_hello = Concept(name="hello a").def_var("a")
concept_foo = Concept(name="foo")
sheerka.add_in_cache(concept_hello)
sheerka.add_in_cache(concept_foo)
sheerka, context, concept_foo, concept_hello = self.init_concepts(
"foo",
Concept(name="hello a").def_var("a"),
create_new=True)
res = sheerka.evaluate_user_input("hello foo")
return_value = res[0].value
assert len(res) == 1
assert res[0].status
assert sheerka.isinstance(return_value, concept_hello)
assert return_value.metadata.variables[0] == ('a', "foo")
# sanity check
evaluated = sheerka.evaluate_concept(self.get_context(eval_body=True), return_value)
@@ -864,12 +862,10 @@ as:
sheerka = self.init_scenario(definitions)
res = sheerka.evaluate_user_input("eval mult")
assert res[0].status
assert isinstance(res[0].body, Concept)
# res = sheerka.evaluate_user_input("eval a mult b")
#
# assert res[0].status
# assert isinstance(res[0].body, Concept)
+11 -11
View File
@@ -2,7 +2,7 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_DEF
from parsers.AtomNodeParser import AtomNodeParser
from parsers.BaseNodeParser import cnode, utnode, CNC, SCN
from parsers.BaseNodeParser import cnode, utnode, CNC, SCN, CN
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array
@@ -35,16 +35,16 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
("foo", ["foo"]),
("foo bar", ["foo", "bar"]),
("foo bar twenties", ["foo", "bar", "twenties"]),
# ("plus", ["plus"]),
# ("++", ["++"]),
# ("a++ foo", ["++", "foo"]),
("a plus b", [CN("plus", 0, 4)]),
("mult", [CN("mult", 0, 0, "mult")]),
])
def test_i_can_parse_simple_sequences(self, text, expected):
concepts_map = {
"foo": Concept("foo"),
"bar": Concept("bar"),
"plus": Concept("a plus b").def_var("a").def_var("b"),
"++": Concept("++", definition="a++", definition_type=DEFINITION_TYPE_DEF).def_var("a"),
"mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var(
"b"),
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
}
@@ -287,18 +287,18 @@ class TestAtomsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, expected_is_evaluated", [
("foo", False),
("bar", False ),
("bar", False),
("twenties", True),
("plus", True),
# ("plus", ["plus"]),
# ("++", ["++"]),
# ("a++ foo", ["++", "foo"]),
("a plus b", True),
("mult", True),
])
def test_concepts_with_variables_must_not_be_evaluated(self, text, expected_is_evaluated):
concepts_map = {
"foo": Concept("foo"),
"bar": Concept("bar", body="'bar'"),
"plus": Concept("plus", definition="a plus b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var("b"),
"plus": Concept("a plus b").def_var("a").def_var("b"),
"mult": Concept("mult", definition="a mult b", definition_type=DEFINITION_TYPE_DEF).def_var("a").def_var(
"b"),
"twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_var("unit"),
}
+16
View File
@@ -65,3 +65,19 @@ def test_i_can_test_split_iter_parser_indexes():
assert res[5] == Token(TokenKind.LPAR, "(", 20, 2, 12)
assert res[6] == Token(TokenKind.RPAR, ")", 21, 2, 13)
assert res[7] == Token(TokenKind.COMMA, ",", 22, 2, 14)
@pytest.mark.parametrize("tokens, expected", [
    # None input propagates as None.
    (None, None),
    # Empty list and EOF-only streams collapse to (0, 0).
    ([], (0, 0)),
    (list(Tokenizer("")), (0, 0)),
    (list(Tokenizer("", yield_eof=False)), (0, 0)),
    # Leading whitespace shifts the start index by one.
    (list(Tokenizer(" a")), (1, 1)),
    (list(Tokenizer(" a", yield_eof=False)), (1, 1)),
    # Trailing whitespace/EOF are skipped when computing the end index.
    (list(Tokenizer("a ")), (0, 0)),
    (list(Tokenizer("a ", yield_eof=False)), (0, 0)),
    (list(Tokenizer(" a ")), (1, 1)),
    (list(Tokenizer(" a ", yield_eof=False)), (1, 1)),
])
def test_i_can_get_tokens_boundaries(tokens, expected):
    """get_tokens_boundaries skips leading/trailing whitespace and EOF tokens."""
    assert BaseParser.get_tokens_boundaries(tokens) == expected
+12 -12
View File
@@ -632,18 +632,18 @@ class TestSyaNodeParser(TestUsingMemoryBasedSheerka):
# I can't manage source code functions :-(
# ("function(one plus three) minus two", []),
("(one plus two) ", ["one", "two", "plus"]),
("(one prefixed) ", ["one", "prefixed"]),
("(suffixed one) ", ["one", "suffixed"]),
("(one ? two : three)", ["one", "two", "three", "?"]),
("square(square(one))", ["one", ("square", 1), "square"]),
("square ( square ( one ) )", ["one", ("square", 1), "square"]),
("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
# ("(one plus two) ", ["one", "two", "plus"]),
# ("(one prefixed) ", ["one", "prefixed"]),
# ("(suffixed one) ", ["one", "suffixed"]),
# ("(one ? two : three)", ["one", "two", "three", "?"]),
# ("square(square(one))", ["one", ("square", 1), "square"]),
# ("square ( square ( one ) )", ["one", ("square", 1), "square"]),
#
# ("square(one plus three) minus two", ["one", "three", "plus", "square", "two", "minus"]),
# ("square( one plus three ) minus two", ["one", "three", "plus", "square", "two", "minus"]),
# ("one minus square( two plus three ) ", ["one", "two", "three", "plus", "square", "minus"]),
#
# ("((one prefixed) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( one prefixed ) prefixed)", ["one", "prefixed", ("prefixed", 1)]),
("( ( square( one ) prefixed ) prefixed)", ["one", "square", "prefixed", ("prefixed", 1)]),