Added SyaNodeParser (finally, after one month)

This commit is contained in:
2020-04-09 15:42:36 +02:00
parent c9acfa99a1
commit 6c7c529016
56 changed files with 5322 additions and 404 deletions
+150
View File
@@ -0,0 +1,150 @@
from core.concept import CC, Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import scnode, utnode, cnode, SCWC, CNC, short_cnode, SourceCodeWithConceptNode, CN, UTN
from parsers.SyaNodeParser import SyaConceptParserHelper
def _index(tokens, expr, index):
    """
    Locate the ``index``-th occurrence of ``expr`` (tokenized) inside ``tokens``.

    :param tokens: list of token values to search in
    :param expr: expression whose tokenized form is the needle
    :param index: number of matching occurrences to skip before returning
    :return: tuple of (start position, match length)
    :raises ValueError: when the expression does not occur often enough
    """
    needle = [tok.value for tok in Tokenizer(expr) if tok.type != TokenKind.EOF]
    width = len(needle)
    remaining = index
    for start in range(len(tokens) - width + 1):
        # compare the candidate window against the needle in one shot
        if tokens[start:start + width] == needle:
            if remaining == 0:
                return start, width
            remaining -= 1
    raise ValueError(f"substring '{expr}' not found")
def compute_debug_array(res):
    """
    Flatten the ``debug`` attribute of each result into comparable value lists.

    Raw ``Token`` entries contribute their value (whitespace tokens are
    dropped); any other entry contributes the name of its concept.

    :param res: iterable of results carrying a ``debug`` token list
    :return: one list of comparable values per input result
    """
    flattened = []
    for result in res:
        values = [
            entry.value if isinstance(entry, Token) else entry.concept.name
            for entry in result.debug
            if not (isinstance(entry, Token) and entry.type == TokenKind.WHITESPACE)
        ]
        flattened.append(values)
    return flattened
def get_node(concepts_map, expression_as_tokens, sub_expr, concept_key=None, skip=0, is_bnf=False, sya=False):
    """
    Tries to find sub in expression
    When found, transform it to its correct type

    The type of ``sub_expr`` drives the conversion: already-built nodes are
    returned as-is (possibly after remapping their concept key), composite
    descriptions (SCWC, CNC/CC/CN, UTN, short_cnode, tuple) are resolved
    recursively, and a plain string is searched in the token stream.

    NOTE(review): CNC/CC/CN and UTN inputs are mutated in place
    (``concept_key``/``concept``/positions) before being returned.

    :param expression_as_tokens: full expression
    :param sub_expr: sub expression to search in the full expression
    :param concepts_map: hash of the known concepts
    :param concept_key: key of the concept if different from sub_expr
    :param skip: number of occurrences of sub_expr to skip
    :param is_bnf: True if the concept to search is a bnf definition
    :param sya: Return SyaConceptParserHelper instead of a ConceptNode when needed
    :return:
    """
    # special case: a quoted closing parenthesis stands for the raw ")" token
    if sub_expr == "')'":
        return ")"
    # already-built source-code / unrecognized-tokens nodes need no resolution
    if isinstance(sub_expr, (scnode, utnode)):
        return sub_expr
    if isinstance(sub_expr, cnode):
        # for cnode, map the concept key to the one from concepts_maps if needed
        if sub_expr.concept_key.startswith("#"):
            return cnode(
                concepts_map[sub_expr.concept_key[1:]].key,
                sub_expr.start,
                sub_expr.end,
                sub_expr.source
            )
        else:
            return sub_expr
    if isinstance(sub_expr, SCWC):
        # source code with concepts: resolve the delimiters and each inner part
        first = get_node(concepts_map, expression_as_tokens, sub_expr.first, sya=sya)
        last = get_node(concepts_map, expression_as_tokens, sub_expr.last, sya=sya)
        content = [get_node(concepts_map, expression_as_tokens, c, sya=sya) for c in sub_expr.content]
        return SourceCodeWithConceptNode(first, last, content).pseudo_fix_source()
    if isinstance(sub_expr, (CNC, CC, CN)):
        # concept description: locate the underlying concept in the expression,
        # then fix the description's key, concept and positions in place
        concept_node = get_node(
            concepts_map,
            expression_as_tokens,
            sub_expr.source or sub_expr.concept_key,
            sub_expr.concept_key, sya=sya)
        concept_found = concept_node.concept
        sub_expr.concept_key = concept_found.key
        sub_expr.concept = concept_found
        sub_expr.fix_pos((concept_node.start, concept_node.end if hasattr(concept_node, "end") else concept_node.start))
        if hasattr(sub_expr, "compiled"):
            # resolve each compiled property value and widen the node positions
            for k, v in sub_expr.compiled.items():
                node = get_node(concepts_map, expression_as_tokens, v, sya=sya)  # need to get start and end positions
                new_value = CC(Concept().update_from(concepts_map[v])) if (isinstance(v, str) and v in concepts_map) \
                    else node
                sub_expr.compiled[k] = new_value
                sub_expr.fix_pos(node)
        if hasattr(sub_expr, "fix_source"):
            sub_expr.fix_source(expression_as_tokens[sub_expr.start: sub_expr.end + 1])
        return sub_expr
    if isinstance(sub_expr, UTN):
        # unrecognized tokens description: only the positions need resolving
        node = get_node(concepts_map, expression_as_tokens, sub_expr.source)
        sub_expr.fix_pos(node)
        return sub_expr
    if isinstance(sub_expr, short_cnode):
        # short form: re-resolve its source, forcing bnf-concept resolution
        return get_node(concepts_map, expression_as_tokens, sub_expr.source,
                        concept_key=sub_expr.concept_key, skip=skip, is_bnf=True, sya=sya)
    if isinstance(sub_expr, tuple):
        # (sub_expr, skip) pair: retry, skipping the given number of occurrences
        return get_node(concepts_map, expression_as_tokens, sub_expr[0],
                        concept_key=concept_key, skip=sub_expr[1], is_bnf=is_bnf, sya=sya)
    # plain string: locate it in the token stream
    start, length = _index(expression_as_tokens, sub_expr, skip)
    # special case of python source code
    if "+" in sub_expr and sub_expr.strip() != "+":
        return scnode(start, start + length - 1, sub_expr)
    # try to match one of the concept from the map
    concept_key = concept_key or sub_expr
    concept_found = concepts_map.get(concept_key, None)
    if concept_found:
        concept_found = Concept().update_from(concept_found)  # make a copy when massively used in tests
        if not sya or len(concept_found.metadata.props) == 0 or is_bnf:
            # if it's an atom, then return a ConceptNode
            return CN(concept_found, start, start + length - 1, source=sub_expr)
        else:
            # else return a ParserHelper
            return SyaConceptParserHelper(concept_found, start)
    else:
        # else an UnrecognizedTokensNode
        return utnode(start, start + length - 1, sub_expr)
def compute_expected_array(concepts_map, expression, expected, sya=False):
    """
    Computes a simple but sufficient version of the result of infix_to_postfix()

    :param concepts_map: hash of the known concepts
    :param expression: the full expression being parsed
    :param expected: sub expressions / node descriptions to resolve
    :param sya: if true, generate an SyaConceptParserHelper instead of a cnode
    :return: list of resolved nodes, one per entry of ``expected``
    """
    tokens = [tok.value for tok in Tokenizer(expression) if tok.type != TokenKind.EOF]
    resolved = []
    for sub_expr in expected:
        resolved.append(get_node(concepts_map, tokens, sub_expr, sya=sya))
    return resolved
+241
View File
@@ -0,0 +1,241 @@
import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.AtomNodeParser import AtomNodeParser
from parsers.BaseNodeParser import cnode, utnode, CNC
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array
class TestAtomsParser(TestUsingMemoryBasedSheerka):
    """Tests for AtomNodeParser: recognizing sequences of atom concepts.

    Fixes over the original: removed an f-string prefix with no placeholders
    (ruff F541) and renamed loop variables that shadowed the ``expected``
    parameter in two ``zip(list_of_res, expected)`` loops.
    """

    def init_parser(self, concepts_map, **kwargs):
        """Register the given concepts and return (sheerka, context, parser)."""
        sheerka, context, *updated_concepts = self.init_concepts(singleton=True, *concepts_map.values(), **kwargs)
        parser = AtomNodeParser()
        parser.initialize(context, updated_concepts)
        return sheerka, context, parser

    def test_i_cannot_parse_empty_string(self):
        sheerka, context, parser = self.init_parser({})
        res = parser.parse(context, "")
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.IS_EMPTY)

    @pytest.mark.parametrize("text, expected", [
        ("foo", ["foo"]),
        ("foo bar", ["foo", "bar"]),
        ("foo bar twenties", ["foo", "bar", "twenties"]),
    ])
    def test_i_can_parse_simple_sequences(self, text, expected):
        """Atoms made of a single token are recognized in order."""
        concepts_map = {
            "foo": Concept("foo"),
            "bar": Concept("bar"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("foo bar", ["foo bar"]),
        ("one two three", ["one two three"]),
        ("foo bar twenties one two three", ["foo bar", "twenties", "one two three"]),
    ])
    def test_i_can_parse_long_names(self, text, expected):
        """Atoms spanning several tokens are matched greedily."""
        concepts_map = {
            "foo bar": Concept("foo bar"),
            "one two three": Concept("one two three"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        ("foo bar suffixed one", False, ["foo bar", " suffixed ", "one"]),
        ("foo bar one prefixed", False, ["foo bar", "one", " prefixed"]),
        ("foo bar one infix two", False, ["foo bar", "one", " infix ", "two"]),
        ("foo bar 1 + 1", True, ["foo bar", " 1 + 1"]),
        ("foo bar twenty one", False, ["foo bar", " twenty ", "one"]),
        ("foo bar x$!#", False, ["foo bar", " x$!#"]),
        ("suffixed one foo bar", False, ["suffixed ", "one", "foo bar"]),
        ("one prefixed foo bar", False, ["one", " prefixed ", "foo bar"]),
        ("one infix two foo bar", False, ["one", " infix ", "two", "foo bar"]),
        ("1 + 1 foo bar", True, ["1 + 1 ", "foo bar"]),
        ("twenty one foo bar", False, ["twenty ", "one", "foo bar"]),
        ("x$!# foo bar", False, ["x$!# ", "foo bar"]),
        ("func(one)", False, ["func(", "one", ")"]),
    ])
    def test_i_can_parse_when_unrecognized(self, text, expected_status, expected):
        """Unknown token runs become unrecognized nodes between known atoms."""
        concepts_map = {
            "prefixed": Concept("a prefixed").def_prop("a"),
            "suffixed": Concept("prefixed a").def_prop("a"),
            "infix": Concept("a infix b").def_prop("a").def_prop("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status == expected_status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected_status, expected", [
        (" one two ", True, [cnode("one", 1, 1, "one"), cnode("two", 3, 3, "two")]),
        (" one x$!# ", False, [cnode("one", 1, 1, "one"), utnode(2, 7, " x$!# ")]),
        (" foo bar x$!# ", False, [cnode("foo bar", 1, 3, "foo bar"), utnode(4, 9, " x$!# ")]),
    ])
    def test_i_can_parse_when_surrounded_by_spaces(self, text, expected_status, expected):
        """Leading/trailing whitespace does not shift node positions."""
        concepts_map = {
            "prefixed": Concept("a prefixed").def_prop("a"),
            "suffixed": Concept("prefixed a").def_prop("a"),
            "infix": Concept("a infix b").def_prop("a").def_prop("b"),
            "foo bar": Concept("foo bar"),
            "one": Concept("one"),
            "two": Concept("two"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        wrapper = res.body
        lexer_nodes = res.body.body
        assert res.status == expected_status
        expected_array = compute_expected_array(concepts_map, text, expected)
        assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
        assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text, expected", [
        ("one two", [["one", "two"], ["one two"]])
    ])
    def test_i_can_parse_when_multiple_concepts_start_with_the_same_token(self, text, expected):
        """Ambiguous prefixes yield one result per possible interpretation."""
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)
        assert len(list_of_res) == len(expected)
        for i, res in enumerate(list_of_res):
            wrapper = res.body
            lexer_nodes = res.body.body
            assert res.status
            expected_array = compute_expected_array(concepts_map, text, expected[i])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    def test_i_can_parse_multiple_concepts_when_long_names_and_unrecognized(self):
        """Ambiguity + unrecognized tokens produce all interpretation combos."""
        concepts_map = {
            "one": Concept("one"),
            "two": Concept("two"),
            "one two": Concept("one two"),
            "one two x$!# one two": Concept("one two x$!# one two"),
        }
        text = "one two x$!# one two"
        sheerka, context, parser = self.init_parser(concepts_map)
        list_of_res = parser.parse(context, text)
        expected = [
            (False, ["one", "two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one", "two", " x$!# ", ("one two", 1)]),
            (False, ["one two", " x$!# ", ("one", 1), ("two", 1)]),
            (False, ["one two", " x$!# ", ("one two", 1)]),
            (True, ["one two x$!# one two"]),
        ]
        assert len(list_of_res) == len(expected)
        # renamed loop variable so it no longer shadows `expected`
        for res, exp in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body
            assert res.status == exp[0]
            expected_array = compute_expected_array(concepts_map, text, exp[1])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array

    @pytest.mark.parametrize("text", [
        "foo",
        "foo one",  # was an f-string with no placeholders (ruff F541)
        "x$!#",
        "twenty one",
        "1 + 1",
        "foo x$!#",
        "1 + 1 twenty one",
    ])
    def test_i_cannot_parse_concepts_with_property_or_bnf_or_unrecognized(self, text):
        """Concepts with properties or bnf definitions are not atoms."""
        concepts_map = {
            "foo": Concept("foo a").def_prop("a"),
            "twenties": Concept("twenties", definition="'twenty' ('one'|'two')=unit").def_prop("unit"),
        }
        sheerka, context, parser = self.init_parser(concepts_map)
        res = parser.parse(context, text)
        assert not res.status
        assert sheerka.isinstance(res.body, BuiltinConcepts.NOT_FOR_ME)
        assert res.body.body == text

    @pytest.mark.parametrize("text, expected", [
        ("hello foo bar",
         [
             (True, [CNC("hello1", source="hello foo ", a=" foo "), "bar"]),
             (True, [CNC("hello2", source="hello foo ", b=" foo "), "bar"]),
         ]),
    ])
    def test_i_can_parse_when_unrecognized_yield_multiple_values(self, text, expected):
        """An unrecognized run matching several concepts yields all of them."""
        concepts_map = {
            "hello1": Concept("hello a").def_prop("a"),
            "hello2": Concept("hello b").def_prop("b"),
            "bar": Concept("bar")
        }
        sheerka, context, parser = self.init_parser(concepts_map, create_new=True)
        list_of_res = parser.parse(context, text)
        assert len(list_of_res) == len(expected)
        # renamed loop variable so it no longer shadows `expected`
        for res, exp in zip(list_of_res, expected):
            wrapper = res.body
            lexer_nodes = res.body.body
            assert res.status == exp[0]
            expected_array = compute_expected_array(concepts_map, text, exp[1])
            assert sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
            assert lexer_nodes == expected_array
@@ -4,10 +4,11 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import cnode, short_cnode
from parsers.BnfParser import BnfParser
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence, StrMatch, OrderedChoice, Optional, \
ParsingExpressionVisitor, TerminalNode, NonTerminalNode, ZeroOrMore, OneOrMore, \
UnrecognizedTokensNode, cnode, short_cnode, ConceptExpression, ConceptGroupExpression
UnrecognizedTokensNode, ConceptExpression, ConceptGroupExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -72,15 +73,16 @@ def cprop(concept, prop_name):
return concept.compiled[prop_name]
class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
class TestBnfConceptLexerParser(TestUsingMemoryBasedSheerka):
def init(self, concepts, grammar):
context = self.get_context()
sheerka = self.get_sheerka(singleton=True)
context = self.get_context(sheerka)
for c in concepts:
context.sheerka.add_in_cache(c)
context.sheerka.set_id_if_needed(c, False)
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
return context, parser
@@ -602,7 +604,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {foo: Optional("one", ConceptExpression("foo"))}
context = self.get_context()
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression("foo", rule_name="foo"))
@@ -612,7 +614,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
context = self.get_context()
context.concepts["foo"] = foo
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
assert parser.concepts_grammars[foo] == Optional("one", ConceptExpression(foo, rule_name="foo"))
@@ -636,7 +638,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {foo: Sequence("twenty", number)}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
res = parser.parse(context, "twenty two")
@@ -686,7 +688,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {foo: ZeroOrMore("one")}
context, parser = self.init([foo], grammar)
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
res = parser.parse(context, "one two")
@@ -779,7 +781,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar: foo,
foo: bar
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert bar not in parser.concepts_grammars
@@ -793,7 +795,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
foo: OrderedChoice(bar, "foo")
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
@@ -824,7 +826,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar: foo,
foo: Sequence("one", bar, "two")
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
@@ -838,7 +840,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar: foo,
foo: Sequence("one", OrderedChoice(bar, "other"), "two")
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo not in parser.concepts_grammars # removed because of the infinite recursion
@@ -851,7 +853,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
grammar = {
foo: bar
}
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(self.get_context(), grammar)
assert foo in parser.concepts_grammars
@@ -883,7 +885,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
bar = Concept(name="bar")
grammar = {foo: Sequence("one", "two"), bar: foo}
parser = ConceptLexerParser()
parser = BnfNodeParser()
ret = parser.initialize(context, grammar)
return_value = ret.body
@@ -1209,7 +1211,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
context.sheerka.set_id_if_needed(c, False)
context.sheerka.add_concept_to_set(context, baz, bar)
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
encoded = parser.encode_grammar(parser.concepts_grammars)
@@ -1260,7 +1262,7 @@ class TestConceptLexerParser(TestUsingMemoryBasedSheerka):
# atom: OrderedChoice(OrderedChoice('1', '2', '3'), Sequence('(', add, ')')),
# }
#
# parser = ConceptLexerParser()
# parser = BnfNodeParser()
# parser.register(grammar)
#
# # res = parser.parse(context, "1")
+4 -3
View File
@@ -3,10 +3,11 @@ import pytest
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, LexerError, Token
from parsers.BaseNodeParser import cnode
from parsers.BaseParser import UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser, UnexpectedEndOfFileError
from parsers.ConceptLexerParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
ConceptLexerParser, ConceptExpression, cnode
from parsers.BnfNodeParser import StrMatch, Optional, ZeroOrMore, OrderedChoice, Sequence, OneOrMore, \
BnfNodeParser, ConceptExpression
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
@@ -155,7 +156,7 @@ class TestBnfParser(TestUsingMemoryBasedSheerka):
bar_definition = regex_parser.parse(context, "foo ('one' | 'two')").value.value
concepts = {bar: bar_definition, foo: foo_definition}
concept_parser = ConceptLexerParser()
concept_parser = BnfNodeParser()
concept_parser.initialize(context, concepts)
res = concept_parser.parse(context, "twenty two")
@@ -5,7 +5,8 @@ import pytest
from core.builtin_concepts import ParserResultConcept, ReturnValueConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.ConceptsWithConceptsParser import ConceptsWithConceptsParser
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
@@ -65,7 +66,7 @@ class TestConceptsWithConceptsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, interested", [
("not parser result", False),
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
(ParserResultConcept(parser=multiple_concepts_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
])
def test_not_interested(self, text, interested):
context = self.get_context()
+2 -2
View File
@@ -3,7 +3,7 @@ import ast
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from parsers.ConceptLexerParser import OrderedChoice, StrMatch, ConceptExpression
from parsers.BnfNodeParser import OrderedChoice, StrMatch, ConceptExpression
from parsers.PythonParser import PythonParser, PythonNode
from core.tokenizer import Keywords, Tokenizer, LexerError
from parsers.DefaultParser import DefaultParser, NameNode, SyntaxErrorNode, CannotHandleErrorNode, IsaConceptNode
@@ -251,7 +251,7 @@ def concept add one to a as
res = parser.parse(context, text)
node = res.value.value
definition = OrderedChoice(ConceptExpression(a_concept, rule_name="a_concept"), StrMatch("a_string"))
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", definition, definition)
parser_result = ParserResultConcept(BnfParser(), "a_concept | 'a_string'", None, definition, definition)
expected = get_def_concept(name="name", body="__definition[0]", bnf_def=parser_result)
assert res.status
+3 -2
View File
@@ -3,7 +3,8 @@ import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept
from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.ConceptLexerParser import ConceptLexerParser, ConceptNode, Sequence, cnode, utnode, scnode, SourceCodeNode
from parsers.BaseNodeParser import cnode, scnode, utnode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, ConceptNode, Sequence
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode
@@ -11,7 +12,7 @@ from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
def get_return_value(context, grammar, expression):
parser = ConceptLexerParser()
parser = BnfNodeParser()
parser.initialize(context, grammar)
ret_val = parser.parse(context, expression)
+13 -8
View File
@@ -1,6 +1,6 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept
from core.builtin_concepts import ParserResultConcept, NotForMeConcept
from core.tokenizer import Tokenizer, LexerError
from parsers.PythonParser import PythonNode, PythonParser, PythonErrorNode
import core.utils
@@ -48,9 +48,11 @@ class TestPythonParser(TestUsingMemoryBasedSheerka):
assert not res.status
assert res.who == parser.name
assert isinstance(res.value, ParserResultConcept)
assert isinstance(res.value.value[0], PythonErrorNode)
assert isinstance(res.value.value[0].exception, SyntaxError)
assert isinstance(res.value, NotForMeConcept)
assert res.value.body == text
assert len(res.value.get_prop("reason")) == 1
assert isinstance(res.value.get_prop("reason")[0], PythonErrorNode)
assert isinstance(res.value.get_prop("reason")[0].exception, SyntaxError)
@pytest.mark.parametrize("text, error_msg, error_text", [
("c::", "Concept identifiers not found", ""),
@@ -61,10 +63,13 @@ class TestPythonParser(TestUsingMemoryBasedSheerka):
res = parser.parse(self.get_context(), text)
assert not res.status
assert isinstance(res.body, ParserResultConcept)
assert isinstance(res.body.body[0], LexerError)
assert res.body.body[0].message == error_msg
assert res.body.body[0].text == error_text
assert isinstance(res.value, NotForMeConcept)
assert res.value.body == text
assert len(res.value.get_prop("reason")) == 1
assert isinstance(res.value.get_prop("reason")[0], LexerError)
assert res.value.get_prop("reason")[0].message == error_msg
assert res.value.get_prop("reason")[0].text == error_text
def test_i_can_parse_a_concept(self):
text = "c:name|key: + 1"
+7 -11
View File
@@ -1,18 +1,17 @@
import ast
import pytest
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts, ReturnValueConcept
from core.concept import Concept
from core.tokenizer import Token, TokenKind, Tokenizer
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonNode, PythonErrorNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.PythonParser import PythonNode
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
multiple_concepts_parser = MultipleConceptsParser()
unrecognized_nodes_parser = UnrecognizedNodeParser()
def ret_val(*args):
@@ -28,7 +27,7 @@ def ret_val(*args):
result.append(UnrecognizedTokensNode(index, index + len(tokens) - 1, tokens))
index += len(tokens)
return ReturnValueConcept("who", False, ParserResultConcept(parser=multiple_concepts_parser, value=result))
return ReturnValueConcept("who", False, ParserResultConcept(parser=unrecognized_nodes_parser, value=result))
def to_str_ast(expression):
@@ -40,7 +39,7 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
@pytest.mark.parametrize("text, interested", [
("not parser result", False),
(ParserResultConcept(parser="not multiple_concepts_parser"), False),
(ParserResultConcept(parser=multiple_concepts_parser, value=[]), True),
(ParserResultConcept(parser=unrecognized_nodes_parser, value=[UnrecognizedTokensNode(0, 0, [])]), True),
])
def test_not_interested(self, text, interested):
context = self.get_context()
@@ -130,9 +129,6 @@ class TestPythonWithConceptsParser(TestUsingMemoryBasedSheerka):
parser = PythonWithConceptsParser()
result = parser.parse(context, input_return_value.body)
wrapper = result.value
return_value = result.value.value
assert not result.status
assert context.sheerka.isinstance(wrapper, BuiltinConcepts.PARSER_RESULT)
assert isinstance(return_value[0], PythonErrorNode)
assert context.sheerka.isinstance(result.value, BuiltinConcepts.NOT_FOR_ME)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,383 @@
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
from core.concept import Concept, CC
from core.tokenizer import Tokenizer, TokenKind
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, scnode, cnode, \
utnode, SyaAssociativity, CN, CNC, UTN
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
from tests.TestUsingMemoryBasedSheerka import TestUsingMemoryBasedSheerka
from tests.parsers.parsers_utils import compute_expected_array, get_node
def get_input_nodes_from(my_concepts_map, full_expr, *args):
    """
    Build the real parser nodes for each node description in ``args``.

    Each arg is first resolved against ``full_expr`` via get_node(), then
    converted into a concrete ConceptNode / UnrecognizedTokensNode / Concept
    tree positioned on the tokenized expression.

    :param my_concepts_map: hash of the known concepts
    :param full_expr: the full expression the nodes refer to
    :param args: node descriptions (CC, CNC, CN, cnode, utnode, UTN, ...)
    :return: list of real nodes, one per entry of ``args``
    """
    def _get_real_node(n):
        if isinstance(n, CC):
            # copy the concept so tests do not mutate the shared map entry
            # (was `Concept.update_from(...)` on the class, which passed the
            # map entry itself as `self` and skipped the copy)
            concept = n.concept or Concept().update_from(my_concepts_map[n.concept_key])
            for k, v in n.compiled.items():
                concept.compiled[k] = _get_real_node(v)
            return concept
        if isinstance(n, (utnode, UTN)):
            return UnrecognizedTokensNode(n.start, n.end, full_expr_as_tokens[n.start: n.end + 1])
        if isinstance(n, (CNC, CN, cnode)):
            concept = n.concept if hasattr(n, "concept") and n.concept else \
                Concept().update_from(my_concepts_map[n.concept_key])
            tokens = full_expr_as_tokens[n.start: n.end + 1]
            # was `hasattr(node, "compiled")`: that tested the outer loop
            # variable, not the node currently being converted (wrong object
            # on every recursive call)
            if hasattr(n, "compiled"):
                for k, v in n.compiled.items():
                    concept.compiled[k] = _get_real_node(v)
            return ConceptNode(concept, n.start, n.end, tokens)
        raise NotImplementedError()

    res = []
    full_expr_as_tokens = list(Tokenizer(full_expr))
    tokens_for_get_node = [token.value for token in full_expr_as_tokens if token.type != TokenKind.EOF]
    for arg in args:
        node = get_node(my_concepts_map, tokens_for_get_node, arg)
        res.append(_get_real_node(node))
    return res
# Shared test fixture: every concept used by the TestUnrecognizedNodeParser
# tests below — plain atoms with python bodies, infix operators with
# properties, a bnf-defined concept ("twenties") and ambiguous
# "hello*" / "greetings_*" concepts.
concepts_map = {
    "5params": Concept("5params").def_prop("a").def_prop("b").def_prop("c").def_prop("d").def_prop("e"),
    "plus": Concept("a plus b", body="a + b").def_prop("a").def_prop("b"),
    "mult": Concept("a mult b", body="a * b").def_prop("a").def_prop("b"),
    "one": Concept("one", body="1"),
    "two": Concept("two", body="2"),
    "three": Concept("three", body="3"),
    "twenties": Concept("twenties", definition="'twenty' (one|two)=unit", body="20 + unit").def_prop("unit"),
    "hello_atom": Concept("hello one"),
    "hello_sya": Concept("hello a").def_prop("a"),
    "greetings_a": Concept("greetings a").def_prop("a"),
    "greetings_b": Concept("greetings b").def_prop("b"),
}
class TestUnrecognizedNodeParser(TestUsingMemoryBasedSheerka):
sheerka = None
@classmethod
def setup_class(cls):
t = TestUnrecognizedNodeParser()
TestUnrecognizedNodeParser.sheerka, context, _ = t.init_parser(concepts_map, create_new=True)
TestUnrecognizedNodeParser.sheerka.set_sya_def(context, [
(concepts_map["mult"].id, 20, SyaAssociativity.Right),
(concepts_map["plus"].id, 10, SyaAssociativity.Right),
])
def init_parser(self, my_concepts_map=None, **kwargs):
if my_concepts_map:
sheerka, context, *updated_concepts = self.init_concepts(*my_concepts_map.values(), **kwargs)
for i, pair in enumerate(my_concepts_map):
my_concepts_map[pair] = updated_concepts[i]
else:
sheerka = TestUnrecognizedNodeParser.sheerka
context = self.get_context(sheerka)
parser = UnrecognizedNodeParser()
return sheerka, context, parser
def test_i_can_validate_a_valid_concept_node(self):
sheerka, context, parser = self.init_parser()
node = get_input_nodes_from(concepts_map, "one", "one")[0]
res = UnrecognizedNodeParser().validate_concept_node(context, node)
assert res.status
assert res.body.concept == concepts_map["one"]
def test_i_can_validate_concept_unrecognized_tokens(self):
sheerka, context, parser = self.init_parser()
node = get_input_nodes_from(
concepts_map,
"5params one two three twenty one 1 + 2 one plus two mult three",
CNC("5params",
a=" one ",
b=" two three ",
c=" twenty one ",
d=utnode(12, 18, " 1 + 2 "),
e=" one plus two mult three"))[0]
res = UnrecognizedNodeParser().validate_concept_node(context, node)
assert res.status
concept = res.body.concept
assert concept == concepts_map["5params"]
assert len(concept.compiled["a"]) == 1
assert sheerka.isinstance(concept.compiled["a"][0], BuiltinConcepts.RETURN_VALUE)
assert concept.compiled["a"][0].status
assert concept.compiled["a"][0].who == "parsers.AtomNode"
assert concept.compiled["a"][0].body.body == [cnode("one", 1, 1, "one")]
assert len(concept.compiled["b"]) == 1
assert sheerka.isinstance(concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
assert concept.compiled["b"][0].status
assert concept.compiled["b"][0].who == "parsers.AtomNode"
assert concept.compiled["b"][0].body.body == [cnode("two", 1, 1, "two"), cnode("three", 3, 3, "three")]
assert len(concept.compiled["c"]) == 1
assert sheerka.isinstance(concept.compiled["c"][0], BuiltinConcepts.RETURN_VALUE)
assert concept.compiled["c"][0].status
assert concept.compiled["c"][0].who == "parsers.BnfNode"
expected_nodes = compute_expected_array(
concepts_map,
" twenty one ",
[CNC("twenties", source="twenty one", unit="one", one="one")])
assert concept.compiled["c"][0].body.body == expected_nodes
assert len(concept.compiled["d"]) == 1
assert sheerka.isinstance(concept.compiled["d"][0], BuiltinConcepts.RETURN_VALUE)
assert concept.compiled["d"][0].status
assert concept.compiled["d"][0].who == "parsers.Python"
assert concept.compiled["d"][0].body.source == "1 + 2"
assert len(concept.compiled["e"]) == 1
assert sheerka.isinstance(concept.compiled["e"][0], BuiltinConcepts.RETURN_VALUE)
assert concept.compiled["e"][0].status
assert concept.compiled["e"][0].who == "parsers.SyaNode"
expected_nodes = compute_expected_array(
concepts_map,
" one plus two mult three ",
[CNC("plus", a="one", b=CC("mult", a="two", b="three"))])
assert concept.compiled["e"][0].body.body == expected_nodes
# # sanity check, I can evaluate the concept
# evaluated = sheerka.evaluate_concept(self.get_context(sheerka, eval_body=True), concept)
# assert evaluated.key == concept.key
# assert evaluated.get_prop("a") ==
def test_i_can_validate_with_recursion(self):
    """Validation descends into nested concept nodes: plus(python, mult(python, bnf))."""
    sheerka, context, parser = self.init_parser()

    # Build "1 plus (2 mult twenty two)" as a single nested concept node.
    root = get_input_nodes_from(
        concepts_map,
        "1 plus 2 mult twenty two",
        CNC("plus",
            a="1 ",
            b=CC("mult", a=" 2 ", b=" twenty two")))[0]

    res = UnrecognizedNodeParser().validate_concept_node(context, root)
    assert res.status

    plus = res.body.concept
    assert plus == concepts_map["plus"]

    # left operand: a plain python literal "1"
    left = plus.compiled["a"]
    assert len(left) == 1
    assert left[0].status
    assert left[0].who == "parsers.Python"
    assert left[0].body.source == "1"

    # right operand: the nested mult concept, validated recursively
    mult = plus.compiled["b"]
    assert mult == concepts_map["mult"]

    mult_a = mult.compiled["a"][0]
    assert sheerka.isinstance(mult_a, BuiltinConcepts.RETURN_VALUE)
    assert mult_a.status
    assert mult_a.who == "parsers.Python"
    assert mult_a.body.source == "2"

    # deepest level: "twenty two" resolved through the bnf parser
    mult_b = mult.compiled["b"][0]
    assert sheerka.isinstance(mult_b, BuiltinConcepts.RETURN_VALUE)
    assert mult_b.status
    assert mult_b.who == "parsers.BnfNode"
    expected = compute_expected_array(
        concepts_map,
        " twenty two",
        [CNC("twenties", source="twenty two", unit="two", two="two")])
    assert mult_b.body.body == expected
# def test_i_can_validate_and_evaluate_a_concept_node_with_python(self):
# sheerka, context, parser = self.init_parser()
#
# node = get_input_nodes_from(
# concepts_map,
# "one plus 1 + 1",
# CNC("plus",
# a=UTN("one "),
# b=UTN("1 + 1")))[0]
#
# res = UnrecognizedNodeParser().validate_concept_node(context, node)
#
# assert res.status
# assert res.body.concept == concepts_map["plus"]
# assert res.body.concept.compiled["a"] == concepts_map["one"]
# assert len(res.body.concept.compiled["b"]) == 1
# assert sheerka.isinstance(res.body.concept.compiled["b"][0], BuiltinConcepts.RETURN_VALUE)
# assert res.body.concept.compiled["b"][0].status
# assert res.body.concept.compiled["b"][0].who == "parsers.Python"
# assert res.body.concept.compiled["b"][0].body.source == "1 + 1"
#
# # # evaluate
# # context = self.get_context(sheerka, eval_body=True)
# # evaluated = sheerka.evaluate_concept(context, res.body.concept)
# # assert evaluated.body == 3
# def test_i_can_validate_and_evaluate_concept_when_bnf_concept(self):
# sheerka, context, parser = self.init_parser()
# node = get_concept_node(concepts_map, "one plus twenty one", "plus", "one", "twenty one")
#
# res = UnrecognizedNodeParser().validate_concept_node(context, node)
#
# assert res.status
# assert res.body.concept == concepts_map["plus"]
# assert res.body.concept.compiled["a"] == concepts_map["one"]
# assert len(res.body.concept.compiled["b"]) == 1
# assert res.body.concept.compiled["b"][0].status
# assert res.body.concept.compiled["b"][0].who == "parsers.BnfNode"
#
# # evaluate
# context = self.get_context(sheerka, eval_body=True)
# evaluated = sheerka.evaluate_concept(context, res.body.concept)
# assert evaluated.body == 22
def test_i_can_parse_and_evaluate_unrecognized_python_node(self):
    """An unrecognized python expression parses into a single source-code node."""
    sheerka, context, parser = self.init_parser()

    source = "1 + 1"
    input_nodes = get_input_nodes_from(concepts_map, source, UTN(source))
    res = parser.parse(
        context,
        ParserResultConcept("parsers.xxx", source=source, value=input_nodes))

    assert res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT)
    parsed = res.body.body
    assert len(parsed) == 1
    assert parsed[0] == scnode(0, 4, source)
def test_i_can_parse_unrecognized_bnf_concept_node(self):
    """An unrecognized token run matching a bnf definition becomes a concept node."""
    sheerka, context, parser = self.init_parser()

    source = "twenty one"
    input_nodes = get_input_nodes_from(concepts_map, source, UTN(source))
    res = parser.parse(
        context,
        ParserResultConcept("parsers.xxx", source=source, value=input_nodes))

    assert res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT)
    parsed = res.body.body
    assert len(parsed) == 1
    expected = compute_expected_array(
        concepts_map,
        source,
        [CNC("twenties", source=source, unit="one", one="one")])
    assert parsed == expected
def test_i_can_parse_unrecognized_sya_concept_node(self):
    """An unrecognized operator expression is resolved by the sya parser."""
    sheerka, context, parser = self.init_parser()

    source = "one plus two mult three"
    input_nodes = get_input_nodes_from(concepts_map, source, UTN(source))
    res = parser.parse(
        context,
        ParserResultConcept("parsers.xxx", source=source, value=input_nodes))

    assert res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.PARSER_RESULT)
    parsed = res.body.body
    assert len(parsed) == 1
    # expected nesting: one plus (two mult three)
    expected = compute_expected_array(
        concepts_map,
        source,
        [CNC("plus",
             a="one",
             b=CC("mult", source="two mult three", a="two", b="three"))])
    assert parsed == expected
def test_i_can_parse_sequences(self):
    """A mixed sequence (already-parsed concept + trailing tokens) is completed."""
    sheerka, context, parser = self.init_parser()

    source = "one plus two three"
    input_sequence = get_input_nodes_from(
        concepts_map, source,
        CNC("plus", a="one", b="two"),
        utnode(5, 6, " three"))
    res = parser.parse(
        context,
        ParserResultConcept("parsers.xxx", source=source, value=input_sequence))

    assert res.status
    # the trailing " three" is resolved into its own concept node
    expected = compute_expected_array(concepts_map, source, [
        CNC("plus", a="one", b="two"),
        CN("three", start=6, end=6)])
    assert res.body.body == expected
def test_i_can_parse_when_multiple_atom_and_sya(self):
    """When "hello one" matches both an atom and a sya concept, the parser
    returns one successful result per interpretation.
    """
    sheerka, context, parser = self.init_parser()

    expression = "two hello one three"
    nodes = get_input_nodes_from(concepts_map, expression,
                                 "two", UTN("hello one"), "three")
    # Fix: source must describe the same tokens the nodes were built from;
    # the previous literal ("one plus two hello one") was a copy-paste leftover.
    parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

    res = parser.parse(context, parser_input)

    # two interpretations: "hello one" as an atom vs. as a sya concept
    assert len(res) == 2
    assert res[0].status
    assert res[1].status

    actual_nodes0 = res[0].body.body
    expected_0 = compute_expected_array(concepts_map, expression, [
        CN("two", 0, 0),
        CN("hello_atom", source="hello one", start=2, end=4),
        CN("three", 6, 6)])
    assert actual_nodes0 == expected_0

    actual_nodes1 = res[1].body.body
    expected_1 = compute_expected_array(concepts_map, expression, [
        CN("two", 0, 0),
        CNC("hello_sya", source="hello one", start=2, end=4, a="one"),
        CN("three", 6, 6)])
    assert actual_nodes1 == expected_1
def test_i_can_parse_when_multiple_sya_concepts(self):
    """Two sya concepts matching the same tokens yield one result each."""
    sheerka, context, parser = self.init_parser()

    source = "greetings two"
    input_nodes = get_input_nodes_from(concepts_map, source, UTN("greetings two"))
    res = parser.parse(
        context,
        ParserResultConcept("parsers.xxx", source="greetings two", value=input_nodes))

    # one successful interpretation per matching sya concept
    assert len(res) == 2

    assert res[0].status
    expected_first = compute_expected_array(concepts_map, source, [
        CNC("greetings_a", source="greetings two", start=0, end=2, a="two")])
    assert res[0].body.body == expected_first

    assert res[1].status
    expected_second = compute_expected_array(concepts_map, source, [
        CNC("greetings_b", source="greetings two", start=0, end=2, b="two")])
    assert res[1].body.body == expected_second
def test_i_cannot_parse_when_i_cannot_validate(self):
    """Parsing fails with an ERROR body when a concept node cannot be validated."""
    sheerka, context, parser = self.init_parser(concepts_map, create_new=True)

    expression = "one plus unknown tokens"
    nodes = get_input_nodes_from(concepts_map, expression,
                                 CNC("plus", a="one ", b=" unknown tokens"))
    # Fix: source now matches the expression the nodes were built from
    # (was the leftover literal "six" from an earlier test).
    parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

    res = parser.parse(context, parser_input)

    assert not res.status
    assert sheerka.isinstance(res.body, BuiltinConcepts.ERROR)
def test_i_cannot_parse_when_unrecognized(self):
    """Unknown tokens stay unrecognized: parse fails and echoes the input nodes."""
    sheerka, context, parser = self.init_parser(concepts_map, create_new=True)

    expression = "unknown tokens"
    nodes = get_input_nodes_from(concepts_map, expression, UTN(expression))
    # Fix: source now matches the expression the nodes were built from
    # (was the leftover literal "six" from an earlier test).
    parser_input = ParserResultConcept("parsers.xxx", source=expression, value=nodes)

    res = parser.parse(context, parser_input)

    assert not res.status
    # the parser hands the unrecognized nodes back untouched
    assert res.body.body == nodes