Working on #21 : Working on SyaConceptsParser.py
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
from parsers.state_machine import End, ManageUnrecognized, PrepareReadTokens, ReadConcept, ReadTokens, Start, \
|
from core.concept import DefinitionType
|
||||||
|
from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, PrepareReadTokens, ReadConcept, \
|
||||||
|
ReadTokens, Start, \
|
||||||
StateMachine, StateMachineContext
|
StateMachine, StateMachineContext
|
||||||
from parsers.tokenizer import Token
|
from parsers.tokenizer import Token, TokenKind, Tokenizer
|
||||||
|
|
||||||
|
|
||||||
class SyaConceptsParser:
|
class SyaConceptsParser:
|
||||||
@@ -32,8 +34,75 @@ class SyaConceptsParser:
|
|||||||
self.error_sink = []
|
self.error_sink = []
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_metadata_from_first_token(context, token: Token):
|
def _get_expected_tokens(concept_key):
|
||||||
pass
|
"""
|
||||||
|
Return a list of pairs of (expected tokens, number of expected variables before these tokens)
|
||||||
|
ex:
|
||||||
|
'if x y then z end' => ('if', 0), ('then', 2), ('end', 1)
|
||||||
|
:param concept_key:
|
||||||
|
:type concept_key:
|
||||||
|
:return:
|
||||||
|
:rtype:
|
||||||
|
"""
|
||||||
|
|
||||||
|
# def custom_strip_tokens(_tokens):
|
||||||
|
# return _tokens
|
||||||
|
|
||||||
|
def custom_strip_tokens(_tokens):
|
||||||
|
"""
|
||||||
|
Collapses each run of consecutive whitespace tokens into a single whitespace token
|
||||||
|
Returns an empty list if there are only whitespace tokens
|
||||||
|
:param _tokens:
|
||||||
|
:type _tokens:
|
||||||
|
:return:
|
||||||
|
:rtype:
|
||||||
|
"""
|
||||||
|
res = []
|
||||||
|
buffer = None
|
||||||
|
for t in _tokens:
|
||||||
|
if t.type == TokenKind.WHITESPACE:
|
||||||
|
buffer = t
|
||||||
|
else:
|
||||||
|
if buffer:
|
||||||
|
res.append(buffer)
|
||||||
|
buffer = None
|
||||||
|
res.append(t)
|
||||||
|
|
||||||
|
if res and buffer: # add the buffer only if the result is not empty
|
||||||
|
res.append(buffer)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
expected = [] # tuple of expected token and number of expected variables before this token
|
||||||
|
tokens = []
|
||||||
|
nb_variables = 0
|
||||||
|
parsing_tokens = None # True if we are parsing tokens (and not VAR_DEF)
|
||||||
|
|
||||||
|
for token in Tokenizer(concept_key, yield_eof=False):
|
||||||
|
if token.type == TokenKind.WHITESPACE:
|
||||||
|
tokens.append(token)
|
||||||
|
elif token.type == TokenKind.VAR_DEF:
|
||||||
|
if parsing_tokens is not None and parsing_tokens:
|
||||||
|
expected.append((custom_strip_tokens(tokens), nb_variables))
|
||||||
|
nb_variables = 1
|
||||||
|
tokens = []
|
||||||
|
parsing_tokens = False
|
||||||
|
else:
|
||||||
|
nb_variables += 1
|
||||||
|
else:
|
||||||
|
tokens.append(token)
|
||||||
|
parsing_tokens = True
|
||||||
|
|
||||||
|
# do not forget the remaining ones
|
||||||
|
if tokens or nb_variables:
|
||||||
|
expected.append((custom_strip_tokens(tokens), nb_variables))
|
||||||
|
|
||||||
|
return expected
|
||||||
|
|
||||||
|
def get_metadata_from_first_token(self, context, token: Token):
|
||||||
|
return [ConceptToRecognize(m, self._get_expected_tokens(m.key), "key")
|
||||||
|
for m in context.sheerka.get_metadatas_from_first_token("key", token.value)
|
||||||
|
if m.definition_type == DefinitionType.DEFAULT and len(m.parameters) > 0]
|
||||||
|
|
||||||
def parse(self, context, parser_input):
|
def parse(self, context, parser_input):
|
||||||
sm = StateMachine(self.workflows)
|
sm = StateMachine(self.workflows)
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
import inspect
|
import inspect
|
||||||
|
from contextlib import contextmanager
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from helpers import GetNextId
|
from helpers import GetNextId
|
||||||
|
from parsers.tokenizer import Token
|
||||||
from server.authentication import User
|
from server.authentication import User
|
||||||
|
|
||||||
DEFAULT_ONTOLOGY_NAME = "current_test_"
|
DEFAULT_ONTOLOGY_NAME = "current_test_"
|
||||||
@@ -95,3 +97,18 @@ class NewOntology:
|
|||||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
self.sheerka.om.revert_ontology(self.context, self.ontology)
|
self.sheerka.om.revert_ontology(self.context, self.ontology)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def simple_token_compare(a, b):
    """Return True when both tokens have the same ``type`` and ``value``.

    Only these two attributes are compared; anything else carried by the
    tokens is ignored.
    """
    return a.type == b.type and a.value == b.value


@contextmanager
def comparable_tokens():
    """Temporarily make ``Token`` instances compare by type and value only.

    While the ``with`` block runs, ``Token.__eq__`` and ``Token.__ne__`` are
    replaced by comparisons built on ``simple_token_compare``. The previous
    methods are put back when the block exits, including when its body
    raises (for instance on a failing ``assert``), so the patched comparison
    cannot leak into code that runs afterwards.
    """
    original_eq = Token.__eq__
    original_ne = Token.__ne__
    setattr(Token, "__eq__", simple_token_compare)
    setattr(Token, "__ne__", lambda a, b: not simple_token_compare(a, b))
    try:
        yield
    finally:
        # Always undo the patch, even if the body of the ``with`` failed.
        setattr(Token, "__eq__", original_eq)
        setattr(Token, "__ne__", original_ne)
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from base import BaseTest
|
from base import BaseTest
|
||||||
from conftest import NewOntology
|
from conftest import NewOntology, comparable_tokens
|
||||||
from evaluators.base_evaluator import MultipleChoices
|
from evaluators.base_evaluator import MultipleChoices
|
||||||
from helpers import get_concept, get_concepts, get_parser_input
|
from helpers import _mt, get_concept, get_concepts, get_parser_input
|
||||||
from parsers.SyaConceptsParser import SyaConceptsParser
|
from parsers.SyaConceptsParser import SyaConceptsParser
|
||||||
|
from parsers.tokenizer import Tokenizer
|
||||||
|
|
||||||
|
|
||||||
class TestSyaConceptsParser(BaseTest):
|
class TestSyaConceptsParser(BaseTest):
|
||||||
@@ -13,13 +14,28 @@ class TestSyaConceptsParser(BaseTest):
|
|||||||
def parser(self):
|
def parser(self):
|
||||||
return SyaConceptsParser()
|
return SyaConceptsParser()
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("concept_key, expected_list", [
|
||||||
|
["a long token name", [("a long token name", 0)]],
|
||||||
|
["__var__0 __var__1 __var__2", [("", 3)]],
|
||||||
|
["__var__0 __var__1 prefixed", [(" prefixed", 2)]],
|
||||||
|
["suffixed __var__0 __var__1", [("suffixed ", 0), ["", 2]]],
|
||||||
|
["__var__0 __var__1 infixed __var__0 __var__1", [(" infixed ", 2), ["", 2]]],
|
||||||
|
["if __var__0 __var__1 then __var__2 end", [("if ", 0), (" then ", 2), (" end", 1)]]
|
||||||
|
])
|
||||||
|
def test_i_can_initialize_expected_parameters(self, parser, concept_key, expected_list):
|
||||||
|
resolved_expected_list = [(list(Tokenizer(source, yield_eof=False)), nb) for source, nb in expected_list]
|
||||||
|
actual = parser._get_expected_tokens(concept_key)
|
||||||
|
|
||||||
|
with comparable_tokens():
|
||||||
|
assert actual == resolved_expected_list
|
||||||
|
|
||||||
def test_i_can_parse_a_simple_case(self, context, parser):
|
def test_i_can_parse_a_simple_case(self, context, parser):
|
||||||
with NewOntology("test_i_can_parse_a_simple_case"):
|
with NewOntology(context, "test_i_can_parse_a_simple_case"):
|
||||||
get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
|
get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
|
||||||
|
|
||||||
pi = get_parser_input("1 plus 2")
|
pi = get_parser_input("1 plus 2")
|
||||||
res = parser.parse(context, pi)
|
res = parser.parse(context, pi)
|
||||||
|
|
||||||
expected = []
|
expected = [_mt("1001", a="1 ", b=" 2")]
|
||||||
assert res == MultipleChoices([expected])
|
assert res == MultipleChoices([expected])
|
||||||
assert not parser.error_sink
|
assert not parser.error_sink
|
||||||
|
|||||||
Reference in New Issue
Block a user