Working on #21 : Working on SyaConceptsParser.py

commit a729d98a0d
parent a7043b1dd8
2023-07-09 20:16:58 +02:00
3 changed files with 110 additions and 8 deletions
parsers/SyaConceptsParser.py
+73 -4
@@ -1,6 +1,8 @@
-from parsers.state_machine import End, ManageUnrecognized, PrepareReadTokens, ReadConcept, ReadTokens, Start, \
-    StateMachine, StateMachineContext
-from parsers.tokenizer import Token
+from core.concept import DefinitionType
+from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, PrepareReadTokens, ReadConcept, \
+    ReadTokens, Start, \
+    StateMachine, StateMachineContext
+from parsers.tokenizer import Token, TokenKind, Tokenizer
 
 
 class SyaConceptsParser:
@@ -32,8 +34,75 @@ class SyaConceptsParser:
         self.error_sink = []
 
     @staticmethod
-    def get_metadata_from_first_token(context, token: Token):
-        pass
+    def _get_expected_tokens(concept_key):
+        """
+        Return a list of pairs of (expected tokens, number of expected variables before those tokens).
+        ex:
+        'if x y then z end' => ('if', 0), ('then', 2), ('end', 1)
+        :param concept_key: concept key mixing literal tokens and variable placeholders
+        :type concept_key: str
+        :return: list of (token list, variable count) pairs
+        :rtype: list
+        """
+        def custom_strip_tokens(_tokens):
+            """
+            Collapses each run of consecutive whitespace tokens into a single one.
+            Returns an empty list if there are only whitespace tokens.
+            """
+            res = []
+            buffer = None
+            for t in _tokens:
+                if t.type == TokenKind.WHITESPACE:
+                    buffer = t
+                else:
+                    if buffer:
+                        res.append(buffer)
+                    buffer = None
+                    res.append(t)
+            if res and buffer:  # keep a trailing whitespace token only if the result is not empty
+                res.append(buffer)
+            return res
+
+        expected = []  # pairs of (expected tokens, number of expected variables before them)
+        tokens = []
+        nb_variables = 0
+        parsing_tokens = None  # True if we are parsing tokens (and not VAR_DEF)
+        for token in Tokenizer(concept_key, yield_eof=False):
+            if token.type == TokenKind.WHITESPACE:
+                tokens.append(token)
+            elif token.type == TokenKind.VAR_DEF:
+                if parsing_tokens is not None and parsing_tokens:
+                    expected.append((custom_strip_tokens(tokens), nb_variables))
+                    nb_variables = 1
+                    tokens = []
+                    parsing_tokens = False
+                else:
+                    nb_variables += 1
+            else:
+                tokens.append(token)
+                parsing_tokens = True
+        # do not forget the remaining tokens and variables
+        if tokens or nb_variables:
+            expected.append((custom_strip_tokens(tokens), nb_variables))
+        return expected
+
+    def get_metadata_from_first_token(self, context, token: Token):
+        return [ConceptToRecognize(m, self._get_expected_tokens(m.key), "key")
+                for m in context.sheerka.get_metadatas_from_first_token("key", token.value)
+                if m.definition_type == DefinitionType.DEFAULT and len(m.parameters) > 0]
 
     def parse(self, context, parser_input):
         sm = StateMachine(self.workflows)
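
The intent of the new _get_expected_tokens is easiest to see on a concrete key. A hedged illustration (mirroring the parametrized test further down; the token runs are really lists of Token objects, summarized here by their source text):

# Illustration only: SyaConceptsParser() and _get_expected_tokens are used
# exactly as in the tests below; the commented output paraphrases Token lists.
parser = SyaConceptsParser()
parser._get_expected_tokens("if __var__0 __var__1 then __var__2 end")
# => [("if ", 0),     literal "if" with no variables expected before it
#     (" then ", 2),  "then" is expected after two variables
#     (" end", 1)]    "end" is expected after one more variable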
conftest.py
+17
@@ -1,8 +1,10 @@
import inspect import inspect
from contextlib import contextmanager
import pytest import pytest
from helpers import GetNextId from helpers import GetNextId
from parsers.tokenizer import Token
from server.authentication import User from server.authentication import User
DEFAULT_ONTOLOGY_NAME = "current_test_" DEFAULT_ONTOLOGY_NAME = "current_test_"
@@ -95,3 +97,18 @@ class NewOntology:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.sheerka.om.revert_ontology(self.context, self.ontology)
         return False
+
+
+def simple_token_compare(a, b):
+    return a.type == b.type and a.value == b.value
+
+
+@contextmanager
+def comparable_tokens():
+    eq = Token.__eq__
+    ne = Token.__ne__
+    setattr(Token, "__eq__", simple_token_compare)
+    setattr(Token, "__ne__", lambda a, b: not simple_token_compare(a, b))
+    try:
+        yield
+    finally:  # restore the original operators even if the with-body raises
+        setattr(Token, "__eq__", eq)
+        setattr(Token, "__ne__", ne)
+20 -4
@@ -1,10 +1,11 @@
 import pytest
 
 from base import BaseTest
-from conftest import NewOntology
+from conftest import NewOntology, comparable_tokens
 from evaluators.base_evaluator import MultipleChoices
-from helpers import get_concept, get_concepts, get_parser_input
+from helpers import _mt, get_concept, get_concepts, get_parser_input
 from parsers.SyaConceptsParser import SyaConceptsParser
+from parsers.tokenizer import Tokenizer
 
 
 class TestSyaConceptsParser(BaseTest):
@@ -13,13 +14,28 @@ class TestSyaConceptsParser(BaseTest):
     def parser(self):
         return SyaConceptsParser()
 
+    @pytest.mark.parametrize("concept_key, expected_list", [
+        ("a long token name", [("a long token name", 0)]),
+        ("__var__0 __var__1 __var__2", [("", 3)]),
+        ("__var__0 __var__1 prefixed", [(" prefixed", 2)]),
+        ("suffixed __var__0 __var__1", [("suffixed ", 0), ("", 2)]),
+        ("__var__0 __var__1 infixed __var__0 __var__1", [(" infixed ", 2), ("", 2)]),
+        ("if __var__0 __var__1 then __var__2 end", [("if ", 0), (" then ", 2), (" end", 1)]),
+    ])
+    def test_i_can_initialize_expected_parameters(self, parser, concept_key, expected_list):
+        resolved_expected_list = [(list(Tokenizer(source, yield_eof=False)), nb) for source, nb in expected_list]
+        actual = parser._get_expected_tokens(concept_key)
+        with comparable_tokens():
+            assert actual == resolved_expected_list
+
     def test_i_can_parse_a_simple_case(self, context, parser):
-        with NewOntology("test_i_can_parse_a_simple_case"):
+        with NewOntology(context, "test_i_can_parse_a_simple_case"):
             get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
 
             pi = get_parser_input("1 plus 2")
             res = parser.parse(context, pi)
 
-            expected = []
+            expected = [_mt("1001", a="1 ", b=" 2")]
             assert res == MultipleChoices([expected])
             assert not parser.error_sink
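
Hypothetically connecting the two new tests (the stored key format for "a plus b" is an assumption inferred from the __var__N cases above; it is not shown in this commit):

# Assumption: the concept "a plus b" with variables a and b is stored under a
# key like "__var__0 plus __var__1". If so, its expected-token pairs follow
# the same shape as the "infixed" parametrize case above.
with comparable_tokens():
    assert parser._get_expected_tokens("__var__0 plus __var__1") == [
        (list(Tokenizer(" plus ", yield_eof=False)), 1),
        (list(Tokenizer("", yield_eof=False)), 1),
    ]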