diff --git a/src/parsers/SyaConceptsParser.py b/src/parsers/SyaConceptsParser.py
index 99b9309..8877cdd 100644
--- a/src/parsers/SyaConceptsParser.py
+++ b/src/parsers/SyaConceptsParser.py
@@ -1,6 +1,8 @@
-from parsers.state_machine import End, ManageUnrecognized, PrepareReadTokens, ReadConcept, ReadTokens, Start, \
+from core.concept import DefinitionType
+from parsers.state_machine import ConceptToRecognize, End, ManageUnrecognized, PrepareReadTokens, ReadConcept, \
+    ReadTokens, Start, \
     StateMachine, StateMachineContext
-from parsers.tokenizer import Token
+from parsers.tokenizer import Token, TokenKind, Tokenizer
 
 
 class SyaConceptsParser:
@@ -32,8 +34,72 @@ class SyaConceptsParser:
         self.error_sink = []
 
     @staticmethod
-    def get_metadata_from_first_token(context, token: Token):
-        pass
+    def _get_expected_tokens(concept_key):
+        """
+        Return a list of (expected tokens, number of expected variables before these tokens) pairs.
+        ex:
+        'if x y then z end' => [('if ', 0), (' then ', 2), (' end', 1)] (token lists shown here as strings)
+        :param concept_key: concept key to split into literal token runs and variable counts
+        :type concept_key: str
+        :return: list of (tokens, nb_variables) pairs
+        :rtype: list
+        """
+
+        def custom_strip_tokens(_tokens):
+            """
+            Collapses each run of consecutive whitespace tokens into a single one.
+            Returns an empty list if there are only whitespace tokens.
+            :param _tokens: tokens to clean up
+            :type _tokens: list
+            :return: tokens with whitespace runs collapsed
+            :rtype: list
+            """
+            res = []
+            buffer = None
+            for t in _tokens:
+                if t.type == TokenKind.WHITESPACE:
+                    buffer = t
+                else:
+                    if buffer:
+                        res.append(buffer)
+                        buffer = None
+                    res.append(t)
+
+            if res and buffer:  # add the buffer only if the result is not empty
+                res.append(buffer)
+
+            return res
+
+        expected = []  # (tokens, number of expected variables before these tokens) pairs
+        tokens = []
+        nb_variables = 0
+        parsing_tokens = None  # True while reading literal tokens (not VAR_DEF), None before the first token
+
+        for token in Tokenizer(concept_key, yield_eof=False):
+            if token.type == TokenKind.WHITESPACE:
+                tokens.append(token)
+            elif token.type == TokenKind.VAR_DEF:
+                if parsing_tokens:
+                    expected.append((custom_strip_tokens(tokens), nb_variables))
+                    nb_variables = 1
+                    tokens = []
+                    parsing_tokens = False
+                else:
+                    nb_variables += 1
+            else:
+                tokens.append(token)
+                parsing_tokens = True
+
+        # flush the trailing run (tokens, variables, or both)
+        if tokens or nb_variables:
+            expected.append((custom_strip_tokens(tokens), nb_variables))
+
+        return expected
+
+    def get_metadata_from_first_token(self, context, token: Token):
+        return [ConceptToRecognize(m, self._get_expected_tokens(m.key), "key")
+                for m in context.sheerka.get_metadatas_from_first_token("key", token.value)
+                if m.definition_type == DefinitionType.DEFAULT and len(m.parameters) > 0]
 
     def parse(self, context, parser_input):
         sm = StateMachine(self.workflows)
diff --git a/tests/conftest.py b/tests/conftest.py
index 894a5bf..29e62cb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,10 @@
 import inspect
+from contextlib import contextmanager
 
 import pytest
 
 from helpers import GetNextId
+from parsers.tokenizer import Token
 from server.authentication import User
 
 DEFAULT_ONTOLOGY_NAME = "current_test_"
@@ -95,3 +97,20 @@ class NewOntology:
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.sheerka.om.revert_ontology(self.context, self.ontology)
         return False
+
+
+def simple_token_compare(a, b):
+    return a.type == b.type and a.value == b.value
+
+
+@contextmanager
+def comparable_tokens():
+    eq = Token.__eq__
+    ne = Token.__ne__
+    setattr(Token, "__eq__", simple_token_compare)
+    setattr(Token, "__ne__", lambda a, b: not simple_token_compare(a, b))
+    try:
+        yield
+    finally:
+        setattr(Token, "__eq__", eq)
+        setattr(Token, "__ne__", ne)
diff --git a/tests/parsers/test_SyaConceptsParser.py b/tests/parsers/test_SyaConceptsParser.py
index 262a7a6..753863d 100644
--- a/tests/parsers/test_SyaConceptsParser.py
+++ b/tests/parsers/test_SyaConceptsParser.py
@@ -1,10 +1,11 @@
 import pytest
 
 from base import BaseTest
-from conftest import NewOntology
+from conftest import NewOntology, comparable_tokens
 from evaluators.base_evaluator import MultipleChoices
-from helpers import get_concept, get_concepts, get_parser_input
+from helpers import _mt, get_concept, get_concepts, get_parser_input
 from parsers.SyaConceptsParser import SyaConceptsParser
+from parsers.tokenizer import Tokenizer
 
 
 class TestSyaConceptsParser(BaseTest):
@@ -13,13 +14,28 @@ class TestSyaConceptsParser(BaseTest):
     def parser(self):
         return SyaConceptsParser()
 
+    @pytest.mark.parametrize("concept_key, expected_list", [
+        ["a long token name", [("a long token name", 0)]],
+        ["__var__0 __var__1 __var__2", [("", 3)]],
+        ["__var__0 __var__1 prefixed", [(" prefixed", 2)]],
+        ["suffixed __var__0 __var__1", [("suffixed ", 0), ("", 2)]],
+        ["__var__0 __var__1 infixed __var__0 __var__1", [(" infixed ", 2), ("", 2)]],
+        ["if __var__0 __var__1 then __var__2 end", [("if ", 0), (" then ", 2), (" end", 1)]]
+    ])
+    def test_i_can_initialize_expected_parameters(self, parser, concept_key, expected_list):
+        resolved_expected_list = [(list(Tokenizer(source, yield_eof=False)), nb) for source, nb in expected_list]
+        actual = parser._get_expected_tokens(concept_key)
+
+        with comparable_tokens():
+            assert actual == resolved_expected_list
+
     def test_i_can_parse_a_simple_case(self, context, parser):
-        with NewOntology("test_i_can_parse_a_simple_case"):
+        with NewOntology(context, "test_i_can_parse_a_simple_case"):
             get_concepts(context, get_concept("a plus b", variables=["a", "b"]), use_sheerka=True)
             pi = get_parser_input("1 plus 2")
 
             res = parser.parse(context, pi)
 
-            expected = []
+            expected = [_mt("1001", a="1 ", b=" 2")]
             assert res == MultipleChoices([expected])
             assert not parser.error_sink
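
Reviewer note (not part of the patch): the grouping pass in _get_expected_tokens is easiest to check against a standalone sketch. Everything below is illustrative only; TokenKind, Token, and toy_tokenize are simplified stand-ins for parsers.tokenizer, not the real classes.

import re
from dataclasses import dataclass
from enum import Enum, auto


class TokenKind(Enum):
    # Simplified stand-in for parsers.tokenizer.TokenKind
    WHITESPACE = auto()
    VAR_DEF = auto()
    WORD = auto()


@dataclass
class Token:
    # Simplified stand-in for parsers.tokenizer.Token
    type: TokenKind
    value: str


def toy_tokenize(source):
    """Hypothetical tokenizer: runs of whitespace, __var__N markers, words."""
    for part in re.findall(r"\s+|\S+", source):
        if part.isspace():
            yield Token(TokenKind.WHITESPACE, part)
        elif part.startswith("__var__"):
            yield Token(TokenKind.VAR_DEF, part)
        else:
            yield Token(TokenKind.WORD, part)


def collapse_ws(tokens):
    """Mirror of custom_strip_tokens: keep one whitespace token per run,
    and drop everything if there are only whitespace tokens."""
    out, pending = [], None
    for t in tokens:
        if t.type == TokenKind.WHITESPACE:
            pending = t
        else:
            if pending:
                out.append(pending)
                pending = None
            out.append(t)
    if out and pending:
        out.append(pending)
    return out


def expected_groups(concept_key):
    """Same grouping pass as _get_expected_tokens, with token lists joined
    to strings so the output is easy to read."""
    expected, tokens, nb_variables, parsing_tokens = [], [], 0, None
    for token in toy_tokenize(concept_key):
        if token.type == TokenKind.WHITESPACE:
            tokens.append(token)
        elif token.type == TokenKind.VAR_DEF:
            if parsing_tokens:
                # A literal-token run just ended: flush it together with the
                # number of variables seen before it, then restart the count.
                expected.append(("".join(t.value for t in collapse_ws(tokens)), nb_variables))
                nb_variables, tokens, parsing_tokens = 1, [], False
            else:
                nb_variables += 1
        else:
            tokens.append(token)
            parsing_tokens = True
    if tokens or nb_variables:  # flush the trailing run
        expected.append(("".join(t.value for t in collapse_ws(tokens)), nb_variables))
    return expected


print(expected_groups("if __var__0 __var__1 then __var__2 end"))
# [('if ', 0), (' then ', 2), (' end', 1)]

This reproduces the parametrized expectations in test_i_can_initialize_expected_parameters above.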
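
The comparable_tokens fixture is an instance of the temporary monkey-patch pattern: swap a class's comparison methods for the duration of a block, restoring them in a finally clause so a failing assertion cannot leak the patch into later tests. A generic, self-contained sketch under hypothetical names (Cell and value_equality are not from the codebase):

from contextlib import contextmanager


class Cell:
    # Illustrative class with default identity-based equality.
    def __init__(self, value):
        self.value = value


@contextmanager
def value_equality(cls, fields):
    """Temporarily replace cls.__eq__/__ne__ with field-by-field comparison."""
    old_eq, old_ne = cls.__eq__, cls.__ne__

    def eq(a, b):
        return all(getattr(a, f) == getattr(b, f) for f in fields)

    cls.__eq__ = eq
    cls.__ne__ = lambda a, b: not eq(a, b)
    try:
        yield
    finally:
        # Always restore, even if the body raises; this is the same reason
        # comparable_tokens in the patch wraps its yield in try/finally.
        cls.__eq__, cls.__ne__ = old_eq, old_ne


assert Cell(1) != Cell(1)          # default: identity comparison
with value_equality(Cell, ["value"]):
    assert Cell(1) == Cell(1)      # inside the block: value comparison
assert Cell(1) != Cell(1)          # restored afterwards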