# Sheerka-Old/parsers/ExactConceptParser.py

from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind
from core.concept import Concept
import logging

# Module-level logger named after this module; the parser writes its debug traces to it.
log = logging.getLogger(__name__)


class ExactConceptParser(BaseParser):
    """
    Tries to recognize a single concept

    The words of the input are looked up as a concept key, first verbatim and
    then with every subset of words replaced by numbered placeholders
    (``Concept.PROPERTY_PREFIX`` followed by an index).
    """

    # Inputs with more words than this are rejected before any lookup:
    # parse() tries one key per distinct tuple produced by combinations(),
    # which walks every subset of word positions (up to 2**n subsets).
    MAX_WORDS_SIZE = 10

    def __init__(self):
        # The parser registers itself with the base class as "ConceptParser".
        BaseParser.__init__(self, "ConceptParser")

    def parse(self, context, text):
        """
        Look for concepts whose key matches the words of ``text``.

        :param context: object exposing a ``sheerka`` attribute, used to
            instantiate concepts and to test their kind
        :param text: either a string or an iterable of tokens
        :return: one of
            * a single ReturnValueConcept (built with ``False``) wrapping a
              CONCEPT_TOO_LONG concept when there are more than
              MAX_WORDS_SIZE words;
            * a list of ReturnValueConcept (built with ``True``), one per
              candidate key that was recognized;
            * a single ReturnValueConcept (built with ``False``) wrapping an
              UNKNOWN_CONCEPT concept when nothing was recognized.
        """
        matches = []
        sheerka = context.sheerka
        words = self.get_words(text)

        if len(words) > self.MAX_WORDS_SIZE:
            too_long = sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, obj=text)
            return ReturnValueConcept(self.name, False, too_long)

        for combination in self.combinations(words):
            # Open design question: should this hand out a new instance or an
            # existing one? It probably depends on the context; for now a new
            # one is requested for every candidate key.
            concept = sheerka.new(" ".join(combination))
            if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
                continue

            # Every placeholder in the key stands for the original word at the
            # same position; copy that word into the matching property slot.
            prefix = Concept.PROPERTY_PREFIX
            for position, token in enumerate(combination):
                if not token.startswith(prefix):
                    continue
                slot = int(token[len(prefix):])
                concept.set_prop_by_index(slot, words[position])

            matches.append(ReturnValueConcept(self.name, True, concept))
            log.debug(f"Recognized '{text}' as '{concept}'")

        if matches:
            return matches

        log.debug(f"Failed to recognize {words}")
        unknown = sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, obj=text)
        return ReturnValueConcept(self.name, False, unknown)

    @staticmethod
    def get_words(text):
        """
        Flatten ``text`` into the list of its meaningful token values.

        A string is run through ``Tokenizer``; anything else is iterated as
        is and is expected to yield objects with ``type`` and ``value``
        attributes. Reading stops at the first EOF token; NEWLINE and
        WHITESPACE tokens are skipped. When a token value is a ``Keywords``
        member, its own ``.value`` is stored instead.

        :param text: a string or an iterable of tokens
        :return: list of token values, in input order
        """
        stream = text if not isinstance(text, str) else iter(Tokenizer(text))
        ignored_kinds = (TokenKind.NEWLINE, TokenKind.WHITESPACE)

        collected = []
        for token in stream:
            kind = token.type
            if kind == TokenKind.EOF:
                break
            if kind in ignored_kinds:
                continue
            payload = token.value
            if isinstance(payload, Keywords):
                payload = payload.value
            collected.append(payload)
        return collected

    def combinations(self, iterable):
        """
        Build every variant of the input where some words become placeholders.

        Example, with ``__var__`` standing in for Concept.PROPERTY_PREFIX:

        combinations(['foo', 'bar', 'baz']) -->
            ('foo', 'bar', 'baz'),
            ('__var__0', 'bar', 'baz'),
            ('foo', '__var__0', 'baz'),
            ('foo', 'bar', '__var__0'),
            ('__var__0', '__var__1', 'baz'),
            ('__var__0', 'bar', '__var__1'),
            ('foo', '__var__0', '__var__1'),
            ('__var__0', '__var__1', '__var__2')

        :param iterable: the words to combine
        :return: a set of tuples, each as long as the input
        """
        words = tuple(iterable)
        total = len(words)
        generated = set()

        # For each subset size, walk the position subsets in lexicographic
        # order, starting from the first `size` positions.
        for size in range(total + 1):
            picked = list(range(size))
            generated.add(self.get_tuple(words, picked))

            while True:
                # Rightmost slot that has not yet reached its final value.
                pivot = next(
                    (slot for slot in range(size - 1, -1, -1)
                     if picked[slot] != slot + total - size),
                    None,
                )
                if pivot is None:
                    # Every slot is at its maximum: this size is exhausted.
                    break

                # Advance the pivot and reset everything after it to the
                # smallest increasing run.
                picked[pivot] += 1
                for follower in range(pivot + 1, size):
                    picked[follower] = picked[follower - 1] + 1
                generated.add(self.get_tuple(words, picked))

        return generated

    @staticmethod
    def get_tuple(pool, indices):
        """
        Replace the words selected by ``indices`` with numbered placeholders.

        Placeholders are numbered in the order the selected words are first
        met while walking ``indices``. Replacement is by value: every
        occurrence in ``pool`` of a selected word gets the same placeholder,
        including occurrences whose own position is not in ``indices``.

        :param pool: the sequence of words
        :param indices: positions in ``pool`` of the words to replace
        :return: a tuple as long as ``pool``
        """
        placeholders = {}

        # Assign one placeholder per distinct selected word.
        for position in indices:
            word = pool[position]
            if word in placeholders:
                continue
            k = len(placeholders)
            placeholders[word] = f"{Concept.PROPERTY_PREFIX}{k}"

        # Rebuild the sequence, substituting wherever a placeholder exists.
        return tuple(placeholders.get(word, word) for word in pool)