120 lines
3.9 KiB
Python
120 lines
3.9 KiB
Python
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
|
from parsers.BaseParser import BaseParser
|
|
from core.tokenizer import Tokenizer, Keywords, TokenKind
|
|
from core.concept import Concept
|
|
import logging
|
|
|
|
# Module-level logger, registered under this module's ``__name__``.
log = logging.getLogger(__name__)
|
|
|
|
|
|
class ExactConceptParser(BaseParser):
    """
    Tries to recognize a single concept.

    The input is reduced to a list of words. Every subset of word positions
    is then replaced by numbered property placeholders, and each resulting
    space-joined key is looked up through ``sheerka.new``.
    """

    # Inputs with more words than this are rejected up front: the number of
    # candidate keys tried by parse() can reach 2 ** len(words).
    MAX_WORDS_SIZE = 10

    def __init__(self):
        super().__init__("ConceptParser")

    def parse(self, context, text):
        """
        Try to recognize ``text`` as a concept.

        text can be a string, but it can also be a list of tokens.

        :param context: object whose ``sheerka`` attribute is used to create
            and type-check concepts
        :param text: a string, or an iterable of tokens
        :return: a list with one successful ReturnValueConcept per recognized
            key; otherwise a single failed ReturnValueConcept wrapping
            CONCEPT_TOO_LONG (too many words) or UNKNOWN_CONCEPT (no match)
        """
        sheerka = context.sheerka
        words = self.get_words(text)
        if len(words) > self.MAX_WORDS_SIZE:
            return ReturnValueConcept(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, obj=text))

        prefix = Concept.PROPERTY_PREFIX
        res = []
        for combination in self.combinations(words):
            concept_key = " ".join(combination)

            # Very important question to think about later
            # Must we return a new instance or the existing one
            # That will depend on the context
            # Let's return a new one for now and see if it works
            concept = sheerka.new(concept_key)
            if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
                continue

            # update the properties if needed: each placeholder token carries
            # the property index, and the original word at the same position
            # is the value to store
            for token, word in zip(combination, words):
                if token.startswith(prefix):
                    index = int(token[len(prefix):])
                    concept.set_prop_by_index(index, word)

            res.append(ReturnValueConcept(self.name, True, concept))
            # lazy %-args: nothing is formatted unless debug logging is on
            log.debug("Recognized '%s' as '%s'", text, concept)

        if res:
            return res

        log.debug("Failed to recognize %s", words)
        return ReturnValueConcept(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, obj=text))

    @staticmethod
    def get_words(text):
        """
        Extract the word values from ``text``.

        A string is run through ``Tokenizer``; anything else is iterated as a
        sequence of tokens. Reading stops at the first EOF token, NEWLINE and
        WHITESPACE tokens are skipped, and a ``Keywords`` token value is
        replaced by its own ``.value``.

        :param text: a string, or an iterable of tokens
        :return: list of token values
        """
        tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
        res = []
        for t in tokens:
            if t.type == TokenKind.EOF:
                break
            if t.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                continue
            res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
        return res

    def combinations(self, iterable):
        """
        Build every key variant obtained by turning a subset of the words
        into property placeholders.

        combinations(['foo', 'bar', 'baz']) -->
            {('foo', 'bar', 'baz'),
             ('__var__0', 'bar', 'baz'),
             ('foo', '__var__0', 'baz'),
             ('foo', 'bar', '__var__0'),
             ('__var__0', '__var__1', 'baz'),
             ('__var__0', 'bar', '__var__1'),
             ('foo', '__var__0', '__var__1'),
             ('__var__0', '__var__1', '__var__2')}

        :param iterable: the words
        :return: set of tuples, each as long as the input
        """
        # Imported locally so this class does not rely on a module-level
        # import being added elsewhere in the file.
        import itertools

        pool = tuple(iterable)
        positions = range(len(pool))

        res = set()
        # r = number of positions replaced, from none (r == 0) to all of them
        for r in range(len(pool) + 1):
            for indices in itertools.combinations(positions, r):
                res.add(self.get_tuple(pool, indices))
        return res

    @staticmethod
    def get_tuple(pool, indices):
        """
        Return ``pool`` as a tuple in which the values found at ``indices``
        are replaced by property placeholders.

        Placeholders are numbered in the order their value is first met in
        ``indices``. The substitution is keyed by value, so every occurrence
        of a selected value is replaced, including at positions that are not
        listed in ``indices``.

        :param pool: sequence of hashable values
        :param indices: positions in ``pool`` whose values become placeholders
        :return: tuple with the same length as ``pool``
        """
        placeholders = {}
        for i in indices:
            value = pool[i]
            if value not in placeholders:
                # the dict size before insertion is the next free number
                placeholders[value] = f"{Concept.PROPERTY_PREFIX}{len(placeholders)}"

        return tuple(placeholders.get(value, value) for value in pool)
|