# Sheerka-Old/parsers/ExactConceptParser.py (Python, 130 lines, 4.1 KiB)

import itertools
import logging

from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind
from core.concept import Concept, VARIABLE_PREFIX
# Module-level logger, named after this module; used for the debug traces below.
log = logging.getLogger(__name__)
class ExactConceptParser(BaseParser):
    """
    Tries to recognize a single concept.

    The input is reduced to a list of words. Every way of replacing some of
    those words by numbered variable placeholders is turned into a candidate
    key, and each key is looked up with ``sheerka.new``.
    """

    # parse() rejects inputs with more words than this. combinations() can
    # produce up to 2 ** n candidate keys for n words.
    MAX_WORDS_SIZE = 10

    def __init__(self):
        super().__init__("ConceptParser")

    def parse(self, context, text):
        """
        Try to recognize ``text`` as one (or several) known concepts.

        text can be a string, but text can also be a list of tokens.

        :param context: execution context; only ``context.sheerka`` is used
        :param text: the input to recognize (string or list of tokens)
        :return: a list of successful ``ReturnValueConcept`` (one per matching
                 concept), or a single failed return value built with
                 ``sheerka.ret`` wrapping ``CONCEPT_TOO_LONG`` (more than
                 ``MAX_WORDS_SIZE`` words) or ``UNKNOWN_CONCEPT`` (no match)
        """
        sheerka = context.sheerka
        words = self.get_words(text)

        if len(words) > self.MAX_WORDS_SIZE:
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))

        res = []
        for combination in self.combinations(words):
            concept_key = " ".join(combination)
            result = sheerka.new(concept_key)
            if sheerka.isinstance(result, BuiltinConcepts.UNKNOWN_CONCEPT):
                continue

            # An ENUMERATION carries several candidate concepts in its body;
            # anything else is treated as a single concept.
            if sheerka.isinstance(result, BuiltinConcepts.ENUMERATION):
                concepts = result.body
            else:
                concepts = [result]

            for concept in concepts:
                # Update the properties if needed: each placeholder in the key
                # stands for the original word at the same position.
                # NOTE(review): a literal input word that itself starts with
                # VARIABLE_PREFIX also enters this branch, and int() raises
                # ValueError if its suffix is not a number - confirm whether
                # such input can reach this parser.
                for i, token in enumerate(combination):
                    if token.startswith(VARIABLE_PREFIX):
                        index = int(token[len(VARIABLE_PREFIX):])
                        concept.set_prop_by_index(index, words[i])

                res.append(ReturnValueConcept(
                    self.name,
                    True,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text if isinstance(text, str) else self.get_text_from_tokens(text),
                        body=concept,
                        try_parsed=concept)))
                log.debug("Recognized '%s' as '%s'", text, concept)

        # res is non-empty exactly when at least one concept was recognized.
        if res:
            return res

        log.debug("Failed to recognize %s", words)
        return sheerka.ret(
            self.name,
            False,
            sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))

    @staticmethod
    def get_words(text):
        """
        Return the significant token values of ``text`` as a list.

        A string is tokenized with ``Tokenizer``; any other input is iterated
        as-is and is expected to yield tokens. Iteration stops at the first
        EOF token, NEWLINE and WHITESPACE tokens are skipped, and a
        ``Keywords`` value is replaced by its own ``.value``.

        :param text: a string or an iterable of tokens
        :return: list of token values
        """
        tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
        ignored = (TokenKind.NEWLINE, TokenKind.WHITESPACE)

        res = []
        for t in tokens:
            if t.type == TokenKind.EOF:
                break
            if t.type in ignored:
                continue
            res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
        return res

    def combinations(self, iterable):
        """
        Return every candidate key obtained by replacing a subset of the
        words by variable placeholders (the untouched words included).

        With VARIABLE_PREFIX shown as '__var__':

        combinations(['foo', 'bar', 'baz']) -->
            {('foo', 'bar', 'baz'),
             ('__var__0', 'bar', 'baz'),
             ('foo', '__var__0', 'baz'),
             ('foo', 'bar', '__var__0'),
             ('__var__0', '__var__1', 'baz'),
             ('__var__0', 'bar', '__var__1'),
             ('foo', '__var__0', '__var__1'),
             ('__var__0', '__var__1', '__var__2')}

        :param iterable: the words
        :return: a set of tuples, each as long as the input
        """
        pool = tuple(iterable)
        positions = range(len(pool))

        res = set()
        # Choose r positions to replace, for every r from 0 (nothing
        # replaced) to len(pool) (everything replaced).
        for r in range(len(pool) + 1):
            for indices in itertools.combinations(positions, r):
                res.add(self.get_tuple(pool, indices))
        return res

    @staticmethod
    def get_tuple(pool, indices):
        """
        Return ``pool`` as a tuple with the selected words replaced by
        placeholders.

        Placeholders are ``VARIABLE_PREFIX`` followed by a counter starting at
        0, assigned in the order the selected values are first met in
        ``indices``. Replacement is done by value: every occurrence of a
        selected word is replaced, including occurrences at positions that are
        not listed in ``indices``.

        :param pool: sequence of words
        :param indices: positions in ``pool`` of the words to replace
        :return: tuple of the same length as ``pool``
        """
        # Map each selected value to its placeholder (one per distinct value).
        placeholders = {}
        for i in indices:
            value = pool[i]
            if value not in placeholders:
                placeholders[value] = f"{VARIABLE_PREFIX}{len(placeholders)}"

        # Create the tuple.
        return tuple(placeholders.get(value, value) for value in pool)