ExactConceptParser can now recognize concepts by their names

This commit is contained in:
2020-05-21 16:27:18 +02:00
parent d357329f51
commit 37d3d16e21
17 changed files with 347 additions and 112 deletions
+16
View File
@@ -470,3 +470,19 @@ def remove_from_ret_val(sheerka, return_values, concept_key):
return_values.remove(item)
return return_values
def set_is_evaluated(concepts):
    """
    Flag one concept, or every concept of an iterable, as already evaluated
    (sets metadata.is_evaluated to True). Nothing is done when concepts is None
    :param concepts: None, a single concept or an iterable of concepts
    :return: None
    """
    if concepts is None:
        return

    # a lone concept is handled as a one-element collection
    targets = concepts if hasattr(concepts, "__iter__") else (concepts,)
    for target in targets:
        target.metadata.is_evaluated = True
+56 -4
View File
@@ -115,10 +115,7 @@ class Concept:
if isinstance(other, simplec):
return self.name == other.name and self.body == other.body
if isinstance(other, CC):
return other == self
if isinstance(other, CB):
if isinstance(other, (CC, CB, CMV)):
return other == self
if not isinstance(other, Concept):
@@ -601,5 +598,60 @@ class CB:
def __hash__(self):
    """Return the hash of the (concept, body) pair."""
    return hash((self.concept, self.body))
def __repr__(self):
    """Return a short debug representation that only shows the body."""
    return f"CB({self.body})"
class CMV:
    """
    Concept with metadata variables
    CMV stands for Concept Metadata Variables
    Test class that only compares the key and the metadata variables

    :param concept: a Concept (its key is used for the comparison) or directly a concept key
    :param kwargs: the expected metadata variables, given as name=value
    """

    def __init__(self, concept, **kwargs):
        is_concept = isinstance(concept, Concept)
        self.concept_key = concept.key if is_concept else concept
        self.concept = concept if is_concept else None
        self.variables = kwargs

    def __eq__(self, other):
        """
        Compare with a Concept (same key, same metadata variables)
        or with another CMV (same concept key, same variables)
        :param other:
        :return: True when both sides match, False otherwise (never raises on unknown variables)
        """
        if self is other:
            return True

        if isinstance(other, Concept):
            if other.key != self.concept_key:
                return False

            if len(other.metadata.variables) != len(self.variables):
                return False

            # metadata.variables is iterated as (name, value) pairs
            for name, value in other.metadata.variables:
                # a variable that this CMV does not declare is a mismatch, not a KeyError
                if name not in self.variables or self.variables[name] != value:
                    return False

            return True

        if not isinstance(other, CMV):
            return False

        if self.concept_key != other.concept_key:
            return False

        return self.variables == other.variables

    def __hash__(self):
        # NOTE(review): two CMV that compare equal (one built from a Concept, the other
        # from its key) may get different hashes here - confirm whether CMV is ever
        # used in a set / as a dict key before relying on it
        if self.concept:
            return hash(self.concept)
        return hash(self.concept_key)

    def __repr__(self):
        if self.concept:
            txt = f"CMV(concept='{self.concept}'"
        else:
            txt = f"CMV(concept_key='{self.concept_key}'"

        txt += "".join(f", {k}='{v}'" for k, v in self.variables.items())
        return txt + ")"
simplec = namedtuple("concept", "name body")  # minimal (name, body) stand-in for a concept (test purposes only)
+22 -6
View File
@@ -491,8 +491,14 @@ class Sheerka(Concept):
return self._get_unknown(metadata)
def resolve(self, concept):
def new_instances(concepts):
if hasattr(concepts, "__iter__"):
return [self.new_from_template(c, c.key) for c in concepts]
return self.new_from_template(concepts, concepts.key)
if concept is None:
return concept
return None
# if the entry is a concept token, use its values.
if isinstance(concept, Token):
@@ -500,24 +506,34 @@ class Sheerka(Concept):
return None
concept = concept.value
if isinstance(concept, str) and \
concept.startswith("c:") and \
(tmp := core.utils.unstr_concept(concept)) != (None, None):
concept = tmp
# if the entry is a tuple
# concept[0] is the name
# concept[1] is the id
if isinstance(concept, tuple):
if concept[1]:
if self.is_known(found := self.get_by_id(concept[1])):
return found
instance = self.new_from_template(found, found.key)
instance.metadata.is_evaluated = True
return instance
elif concept[0]:
return found if self.is_known(found := self.get_by_name(concept[0])) else None
if self.is_known(found := self.get_by_name(concept[0])):
instances = new_instances(found)
core.builtin_helpers.set_is_evaluated(instances)
return instances
else:
return None
# otherwise search in db
if isinstance(concept, str):
if self.is_known(found := self.get_by_id(concept)):
return found
if self.is_known(found := self.get_by_name(concept)):
return found
instances = new_instances(found)
core.builtin_helpers.set_is_evaluated(instances)
return instances
return None
@@ -2,6 +2,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import expect_one, only_successful
from core.concept import Concept, DoNotResolve, ConceptParts, InfiniteRecursionResolved
from core.sheerka.services.sheerka_service import BaseService
from core.utils import unstr_concept
CONCEPT_EVALUATION_STEPS = [
BuiltinConcepts.BEFORE_EVALUATION,
@@ -14,7 +15,7 @@ class SheerkaEvaluateConcept(BaseService):
def __init__(self, sheerka):
    """
    Create the service
    :param sheerka: forwarded unchanged to the BaseService constructor
    """
    super().__init__(sheerka)
def initialize(self):
    """
    Register evaluate_concept on the sheerka instance through bind_service_method
    :return:
    """
    self.sheerka.bind_service_method(self.evaluate_concept)
@@ -73,7 +74,6 @@ class SheerkaEvaluateConcept(BaseService):
Basically, it runs the parsers on all parts
:param concept:
:param context:
:param logger:
:return:
"""
@@ -82,6 +82,11 @@ class SheerkaEvaluateConcept(BaseService):
return context.sheerka.isinstance(r, BuiltinConcepts.RETURN_VALUE) and \
context.sheerka.isinstance(r.body, BuiltinConcepts.ONLY_SUCCESSFUL)
def parse_token_concept(s):
if s.startswith("c:") and (identifier := unstr_concept(s)) != (None, None):
return self.sheerka.resolve(identifier)
return None
steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
for part_key in ConceptParts:
if part_key in concept.compiled:
@@ -94,14 +99,19 @@ class SheerkaEvaluateConcept(BaseService):
if source.strip() == "":
concept.compiled[part_key] = DoNotResolve(source)
else:
with context.push(desc=f"Initializing *compiled* for {part_key}") as sub_context:
sub_context.add_inputs(source=source)
to_parse = self.sheerka.ret(context.who, True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=source))
res = self.sheerka.execute(sub_context, to_parse, steps)
only_success = only_successful(sub_context, res)
concept.compiled[part_key] = only_success.body.body if is_only_successful(only_success) else res
sub_context.add_values(return_values=res)
# first case, when the metadata references another concept via c:xxx: keyword
if concept_found := parse_token_concept(source):
context.log(f"Recognized concept '{concept_found}'", self.NAME)
concept.compiled[part_key] = concept_found
else:
with context.push(desc=f"Initializing *compiled* for {part_key}") as sub_context:
sub_context.add_inputs(source=source)
to_parse = self.sheerka.ret(context.who, True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=source))
res = self.sheerka.execute(sub_context, to_parse, steps)
only_success = only_successful(sub_context, res)
concept.compiled[part_key] = only_success.body.body if is_only_successful(only_success) else res
sub_context.add_values(return_values=res)
for var_name, default_value in concept.metadata.variables:
if var_name in concept.compiled:
@@ -113,14 +123,19 @@ class SheerkaEvaluateConcept(BaseService):
if default_value.strip() == "":
concept.compiled[var_name] = DoNotResolve(default_value)
else:
with context.push(desc=f"Initializing *compiled* for property {var_name}") as sub_context:
sub_context.add_inputs(source=default_value)
to_parse = self.sheerka.ret(context.who, True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=default_value))
res = self.sheerka.execute(sub_context, to_parse, steps)
only_success = only_successful(sub_context, res)
concept.compiled[var_name] = only_success.body.body if is_only_successful(only_success) else res
sub_context.add_values(return_values=res)
# first case, when the metadata references another concept via c:xxx: keyword
if concept_found := parse_token_concept(default_value):
context.log(f"Recognized concept '{concept_found}'", self.NAME)
concept.compiled[var_name] = concept_found
else:
with context.push(desc=f"Initializing *compiled* for property {var_name}") as sub_context:
sub_context.add_inputs(source=default_value)
to_parse = self.sheerka.ret(context.who, True,
self.sheerka.new(BuiltinConcepts.USER_INPUT, body=default_value))
res = self.sheerka.execute(sub_context, to_parse, steps)
only_success = only_successful(sub_context, res)
concept.compiled[var_name] = only_success.body.body if is_only_successful(only_success) else res
sub_context.add_values(return_values=res)
# Updates the cache of concepts when possible
if self.sheerka.has_id(concept.id):
+8 -8
View File
@@ -288,7 +288,7 @@ def decode_enum(enum_repr: str):
return None
def str_concept(t, skip_key=None):
def str_concept(t, drop_name=None):
"""
The key,id identifiers of a concept are stored in a tuple
we want to return the key and the id, separated by a pipe
@@ -298,21 +298,21 @@ def str_concept(t, skip_key=None):
>>> assert str_concept((None, "id")) == "c:|id:"
>>> assert str_concept(("key", None)) == "c:key:"
>>> assert str_concept((None, None)) == ""
>>> assert str_concept(Concept(key="foo", id="bar")) == "c:foo|bar:"
>>> assert str_concept(Concept(key="foo", id="bar"), skip_key=True) == "c:|bar:"
>>> assert str_concept(Concept(name="foo", id="bar")) == "c:foo|bar:"
>>> assert str_concept(Concept(name="foo", id="bar"), drop_name=True) == "c:|bar:"
:param t:
:param skip_key: True if we only want the id (and not the key)
:param drop_name: True if we only want the id (and not the key)
:return:
"""
if isinstance(t, tuple):
key, id_ = t[0], t[1]
name, id_ = t[0], t[1]
else:
key, id_ = t.key, t.id
name, id_ = t.key, t.id
if key is None and id_ is None:
if name is None and id_ is None:
return ""
result = 'c:' if (key is None or skip_key) else "c:" + key
result = 'c:' if (name is None or drop_name) else "c:" + name
if id_:
result += "|" + id_
return result + ":"
+20
View File
@@ -255,6 +255,26 @@ class BaseParser:
return start, end
@staticmethod
def merge_concepts(list_a, b):
    """
    Merge the concept(s) of b into list_a, skipping those whose id is already known
    When both sides are non empty, list_a is extended in place and returned
    :param list_a: list of concepts already collected (may be None or empty)
    :param b: None, a single concept or a list of concepts
    :return: list_a when b is empty, b (as a list) when list_a is empty, the merged list_a otherwise
    """
    if not b:
        return list_a

    list_b = b if isinstance(b, list) else [b]
    if not list_a:
        return list_b

    # The known ids must come from list_a: seeding them from list_b made every
    # candidate look like a duplicate, so nothing was ever merged
    known_ids = {c.id for c in list_a}
    for c in list_b:
        if c.id in known_ids:
            continue
        list_a.append(c)
        known_ids.add(c.id)

    return list_a
class BaseTokenizerIterParser(BaseParser):
+1 -1
View File
@@ -455,7 +455,7 @@ class BnfNodeFirstTokenVisitor(ParsingExpressionVisitor):
def visit_ConceptExpression(self, pe):
concept = self.sheerka.get_by_key(pe.concept) if isinstance(pe.concept, str) else pe.concept
if self.sheerka.is_known(concept):
self.add_first_token(core.utils.str_concept(concept, skip_key=True))
self.add_first_token(core.utils.str_concept(concept, drop_name=True))
return self.STOP
def visit_StrMatch(self, pe):
+41 -23
View File
@@ -3,7 +3,9 @@ import logging
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from core.concept import VARIABLE_PREFIX
from core.tokenizer import Keywords, TokenKind, LexerError
from core.utils import str_concept
from parsers.BaseParser import BaseParser
import core.builtin_helpers
class ExactConceptParser(BaseParser):
@@ -26,8 +28,8 @@ class ExactConceptParser(BaseParser):
"""
context.log(f"Parsing '{parser_input}'", self.name)
res = []
sheerka = context.sheerka
try:
words = self.get_words(parser_input)
except LexerError as e:
@@ -40,7 +42,7 @@ class ExactConceptParser(BaseParser):
body = sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input, reason=too_long)
return sheerka.ret(self.name, False, body)
recognized = [] # keep track of the concepts founds
already_recognized = [] # keep track of the concepts founds
for combination in self.combinations(words):
concept_key = " ".join(combination)
@@ -52,7 +54,7 @@ class ExactConceptParser(BaseParser):
concepts = result if isinstance(result, list) else [result]
for concept in concepts:
if concept.id in recognized:
if concept in already_recognized:
context.log(f"Recognized concept {concept} again. Skipping.", self.name)
# example
# if the input is foo a and a concept is defined as foo a
@@ -65,38 +67,33 @@ class ExactConceptParser(BaseParser):
for i, token in enumerate(combination):
if token.startswith(VARIABLE_PREFIX):
index = int(token[len(VARIABLE_PREFIX):])
concept.def_var_by_index(index, words[i])
value = words[i]
concept.def_var_by_index(index, str_concept(value) if isinstance(value, tuple) else value)
concept.metadata.need_validation = True
if self.verbose_log.isEnabledFor(logging.DEBUG):
prop_name = concept.metadata.variables[index][0]
context.log(
f"Added property {index}: {prop_name}='{words[i]}'.",
f"Added variable {index}: {prop_name}='{words[i]}'.",
self.name)
res.append(ReturnValueConcept(
self.name,
True,
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(
parser_input),
body=concept,
try_parsed=concept)))
recognized.append(concept.id)
already_recognized.append(concept)
if len(recognized) > 0:
by_name = sheerka.resolve(self.get_input_as_text(parser_input))
core.builtin_helpers.set_is_evaluated(by_name)
recognized = self.merge_concepts(already_recognized, by_name)
if len(recognized) == 0:
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
else:
res = [self.as_return_value(context, parser_input, c) for c in recognized]
if len(res) == 1:
self.log_result(context, parser_input, res[0])
else:
self.log_multiple_results(context, parser_input, res)
return res
return res
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
def get_words(self, text):
tokens = self.get_input_as_tokens(text)
res = []
@@ -138,7 +135,17 @@ class ExactConceptParser(BaseParser):
indices[j] = indices[j - 1] + 1
res.add(self.get_tuple(pool, indices))
return res
# remove all result that contains a token concepts
# They are not valid entries, since a token concept MUST be replaced by a variable
filtered = set()
for combination in res:
for entry in combination:
if isinstance(entry, tuple):
break
else:
filtered.add(combination)
return filtered
@staticmethod
def get_tuple(pool, indices):
@@ -158,3 +165,14 @@ class ExactConceptParser(BaseParser):
value = pool[i]
res.append(vars[value] if value in vars else value)
return tuple(res)
def as_return_value(self, context, parser_input, concept):
    """
    Wrap a recognized concept into a successful return value holding a PARSER_RESULT
    :param context: execution context, used to reach sheerka
    :param parser_input: what was parsed; a str is used as is, anything else goes through get_text_from_tokens
    :param concept: the recognized concept (used as body and as try_parsed)
    :return: a ReturnValueConcept issued by this parser, with status True
    """
    if isinstance(parser_input, str):
        source = parser_input
    else:
        source = self.get_text_from_tokens(parser_input)

    parser_result = context.sheerka.new(
        BuiltinConcepts.PARSER_RESULT,
        parser=self,
        source=source,
        body=concept,
        try_parsed=concept)

    return ReturnValueConcept(self.name, True, parser_result)