Added first implementation of concepts ambiguity resolution + Jenkins file test

This commit is contained in:
2020-07-15 18:29:43 +02:00
parent b768eaa95d
commit e84b394da2
42 changed files with 1130 additions and 313 deletions
+133 -16
View File
@@ -5,7 +5,7 @@ import core.ast.nodes
from core.ast.nodes import CallNodeConcept
from core.ast.visitors import UnreferencedNamesVisitor
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, NotInit
from core.concept import Concept, NotInit, ConceptParts
from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
@@ -25,13 +25,10 @@ def is_same_success(context, return_values):
if isinstance(ret_val.body, Concept):
if not ret_val.body.metadata.is_evaluated:
with context.push(BuiltinConcepts.EVALUATE_CONCEPT,
ret_val.body,
desc=f"Evaluating concept '{ret_val.body}'") as sub_context:
sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED)
evaluated = context.sheerka.evaluate_concept(sub_context, ret_val.body)
if evaluated.key != ret_val.body.key:
raise Exception("Failed to evaluate evaluate")
evaluated = context.sheerka.evaluate_concept(context, ret_val.body, eval_body=True)
if not context.sheerka.is_success(evaluated):
raise Exception("Failed to evaluate evaluate")
return context.sheerka.objvalue(evaluated)
else:
return context.sheerka.objvalue(ret_val.body)
@@ -173,6 +170,129 @@ def only_successful(context, return_values):
sheerka.new(BuiltinConcepts.ONLY_SUCCESSFUL, body=successful_results),
parents=return_values)
#
# def remove_ambiguity(context, return_values):
# """
# When multiple concepts are matching, try to find the one(s) which is/are the more accurate
# :param context:
# :param return_values:
# :return:
# """
# # example :
# # Concept("x is a y", PRE=QUESTION)
# # Concept("x is a y")
# # Concept("x is a command")
# # with the source 'foo is a command'
# # The first one do not respect the pre condition (assuming that QUESTION is not in context)
# # The second one is Ok, but the third is more specific (as it's a concept that explicitly asks for 'command')
# # The third one will remain
# if not return_values:
# return return_values
#
# sheerka = context.sheerka
# by_number_of_vars = {} # sorted by number of variables
# to_keep = [] # return values values that are not eligible (ex non parser result)
# passed_validation = []
#
# # sort by number of variables
# # The less variables there are, the more accurate is the concept
# for r in return_values:
# if (r.status and
# sheerka.isinstance(r.body, BuiltinConcepts.PARSER_RESULT) and
# isinstance(r.body.body, Concept)):
# by_number_of_vars.setdefault(len(r.body.body.metadata.variables), []).append(r)
# else:
# to_keep.append(r)
#
# # Check the concepts, starting with the ones that have the less variables
# # Once a concept with a given number of variables is found, we do not process the concepts with an higher
# # number of variables
# for nb_vars in sorted(by_number_of_vars.keys()):
# for r in by_number_of_vars[nb_vars]:
# concept = r.body.body
# if concept.metadata.pre is None or concept.metadata.pre.strip() == "":
# passed_validation.append(r)
# else:
# evaluated = context.sheerka.evaluate_concept(context, concept, metadata=["pre"])
# if evaluated.key == concept.key and evaluated.get_value(ConceptParts.PRE):
# passed_validation.append(r)
# if len(passed_validation) > 0:
# break
#
# # Nothing was filtered, return the original return values
# if len(passed_validation) + len(to_keep) == len(return_values):
# return return_values # nothing to filter
#
# # All return_values fail, return empty list
# if len(passed_validation) == 0:
# return sheerka.ret(
# context.who,
# True,
# sheerka.new(BuiltinConcepts.FILTERED,
# body=to_keep,
# iterable=return_values,
# predicate="remove_ambiguity(context, iterable)"),
# parents=return_values)
#
# # Final check, we consider that the concepts that match a PRE condition are better than concepts with no condition
# by_condition_complexity = {}
# for r in passed_validation:
# by_condition_complexity.setdefault(get_condition_complexity(r.body.body, "pre"), []).append(r)
#
# return sheerka.ret(
# context.who,
# True,
# sheerka.new(BuiltinConcepts.FILTERED,
# body=to_keep + by_condition_complexity[max(by_condition_complexity.keys())],
# iterable=return_values,
# predicate="remove_ambiguity(context, iterable)"),
# parents=return_values)
def resolve_ambiguity(context, concepts):
"""
From the list of concept, elect the one(s) that best suit(s) the context
:param context:
:param concepts:
:return:
"""
# we first sort by condition complexity. The more complex is the PRE condition, the more likely
# the concept matches the context
by_complexity = {}
for c in concepts:
by_complexity.setdefault(get_condition_complexity(c, "pre"), []).append(c)
remaining_concepts = []
for complexity in sorted(by_complexity.keys(), reverse=True):
for c in by_complexity[complexity]:
evaluated = context.sheerka.evaluate_concept(context, c, metadata=["pre"])
if evaluated.key == c.key:
remaining_concepts.append(c)
if len(remaining_concepts) > 0:
break # no need to check concept with lower complexity
if len(remaining_concepts) in (0, 1):
return remaining_concepts # they all failed the pre conditions or one champ is found
# for concepts with the same condition complexity, we choose the one that has the less number of variables
# We consider that Concept("hello world") is more specific than Concept("hello a").def_var("a")
# when the input is "hello world"
by_number_of_vars = {}
for c in remaining_concepts:
by_number_of_vars.setdefault(len(c.metadata.variables), []).append(c)
return by_number_of_vars[min(by_number_of_vars.keys())]
def get_condition_complexity(concept, concept_part_str):
concept_part_value = getattr(concept.metadata, concept_part_str)
if concept_part_value is None or concept_part_value.strip() == 0:
return 0
return 1 # no real computing as of now
def only_parsers_results(context, return_values):
"""
@@ -197,7 +317,8 @@ def only_parsers_results(context, return_values):
sheerka.new(BuiltinConcepts.IS_EMPTY, body=return_values),
parents=return_values)
return_values_ok = [item for item in return_values if sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)]
return_values_ok = [item for item in return_values if
sheerka.isinstance(item.body, BuiltinConcepts.PARSER_RESULT)]
# hack because some parsers don't follow the NOT_FOR_ME rule
temp_ret_val = []
@@ -291,7 +412,8 @@ def get_lexer_nodes(return_values, start, tokens):
continue
end = start + len(tokens) - 1
lexer_nodes.append([SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
lexer_nodes.append(
[SourceCodeNode(ret_val.body.body, start, end, tokens, ret_val.body.source, ret_val)])
elif ret_val.who == "parsers.ExactConcept":
concepts = ret_val.body.body if hasattr(ret_val.body.body, "__iter__") else [ret_val.body.body]
@@ -333,12 +455,7 @@ def ensure_evaluated(context, concept, eval_body=True):
(var[0] not in concept.values or concept.get_value(var[0]) == NotInit):
return concept
with context.push(BuiltinConcepts.EVALUATE_CONCEPT, concept, desc=f"Evaluating concept {concept}") as sub_context:
if eval_body:
sub_context.local_hints.add(BuiltinConcepts.EVAL_BODY_REQUESTED)
evaluated = context.sheerka.evaluate_concept(sub_context, concept)
sub_context.add_values(return_values=evaluated)
evaluated = context.sheerka.evaluate_concept(context, concept, eval_body=eval_body)
return evaluated