187 lines
7.1 KiB
Python
187 lines
7.1 KiB
Python
from core.builtin_concepts import ParserResultConcept, BuiltinConcepts
|
|
from evaluators.BaseEvaluator import OneReturnValueEvaluator
|
|
import core.utils
|
|
from parsers.ConceptLexerParser import ConceptNode, NonTerminalNode, ConceptMatch, UnrecognizedTokensNode, TerminalNode
|
|
|
|
|
|
class ConceptNodeEvaluator(OneReturnValueEvaluator):
    """
    Turn the node produced by a recognized BNF into a concept.

    The evaluator accepts a successful return value whose payload is a
    ParserResultConcept wrapping either a single ConceptNode /
    UnrecognizedTokensNode, or a non-empty sequence starting with one of them.
    """

    NAME = "ConceptNode"

    def __init__(self):
        # NOTE(review): 60 is forwarded as the third argument of the base
        # constructor (presumably a priority -- confirm against
        # OneReturnValueEvaluator). The original remark read "more than the
        # ConceptNodeEvaluator", which looks like a stale self-reference.
        super().__init__(self.NAME, [BuiltinConcepts.EVALUATION], 60)

    def matches(self, context, return_value):
        """
        Tell whether this evaluator can handle ``return_value``.

        :param context: evaluation context (not used here)
        :param return_value: the return value to inspect
        :return: True when the return value is successful, carries a
                 ParserResultConcept, and the parser result holds a
                 ConceptNode / UnrecognizedTokensNode, or a non-empty
                 iterable whose first item is one of them
        """
        accepted_nodes = (ConceptNode, UnrecognizedTokensNode)

        if not return_value.status:
            return False

        parser_result = return_value.value
        if not isinstance(parser_result, ParserResultConcept):
            return False

        payload = parser_result.value
        if isinstance(payload, accepted_nodes):
            return True

        # Otherwise only a non-empty iterable is acceptable, and only its
        # first element is inspected.
        if not hasattr(payload, "__iter__"):
            return False
        if not len(payload) > 0:
            return False
        return isinstance(payload[0], accepted_nodes)

    def eval(self, context, return_value):
        """
        From a concept node, creates a new concept
        and makes sure that the properties are correctly set.

        :param context: evaluation context; only ``context.sheerka`` is used
        :param return_value: the return value whose parser result is evaluated
        :return: a PARSER_RESULT return value when exactly one ConceptNode was
                 found (its status is False if any other kind of node was
                 seen), otherwise a failed NOT_FOR_ME return value
        """
        sheerka = context.sheerka

        nodes = return_value.value.value
        if not hasattr(nodes, "__iter__"):
            # a single node: handle it like a one-element list
            nodes = [nodes]

        built = []
        error_found = False
        source = ""
        for node in nodes:
            if not isinstance(node, ConceptNode):
                # anything that is not a ConceptNode flags the result as failed
                error_found = True
                continue

            # sources of the recognized nodes are joined with one space
            source = node.source if source == "" else source + " " + node.source
            fresh = sheerka.new(node.concept.key)
            built.append(self.finalize_concept(sheerka, fresh, node.underlying))

        if len(built) != 1:
            # zero or several concepts: this evaluator does not handle it
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.NOT_FOR_ME),
                parents=[return_value])

        parser_result = sheerka.new(
            BuiltinConcepts.PARSER_RESULT,
            parser=self,
            source=source,
            body=built[0],
            try_parsed=None)
        return sheerka.ret(self.name, not error_found, parser_result, parents=[return_value])

    def finalize_concept(self, sheerka, concept, underlying, init_empty_body=True):
        """
        Updates the properties of the concept.
        Goes in recursion if the property is a concept.

        :param sheerka: object used to create concepts and test their kind
        :param concept: the concept to update (modified in place)
        :param underlying: the parse node the concept was recognized from
        :param init_empty_body: when True, a concept whose body is None gets
                                the string form of ``underlying`` as body
        :return: the same ``concept`` object
        """

        def _store_prop(target, key, item):
            """
            Set the property, or turn it into a list when it already
            holds a value.
            """
            if key not in target.props or target.props[key].value is None:
                # nothing stored yet
                target.set_prop(key, item)
                return

            current = target.props[key].value
            if isinstance(current, list):
                # already a list: extend it in place
                current.append(item)
                return

            target.set_prop(key, [current, item])

        expression = underlying.parsing_expression
        if expression.rule_name:
            _store_prop(concept, expression.rule_name, self.get_underlying_as_string(underlying))

        # the update of the body must come BEFORE the recursion
        # otherwise it will be updated by a child and it won't be possible
        # to modify the value
        if init_empty_body and concept.body is None:
            concept.metadata.body = self.get_underlying_as_string(underlying)

        if not isinstance(underlying, NonTerminalNode):
            return concept

        for child in underlying.children:
            child_expression = child.parsing_expression

            if not isinstance(child_expression, ConceptMatch):
                # plain sub-expression: keep filling the same concept
                self.finalize_concept(sheerka, concept, child, init_empty_body)
                continue

            # the child is itself a concept: record it as a property,
            # then fill it from its own first child
            nested = sheerka.new(child_expression.concept.key)
            _store_prop(concept, child_expression.rule_name, nested)
            if not sheerka.isinstance(nested, BuiltinConcepts.UNKNOWN_CONCEPT):
                self.finalize_concept(sheerka, nested, child.children[0], init_empty_body)

        return concept

    @staticmethod
    def escape_if_needed(value):
        """
        Wrap a string in single quotes, passing it through
        ``core.utils.escape_char`` for the quote character first.

        :param value: any value
        :return: the quoted string, or ``value`` unchanged if it is not a str
        """
        if isinstance(value, str):
            return "'" + core.utils.escape_char(value, "'") + "'"

        return value

    def get_underlying_as_string(self, underlying):
        """
        Return the sequence of the recognized characters.
        When a child is a concept match, the key of the concept is emitted
        (unquoted) instead of its text.

        :param underlying: the parse node to render
        :return: the rendered string (for a TerminalNode, whatever
                 ``escape_if_needed`` returns for its source)
        """

        # Example
        # grammar = {
        #   foo: Sequence("one", "two", rule_name="var"),
        #   bar: Sequence(foo, "three", rule_name="var")}
        #
        # we want bar.body and bar.prop["var"]
        # to be "foo 'three'" (no quotes surrounding foo, as it is a concept, not a string)

        if isinstance(underlying, TerminalNode):
            return self.escape_if_needed(underlying.source)

        pieces = []
        pending = ""  # quoted run being built; its closing quote is added on flush
        for position, node in enumerate(underlying.children):
            is_first = position == 0

            if isinstance(node.parsing_expression, ConceptMatch):
                if pending != "":
                    # close the quoted run that precedes the concept
                    pieces.append(pending + "'")
                if not is_first:
                    pieces.append(" ")
                pieces.append(node.parsing_expression.concept.key)
                pending = ""
                continue

            escaped = core.utils.escape_char(node.source, "'")
            if pending == "":
                # open a new quoted run, separated from what came before
                pending = ("'" if is_first else " '") + escaped
            else:
                # a run is already open, so this cannot be the first child
                pending += " " + escaped

        if pending:
            pieces.append(pending + "'")

        return "".join(pieces)
|
|
|
|
# - - - E X P L A N A T I O N S - - -
|
|
# why do we need to update the body ?
|
|
# cf test_concept_property_is_correctly_updated_when_concept_recursion_using_zero_or_more()
|
|
# def concept number from bnf one | two | three
|
|
# def concept add from bnf number plus number
|
|
#
|
|
# the expression 'one plus two plus three' will match concept add
|
|
# add.props["number"] is a list of concepts 'number'
|
|
# But which one is 'one', which one is 'two', and which one is 'three'?
|
|
#
|
|
# That's the reason why we update the body
|
|
# add.props["number"] is a list of concepts 'number' but they won't have the same body
|
|
#
|
|
# !!! C A U T I O N !!!
|
|
# In the current implementation, the body is the sequence of char found
|
|
# If a concept is recognized, we don't put this information in the body
|
|
# Use get_underlying_as_string() instead of escape_if_needed() if we need this information
|