cac2dad17f
Fixed #2 : Variables are not recognized when inside a rule token Fixed #15 : Rule: rete attributes are lost when a new ontology is created Fixed #14 : ReteNetwork: Format rules must not be added to Rete network Fixed #16 : DefConcept: Variables are not recognized when they are keyword arguments Fixed #4 : Comparison are not correctly set when comparison property is a concept Fixed #14 : Parser: merge FunctionParser.NamesNode and ExpressionParser.NamesNode Fixed #18 : Parser: Add SourceCodeNode test to UnrecognizedNodeParser Fixed #20 : At startup Number concept is saved in db a numerous number of time Fixed #21 : CacheManager: I can remove all elements from a ListIfNeededCache and fill it again Fixed #22 : CacheManager: I can remove all elements from a SetCache and fill it again Fixed #23 : HistoryManager: history() no longer works Fixed #24 : HistoryManager: history() no longer works after creating an exec rule Fixed #25 : SheerkaMemory: Use MemoryObject instead of sheerka.local Fixed #26 : Debugger: add the list all available services.. Fixed #27 : CONCEPTS_GRAMMARS_ENTRY does not seems to be in use any more Fixed #28 : Give order to services
399 lines
16 KiB
Python
399 lines
16 KiB
Python
from dataclasses import dataclass
|
|
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
|
|
from core.concept import Concept
|
|
from core.sheerka.services.SheerkaExecute import ParserInput
|
|
from core.tokenizer import TokenKind
|
|
from core.utils import get_n_clones
|
|
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
|
|
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, Node
|
|
from parsers.BnfNodeParser import BnfNodeParser
|
|
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
|
|
from parsers.RuleParser import RuleParser
|
|
from parsers.SequenceNodeParser import SequenceNodeParser
|
|
from parsers.SyaNodeParser import SyaNodeParser
|
|
from parsers.expressions import NameExprNode
|
|
|
|
# Names of the parsers tried (in this order) when attempting to recognize
# concepts inside an unrecognized parameter span; passed to
# get_lexer_nodes_from_unrecognized() in FunctionParser.to_source_code_node().
PARSERS = [RuleParser.NAME,
           SequenceNodeParser.NAME,
           BnfNodeParser.NAME,
           SyaNodeParser.NAME]
|
|
|
|
|
|
@dataclass
class FunctionParserNode(Node):
    """Base type for every node produced by the function parser."""
    pass
|
|
|
|
|
|
@dataclass()
class FunctionParameter:
    """
    Result of parsing one function parameter.

    ``value`` holds the parsed value; ``separator`` holds the value and
    position of the separator that follows it (None for the last parameter).
    """

    # parsed value of the parameter
    value: NameExprNode
    # separator node (value + position); None when no separator follows
    separator: NameExprNode = None

    def add_sep(self, start, end, tokens):
        """Record the separator span that follows this parameter."""
        self.separator = NameExprNode(start, end, tokens)

    def value_to_unrecognized(self):
        """Return the value span as an UnrecognizedTokensNode with fixed source."""
        value_node = UnrecognizedTokensNode(self.value.start,
                                            self.value.end,
                                            self.value.tokens)
        return value_node.fix_source()

    def separator_to_unrecognized(self):
        """Like value_to_unrecognized(), but for the separator; None when absent."""
        if self.separator is None:
            return None
        sep_node = UnrecognizedTokensNode(self.separator.start,
                                          self.separator.end,
                                          self.separator.tokens)
        return sep_node.fix_source()
|
|
|
|
|
|
@dataclass
class FunctionNode(FunctionParserNode):
    """Parsed representation of a function call ``name(p1, p2, ...)``."""
    first: NameExprNode  # beginning of the function (it should represent the name of the function)
    last: NameExprNode  # last part of the function (it should be the trailing parenthesis)
    parameters: list  # list of FunctionParameter (or tuples in the FN test double)
|
|
|
|
|
|
class FN(FunctionNode):
    """
    Test class only

    It matches with FunctionNode but with less constraints

    Thereby,
    FN("first", "last", ["param1," ...]) can be compared to
    FunctionNode(NameExprNode("first"), NameExprNode("second"), [FunctionParameter(NameExprNode("param1"), NameExprNode(", ")])

    Note that FunctionParameter can easily be defined with a single string
    * "param" -> FunctionParameter(NameExprNode("param"), None)
    * "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", "))
    For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part
    """

    def __init__(self, first, last, parameters):
        # Deliberately does not call super().__init__(): this test double keeps
        # plain strings / (value, sep) tuples instead of the parent's
        # NameExprNode / FunctionParameter objects.
        self.first = first
        self.last = last
        self.parameters = []
        for param in parameters:
            if isinstance(param, tuple):
                # already an explicit (value, separator) pair
                self.parameters.append(param)
            elif isinstance(param, str) and (pos := param.find(",")) != -1:
                # split "param, " into ("param", ", ") at the first comma
                self.parameters.append((param[:pos], param[pos:]))
            else:
                # no separator
                self.parameters.append((param, None))

    def __eq__(self, other):
        """Compare against another FN, or loosely against a real FunctionNode."""
        if id(self) == id(other):
            return True

        if isinstance(other, FN):
            return self.first == other.first and self.last == other.last and self.parameters == other.parameters

        if isinstance(other, FunctionNode):
            # compare the raw strings of this FN against the node values
            if self.first != other.first.value or self.last != other.last.value:
                return False
            if len(self.parameters) != len(other.parameters):
                return False
            for self_parameter, other_parameter in zip(self.parameters, other.parameters):
                # when this side holds a string, compare against the node's text
                value = other_parameter.value.value if isinstance(self_parameter[0], str) else other_parameter.value
                sep = other_parameter.separator.value if other_parameter.separator else None
                if self_parameter[0] != value or self_parameter[1] != sep:
                    return False

            return True

        return False

    def __hash__(self):
        # self.parameters is a list (unhashable); convert to a tuple so that
        # hashing an FN instance does not raise TypeError.
        return hash((self.first, self.last, tuple(self.parameters)))
|
|
|
|
|
|
class FunctionParser(BaseParser):
    """
    The parser will be used to parse func(x, y, z)
    where x, y and z can be source code, concepts or other functions.

    It will return a SourceCodeNode or SourceCodeWithConceptNode — one result
    per possible interpretation of the parameters when several concepts match.
    """

    def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
        """

        :param sep: token value separating parameters (defaults to ",")
        :param longest_concepts_only: When multiples concepts are found, only keep the longest one
        so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
        :param kwargs: accepted for signature compatibility; unused here
        """
        super().__init__("Function", 55)
        self.sep = sep
        self.longest_concepts_only = longest_concepts_only
        # toggled off by parse_parameter_value() while speculatively trying to
        # parse a parameter as a nested function call
        self.record_errors = True

    def add_error(self, error, next_token=True):
        # Errors are dropped while a speculative parse is in progress
        # (see parse_parameter_value()).
        if not self.record_errors:
            return

        return super().add_error(error, next_token)

    def parse(self, context, parser_input: ParserInput):
        """
        Parse exactly one function call from the input.

        :param context: parsing context (logging + sheerka access)
        :param parser_input: tokens to parse; anything else is rejected
        :return: a sheerka return value, or a list of them when the
                 parameters have several interpretations; None when
                 parser_input is not a ParserInput
        """

        if not isinstance(parser_input, ParserInput):
            return None

        context.log(f"Parsing '{parser_input}' with FunctionParser", self.name)
        sheerka = context.sheerka

        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.IS_EMPTY))

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        self.parser_input.next_token()
        node = self.parse_function()

        # the whole input must be consumed: only a single function is supported
        if self.parser_input.next_token():
            self.add_error(UnexpectedTokenParsingError("Only one function supported",
                                                       self.parser_input.token,
                                                       [TokenKind.EOF]))

        if self.has_error:
            if node is None:
                # nothing was recognized at all: this input is not for this parser
                body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                                           body=parser_input.as_text(),
                                           reason=self.error_sink)
            else:
                # partially recognized: report a genuine error
                body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
            return self.sheerka.ret(self.name, False, body)

        source_code_nodes = self.to_source_code_node(node)

        # one return value per interpretation of the parameters
        res = []
        for source_code_node in source_code_nodes:
            value = self.get_return_value_body(context.sheerka,
                                               self.parser_input.as_text(),
                                               source_code_node,
                                               source_code_node)

            # status is True only when the node parsed as valid Python
            res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, value))

        return res[0] if len(res) == 1 else res

    def parse_function(self):
        # Parse "<identifier> ( <parameters> )" starting at the current token.
        # Returns a FunctionNode (possibly partial when an error occurred after
        # the opening parenthesis) or None when not even "<identifier> (" is
        # present.

        start = self.parser_input.pos
        token = self.parser_input.token
        if token.type != TokenKind.IDENTIFIER:
            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
                                                       token,
                                                       [TokenKind.IDENTIFIER]))
            return None

        if not self.parser_input.next_token():
            self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis"))
            return None

        token = self.parser_input.token
        if token.type != TokenKind.LPAR:
            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
                                                       token,
                                                       [TokenKind.LPAR]))
            return None

        # node covering the function name and the opening parenthesis
        start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2])
        if not self.parser_input.next_token():
            self.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis"))
            return FunctionNode(start_node, None, None)

        params = self.parse_parameters()
        if self.has_error:
            return FunctionNode(start_node, None, params)

        token = self.parser_input.token
        if not token or token.type != TokenKind.RPAR:
            self.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found",
                                                       token,
                                                       [TokenKind.RPAR]))
            return FunctionNode(start_node, None, params)

        return FunctionNode(start_node,
                            NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]),
                            params)

    def parse_parameters(self):
        # Parse the separator-delimited parameter list; on success the current
        # token is left on the closing parenthesis. Returns the list of
        # FunctionParameter, or None on unexpected EOF.
        nodes = []
        while True:
            param_value = self.parse_parameter_value()
            if not param_value:
                break

            function_parameter = FunctionParameter(param_value)
            nodes.append(function_parameter)

            token = self.parser_input.token
            if token.type == TokenKind.EOF:
                self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters"))
                return None

            if token.type == TokenKind.RPAR:
                break

            if token.value == self.sep:
                sep_pos = self.parser_input.pos
                has_next = self.parser_input.next_token()  # it's before add_sep() to capture trailing whitespace
                function_parameter.add_sep(sep_pos,
                                           self.parser_input.pos - 1,
                                           self.parser_input.tokens[sep_pos: self.parser_input.pos])
                if not has_next:
                    break

        return nodes

    def parse_parameter_value(self):
        # check if the parameter is a function (speculative parse, errors muted)
        start_pos = self.parser_input.pos
        self.record_errors = False
        func = self.parse_function()
        self.record_errors = True
        if func:
            self.parser_input.next_token()
            return func

        # otherwise, rewind and eat tokens until RPAR or separator
        self.parser_input.seek(start_pos)
        self.record_errors = True
        tokens = []
        while True:
            token = self.parser_input.token
            if token is None:
                break

            if token.value == self.sep or token.type == TokenKind.RPAR:
                break

            tokens.append(token)
            # keep whitespace tokens so the parameter's source text is preserved
            if not self.parser_input.next_token(skip_whitespace=False):
                break

        return NameExprNode(start_pos, self.parser_input.pos - 1, tokens) if len(tokens) else None

    def to_source_code_node(self, function_node: FunctionNode):
        # Convert a FunctionNode into one or more source-code nodes, trying to
        # recognize concepts in every parameter; multiple concept matches
        # produce a cartesian product of interpretations.
        python_parser = PythonWithConceptsParser()

        if len(function_node.parameters) == 0:
            # validate the source
            nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
            python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse)
            python_node = python_parsing_res.body.body if python_parsing_res.status else None

            return [SourceCodeNode(start=function_node.first.start,
                                   end=function_node.last.end,
                                   tokens=function_node.first.tokens + function_node.last.tokens,
                                   python_node=python_node,
                                   return_value=python_parsing_res)]

        def update_source_code_node(scn, nodes, sep):
            # append node(s) then the separator (if any) to the source code node
            if hasattr(nodes, "__iter__"):
                for n in nodes:
                    scn.add_node(n)
            else:
                scn.add_node(nodes)

            if sep:
                scn.add_node(sep.to_unrecognized())

        res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(), function_node.last.to_unrecognized())]

        # try to recognize every parameter, one by one
        for param in function_node.parameters:
            if isinstance(param.value, NameExprNode):
                # try to recognize concepts
                unrecognized = param.value.to_unrecognized()
                nodes_sequences = get_lexer_nodes_from_unrecognized(self.context,
                                                                    unrecognized,
                                                                    PARSERS)
            else:
                # the parameter is also a function
                nodes_sequences = self.to_source_code_node(param.value)

            if self.longest_concepts_only:
                nodes_sequences = self.get_longest_concepts(nodes_sequences)

            # NOTE(review): 'unrecognized' is only bound in the NameExprNode
            # branch above; this relies on nodes_sequences never being None for
            # nested-function parameters — confirm.
            if nodes_sequences is None:
                # no concept found
                for source_code_node in res:
                    update_source_code_node(source_code_node, unrecognized, param.separator)

            elif len(nodes_sequences) == 1:
                # only one result
                # It is the same code than when there are multiple results
                # But here, we save the creation of the tmp_res object (not sure it worth it)
                for source_code_node in res:
                    update_source_code_node(source_code_node, nodes_sequences[0], param.separator)
            else:
                # multiple result, make the cartesian product
                tmp_res = []
                for source_code_node in res:
                    instances = get_n_clones(source_code_node, len(nodes_sequences))
                    tmp_res.extend(instances)
                    for instance, node_sequence in zip(instances, nodes_sequences):
                        update_source_code_node(instance, node_sequence, param.separator)
                res = tmp_res

        # check if it is a valid source code
        for source_code_node in res:
            source_code_node.fix_all_pos()
            source_code_node.pseudo_fix_source()

            python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes())
            if python_parsing_res.status:
                source_code_node.python_node = python_parsing_res.body.body
                source_code_node.return_value = python_parsing_res

                # make sure that concepts found can be evaluated
                errors = []
                for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
                    update_compiled(self.context, c, errors)

        return res

    @staticmethod
    def get_longest_concepts(nodes_sequences):
        """
        The longest sequences are the ones that have the less number of concepts
        For example
        'twenty one' resolves to
        [c:twenty one:]
        [c:twenty:, c:one:]
        [c:twenty one:] has only one concept, so it's the longest one (two tokens against one token twice)
        :param nodes_sequences: sequences to filter; may be None
        :return: the sequences of minimal length, or None when input is None
        """
        if nodes_sequences is None:
            return None

        res = []
        min_len = -1
        for current_sequence in nodes_sequences:
            # awful hack to remove when NodeSequence and ConceptSequence will be implemented
            current_len = len(current_sequence) if hasattr(current_sequence, "__len__") else 1
            if len(res) == 0:
                # first sequence seen: it is the minimum so far
                res.append(current_sequence)
                min_len = current_len
            elif current_len == min_len:
                res.append(current_sequence)
            elif current_len < min_len:
                # strictly shorter: discard everything collected so far
                res.clear()
                res.append(current_sequence)
                min_len = current_len

        return res
|