Files
Sheerka-Old/src/parsers/FunctionParser.py
T
kodjo cac2dad17f Implemented some enhancement request and fixed some bugs
Fixed #2 : Variables are not recognized when inside a rule token
Fixed #15 : Rule: rete attributes are lost when a new ontology is created
Fixed #14 : ReteNetwork: Format rules must not be added to Rete network
Fixed #16 : DefConcept: Variables are not recognized when they are keyword arguments
Fixed #4 : Comparisons are not correctly set when the comparison property is a concept
Fixed #14 : Parser: merge FunctionParser.NamesNode and ExpressionParser.NamesNode
Fixed #18 : Parser: Add SourceCodeNode test to UnrecognizedNodeParser
Fixed #20 : At startup, the Number concept is saved in the db numerous times
Fixed #21 : CacheManager: I can remove all elements from a ListIfNeededCache and fill it again
Fixed #22 : CacheManager: I can remove all elements from a SetCache and fill it again
Fixed #23 : HistoryManager: history() no longer works
Fixed #24 : HistoryManager: history() no longer works after creating an exec rule
Fixed #25 : SheerkaMemory: Use MemoryObject instead of sheerka.local
Fixed #26 : Debugger: add the list all available services..
Fixed #27 : CONCEPTS_GRAMMARS_ENTRY does not seem to be in use any more
Fixed #28 : Give order to services
2021-02-12 15:15:31 +01:00

399 lines
16 KiB
Python

from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, Node
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.SyaNodeParser import SyaNodeParser
from parsers.expressions import NameExprNode
# Names of the parsers handed to get_lexer_nodes_from_unrecognized() when
# FunctionParser tries to recognize concepts inside a function parameter.
PARSERS = [
    RuleParser.NAME,
    SequenceNodeParser.NAME,
    BnfNodeParser.NAME,
    SyaNodeParser.NAME,
]
@dataclass
class FunctionParserNode(Node):
    """
    Common base class of the nodes produced by the FunctionParser.
    It adds nothing to Node; it only gives these nodes a shared type.
    """
@dataclass
class FunctionParameter:
    """
    Class that represents the result of the parsing of one parameter:
    the parsed value and, when present, the separator that follows it.
    """
    # Value parsed. FunctionParser.parse_parameter_value() may also store a
    # nested FunctionNode here when the parameter is itself a function call.
    value: NameExprNode
    # Holds the value and the position of the separator (None when there is none).
    separator: NameExprNode = None

    def add_sep(self, start, end, tokens):
        """
        Record the separator that follows the value.
        :param start: position of the first token of the separator
        :param end: position of the last token of the separator
        :param tokens: tokens that make the separator
        """
        self.separator = NameExprNode(start, end, tokens)

    def value_to_unrecognized(self):
        """
        Build an UnrecognizedTokensNode from the start, end and tokens of the value
        :return: the result of fix_source() on that node
        """
        parsed = self.value
        node = UnrecognizedTokensNode(parsed.start, parsed.end, parsed.tokens)
        return node.fix_source()

    def separator_to_unrecognized(self):
        """
        Build an UnrecognizedTokensNode from the start, end and tokens of the separator
        :return: the result of fix_source() on that node, or None when there is no separator
        """
        sep = self.separator
        if sep is not None:
            node = UnrecognizedTokensNode(sep.start, sep.end, sep.tokens)
            return node.fix_source()
        return None
@dataclass
class FunctionNode(FunctionParserNode):
    """
    Result of the parsing of one function call.
    FunctionParser.parse_function() sets 'last' (and sometimes 'parameters')
    to None when the parsing of the function could not be completed.
    """
    # Beginning of the function (it should represent the name of the function).
    first: NameExprNode
    # Last part of the function (it should be the trailing parenthesis).
    last: NameExprNode
    # FunctionParameter items, in the order they were parsed.
    parameters: list
class FN(FunctionNode):
    """
    Test class only
    It matches with FunctionNode but with less constraints

    Thereby,
    FN("first", "last", ["param1, ", ...]) can be compared to
    FunctionNode(NameExprNode("first"),
                 NameExprNode("last"),
                 [FunctionParameter(NameExprNode("param1"), NameExprNode(", ")), ...])

    Note that a FunctionParameter can easily be defined with a single string
    * "param"   -> FunctionParameter(NameExprNode("param"), None)
    * "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", "))
    (the string is split at the first ",")

    For more complicated situations, you can use a tuple (value, sep)
    to define the value part and the separator part
    """

    def __init__(self, first, last, parameters):
        """
        :param first: expected value of the first part of the function
        :param last: expected value of the last part of the function
        :param parameters: iterable of str, of (value, sep) tuples or of any other object.
            Everything is normalized to a (value, sep) tuple
        """
        # NOTE: the dataclass __init__ of FunctionNode is deliberately not called,
        # the attributes are set directly
        self.first = first
        self.last = last
        self.parameters = []
        for param in parameters:
            if isinstance(param, tuple):
                # already in the (value, sep) form
                self.parameters.append(param)
            elif isinstance(param, str) and (pos := param.find(",")) != -1:
                # "value, " -> ("value", ", ")
                self.parameters.append((param[:pos], param[pos:]))
            else:
                # no separator
                self.parameters.append((param, None))

    def __eq__(self, other):
        """
        An FN is equal to
        * itself
        * another FN with the same first, last and parameters
        * a FunctionNode whose first/last values and whose parameters (value and separator) match
        """
        if id(self) == id(other):
            return True

        if isinstance(other, FN):
            return self.first == other.first and self.last == other.last and self.parameters == other.parameters

        if isinstance(other, FunctionNode):
            # 'last' is None when the function was only partially parsed,
            # so it must not be blindly dereferenced
            other_last = other.last.value if other.last is not None else None
            if self.first != other.first.value or self.last != other_last:
                return False

            if len(self.parameters) != len(other.parameters):
                return False

            for self_parameter, other_parameter in zip(self.parameters, other.parameters):
                # a str is compared with the value of the node,
                # anything else (another FN for example) is compared with the node itself
                value = other_parameter.value.value if isinstance(self_parameter[0], str) else other_parameter.value
                sep = other_parameter.separator.value if other_parameter.separator else None
                if self_parameter[0] != value or self_parameter[1] != sep:
                    return False

            return True

        return False

    def __hash__(self):
        # self.parameters is a list, which is not hashable: hash an immutable copy
        return hash((self.first, self.last, tuple(self.parameters)))
class FunctionParser(BaseParser):
    """
    The parser will be used to parse func(x, y, z)
    where x, y and z can be source code, concepts or other functions
    It will return a SourceCodeNode or a SourceCodeWithConceptNode
    """

    def __init__(self, sep=",", longest_concepts_only=True, **kwargs):
        """
        :param sep: value of the token that separates two parameters
        :param longest_concepts_only: When multiples concepts are found, only keep the longest one
          so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
        :param kwargs: accepted but not used by this parser
        """
        # NOTE(review): 55 is presumably the priority of the parser - confirm against BaseParser
        super().__init__("Function", 55)
        self.sep = sep
        self.longest_concepts_only = longest_concepts_only
        # when False, add_error() silently drops the errors (used for the tentative parsings)
        self.record_errors = True

    def add_error(self, error, next_token=True):
        """
        Forward the error to BaseParser.add_error(), unless the recording of errors is disabled
        :param error: error to record
        :param next_token: forwarded as is to BaseParser.add_error()
        :return: None when the error is dropped, else what BaseParser.add_error() returns
        """
        if not self.record_errors:
            return

        return super().add_error(error, next_token)

    def parse(self, context, parser_input: ParserInput):
        """
        Parse one (and only one) function
        :param context: execution context (gives access to sheerka and to the log)
        :param parser_input: input to parse
        :return: None if parser_input is not a ParserInput
                 a failed return value (IS_EMPTY, ERROR or NOT_FOR_ME) when the input cannot be parsed
                 otherwise one return value per source code node found
                 (the return value itself, not a list, when there is only one)
        """

        if not isinstance(parser_input, ParserInput):
            return None

        context.log(f"Parsing '{parser_input}' with FunctionParser", self.name)
        sheerka = context.sheerka

        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.IS_EMPTY))

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        self.parser_input.next_token()

        node = self.parse_function()

        # nothing is allowed after the function
        if self.parser_input.next_token():
            self.add_error(UnexpectedTokenParsingError("Only one function supported",
                                                       self.parser_input.token,
                                                       [TokenKind.EOF]))

        if self.has_error:
            if node is None:
                # it does not even look like a function
                body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
                                           body=parser_input.as_text(),
                                           reason=self.error_sink)
            else:
                # it starts like a function, but it is not a valid one
                body = context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink)
            return self.sheerka.ret(self.name, False, body)

        source_code_nodes = self.to_source_code_node(node)

        res = []
        for source_code_node in source_code_nodes:
            value = self.get_return_value_body(context.sheerka,
                                               self.parser_input.as_text(),
                                               source_code_node,
                                               source_code_node)

            # the result is a success only if the node is a valid python source code
            res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, value))

        return res[0] if len(res) == 1 else res

    def parse_function(self):
        """
        Parse 'identifier(parameters)' starting from the current token
        :return: None if the input does not start with 'identifier('
                 a FunctionNode with 'last' set to None if the function cannot be fully parsed
                 otherwise the complete FunctionNode
                 (on success, the current token is the right parenthesis)
        """
        start = self.parser_input.pos
        token = self.parser_input.token
        if token.type != TokenKind.IDENTIFIER:
            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
                                                       token,
                                                       [TokenKind.IDENTIFIER]))
            return None

        if not self.parser_input.next_token():
            self.add_error(UnexpectedEofParsingError("Unexpected EOF while parsing left parenthesis"))
            return None

        token = self.parser_input.token
        if token.type != TokenKind.LPAR:
            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
                                                       token,
                                                       [TokenKind.LPAR]))
            return None

        # the first node is made of the tokens from 'start' to 'start + 1' (both included)
        start_node = NameExprNode(start, start + 1, self.parser_input.tokens[start:start + 2])
        if not self.parser_input.next_token():
            self.add_error(UnexpectedEofParsingError("Unexpected EOF after left parenthesis"))
            return FunctionNode(start_node, None, None)

        params = self.parse_parameters()
        if self.has_error:
            return FunctionNode(start_node, None, params)

        token = self.parser_input.token
        if not token or token.type != TokenKind.RPAR:
            self.add_error(UnexpectedTokenParsingError("Right parenthesis not found",
                                                       token,
                                                       [TokenKind.RPAR]))
            return FunctionNode(start_node, None, params)

        return FunctionNode(start_node,
                            NameExprNode(self.parser_input.pos, self.parser_input.pos, [token]),
                            params)

    def parse_parameters(self):
        """
        Parse the parameters until no more value can be read, the right parenthesis or the end of the input
        :return: list of FunctionParameter (may be empty)
                 None (with an error added) if the end of the input is reached after a value
        """
        nodes = []
        while True:
            param_value = self.parse_parameter_value()
            if not param_value:
                break

            function_parameter = FunctionParameter(param_value)
            nodes.append(function_parameter)

            token = self.parser_input.token
            # the other methods of this class consider that the current token may be None,
            # so a missing token is handled like an EOF token instead of raising an AttributeError
            if token is None or token.type == TokenKind.EOF:
                self.add_error(UnexpectedEofParsingError("Unexpected EOF while parsing parameters"))
                return None

            if token.type == TokenKind.RPAR:
                break

            if token.value == self.sep:
                sep_pos = self.parser_input.pos
                has_next = self.parser_input.next_token()  # it's before add_sep() to capture trailing whitespace
                function_parameter.add_sep(sep_pos,
                                           self.parser_input.pos - 1,
                                           self.parser_input.tokens[sep_pos: self.parser_input.pos])
                if not has_next:
                    break

        return nodes

    def parse_parameter_value(self):
        """
        Parse the value of one parameter
        :return: a FunctionNode if the parameter is itself a function
                 a NameExprNode made of the tokens found before the separator or the right parenthesis
                 None if no token can be read
        """
        # check if the parameter is a function
        start_pos = self.parser_input.pos
        self.record_errors = False  # tentative parsing, a failure is not an error
        try:
            func = self.parse_function()
        finally:
            # always restore the recording of errors, even if parse_function() raises,
            # otherwise the parser would silently drop all the subsequent errors
            self.record_errors = True

        # NOTE(review): a partially parsed FunctionNode ('last' is None) is an object too,
        # so unless Node defines its own truthiness it is accepted here - to be confirmed
        if func:
            self.parser_input.next_token()
            return func

        # otherwise, eat until RPAR or separator
        self.parser_input.seek(start_pos)

        tokens = []
        while True:
            token = self.parser_input.token
            if token is None:
                break

            if token.value == self.sep or token.type == TokenKind.RPAR:
                break

            tokens.append(token)
            # whitespaces are kept, they are part of the value
            if not self.parser_input.next_token(skip_whitespace=False):
                break

        return NameExprNode(start_pos, self.parser_input.pos - 1, tokens) if tokens else None

    def to_source_code_node(self, function_node: FunctionNode):
        """
        Transform the FunctionNode into source code nodes
        :param function_node: fully parsed function ('last' and 'parameters' must be set)
        :return: list with one SourceCodeNode when the function has no parameter
                 otherwise list of SourceCodeWithConceptNode,
                 one per combination of the possible interpretations of the parameters
        """
        python_parser = PythonWithConceptsParser()

        if len(function_node.parameters) == 0:
            # validate the source
            nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
            python_parsing_res = python_parser.parse_nodes(self.context, nodes_to_parse)
            python_node = python_parsing_res.body.body if python_parsing_res.status else None

            return [SourceCodeNode(start=function_node.first.start,
                                   end=function_node.last.end,
                                   tokens=function_node.first.tokens + function_node.last.tokens,
                                   python_node=python_node,
                                   return_value=python_parsing_res)]

        def update_source_code_node(scn, nodes, sep):
            # add the node(s) of the parameter, then its separator (if any), to the source code node
            if hasattr(nodes, "__iter__"):
                for n in nodes:
                    scn.add_node(n)
            else:
                scn.add_node(nodes)

            if sep:
                scn.add_node(sep.to_unrecognized())

        res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(), function_node.last.to_unrecognized())]

        # try to recognize every parameter, one by one
        for param in function_node.parameters:
            if isinstance(param.value, NameExprNode):
                # try to recognize concepts
                unrecognized = param.value.to_unrecognized()
                nodes_sequences = get_lexer_nodes_from_unrecognized(self.context,
                                                                    unrecognized,
                                                                    PARSERS)
            else:
                # the parameter is also a function
                nodes_sequences = self.to_source_code_node(param.value)

            if self.longest_concepts_only:
                nodes_sequences = self.get_longest_concepts(nodes_sequences)

            if nodes_sequences is None:
                # no concept found
                # It can only happen for a NameExprNode parameter (to_source_code_node() always
                # returns a list), so 'unrecognized' is the one computed for this parameter
                for source_code_node in res:
                    update_source_code_node(source_code_node, unrecognized, param.separator)
            elif len(nodes_sequences) == 1:
                # only one result
                # It is the same code than when there are multiple results
                # But here, we save the creation of the tmp_res object (not sure it worth it)
                for source_code_node in res:
                    update_source_code_node(source_code_node, nodes_sequences[0], param.separator)
            else:
                # multiple result, make the cartesian product
                tmp_res = []
                for source_code_node in res:
                    instances = get_n_clones(source_code_node, len(nodes_sequences))
                    tmp_res.extend(instances)
                    for instance, node_sequence in zip(instances, nodes_sequences):
                        update_source_code_node(instance, node_sequence, param.separator)
                res = tmp_res

        # check if it is a valid source code
        for source_code_node in res:
            source_code_node.fix_all_pos()
            source_code_node.pseudo_fix_source()
            python_parsing_res = python_parser.parse_nodes(self.context, source_code_node.get_all_nodes())
            if python_parsing_res.status:
                source_code_node.python_node = python_parsing_res.body.body
                source_code_node.return_value = python_parsing_res

                # make sure that concepts found can be evaluated
                # NOTE(review): the errors collected here are never read - confirm that it is intended
                errors = []
                for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
                    update_compiled(self.context, c, errors)

        return res

    @staticmethod
    def get_longest_concepts(nodes_sequences):
        """
        The longest sequences are the ones that have the smallest number of concepts
        For example
        'twenty one' resolves to
          [c:twenty one:]
          [c:twenty:, c:one:]
        [c:twenty one:] has only one concept, so it's the longest one (two tokens against one token twice)
        :param nodes_sequences: sequences to filter (an item with no __len__ counts for one element)
        :return: None if nodes_sequences is None,
                 otherwise the list of all the sequences that have the smallest length, in their original order
        """
        if nodes_sequences is None:
            return None

        res = []
        min_len = -1
        for current_sequence in nodes_sequences:
            # awful hack to remove when NodeSequence and ConceptSequence will be implemented
            current_len = len(current_sequence) if hasattr(current_sequence, "__len__") else 1
            if len(res) == 0:
                # first sequence, it is the reference
                res.append(current_sequence)
                min_len = current_len
            elif current_len == min_len:
                res.append(current_sequence)
            elif current_len < min_len:
                # shorter sequence found, forget the previous ones
                res.clear()
                res.append(current_sequence)
                min_len = current_len

        return res