Refactored to allow ConceptEvaluator

This commit is contained in:
2019-11-14 22:04:38 +01:00
parent 576ce77740
commit 9e10e77737
30 changed files with 2406 additions and 1007 deletions
+303 -230
View File
@@ -1,4 +1,6 @@
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.concept import ConceptParts
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
import logging
@@ -8,16 +10,10 @@ log = logging.getLogger(__name__)
@dataclass()
class DefaultParserNode(Node):
# NOTE(review): rendered diff — the removed revision (inline is_same() and
# a tokens field without repr=False) and the added revision (class
# docstring + tokens field with repr=False) are interleaved below.
tokens: list = field(compare=False)
def is_same(self, other):
# Structural equality: same node class, and same 'value' if present.
if type(self) != type(other):
return False
if hasattr(self, "value") and self.value != other.value:
return False
return True
"""
Base node for all default parser nodes
"""
# Excluded from comparison and repr: raw tokens are bookkeeping, not identity.
tokens: list = field(compare=False, repr=False)
@dataclass()
@@ -30,8 +26,8 @@ class UnexpectedTokenErrorNode(DefaultParserErrorNode):
message: str
expected_tokens: list
def __post_init__(self):
log.debug("-> UnexpectedTokenErrorNode: " + self.message)
# def __post_init__(self):
# log.debug("-> UnexpectedTokenErrorNode: " + self.message)
@dataclass()
@@ -41,8 +37,8 @@ class SyntaxErrorNode(DefaultParserErrorNode):
"""
message: str
def __post_init__(self):
log.debug("-> SyntaxErrorNode: " + self.message)
# def __post_init__(self):
# log.debug("-> SyntaxErrorNode: " + self.message)
@dataclass()
@@ -52,117 +48,209 @@ class CannotHandleErrorNode(DefaultParserErrorNode):
"""
text: str
def __post_init__(self):
log.debug("-> CannotHandleErrorNode: " + self.text)
# def __post_init__(self):
# log.debug("-> CannotHandleErrorNode: " + self.text)
#
# @dataclass()
# class NumberNode(DefaultParserNode):
# value: object
#
# def __repr__(self):
# return str(self.value)
#
#
# @dataclass()
# class StringNode(DefaultParserNode):
# value: str
# quote: str
#
# def is_same(self, other):
# if not super(StringNode, self).is_same(other):
# return False
# return self.quote == other.quote
#
# def __repr__(self):
# return self.quote + self.value + self.quote
#
#
# @dataclass()
# class VariableNode(DefaultParserNode):
# value: str
#
# def __repr__(self):
# return self.value
#
#
# @dataclass()
# class TrueNode(DefaultParserNode):
# pass
#
# def __repr__(self):
# return "true"
#
#
# @dataclass()
# class FalseNode(DefaultParserNode):
# pass
#
# def __repr__(self):
# return "false"
#
#
# @dataclass()
# class NullNode(DefaultParserNode):
# pass
#
# def __repr__(self):
# return "null"
#
#
# @dataclass()
# class BinaryNode(DefaultParserNode):
# operator: TokenKind
# left: Node
# right: Node
#
# def is_same(self, other):
# if not super(BinaryNode, self).is_same(other):
# return False
# if self.operator != other.operator:
# return False
# if not self.left.is_same(other.left):
# return False
# return self.right.is_same(other.right)
#
# def __repr__(self):
# return f"({self.left} {self.operator} {self.right})"
#
@staticmethod
def get_concept_key(tokens, variables=None):
    """
    Build a normalized, space-separated key from a token sequence.

    EOF stops the scan, whitespace tokens are dropped, string tokens
    contribute their content without the surrounding quotes, and any
    token whose value appears in *variables* is replaced by a positional
    ``__var__<index>`` placeholder so different variable names yield the
    same key.

    :param tokens: tokens to normalize
    :param variables: optional list of variable names to anonymize
    :return: the normalized key string
    """
    parts = []
    for tok in tokens:
        if tok.type == TokenKind.EOF:
            break
        if tok.type == TokenKind.WHITESPACE:
            continue
        if variables is not None and tok.value in variables:
            parts.append("__var__" + str(variables.index(tok.value)))
        elif tok.type == TokenKind.STRING:
            # strip the surrounding quote characters
            parts.append(tok.value[1:-1])
        else:
            parts.append(tok.value)
    return " ".join(parts)
@dataclass()
class NameNode(DefaultParserNode):
    """
    Node holding the token sequence that forms a concept name.
    Equality and hashing are based on the rendered name string.
    """

    def get_name(self):
        """Render the name: tokens joined by single spaces, whitespace
        tokens skipped, string tokens contributing their unquoted content,
        scanning stopped at EOF."""
        words = []
        for tok in self.tokens:
            if tok.type == TokenKind.EOF:
                break
            if tok.type == TokenKind.WHITESPACE:
                continue
            words.append(tok.value[1:-1] if tok.type == TokenKind.STRING else tok.value)
        return " ".join(words)

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        return isinstance(other, NameNode) and self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())
@dataclass()
class DefConceptNode(DefaultParserNode):
# NOTE(review): rendered diff — the removed fields (plain types with
# None/NopNode defaults) are immediately followed by their replacements
# typed as ReturnValueConcept with NotInitializedNode() sentinels.
name: str
where: Node = None
pre: Node = None
post: Node = None
body: Node = NopNode
name: NameNode = NotInitializedNode()
where: ReturnValueConcept = NotInitializedNode()
pre: ReturnValueConcept = NotInitializedNode()
post: ReturnValueConcept = NotInitializedNode()
body: ReturnValueConcept = NotInitializedNode()
def get_codes(self):
# Collect the parsed AST of each concept part that actually has one.
codes = {}
# removed revision: iterate attribute-name strings and read '.ast'
for prop in ["where", "pre", "post", "body"]:
prop_value = getattr(self, prop)
if hasattr(prop_value, "ast"):
codes[prop] = prop_value.ast
# added revision: iterate the ConceptParts enum and read '.ast_'
for part_key in ConceptParts:
prop_value = getattr(self, part_key.value)
if hasattr(prop_value, "ast_"):
codes[part_key] = prop_value.ast_
return codes
@dataclass()
class NumberNode(DefaultParserNode):
"""Literal value node; repr shows the bare value via str()."""
value: object
def __repr__(self):
return str(self.value)
@dataclass()
class StringNode(DefaultParserNode):
    """String literal node; remembers which quote character the source used."""
    value: str
    quote: str

    def is_same(self, other):
        """Structural equality: base check plus identical quoting."""
        if not super(StringNode, self).is_same(other):
            return False
        return self.quote == other.quote

    def __repr__(self):
        # re-wrap the value in its original quotes
        return "".join((self.quote, self.value, self.quote))
@dataclass()
class VariableNode(DefaultParserNode):
"""Node holding a variable name; repr is the name itself."""
value: str
def __repr__(self):
return self.value
@dataclass()
class TrueNode(DefaultParserNode):
"""Boolean literal node for 'true'; carries no fields of its own."""
pass
def __repr__(self):
return "true"
@dataclass()
class FalseNode(DefaultParserNode):
"""Boolean literal node for 'false'; carries no fields of its own."""
pass
def __repr__(self):
return "false"
@dataclass()
class NullNode(DefaultParserNode):
"""Literal node for 'null'; carries no fields of its own."""
pass
def __repr__(self):
return "null"
@dataclass()
class BinaryNode(DefaultParserNode):
    """Binary operation applied to two child nodes."""
    operator: TokenKind
    left: Node
    right: Node

    def is_same(self, other):
        """Deep structural equality: base check, then operator, then both
        operands (short-circuiting left before right)."""
        if not super(BinaryNode, self).is_same(other):
            return False
        return (
            self.operator == other.operator
            and self.left.is_same(other.left)
            and self.right.is_same(other.right)
        )

    def __repr__(self):
        return f"({self.left} {self.operator} {self.right})"
class DefaultParser(BaseParser):
"""
Parse sheerka specific grammar (like def concept)
"""
# NOTE(review): rendered diff — the removed __init__ accepted an optional
# sub_parser; the added one takes no argument (reset_parser pulls the
# sub-parser from the context instead).
def __init__(self, sub_parser=None):
def __init__(self):
BaseParser.__init__(self, "DefaultParser")
self.sub_parser = sub_parser
# parsing state, populated by reset_parser() before each parse
self.lexer_iter = None
self._current = None
self.context = None
self.text = None
self.sheerka = None
@staticmethod
def fix_indentation(tokens):
"""
In the following example
def concept add one to a as:
def func(x):
return x+1
func(a)
indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error
:param tokens:
:return:
"""
# NOTE(review): this is the removed revision (the diff's added revision
# appears later); here the colon is expected at tokens[0].
if tokens[0].type != TokenKind.COLON:
return tokens
if len(tokens) < 3:
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
if tokens[1].type != TokenKind.NEWLINE:
return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])
if tokens[2].type != TokenKind.WHITESPACE:
return SyntaxErrorNode([tokens[2]], "Indentation not found.")
# the first line's indentation defines the level to strip everywhere
indent_size = len(tokens[2].value)
# now fix the other indentations
i = 3
while i < len(tokens) - 1:
if tokens[i].type == TokenKind.NEWLINE:
if tokens[i + 1].type != TokenKind.WHITESPACE:
return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
if len(tokens[i + 1].value) < indent_size:
return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
# token values are edited in place: one indent level removed
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
i += 1
return tokens[3:]
def reset_parser(self, context, text):
"""Reset per-parse state and start a fresh tokenizer over *text*."""
self.context = context
# hack before implementing all the sub parsers
if context:
self.sub_parser = context.sheerka.parsers[1]
self.sheerka = context.sheerka
self.text = text
self.lexer_iter = iter(Tokenizer(text))
@@ -190,167 +278,152 @@ class DefaultParser(BaseParser):
self._current = None
return
@staticmethod
def get_concept_key(tokens, variables=None):
"""Build a space-separated key from *tokens*: stop at EOF, skip
whitespace, strip quotes from string tokens, and replace values found
in *variables* with positional __var__<index> placeholders."""
key = ""
first = True
for token in tokens:
if token.type == TokenKind.EOF:
break
if token.type == TokenKind.WHITESPACE:
continue
if not first:
key += " "
if variables is not None and token.value in variables:
# anonymize variables by their position in the declaration
key += "__var__" + str(variables.index(token.value))
else:
# string literals contribute their content without the quotes
key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
first = False
return key
@staticmethod
def fix_indentation(tokens):
    """
    Strip one indentation level from a multi-line declaration body.

    In the following example
        def concept add one to a as:
            def func(x):
                return x+1
            func(a)
    indentations in front of 'def func(x)', 'return x+1' and 'func(a)'
    must be fixed to avoid a python syntax error.

    :param tokens: token list; tokens[1] is expected to be the colon
                   opening a multi-line body (otherwise the declaration
                   is single-line and is returned as-is from tokens[1:])
    :return: the body tokens with indentation fixed, or an ErrorNode
             describing the syntax problem
    """
    # single-line declaration: nothing to re-indent
    if tokens[1].type != TokenKind.COLON:
        return tokens[1:]
    # tokens[2] (NEWLINE) and tokens[3] (WHITESPACE) are read below, so at
    # least 4 tokens are required. BUGFIX: the original guarded
    # len(tokens) < 3, letting len == 3 crash with IndexError on tokens[3]
    # instead of returning the error node.
    if len(tokens) < 4:
        return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
    if tokens[2].type != TokenKind.NEWLINE:
        return UnexpectedTokenErrorNode([tokens[2]], "Unexpected token after colon", [TokenKind.NEWLINE])
    if tokens[3].type != TokenKind.WHITESPACE:
        return SyntaxErrorNode([tokens[3]], "Indentation not found")
    # the first line's indentation defines the level to strip everywhere
    indent_size = len(tokens[3].value)
    # now fix the other indentations
    i = 4
    while i < len(tokens) - 1:
        if tokens[i].type == TokenKind.NEWLINE:
            if tokens[i + 1].type != TokenKind.WHITESPACE:
                return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
            if len(tokens[i + 1].value) < indent_size:
                return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
            # token values are edited in place: one indent level removed
            tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
        i += 1
    return tokens[4:]
def parse(self, context, text):
# default parser can only manage string text
if not isinstance(text, str):
log.debug(f"Failed to recognize '{text}'")
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
self.reset_parser(context, text)
# NOTE(review): rendered diff — the removed revision returned the parsed
# statement directly; the added revision wraps it in a PARSER_RESULT.
return self.parse_statement()
tree = self.parse_statement()
# If an error is found it must be sent to error_sink
# tree must contain what was recognized
ret = self.sheerka.ret(
self.name,
not self.has_error,
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=self.error_sink if self.has_error else tree,
try_parsed=tree))
self.log_result(log, text, ret)
return ret
def parse_statement(self):
# Dispatch on the first token: only 'def ...' is handled by this parser.
token = self.get_token()
if token.value == Keywords.DEF:
self.next_token()
# NOTE(review): rendered diff — removed call takes no argument, added
# call forwards the 'def' token.
return self.parse_def_concept()
return self.parse_def_concept(token)
else:
return self.add_error(CannotHandleErrorNode([], self.text))
# NOTE(review): this whole method is a rendered diff — removed (old) and
# added (new) revision lines are interleaved throughout. The old flow built
# 'tokens'/'asts' dicts and a final DefConceptNode; the new flow fills
# 'tokens_found_by_parts'/'asts_found_by_parts' and mutates 'concept_found'.
def parse_def_concept(self):
def parse_def_concept(self, def_token):
"""
def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
"""
def_concept_parts = [Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
# init
log.debug("It may be a definition of a concept")
concept_special_tokens = [def_token]
concept_found = DefConceptNode(concept_special_tokens)
tokens_found = {} # Node token is supposed to be a list, but here, it will be a dict
# the definition of a concept consists of several parts
# Keywords.CONCEPT to get the name of the concept
# Keywords.AS to get the body
# Keywords.WHERE to get the conditions to recognize for the variables
# Keywords.PRE to know if the conditions to evaluate the concept
# Keywords.POST to apply or verify once the concept is executed
def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
# --- old revision: eager 'concept' keyword + name parsing -----------------
token = self.get_token()
if token.value != Keywords.CONCEPT:
return self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
self.next_token()
token = self.get_token()
if token.value in (Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST):
return self.add_error(UnexpectedTokenErrorNode([token], "Concept name is missing.", ["<name>"]))
name_as_tokens = []
while token.type != TokenKind.EOF and token.value not in def_concept_parts:
name_as_tokens.append(token)
self.next_token()
token = self.get_token()
name = self.get_concept_key(name_as_tokens)
tokens_found["name"] = name_as_tokens
# try to parse as, where, pre and post declarations
tokens = {
# tokens found, when trying to recognize the parts
tokens_found_by_parts = {
Keywords.CONCEPT: [],
Keywords.AS: None,
Keywords.WHERE: None,
Keywords.PRE: None,
Keywords.POST: None,
}
current_part = None
current_part = Keywords.CONCEPT
token = self.get_token()
first_token = token
# loop thru the tokens, and put them in the correct tokens_found_by_parts entry
while token.type != TokenKind.EOF:
if token.value in def_concept_parts:
concept_special_tokens.append(token) # keep track of the keywords
keyword = token.value
# old revision aborted on duplicate parts; new revision records the
# error and keeps the duplicate's tokens in the current part.
if tokens[keyword]:
return self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
tokens[keyword] = [token] # first element of the list is the keyword
if tokens_found_by_parts[keyword]:
# a part is defined more than once
self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
tokens_found_by_parts[current_part].append(token) # adds the token again
else:
tokens_found_by_parts[keyword] = [token]
current_part = keyword
self.next_token()
else:
if current_part is None:
return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", def_concept_parts))
else:
tokens[current_part].append(token)
self.next_token(False)
tokens_found_by_parts[current_part].append(token)
self.next_token(False)
token = self.get_token()
for t in tokens:
tokens_found[t.value] = tokens[t]
asts = {
Keywords.AS: NopNode(),
Keywords.WHERE: NopNode(),
Keywords.PRE: NopNode(),
Keywords.POST: NopNode(),
# --- new revision: semantic checks after token collection -----------------
# semantic checks
name_first_token_index = 1
if first_token.value != Keywords.CONCEPT:
self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
name_first_token_index = 0
# Manage the name
name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
if len(name_tokens) == name_first_token_index:
self.add_error(SyntaxErrorNode([], "Name is mandatory"))
if name_tokens[-1].type == TokenKind.NEWLINE:
name_tokens = name_tokens[:-1] # strip trailing newlines
if TokenKind.NEWLINE in [t.type for t in name_tokens]:
self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))
concept_found.name = NameNode(name_tokens[name_first_token_index:]) # skip the first token
asts_found_by_parts = {
Keywords.AS: NotInitializedNode(),
Keywords.WHERE: NotInitializedNode(),
Keywords.PRE: NotInitializedNode(),
Keywords.POST: NotInitializedNode(),
}
# check for empty declarations
for keyword in tokens:
current_tokens = tokens[keyword]
if current_tokens is not None:
if len(current_tokens) == 0: # only one element means empty decl
return self.add_error(SyntaxErrorNode([current_tokens[0]], "Empty declaration"), False)
else:
current_tokens = self.fix_indentation(current_tokens)
if isinstance(current_tokens, ErrorNode):
self.add_error(current_tokens)
continue
# --- per-part parsing: old used self.sub_parser; new asks sheerka ---------
for keyword in tokens_found_by_parts:
if keyword == Keywords.CONCEPT:
continue # already done
# start = current_tokens[0].index
# end = current_tokens[-1].index + len(current_tokens[-1].value)
sub_parser = self.sub_parser(source=keyword.value)
sub_tree = sub_parser.parse(self.context, current_tokens)
if isinstance(sub_tree, ErrorNode):
self.add_error(sub_tree, False)
asts[keyword] = sub_tree
log.debug("Processing part '" + keyword.name + "'")
def_concept_node = DefConceptNode(tokens_found, # dict instead of list is wanted.
name,
asts[Keywords.WHERE],
asts[Keywords.PRE],
asts[Keywords.POST],
asts[Keywords.AS])
tokens = tokens_found_by_parts[keyword]
if tokens is None:
continue # nothing to do
log.debug(f"Found DefConcept node '{def_concept_node}'")
return def_concept_node
if len(tokens) == 1: # check for empty declarations
self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
continue
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
if isinstance(tokens, ErrorNode):
self.add_error(tokens)
continue
# ask the other parsers if they recognize the tokens
new_context = self.context.push(self)
parsing_result = self.sheerka.expect_one(new_context, self.sheerka.parse(new_context, tokens))
if not parsing_result.status:
self.add_error(parsing_result.value)
continue
asts_found_by_parts[keyword] = parsing_result
# attach the parsed parts to the node being built
concept_found.where = asts_found_by_parts[Keywords.WHERE]
concept_found.pre = asts_found_by_parts[Keywords.PRE]
concept_found.post = asts_found_by_parts[Keywords.POST]
concept_found.body = asts_found_by_parts[Keywords.AS]
log.debug(f"Found DefConcept node '{concept_found}'")
return concept_found
# def parse_expression(self):
# return self.parse_addition()