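"""
DefaultParser: parses the sheerka-specific grammar, i.e. 'def concept ...'
definitions and '<concept> isa <set>' statements, producing DefaultParserNode
trees and routing recoverable problems to the parser's error sink.
"""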
from dataclasses import dataclass, field

import core.builtin_helpers
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords, LexerError
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from parsers.BnfParser import BnfParser

@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes.
    """
    tokens: list = field(compare=False, repr=False)


@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    pass


@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    message: str
    expected_tokens: list


@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but there is a syntax error.
    """
    message: str


@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized.
    """
    text: str

@dataclass()
class NameNode(DefaultParserNode):
    """
    A (possibly multi-word) name built from its tokens: whitespace tokens are
    collapsed to single spaces and string tokens lose their surrounding quotes.
    """

    def get_name(self):
        name = ""
        first = True
        for token in self.tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if not first:
                name += " "

            # strip the surrounding quotes from string tokens
            name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
            first = False

        return name

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        if not isinstance(other, NameNode):
            return False

        return self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())

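# For example (hypothetical token streams, assuming the core.tokenizer shapes
# used above): the tokens of `add   "one" to a` normalize to the name
# 'add one to a', so extra whitespace or quoting a word does not change the
# equality or hashing of the resulting NameNode.
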
@dataclass()
class DefConceptNode(DefaultParserNode):
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()

    def get_asts(self):
        """
        Return the parts of this definition whose parsed body carries an AST.
        """
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if (isinstance(prop_value, ReturnValueConcept)
                    and isinstance(prop_value.body, ParserResultConcept)
                    and hasattr(prop_value.body.body, "ast_")):
                asts[part_key] = prop_value
                # asts[part_key] = prop_value.body.body.ast_
        return asts

@dataclass()
class IsaConceptNode(DefaultParserNode):
    """
    Node for a '<concept> isa <set>' statement.
    """
    concept: NameNode = NotInitializedNode()
    set: NameNode = NotInitializedNode()

class DefaultParser(BaseParser):
    """
    Parse the sheerka-specific grammar (like 'def concept').
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Default", 50)
        self.lexer_iter = None
        self._current = None
        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example

            def concept add one to a as:
                def func(x):
                    return x+1
                func(a)

        the indentation in front of 'def func(x):', 'return x+1' and 'func(a)'
        must be reduced to avoid a Python syntax error.

        :param tokens: the declaration tokens; multi-line bodies start with a colon
        :return: the re-indented body tokens, or an ErrorNode
        """
        if tokens[0].type != TokenKind.COLON:
            return tokens

        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])

        if tokens[1].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])

        if tokens[2].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[2]], "Indentation not found.")
        indent_size = len(tokens[2].value)

        # now fix the other indentations
        i = 3
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])

                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")

                # dedent by the width of the first line's indentation
                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1

        return tokens[3:]

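    # Worked example of the dedent arithmetic above (hypothetical widths):
    # if the first body line is indented 4 spaces, indent_size == 4; a line
    # indented 8 spaces is rewritten to " " * (8 - 4), i.e. 4 spaces, so the
    # block becomes valid Python once the enclosing 'as:' indentation is gone.
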
    def reset_parser(self, context, text):
        """
        Prepare the parser for a new run over 'text'.
        """
        self.context = context
        self.sheerka = context.sheerka

        self.text = text
        self.lexer_iter = iter(Tokenizer(text))
        self._current = None

        self.next_token()

    def add_error(self, error, next_token=True):
        """
        Record an error in the error sink and, by default, advance to the
        next token so parsing can resume.
        """
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self._current

    def next_token(self, skip_whitespace=True):
        """
        Advance to the next token; with skip_whitespace, whitespace and
        newline tokens are skipped. The current token becomes None at the
        end of the stream.
        """
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None

    def parse(self, context, text):
        # the default parser can only handle string input
        if not isinstance(text, str):
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
            self.log_result(context, text, ret)
            return ret

        tree = None
        try:
            self.reset_parser(context, text)
            tree = self.parse_statement()
        except LexerError as e:
            self.add_error(e, False)

        # If an error is found it must be sent to error_sink;
        # tree must contain what was recognized.

        if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
            body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
        else:
            body = self.get_return_value_body(context.sheerka, text, tree, tree)
            # body = self.sheerka.new(
            #     BuiltinConcepts.PARSER_RESULT,
            #     parser=self,
            #     source=text,
            #     body=self.error_sink if self.has_error else tree,
            #     try_parsed=tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            body)

        self.log_result(context, text, ret)
        return ret

    def parse_statement(self):
        token = self.get_token()
        if token.value == Keywords.DEF:
            self.next_token()
            self.context.log(self.verbose_log, "Keyword DEF found.", self.name)
            return self.parse_def_concept(token)
        else:
            return self.parse_isa_concept()

    def parse_def_concept(self, def_token):
        """
        def concept name [from [bnf] xxx] [where xxx] [pre xxx] [post xxx] [as xxx]
        """

        # init
        keywords_tokens = [def_token]
        concept_found = DefConceptNode(keywords_tokens)

        # The definition of a concept consists of several parts:
        #   Keywords.CONCEPT to get the name of the concept
        #   Keywords.FROM [Keywords.BNF] to get the definition of the concept
        #   Keywords.AS to get the body
        #   Keywords.WHERE to get the conditions the variables must satisfy
        #   Keywords.PRE for the conditions to check before evaluating the concept
        #   Keywords.POST for the conditions to apply or verify once the concept is executed
        #
        # Regroup the tokens by parts
        first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)

        if first_token.type == TokenKind.EOF:
            return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))

        # get the name
        concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)

        # get the definition
        concept_found.definition = self.get_concept_definition(concept_found, tokens_found_by_parts)

        # get the ASTs for the remaining parts
        asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
        concept_found.where = asts_found_by_parts[Keywords.WHERE]
        concept_found.pre = asts_found_by_parts[Keywords.PRE]
        concept_found.post = asts_found_by_parts[Keywords.POST]
        concept_found.body = asts_found_by_parts[Keywords.AS]

        return concept_found

    def parse_isa_concept(self):
        """
        concept_name isa set_name
        """
        concept_name = self.parse_concept_name()
        if isinstance(concept_name, DefaultParserErrorNode):
            return concept_name

        keyword = []
        token = self.get_token()
        if token.value != Keywords.ISA:
            return self.add_error(CannotHandleErrorNode([token], ""))
        keyword.append(token)
        self.next_token()

        set_name = self.parse_concept_name()
        return IsaConceptNode(keyword, concept_name, set_name)

    def parse_concept_name(self):
        """
        Collect tokens up to the next keyword (or EOF) into a NameNode.
        """
        tokens = []
        token = self.get_token()

        while not (token.type == TokenKind.EOF or token.type == TokenKind.KEYWORD):
            tokens.append(token)
            self.next_token()
            token = self.get_token()

        if len(tokens) == 0:
            return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
        else:
            return NameNode(tokens)

    def regroup_tokens_by_parts(self, keywords_tokens):

        def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]

        # tokens found while trying to recognize the parts
        tokens_found_by_parts = {
            Keywords.CONCEPT: [],
            Keywords.FROM: None,
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = Keywords.CONCEPT
        token = self.get_token()
        first_token = token

        # loop through the tokens and put each one in the correct tokens_found_by_parts entry
        while token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                keywords_tokens.append(token)  # keep track of the keywords
                keyword = token.value
                if tokens_found_by_parts[keyword]:
                    # the part is defined more than once; add_error advances to the next token
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    tokens_found_by_parts[current_part].append(token)  # keep the token in the current part
                else:
                    tokens_found_by_parts[keyword] = [token]
                    current_part = keyword
                    self.next_token()
            else:
                tokens_found_by_parts[current_part].append(token)
                self.next_token(False)  # keep whitespace: indentation matters for the bodies

            token = self.get_token()

        return first_token, tokens_found_by_parts

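    # For example (hypothetical input): in
    #     def concept add one to a as: a + 1
    # the tokens of 'add one to a' land in the Keywords.CONCEPT entry, while
    # the 'as' keyword followed by ': a + 1' (whitespace preserved) lands in
    # the Keywords.AS entry.
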
    def get_concept_name(self, first_token, tokens_found_by_parts):
        name_first_token_index = 1
        token = self.get_token()
        if first_token.value != Keywords.CONCEPT:
            self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0

        name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
        if len(name_tokens) == name_first_token_index:
            self.add_error(SyntaxErrorNode([], "Name is mandatory"))
            return NotInitializedNode()

        if name_tokens[-1].type == TokenKind.NEWLINE:
            name_tokens = name_tokens[:-1]  # strip the trailing newline

        if TokenKind.NEWLINE in [t.type for t in name_tokens]:
            self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newlines are not allowed in the name."))

        name_node = NameNode(name_tokens[name_first_token_index:])  # skip the keyword token
        return name_node

    def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
        """
        Parse the 'from bnf ...' part of a definition, if present.
        """
        if tokens_found_by_parts[Keywords.FROM] is None:
            return NotInitializedNode()

        definition_tokens = tokens_found_by_parts[Keywords.FROM]
        if len(definition_tokens) < 2 or definition_tokens[1].value != Keywords.BNF:
            return NotInitializedNode()

        tokens = core.utils.strip_tokens(definition_tokens[2:])
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
            return NotInitializedNode()

        bnf_parser = BnfParser()
        with self.context.push(self.name, obj=current_concept_def) as sub_context:
            parsing_result = bnf_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)

        if not parsing_result.status:
            self.add_error(parsing_result.value)
            return NotInitializedNode()

        return parsing_result

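    # Hypothetical example of a definition carrying a BNF grammar:
    #     def concept digit from bnf <rule>
    # everything after the 'bnf' keyword is handed to BnfParser as raw tokens.
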
    def get_concept_parts(self, tokens_found_by_parts):
        asts_found_by_parts = {
            Keywords.AS: NotInitializedNode(),
            Keywords.WHERE: NotInitializedNode(),
            Keywords.PRE: NotInitializedNode(),
            Keywords.POST: NotInitializedNode(),
        }

        for keyword in tokens_found_by_parts:
            if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
                continue  # already done

            tokens = tokens_found_by_parts[keyword]
            if tokens is None:
                continue  # nothing to do

            if len(tokens) == 1:  # check for empty declarations
                self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
                continue

            tokens = self.fix_indentation(tokens[1:])  # manage multi-line declarations
            if isinstance(tokens, ErrorNode):
                self.add_error(tokens)
                continue

            # ask the other parsers whether they recognize the tokens
            with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context:
                sub_context.log_new(self.verbose_log)
                to_parse = self.sheerka.ret(
                    sub_context.who,
                    True,
                    self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
                steps = [BuiltinConcepts.PARSING]
                parsed = self.sheerka.execute(sub_context, to_parse, steps, self.verbose_log)
                parsing_result = core.builtin_helpers.expect_one(sub_context, parsed, self.verbose_log)
                sub_context.add_values(return_values=parsing_result)

            if not parsing_result.status:
                self.add_error(parsing_result.value)
                continue

            asts_found_by_parts[keyword] = parsing_result

        return asts_found_by_parts
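
# A minimal usage sketch (hypothetical: the construction of the execution
# context and the surrounding sheerka runtime are assumptions, not shown in
# this file):
#
#     parser = DefaultParser()
#     result = parser.parse(context, "def concept add one to a as: a + 1")
#     # result carries 'not parser.has_error' as its status; errors, if any,
#     # are accumulated in parser.error_sink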