Refactored parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser to DefConceptParser

This commit is contained in:
2020-10-02 04:45:47 +02:00
parent d100b7e8b3
commit e8f2705dcf
28 changed files with 1411 additions and 872 deletions
+274
View File
@@ -0,0 +1,274 @@
from dataclasses import dataclass, field
import core.builtin_helpers
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import TokenKind, Keywords
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode
from parsers.BaseParser import Node, ErrorNode, NotInitializedNode, UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser
class ParsingException(Exception):
    """Raised when parsing fails badly enough to abort the current parse.

    The original error object is kept on ``self.error`` so callers can
    inspect the structured error node rather than only its string form.
    """

    def __init__(self, error):
        # Forward to Exception so str(e) and e.args carry the error too
        # (the original left them empty).
        super().__init__(error)
        self.error = error
@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    # Raw tokens this node was built from; excluded from equality and repr
    # because the token list is positional bookkeeping, not node identity.
    tokens: list = field(compare=False, repr=False)
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    """Base class for all error nodes emitted by the default parser."""
    pass
@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    # The raw input text that this parser could not handle.
    text: str
@dataclass()
class NameNode(DefaultParserNode):
    """Node whose identity is the whitespace-normalized name spelled by its tokens.

    Two NameNodes compare (and hash) equal when they spell the same name,
    regardless of the underlying token objects.
    """

    def get_name(self):
        """Return the name: token values joined by single spaces.

        Whitespace tokens are skipped, collection stops at EOF, and string
        tokens contribute their value with the surrounding quotes removed.
        """
        pieces = []
        for tok in self.tokens:
            if tok.type == TokenKind.EOF:
                break
            if tok.type == TokenKind.WHITESPACE:
                continue
            if tok.type == TokenKind.STRING:
                pieces.append(tok.value[1:-1])
            else:
                pieces.append(str(tok.value))
        return " ".join(pieces)

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        # Name equality, not token equality.
        return isinstance(other, NameNode) and self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())
@dataclass()
class DefConceptNode(DefaultParserNode):
    """Result of parsing a 'def concept' declaration: one attribute per part."""
    # NOTE(review): each default below is a single NotInitializedNode instance
    # created at class-definition time and shared by all DefConceptNode
    # instances (dataclass defaults are evaluated once). Fine if the sentinel
    # is immutable — confirm; otherwise use field(default_factory=...).
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    ret: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()
    # Either DEFINITION_TYPE_BNF or DEFINITION_TYPE_DEF (or None when absent).
    definition_type: str = None

    def get_asts(self):
        """Return {ConceptParts member: value} for every part that parsed down
        to an AST, i.e. a ReturnValueConcept wrapping a ParserResultConcept
        whose body carries an 'ast_' attribute.
        """
        asts = {}
        for part_key in ConceptParts:
            # ConceptParts member values mirror the attribute names above.
            prop_value = getattr(self, part_key.value)
            if isinstance(prop_value, ReturnValueConcept) and \
                    isinstance(prop_value.body, ParserResultConcept) and \
                    hasattr(prop_value.body.body, "ast_"):
                asts[part_key] = prop_value
        return asts
@dataclass()
class IsaConceptNode(DefaultParserNode):
    """Node for an 'isa' declaration: <concept> is a member of <set>."""
    concept: NameNode = NotInitializedNode()
    # NOTE(review): 'set' shadows the builtin, but renaming would break callers.
    set: NameNode = NotInitializedNode()
class DefConceptParser(BaseCustomGrammarParser):
    """
    Parse sheerka specific grammar (like def concept)
    """
    # Keywords that delimit the parts of a 'def concept' declaration.
    KEYWORDS = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST, Keywords.RET]
    # Raw string values of the keywords, used to split the token stream.
    KEYWORDS_VALUES = [k.value for k in KEYWORDS]
def __init__(self, **kwargs):
BaseCustomGrammarParser.__init__(self, "DefConcept", 60)
def parse(self, context, parser_input: ParserInput):
# default parser can only manage string text
if parser_input.from_tokens:
ret = context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
context.log(f"Parsing '{parser_input}' with FunctionParser", self.name)
sheerka = context.sheerka
if parser_input.is_empty():
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
self.parser_input.next_token()
node = self.parse_def_concept()
body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
ret = sheerka.ret(self.name, not self.has_error, body)
self.log_result(context, parser_input.as_text(), ret)
return ret
def parse_def_concept(self):
"""
def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
"""
token = self.parser_input.token
if token.value != Keywords.DEF.value:
self.add_error(UnexpectedTokenErrorNode("'def' keyword not found.", token, [Keywords.DEF]))
return None
self.context.log("Keyword DEF found.", self.name)
keywords_found = [token]
self.parser_input.next_token()
# ## the definition of a concept consists of several parts
# Keywords.CONCEPT to get the name of the concept
# Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept
# Keywords.AS to get the body
# Keywords.WHERE to get the conditions to recognize for the variables
# Keywords.PRE to know if the conditions to evaluate the concept
# Keywords.POST to apply or verify once the concept is executed
# Keywords.RET to transform the concept into another concept
parts = self.get_parts(self.KEYWORDS_VALUES, expected_first_token=Keywords.CONCEPT)
if parts is None:
return None
keywords_found.extend([t[0] for t in parts.values()]) # keep track of all keywords found
node = DefConceptNode(keywords_found)
# if first_token.type == TokenKind.EOF:
# return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))
# get the name
node.name = self.get_concept_name(parts[Keywords.CONCEPT])
# get definition
node.definition_type, node.definition = self.get_concept_definition(node, parts)
# get the bodies
node.body = self.get_ast(Keywords.AS, parts)
node.where = self.get_ast(Keywords.WHERE, parts)
node.pre = self.get_ast(Keywords.PRE, parts)
node.post = self.get_ast(Keywords.POST, parts)
node.ret = self.get_ast(Keywords.RET, parts)
return node
def get_concept_name(self, tokens):
name_tokens = core.utils.strip_tokens(tokens[1:])
if len(name_tokens) == 0:
self.add_error(SyntaxErrorNode([], "Name is mandatory"))
return None
for token in name_tokens:
if token.type == TokenKind.NEWLINE:
self.add_error(SyntaxErrorNode([token], "Newline are not allowed in name."))
return None
name_node = NameNode(name_tokens) # skip the first token
return name_node
    def get_concept_definition(self, current_concept_def, parts):
        """Extract the 'from ...' definition of the concept, if any.

        Returns a (definition_type, definition) pair;
        (None, NotInitializedNode()) when there is no usable definition.
        """
        if Keywords.FROM not in parts:
            return None, NotInitializedNode()
        tokens = parts[Keywords.FROM]
        # tokens[0] is the 'from' keyword itself; a lone keyword is an error.
        # NOTE(review): the trailing False presumably marks the error as
        # non-fatal — confirm against add_error's signature.
        if len(tokens) == 1:
            self.add_error(SyntaxErrorNode([], f"Empty '{tokens[0].value}' declaration."), False)
            return None, NotInitializedNode()
        # 'from bnf ...' hands the remainder to the BNF grammar parser.
        if tokens[1].value == Keywords.BNF.value:
            return self.get_concept_bnf_definition(current_concept_def, core.utils.strip_tokens(tokens[2:]))
        # Otherwise treat it as a plain (optionally 'def'-prefixed) name.
        return self.get_concept_simple_definition(core.utils.strip_tokens(tokens[0:]))
    def get_concept_bnf_definition(self, current_concept_def, tokens):
        """Parse a 'from bnf ...' definition with the BNF parser.

        Returns (DEFINITION_TYPE_BNF, parsing_result) on success, or
        (None, NotInitializedNode()) after recording an error.
        """
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Empty 'bnf' declaration"), False)
            return None, NotInitializedNode()
        # A leading colon introduces a block body: collect it first.
        if tokens[0].type == TokenKind.COLON:
            tokens = self.get_body(tokens[1:])
        bnf_regex_parser = BnfParser()
        desc = f"Resolving BNF {current_concept_def.definition}"
        # Run the BNF parser inside a sub-context so its results and logs are
        # scoped to this concept's INIT_BNF step.
        with self.context.push(BuiltinConcepts.INIT_BNF,
                               current_concept_def,
                               who=self.name,
                               obj=current_concept_def,
                               desc=desc) as sub_context:
            parsing_result = bnf_regex_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)
            if not parsing_result.status:
                self.add_error(parsing_result.value)
                return None, NotInitializedNode()
            return DEFINITION_TYPE_BNF, parsing_result
def get_concept_simple_definition(self, tokens):
start = 2 if tokens[1].value == Keywords.DEF.value else 1
tokens = core.utils.strip_tokens(tokens[start:])
if len(tokens) == 0:
self.add_error(SyntaxErrorNode([], f"Empty 'from' declaration."), False)
return None, NotInitializedNode()
if tokens[0].type == TokenKind.COLON:
tokens = self.get_body(tokens[1:])
return DEFINITION_TYPE_DEF, NameNode(tokens)
def get_ast(self, keyword, parts):
if keyword not in parts:
return NotInitializedNode()
tokens = parts[keyword]
if len(tokens) == 1:
self.add_error(SyntaxErrorNode(tokens, f"Empty '{tokens[0].value}' declaration."))
return None
source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens[1:])
parsed = core.builtin_helpers.parse_unrecognized(self.context,
source,
parsers="all",
who=self.name,
prop=keyword,
filter_func=core.builtin_helpers.expect_one)
if not parsed.status:
self.add_error(parsed.value)
return None
return parsed