Refactored parsers: introduced BaseCustomGrammarParser and renamed DefaultParser to DefConceptParser.
This commit is contained in:
@@ -0,0 +1,274 @@
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
import core.builtin_helpers
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
|
||||
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
|
||||
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
|
||||
from core.tokenizer import TokenKind, Keywords
|
||||
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode
|
||||
from parsers.BaseParser import Node, ErrorNode, NotInitializedNode, UnexpectedTokenErrorNode
|
||||
from parsers.BnfParser import BnfParser
|
||||
|
||||
|
||||
class ParsingException(Exception):
    """Raised to abort parsing when an unrecoverable error node is produced.

    The offending error node is kept in ``error`` for callers to inspect.
    """

    def __init__(self, error):
        # Forward the error to Exception so str()/repr() and e.args carry
        # the message instead of being empty (the original skipped this).
        super().__init__(error)
        self.error = error
|
||||
|
||||
|
||||
@dataclass()
class DefaultParserNode(Node):
    """Common ancestor of every node produced by the default parser.

    Keeps the raw token run the node was built from.
    """

    # Excluded from __eq__ and __repr__: two nodes may compare equal even
    # when assembled from different token runs.
    tokens: list = field(compare=False, repr=False)
|
||||
|
||||
|
||||
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    """Marker type: an ErrorNode that also carries default-parser tokens."""
|
||||
|
||||
|
||||
@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """Error emitted when the parser does not recognize the given input."""

    # The raw text that could not be handled.
    text: str
|
||||
|
||||
|
||||
@dataclass()
class NameNode(DefaultParserNode):
    """A concept name assembled from a run of tokens.

    Whitespace tokens are skipped, reading stops at the first EOF token,
    and string tokens contribute their content without the surrounding
    quote characters. Identity (repr/eq/hash) is based solely on the
    rendered name, not on the underlying tokens.
    """

    def get_name(self):
        """Render the tokens as a single space-separated name string."""
        pieces = []
        for tok in self.tokens:
            if tok.type == TokenKind.EOF:
                break
            if tok.type == TokenKind.WHITESPACE:
                continue
            if tok.type == TokenKind.STRING:
                # Drop the surrounding quote characters.
                pieces.append(tok.value[1:-1])
            else:
                pieces.append(str(tok.value))
        return " ".join(pieces)

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        # Names compare by their rendered text only.
        return isinstance(other, NameNode) and self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())
|
||||
|
||||
|
||||
@dataclass()
class DefConceptNode(DefaultParserNode):
    """AST node for a full ``def concept`` declaration.

    Each optional part of the declaration (name/where/pre/post/body/ret/
    definition) holds the parsed result for that part; parts that were
    absent keep a NotInitializedNode sentinel.
    """

    # default_factory instead of a shared class-level instance: the original
    # evaluated NotInitializedNode() once per field at class-definition time,
    # so every DefConceptNode shared the same sentinel objects. Each node now
    # gets its own, preventing cross-instance leakage if a sentinel is mutated.
    name: NameNode = field(default_factory=NotInitializedNode)
    where: ReturnValueConcept = field(default_factory=NotInitializedNode)
    pre: ReturnValueConcept = field(default_factory=NotInitializedNode)
    post: ReturnValueConcept = field(default_factory=NotInitializedNode)
    body: ReturnValueConcept = field(default_factory=NotInitializedNode)
    ret: ReturnValueConcept = field(default_factory=NotInitializedNode)
    definition: ReturnValueConcept = field(default_factory=NotInitializedNode)
    # DEFINITION_TYPE_BNF / DEFINITION_TYPE_DEF, or None when no 'from' part.
    definition_type: str = None

    def get_asts(self):
        """Collect the concept parts that carry a parsed AST.

        Returns a dict mapping ConceptParts members to the parts whose
        parsed body exposes an ``ast_`` attribute.
        """
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if isinstance(prop_value, ReturnValueConcept) and \
                    isinstance(prop_value.body, ParserResultConcept) and \
                    hasattr(prop_value.body.body, "ast_"):
                asts[part_key] = prop_value
        return asts
|
||||
|
||||
|
||||
@dataclass()
class IsaConceptNode(DefaultParserNode):
    """AST node for an 'isa' relation: *concept* belongs to *set*."""

    # default_factory so each node owns its sentinel instead of sharing one
    # class-level NotInitializedNode() across all instances.
    concept: NameNode = field(default_factory=NotInitializedNode)
    # NOTE: the field name shadows the builtin 'set'; kept for compatibility
    # with existing callers accessing node.set.
    set: NameNode = field(default_factory=NotInitializedNode)
|
||||
|
||||
|
||||
class DefConceptParser(BaseCustomGrammarParser):
    """
    Parse sheerka specific grammar (like def concept).

    Grammar shape:
        def concept <name> [from [bnf|def] ...] [as ...] [where ...]
                           [pre ...] [post ...] [ret ...]
    """

    KEYWORDS = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST, Keywords.RET]
    KEYWORDS_VALUES = [k.value for k in KEYWORDS]

    def __init__(self, **kwargs):
        # kwargs accepted for parser-factory signature compatibility; unused.
        BaseCustomGrammarParser.__init__(self, "DefConcept", 60)

    def parse(self, context, parser_input: ParserInput):
        """Parse a 'def concept' declaration out of *parser_input*.

        Returns a sheerka return value whose status is True only when the
        declaration parsed without any error being recorded.
        """
        # default parser can only manage string text
        if parser_input.from_tokens:
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
            self.log_result(context, parser_input, ret)
            return ret

        # BUG FIX: the message previously said 'FunctionParser' (copy/paste
        # from another parser); report the actual parser name.
        context.log(f"Parsing '{parser_input}' with {self.name}", self.name)
        sheerka = context.sheerka

        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.IS_EMPTY))

        if not self.reset_parser(context, parser_input):
            # NOTE(review): mixes self.sheerka and context.sheerka —
            # presumably the same service; kept as-is to preserve behavior.
            return self.sheerka.ret(self.name,
                                    False,
                                    context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        self.parser_input.next_token()
        node = self.parse_def_concept()

        body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
        ret = sheerka.ret(self.name, not self.has_error, body)

        self.log_result(context, parser_input.as_text(), ret)
        return ret

    def parse_def_concept(self):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]

        Returns a DefConceptNode, or None when a fatal syntax error was
        recorded via add_error.
        """
        token = self.parser_input.token
        if token.value != Keywords.DEF.value:
            self.add_error(UnexpectedTokenErrorNode("'def' keyword not found.", token, [Keywords.DEF]))
            return None

        self.context.log("Keyword DEF found.", self.name)
        keywords_found = [token]
        self.parser_input.next_token()

        # ## the definition of a concept consists of several parts
        # Keywords.CONCEPT to get the name of the concept
        # Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept
        # Keywords.AS to get the body
        # Keywords.WHERE to get the conditions to recognize for the variables
        # Keywords.PRE to know if the conditions to evaluate the concept
        # Keywords.POST to apply or verify once the concept is executed
        # Keywords.RET to transform the concept into another concept
        parts = self.get_parts(self.KEYWORDS_VALUES, expected_first_token=Keywords.CONCEPT)
        if parts is None:
            return None

        keywords_found.extend([t[0] for t in parts.values()])  # keep track of all keywords found
        node = DefConceptNode(keywords_found)

        # get the name
        node.name = self.get_concept_name(parts[Keywords.CONCEPT])

        # get definition
        node.definition_type, node.definition = self.get_concept_definition(node, parts)

        # get the bodies
        node.body = self.get_ast(Keywords.AS, parts)
        node.where = self.get_ast(Keywords.WHERE, parts)
        node.pre = self.get_ast(Keywords.PRE, parts)
        node.post = self.get_ast(Keywords.POST, parts)
        node.ret = self.get_ast(Keywords.RET, parts)

        return node

    def get_concept_name(self, tokens):
        """Build the concept's NameNode from the CONCEPT part's tokens.

        The first token (the 'concept' keyword itself) is skipped. Returns
        None after recording an error when the name is empty or spans lines.
        """
        name_tokens = core.utils.strip_tokens(tokens[1:])
        if len(name_tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Name is mandatory"))
            return None

        for token in name_tokens:
            if token.type == TokenKind.NEWLINE:
                self.add_error(SyntaxErrorNode([token], "Newline are not allowed in name."))
                return None

        return NameNode(name_tokens)

    def get_concept_definition(self, current_concept_def, parts):
        """Resolve the optional 'from' part.

        Returns (definition_type, definition); (None, NotInitializedNode())
        when the part is absent or invalid.
        """
        if Keywords.FROM not in parts:
            return None, NotInitializedNode()

        tokens = parts[Keywords.FROM]
        if len(tokens) == 1:
            # Only the 'from' keyword itself is present.
            self.add_error(SyntaxErrorNode([], f"Empty '{tokens[0].value}' declaration."), False)
            return None, NotInitializedNode()

        if tokens[1].value == Keywords.BNF.value:
            return self.get_concept_bnf_definition(current_concept_def, core.utils.strip_tokens(tokens[2:]))

        return self.get_concept_simple_definition(core.utils.strip_tokens(tokens[0:]))

    def get_concept_bnf_definition(self, current_concept_def, tokens):
        """Parse a 'from bnf ...' definition with the BnfParser.

        Returns (DEFINITION_TYPE_BNF, parsing_result) on success, or
        (None, NotInitializedNode()) after recording the error.
        """
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Empty 'bnf' declaration"), False)
            return None, NotInitializedNode()

        # An optional leading colon introduces an indented body.
        if tokens[0].type == TokenKind.COLON:
            tokens = self.get_body(tokens[1:])

        bnf_regex_parser = BnfParser()
        desc = f"Resolving BNF {current_concept_def.definition}"
        with self.context.push(BuiltinConcepts.INIT_BNF,
                               current_concept_def,
                               who=self.name,
                               obj=current_concept_def,
                               desc=desc) as sub_context:
            parsing_result = bnf_regex_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)

        if not parsing_result.status:
            self.add_error(parsing_result.value)
            return None, NotInitializedNode()

        return DEFINITION_TYPE_BNF, parsing_result

    def get_concept_simple_definition(self, tokens):
        """Handle a 'from [def] <name>' definition.

        Returns (DEFINITION_TYPE_DEF, NameNode) or (None,
        NotInitializedNode()) when the declaration is empty.
        """
        # BUG FIX: guard the tokens[1] access — stripping can leave a lone
        # 'from' token, which previously raised IndexError here.
        start = 2 if len(tokens) > 1 and tokens[1].value == Keywords.DEF.value else 1
        tokens = core.utils.strip_tokens(tokens[start:])
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Empty 'from' declaration."), False)
            return None, NotInitializedNode()

        # An optional leading colon introduces an indented body.
        if tokens[0].type == TokenKind.COLON:
            tokens = self.get_body(tokens[1:])

        return DEFINITION_TYPE_DEF, NameNode(tokens)

    def get_ast(self, keyword, parts):
        """Parse the token run of part *keyword* into an AST return value.

        Returns NotInitializedNode() when the part is absent, None after
        recording an error, or the parsed result.
        """
        if keyword not in parts:
            return NotInitializedNode()

        tokens = parts[keyword]
        if len(tokens) == 1:
            # Only the keyword itself, no content.
            self.add_error(SyntaxErrorNode(tokens, f"Empty '{tokens[0].value}' declaration."))
            return None

        source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens[1:])
        parsed = core.builtin_helpers.parse_unrecognized(self.context,
                                                         source,
                                                         parsers="all",
                                                         who=self.name,
                                                         prop=keyword,
                                                         filter_func=core.builtin_helpers.expect_one)

        if not parsed.status:
            self.add_error(parsed.value)
            return None

        return parsed
|
||||
Reference in New Issue
Block a user