Refactored parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser to DefConceptParser
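A minimal sketch of the parser hierarchy after this refactor (class names are taken from the diff below; bodies elided):

    from parsers.BaseParser import BaseParser

    class BaseCustomGrammarParser(BaseParser):        # new shared base for sheerka-specific grammars
        ...

    class DefConceptParser(BaseCustomGrammarParser):  # formerly DefaultParser
        ...

    class FormatRuleParser(BaseCustomGrammarParser):  # new parser, also built on the shared base
        ...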
@@ -0,0 +1,250 @@
from dataclasses import dataclass, field

import core.utils
from core.tokenizer import Keywords, TokenKind, Tokenizer
from parsers.BaseParser import BaseParser, Node, ErrorNode, UnexpectedEofNode, UnexpectedTokenErrorNode


@dataclass()
class CustomGrammarParserNode(Node):
    """
    Base node for all custom grammar parser nodes
    """
    tokens: list = field(compare=False, repr=False)


@dataclass()
class SyntaxErrorNode(CustomGrammarParserNode, ErrorNode):
    """
    The input is recognized, but there is a syntax error
    """
    message: str

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, SyntaxErrorNode):
            return False

        if self.message != other.message:
            return False

        if other.tokens is not None and self.tokens != other.tokens:
            return False

        return True

    def __hash__(self):
        return hash(self.message)


@dataclass()
class KeywordNotFound(CustomGrammarParserNode, ErrorNode):
    keywords: list

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, KeywordNotFound):
            return False

        if self.keywords != other.keywords:
            return False

        if other.tokens is not None and self.tokens != other.tokens:
            return False

        return True

    def __hash__(self):
        return hash(tuple(self.keywords))  # keywords is a list, so hash a tuple copy


class BaseCustomGrammarParser(BaseParser):
    """
    Base class for sheerka-specific grammars
    """
    DEFAULT_TAB_SIZE = 4

    def __init__(self, name, priority: int, enabled=True):
        super().__init__(name, priority, enabled=enabled)

    @staticmethod
    def skip_white_spaces(tokens):
        i = 0
        while i < len(tokens) and tokens[i].type == TokenKind.WHITESPACE:
            i += 1

        return i

    def get_body(self, tokens):
        """
        Get the body of a keyword definition.
        It handles colon-introduced bodies, but the leading colon must be stripped first.
        :param tokens:
        :return:
        """

        def get_tab_size(default_tab_size, text):
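            # one column per plain character; a non-string element (presumably a
            # tab marker produced by the tokenizer) counts as default_tab_size columns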
            return sum([1 if isinstance(c, str) else default_tab_size for c in text])

        pos = self.skip_white_spaces(tokens)

        if len(tokens) - pos < 3:
            self.add_error(SyntaxErrorNode(tokens, "Body is empty or too short."))
            return None

        if tokens[pos].type != TokenKind.NEWLINE:
            self.add_error(UnexpectedTokenErrorNode("New line not found.", tokens[pos], [TokenKind.NEWLINE]))
            return None
        pos += 1

        if tokens[pos].type != TokenKind.WHITESPACE:
            self.add_error(UnexpectedTokenErrorNode("Indentation not found.", tokens[pos], [TokenKind.WHITESPACE]))
            return None

        indent_size = get_tab_size(self.DEFAULT_TAB_SIZE, tokens[pos].value)
        pos += 1

        i = pos
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    self.add_error(UnexpectedTokenErrorNode("Indentation not found.",
                                                            tokens[i + 1],
                                                            [TokenKind.WHITESPACE]))
                    return None

                if get_tab_size(self.DEFAULT_TAB_SIZE, tokens[i + 1].value) < indent_size:
                    self.add_error(SyntaxErrorNode([tokens[i + 1]], "Invalid indentation."))
                    return None

                tokens[i + 1] = tokens[i + 1].clone()
                tokens[i + 1].value = " " * (get_tab_size(self.DEFAULT_TAB_SIZE, tokens[i + 1].value) - indent_size)
            i += 1

        return tokens[pos:]

    def get_parts(self, keywords, expected_first_token=None):
        """
        Reads the parser input and groups the tokens by keywords.
        ex:
            tokens = Tokenizer("as a b c pre u v w where x y z")
            keywords = ["as", "pre", "where"]
            assert get_parts(keywords) == {
                Keyword("as"): [Token("a"), Token(<ws>), Token("b"), Token(<ws>), Token("c"), Token(<ws>)],
                Keyword("pre"): [Token("u"), Token(<ws>), Token("v"), Token(<ws>), Token("w"), Token(<ws>)],
                Keyword("where"): [Token("x"), Token(<ws>), Token("y"), Token(<ws>), Token("z"), Token(<ws>)]}

        * The order of appearance of the keywords is not important:
          "as w pre y where z" and "where z pre y as w" will produce the same dictionary

        * Double quotes can be used to protect a keyword:
          where "x y" will produce the entry Keyword("where"): [Token("x"), Token(<ws>), Token("y"), Token(<ws>)]
          where 'x y' will produce the entry Keyword("where"): [Token("'x y'")]

        :param keywords:
        :param expected_first_token: it must be a Keywords member
        :return: dictionary
        """

        def new_part(t, cma, p):
            """

            :param t: token
            :param cma: colon_mode_activated
            :param p: previous token
            :return:
            """
            if t.value not in keywords:
                return False

            if not cma or not p:
                return True

            return p.line != t.line

        if self.parser_input.token is None:
            self.add_error(KeywordNotFound([], keywords))
            return None

        if self.parser_input.token.type == TokenKind.WHITESPACE:
            self.parser_input.next_token()

        token = self.parser_input.token
        if expected_first_token and token.value != expected_first_token.value:
            self.add_error(UnexpectedTokenErrorNode(f"'{expected_first_token.value}' keyword not found.",
                                                    token,
                                                    [expected_first_token]))
            return None

        if token.value not in keywords:
            self.add_error(KeywordNotFound([token], keywords))
            return None

        colon_mode_activated = False  # if activated, a keyword followed by a colon starts a new keyword definition
        previous_token = None

        res = {}

        # More explanations on colon_mode_activated
        # You can use the pattern
        # def concept <name> as:
        #     <tab> xxx
        #     <tab> yyy
        #     ...
        #
        # It improves readability and allows other keywords to be used inside the block.
        # Example
        # def concept give the date as:
        #     from datetime import date   # I can use the 'from' keyword !!!
        #     return date.today()
        #
        # Note that I can choose to use the colon or not
        #
        # def concept in x days as:
        #     from datetime import date
        #     return date.today() - x
        # where x > 0
        #
        # is a valid declaration

        # loop through the tokens and put each one in the correct entry of res
        while True:
            if new_part(token, colon_mode_activated, previous_token):
                keyword = Keywords(token.value)
                if keyword in res:
                    # a part is defined more than once
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    break

                res[keyword] = [token]  # to keep track of where it starts
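                # peek ahead: a colon right after the keyword switches this part
                # to colon mode, so its body may span an indented block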
                colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON
                if not self.parser_input.next_token():
                    self.add_error(UnexpectedEofNode(f"While parsing keyword '{keyword.value}'."))
                    break
            else:
                res[keyword].append(token)
                if not self.parser_input.next_token(skip_whitespace=False):
                    break

            previous_token = token
            token = self.parser_input.token

        # Post-process the result if needed
        for k, v in res.items():
            stripped = core.utils.strip_tokens(v[1:])

            # manage the colon first, to make sure that what is protected by the quotes remains protected
            if len(stripped) > 0 and stripped[0].type == TokenKind.COLON:
                body = self.get_body(stripped[1:])
                if body:
                    res[k] = v[0:1] + body
            # replace double-quoted strings with their content
            elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"':
                res[k] = v[0:1] + list(Tokenizer(stripped[0].strip_quote, yield_eof=False))

        return res
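A hedged usage sketch of get_parts; the parser instance and input text are invented for the illustration:

    # parser.parser_input is positioned on: concept in x days as: ... where x > 0
    parts = parser.get_parts(["concept", "as", "where"],
                             expected_first_token=Keywords.CONCEPT)
    # parts maps Keywords.CONCEPT / Keywords.AS / Keywords.WHERE to the tokens
    # following each keyword; index 0 of every list is the keyword token itself,
    # kept so callers can report where the part started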
@@ -6,8 +6,7 @@ from typing import Set
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.services.SheerkaExecute import ParserInput
-from core.tokenizer import TokenKind, LexerError, Token
+from core.tokenizer import TokenKind, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode

DEBUG_COMPILED = True
@@ -718,7 +717,7 @@ class BaseNodeParser(BaseParser):
    """

    def __init__(self, name, priority, **kwargs):
-        super().__init__(name, priority)
+        super().__init__(name, priority, yield_eof=True)
        if 'sheerka' in kwargs:
            sheerka = kwargs.get("sheerka")
            self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword
@@ -745,17 +744,6 @@ class BaseNodeParser(BaseParser):
        concepts_by_first_keyword = self.get_concepts_by_first_token(context, concepts).body
        self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body

-    def reset_parser(self, context, parser_input: ParserInput):
-        self.context = context
-        self.sheerka = context.sheerka
-        self.parser_input = parser_input
-        try:
-            self.parser_input.reset(False)
-        except LexerError as e:
-            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
-            return False
-        return True

    def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
        """
        Tries to find if there are concepts that match the value of the token
@@ -1,7 +1,7 @@
import logging
from dataclasses import dataclass
from typing import Union

import core.utils
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.sheerka.ExecutionContext import ExecutionContext
@@ -57,7 +57,7 @@ class ErrorNode(Node):
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
    message: str
-    token: Token
+    token: Union[Token, str]
    expected_tokens: list

    def __eq__(self, other):
@@ -70,31 +70,25 @@ class UnexpectedTokenErrorNode(ErrorNode):
        if self.message != other.message:
            return False

-        if self.token.type != other.token.type or self.token.value != other.token.value:
+        to_compare = self.token.repr_value if isinstance(other.token, str) else self.token
+        if to_compare != other.token:
            return False

-        if len(self.expected_tokens) != len(other.expected_tokens):
-            return False
-
-        for i, t in enumerate(self.expected_tokens):
-            if t != other.expected_tokens[i]:
-                return False
-
-        return True
+        return self.expected_tokens == other.expected_tokens

    def __hash__(self):
        return hash((self.message, self.token, tuple(self.expected_tokens)))  # tuple(): expected_tokens is a list


@dataclass()
-class UnexpectedEof(ErrorNode):
+class UnexpectedEofNode(ErrorNode):
    message: str


class BaseParser:
    PREFIX = "parsers."

-    def __init__(self, name, priority: int, enabled=True):
+    def __init__(self, name, priority: int, enabled=True, yield_eof=False):
        self.log = get_logger("parsers." + self.__class__.__name__)
        self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
        self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)
@@ -107,6 +101,7 @@ class BaseParser:
        self.context: ExecutionContext = None
        self.sheerka = None
        self.parser_input: ParserInput = None
+        self.yield_eof = yield_eof

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
@@ -126,10 +121,9 @@ class BaseParser:
        self.error_sink.clear()

        try:
-            self.parser_input.reset(False)
-            self.parser_input.next_token()
+            self.parser_input.reset(self.yield_eof)
        except LexerError as e:
-            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
+            self.add_error(e, False)
            return False
        return True

@@ -165,12 +159,12 @@ class BaseParser:
        value = context.return_value_to_str(r)
        context.log(f" Recognized '{value}'", self.name)

-    def get_return_value_body(self, sheerka, source, tree, try_parse):
+    def get_return_value_body(self, sheerka, source, parsed, try_parse):
        """
        All parsers must return their result in a standard way
        :param sheerka:
        :param source:
-        :param tree:
+        :param parsed:
        :param try_parse:
        :return:
        """
@@ -178,17 +172,19 @@ class BaseParser:
            return self.error_sink[0]

        if self.has_error:
-            return sheerka.new(
-                BuiltinConcepts.ERROR,
-                body=self.error_sink
-            )
+            if parsed is None:
+                return sheerka.new(BuiltinConcepts.NOT_FOR_ME,
+                                   body=source,
+                                   reason=self.error_sink)
+            else:
+                return sheerka.new(BuiltinConcepts.ERROR,
+                                   body=self.error_sink)

-        return sheerka.new(
-            BuiltinConcepts.PARSER_RESULT,
-            parser=self,
-            source=source,
-            body=tree,
-            try_parsed=try_parse)
+        return sheerka.new(BuiltinConcepts.PARSER_RESULT,
+                           parser=self,
+                           source=source,
+                           body=parsed,
+                           try_parsed=try_parse)

    @staticmethod
    def get_input_as_lexer_nodes(parser_input, expected_parser=None):
@@ -242,7 +238,7 @@ class BaseParser:
        tokens = [tokens]

        switcher = {
-            # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
+            # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
        }

        if custom_switcher:
@@ -0,0 +1,274 @@
from dataclasses import dataclass, field

import core.builtin_helpers
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import TokenKind, Keywords
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode
from parsers.BaseParser import Node, ErrorNode, NotInitializedNode, UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser


class ParsingException(Exception):
    def __init__(self, error):
        self.error = error


@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    tokens: list = field(compare=False, repr=False)


@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    pass


@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    text: str


@dataclass()
class NameNode(DefaultParserNode):

    def get_name(self):
        name = ""
        first = True
        for token in self.tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if not first:
                name += " "

            name += token.value[1:-1] if token.type == TokenKind.STRING else str(token.value)
            first = False

        return name

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        if not isinstance(other, NameNode):
            return False

        return self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())


@dataclass()
class DefConceptNode(DefaultParserNode):
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    ret: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()
    definition_type: str = None

    def get_asts(self):
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if isinstance(prop_value, ReturnValueConcept) and \
                    isinstance(prop_value.body, ParserResultConcept) and \
                    hasattr(prop_value.body.body, "ast_"):
                asts[part_key] = prop_value
        return asts


@dataclass()
class IsaConceptNode(DefaultParserNode):
    concept: NameNode = NotInitializedNode()
    set: NameNode = NotInitializedNode()


class DefConceptParser(BaseCustomGrammarParser):
    """
    Parse sheerka-specific grammar (like def concept)
    """

    KEYWORDS = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST, Keywords.RET]
    KEYWORDS_VALUES = [k.value for k in KEYWORDS]

    def __init__(self, **kwargs):
        BaseCustomGrammarParser.__init__(self, "DefConcept", 60)

    def parse(self, context, parser_input: ParserInput):
        # this parser can only manage string text
        if parser_input.from_tokens:
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
            self.log_result(context, parser_input, ret)
            return ret

        context.log(f"Parsing '{parser_input}' with DefConceptParser", self.name)
        sheerka = context.sheerka

        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.IS_EMPTY))

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(self.name,
                                    False,
                                    context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        self.parser_input.next_token()
        node = self.parse_def_concept()

        body = self.get_return_value_body(sheerka, parser_input.as_text(), node, node)
        ret = sheerka.ret(self.name, not self.has_error, body)

        self.log_result(context, parser_input.as_text(), ret)
        return ret

    def parse_def_concept(self):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        """
        token = self.parser_input.token
        if token.value != Keywords.DEF.value:
            self.add_error(UnexpectedTokenErrorNode("'def' keyword not found.", token, [Keywords.DEF]))
            return None

        self.context.log("Keyword DEF found.", self.name)
        keywords_found = [token]
        self.parser_input.next_token()

        # ## the definition of a concept consists of several parts
        # Keywords.CONCEPT to get the name of the concept
        # Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept
        # Keywords.AS to get the body
        # Keywords.WHERE to get the conditions the variables must satisfy to be recognized
        # Keywords.PRE to get the conditions to check before the concept is evaluated
        # Keywords.POST to apply or verify once the concept is executed
        # Keywords.RET to transform the concept into another concept
        parts = self.get_parts(self.KEYWORDS_VALUES, expected_first_token=Keywords.CONCEPT)
        if parts is None:
            return None

        keywords_found.extend([t[0] for t in parts.values()])  # keep track of all keywords found
        node = DefConceptNode(keywords_found)
        # if first_token.type == TokenKind.EOF:
        #     return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))

        # get the name
        node.name = self.get_concept_name(parts[Keywords.CONCEPT])

        # get the definition
        node.definition_type, node.definition = self.get_concept_definition(node, parts)

        # get the bodies
        node.body = self.get_ast(Keywords.AS, parts)
        node.where = self.get_ast(Keywords.WHERE, parts)
        node.pre = self.get_ast(Keywords.PRE, parts)
        node.post = self.get_ast(Keywords.POST, parts)
        node.ret = self.get_ast(Keywords.RET, parts)

        return node

    def get_concept_name(self, tokens):
        name_tokens = core.utils.strip_tokens(tokens[1:])
        if len(name_tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Name is mandatory."))
            return None

        for token in name_tokens:
            if token.type == TokenKind.NEWLINE:
                self.add_error(SyntaxErrorNode([token], "Newlines are not allowed in the name."))
                return None

        name_node = NameNode(name_tokens)  # the keyword token was already stripped above
        return name_node

    def get_concept_definition(self, current_concept_def, parts):
        if Keywords.FROM not in parts:
            return None, NotInitializedNode()

        tokens = parts[Keywords.FROM]
        if len(tokens) == 1:
            self.add_error(SyntaxErrorNode([], f"Empty '{tokens[0].value}' declaration."), False)
            return None, NotInitializedNode()

        if tokens[1].value == Keywords.BNF.value:
            return self.get_concept_bnf_definition(current_concept_def, core.utils.strip_tokens(tokens[2:]))

        return self.get_concept_simple_definition(core.utils.strip_tokens(tokens[0:]))

    def get_concept_bnf_definition(self, current_concept_def, tokens):
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Empty 'bnf' declaration."), False)
            return None, NotInitializedNode()

        if tokens[0].type == TokenKind.COLON:
            tokens = self.get_body(tokens[1:])

        bnf_regex_parser = BnfParser()
        desc = f"Resolving BNF {current_concept_def.definition}"
        with self.context.push(BuiltinConcepts.INIT_BNF,
                               current_concept_def,
                               who=self.name,
                               obj=current_concept_def,
                               desc=desc) as sub_context:
            parsing_result = bnf_regex_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)

        if not parsing_result.status:
            self.add_error(parsing_result.value)
            return None, NotInitializedNode()

        return DEFINITION_TYPE_BNF, parsing_result

    def get_concept_simple_definition(self, tokens):
        start = 2 if tokens[1].value == Keywords.DEF.value else 1
        tokens = core.utils.strip_tokens(tokens[start:])
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([], "Empty 'from' declaration."), False)
            return None, NotInitializedNode()

        if tokens[0].type == TokenKind.COLON:
            tokens = self.get_body(tokens[1:])

        return DEFINITION_TYPE_DEF, NameNode(tokens)

    def get_ast(self, keyword, parts):
        if keyword not in parts:
            return NotInitializedNode()

        tokens = parts[keyword]
        if len(tokens) == 1:
            self.add_error(SyntaxErrorNode(tokens, f"Empty '{tokens[0].value}' declaration."))
            return None

        source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens[1:])
        parsed = core.builtin_helpers.parse_unrecognized(self.context,
                                                         source,
                                                         parsers="all",
                                                         who=self.name,
                                                         prop=keyword,
                                                         filter_func=core.builtin_helpers.expect_one)

        if not parsed.status:
            self.add_error(parsed.value)
            return None

        return parsed
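A hedged end-to-end illustration of a declaration this parser accepts (the concept body is borrowed from the colon-mode example in BaseCustomGrammarParser, not from this commit's tests):

    # def concept in x days as:
    #     from datetime import date
    #     return date.today() - x
    # where x > 0
    #
    # parse_def_concept() checks the leading 'def', get_parts() splits the rest
    # into the CONCEPT / AS / WHERE parts, get_body() strips the shared
    # indentation of the colon block, and get_ast() hands each part to the other
    # parsers via core.builtin_helpers.parse_unrecognized()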
@@ -1,509 +0,0 @@
from dataclasses import dataclass, field

import core.builtin_helpers
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import Tokenizer, TokenKind, Keywords
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from parsers.BnfParser import BnfParser


class ParsingException(Exception):
    def __init__(self, error):
        self.error = error


@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    tokens: list = field(compare=False, repr=False)


@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    pass


@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    message: str
    expected_tokens: list


@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but there is a syntax error
    """
    message: str


@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    text: str


@dataclass()
class NameNode(DefaultParserNode):

    def get_name(self):
        name = ""
        first = True
        for token in self.tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if not first:
                name += " "

            name += token.value[1:-1] if token.type == TokenKind.STRING else str(token.value)
            first = False

        return name

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        if not isinstance(other, NameNode):
            return False

        return self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())


@dataclass()
class DefConceptNode(DefaultParserNode):
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    ret: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()
    definition_type: str = None

    def get_asts(self):
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if isinstance(prop_value, ReturnValueConcept) and \
                    isinstance(prop_value.body, ParserResultConcept) and \
                    hasattr(prop_value.body.body, "ast_"):
                asts[part_key] = prop_value
        return asts


@dataclass()
class IsaConceptNode(DefaultParserNode):
    concept: NameNode = NotInitializedNode()
    set: NameNode = NotInitializedNode()


class DefaultParser(BaseParser):
    """
    Parse sheerka-specific grammar (like def concept)
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Default", 60)

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example
            def concept add one to a as:
                def func(x):
                    return x+1
                func(a)
        indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a Python syntax error
        :param tokens:
        :return:
        """
        if len(tokens) == 0:
            return tokens

        tokens = tokens.copy()  # do not modify ParserInput.tokens

        if tokens[0].type != TokenKind.COLON:
            return tokens

        if len(tokens) < 3:
            raise ParsingException(UnexpectedTokenErrorNode(tokens[0:2],
                                                            "Unexpected end of file",
                                                            [TokenKind.NEWLINE]))
        pos = DefaultParser.eat_white_space(tokens, 1)
        if tokens[pos].type != TokenKind.NEWLINE:
            raise ParsingException(UnexpectedTokenErrorNode([tokens[pos]],
                                                            "Unexpected token after colon",
                                                            [TokenKind.NEWLINE]))
        pos += 1

        if tokens[pos].type != TokenKind.WHITESPACE:
            raise ParsingException(SyntaxErrorNode([tokens[pos]],
                                                   "Indentation not found."))
        indent_size = len(tokens[pos].value)
        pos += 1

        # now fix the other indentations
        # KSI 23/05/2020 Not quite sure this 'fixing' stuff is still relevant,
        # as I now have an editor in interactive mode
        i = pos
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])

                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")

                tokens[i + 1] = tokens[i + 1].clone()
                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1

        return tokens[pos:]

    @staticmethod
    def eat_white_space(tokens, index):
        if index >= len(tokens):
            return index

        while index < len(tokens) and tokens[index].type == TokenKind.WHITESPACE:
            index += 1

        return index

    def reset_parser(self, context, parser_input):
        self.context = context
        self.sheerka = context.sheerka
        self.parser_input = parser_input
        self.parser_input.reset()
        self.parser_input.next_token()

    def parse(self, context, parser_input: ParserInput):
        # the default parser can only manage string text
        if parser_input.from_tokens:
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
            self.log_result(context, parser_input, ret)
            return ret

        try:
            self.reset_parser(context, parser_input)
            tree = self.parse_statement()
        except core.tokenizer.LexerError as e:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))

        # If an error is found it must be sent to error_sink
        # tree must contain what was recognized

        if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
            body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
        else:
            body = self.get_return_value_body(context.sheerka, parser_input.as_text(), tree, tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            body)

        self.log_result(context, parser_input.as_text(), ret)
        return ret

    def parse_statement(self):
        token = self.parser_input.token
        if token.value == Keywords.DEF.value:
            self.parser_input.next_token()
            self.context.log("Keyword DEF found.", self.name)
            return self.parse_def_concept(token)

        return self.add_error(CannotHandleErrorNode([token], ""))

    def parse_def_concept(self, def_token):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        """

        # init
        keywords_tokens = [def_token]
        concept_found = DefConceptNode(keywords_tokens)

        # ##
        # ## the definition of a concept consists of several parts
        # ## Keywords.CONCEPT to get the name of the concept
        # ## Keywords.FROM [Keywords.BNF] | [Keywords.DEF] to get the definition of the concept
        # ## Keywords.AS to get the body
        # ## Keywords.WHERE to get the conditions the variables must satisfy to be recognized
        # ## Keywords.PRE to get the conditions to check before the concept is evaluated
        # ## Keywords.POST to apply or verify once the concept is executed

        # Regroup the tokens by parts
        first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)

        if first_token.type == TokenKind.EOF:
            return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))

        # get the name
        concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)

        # get the definition
        def_type, def_value = self.get_concept_definition(concept_found, tokens_found_by_parts)
        concept_found.definition_type = def_type
        concept_found.definition = def_value

        # get the ASTs for the remaining parts
        asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
        concept_found.where = asts_found_by_parts[Keywords.WHERE]
        concept_found.pre = asts_found_by_parts[Keywords.PRE]
        concept_found.post = asts_found_by_parts[Keywords.POST]
        concept_found.body = asts_found_by_parts[Keywords.AS]
        concept_found.ret = asts_found_by_parts[Keywords.RET]

        return concept_found

    def regroup_tokens_by_parts(self, keywords_tokens):

        def new_part(t, cma, p):
            """

            :param t: token
            :param cma: colon_mode_activated
            :param p: previous token
            :return:
            """
            if t.value not in def_concept_parts:
                return False

            if not cma or not p:
                return True

            return p.line != t.line

        def_concept_parts = [Keywords.CONCEPT.value,
                             Keywords.FROM.value,
                             Keywords.AS.value,
                             Keywords.WHERE.value,
                             Keywords.PRE.value,
                             Keywords.POST.value,
                             Keywords.RET.value]

        # tokens found when trying to recognize the parts
        tokens_found_by_parts = {
            Keywords.CONCEPT: [],
            Keywords.FROM: None,
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
            Keywords.RET: None,
        }
        current_part = Keywords.CONCEPT
        token = self.parser_input.token
        first_token = token
        colon_mode_activated = False  # if activated, a keyword followed by a colon starts a new keyword definition
        previous_token = None

        # more explanation on colon_mode_activated
        # You can use the pattern
        # def concept <name> as:
        #     <tab> xxx
        #     <tab> yyy
        #     ...
        #
        # It improves readability and allows other keywords to be used inside the block.
        # Example
        # def concept give the date as:
        #     from datetime import date
        #     return date.today()
        #
        # 'from datetime' will not be considered a keyword because it is preceded by a tab,
        # whereas in
        # def concept in x days as:
        #     from datetime import date
        #     return date.today() - x
        # where x > 0
        #
        # 'where' will be recognized as a keyword because it is the first word of the line

        # loop through the tokens, and put them in the correct tokens_found_by_parts entry
        while token.type != TokenKind.EOF:
            if new_part(token, colon_mode_activated, previous_token):
                keywords_tokens.append(token)  # keep track of the keywords
                keyword = Keywords(token.value)
                if tokens_found_by_parts[keyword]:
                    # a part is defined more than once
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    tokens_found_by_parts[current_part].append(token)  # adds the token again
                else:
                    tokens_found_by_parts[keyword] = [token]
                    current_part = keyword
                    colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON

                self.parser_input.next_token()
            else:
                tokens_found_by_parts[current_part].append(token)
                self.parser_input.next_token(False)

            previous_token = token
            token = self.parser_input.token

        return first_token, tokens_found_by_parts

    def get_concept_name(self, first_token, tokens_found_by_parts):
        name_first_token_index = 1
        token = self.parser_input.token
        if first_token.value != Keywords.CONCEPT.value:
            self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0

        name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
        if len(name_tokens) == name_first_token_index:
            self.add_error(SyntaxErrorNode([], "Name is mandatory."))

        if name_tokens[-1].type == TokenKind.NEWLINE:
            name_tokens = name_tokens[:-1]  # strip trailing newlines

        if TokenKind.NEWLINE in [t.type for t in name_tokens]:
            self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newlines are not allowed in the name."))

        tokens = name_tokens[name_first_token_index:]
        stripped = core.utils.strip_tokens(tokens)
        if len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"':
            tokens = list(Tokenizer(stripped[0].strip_quote, yield_eof=False))

        name_node = NameNode(tokens)  # skip the first token
        return name_node

    def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
        if tokens_found_by_parts[Keywords.FROM] is None:
            return None, NotInitializedNode()

        definition_tokens = tokens_found_by_parts[Keywords.FROM]
        if len(definition_tokens) == 1:
            self.add_error(SyntaxErrorNode([], "Empty declaration"), False)
            return None, NotInitializedNode()

        if definition_tokens[1].value == Keywords.BNF.value:
            return self.get_concept_bnf_definition(current_concept_def, definition_tokens)

        return self.get_concept_simple_definition(definition_tokens)

    def get_concept_bnf_definition(self, current_concept_def, definition_tokens):
        try:
            tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[2:]))
        except ParsingException as ex:
            self.add_error(ex.error)
            return None, NotInitializedNode()

        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
            return None, NotInitializedNode()

        bnf_regex_parser = BnfParser()
        desc = f"Resolving BNF {current_concept_def.definition}"
        with self.context.push(BuiltinConcepts.INIT_BNF,
                               current_concept_def,
                               who=self.name,
                               obj=current_concept_def,
                               desc=desc) as sub_context:
            parsing_result = bnf_regex_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)

        if not parsing_result.status:
            self.add_error(parsing_result.value)
            return None, NotInitializedNode()

        return DEFINITION_TYPE_BNF, parsing_result

    def get_concept_simple_definition(self, definition_tokens):
        start = 2 if definition_tokens[1].value == Keywords.DEF.value else 1
        try:
            tokens = self.fix_indentation(core.utils.strip_tokens(definition_tokens[start:]))
        except ParsingException as ex:
            self.add_error(ex.error)
            return None, NotInitializedNode()

        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([definition_tokens[start]], "Empty declaration"), False)
            return None, NotInitializedNode()

        return DEFINITION_TYPE_DEF, NameNode(tokens)

    def get_concept_parts(self, tokens_found_by_parts):
        asts_found_by_parts = {
            Keywords.AS: NotInitializedNode(),
            Keywords.WHERE: NotInitializedNode(),
            Keywords.PRE: NotInitializedNode(),
            Keywords.POST: NotInitializedNode(),
            Keywords.RET: NotInitializedNode()
        }

        for keyword in tokens_found_by_parts:
            if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
                continue  # already done

            tokens = tokens_found_by_parts[keyword]
            if tokens is None:
                continue  # nothing to do

            if len(tokens) == 1:  # check for empty declarations
                self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
                continue

            try:
                tokens = self.fix_indentation(tokens[1:])  # manage multi-line declarations
            except ParsingException as ex:
                self.add_error(ex.error)
                continue

            # ask the other parsers if they recognize the tokens
            source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens)
            parsed = core.builtin_helpers.parse_unrecognized(self.context,
                                                             source,
                                                             parsers="all",
                                                             who=self.name,
                                                             prop=keyword,
                                                             filter_func=core.builtin_helpers.expect_one)

            if not parsed.status:
                self.add_error(parsed.value)
                continue

            asts_found_by_parts[keyword] = parsed

            #
            # with self.context.push(BuiltinConcepts.PARSING, keyword, who=self.name, desc=f"Parsing {keyword}") as sub_context:
            #     parser_input = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, tokens)
            #     to_parse = self.sheerka.ret(
            #         sub_context.who,
            #         True,
            #         self.sheerka.new(BuiltinConcepts.USER_INPUT, body=parser_input))
            #     steps = [BuiltinConcepts.BEFORE_PARSING, BuiltinConcepts.PARSING, BuiltinConcepts.AFTER_PARSING]
            #     if keyword in (Keywords.WHERE, Keywords.PRE):
            #         sub_context.protected_hints.add(BuiltinConcepts.EVAL_QUESTION_REQUESTED)
            #     parsed = self.sheerka.execute(sub_context, to_parse, steps)
            #     parsing_result = core.builtin_helpers.expect_one(sub_context, parsed)
            #     sub_context.add_values(return_values=parsing_result)
            #
            #     if not parsing_result.status:
            #         self.add_error(parsing_result.value)
            #         continue
            #
            #     asts_found_by_parts[keyword] = parsing_result

        return asts_found_by_parts
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import LexerError, TokenKind, Token
-from parsers.BaseParser import Node, BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, ErrorNode
+from parsers.BaseParser import Node, BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, ErrorNode


class ExprNode(Node):
@@ -189,7 +189,7 @@ class ExpressionParser(BaseParser):
    """

    def __init__(self, **kwargs):
-        super().__init__("Expression", 50, False)
+        super().__init__("Expression", 50, False, yield_eof=True)

    def parse(self, context, parser_input: ParserInput):
        """
@@ -215,6 +215,7 @@ class ExpressionParser(BaseParser):
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

+        self.parser_input.next_token()
        tree = self.parse_or()
        token = self.parser_input.token
        if token and token.type != TokenKind.EOF:
@@ -240,7 +241,7 @@ class ExpressionParser(BaseParser):
            self.parser_input.next_token()
            expr = self.parse_and()
            if expr is None:
-                self.add_error(UnexpectedEof("When parsing 'or'"))
+                self.add_error(UnexpectedEofNode("When parsing 'or'"))
                return OrNode(*parts)
            parts.append(expr)
            token = self.parser_input.token
@@ -258,7 +259,7 @@ class ExpressionParser(BaseParser):
            self.parser_input.next_token()
            expr = self.parse_names()
            if expr is None:
-                self.add_error(UnexpectedEof("When parsing 'and'"))
+                self.add_error(UnexpectedEofNode("When parsing 'and'"))
                return AndNode(*parts)
            parts.append(expr)
            token = self.parser_input.token
@@ -0,0 +1,132 @@
from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.builtin_helpers import parse_unrecognized, expect_one
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import Keywords
from core.utils import strip_tokens
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, KeywordNotFound
from parsers.BaseParser import BaseParser, Node


@dataclass
class FormatAstNode:
    pass


@dataclass
class FormatAstRawText(FormatAstNode):
    text: str


@dataclass
class FormatRuleNode(Node):
    tokens: dict
    rule: ReturnValueConcept = None
    format_ast: FormatAstNode = None


class FormatRuleParser(BaseCustomGrammarParser):
    """
    Class that parses formatting rule definitions
        when xxx print yyy
    where xxx will be evaluated in the context of BuiltinConcepts.EVAL_QUESTION_REQUESTED
    and yyy is an internal way to describe a format (yet another one)
    """

    KEYWORDS = [Keywords.WHEN, Keywords.PRINT]
    KEYWORDS_VALUES = [k.value for k in KEYWORDS]

    def __init__(self, **kwargs):
        BaseCustomGrammarParser.__init__(self, "FormatRule", 60)

    def parse(self, context, parser_input: ParserInput):
        """

        :param context:
        :param parser_input:
        :return:
        """

        if not isinstance(parser_input, ParserInput):
            return None

        if parser_input.from_tokens:
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
            self.log_result(context, parser_input, ret)
            return ret

        context.log(f"Parsing '{parser_input}' with FormatRuleParser", self.name)
        sheerka = context.sheerka

        if parser_input.is_empty():
            return sheerka.ret(self.name,
                               False,
                               sheerka.new(BuiltinConcepts.IS_EMPTY))

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(self.name,
                                    False,
                                    context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        self.parser_input.next_token()
        rule = self.parse_rule()
        body = self.get_return_value_body(sheerka, parser_input.as_text(), rule, rule)
        ret = sheerka.ret(self.name, not self.has_error, body)

        self.log_result(context, parser_input.as_text(), ret)
        return ret

    def parse_rule(self):
        parts = self.get_parts(self.KEYWORDS_VALUES)
        if parts is None:
            return None

        node = FormatRuleNode(parts)
        try:
            res = self.get_when(parts[Keywords.WHEN])
            if res is None:
                return node
            node.rule = res

            parsed = self.get_print(parts[Keywords.PRINT])
            if parsed is None:
                return node
            node.format_ast = parsed
        except KeyError as e:
            self.add_error(KeywordNotFound([], [e.args[0].value]))
            return None

        return node

    def get_when(self, tokens):
        """
        Validate the when part of the rule.
        :param tokens:
        :return:
        """
        source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, strip_tokens(tokens[1:]))
        parsed = parse_unrecognized(self.context,
                                    source,
                                    parsers="all",
                                    who=self.name,
                                    prop=Keywords.WHEN,
                                    filter_func=expect_one)

        if not parsed.status:
            self.add_error(parsed.value)
            return None

        return parsed

    def get_print(self, tokens):
        """
        Validate the print part
        :param tokens:
        :return:
        """
        source = BaseParser.get_text_from_tokens(strip_tokens(tokens[1:]))
        return FormatAstRawText(source)
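A hedged sketch of the rule shape this parser accepts (the rule text is invented; the when/print split follows KEYWORDS above):

    # when x > 0 print "positive"
    #
    # parse_rule() groups the tokens with get_parts(); the 'when' part is handed
    # to the other parsers through parse_unrecognized(), while the 'print' part
    # is kept verbatim as a FormatAstRawText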
@@ -7,7 +7,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token
from core.utils import get_n_clones
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
-from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEof, Node
+from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, Node
from parsers.PythonWithConceptsParser import PythonWithConceptsParser

# No need to check for Python code as the source code node will resolve to python code anyway
@@ -143,7 +143,7 @@ class FunctionParser(BaseParser):
        so 'twenty one' will resolve to [[c:twenty one:]], not [[c:twenty one:], [c:twenty:, c:one:]]
        :param kwargs:
        """
-        super().__init__("Function", 55, True)
+        super().__init__("Function", 55)
        self.sep = sep
        self.longest_concepts_only = longest_concepts_only
        self.record_errors = True
@@ -179,6 +179,7 @@ class FunctionParser(BaseParser):
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

+        self.parser_input.next_token()
        node = self.parse_function()

        if self.parser_input.next_token():
@@ -219,7 +220,7 @@ class FunctionParser(BaseParser):
            return None

        if not self.parser_input.next_token():
-            self.add_error(UnexpectedEof(f"Unexpected EOF while parsing left parenthesis"))
+            self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing left parenthesis"))
            return None

        token = self.parser_input.token
@@ -231,7 +232,7 @@ class FunctionParser(BaseParser):

        start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2])
        if not self.parser_input.next_token():
-            self.add_error(UnexpectedEof(f"Unexpected EOF after left parenthesis"))
+            self.add_error(UnexpectedEofNode(f"Unexpected EOF after left parenthesis"))
            return FunctionNode(start_node, None, None)

        params = self.parse_parameters()
@@ -239,7 +240,7 @@ class FunctionParser(BaseParser):
            return FunctionNode(start_node, None, params)

        token = self.parser_input.token
-        if token.type != TokenKind.RPAR:
+        if not token or token.type != TokenKind.RPAR:
            self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found",
                                                    token,
                                                    [TokenKind.RPAR]))
@@ -261,7 +262,7 @@ class FunctionParser(BaseParser):

        token = self.parser_input.token
        if token.type == TokenKind.EOF:
-            self.add_error(UnexpectedEof(f"Unexpected EOF while parsing parameters"))
+            self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing parameters"))
            return None

        if token.type == TokenKind.RPAR:
@@ -269,10 +270,12 @@ class FunctionParser(BaseParser):

            if token.value == self.sep:
                sep_pos = self.parser_input.pos
-                self.parser_input.next_token()
+                has_next = self.parser_input.next_token()  # it's before add_sep() to capture trailing whitespace
                function_parameter.add_sep(sep_pos,
                                           self.parser_input.pos - 1,
                                           self.parser_input.tokens[sep_pos: self.parser_input.pos])
+                if not has_next:
+                    break

        return nodes

@@ -292,8 +295,8 @@ class FunctionParser(BaseParser):
        tokens = []
        while True:
            token = self.parser_input.token
-            # if token is None:
-            #     break
+            if token is None:
+                break

            if token.value == self.sep or token.type == TokenKind.RPAR:
                break