Implemented some enhancement requests: renamed the parser error hierarchy (ErrorNode -> ParsingError, UnexpectedTokenErrorNode -> UnexpectedTokenParsingError, UnexpectedEofNode -> UnexpectedEofParsingError, plus matching renames of the concrete error classes), renamed FormatRuleParser to DefFormatRuleParser, and removed dead code (NopNode, the commented-out get_text_from_tokens, BaseTokenizerIterParser, BaseSplitIterParser, BnfNodeParser.fix_infinite_recursions).
@@ -2,7 +2,7 @@ from dataclasses import dataclass, field

 import core.utils
 from core.tokenizer import Keywords, TokenKind, Tokenizer
-from parsers.BaseParser import BaseParser, Node, ErrorNode, UnexpectedEofNode, UnexpectedTokenErrorNode
+from parsers.BaseParser import BaseParser, Node, ParsingError, UnexpectedEofParsingError, UnexpectedTokenParsingError


 @dataclass()
@@ -14,7 +14,7 @@ class CustomGrammarParserNode(Node):


 @dataclass()
-class SyntaxErrorNode(CustomGrammarParserNode, ErrorNode):
+class SyntaxErrorNode(CustomGrammarParserNode, ParsingError):
     """
     The input is recognized, but there is a syntax error
     """
@@ -40,7 +40,7 @@ class SyntaxErrorNode(CustomGrammarParserNode, ErrorNode):


 @dataclass()
-class KeywordNotFound(CustomGrammarParserNode, ErrorNode):
+class KeywordNotFound(CustomGrammarParserNode, ParsingError):
     keywords: list

     def __eq__(self, other):
@@ -98,12 +98,12 @@ class BaseCustomGrammarParser(BaseParser):
             return None

         if tokens[pos].type != TokenKind.NEWLINE:
-            self.add_error(UnexpectedTokenErrorNode("New line not found.", tokens[pos], [TokenKind.NEWLINE]))
+            self.add_error(UnexpectedTokenParsingError("New line not found.", tokens[pos], [TokenKind.NEWLINE]))
             return None
         pos += 1

         if tokens[pos].type != TokenKind.WHITESPACE:
-            self.add_error(UnexpectedTokenErrorNode("Indentation not found.", tokens[pos], [TokenKind.WHITESPACE]))
+            self.add_error(UnexpectedTokenParsingError("Indentation not found.", tokens[pos], [TokenKind.WHITESPACE]))
             return None

         indent_size = get_tab_size(self.DEFAULT_TAB_SIZE, tokens[pos].value)
@@ -113,9 +113,9 @@ class BaseCustomGrammarParser(BaseParser):
         while i < len(tokens) - 1:
             if tokens[i].type == TokenKind.NEWLINE:
                 if tokens[i + 1].type != TokenKind.WHITESPACE:
-                    self.add_error(UnexpectedTokenErrorNode("Indentation not found.",
-                                                            tokens[i + 1],
-                                                            [TokenKind.WHITESPACE]))
+                    self.add_error(UnexpectedTokenParsingError("Indentation not found.",
+                                                               tokens[i + 1],
+                                                               [TokenKind.WHITESPACE]))
                     return None

                 if get_tab_size(self.DEFAULT_TAB_SIZE, tokens[i + 1].value) < indent_size:
@@ -177,9 +177,9 @@ class BaseCustomGrammarParser(BaseParser):

         token = self.parser_input.token
         if expected_first_token and token.value != expected_first_token.value:
-            self.add_error(UnexpectedTokenErrorNode(f"'{expected_first_token.value}' keyword not found.",
-                                                    token,
-                                                    [expected_first_token]))
+            self.add_error(UnexpectedTokenParsingError(f"'{expected_first_token.value}' keyword not found.",
+                                                       token,
+                                                       [expected_first_token]))
             return None

         if token.value not in keywords:
@@ -225,7 +225,7 @@ class BaseCustomGrammarParser(BaseParser):
                 res[keyword] = [token]  # to keep track of when it starts
                 colon_mode_activated = self.parser_input.the_token_after().type == TokenKind.COLON
                 if not self.parser_input.next_token():
-                    self.add_error(UnexpectedEofNode(f"While parsing keyword '{keyword.value}'."))
+                    self.add_error(UnexpectedEofParsingError(f"While parsing keyword '{keyword.value}'."))
                     break
             else:
                 res[keyword].append(token)
@@ -8,7 +8,7 @@ from core.builtin_concepts import BuiltinConcepts
 from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
 from core.rule import Rule
 from core.tokenizer import TokenKind, Token
-from parsers.BaseParser import Node, BaseParser, ErrorNode
+from parsers.BaseParser import Node, BaseParser, ParsingError

 DEBUG_COMPILED = True

@@ -19,7 +19,7 @@ class ChickenAndEggError(Exception):


 @dataclass
-class NoFirstTokenError(ErrorNode):
+class NoFirstTokenError(ParsingError):
     concept: Concept
     key: str

@@ -424,7 +424,7 @@ class SourceCodeWithConceptNode(LexerNode):


 @dataclass()
-class GrammarErrorNode(ErrorNode):
+class GrammarErrorNode(ParsingError):
     message: str


+13 -208
@@ -35,14 +35,6 @@ class Node:
     pass


-@dataclass()
-class NopNode(Node):
-    pass
-
-    def __repr__(self):
-        return "nop"
-
-
 class NotInitializedNode(Node):
     pass
@@ -51,12 +43,12 @@ class NotInitializedNode(Node):


 @dataclass()
-class ErrorNode(Node, ErrorObj):
+class ParsingError(Node, ErrorObj):
     pass


 @dataclass()
-class UnexpectedTokenErrorNode(ErrorNode):
+class UnexpectedTokenParsingError(ParsingError):
     message: str
     token: Union[Token, str]
     expected_tokens: list
@@ -65,7 +57,7 @@ class UnexpectedTokenErrorNode(ErrorNode):
         if id(other) == id(self):
             return True

-        if not isinstance(other, UnexpectedTokenErrorNode):
+        if not isinstance(other, UnexpectedTokenParsingError):
             return False

         if self.message != other.message:
@@ -82,8 +74,8 @@ class UnexpectedTokenErrorNode(ErrorNode):


 @dataclass()
-class UnexpectedEofNode(ErrorNode):
-    message: str
+class UnexpectedEofParsingError(ParsingError):
+    message: str = None


 class BaseParser:
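Note on the two hunks above: this is the core of the commit. ErrorNode becomes ParsingError and the concrete classes follow the new naming; UnexpectedEofParsingError also gains a default for message, which is what makes the argument-free construction used later in this diff (in BnfDefinitionParser.parse_expression) valid. A minimal standalone sketch of the renamed classes; Node and ErrorObj are stubbed here because their full definitions are not part of this diff:

from dataclasses import dataclass

class Node: pass          # stub; the real Node is defined earlier in this file
class ErrorObj: pass      # stub; the real definition is not shown in the diff

@dataclass()
class ParsingError(Node, ErrorObj):  # was ErrorNode
    pass

@dataclass()
class UnexpectedEofParsingError(ParsingError):  # was UnexpectedEofNode
    message: str = None  # default added by this commit

UnexpectedEofParsingError()                      # now legal with no arguments
UnexpectedEofParsingError("When parsing 'or'")   # the message form still works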
@@ -214,46 +206,16 @@ class BaseParser:
         return parser_input.value

+    @staticmethod
+    def manage_eof(lst, strip_eof):
+        if strip_eof:
+            if len(lst) and lst[-1].type == TokenKind.EOF:
+                lst.pop()
+            return lst
+
+        if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
+            lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
+        return lst
+
-    # @staticmethod
-    # def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
-    #     """
-    #     Create the source code, from the list of token
-    #     :param tokens: list of tokens
-    #     :param custom_switcher: to override the behaviour (the return value) of some token
-    #     :param tracker: keep track of the original token value when custom switched
-    #     :return:
-    #     """
-    #     if tokens is None:
-    #         return ""
-    #     res = ""
-    #     def manage_eof(lst, strip_eof):
-    #         if strip_eof:
-    #             if len(lst) and lst[-1].type == TokenKind.EOF:
-    #                 lst.pop()
-    #             return lst
-    #
-    #     if not hasattr(tokens, "__iter__"):
-    #         tokens = [tokens]
-    #
-    #     switcher = {
-    #         # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
-    #     }
-    #
-    #     if custom_switcher:
-    #         switcher.update(custom_switcher)
-    #
-    #     for token in tokens:
-    #         value = switcher.get(token.type, lambda t: t.str_value)(token)
-    #         res += value
-    #         if tracker is not None and token.type in custom_switcher:
-    #             tracker[value] = token.value
-    #     return res
-    #     if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
-    #         lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
-    #     return lst

     @staticmethod
     def get_tokens_boundaries(tokens):
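Note: manage_eof, promoted above from commented-out code into a real static method, normalizes the EOF sentinel at the end of a token list. A usage sketch, assuming the Token signature visible in this diff (kind, value, index, line, column):

tokens = [Token(TokenKind.WORD, "x", 0, 1, 1)]

# strip_eof=False: append an EOF sentinel unless the list already ends with one
tokens = BaseParser.manage_eof(tokens, strip_eof=False)
assert tokens[-1].type == TokenKind.EOF

# strip_eof=True: drop a trailing EOF if present
tokens = BaseParser.manage_eof(tokens, strip_eof=True)
assert tokens[-1].type == TokenKind.WORD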
@@ -302,160 +264,3 @@ class BaseParser:
     @staticmethod
     def get_name(name):
         return BaseParser.PREFIX + name
-
-
-class BaseTokenizerIterParser(BaseParser):
-
-    def __init__(self, name, priority, parse_word=False, none_on_eof=True):
-        super().__init__(name, priority)
-        self.lexer_iter = None
-        self._current = None
-        self.context: ExecutionContext = None
-        self.text = None
-        self.sheerka = None
-
-        self.parse_word = parse_word
-        self.none_on_eof = none_on_eof
-
-    def reset_parser(self, context, text):
-        self.context = context
-        self.sheerka = context.sheerka
-
-        self.text = text
-        self.lexer_iter = iter(Tokenizer(text, self.parse_word))
-        self._current = None
-
-        self.next_token()
-
-    def add_error(self, error, next_token=True):
-        self.error_sink.append(error)
-        if next_token:
-            self.next_token()
-        return error
-
-    def get_token(self) -> Token:
-        return self._current
-
-    def next_token(self, skip_whitespace=True):
-        try:
-            self._current = next(self.lexer_iter)
-
-            if self.none_on_eof and self._current.type == TokenKind.EOF:
-                self._current = None
-                return False
-
-            if skip_whitespace:
-                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
-                    self._current = next(self.lexer_iter)
-        except StopIteration:
-            self._current = None
-            return False
-
-        return True
-
-
-class BaseSplitIterParser(BaseParser):
-
-    def __init__(self, name, priority, none_on_eof=False):
-        super().__init__(name, priority)
-        self._current = None
-        self.context: ExecutionContext = None
-        self.text = None
-        self.sheerka = None
-        self.iter_split = None
-        self.split_and_eat_tokens = (" ", "\n", "\t")
-        self.split_and_keep_tokens = ("=", ")", "(", ",")
-        self.split_tokens = self.split_and_eat_tokens + self.split_and_keep_tokens
-
-        self.none_on_eof = none_on_eof  # current token is set to None when EOF is hit
-
-    def parse_word(self, c, index, line, column):
-        end = self.split_tokens
-        escaped = False
-        buffer = ""
-
-        while escaped or c not in end:
-            if not escaped and c == "\\":
-                escaped = True
-            elif not escaped and c in ("'", '"'):
-                end = [c]
-            else:
-                buffer += c
-                escaped = False
-
-            index, column = index + 1, column + 1
-            if index == len(self.text):
-                break
-            c = self.text[index]
-
-            if c == "\n":
-                line += 1
-                column = 0
-
-        if c not in self.split_and_keep_tokens:  # 'not in' instead of 'in' to when c is a quote
-            index, column = index + 1, column + 1
-
-        return buffer, index, line, column
-
-    def split(self):
-        index = 0
-        line = 1
-        column = 1
-
-        while index < len(self.text):
-            c = self.text[index]
-
-            if c == "=":
-                if index + 1 < len(self.text) and self.text[index + 1] == "=":
-                    yield Token(TokenKind.EQUALSEQUALS, "==", index, line, column)
-                    index, column = index + 2, column + 2
-                else:
-                    yield Token(TokenKind.EQUALS, "=", index, line, column)
-                    index, column = index + 1, column + 1
-            elif c == ")":
-                yield Token(TokenKind.RPAR, ")", index, line, column)
-                index, column = index + 1, column + 1
-            elif c == "(":
-                yield Token(TokenKind.LPAR, "(", index, line, column)
-                index, column = index + 1, column + 1
-            elif c == ",":
-                yield Token(TokenKind.COMMA, ",", index, line, column)
-                index, column = index + 1, column + 1
-            else:
-
-                buffer, end_index, end_line, end_column = self.parse_word(c, index, line, column)
-                if buffer:
-                    yield Token(TokenKind.WORD, buffer, index, line, column)
-                index, line, column = end_index, end_line, end_column
-
-        yield Token(TokenKind.EOF, "<eof>", index, line, column)
-
-    def reset_parser(self, context, text):
-        self.context = context
-        self.sheerka = context.sheerka if context else None
-
-        self.text = text
-        self._current = None
-        self.iter_split = iter(self.split())
-
-    def add_error(self, error, next_token=True):
-        self.error_sink.append(error)
-        if next_token:
-            self.next_token()
-        return error
-
-    def get_token(self) -> Token:
-        return self._current
-
-    def next_token(self):
-        try:
-            self._current = next(self.iter_split)
-            if self._current.type == TokenKind.EOF:
-                if self.none_on_eof:
-                    self._current = None
-                    return False
-        except StopIteration:
-            self._current = None
-            return False
-
-        return True
@@ -1,19 +1,12 @@
 from dataclasses import dataclass

 import core.utils
 from core.builtin_concepts import BuiltinConcepts
 from core.sheerka.Sheerka import ExecutionContext
 from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
-from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
+from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError
 from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \
     ConceptExpression, StrMatch


-@dataclass()
-class UnexpectedEndOfFileError(ErrorNode):
-    pass
-
-
 class BnfDefinitionParser(BaseParser):
     """
     Parser used to transform literal into ParsingExpression
@@ -52,7 +45,7 @@ class BnfDefinitionParser(BaseParser):
         self.context = context
         self.sheerka = context.sheerka
         self.source = ""
-
+
         self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
         self._current = None
         self.after_current = None
@@ -123,7 +116,7 @@ class BnfDefinitionParser(BaseParser):

             token = self.get_token()
             if token and token.type != TokenKind.EOF:
-                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
+                self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))
         except LexerError as e:
             return self.sheerka.ret(
                 self.name,
@@ -221,7 +214,7 @@ class BnfDefinitionParser(BaseParser):
     def parse_expression(self):
         token = self.get_token()
         if token.type == TokenKind.EOF:
-            self.add_error(UnexpectedEndOfFileError(), False)
+            self.add_error(UnexpectedEofParsingError(), False)
         if token.type == TokenKind.LPAR:
             self.nb_open_par += 1
             self.next_token()
@@ -232,7 +225,7 @@ class BnfDefinitionParser(BaseParser):
                 self.next_token()
                 return self.eat_rule_name_if_needed(expr)
             else:
-                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
+                self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
                 return expr

         if token.type == TokenKind.CONCEPT:
@@ -291,7 +284,7 @@ class BnfDefinitionParser(BaseParser):

         if token is None or token.type != TokenKind.IDENTIFIER:
             return self.add_error(
-                UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
+                UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))

         expression.rule_name = token.value
         self.next_token()
@@ -1341,72 +1341,6 @@ class BnfNodeParser(BaseNodeParser):
             debugger.debug_var("result", concept_parser_helpers)
         return concept_parser_helpers

-    def fix_infinite_recursions(self, context, grammar, concept_id, parsing_expression):
-        """
-        Check the newly created parsing expression
-        Some infinite recursion can be resolved, simply by removing the pexpression that causes the loop
-        Let's look for that
-        :param context:
-        :param grammar:
-        :param concept_id:
-        :param parsing_expression:
-        :return:
-        """
-
-        def _find(expression_, path_):
-            index_ = -1
-            parent_ = None
-            for node_id in path_:
-                expression_ = expression_.nodes[0] if isinstance(expression_, ConceptExpression) else expression_
-                for i, node in [(i, n) for i, n in enumerate(expression_.nodes) if isinstance(n, ConceptExpression)]:
-                    if node_id == node.concept.id:
-                        index_ = i
-                        parent_ = expression_
-                        expression_ = node  # take the child of the ConceptExpression found
-                        break
-                else:
-                    raise IndexError(f"path {path_} cannot be found in '{expression_}'")
-
-            return parent_, index_, expression_
-
-        def _fix_node(expression, path):
-            parent, index, expression_update = _find(expression, path[1:-2])
-
-            assert isinstance(expression_update, ConceptExpression)
-
-            desc = f"Fixing circular reference {path}"
-            with context.push(BuiltinConcepts.INIT_BNF,
-                              expression_update.concept,
-                              who=self.name,
-                              obj=expression_update.concept,
-                              concepts_to_skip=[concept_id],
-                              desc=desc) as sub_context:
-                new_grammar = grammar.copy()
-                for node_id in path[-2:]:
-                    del new_grammar[node_id]
-                new_nodes = self.resolve_concept_parsing_expression(sub_context,
-                                                                    expression_update.concept,
-                                                                    expression_update.rule_name, new_grammar, set())
-                new = ConceptExpression(expression_update.concept,
-                                        rule_name=expression_update.rule_name,
-                                        nodes=new_nodes)
-
-                parent.nodes[index] = new
-
-        while True:
-            already_found = [concept_id]
-            concepts_in_recursion = []
-            if self.check_for_infinite_recursion(parsing_expression, already_found, concepts_in_recursion):
-                if "#" in concepts_in_recursion[-2]:
-                    # means that it's isaset concept
-                    _fix_node(parsing_expression, concepts_in_recursion[:-1])
-                else:
-                    break
-            else:
-                break
-
-        return concepts_in_recursion
-
     def check_for_infinite_recursion(self, parsing_expression, already_found, in_recursion, only_first=False):

         if isinstance(parsing_expression, ConceptExpression):
@@ -7,7 +7,7 @@ from core.concept import ConceptParts, DEFINITION_TYPE_BNF, DEFINITION_TYPE_DEF
 from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
 from core.tokenizer import TokenKind, Keywords
 from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode
-from parsers.BaseParser import Node, ErrorNode, NotInitializedNode, UnexpectedTokenErrorNode
+from parsers.BaseParser import Node, ParsingError, NotInitializedNode, UnexpectedTokenParsingError
 from parsers.BnfDefinitionParser import BnfDefinitionParser


@@ -17,7 +17,7 @@ class ParsingException(Exception):


 @dataclass()
-class DefConceptParserNode(Node):
+class DefConceptParsingResult(Node):
     """
     Base node for all default parser nodes
     """
@@ -25,12 +25,12 @@ class DefConceptParserNode(Node):


 @dataclass()
-class DefConceptParserErrorNode(DefConceptParserNode, ErrorNode):
+class DefConceptParsingError(DefConceptParsingResult, ParsingError):
     pass


 @dataclass()
-class CannotHandleErrorNode(DefConceptParserErrorNode):
+class CannotHandleParsingError(DefConceptParsingError):
     """
     The input is not recognized
     """
@@ -38,7 +38,7 @@ class CannotHandleErrorNode(DefConceptParserErrorNode):


 @dataclass()
-class NameNode(DefConceptParserNode):
+class NameNode(DefConceptParsingResult):

     def get_name(self):
         name = ""
@@ -70,7 +70,7 @@ class NameNode(DefConceptParserNode):


 @dataclass()
-class DefConceptNode(DefConceptParserNode):
+class DefConceptNode(DefConceptParsingResult):
     name: NameNode = NotInitializedNode()
     where: ReturnValueConcept = NotInitializedNode()
     pre: ReturnValueConcept = NotInitializedNode()
@@ -92,7 +92,7 @@ class DefConceptNode(DefConceptParserNode):


 @dataclass()
-class IsaConceptNode(DefConceptParserNode):
+class IsaConceptNode(DefConceptParsingResult):
     concept: NameNode = NotInitializedNode()
     set: NameNode = NotInitializedNode()

@@ -146,7 +146,7 @@ class DefConceptParser(BaseCustomGrammarParser):
         """
         token = self.parser_input.token
         if token.value != Keywords.DEF.value:
-            self.add_error(UnexpectedTokenErrorNode("'def' keyword not found.", token, [Keywords.DEF]))
+            self.add_error(UnexpectedTokenParsingError("'def' keyword not found.", token, [Keywords.DEF]))
             return None

         self.context.log("Keyword DEF found.", self.name)
@@ -168,7 +168,7 @@ class DefConceptParser(BaseCustomGrammarParser):
         keywords_found.extend([t[0] for t in parts.values()])  # keep track of all keywords found
         node = DefConceptNode(keywords_found)
         # if first_token.type == TokenKind.EOF:
-        #     return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))
+        #     return self.add_error(UnexpectedTokenParsingError([first_token], "Unexpected end of file", [Keywords.CONCEPT]))

         # get the name
         node.name = self.get_concept_name(parts[Keywords.CONCEPT])
@@ -16,7 +16,7 @@ class FormatRuleNode(Node):
     format_ast: FormatAstNode = None


-class FormatRuleParser(BaseCustomGrammarParser):
+class DefFormatRuleParser(BaseCustomGrammarParser):
     """
     Class that will parse formatting rules definitions
     eg: when xxx print yyy
@@ -28,7 +28,7 @@ class FormatRuleParser(BaseCustomGrammarParser):
     KEYWORDS_VALUES = [k.value for k in KEYWORDS]

     def __init__(self, **kwargs):
-        BaseCustomGrammarParser.__init__(self, "FormatRule", 60)
+        BaseCustomGrammarParser.__init__(self, "DefFormatRule", 60)

     def parse(self, context, parser_input: ParserInput):
         """
@@ -4,8 +4,8 @@ from typing import List, Tuple, Callable
 from core.builtin_concepts import BuiltinConcepts
 from core.concept import Concept
 from core.sheerka.services.SheerkaExecute import ParserInput
-from core.tokenizer import LexerError, TokenKind, Token
-from parsers.BaseParser import Node, BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, ErrorNode
+from core.tokenizer import TokenKind, Token
+from parsers.BaseParser import Node, BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, ParsingError


 class ExprNode(Node):
@@ -19,7 +19,7 @@ class ExprNode(Node):


 @dataclass()
-class LeftPartNotFoundError(ErrorNode):
+class LeftPartNotFoundError(ParsingError):
     """
     When the expression starts with 'or' or 'and'
     """
@@ -161,6 +161,7 @@ class OrNode(ExprNode):
     def __str__(self):
         return " or ".join([str(p) for p in self.parts])


+@dataclass()
 class NotNode(ExprNode):
     node: ExprNode
@@ -219,7 +220,7 @@ class ExpressionParser(BaseParser):
         tree = self.parse_or()
         token = self.parser_input.token
         if token and token.type != TokenKind.EOF:
-            self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
+            self.add_error(UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, []))

         value = self.get_return_value_body(context.sheerka, self.parser_input.as_text(), tree, tree)

@@ -241,7 +242,7 @@ class ExpressionParser(BaseParser):
             self.parser_input.next_token()
             expr = self.parse_and()
             if expr is None:
-                self.add_error(UnexpectedEofNode("When parsing 'or'"))
+                self.add_error(UnexpectedEofParsingError("When parsing 'or'"))
                 return OrNode(*parts)
             parts.append(expr)
             token = self.parser_input.token
@@ -259,7 +260,7 @@ class ExpressionParser(BaseParser):
             self.parser_input.next_token()
             expr = self.parse_names()
             if expr is None:
-                self.add_error(UnexpectedEofNode("When parsing 'and'"))
+                self.add_error(UnexpectedEofParsingError("When parsing 'and'"))
                 return AndNode(*parts)
             parts.append(expr)
             token = self.parser_input.token
@@ -282,7 +283,8 @@ class ExpressionParser(BaseParser):
             expr = self.parse_or()
             token = self.parser_input.token
             if token.type != TokenKind.RPAR:
-                self.error_sink.append(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
+                self.error_sink.append(
+                    UnexpectedTokenParsingError(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
                 return expr
             self.parser_input.next_token()
             return expr
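Note: besides the renames, this file gains a @dataclass() decorator on NotNode. The decorator is what turns the node: ExprNode annotation into a generated __init__; without it the annotation is inert and NotNode(expr) would fail. A standalone sketch of the difference:

from dataclasses import dataclass

class Plain:
    node: int  # annotation only, no generated __init__

@dataclass()
class Decorated:
    node: int  # dataclass generates __init__(self, node)

Decorated(1)   # ok
# Plain(1)     # would raise TypeError: Plain() takes no arguments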
@@ -9,7 +9,7 @@ from core.tokenizer import TokenKind, Token
 from core.utils import get_n_clones
 from parsers.SequenceNodeParser import SequenceNodeParser
 from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
-from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, Node
+from parsers.BaseParser import BaseParser, UnexpectedTokenParsingError, UnexpectedEofParsingError, Node
 from parsers.BnfNodeParser import BnfNodeParser
 from parsers.PythonWithConceptsParser import PythonWithConceptsParser
 from parsers.RuleParser import RuleParser
@@ -191,9 +191,9 @@ class FunctionParser(BaseParser):
         node = self.parse_function()

         if self.parser_input.next_token():
-            self.add_error(UnexpectedTokenErrorNode("Only one function supported",
-                                                    self.parser_input.token,
-                                                    [TokenKind.EOF]))
+            self.add_error(UnexpectedTokenParsingError("Only one function supported",
+                                                       self.parser_input.token,
+                                                       [TokenKind.EOF]))

         if self.has_error:
             if node is None:
@@ -222,25 +222,25 @@ class FunctionParser(BaseParser):
         start = self.parser_input.pos
         token = self.parser_input.token
         if token.type != TokenKind.IDENTIFIER:
-            self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a identifier",
-                                                    token,
-                                                    [TokenKind.IDENTIFIER]))
+            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a identifier",
+                                                       token,
+                                                       [TokenKind.IDENTIFIER]))
             return None

         if not self.parser_input.next_token():
-            self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing left parenthesis"))
+            self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing left parenthesis"))
             return None

         token = self.parser_input.token
         if token.type != TokenKind.LPAR:
-            self.add_error(UnexpectedTokenErrorNode(f"{token.repr_value} is not a left parenthesis",
-                                                    token,
-                                                    [TokenKind.LPAR]))
+            self.add_error(UnexpectedTokenParsingError(f"{token.repr_value} is not a left parenthesis",
+                                                       token,
+                                                       [TokenKind.LPAR]))
             return None

         start_node = NamesNode(start, start + 1, self.parser_input.tokens[start:start + 2])
         if not self.parser_input.next_token():
-            self.add_error(UnexpectedEofNode(f"Unexpected EOF after left parenthesis"))
+            self.add_error(UnexpectedEofParsingError(f"Unexpected EOF after left parenthesis"))
             return FunctionNode(start_node, None, None)

         params = self.parse_parameters()
@@ -249,9 +249,9 @@ class FunctionParser(BaseParser):

         token = self.parser_input.token
         if not token or token.type != TokenKind.RPAR:
-            self.add_error(UnexpectedTokenErrorNode(f"Right parenthesis not found",
-                                                    token,
-                                                    [TokenKind.RPAR]))
+            self.add_error(UnexpectedTokenParsingError(f"Right parenthesis not found",
+                                                       token,
+                                                       [TokenKind.RPAR]))
             return FunctionNode(start_node, None, params)

         return FunctionNode(start_node,
@@ -270,7 +270,7 @@ class FunctionParser(BaseParser):

         token = self.parser_input.token
         if token.type == TokenKind.EOF:
-            self.add_error(UnexpectedEofNode(f"Unexpected EOF while parsing parameters"))
+            self.add_error(UnexpectedEofParsingError(f"Unexpected EOF while parsing parameters"))
             return None

         if token.type == TokenKind.RPAR:
@@ -6,7 +6,7 @@ import core.utils
 from core.builtin_concepts import BuiltinConcepts
 from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import TokenKind
-from parsers.BaseParser import BaseParser, Node, ErrorNode
+from parsers.BaseParser import BaseParser, Node, ParsingError

 log = logging.getLogger(__name__)

@@ -20,7 +20,7 @@ def get_python_node(obj):


 @dataclass()
-class PythonErrorNode(ErrorNode):
+class PythonErrorNode(ParsingError):
     source: str
     exception: Exception

@@ -29,7 +29,7 @@ class PythonErrorNode(ErrorNode):


 @dataclass()
-class ConceptDetected(ErrorNode):
+class ConceptDetectedError(ParsingError):
     name: str


@@ -138,7 +138,7 @@ class PythonParser(BaseParser):
         if isinstance(tree, ast.Expression) and isinstance(tree.body, ast.Name):
             if tree.body.id in tracker or context.sheerka.fast_resolve(tree.body.id, return_new=False) is not None:
                 context.log("It's a simple concept. Not for me.", self.name)
-                self.error_sink.append(ConceptDetected(tree.body.id))
+                self.error_sink.append(ConceptDetectedError(tree.body.id))

         if self.has_error:
             ret = sheerka.ret(
@@ -2,16 +2,16 @@ from core.builtin_concepts import BuiltinConcepts
 from core.rule import Rule, ACTION_TYPE_DEFERRED
 from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import TokenKind
-from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
+from parsers.BaseParser import BaseParser, ParsingError, UnexpectedTokenParsingError


-class RuleNotFound(ErrorNode):
+class RuleNotFoundError(ParsingError):
     def __init__(self, id_as_tuple):
         self.key = id_as_tuple[0]
         self.id = id_as_tuple[1]

     def __repr__(self):
-        return f"RuleNotFound(id={self.id}, key={self.key}"
+        return f"RuleNotFoundError(id={self.id}, key={self.key}"


 class RuleParser(BaseParser):
@@ -54,9 +54,9 @@ class RuleParser(BaseParser):
         token = parser_input.token

         if parser_input.next_token():
-            reason = UnexpectedTokenErrorNode("Only one rule supported",
-                                              parser_input.token,
-                                              [TokenKind.EOF])
+            reason = UnexpectedTokenParsingError("Only one rule supported",
+                                                 parser_input.token,
+                                                 [TokenKind.EOF])
             return sheerka.ret(self.name,
                                False,
                                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(), reason=reason))
@@ -76,7 +76,7 @@ class RuleParser(BaseParser):
             return sheerka.ret(self.name,
                                False,
                                sheerka.new(BuiltinConcepts.ERROR,
-                                           body=[RuleNotFound(token.value)]))
+                                           body=[RuleNotFoundError(token.value)]))
         body = sheerka.new(BuiltinConcepts.PARSER_RESULT,
                            parser=self,
                            source=parser_input.as_text(),
@@ -7,7 +7,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput
 from core.tokenizer import Tokenizer, TokenKind
 from core.utils import strip_tokens, make_unique
 from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
-from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
+from parsers.BaseParser import UnexpectedTokenParsingError, ParsingError
 from parsers.BnfNodeParser import BnfNodeParser
 from parsers.SyaNodeParser import SyaNodeParser

@@ -15,14 +15,14 @@ PARSERS = [BnfNodeParser.NAME, SyaNodeParser.NAME, "Python"]


 @dataclass()
-class TokensNodeFound(ErrorNode):
+class TokensNodeFoundError(ParsingError):
     expected_tokens: list

     def __eq__(self, other):
         if id(other) == id(self):
             return True

-        if not isinstance(other, UnexpectedTokenErrorNode):
+        if not isinstance(other, UnexpectedTokenParsingError):
             return False

         if self.message != other.message:
@@ -96,7 +96,7 @@ class AtomConceptParserHelper:
             self.debug.append(token)

         if self.expected_tokens[0] != token.strip_quote:
-            self.errors.append(UnexpectedTokenErrorNode(
+            self.errors.append(UnexpectedTokenParsingError(
                 f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
                 token,
                 [self.expected_tokens[0]]))
@@ -186,7 +186,7 @@ class AtomConceptParserHelper:
         forked.finalize()

         if self.expected_tokens:
-            self.errors.append(TokensNodeFound(self.expected_tokens))
+            self.errors.append(TokensNodeFoundError(self.expected_tokens))

     def clone(self):
         clone = AtomConceptParserHelper(self.context)
@@ -14,7 +14,7 @@ from core.tokenizer import Token, TokenKind, Tokenizer
 from core.utils import get_n_clones, get_text_from_tokens, NextIdManager
 from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
     SourceCodeWithConceptNode, BaseNodeParser
-from parsers.BaseParser import ErrorNode
+from parsers.BaseParser import ParsingError

 PARSERS = ["Sequence", "Bnf", "Python"]

@@ -53,7 +53,7 @@ class DebugInfo:
         return msg + f" => {self.action}"


-class ParenthesisMismatchErrorNode(ErrorNode):
+class ParenthesisMismatchError(ParsingError):

     def __init__(self, error_int):
         if isinstance(error_int, tuple):
@@ -79,7 +79,7 @@ class ParenthesisMismatchErrorNode(ErrorNode):
         if id(self) == id(other):
             return True

-        if not isinstance(other, ParenthesisMismatchErrorNode):
+        if not isinstance(other, ParenthesisMismatchError):
             return False

         return self.token_value == other.token_value and self.pos == other.pos
@@ -88,11 +88,11 @@ class ParenthesisMismatchErrorNode(ErrorNode):
         return hash(self.pos)

     def __repr__(self):
-        return f"ParenthesisMismatchErrorNode('{self.token_value}', {self.pos}"
+        return f"ParenthesisMismatchError('{self.token_value}', {self.pos}"


 @dataclass()
-class NoneAssociativeSequenceErrorNode(ErrorNode):
+class NoneAssociativeSequenceError(ParsingError):
     concept: Concept
     first: int
     second: int
@@ -100,7 +100,7 @@ class NoneAssociativeSequenceErrorNode(ErrorNode):


 @dataclass()
-class TooManyParametersFound(ErrorNode):
+class TooManyParametersFoundError(ParsingError):
     concept: Concept
     pos: int  # position of the concept
     token: Token  # token of the concept where the error was noticed
@@ -532,7 +532,7 @@ class InFixToPostFix:

         # manage parenthesis that didn't find any match
         if self._is_lpar(self.stack[-1]):
-            self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
+            self._add_error(ParenthesisMismatchError(self.stack[-1]))

         # The parameter must be part the current concept being parsed
         assert len(self._concepts()) != 0  # sanity check
@@ -560,7 +560,7 @@ class InFixToPostFix:

         if self.unrecognized_tokens.parenthesis_count > 0:
             # parenthesis mismatch detected, do not try to resolve the unrecognized
-            self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens))
+            self._add_error(ParenthesisMismatchError(self.unrecognized_tokens))
             self._put_to_out(self.unrecognized_tokens)
         else:
             # try to recognize concepts
@@ -676,7 +676,7 @@ class InFixToPostFix:

         if stack.associativity == SyaAssociativity.No and current.associativity == SyaAssociativity.No:
             self._add_error(
-                NoneAssociativeSequenceErrorNode(current.concept, stack_head.start, sya_parser_helper.start))
+                NoneAssociativeSequenceError(current.concept, stack_head.start, sya_parser_helper.start))

         if current.associativity == SyaAssociativity.Left and current.precedence <= stack.precedence:
             if self.debug_enabled:
@@ -719,7 +719,7 @@ class InFixToPostFix:
             self.pop_stack_to_out()

         if self._is_lpar(self.stack[-1]):
-            self._add_error(ParenthesisMismatchErrorNode(self.stack[-1]))
+            self._add_error(ParenthesisMismatchError(self.stack[-1]))
             return False

         # Manage concepts ending with long names
@@ -777,7 +777,7 @@ class InFixToPostFix:
         if len(self.parameters_list) > len(current_concept.parameters_list_at_init):
             # we have eaten the parameters expected between two expected tokens
             # But there are some remaining parameters
-            self._add_error(TooManyParametersFound(
+            self._add_error(TooManyParametersFoundError(
                 current_concept.concept.concept,
                 current_concept.start,
                 token,
@@ -951,7 +951,7 @@ class InFixToPostFix:

         # checks consistency if an lpar is found
         if len(self.stack) == 0:
-            self._add_error(ParenthesisMismatchErrorNode((token, pos)))
+            self._add_error(ParenthesisMismatchError((token, pos)))
             return None

         if self._stack_isinstance(UnrecognizedTokensNode):
@@ -962,7 +962,7 @@ class InFixToPostFix:
                     start = i
                     break
             else:
                self._add_error(ParenthesisMismatchErrorNode((token, pos)))
-                self._add_error(ParenthesisMismatchErrorNode((token, pos)))
+                self._add_error(ParenthesisMismatchError((token, pos)))
                 return None

         source_code = self._make_source_code_with_concept(start, token, pos)
@@ -1073,7 +1073,7 @@ class InFixToPostFix:
         if len(self.stack) == 0 and len(self.out) == 0:
             # check for parenthesis mismatch
             if self.unrecognized_tokens.parenthesis_count > 0:
-                self._add_error(ParenthesisMismatchErrorNode(self.unrecognized_tokens))
+                self._add_error(ParenthesisMismatchError(self.unrecognized_tokens))
             return  # no need to pop the buffer, as no concept is found

         if self.debug_enabled:
@@ -1084,7 +1084,7 @@ class InFixToPostFix:

         # validate parenthesis
         if self._is_lpar(parser_helper) or self._is_rpar(parser_helper):
-            self._add_error(ParenthesisMismatchErrorNode(parser_helper))
+            self._add_error(ParenthesisMismatchError(parser_helper))
             return None

         self.manage_unrecognized()
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
 from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes, update_compiled
 from parsers.SequenceNodeParser import SequenceNodeParser
 from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
-from parsers.BaseParser import BaseParser, ErrorNode
+from parsers.BaseParser import BaseParser, ParsingError
 from parsers.BnfNodeParser import BnfNodeParser
 from parsers.SyaNodeParser import SyaNodeParser

@@ -18,7 +18,7 @@ PARSERS = ["EmptyString",


 @dataclass()
-class CannotParseNode(ErrorNode):
+class CannotParseError(ParsingError):
     unrecognized: UnrecognizedTokensNode

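Note: taken together, the commit leaves one convention in place: every parser-level error derives from ParsingError and carries an *Error or *ParsingError name (CannotParseError, RuleNotFoundError, ConceptDetectedError, TokensNodeFoundError, ...). A new error type would now be declared along these lines; MissingColonParsingError is a hypothetical example, not part of the commit:

from dataclasses import dataclass

from core.tokenizer import Token
from parsers.BaseParser import ParsingError

@dataclass()
class MissingColonParsingError(ParsingError):  # hypothetical, for illustration only
    message: str
    token: Token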