Added the first version of DebugManager and implemented a draft of the rule engine.
This commit is contained in:
@@ -0,0 +1,320 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.sheerka.Sheerka import ExecutionContext
|
||||
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
|
||||
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
|
||||
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, \
|
||||
ConceptExpression, StrMatch
|
||||
|
||||
|
||||
@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
    """Error node reported when the token stream ends mid-definition."""
class BnfDefinitionParser(BaseParser):
    """
    Parser used to transform literal into ParsingExpression
    example :
    a | b, c -> Sequence(OrderedChoice(a, b) ,c)

    '|' (pipe) is used for OrderedChoice
    ',' (comma) is used for Sequence
    '?' (question mark) is used for Optional
    '*' (star) is used for ZeroOrMore
    '+' (plus) is used for OneOrMore

    """

    # Parser name passed to BaseParser and used in sheerka return values.
    NAME = "BnfDefinition"

    def __init__(self, **kwargs):
        # NOTE(review): **kwargs is accepted but never forwarded to the base
        # class — confirm this is intentional.
        super().__init__(BnfDefinitionParser.NAME, 50, False)

        self.lexer_iter = None  # iterator over tokens (Tokenizer or caller-supplied)
        self._current = None  # token currently under examination
        self.after_current = None  # one look ahead
        self.nb_open_par = 0  # depth of currently open '(' groups
        self.context = None  # ExecutionContext of the parse in progress
        self.source = ""  # raw text of every token consumed so far
        self.sheerka = None  # shortcut to context.sheerka, set in reset_parser
||||
def __eq__(self, other):
|
||||
if not isinstance(other, BnfDefinitionParser):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def reset_parser(self, context, text):
|
||||
self.context = context
|
||||
self.sheerka = context.sheerka
|
||||
|
||||
self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
|
||||
self._current = None
|
||||
self.after_current = None
|
||||
self.nb_open_par = 0
|
||||
|
||||
self.next_token()
|
||||
self.eat_white_space()
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
self.error_sink.append(error)
|
||||
if next_token:
|
||||
self.next_token()
|
||||
return error
|
||||
|
||||
def get_token(self) -> Token:
|
||||
return self._current
|
||||
|
||||
    def next_token(self, skip_whitespace=False):
        """
        Advance to the next token, preferring the buffered look-ahead.

        Once EOF has been reached the method is a no-op.  The raw text of every
        consumed token is appended to ``self.source`` so the original input can
        be reconstructed for the return value.
        """
        if self._current and self._current.type == TokenKind.EOF:
            return

        try:
            # Consume the look-ahead (set by next_after) before pulling new tokens.
            self._current = self.after_current or next(self.lexer_iter)
            self.source += self._current.str_value
            self.after_current = None

            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
                    self.source += self._current.str_value
        except StopIteration:
            # Iterator exhausted: synthesize an EOF token.
            self._current = Token(TokenKind.EOF, "", -1, -1, -1)
||||
    def next_after(self):
        """
        Peek one token ahead without consuming the current token.

        The peeked token is cached in ``self.after_current``; its text is NOT
        added to ``self.source`` here — that happens when it is actually
        consumed by next_token() / eat_white_space().
        """
        if self.after_current is not None:
            return self.after_current

        try:
            self.after_current = next(self.lexer_iter)
            # self.source += self.after_current.str_value
            return self.after_current
        except StopIteration:
            # Exhausted: cache a synthetic EOF so repeated peeks are stable.
            self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
            return self.after_current
||||
    def eat_white_space(self):
        """
        Skip over WHITESPACE/NEWLINE tokens, first promoting any buffered
        look-ahead to the current token.

        NOTE(review): on exhaustion this sets ``self._current`` to None,
        whereas next_token() synthesizes an EOF token — callers must cope with
        both representations of end-of-input; confirm this asymmetry is
        intentional.
        """
        if self.after_current is not None:
            # Promote the look-ahead; its text is accounted for here.
            self._current = self.after_current
            self.source += self._current.str_value
            self.after_current = None

        try:
            while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                self._current = next(self.lexer_iter)
                self.source += self._current.str_value
        except StopIteration:
            self._current = None
||||
def maybe_sequence(self, first, second):
|
||||
token = self.get_token()
|
||||
return token.type == second or token.type == first and self.next_after().type == second
|
||||
|
||||
    def parse(self, context: ExecutionContext, parser_input):
        """
        Entry point: parse *parser_input* (raw text or a token iterable) into a
        ParsingExpression tree and wrap it in a sheerka return value.

        Lexer failures are converted into an ERROR concept and returned as a
        failed result instead of propagating.
        """

        try:
            self.reset_parser(context, parser_input)
            tree = self.parse_choice()

            # The whole input must have been consumed; any trailing token is
            # reported as an error.
            token = self.get_token()
            if token and token.type != TokenKind.EOF:
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
        except LexerError as e:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=[e]))

        # NOTE(review): `tree` is passed twice to get_return_value_body —
        # confirm against BaseParser's signature.
        value = self.get_return_value_body(context.sheerka, self.source, tree, tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            value)

        return ret
||||
    def parse_choice(self):
        """
        a | b | c
        :return: the lone sequence when no '|' follows, otherwise an
            OrderedChoice over all alternatives (possibly with a rule name).
        """
        sequence = self.parse_sequence()

        self.eat_white_space()
        token = self.get_token()
        # No '|' follows: the single sequence is the whole expression.
        if token is None or token.type != TokenKind.VBAR:
            return sequence

        elements = [sequence]
        while True:
            # maybe eat the vertical bar
            self.eat_white_space()
            token = self.get_token()
            if token is None or token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)

            sequence = self.parse_sequence()
            elements.append(sequence)

        return self.eat_rule_name_if_needed(OrderedChoice(*elements))
||||
def parse_sequence(self):
|
||||
"""
|
||||
a b c
|
||||
:return:
|
||||
"""
|
||||
expr_and_modifier = self.parse_modifier()
|
||||
token = self.get_token()
|
||||
if token is None or \
|
||||
token.type == TokenKind.EOF or \
|
||||
token.type == TokenKind.EQUALS or \
|
||||
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
|
||||
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
|
||||
return expr_and_modifier
|
||||
|
||||
elements = [expr_and_modifier]
|
||||
while True:
|
||||
token = self.get_token()
|
||||
if token is None or \
|
||||
token.type == TokenKind.EOF or \
|
||||
token.type == TokenKind.EQUALS or \
|
||||
self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
|
||||
self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR):
|
||||
break
|
||||
self.eat_white_space()
|
||||
|
||||
sequence = self.parse_modifier()
|
||||
elements.append(sequence)
|
||||
|
||||
return self.eat_rule_name_if_needed(Sequence(*elements))
|
||||
|
||||
def parse_modifier(self):
|
||||
"""
|
||||
a? | a* | a+
|
||||
:return:
|
||||
"""
|
||||
expression = self.parse_expression()
|
||||
|
||||
token = self.get_token()
|
||||
|
||||
if token.type == TokenKind.QMARK:
|
||||
self.next_token()
|
||||
return self.eat_rule_name_if_needed(Optional(expression))
|
||||
|
||||
if token.type == TokenKind.STAR:
|
||||
self.next_token()
|
||||
return self.eat_rule_name_if_needed(ZeroOrMore(expression))
|
||||
|
||||
if token.type == TokenKind.PLUS:
|
||||
self.next_token()
|
||||
return self.eat_rule_name_if_needed(OneOrMore(expression))
|
||||
|
||||
return expression
|
||||
|
||||
    def parse_expression(self):
        """
        Parse one atomic expression: a parenthesized group, a concept literal,
        an identifier/keyword naming a concept, a quoted string, or a bare
        token matched literally.

        Returns None after recording an error for unresolvable concepts.
        """
        token = self.get_token()
        if token.type == TokenKind.EOF:
            # Report but fall through; False = do not advance past EOF.
            self.add_error(UnexpectedEndOfFileError(), False)
        if token.type == TokenKind.LPAR:
            # '(' choice ')' — track nesting depth for parse_sequence's
            # boundary test.
            self.nb_open_par += 1
            self.next_token()
            expr = self.parse_choice()
            token = self.get_token()
            if token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
                return self.eat_rule_name_if_needed(expr)
            else:
                # Missing ')': record the error but keep the parsed group.
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
                return expr

        if token.type == TokenKind.CONCEPT:
            self.next_token()
            concept = self.sheerka.new((token.value[0], token.value[1]))
            expr = ConceptExpression(concept)
            # expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
            #     else ConceptExpression(concept)
            return self.eat_rule_name_if_needed(expr)

        if token.type in (TokenKind.IDENTIFIER, TokenKind.KEYWORD):
            self.next_token()

            concept_name = token.str_value

            # we are trying to match against a concept which is still under construction !
            # (for example of recursive bnf definition)
            if self.context.obj and hasattr(self.context.obj, "name"):
                if concept_name == str(self.context.obj.name):
                    # Self-reference: keep the name, resolve lazily.
                    return self.eat_rule_name_if_needed(ConceptExpression(concept_name))

            concept = self.context.get_concept(concept_name)
            if not self.sheerka.is_known(concept):
                self.add_error(concept)
                return None
            elif hasattr(concept, "__iter__"):
                # Ambiguous lookup (several candidates): cannot resolve.
                self.add_error(
                    self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
                                     body=("key", concept_name)))
                return None
            else:
                expr = ConceptExpression(concept)
                expr.rule_name = concept.name
                return self.eat_rule_name_if_needed(expr)

        if token.type == TokenKind.STRING:
            self.next_token()
            # Re-tokenize the quoted content; a multi-token string becomes a
            # Sequence of literal matches with whitespace kept significant
            # inside the string (only the last element skips whitespace).
            tokens = list(Tokenizer(token.strip_quote, yield_eof=False))
            if len(tokens) == 1:
                return self.eat_rule_name_if_needed(StrMatch(tokens[0].str_value))
            else:
                elements = [StrMatch(t.str_value, skip_whitespace=False) for t in tokens]
                elements[-1].skip_white_space = True
                ret = Sequence(*elements)
                return self.eat_rule_name_if_needed(ret)

        # Fallback: treat any other token as a literal match of its text.
        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
        return self.eat_rule_name_if_needed(ret)
||||
    def eat_rule_name_if_needed(self, expression):
        """
        If the current token is '=', consume `= identifier` and attach the
        identifier to *expression* as its rule name; otherwise return
        *expression* unchanged.

        For set-like concept expressions, also compute a recurse id rooted at
        the enclosing INIT_BNF action's concept (when one is found).
        """
        token = self.get_token()
        if token is not None and token.type == TokenKind.EQUALS:
            self.next_token()  # eat equals
            token = self.get_token()

            if token is None or token.type != TokenKind.IDENTIFIER:
                return self.add_error(
                    UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))

            expression.rule_name = token.value
            self.next_token()

            if BnfDefinitionParser.is_expression_a_set(self.context, expression):
                # Walk up the context chain to the INIT_BNF action that owns
                # this definition; its concept id roots the recurse id.
                root_concept = self.context.search(start_with_self=True,
                                                   predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
                                                   get_obj=lambda ec: ec.action_context,
                                                   stop=lambda ec: ec.action == BuiltinConcepts.INIT_BNF)
                root_concept = list(root_concept)
                if root_concept and hasattr(root_concept[0], "id"):
                    expression.recurse_id = expression.get_recurse_id(root_concept[0].id,
                                                                      expression.concept.id,
                                                                      expression.rule_name)

        return expression
||||
@staticmethod
|
||||
def is_expression_a_set(context, expression):
|
||||
return isinstance(expression, ConceptExpression) and context.sheerka.isaset(context, expression.concept)
|
||||
|
||||
    @staticmethod
    def update_recurse_id(context, concept_id, expression):
        """
        Recursively stamp *expression* and all of its sub-expressions with a
        recurse id derived from *concept_id* (set-like concept expressions
        only).

        NOTE(review): assumes every expression exposes ``.elements`` — confirm
        that leaf expression types provide an (empty) elements attribute.
        """
        if BnfDefinitionParser.is_expression_a_set(context, expression):
            expression.recurse_id = expression.get_recurse_id(concept_id, expression.concept.id, expression.rule_name)

        for element in expression.elements:
            BnfDefinitionParser.update_recurse_id(context, concept_id, element)
Reference in New Issue
Block a user