Added ExactConceptParser
This commit is contained in:
+102
-94
@@ -1,5 +1,5 @@
|
||||
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
|
||||
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
||||
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
||||
from dataclasses import dataclass, field
|
||||
import logging
|
||||
|
||||
@@ -147,24 +147,29 @@ class BinaryNode(DefaultParserNode):
|
||||
|
||||
|
||||
class DefaultParser(BaseParser):
|
||||
def __init__(self, text, sub_parser):
|
||||
BaseParser.__init__(self, "DefaultParser", text)
|
||||
"""
|
||||
Parse sheerka specific grammar (like def concept)
|
||||
"""
|
||||
def __init__(self, sub_parser=None):
    """Create a DefaultParser that is not yet bound to any input text.

    :param sub_parser: optional sub parser, stored unchanged in ``self.sub_parser``
    """
    BaseParser.__init__(self, "DefaultParser")
    self.sub_parser = sub_parser
    # No tokenizer is created here: this constructor receives no text, so the former
    # ``self.lexer = Tokenizer(text)`` referenced a name that does not exist in this scope.
    # reset_parser() builds the token iterator for each input instead.
    self.lexer_iter = None
    self._current = None
    self.context = None
    self.text = None
|
||||
|
||||
def reset_parser(self, context, text):
    """Re-initialise the parsing state for a new input.

    Stores ``context`` and ``text`` on the instance, creates a fresh iterator over
    ``Tokenizer(text)``, clears the current token and then calls ``next_token()`` once
    (presumably to load the first token -- confirm against next_token).

    :param context: execution context; when truthy, its ``sheerka.parsers[1]`` entry
        replaces ``self.sub_parser``
    :param text: the input handed to the Tokenizer
    """
    self.context = context

    # hack before implementing all the sub parsers
    if context:
        sheerka = context.sheerka
        self.sub_parser = sheerka.parsers[1]

    self.text = text
    token_stream = Tokenizer(text)
    self.lexer_iter = iter(token_stream)
    self._current = None

    self.next_token()
|
||||
|
||||
def collect_tokens(self, *args):
    """Flatten nodes and raw items into a single list.

    :param args: any mix of ``Node`` instances and other objects
    :return: a new list where every ``Node`` contributes the content of its ``tokens``
        attribute and every other argument is kept as-is, in argument order
    """
    collected = []
    for arg in args:
        collected += arg.tokens if isinstance(arg, Node) else [arg]
    return collected
|
||||
|
||||
def add_error(self, error, next_token=True):
|
||||
self.has_error = True
|
||||
self.error_sink.append(error)
|
||||
@@ -186,21 +191,23 @@ class DefaultParser(BaseParser):
|
||||
return
|
||||
|
||||
@staticmethod
def get_concept_key(tokens, variables=None):
    """Build the key of a concept from the tokens of its name.

    Whitespace tokens are skipped, processing stops at the first EOF token and the
    remaining parts are joined with single spaces.

    :param tokens: iterable of tokens exposing ``type`` and ``value``
    :param variables: optional sequence of variable names; a token whose value is found
        in it is replaced by ``__var__<index>``, index being its position in the sequence
    :return: the key as a string (empty when no significant token is found)
    """
    parts = []
    for token in tokens:
        if token.type == TokenKind.EOF:
            break
        if token.type == TokenKind.WHITESPACE:
            continue

        if variables is not None and token.value in variables:
            parts.append("__var__" + str(variables.index(token.value)))
        elif token.type == TokenKind.STRING:
            # drop the first and last character of a string token (its quotes)
            parts.append(token.value[1:-1])
        else:
            parts.append(token.value)

    return " ".join(parts)


# Former name of get_concept_key, kept so that existing callers keep working.
get_concept_name = get_concept_key
|
||||
|
||||
@staticmethod
|
||||
def fix_indentation(tokens):
|
||||
@@ -242,7 +249,8 @@ class DefaultParser(BaseParser):
|
||||
|
||||
return tokens[4:]
|
||||
|
||||
def parse(self, context, text):
    """Parse ``text`` and return the result of ``parse_statement()``.

    The parser state is reset for every call, so the same instance can be reused.

    :param context: execution context, forwarded to ``reset_parser``
    :param text: the input to parse, forwarded to ``reset_parser``
    :return: whatever ``parse_statement()`` returns
    """
    self.reset_parser(context, text)
    return self.parse_statement()
|
||||
|
||||
def parse_statement(self):
|
||||
@@ -277,7 +285,7 @@ class DefaultParser(BaseParser):
|
||||
name_as_tokens.append(token)
|
||||
self.next_token()
|
||||
token = self.get_token()
|
||||
name = self.get_concept_name(name_as_tokens)
|
||||
name = self.get_concept_key(name_as_tokens)
|
||||
tokens_found["name"] = name_as_tokens
|
||||
|
||||
# try to parse as, where, pre and post declarations
|
||||
@@ -328,8 +336,8 @@ class DefaultParser(BaseParser):
|
||||
|
||||
# start = current_tokens[0].index
|
||||
# end = current_tokens[-1].index + len(current_tokens[-1].value)
|
||||
sub_parser = self.sub_parser(current_tokens, source=keyword.value)
|
||||
sub_tree = sub_parser.parse()
|
||||
sub_parser = self.sub_parser(source=keyword.value)
|
||||
sub_tree = sub_parser.parse(self.context, current_tokens)
|
||||
if isinstance(sub_tree, ErrorNode):
|
||||
self.add_error(sub_tree, False)
|
||||
asts[keyword] = sub_tree
|
||||
@@ -344,74 +352,74 @@ class DefaultParser(BaseParser):
|
||||
log.debug(f"Found DefConcept node '{def_concept_node}'")
|
||||
return def_concept_node
|
||||
|
||||
def parse_expression(self):
    """Entry point of the expression grammar; delegates to ``parse_addition()``."""
    expression = self.parse_addition()
    return expression
|
||||
|
||||
def parse_addition(self):
    """Parse ``multiply (('+' | '-') addition)?``.

    :return: the node from ``parse_multiply()`` alone when no additive part follows,
        otherwise a ``BinaryNode`` whose right operand is parsed recursively
    """
    lhs = self.parse_multiply()
    operator = self.get_token()

    if operator is None or operator.type == TokenKind.EOF:
        return lhs

    if operator.type == TokenKind.NUMBER:  # example 15 +5 or 15 -5
        # NOTE(review): the number token is not consumed before recursing and is also
        # passed to collect_tokens next to rhs -- confirm it is not collected twice.
        rhs = self.parse_addition()
        return BinaryNode(self.collect_tokens(lhs, operator, rhs), TokenKind.PLUS, lhs, rhs)

    if operator.type in (TokenKind.PLUS, TokenKind.MINUS):
        self.next_token()
        rhs = self.parse_addition()
        return BinaryNode(self.collect_tokens(lhs, operator, rhs), operator.type, lhs, rhs)

    return lhs
|
||||
|
||||
def parse_multiply(self):
    """Parse ``atom (('*' | '/') multiply)?``.

    :return: the node from ``parse_atom()`` alone when the current token is missing, EOF
        or not a star/slash, otherwise a ``BinaryNode`` whose right operand is parsed
        recursively
    """
    lhs = self.parse_atom()
    operator = self.get_token()

    no_more_input = operator is None or operator.type == TokenKind.EOF
    if no_more_input or operator.type not in (TokenKind.STAR, TokenKind.SLASH):
        return lhs

    self.next_token()
    rhs = self.parse_multiply()
    return BinaryNode(self.collect_tokens(lhs, operator, rhs), operator.type, lhs, rhs)
|
||||
|
||||
def parse_atom(self):
    """Parse a single operand.

    Handles numbers, strings, the identifiers ``true`` / ``false`` / ``null``, any other
    identifier (as a variable) and a parenthesised expression.

    :return: the matching node; an ``UnexpectedTokenErrorNode`` when the closing
        parenthesis is missing; the result of ``add_error`` for any other token
    """
    token = self.get_token()
    kind = token.type

    if kind == TokenKind.NUMBER:
        self.next_token()
        digits = token.value
        return NumberNode([token], float(digits) if '.' in digits else int(digits))

    if kind == TokenKind.STRING:
        self.next_token()
        literal = token.value
        # content without its first/last character, plus the first character itself
        return StringNode([token], literal[1:-1], literal[0])

    if kind == TokenKind.IDENTIFIER:
        word = token.value
        self.next_token()
        if word == "true":
            return TrueNode([token])
        if word == "false":
            return FalseNode([token])
        if word == "null":
            return NullNode([token])
        return VariableNode([token], word)

    if kind == TokenKind.LPAR:
        self.next_token()
        inner = self.parse_expression()
        closing = self.get_token()
        self.next_token()

        if closing.type != TokenKind.RPAR:
            error = UnexpectedTokenErrorNode([closing], "Right parenthesis not found.", [TokenKind.RPAR])
            self.add_error(error)
            return error

        return inner

    expected = [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
                "null", TokenKind.LPAR]
    error = UnexpectedTokenErrorNode([token], "Unexpected token", expected)
    # NOTE(review): unlike the parenthesis case, this returns whatever add_error returns
    # rather than the error node itself -- confirm add_error returns the node.
    return self.add_error(error)
|
||||
# def parse_expression(self):
|
||||
# return self.parse_addition()
|
||||
#
|
||||
# def parse_addition(self):
|
||||
# left = self.parse_multiply()
|
||||
# token = self.get_token()
|
||||
# if token is None or token.type == TokenKind.EOF:
|
||||
# return left
|
||||
#
|
||||
# if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
|
||||
# right = self.parse_addition()
|
||||
# return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
|
||||
#
|
||||
# if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
|
||||
# return left
|
||||
#
|
||||
# self.next_token()
|
||||
# right = self.parse_addition()
|
||||
# return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
|
||||
#
|
||||
# def parse_multiply(self):
|
||||
# left = self.parse_atom()
|
||||
# token = self.get_token()
|
||||
# if token is None or token.type == TokenKind.EOF:
|
||||
# return left
|
||||
#
|
||||
# if token.type not in (TokenKind.STAR, TokenKind.SLASH):
|
||||
# return left
|
||||
#
|
||||
# self.next_token()
|
||||
# right = self.parse_multiply()
|
||||
# return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
|
||||
#
|
||||
# def parse_atom(self):
|
||||
# token = self.get_token()
|
||||
# if token.type == TokenKind.NUMBER:
|
||||
# self.next_token()
|
||||
# return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
|
||||
# elif token.type == TokenKind.STRING:
|
||||
# self.next_token()
|
||||
# return StringNode([token], token.value[1:-1], token.value[0])
|
||||
# elif token.type == TokenKind.IDENTIFIER:
|
||||
# if token.value == "true":
|
||||
# self.next_token()
|
||||
# return TrueNode([token])
|
||||
# elif token.value == "false":
|
||||
# self.next_token()
|
||||
# return FalseNode([token])
|
||||
# elif token.value == "null":
|
||||
# self.next_token()
|
||||
# return NullNode([token])
|
||||
# else:
|
||||
# self.next_token()
|
||||
# return VariableNode([token], token.value)
|
||||
# elif token.type == TokenKind.LPAR:
|
||||
# self.next_token()
|
||||
# exp = self.parse_expression()
|
||||
# token = self.get_token()
|
||||
# self.next_token()
|
||||
#
|
||||
# if token.type != TokenKind.RPAR:
|
||||
# error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
|
||||
# self.add_error(error)
|
||||
# return error
|
||||
#
|
||||
# return exp
|
||||
# else:
|
||||
# error = UnexpectedTokenErrorNode([token], "Unexpected token",
|
||||
# [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
|
||||
# "null", TokenKind.LPAR])
|
||||
# return self.add_error(error)
|
||||
|
||||
Reference in New Issue
Block a user