Added ExactConceptParser

This commit is contained in:
2019-11-09 17:29:50 +01:00
parent a636198222
commit 576ce77740
12 changed files with 603 additions and 169 deletions
+102 -94
View File
@@ -1,5 +1,5 @@
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
import logging
@@ -147,24 +147,29 @@ class BinaryNode(DefaultParserNode):
class DefaultParser(BaseParser):
def __init__(self, text, sub_parser):
BaseParser.__init__(self, "DefaultParser", text)
"""
Parse sheerka specific grammar (like def concept)
"""
def __init__(self, sub_parser=None):
BaseParser.__init__(self, "DefaultParser")
self.sub_parser = sub_parser
self.lexer = Tokenizer(text)
self.lexer_iter = None
self._current = None
self.context = None
self.text = None
def reset_parser(self, context, text):
self.context = context
# hack before implementing all the sub parsers
if context:
self.sub_parser = context.sheerka.parsers[1]
self.text = text
self.lexer_iter = iter(Tokenizer(text))
self._current = None
self.next_token()
def collect_tokens(self, *args):
    """Flatten a mix of Nodes and bare tokens into one token list.

    Every argument that is a ``Node`` contributes its ``tokens`` sequence;
    any other argument (a raw token) is appended as-is. Order of the
    arguments is preserved.
    """
    collected = []
    for entry in args:
        collected.extend(entry.tokens if isinstance(entry, Node) else (entry,))
    return collected
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
@@ -186,21 +191,23 @@ class DefaultParser(BaseParser):
return
@staticmethod
def get_concept_name(tokens, variables=None):
name = ""
def get_concept_key(tokens, variables=None):
key = ""
first = True
for token in tokens:
if token.type == TokenKind.EOF:
break
if token.type == TokenKind.WHITESPACE:
continue
if not first:
name += " "
key += " "
if variables is not None and token.value in variables:
name += "__var__" + str(variables.index(token.value))
key += "__var__" + str(variables.index(token.value))
else:
name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
first = False
return name
return key
@staticmethod
def fix_indentation(tokens):
@@ -242,7 +249,8 @@ class DefaultParser(BaseParser):
return tokens[4:]
def parse(self):
def parse(self, context, text):
self.reset_parser(context, text)
return self.parse_statement()
def parse_statement(self):
@@ -277,7 +285,7 @@ class DefaultParser(BaseParser):
name_as_tokens.append(token)
self.next_token()
token = self.get_token()
name = self.get_concept_name(name_as_tokens)
name = self.get_concept_key(name_as_tokens)
tokens_found["name"] = name_as_tokens
# try to parse as, where, pre and post declarations
@@ -328,8 +336,8 @@ class DefaultParser(BaseParser):
# start = current_tokens[0].index
# end = current_tokens[-1].index + len(current_tokens[-1].value)
sub_parser = self.sub_parser(current_tokens, source=keyword.value)
sub_tree = sub_parser.parse()
sub_parser = self.sub_parser(source=keyword.value)
sub_tree = sub_parser.parse(self.context, current_tokens)
if isinstance(sub_tree, ErrorNode):
self.add_error(sub_tree, False)
asts[keyword] = sub_tree
@@ -344,74 +352,74 @@ class DefaultParser(BaseParser):
log.debug(f"Found DefConcept node '{def_concept_node}'")
return def_concept_node
def parse_expression(self):
"""Entry point of the expression grammar; lowest precedence level.

Delegates straight to addition/subtraction, which in turn recurses
into the higher-precedence levels (multiply, atom).
"""
return self.parse_addition()
def parse_addition(self):
"""Parse '+'/'-' expressions; right-associative via self-recursion.

Returns the left operand unchanged when no addition operator (or end
of input) follows it.
"""
left = self.parse_multiply()
token = self.get_token()
# End of token stream: the operand stands alone.
if token is None or token.type == TokenKind.EOF:
return left
# A bare NUMBER right after an operand is treated as an implicit
# addition (the sign is assumed folded into the number token).
# NOTE(review): `token` is not consumed here before recursing, so the
# recursive parse_addition re-reads it AND it is also passed to
# collect_tokens — confirm the duplicate in the token list is intended.
if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
# Any other non-additive token ends this precedence level.
if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
return left
self.next_token()
right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
def parse_multiply(self):
    """Parse '*'/'/' expressions; binds tighter than addition.

    Right-associative via self-recursion, mirroring parse_addition.
    """
    lhs = self.parse_atom()
    operator = self.get_token()
    # Not a multiplicative operator (including None / EOF): the atom
    # stands alone at this precedence level.
    if operator is None or operator.type not in (TokenKind.STAR, TokenKind.SLASH):
        return lhs
    self.next_token()
    rhs = self.parse_multiply()
    return BinaryNode(self.collect_tokens(lhs, operator, rhs), operator.type, lhs, rhs)
def parse_atom(self):
    """Parse a single atom: literal, identifier, or parenthesised expression.

    On an unexpected token, records an UnexpectedTokenErrorNode via
    add_error and returns whatever add_error returns.
    """
    token = self.get_token()

    if token.type == TokenKind.NUMBER:
        self.next_token()
        # Presence of '.' decides float vs int.
        value = float(token.value) if '.' in token.value else int(token.value)
        return NumberNode([token], value)

    if token.type == TokenKind.STRING:
        self.next_token()
        # Strip the surrounding quotes; keep the quote character used.
        return StringNode([token], token.value[1:-1], token.value[0])

    if token.type == TokenKind.IDENTIFIER:
        self.next_token()
        # Reserved literal identifiers map to dedicated node types;
        # everything else is a variable reference.
        keyword_nodes = {"true": TrueNode, "false": FalseNode, "null": NullNode}
        node_cls = keyword_nodes.get(token.value)
        if node_cls is not None:
            return node_cls([token])
        return VariableNode([token], token.value)

    if token.type == TokenKind.LPAR:
        self.next_token()
        exp = self.parse_expression()
        token = self.get_token()
        self.next_token()
        if token.type != TokenKind.RPAR:
            error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
            self.add_error(error)
            return error
        return exp

    error = UnexpectedTokenErrorNode([token], "Unexpected token",
                                     [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
                                      "null", TokenKind.LPAR])
    return self.add_error(error)
# def parse_expression(self):
# return self.parse_addition()
#
# def parse_addition(self):
# left = self.parse_multiply()
# token = self.get_token()
# if token is None or token.type == TokenKind.EOF:
# return left
#
# if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
# right = self.parse_addition()
# return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
#
# if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
# return left
#
# self.next_token()
# right = self.parse_addition()
# return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
#
# def parse_multiply(self):
# left = self.parse_atom()
# token = self.get_token()
# if token is None or token.type == TokenKind.EOF:
# return left
#
# if token.type not in (TokenKind.STAR, TokenKind.SLASH):
# return left
#
# self.next_token()
# right = self.parse_multiply()
# return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
#
# def parse_atom(self):
# token = self.get_token()
# if token.type == TokenKind.NUMBER:
# self.next_token()
# return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
# elif token.type == TokenKind.STRING:
# self.next_token()
# return StringNode([token], token.value[1:-1], token.value[0])
# elif token.type == TokenKind.IDENTIFIER:
# if token.value == "true":
# self.next_token()
# return TrueNode([token])
# elif token.value == "false":
# self.next_token()
# return FalseNode([token])
# elif token.value == "null":
# self.next_token()
# return NullNode([token])
# else:
# self.next_token()
# return VariableNode([token], token.value)
# elif token.type == TokenKind.LPAR:
# self.next_token()
# exp = self.parse_expression()
# token = self.get_token()
# self.next_token()
#
# if token.type != TokenKind.RPAR:
# error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
# self.add_error(error)
# return error
#
# return exp
# else:
# error = UnexpectedTokenErrorNode([token], "Unexpected token",
# [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
# "null", TokenKind.LPAR])
# return self.add_error(error)