Added DefaultParser

This commit is contained in:
2019-10-29 18:39:51 +01:00
parent 101319b8b6
commit 8107e149b9
18 changed files with 1581 additions and 376 deletions
+41
View File
@@ -0,0 +1,41 @@
from dataclasses import dataclass, field
from parsers.tokenizer import TokenKind, Keywords
@dataclass()
class Node:
    """Base class for every AST node produced by the parsers."""
@dataclass()
class NopNode(Node):
    """Placeholder node used where a declaration is absent (a no-op)."""

    def __repr__(self):
        return "nop"
@dataclass()
class ErrorNode(Node):
    """Marker base class for nodes that represent a parse error."""
class BaseParser:
    """Common state and helpers shared by all concrete parsers.

    Subclasses call ``BaseParser.__init__`` with their display name and the
    source text, record errors in ``error_sink`` and override ``parse``.
    """

    def __init__(self, name, text):
        self.name = name            # human-readable parser name
        self.text = text            # raw source text being parsed
        self.has_error = False      # becomes True once any error is recorded
        self.error_sink = []        # collected error nodes

    def parse(self):
        """Parse ``self.text``; concrete subclasses override this."""
        pass

    @staticmethod
    def get_text_from_tokens(tokens):
        """Reassemble source text from an iterable of tokens.

        Keyword tokens carry a ``Keywords`` enum member as their value, so
        they are mapped back to their string spelling.  ``None`` yields "".
        """
        if tokens is None:
            return ""
        # str.join avoids quadratic += concatenation on long token streams.
        return "".join(
            Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value
            for token in tokens
        )
+383
View File
@@ -0,0 +1,383 @@
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field
@dataclass()
class DefaultParserNode(Node):
    """AST node that remembers the tokens it was built from."""

    # Tokens are excluded from dataclass equality: nodes compare by content.
    tokens: list = field(compare=False)

    def is_same(self, other):
        """Structural equality: same concrete type, same ``value`` if present."""
        if type(self) is not type(other):
            return False
        return not hasattr(self, "value") or self.value == other.value
@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    """Base class for errors reported by the DefaultParser."""
@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    """Error: a token other than one of ``expected_tokens`` was found."""

    message: str
    expected_tokens: list
@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """Generic syntax error carrying a human-readable message."""

    message: str
@dataclass()
class DefConceptNode(DefaultParserNode):
    """Node for a ``def concept`` declaration and its optional sections."""

    name: str
    where: Node = None
    pre: Node = None
    post: Node = None
    # BUG FIX: the default used to be the NopNode *class* object itself,
    # not an instance; default_factory builds a fresh NopNode() per node.
    body: Node = field(default_factory=NopNode)

    def get_codes(self):
        """Return {section name: parsed AST} for every section that has one.

        Only sections whose value exposes an ``ast`` attribute (i.e. a
        successfully parsed PythonNode) are included.
        """
        codes = {}
        for prop in ("where", "pre", "post", "body"):
            prop_value = getattr(self, prop)
            if hasattr(prop_value, "ast"):
                codes[prop] = prop_value.ast
        return codes
@dataclass()
class NumberNode(DefaultParserNode):
    """Numeric literal (int or float)."""

    value: object

    def __repr__(self):
        return f"{self.value}"
@dataclass()
class StringNode(DefaultParserNode):
    """String literal; ``quote`` records the original quoting character."""

    value: str
    quote: str

    def is_same(self, other):
        # Equal only if the base check passes and the quote style matches too.
        return super().is_same(other) and self.quote == other.quote

    def __repr__(self):
        return f"{self.quote}{self.value}{self.quote}"
@dataclass()
class VariableNode(DefaultParserNode):
    """Reference to a variable by name."""

    value: str

    def __repr__(self):
        return self.value
@dataclass()
class TrueNode(DefaultParserNode):
    """Boolean literal ``true``."""

    def __repr__(self):
        return "true"
@dataclass()
class FalseNode(DefaultParserNode):
    """Boolean literal ``false``."""

    def __repr__(self):
        return "false"
@dataclass()
class NullNode(DefaultParserNode):
    """Null literal."""

    def __repr__(self):
        return "null"
@dataclass()
class BinaryNode(DefaultParserNode):
    """Binary operation ``left <operator> right``."""

    operator: TokenKind
    left: Node
    right: Node

    def is_same(self, other):
        # Structurally equal when type, operator and both operands match.
        return (
            super().is_same(other)
            and self.operator == other.operator
            and self.left.is_same(other.left)
            and self.right.is_same(other.right)
        )

    def __repr__(self):
        return f"({self.left} {self.operator} {self.right})"
class DefaultParser(BaseParser):
    """Recursive-descent parser for the concept language.

    Parses either a ``def concept`` declaration or an arithmetic
    expression.  Sections of a concept declaration (where/pre/post/as)
    are handed to ``sub_parser`` (e.g. PythonParser) after their
    indentation is normalized.
    """

    def __init__(self, text, sub_parser):
        BaseParser.__init__(self, "Default", text)
        self.sub_parser = sub_parser          # factory: (tokens, source=...) -> parser
        self.lexer = Tokenizer(text)
        self.lexer_iter = iter(Tokenizer(text))
        self._current = None                  # current lookahead token (None at EOF)
        self.next_token()                     # prime the lookahead

    def collect_tokens(self, *args):
        """Flatten Nodes (their .tokens) and raw tokens into one token list."""
        result = []
        for item in args:
            if isinstance(item, Node):
                result.extend(item.tokens)
            else:
                result.append(item)
        return result

    def add_error(self, error, next_token=True):
        """Record an error node; optionally skip past the offending token."""
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        """Return the current lookahead token (None once input is exhausted)."""
        return self._current

    def next_token(self, skip_whitespace=True):
        """Advance the lookahead; by default skip whitespace/newline tokens."""
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            # Past the EOF token: mark the stream as exhausted.
            self._current = None
        return

    @staticmethod
    def get_concept_name(tokens):
        """Join tokens into a space-separated concept name.

        String tokens contribute their unquoted content; an EOF token
        terminates the name.
        """
        name = ""
        first = True
        for token in tokens:
            if token.type == TokenKind.EOF:
                break
            if not first:
                name += " "
            name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
            first = False
        return name

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example
        def concept add one to a as:
        def func(x):
        return x+1
        func(a)
        indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error
        :param tokens:
        :return:
        """
        # tokens[0] is the section keyword; a non-colon second token means an
        # inline (single-line) body that needs no indentation fix.
        if tokens[1].type != TokenKind.COLON:
            return tokens[1:]
        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
        if tokens[2].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[2]], "Unexpected token after colon", [TokenKind.NEWLINE])
        # NOTE(review): tokens[3] is read without checking len(tokens) > 3 —
        # a section ending right after the newline would raise IndexError;
        # confirm whether the tokenizer guarantees a trailing token here.
        if tokens[3].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[3]], "Indentation not found")
        indent_size = len(tokens[3].value)
        # now fix the other indentations
        i = 4
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
                # Dedent: drop the leading ``indent_size`` spaces of the line.
                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1
        return tokens[4:]

    def parse(self):
        """Entry point: parse one statement from the source text."""
        return self.parse_statement()

    def parse_statement(self):
        """A statement is either a ``def concept`` declaration or an expression."""
        token = self.get_token()
        if token.value == Keywords.DEF:
            self.next_token()
            return self.parse_def_concept()
        else:
            return self.parse_expression()

    def parse_def_concept(self):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        """
        def_concept_parts = [Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
        token = self.get_token()
        if token.value != Keywords.CONCEPT:
            return self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
        self.next_token()
        token = self.get_token()
        if token.value in (Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST):
            return self.add_error(UnexpectedTokenErrorNode([token], "Concept name is missing.", ["<name>"]))
        # Collect the concept name: everything up to the first section keyword.
        name_as_tokens = []
        while token.type != TokenKind.EOF and token.value not in def_concept_parts:
            name_as_tokens.append(token)
            self.next_token()
            token = self.get_token()
        name = self.get_concept_name(name_as_tokens)
        # try to parse as, where, pre and post declarations
        tokens = {
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = None
        while token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                keyword = token.value
                if tokens[keyword]:
                    return self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                tokens[keyword] = [token]  # first element of the list is the keyword
                current_part = keyword
                self.next_token()
            else:
                if current_part is None:
                    return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", def_concept_parts))
                else:
                    tokens[current_part].append(token)
                    # Keep whitespace/newlines: section bodies are re-indented later.
                    self.next_token(False)
            token = self.get_token()
        asts = {
            Keywords.AS: NopNode(),
            Keywords.WHERE: NopNode(),
            Keywords.PRE: NopNode(),
            Keywords.POST: NopNode(),
        }
        # check for empty declarations
        for keyword in tokens:
            current_tokens = tokens[keyword]
            if current_tokens is not None:
                # NOTE(review): the comment below says "only one element" but
                # the test is len == 0, and current_tokens[0] on an empty list
                # would raise IndexError; this likely should be len == 1
                # (keyword only, no body) — confirm intent.
                if len(current_tokens) == 0:  # only one element means empty decl
                    return self.add_error(SyntaxErrorNode([current_tokens[0]], "Empty declaration"), False)
                else:
                    current_tokens = self.fix_indentation(current_tokens)
                    if isinstance(current_tokens, ErrorNode):
                        self.add_error(current_tokens)
                        continue
                    # start = current_tokens[0].index
                    # end = current_tokens[-1].index + len(current_tokens[-1].value)
                    sub_parser = self.sub_parser(current_tokens, source=keyword.value)
                    sub_tree = sub_parser.parse()
                    if isinstance(sub_tree, ErrorNode):
                        self.add_error(sub_tree, False)
                    asts[keyword] = sub_tree
        return DefConceptNode([], name,
                              asts[Keywords.WHERE],
                              asts[Keywords.PRE],
                              asts[Keywords.POST],
                              asts[Keywords.AS])

    def parse_expression(self):
        """Lowest-precedence entry for expressions."""
        return self.parse_addition()

    def parse_addition(self):
        """Parse +/- chains (right-associative via recursion)."""
        left = self.parse_multiply()
        token = self.get_token()
        if token is None or token.type == TokenKind.EOF:
            return left
        # A bare number after an operand means the sign was lexed into the
        # number (e.g. "15 -5" → NUMBER(15), NUMBER(-5)); treat as addition.
        # NOTE(review): ``token`` here is also the first token of ``right``,
        # so collect_tokens duplicates it — confirm whether that matters.
        if token.type == TokenKind.NUMBER:  # example 15 +5 or 15 -5
            right = self.parse_addition()
            return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
        if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
            return left
        self.next_token()
        right = self.parse_addition()
        return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)

    def parse_multiply(self):
        """Parse * and / chains (binds tighter than +/-)."""
        left = self.parse_atom()
        token = self.get_token()
        if token is None or token.type == TokenKind.EOF:
            return left
        if token.type not in (TokenKind.STAR, TokenKind.SLASH):
            return left
        self.next_token()
        right = self.parse_multiply()
        return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)

    def parse_atom(self):
        """Parse a literal, identifier or parenthesized expression."""
        token = self.get_token()
        if token.type == TokenKind.NUMBER:
            self.next_token()
            return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
        elif token.type == TokenKind.STRING:
            self.next_token()
            # Strip the surrounding quotes; remember the quote character.
            return StringNode([token], token.value[1:-1], token.value[0])
        elif token.type == TokenKind.IDENTIFIER:
            if token.value == "true":
                self.next_token()
                return TrueNode([token])
            elif token.value == "false":
                self.next_token()
                return FalseNode([token])
            elif token.value == "null":
                self.next_token()
                return NullNode([token])
            else:
                self.next_token()
                return VariableNode([token], token.value)
        elif token.type == TokenKind.LPAR:
            self.next_token()
            exp = self.parse_expression()
            token = self.get_token()
            # NOTE(review): the lookahead is advanced here *before* checking
            # for ')', and add_error() advances once more by default — on a
            # missing ')' two tokens are skipped; confirm this is intended.
            self.next_token()
            if token.type != TokenKind.RPAR:
                error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
                self.add_error(error)
                return error
            return exp
        else:
            error = UnexpectedTokenErrorNode([token], "Unexpected token",
                                             [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
                                              "null", TokenKind.LPAR])
            return self.add_error(error)
+75
View File
@@ -0,0 +1,75 @@
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
import ast
import copy
@dataclass()
class PythonErrorNode(ErrorNode):
    """Failure result of PythonParser: the source plus the raised exception."""

    source: str
    exception: Exception
@dataclass()
class PythonNode(Node):
    """Successful result of PythonParser: the source text and its parsed AST."""

    source: str
    ast: ast.AST

    def __repr__(self):
        return "PythonNode(" + ast.dump(self.ast) + ")"
class PythonParser(BaseParser):
    """Parses a snippet of Python (expression or statements) with ``ast``.

    Accepts either a raw string or a token list (re-joined via
    ``get_text_from_tokens``).  ``source`` labels compile units for
    tracebacks.
    """

    def __init__(self, text, source="<undef>"):
        # Accept either a raw string or a token list from the tokenizer.
        text = text if isinstance(text, str) else self.get_text_from_tokens(text)
        text = text.strip()
        BaseParser.__init__(self, "PythonParser", text)
        self.source = source

    def parse(self):
        """Return a PythonNode on success or a PythonErrorNode on failure."""
        # first, try to parse an expression
        res, tree, error = self.try_parse_expression()
        if not res:
            # then try to parse a statement
            res, tree, error = self.try_parse_statement()
            if not res:
                self.has_error = True
                error_node = PythonErrorNode(self.text, error)
                self.error_sink.append(error_node)
                return error_node
        return PythonNode(self.text, tree)

    def try_parse_expression(self):
        """Try ``eval`` mode; return (ok, tree, error)."""
        try:
            return True, ast.parse(self.text, f"<{self.source}>", 'eval'), None
        # Broad on purpose: any failure (SyntaxError, ValueError on null
        # bytes, RecursionError, ...) means "not parseable as an expression".
        except Exception as error:
            return False, None, error

    def try_parse_statement(self):
        """Try ``exec`` mode; return (ok, tree, error)."""
        try:
            return True, ast.parse(self.text, f"<{self.source}>", 'exec'), None
        except Exception as error:
            return False, None, error

    def expr_to_expression(self, expr):
        """Wrap a parsed ``ast.Expr`` statement into a compilable ``ast.Expression``.

        BUG FIX: ``ast.Expression`` declares no ``lineno``/``col_offset``
        attributes, so passing them as keyword arguments is invalid
        (deprecated in Python 3.13, an error in future releases);
        ``fix_missing_locations`` fills in any missing locations instead.
        """
        result = ast.Expression(body=expr.value)
        return ast.fix_missing_locations(result)

    def exec_with_return(self, code):
        """Execute ``code``; if its last statement is an expression, return its value.

        SECURITY: compiles and executes arbitrary Python in this module's
        ``globals()`` — never call with untrusted input.
        """
        code_ast = ast.parse(code)
        # Split off the last statement so it can be evaluated separately.
        init_ast = copy.deepcopy(code_ast)
        init_ast.body = code_ast.body[:-1]
        last_ast = copy.deepcopy(code_ast)
        last_ast.body = code_ast.body[-1:]
        exec(compile(init_ast, "<ast>", "exec"), globals())
        if isinstance(last_ast.body[0], ast.Expr):
            return eval(compile(self.expr_to_expression(last_ast.body[0]), "<ast>", "eval"), globals())
        else:
            exec(compile(last_ast, "<ast>", "exec"), globals())
-249
View File
@@ -1,249 +0,0 @@
from dataclasses import dataclass
@dataclass(frozen=True)
class Token:
    """Immutable lexical token with the position where it starts."""

    type: str     # one of the Tokens constants
    value: str    # raw matched text
    index: int    # absolute offset in the source
    line: int     # 1-based line number
    column: int   # 1-based column number
@dataclass(frozen=True)
class LexerError(Exception):
    """Raised when the lexer meets a character it cannot tokenize."""

    message: str   # human-readable description
    text: str      # text being lexed
    index: int     # absolute offset of the failure
    line: int      # 1-based line
    column: int    # 1-based column
class Tokens:
    """String constants naming each token type produced by TokenIter."""

    EOF = "eof"
    WHITESPACE = "whitespace"
    NEWLINE = "newline"
    KEYWORD = "keyword"
    IDENTIFIER = "identifier"
    STRING = "string"
    NUMBER = "number"
    TRUE = "true"
    FALSE = "false"
    LPAR = "lpar"
    RPAR = "rpar"
    # BUG FIX: LBRACKET was "lbrace", making '[' and '{' token types
    # compare equal and thus indistinguishable.
    LBRACKET = "lbracket"
    RBRACKET = "rbracket"
    LBRACE = "lbrace"
    RBRACE = "rbrace"
    PLUS = "plus"
    MINUS = "minus"
    STAR = "star"
    SLASH = "slash"
    PERCENT = "percent"
    COMMA = "comma"
    SEMICOLON = "semicolon"
    COLON = "colon"
    DOT = "dot"
    QMARK = "qmark"
    VBAR = "vbar"
    AMPER = "amper"
class TokenIter:
    """
    Class that can iterate on the tokens
    """
    # NOTE(review): unlike the newer Keywords enum, "where" is missing here.
    KEYWORDS = ("def", "concept", "as", "pre", "post")

    def __init__(self, text):
        self.text = text
        self.text_len = len(text)

    def __iter__(self):
        # Single pass over the text, yielding Token objects; position
        # (index, line, column) is tracked locally per iteration.
        i = 0
        line = 1
        column = 1
        while i < self.text_len:
            c = self.text[i]
            if c == "+":
                yield Token(Tokens.PLUS, "+", i, line, column)
                i += 1
                column += 1
            elif c == "-":
                # '-' directly followed by a digit lexes as a negative number.
                if i + 1 < self.text_len and self.text[i + 1].isdigit():
                    number = self.eat_number(i)
                    yield Token(Tokens.NUMBER, number, i, line, column)
                    i += len(number)
                    column += len(number)
                else:
                    yield Token(Tokens.MINUS, "-", i, line, column)
                    i += 1
                    column += 1
            elif c == "/":
                yield Token(Tokens.SLASH, "/", i, line, column)
                i += 1
                column += 1
            elif c == "*":
                yield Token(Tokens.STAR, "*", i, line, column)
                i += 1
                column += 1
            elif c == "{":
                yield Token(Tokens.LBRACE, "{", i, line, column)
                i += 1
                column += 1
            elif c == "}":
                yield Token(Tokens.RBRACE, "}", i, line, column)
                i += 1
                column += 1
            elif c == "(":
                yield Token(Tokens.LPAR, "(", i, line, column)
                i += 1
                column += 1
            elif c == ")":
                yield Token(Tokens.RPAR, ")", i, line, column)
                i += 1
                column += 1
            elif c == "[":
                yield Token(Tokens.LBRACKET, "[", i, line, column)
                i += 1
                column += 1
            elif c == "]":
                yield Token(Tokens.RBRACKET, "]", i, line, column)
                i += 1
                column += 1
            elif c == " " or c == "\t":
                # Whitespace runs are kept as single tokens (needed later
                # for indentation handling).
                whitespace = self.eat_whitespace(i)
                yield Token(Tokens.WHITESPACE, whitespace, i, line, column)
                i += len(whitespace)
                column += len(whitespace)
            elif c == ",":
                yield Token(Tokens.COMMA, ",", i, line, column)
                i += 1
                column += 1
            elif c == ".":
                yield Token(Tokens.DOT, ".", i, line, column)
                i += 1
                column += 1
            elif c == ";":
                yield Token(Tokens.SEMICOLON, ";", i, line, column)
                i += 1
                column += 1
            elif c == ":":
                yield Token(Tokens.COLON, ":", i, line, column)
                i += 1
                column += 1
            elif c == "?":
                yield Token(Tokens.QMARK, "?", i, line, column)
                i += 1
                column += 1
            elif c == "\n" or c == "\r":
                newline = self.eat_newline(i)
                yield Token(Tokens.NEWLINE, newline, i, line, column)
                i += len(newline)
                column = 1
                line += 1
            elif c.isalpha() or c == "_":
                identifier = self.eat_identifier(i)
                # Known keywords get their own token type.
                type = Tokens.KEYWORD if identifier in self.KEYWORDS else Tokens.IDENTIFIER
                yield Token(type, identifier, i, line, column)
                i += len(identifier)
                column += len(identifier)
            elif c.isdigit():
                number = self.eat_number(i)
                yield Token(Tokens.NUMBER, number, i, line, column)
                i += len(number)
                column += len(number)
            elif c == "'" or c == '"':
                string, newlines = self.eat_string(i)
                yield Token(Tokens.STRING, string, i, line, column)
                i += len(string)
                # A multi-line string resets the column to the line start.
                column = 1 if newlines > 0 else column + len(string)
                line += newlines
            else:
                raise LexerError(f"Unknown token '{c}'", self.text, i, line, column)
        # Always terminate the stream with an explicit EOF token.
        yield Token(Tokens.EOF, "", i, line, column)

    def eat_whitespace(self, start):
        """Return the run of spaces/tabs beginning at ``start``."""
        result = self.text[start]
        i = start + 1
        while i < self.text_len:
            c = self.text[i]
            if c == " " or c == "\t":
                result += c
                i += 1
            else:
                break
        return result

    def eat_newline(self, start):
        """Return the newline sequence at ``start`` (1 or 2 characters)."""
        if start + 1 == self.text_len:
            return self.text[start]
        current = self.text[start]
        next = self.text[start + 1]
        if current == "\n" and next == "\r" or current == "\r" and next == "\n":
            return current + next
        return current

    def eat_identifier(self, start):
        """Return the identifier at ``start`` (letters, digits, '_' and '-')."""
        result = self.text[start]
        i = start + 1
        while i < self.text_len:
            c = self.text[i]
            if c.isalpha() or c == "_" or c == "-" or c.isdigit():
                result += c
                i += 1
            else:
                break
        return result

    def eat_number(self, start):
        """Return the number literal at ``start``.

        NOTE(review): any run of digits and dots is accepted, so "1.2.3"
        lexes as one NUMBER — confirm whether that is intended.
        """
        result = self.text[start]
        i = start + 1
        while i < self.text_len:
            c = self.text[i]
            if c.isdigit() or c == ".":
                result += c
                i += 1
            else:
                break
        return result

    def eat_string(self, start):
        """Return (string-with-quotes, number-of-newlines) starting at ``start``.

        NOTE(review): the escape flag stays True across consecutive
        backslashes, and the quote check is skipped for the character
        right after a newline — both look like latent bugs; confirm.
        """
        quote = self.text[start]
        result = self.text[start]
        lines_count = 0
        i = start + 1
        escape = False
        newline = None
        while i < self.text_len:
            c = self.text[i]
            result += c
            i += 1
            if newline:
                # Count the completed line; keep the marker only for a
                # repeated newline character.
                lines_count += 1
                newline = c if c == newline else None
            else:
                if c == "\r" or c == "\n":
                    newline = c
                if c == "\\":
                    escape = True
                elif c == quote and not escape:
                    break
                else:
                    escape = False
        if newline:
            lines_count += 1
        return result, lines_count
+297
View File
@@ -0,0 +1,297 @@
from dataclasses import dataclass
from enum import Enum
class TokenKind(Enum):
EOF = "eof"
WHITESPACE = "whitespace"
NEWLINE = "newline"
KEYWORD = "keyword"
IDENTIFIER = "identifier"
STRING = "string"
NUMBER = "number"
TRUE = "true"
FALSE = "false"
LPAR = "lpar"
RPAR = "rpar"
LBRACKET = "lbrace"
RBRACKET = "rbracket"
LBRACE = "lbrace"
RBRACE = "rbrace"
PLUS = "plus"
MINUS = "minus"
STAR = "star"
SLASH = "slash"
PERCENT = "percent"
COMMA = "comma"
SEMICOLON = "semicolon"
COLON = "colon"
DOT = "dot"
QMARK = "qmark"
VBAR = "vbar"
AMPER = "amper"
EQUALS = "="
@dataclass()
class Token:
    """A lexical token plus the position where it starts."""

    type: TokenKind   # kind of token
    value: object     # raw text, or a Keywords member for keyword tokens
    index: int        # absolute offset in the source
    line: int         # 1-based line number
    column: int       # 1-based column number
@dataclass()
class LexerError(Exception):
    """Raised by Tokenizer when it meets a character it cannot lex."""

    message: str   # human-readable description
    text: str      # text (or fragment) being lexed
    index: int     # absolute offset of the failure
    line: int      # 1-based line
    column: int    # 1-based column
class Keywords(Enum):
    """Reserved words of the concept language."""

    DEF = "def"
    CONCEPT = "concept"
    AS = "as"
    WHERE = "where"
    PRE = "pre"
    POST = "post"
class Tokenizer:
"""
Class that can iterate on the tokens
"""
KEYWORDS = set(x.value for x in Keywords)
def __init__(self, text):
self.text = text
self.text_len = len(text)
self.column = 1
self.line = 1
self.i = 0
    def __iter__(self):
        """Yield Token objects for the whole text, ending with an EOF token.

        Position state (``self.i``/``self.line``/``self.column``) lives on
        the instance, so iterating the same Tokenizer twice will NOT restart
        from the beginning.
        """
        while self.i < self.text_len:
            c = self.text[self.i]
            if c == "+":
                # '+' or '-' directly followed by a digit is lexed as a
                # signed number token rather than an operator.
                if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
                    number = self.eat_number(self.i)
                    yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                    self.i += len(number)
                    self.column += len(number)
                else:
                    yield Token(TokenKind.PLUS, "+", self.i, self.line, self.column)
                    self.i += 1
                    self.column += 1
            elif c == "-":
                if self.i + 1 < self.text_len and self.text[self.i + 1].isdigit():
                    number = self.eat_number(self.i)
                    yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                    self.i += len(number)
                    self.column += len(number)
                else:
                    yield Token(TokenKind.MINUS, "-", self.i, self.line, self.column)
                    self.i += 1
                    self.column += 1
            elif c == "/":
                yield Token(TokenKind.SLASH, "/", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "*":
                yield Token(TokenKind.STAR, "*", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "{":
                yield Token(TokenKind.LBRACE, "{", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "}":
                yield Token(TokenKind.RBRACE, "}", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "(":
                yield Token(TokenKind.LPAR, "(", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == ")":
                yield Token(TokenKind.RPAR, ")", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "[":
                yield Token(TokenKind.LBRACKET, "[", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "]":
                yield Token(TokenKind.RBRACKET, "]", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "=":
                yield Token(TokenKind.EQUALS, "=", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == " " or c == "\t":
                # Whitespace runs become single tokens (kept for later
                # indentation fixing by the parser).
                whitespace = self.eat_whitespace(self.i)
                yield Token(TokenKind.WHITESPACE, whitespace, self.i, self.line, self.column)
                self.i += len(whitespace)
                self.column += len(whitespace)
            elif c == ",":
                yield Token(TokenKind.COMMA, ",", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == ".":
                yield Token(TokenKind.DOT, ".", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == ";":
                yield Token(TokenKind.SEMICOLON, ";", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == ":":
                yield Token(TokenKind.COLON, ":", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "?":
                yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column)
                self.i += 1
                self.column += 1
            elif c == "\n" or c == "\r":
                newline = self.eat_newline(self.i)
                yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
                self.i += len(newline)
                self.column = 1
                self.line += 1
            elif c.isalpha() or c == "_":
                identifier = self.eat_identifier(self.i)
                # Keywords get their own kind, and their value becomes the
                # Keywords enum member instead of the raw string.
                token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
                value = Keywords(identifier) if identifier in self.KEYWORDS else identifier
                yield Token(token_type, value, self.i, self.line, self.column)
                self.i += len(identifier)
                self.column += len(identifier)
            elif c.isdigit():
                number = self.eat_number(self.i)
                yield Token(TokenKind.NUMBER, number, self.i, self.line, self.column)
                self.i += len(number)
                self.column += len(number)
            elif c == "'" or c == '"':
                string, newlines = self.eat_string(self.i, self.line, self.column)
                yield Token(TokenKind.STRING, string, self.i, self.line, self.column)
                self.i += len(string)
                # A multi-line string resets the column count.
                self.column = 1 if newlines > 0 else self.column + len(string)
                self.line += newlines
            else:
                raise LexerError(f"Unknown token '{c}'", self.text, self.i, self.line, self.column)
        # Always terminate the stream with an explicit EOF token.
        yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_whitespace(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c == " " or c == "\t":
result += c
i += 1
else:
break
return result
def eat_newline(self, start):
if start + 1 == self.text_len:
return self.text[start]
current = self.text[start]
next = self.text[start + 1]
if current == "\n" and next == "\r" or current == "\r" and next == "\n":
return current + next
return current
def eat_identifier(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isalpha() or c == "_" or c == "-" or c.isdigit():
result += c
i += 1
else:
break
return result
def eat_number(self, start):
result = self.text[start]
i = start + 1
while i < self.text_len:
c = self.text[i]
if c.isdigit() or c == ".":
result += c
i += 1
else:
break
return result
    def eat_string(self, start_index, start_line, start_column):
        """Return (string-including-quotes, newline-count) starting at ``start_index``.

        Raises LexerError if the closing quote is missing.

        NOTE(review): two suspected bugs — (1) the escape flag stays True
        across consecutive backslashes, so a quote after "\\\\" does not
        terminate the string; (2) the trailing check ``result[-1] != quote``
        passes for a lone opening quote at end of text.  Confirm intent.
        """
        quote = self.text[start_index]
        result = self.text[start_index]
        lines_count = 0
        i = start_index + 1
        escape = False
        newline = None
        while i < self.text_len:
            c = self.text[i]
            result += c
            i += 1
            if newline:
                # Count the line just completed; keep the marker only when
                # the same newline character repeats.
                lines_count += 1
                newline = c if c == newline else None
            else:
                if c == "\r" or c == "\n":
                    newline = c
                if c == "\\":
                    escape = True
                elif c == quote and not escape:
                    break
                else:
                    escape = False
        # add trailing new line if needed
        if newline:
            lines_count += 1
        if result[-1] != quote:
            raise LexerError("Missing Trailing quote", result, i, start_line + lines_count,
                             1 if lines_count > 0 else start_column + len(result))
        return result, lines_count
def seek(self, words):
if self.i == self.text_len:
return 0
# init
offsets = {}
start_index = self.i
buffer = ""
while self.i < self.text_len:
c = self.text[self.i]
# skip white space
if c in (" ", "\t"):
self.i += 1
continue
for word in words:
if c == word[offset]:
os