Tokenizer exceptions are not caught

This commit is contained in:
2019-12-31 18:28:04 +01:00
parent 197b0700fa
commit adcbc6bb2e
12 changed files with 131 additions and 39 deletions
+11 -6
View File
@@ -3,7 +3,7 @@ from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptMatch, StrMatch
@@ -114,12 +114,17 @@ class BnfParser:
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
self.reset_parser(context, text)
tree = self.parser_outer_rule_name()
token = self.get_token()
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
tree = None
try:
self.reset_parser(context, text)
tree = self.parser_outer_rule_name()
token = self.get_token()
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
except LexerError as e:
self.add_error(e, False)
ret = self.sheerka.ret(
self.name,
+12 -2
View File
@@ -555,7 +555,12 @@ class ConceptLexerParser(BaseParser):
self.text = text
if isinstance(text, str):
self.tokens = list(Tokenizer(text))
try:
self.tokens = list(Tokenizer(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
else:
self.tokens = list(text)
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
@@ -563,6 +568,7 @@ class ConceptLexerParser(BaseParser):
self.token = None
self.pos = -1
self.next_token()
return True
def get_token(self) -> Token:
return self.token
@@ -724,7 +730,11 @@ class ConceptLexerParser(BaseParser):
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)
self.reset_parser(context, text)
if not self.reset_parser(context, text):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
concepts_found = [[]]
unrecognized_tokens = None
+7 -2
View File
@@ -183,6 +183,7 @@ class DefaultParser(BaseParser):
self._current = next(self.lexer_iter)
except StopIteration:
self._current = None
return
def parse(self, context, text):
@@ -195,8 +196,12 @@ class DefaultParser(BaseParser):
self.log_result(context, text, ret)
return ret
self.reset_parser(context, text)
tree = self.parse_statement()
tree = None
try:
self.reset_parser(context, text)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
self.add_error(e, False)
# If an error is found it must be sent to error_sink
# tree must contain what was recognized
+7 -2
View File
@@ -2,7 +2,7 @@ import logging
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
from core.concept import VARIABLE_PREFIX
@@ -27,7 +27,12 @@ class ExactConceptParser(BaseParser):
context.log(self.verbose_log, f"Parsing '{text}'", self.name)
res = []
sheerka = context.sheerka
words = self.get_words(text)
try:
words = self.get_words(text)
except LexerError as e:
context.log(self.verbose_log, f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
if len(words) > self.MAX_WORDS_SIZE:
context.log(self.verbose_log, f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
+25 -19
View File
@@ -1,5 +1,5 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import Tokenizer
from core.tokenizer import Tokenizer, LexerError
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
import ast
@@ -63,27 +63,33 @@ class PythonParser(BaseParser):
self.source = kwargs.get("source", "<undef>")
def parse(self, context, text):
if isinstance(text, str) and "c:" in text:
source = self.get_text_from_tokens(list(Tokenizer(text)))
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text)
source = source.strip()
text = text if isinstance(text, str) else source
sheerka = context.sheerka
tree = None
# first, try to parse an expression
res, tree, error = self.try_parse_expression(source)
if not res:
# then try to parse a statement
res, tree, error = self.try_parse_statement(source)
try:
if isinstance(text, str) and "c:" in text:
source = self.get_text_from_tokens(list(Tokenizer(text)))
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text)
source = source.strip()
text = text if isinstance(text, str) else source
# first, try to parse an expression
res, tree, error = self.try_parse_expression(source)
if not res:
self.has_error = True
error_node = PythonErrorNode(text, error)
self.error_sink.append(error_node)
# then try to parse a statement
res, tree, error = self.try_parse_statement(source)
if not res:
self.has_error = True
error_node = PythonErrorNode(text, error)
self.error_sink.append(error_node)
except LexerError as e:
self.has_error = True
self.error_sink.append(e)
ret = sheerka.ret(
self.name,