Tokenizer exceptions are not caught
This commit is contained in:
+11
-6
@@ -3,7 +3,7 @@ from dataclasses import dataclass
|
||||
import core.utils
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.sheerka import ExecutionContext
|
||||
from core.tokenizer import Tokenizer, Token, TokenKind
|
||||
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
|
||||
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
|
||||
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptMatch, StrMatch
|
||||
|
||||
@@ -114,12 +114,17 @@ class BnfParser:
|
||||
return token.type == second or token.type == first and self.next_after().type == second
|
||||
|
||||
def parse(self, context: ExecutionContext, text):
|
||||
self.reset_parser(context, text)
|
||||
tree = self.parser_outer_rule_name()
|
||||
|
||||
token = self.get_token()
|
||||
if token and token.type != TokenKind.EOF:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
|
||||
tree = None
|
||||
try:
|
||||
self.reset_parser(context, text)
|
||||
tree = self.parser_outer_rule_name()
|
||||
|
||||
token = self.get_token()
|
||||
if token and token.type != TokenKind.EOF:
|
||||
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
|
||||
except LexerError as e:
|
||||
self.add_error(e, False)
|
||||
|
||||
ret = self.sheerka.ret(
|
||||
self.name,
|
||||
|
||||
@@ -555,7 +555,12 @@ class ConceptLexerParser(BaseParser):
|
||||
self.text = text
|
||||
|
||||
if isinstance(text, str):
|
||||
self.tokens = list(Tokenizer(text))
|
||||
try:
|
||||
self.tokens = list(Tokenizer(text))
|
||||
except core.tokenizer.LexerError as e:
|
||||
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
|
||||
return False
|
||||
|
||||
else:
|
||||
self.tokens = list(text)
|
||||
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
|
||||
@@ -563,6 +568,7 @@ class ConceptLexerParser(BaseParser):
|
||||
self.token = None
|
||||
self.pos = -1
|
||||
self.next_token()
|
||||
return True
|
||||
|
||||
# Accessor for the parser's current token: returns self.token as-is,
# without advancing the position or touching any other state.
def get_token(self) -> Token:
|
||||
return self.token
|
||||
@@ -724,7 +730,11 @@ class ConceptLexerParser(BaseParser):
|
||||
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
|
||||
)
|
||||
|
||||
self.reset_parser(context, text)
|
||||
if not self.reset_parser(context, text):
|
||||
return self.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||
|
||||
concepts_found = [[]]
|
||||
unrecognized_tokens = None
|
||||
|
||||
@@ -183,6 +183,7 @@ class DefaultParser(BaseParser):
|
||||
self._current = next(self.lexer_iter)
|
||||
except StopIteration:
|
||||
self._current = None
|
||||
|
||||
return
|
||||
|
||||
def parse(self, context, text):
|
||||
@@ -195,8 +196,12 @@ class DefaultParser(BaseParser):
|
||||
self.log_result(context, text, ret)
|
||||
return ret
|
||||
|
||||
self.reset_parser(context, text)
|
||||
tree = self.parse_statement()
|
||||
tree = None
|
||||
try:
|
||||
self.reset_parser(context, text)
|
||||
tree = self.parse_statement()
|
||||
except core.tokenizer.LexerError as e:
|
||||
self.add_error(e, False)
|
||||
|
||||
# If an error is found it must be sent to error_sink
|
||||
# tree must contain what was recognized
|
||||
|
||||
@@ -2,7 +2,7 @@ import logging
|
||||
|
||||
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser
|
||||
from core.tokenizer import Tokenizer, Keywords, TokenKind
|
||||
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
|
||||
from core.concept import VARIABLE_PREFIX
|
||||
|
||||
|
||||
@@ -27,7 +27,12 @@ class ExactConceptParser(BaseParser):
|
||||
context.log(self.verbose_log, f"Parsing '{text}'", self.name)
|
||||
res = []
|
||||
sheerka = context.sheerka
|
||||
words = self.get_words(text)
|
||||
try:
|
||||
words = self.get_words(text)
|
||||
except LexerError as e:
|
||||
context.log(self.verbose_log, f"Error found in tokenizer {e}", self.name)
|
||||
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
|
||||
|
||||
if len(words) > self.MAX_WORDS_SIZE:
|
||||
context.log(self.verbose_log, f"Max words reached. Stopping.", self.name)
|
||||
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
|
||||
|
||||
+25
-19
@@ -1,5 +1,5 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import Tokenizer
|
||||
from core.tokenizer import Tokenizer, LexerError
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
from dataclasses import dataclass
|
||||
import ast
|
||||
@@ -63,27 +63,33 @@ class PythonParser(BaseParser):
|
||||
self.source = kwargs.get("source", "<undef>")
|
||||
|
||||
def parse(self, context, text):
|
||||
if isinstance(text, str) and "c:" in text:
|
||||
source = self.get_text_from_tokens(list(Tokenizer(text)))
|
||||
elif isinstance(text, str):
|
||||
source = text
|
||||
else:
|
||||
source = self.get_text_from_tokens(text)
|
||||
source = source.strip()
|
||||
|
||||
text = text if isinstance(text, str) else source
|
||||
|
||||
sheerka = context.sheerka
|
||||
tree = None
|
||||
|
||||
# first, try to parse an expression
|
||||
res, tree, error = self.try_parse_expression(source)
|
||||
if not res:
|
||||
# then try to parse a statement
|
||||
res, tree, error = self.try_parse_statement(source)
|
||||
try:
|
||||
if isinstance(text, str) and "c:" in text:
|
||||
source = self.get_text_from_tokens(list(Tokenizer(text)))
|
||||
elif isinstance(text, str):
|
||||
source = text
|
||||
else:
|
||||
source = self.get_text_from_tokens(text)
|
||||
source = source.strip()
|
||||
|
||||
text = text if isinstance(text, str) else source
|
||||
|
||||
# first, try to parse an expression
|
||||
res, tree, error = self.try_parse_expression(source)
|
||||
if not res:
|
||||
self.has_error = True
|
||||
error_node = PythonErrorNode(text, error)
|
||||
self.error_sink.append(error_node)
|
||||
# then try to parse a statement
|
||||
res, tree, error = self.try_parse_statement(source)
|
||||
if not res:
|
||||
self.has_error = True
|
||||
error_node = PythonErrorNode(text, error)
|
||||
self.error_sink.append(error_node)
|
||||
|
||||
except LexerError as e:
|
||||
self.has_error = True
|
||||
self.error_sink.append(e)
|
||||
|
||||
ret = sheerka.ret(
|
||||
self.name,
|
||||
|
||||
Reference in New Issue
Block a user