Refactored parsers: introduced BaseCustomGrammarParser and renamed DefaultParser to DefConceptParser.
This commit is contained in:
@@ -287,7 +287,7 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
|
||||
"""
|
||||
Try to recognize concepts or code from source using the given parsers
|
||||
:param context:
|
||||
:param source:
|
||||
:param source: ParserInput if possible
|
||||
:param parsers:
|
||||
:param who: who is asking the parsing ?
|
||||
:param prop: Extra info, when parsing a property
|
||||
|
||||
@@ -335,7 +335,7 @@ class Sheerka(Concept):
|
||||
"""
|
||||
core.utils.import_module_and_sub_module("parsers")
|
||||
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
|
||||
modules_to_skip = ["parsers.BaseNodeParser"]
|
||||
modules_to_skip = ["parsers.BaseNodeParser", "parsers.BaseCustomGrammarParser"]
|
||||
|
||||
temp_result = {}
|
||||
for parser in core.utils.get_sub_classes("parsers", base_class):
|
||||
|
||||
@@ -15,11 +15,29 @@ class ParserInput:
|
||||
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
|
||||
self.text = text
|
||||
self.tokens = tokens or None
|
||||
self.length = len(tokens) if tokens else None
|
||||
if self.tokens:
|
||||
# make sure tokens ends with EOF token
|
||||
# and do not modify the original token list
|
||||
if len(self.tokens) == 0:
|
||||
self.tokens = [Token(TokenKind.EOF, "", 0, 1, 1)]
|
||||
|
||||
elif (last_token := self.tokens[-1]).type != TokenKind.EOF:
|
||||
self.tokens = self.tokens + [Token(TokenKind.EOF,
|
||||
"",
|
||||
last_token.index + 1,
|
||||
last_token.line,
|
||||
last_token.column + 1)]
|
||||
|
||||
self.length = None # to be computed in reset()
|
||||
self.yield_oef = yield_oef
|
||||
|
||||
self.start = start or 0
|
||||
self.end = end + 1 if end else None
|
||||
if end:
|
||||
self.original_end = end + 1
|
||||
self.end = self.original_end
|
||||
else:
|
||||
self.original_end = self.end = None
|
||||
|
||||
self.sub_text = None
|
||||
self.sub_tokens = None
|
||||
|
||||
@@ -32,15 +50,19 @@ class ParserInput:
|
||||
from_tokens = "from_tokens" if self.from_tokens else ""
|
||||
return f"ParserInput({from_tokens}'{self.text}')"
|
||||
|
||||
def reset(self, yield_oef=True):
|
||||
def reset(self, yield_oef=None):
|
||||
if yield_oef is None:
|
||||
yield_oef = self.yield_oef
|
||||
|
||||
# make sure tokens is correctly initialized
|
||||
if self.tokens is None:
|
||||
self.tokens = list(Tokenizer(self.text))
|
||||
self.length = len(self.tokens)
|
||||
self.tokens = list(Tokenizer(self.text, yield_eof=True))
|
||||
|
||||
if self.end is None:
|
||||
self.end = self.length
|
||||
if self.original_end is None:
|
||||
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
|
||||
else:
|
||||
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
|
||||
|
||||
self.yield_oef = yield_oef
|
||||
self.pos = self.start - 1
|
||||
self.token = None
|
||||
return self
|
||||
@@ -70,13 +92,10 @@ class ParserInput:
|
||||
self.pos += 1
|
||||
|
||||
if self.pos >= self.end:
|
||||
if self.yield_oef:
|
||||
self.token = Token(TokenKind.EOF, "", -1, -1, -1)
|
||||
return False
|
||||
|
||||
self.token = self.tokens[self.pos]
|
||||
|
||||
if self.token.type == TokenKind.EOF and not self.yield_oef:
|
||||
if self.token.type == TokenKind.EOF:
|
||||
return False
|
||||
|
||||
if skip_whitespace:
|
||||
|
||||
@@ -68,7 +68,7 @@ class Token:
|
||||
if self.type == TokenKind.IDENTIFIER:
|
||||
value = str(self.value)
|
||||
elif self.type == TokenKind.WHITESPACE:
|
||||
value = "<tab>" if self.value[0] == "\t" else "<ws>"
|
||||
value = "<ws!>" if self.value == "" else "<tab>" if self.value[0] == "\t" else "<ws>"
|
||||
elif self.type == TokenKind.NEWLINE:
|
||||
value = "<nl>"
|
||||
elif self.type == TokenKind.EOF:
|
||||
@@ -148,6 +148,8 @@ class Keywords(Enum):
|
||||
POST = "post"
|
||||
ISA = "isa"
|
||||
RET = "ret"
|
||||
WHEN = "when"
|
||||
PRINT = "print"
|
||||
|
||||
|
||||
class Tokenizer:
|
||||
|
||||
Reference in New Issue
Block a user