Refactored Parsers. Introduced BaseCustomGrammarParser. Renamed DefaultParser to DefConceptParser

This commit is contained in:
2020-10-02 04:45:47 +02:00
parent d100b7e8b3
commit e8f2705dcf
28 changed files with 1411 additions and 872 deletions
+1 -1
View File
@@ -287,7 +287,7 @@ def parse_unrecognized(context, source, parsers, who=None, prop=None, filter_fun
"""
Try to recognize concepts or code from source using the given parsers
:param context:
:param source:
:param source: ParserInput if possible
:param parsers:
:param who: who is asking the parsing ?
:param prop: Extra info, when parsing a property
+1 -1
View File
@@ -335,7 +335,7 @@ class Sheerka(Concept):
"""
core.utils.import_module_and_sub_module("parsers")
base_class = core.utils.get_class("parsers.BaseParser.BaseParser")
modules_to_skip = ["parsers.BaseNodeParser"]
modules_to_skip = ["parsers.BaseNodeParser", "parsers.BaseCustomGrammarParser"]
temp_result = {}
for parser in core.utils.get_sub_classes("parsers", base_class):
+31 -12
View File
@@ -15,11 +15,29 @@ class ParserInput:
def __init__(self, text, tokens=None, start=None, end=None, yield_oef=True):
self.text = text
self.tokens = tokens or None
self.length = len(tokens) if tokens else None
if self.tokens:
# make sure tokens ends with EOF token
# and do not modify the original token list
if len(self.tokens) == 0:
self.tokens = [Token(TokenKind.EOF, "", 0, 1, 1)]
elif (last_token := self.tokens[-1]).type != TokenKind.EOF:
self.tokens = self.tokens + [Token(TokenKind.EOF,
"",
last_token.index + 1,
last_token.line,
last_token.column + 1)]
self.length = None # to be computed in reset()
self.yield_oef = yield_oef
self.start = start or 0
self.end = end + 1 if end else None
if end:
self.original_end = end + 1
self.end = self.original_end
else:
self.original_end = self.end = None
self.sub_text = None
self.sub_tokens = None
@@ -32,15 +50,19 @@ class ParserInput:
from_tokens = "from_tokens" if self.from_tokens else ""
return f"ParserInput({from_tokens}'{self.text}')"
def reset(self, yield_oef=True):
def reset(self, yield_oef=None):
if yield_oef is None:
yield_oef = self.yield_oef
# make sure tokens is correctly initialized
if self.tokens is None:
self.tokens = list(Tokenizer(self.text))
self.length = len(self.tokens)
self.tokens = list(Tokenizer(self.text, yield_eof=True))
if self.end is None:
self.end = self.length
if self.original_end is None:
self.end = len(self.tokens) if yield_oef else len(self.tokens) - 1
else:
self.end = self.original_end if self.original_end <= len(self.tokens) else self.tokens
self.yield_oef = yield_oef
self.pos = self.start - 1
self.token = None
return self
@@ -70,13 +92,10 @@ class ParserInput:
self.pos += 1
if self.pos >= self.end:
if self.yield_oef:
self.token = Token(TokenKind.EOF, "", -1, -1, -1)
return False
self.token = self.tokens[self.pos]
if self.token.type == TokenKind.EOF and not self.yield_oef:
if self.token.type == TokenKind.EOF:
return False
if skip_whitespace:
+3 -1
View File
@@ -68,7 +68,7 @@ class Token:
if self.type == TokenKind.IDENTIFIER:
value = str(self.value)
elif self.type == TokenKind.WHITESPACE:
value = "<tab>" if self.value[0] == "\t" else "<ws>"
value = "<ws!>" if self.value == "" else "<tab>" if self.value[0] == "\t" else "<ws>"
elif self.type == TokenKind.NEWLINE:
value = "<nl>"
elif self.type == TokenKind.EOF:
@@ -148,6 +148,8 @@ class Keywords(Enum):
POST = "post"
ISA = "isa"
RET = "ret"
WHEN = "when"
PRINT = "print"
class Tokenizer: