Introduced ParserInput

This commit is contained in:
2020-05-25 18:09:12 +02:00
parent c79403443f
commit 479461c0a4
35 changed files with 768 additions and 480 deletions
+36 -26
View File
@@ -14,6 +14,7 @@ from cache.Cache import Cache
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Tokenizer, Token, TokenKind
from parsers.BaseNodeParser import BaseNodeParser, LexerNode, UnrecognizedTokensNode, ConceptNode, GrammarErrorNode
from parsers.BaseParser import ErrorNode
@@ -149,7 +150,7 @@ class ConceptExpression(ParsingExpression):
return NonTerminalNode(self,
node.start,
node.end,
parser_helper.parser.tokens[node.start: node.end + 1],
parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
[node])
@@ -184,7 +185,11 @@ class Sequence(ParsingExpression):
children.append(node)
end_pos = node.end
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
return NonTerminalNode(self,
init_pos,
end_pos,
parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -206,7 +211,7 @@ class OrderedChoice(ParsingExpression):
return NonTerminalNode(self,
init_pos,
node.end,
parser_helper.parser.tokens[init_pos: node.end + 1],
parser_helper.parser.parser_input.tokens[init_pos: node.end + 1],
[node])
parser_helper.seek(init_pos) # backtrack
@@ -237,7 +242,7 @@ class Optional(ParsingExpression):
self,
node.start,
node.end,
parser_helper.parser.tokens[node.start: node.end + 1],
parser_helper.parser.parser_input.tokens[node.start: node.end + 1],
[node])
parser_helper.seek(init_pos) # backtrack
@@ -303,7 +308,8 @@ class ZeroOrMore(Repetition):
if len(children) == 0:
return NonTerminalNode(self, init_pos, -1, [], [])
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -343,7 +349,11 @@ class OneOrMore(Repetition):
if len(children) == 0: # if nothing is found, it's an error
return None
return NonTerminalNode(self, init_pos, end_pos, parser_helper.parser.tokens[init_pos: end_pos + 1], children)
return NonTerminalNode(self,
init_pos,
end_pos,
parser_helper.parser.parser_input.tokens[init_pos: end_pos + 1],
children)
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
@@ -507,24 +517,24 @@ class BnfConceptParserHelper:
return False
self.pos += 1
self.token = self.parser.tokens[self.pos]
self.token = self.parser.parser_input.tokens[self.pos]
if skip_whitespace:
while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
self.pos += 1
self.token = self.parser.tokens[self.pos]
self.token = self.parser.parser_input.tokens[self.pos]
return self.token.type != TokenKind.EOF
def seek(self, pos):
self.pos = pos
self.token = self.parser.tokens[self.pos]
self.token = self.parser.parser_input.tokens[self.pos]
def has_error(self):
return len(self.errors) > 0
def is_locked(self):
return self.parser.pos <= self.pos or self.has_error()
return self.parser.parser_input.pos <= self.pos or self.has_error()
def eat_concept(self, concept, token):
if self.is_locked():
@@ -546,8 +556,8 @@ class BnfConceptParserHelper:
self.errors.append(GrammarErrorNode(error_msg))
return
self.pos = self.parser.pos
self.token = self.parser.tokens[self.pos]
self.pos = self.parser.parser_input.pos
self.token = self.parser.parser_input.tokens[self.pos]
# parse
node = parsing_expression.parse(self)
@@ -557,15 +567,15 @@ class BnfConceptParserHelper:
self.bnf_parsed = True
else:
self.debug.append(("Rewind", token))
self.unrecognized_tokens.add_token(token, self.parser.pos)
self.pos = self.parser.pos # reset position
self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)
self.pos = self.parser.parser_input.pos # reset position
def eat_unrecognized(self, token):
if self.is_locked():
return
self.debug.append(token)
self.unrecognized_tokens.add_token(token, self.parser.pos)
self.unrecognized_tokens.add_token(token, self.parser.parser_input.pos)
def manage_unrecognized(self):
if self.unrecognized_tokens.is_empty():
@@ -631,7 +641,7 @@ class BnfConceptParserHelper:
concept,
underlying.start,
underlying.end,
self.parser.tokens[underlying.start: underlying.end + 1],
self.parser.parser_input.tokens[underlying.start: underlying.end + 1],
None,
underlying)
return concept_node
@@ -779,9 +789,9 @@ class BnfNodeParser(BaseNodeParser):
concept_parser_helpers = [BnfConceptParserHelper(self)]
while self.next_token(False):
while self.parser_input.next_token(False):
token = self.get_token()
token = self.parser_input.token
try:
concepts = self.get_concepts(token, self._is_eligible, strip_quotes=False)
@@ -837,7 +847,7 @@ class BnfNodeParser(BaseNodeParser):
resolved = self.resolve_parsing_expression(expression, already_seen or set())
sub_context.add_values(return_values=resolved)
self.concepts_grammars.put(concept.id, resolved)
self.concepts_grammars.put(concept.id, resolved)
if self.has_error:
return None
@@ -929,7 +939,7 @@ class BnfNodeParser(BaseNodeParser):
return self.context.concepts[concept]
return self.sheerka.get_by_key(concept)
def parse(self, context, parser_input):
def parse(self, context, parser_input: ParserInput):
"""
parser_input can be string, but text can also be an list of tokens
:param context:
@@ -940,11 +950,11 @@ class BnfNodeParser(BaseNodeParser):
context.log(f"Parsing '{parser_input}' with BnfNode", self.name)
sheerka = context.sheerka
if parser_input == "" or isinstance(parser_input, list) and len(parser_input) == 0:
if parser_input.is_empty():
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=parser_input,
body=parser_input.as_text(),
reason=BuiltinConcepts.IS_EMPTY))
if not self.reset_parser(context, parser_input):
@@ -966,7 +976,7 @@ class BnfNodeParser(BaseNodeParser):
return self.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))
ret = []
for parser_helper in valid_parser_helpers:
@@ -977,13 +987,13 @@ class BnfNodeParser(BaseNodeParser):
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
source=parser_input.as_text(),
body=parser_helper.sequence,
try_parsed=parser_helper.sequence)))
if len(ret) == 1:
self.log_result(context, parser_input, ret[0])
self.log_result(context, parser_input.as_text(), ret[0])
return ret[0]
else:
self.log_multiple_results(context, parser_input, ret)
self.log_multiple_results(context, parser_input.as_text(), ret)
return ret