Enhanced AtomNode parsing by name
This commit is contained in:
@@ -2,8 +2,9 @@ from dataclasses import dataclass
|
||||
|
||||
from core import builtin_helpers
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.concept import DEFINITION_TYPE_BNF
|
||||
from core.concept import DEFINITION_TYPE_BNF, Concept
|
||||
from core.tokenizer import Tokenizer
|
||||
from core.utils import strip_tokens
|
||||
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
|
||||
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
|
||||
|
||||
@@ -196,23 +197,6 @@ class AtomConceptParserHelper:
|
||||
clone.has_unrecognized = self.has_unrecognized
|
||||
return clone
|
||||
|
||||
# def _get_lexer_nodes_from_unrecognized(self):
|
||||
# """
|
||||
# Use the source of self.unrecognized_tokens gto find concepts or source code
|
||||
# :return:
|
||||
# """
|
||||
#
|
||||
# res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
|
||||
# only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
|
||||
#
|
||||
# if not only_parsers_results.status:
|
||||
# return None
|
||||
#
|
||||
# return builtin_helpers.get_lexer_nodes(
|
||||
# only_parsers_results.body.body,
|
||||
# self.unrecognized_tokens.start,
|
||||
# self.unrecognized_tokens.tokens)
|
||||
|
||||
|
||||
class AtomNodeParser(BaseNodeParser):
|
||||
"""
|
||||
@@ -314,6 +298,26 @@ class AtomNodeParser(BaseNodeParser):
|
||||
|
||||
return concept_parser_helpers
|
||||
|
||||
def get_by_name(self, parser_input):
    """
    Try to recognize the full parser input as a single concept name.

    :param parser_input: raw parser input (token list, text, or parser
        result); converted to text via ``get_input_as_text``
    :return: a list of ``AtomConceptParserHelper`` (one per matching
        concept), or ``None`` when the stripped source is not a name
        known to ``self.sheerka``
    """
    source = self.get_input_as_text(parser_input)
    concepts = self.sheerka.get_by_name(source.strip())
    if not self.sheerka.is_known(concepts):
        return None

    # sheerka.get_by_name may return a single Concept or a collection;
    # normalize to a list so the loop below handles both uniformly.
    concepts = [concepts] if isinstance(concepts, Concept) else concepts
    res = []
    start, end = self.get_tokens_boundaries(self.tokens)
    for concept in concepts:
        parser_helper = AtomConceptParserHelper(None)
        parser_helper.sequence.append(
            ConceptNode(concept, start, end, strip_tokens(self.tokens, True), source))
        res.append(parser_helper)

    return res
|
||||
|
||||
def get_valid(self, concept_parser_helpers):
|
||||
valid_parser_helpers = [] # be careful, it will be a list of list
|
||||
for parser_helper in concept_parser_helpers:
|
||||
@@ -351,7 +355,11 @@ class AtomNodeParser(BaseNodeParser):
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
|
||||
|
||||
parser_helpers = self.get_valid(self.get_concepts_sequences())
|
||||
sequences = self.get_concepts_sequences()
|
||||
if by_name := self.get_by_name(parser_input):
|
||||
sequences.extend(by_name)
|
||||
|
||||
parser_helpers = self.get_valid(sequences)
|
||||
|
||||
if len(parser_helpers):
|
||||
ret = []
|
||||
|
||||
@@ -153,17 +153,17 @@ class BaseParser:
|
||||
|
||||
return parser_input
|
||||
|
||||
def get_input_as_tokens(self, parser_input, strip_eof=False):
    """
    Normalize ``parser_input`` into a token source.

    :param parser_input: a token list, a ``ParserResultConcept``, or raw
        source text
    :param strip_eof: when True, remove a trailing EOF token from token
        lists (and suppress EOF emission by the tokenizer); when False,
        guarantee the token list ends with an EOF token
    :return: a token list (via ``manage_eof``) or a ``Tokenizer`` instance
    """
    if isinstance(parser_input, list):
        return self.manage_eof(parser_input, strip_eof)

    if isinstance(parser_input, ParserResultConcept):
        if parser_input.tokens:
            return self.manage_eof(parser_input.tokens, strip_eof)
        else:
            # NOTE(review): strip_eof is not forwarded on this branch —
            # confirm the tokenizer default is the intended behavior here.
            return Tokenizer(parser_input.source)

    return Tokenizer(parser_input, yield_eof=not strip_eof)
|
||||
|
||||
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
|
||||
if not isinstance(parser_input, ParserResultConcept):
|
||||
@@ -183,7 +183,12 @@ class BaseParser:
|
||||
return parser_input.value
|
||||
|
||||
@staticmethod
def manage_eof(lst, strip_eof):
    """
    Ensure ``lst`` ends (or does not end) with an EOF token, in place.

    :param lst: list of tokens (each exposing a ``type`` attribute)
    :param strip_eof: when True, pop a trailing EOF token if present;
        when False, append an EOF token if the list is empty or does
        not already end with one
    :return: the same list, mutated in place
    """
    if strip_eof:
        if len(lst) and lst[-1].type == TokenKind.EOF:
            lst.pop()
        return lst

    if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
        # Synthetic EOF: empty text, -1 for all position fields.
        lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
    return lst
|
||||
@@ -210,6 +215,30 @@ class BaseParser:
|
||||
res += value
|
||||
return res
|
||||
|
||||
@staticmethod
def get_tokens_boundaries(tokens):
    """
    Returns the first and the last valid index of the tokens.

    A valid index is a token that is neither a whitespace nor an EOF.

    :param tokens: list of tokens (each exposing a ``type`` attribute),
        or None
    :return: a ``(start, end)`` tuple of indices, or ``None`` when
        ``tokens`` is None; degenerate inputs (empty list, leading EOF)
        yield ``(0, 0)``
    """
    if tokens is None:
        return None

    if len(tokens) == 0:
        return 0, 0

    if tokens[0].type == TokenKind.EOF:
        return 0, 0

    # Skip at most one leading whitespace token.
    start = 1 if tokens[0].type == TokenKind.WHITESPACE else 0
    end = len(tokens) - 1
    # The `end > start` guard is a bug fix: without it, a list whose
    # tail is entirely whitespace/EOF down to `start` would decrement
    # `end` below zero, silently wrap via negative indexing, and
    # eventually raise IndexError. All non-crashing inputs behave
    # exactly as before.
    while end > start and tokens[end].type in (TokenKind.WHITESPACE, TokenKind.EOF):
        end -= 1

    return start, end
|
||||
|
||||
|
||||
class BaseTokenizerIterParser(BaseParser):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user