from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
import core.utils
# Module-level singleton: MultipleConceptsParser.parse() only accepts
# PARSER_RESULT values whose .parser is this exact ConceptLexerParser
# instance (compared below), so one shared instance is created here.
concept_lexer_parser = ConceptLexerParser()
class MultipleConceptsParser(BaseParser):
    """
    Parser that takes the result of ConceptLexerParser and tries to
    resolve the unrecognized tokens token by token.

    It is a success when it returns a list of ConceptNode exclusively.
    """

    def __init__(self, **kwargs):
        # **kwargs is accepted for signature compatibility with sibling
        # parsers but is intentionally unused; 45 is this parser's priority.
        BaseParser.__init__(self, "MultipleConcepts", 45)

    def parse(self, context, text):
        """Resolve unrecognized tokens of a ConceptLexerParser result.

        Args:
            context: parsing context; provides ``sheerka`` and
                ``new_concept`` for concept lookup.
            text: candidate input; only handled when it is a
                PARSER_RESULT produced by the shared
                ``concept_lexer_parser`` instance.

        Returns:
            ``None`` when ``text`` is not applicable, a single wrapped
            PARSER_RESULT when exactly one interpretation exists, or a
            list of wrapped PARSER_RESULTs when the input is ambiguous
            (an identifier mapped to several candidate concepts).
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        # Only results produced by the shared ConceptLexerParser are handled.
        # (Fixed: was `if not text.parser == ...`; same comparison, idiomatic form.)
        if text.parser != concept_lexer_parser:
            return None

        nodes = text.value
        # nodes_found is a list of alternatives; each alternative is a list of
        # nodes. core.utils.product extends every alternative with each of the
        # given candidates, so ambiguity multiplies the alternatives.
        nodes_found = [[]]
        source = ""
        concepts_only = True

        for node in nodes:
            if not isinstance(node, UnrecognizedTokensNode):
                # Already-recognized node: append it to every alternative as-is.
                nodes_found = core.utils.product(nodes_found, [node])
                source += node.source
                continue

            # Pending run of consecutive tokens that stay unresolved.
            unrecognized_tokens = None
            for i, token in enumerate(node.tokens):
                index = node.start + i

                if token.type == TokenKind.IDENTIFIER:
                    # It may be a concept.
                    concept = context.new_concept(token.value)
                    # Hoisted: the original evaluated hasattr twice per token.
                    is_multi = hasattr(concept, "__iter__")
                    if is_multi or not sheerka.is_unknown(concept):
                        # Flush the pending unrecognized run before the concept.
                        if unrecognized_tokens:
                            unrecognized_tokens.fix_source()
                            source += unrecognized_tokens.source
                            if unrecognized_tokens.not_whitespace():
                                nodes_found = core.utils.product(
                                    nodes_found, [unrecognized_tokens])
                            unrecognized_tokens = None

                        source += token.value
                        # new_concept may return a single concept or an
                        # iterable of candidate concepts (ambiguity).
                        concepts = concept if is_multi else [concept]
                        concepts_nodes = [
                            ConceptNode(c, index, index, [token], token.value)
                            for c in concepts]
                        nodes_found = core.utils.product(nodes_found, concepts_nodes)
                        continue
                else:
                    # Non-identifier tokens can never be concepts; only
                    # whitespace/newline keep the result "concepts only".
                    # NOTE(review): an identifier that stays *unknown* falls
                    # through below without clearing concepts_only — confirm
                    # that is intended.
                    concepts_only &= token.type in (
                        TokenKind.WHITESPACE, TokenKind.NEWLINE)

                # Token stays unresolved: extend (or start) the pending run.
                if unrecognized_tokens:
                    unrecognized_tokens.add_token(token, index)
                else:
                    unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])

            # Flush a trailing unrecognized run at the end of the node.
            if unrecognized_tokens:
                unrecognized_tokens.fix_source()
                source += unrecognized_tokens.source
                if unrecognized_tokens.not_whitespace():
                    nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])

        # Wrap every alternative interpretation in its own PARSER_RESULT.
        ret = [
            sheerka.ret(
                self.name,
                concepts_only,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=choice,
                    try_parsed=None))
            for choice in nodes_found]

        if len(ret) == 1:
            self.log_result(context, source, ret[0])
            return ret[0]
        self.log_multiple_results(context, source, ret)
        return ret