Enhanced complex concepts handling
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from core.tokenizer import TokenKind
|
||||
from parsers.BaseParser import BaseParser
|
||||
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode
|
||||
import core.utils
|
||||
|
||||
# Module-level ConceptLexerParser instance. MultipleConceptsParser.parse
# compares the ``parser`` attribute of the incoming result against it (using
# ``==``) to decide whether the result is one it should post-process.
concept_lexer_parser = ConceptLexerParser()
|
||||
|
||||
|
||||
class MultipleConceptsParser(BaseParser):
    """
    Parser that takes the result of ConceptLexerParser and
    tries to resolve the unrecognized tokens token by token.

    It is a success when it returns a list of ConceptNode exclusively.
    """

    def __init__(self, **kwargs):
        # ``kwargs`` is accepted so callers can pass extra options, but
        # nothing in this constructor reads it.
        BaseParser.__init__(self, "MultipleConcepts", 45)

    @staticmethod
    def _flush_unrecognized(pending, nodes_found, source_parts):
        """Close a pending run of unrecognized tokens.

        Appends the run's source text to ``source_parts`` and, unless the run
        is whitespace only, combines the run with every candidate sequence in
        ``nodes_found``.

        :param pending: the current UnrecognizedTokensNode, or None
        :param nodes_found: candidate node sequences built so far
        :param source_parts: list of source fragments, mutated in place
        :return: the (possibly extended) candidate node sequences
        """
        if not pending:
            return nodes_found

        pending.fix_source()
        source_parts.append(pending.source)
        if pending.not_whitespace():
            return core.utils.product(nodes_found, [pending])
        return nodes_found

    def parse(self, context, text):
        """Try to turn the unrecognized tokens of a lexer result into concepts.

        :param context: execution context; ``context.sheerka`` and
            ``context.new_concept`` are used
        :param text: expected to be a PARSER_RESULT whose ``parser`` equals the
            module-level ``concept_lexer_parser``; anything else is ignored
        :return: None when ``text`` is not handled; otherwise a single return
            value when exactly one node sequence was built, else the list of
            return values (one per candidate sequence)
        """
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        if not text.parser == concept_lexer_parser:
            return None

        blank_kinds = (TokenKind.WHITESPACE, TokenKind.NEWLINE)
        # Each entry is one candidate sequence of nodes; a token that maps to
        # several concepts multiplies the candidates via core.utils.product.
        nodes_found = [[]]
        source_parts = []
        concepts_only = True

        for node in text.value:
            if not isinstance(node, UnrecognizedTokensNode):
                # Already resolved by the lexer parser: keep it as is.
                nodes_found = core.utils.product(nodes_found, [node])
                source_parts.append(node.source)
                continue

            pending = None
            for i, token in enumerate(node.tokens):
                index = node.start + i

                if token.type == TokenKind.IDENTIFIER:
                    # it may be a concept
                    concept = context.new_concept(token.value)
                    has_many = hasattr(concept, "__iter__")
                    if has_many or not sheerka.is_unknown(concept):
                        # Finish the unrecognized run that precedes this concept.
                        nodes_found = self._flush_unrecognized(pending, nodes_found, source_parts)
                        pending = None

                        source_parts.append(token.value)
                        candidates = concept if has_many else [concept]
                        concept_nodes = [
                            ConceptNode(c, index, index, [token], token.value)
                            for c in candidates
                        ]
                        nodes_found = core.utils.product(nodes_found, concept_nodes)
                        continue

                # The token joins an unrecognized run. Unless it is blank, the
                # result can no longer consist of ConceptNode exclusively. This
                # also covers identifiers that did not resolve to a concept, so
                # the success flag agrees with the class contract.
                if token.type not in blank_kinds:
                    concepts_only = False

                if pending:
                    pending.add_token(token, index)
                else:
                    pending = UnrecognizedTokensNode(index, index, [token])

            # Trailing unrecognized run at the end of this node.
            nodes_found = self._flush_unrecognized(pending, nodes_found, source_parts)

        source = "".join(source_parts)

        ret = [
            sheerka.ret(
                self.name,
                concepts_only,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=choice,
                    try_parsed=None))
            for choice in nodes_found
        ]

        if len(ret) == 1:
            self.log_result(context, source, ret[0])
            return ret[0]

        self.log_multiple_results(context, source, ret)
        return ret
|
||||
Reference in New Issue
Block a user