124 lines
4.7 KiB
Python
124 lines
4.7 KiB
Python
from dataclasses import dataclass
|
|
|
|
from core.builtin_concepts import BuiltinConcepts
|
|
from core.concept import Concept
|
|
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
|
|
from parsers.BaseParser import BaseParser, ErrorNode
|
|
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
|
|
import core.utils
|
|
|
|
# Parser names handed to parse_unrecognized() whenever this module tries to
# resolve the source of an UnrecognizedTokensNode.
PARSERS = [
    "EmptyString",
    "AtomNode",
    "BnfNode",
    "SyaNode",
    "Python",
]
|
|
|
|
|
|
@dataclass
class CannotParseNode(ErrorNode):
    """Error node that carries an UnrecognizedTokensNode."""

    # The unrecognized tokens node this error refers to.
    unrecognized: UnrecognizedTokensNode
|
|
|
|
|
|
class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya).

    It tries to resolve every UnrecognizedTokensNode: the ones found directly
    in the input, and the ones stored in the compiled properties of the
    concepts carried by ConceptNode entries.
    """

    def __init__(self, **kwargs):
        # NOTE: kwargs are accepted but not forwarded to the base class.
        super().__init__("UnrecognizedNode", 45)  # lower than AtomNode, BnfNode and SyaNode

    def add_error(self, error):
        """
        Record one error, or every error of a collection, in the error sink.

        :param error: a single error object, or an iterable of errors.
            str and bytes values are iterable but are stored as one single
            error instead of being split into characters.
        :return: None
        """
        if isinstance(error, (str, bytes)) or not hasattr(error, "__iter__"):
            self.error_sink.append(error)
        else:
            self.error_sink.extend(error)

    def parse(self, context, parser_input):
        """
        Resolve the unrecognized nodes of the input.

        :param context: execution context; context.sheerka is used to build
            the return values
        :param parser_input: input converted to lexer nodes with
            get_input_as_lexer_nodes
        :return: None when there is no node to process; a failed return value
            wrapping a BuiltinConcepts.ERROR when a concept could not be
            validated; otherwise one return value, or a list of return values
            when several sequences were found. Their status is False as soon
            as one node is still unrecognized (or is source code).
        """
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            return None

        # Every entry is one candidate sequence of nodes. Each processed node
        # is combined with the sequences found so far.
        sequences_found = [[]]
        has_unrecognized = False

        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if res.status:
                    sequences_found = core.utils.product(sequences_found, [res.body])
                else:
                    self.add_error(res.body)

            elif isinstance(node, UnrecognizedTokensNode):
                res = parse_unrecognized(context, node.source, PARSERS)
                res = only_successful(context, res)

                lexer_nodes = None
                if res.status:
                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)

                if lexer_nodes:
                    # make sure lexer_nodes is not empty
                    # (for example, some Python results are discarded)
                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
                else:
                    # nothing usable was produced: keep the node as it is
                    sequences_found = core.utils.product(sequences_found, [node])
                    has_unrecognized = True

            elif isinstance(node, SourceCodeNode):
                sequences_found = core.utils.product(sequences_found, [node])
                # never trust a source code node, it may be an invalid source code
                has_unrecognized = True

            else:  # cannot happen as of today :-)
                raise NotImplementedError(f"Unsupported node type: {type(node).__name__}")

        # a concept with UnrecognizedToken in its properties is considered as a fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        ret = [
            sheerka.ret(
                self.name,
                not has_unrecognized,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=parser_input,
                    body=choice,
                    try_parsed=choice))
            for choice in sequences_found
        ]

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]

        self.log_multiple_results(context, parser_input, ret)
        return ret

    def validate_concept_node(self, context, concept_node):
        """
        Resolve the unrecognized tokens stored in the compiled properties of
        the concept of a ConceptNode (nested concepts included).

        Resolved properties are replaced in place in concept.compiled.

        :param context: execution context
        :param concept_node: node whose concept must be validated
        :return: a successful return value whose body is concept_node, or a
            failed return value whose body is the list of errors
        """
        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find
            (and try to resolve) the unrecognized tokens.

            :param concept: concept whose compiled properties are browsed
            :return: None, failures are appended to errors
            """
            for name, value in concept.compiled.items():
                if isinstance(value, Concept):
                    _validate_concept(value)

                elif isinstance(value, UnrecognizedTokensNode):
                    res = parse_unrecognized(context, value.source, PARSERS)
                    res = only_successful(context, res)  # only keep successful parsers
                    if res.status:
                        # the key already exists, so the size of the dict does not change
                        concept.compiled[name] = res.body.body
                    else:
                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))

        _validate_concept(concept_node.concept)
        if errors:
            return sheerka.ret(self.name, False, errors)

        return sheerka.ret(self.name, True, concept_node)
|