Added SyaNodeParser (finally, after one month)
@@ -0,0 +1,369 @@
import copy

from dataclasses import dataclass

from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode

PARSERS = ["BnfNode", "SyaNode", "Python"]


@dataclass()
class TokensNodeFound(ErrorNode):
    expected_tokens: list

    def __eq__(self, other):
        if id(other) == id(self):
            return True

        if not isinstance(other, UnexpectedTokenErrorNode):
            return False

        if self.message != other.message:
            return False

        if self.token.type != other.token.type or self.token.value != other.token.value:
            return False

        if len(self.expected_tokens) != len(other.expected_tokens):
            return False

        for i, t in enumerate(self.expected_tokens):
            if t != other.expected_tokens[i]:
                return False

        return True

    def __hash__(self):
        # tuple(): a plain list is not hashable
        return hash((self.message, self.token, tuple(self.expected_tokens)))


class AtomConceptParserHelper:
    def __init__(self, context):

        self.context = context
        self.debug = []
        self.sequence = []  # sequence of concepts already found
        self.current_concept: ConceptNode = None  # concept being parsed
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of token positions
        self.expected_tokens = None  # tokens still expected for the current concept
        self.is_locked = False
        self.errors = []
        self.has_unrecognized = False
        self.forked = []  # used to duplicate AtomConceptParserHelper. See manage_unrecognized()

    def __eq__(self, other):
        if id(other) == id(self):
            return True

        if not isinstance(other, AtomConceptParserHelper):
            return False

        if len(self.sequence) != len(other.sequence):
            return False

        for item_self, item_other in zip(self.sequence, other.sequence):
            if item_self != item_other:
                return False

        return True

    def __hash__(self):
        return hash(len(self.sequence))

    def __repr__(self):
        return f"{self.sequence}"

    def lock(self):
        self.is_locked = True

    def reset(self):
        self.is_locked = False

    def has_error(self):
        return len(self.errors) > 0

    def eat_token(self, token, pos):
        if not self.expected_tokens:
            return False

        self.debug.append(token)

        if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
            self.errors.append(UnexpectedTokenErrorNode(
                f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
                token,
                [self.expected_tokens[0]]))
            return False

        self.current_concept.end = pos
        del self.expected_tokens[0]

        if not self.expected_tokens:
            # the concept is fully matched
            self.sequence.append(self.current_concept)
            self.expected_tokens = None

        return True

    def eat_concept(self, concept, pos):
        if self.is_locked:
            return

        self.debug.append(concept)
        self.manage_unrecognized()
        for forked in self.forked:
            # some clones may have been forked; feed them too
            forked.eat_concept(concept, pos)

        concept_node = ConceptNode(concept, pos, pos)
        expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]

        if not expected:
            # the concept is already fully matched
            self.sequence.append(concept_node)
        else:
            self.current_concept = concept_node
            self.expected_tokens = expected

    def manage_unrecognized(self):
        if self.unrecognized_tokens.is_empty():
            return

        # do not put an empty UnrecognizedTokensNode in the output
        if self.unrecognized_tokens.is_whitespace():
            self.unrecognized_tokens.reset()
            return

        self.unrecognized_tokens.fix_source()

        # try to recognize concepts
        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
        if nodes_sequences:
            instances = [self]
            for i in range(len(nodes_sequences) - 1):
                clone = self.clone()
                instances.append(clone)
                self.forked.append(clone)

            for instance, node_sequence in zip(instances, nodes_sequences):
                for node in node_sequence:
                    instance.sequence.append(node)
                    if isinstance(node, UnrecognizedTokensNode) or \
                            hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
                        instance.has_unrecognized = True
                instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

        else:
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True

            # create another instance for the next unrecognized tokens
            self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def eat_unrecognized(self, token, pos):
        if self.is_locked:
            return

        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
        if len(self.sequence) > 0:
            self.manage_unrecognized()
            for forked in self.forked:
                # some clones may have been forked; finalize them too
                forked.finalize()

        if self.expected_tokens:
            self.errors.append(TokensNodeFound(self.expected_tokens))

    def clone(self):
        clone = AtomConceptParserHelper(self.context)
        clone.debug = self.debug[:]
        clone.sequence = self.sequence[:]
        clone.current_concept = self.current_concept.clone() if self.current_concept else None
        clone.unrecognized_tokens = self.unrecognized_tokens.clone()
        clone.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
        clone.is_locked = self.is_locked
        clone.errors = self.errors[:]
        clone.has_unrecognized = self.has_unrecognized
        return clone

    def _get_lexer_nodes_from_unrecognized(self):
        """
        Use the source of self.unrecognized_tokens to find concepts or source code
        :return:
        """

        res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)

        if not only_parsers_results.status:
            return None

        return builtin_helpers.get_lexer_nodes(
            only_parsers_results.body.body,
            self.unrecognized_tokens.start,
            self.unrecognized_tokens.tokens)


class AtomNodeParser(BaseNodeParser):
    """
    Parser used to recognize atom concepts or sequences of atom concepts.
    An atom concept is a concept that does not have any property, though it may have a body.

    So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter),
    this parser can recognize the sequence 'one two three'
    as [ConceptNode(one), ConceptNode(two), ConceptNode(three)].
    It can partly recognize 'one x$1!! two three'
    as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)].
    It cannot recognize concepts with parameters (non-atom);
    e.g. 'one plus two' won't be recognized as ConceptNode(plus, one, two),
    it will be [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)].

    Note: 'one plus two' will be recognized by the SyaParser.
    """

    def __init__(self, **kwargs):
        super().__init__("AtomNode", 50, **kwargs)
        self.enabled = False

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that selects concepts that must be handled by AtomNodeParser
        :param concept:
        :return:
        """
        return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF

    def get_concepts_sequences(self):

        forked = []

        def _add_forked_to_concept_parser_helpers():
            # check whether some new helpers have been forked
            for parser in concept_parser_helpers:
                if len(parser.forked) > 0:
                    forked.extend(parser.forked)
                    parser.forked.clear()
            if len(forked) > 0:
                concept_parser_helpers.extend(forked)
                forked.clear()

        concept_parser_helpers = [AtomConceptParserHelper(self.context)]

        while self.next_token(False):
            for concept_parser in concept_parser_helpers:
                concept_parser.reset()

            token = self.token

            try:
                for concept_parser in concept_parser_helpers:
                    if concept_parser.eat_token(self.token, self.pos):
                        concept_parser.lock()

                concepts = self.get_concepts(token, self._is_eligible)
                if not concepts:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_unrecognized(token, self.pos)
                    continue

                if len(concepts) == 1:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_concept(concepts[0], self.pos)
                    continue

                # make the cartesian product
                temp_res = []
                for concept_parser in concept_parser_helpers:
                    if concept_parser.is_locked:
                        # it already ate the token,
                        # so simply add it, do not clone
                        temp_res.append(concept_parser)
                        continue

                    for concept in concepts:
                        clone = concept_parser.clone()
                        temp_res.append(clone)
                        clone.eat_concept(concept, self.pos)

                concept_parser_helpers = temp_res
            finally:
                _add_forked_to_concept_parser_helpers()

        # make sure that remaining items in the stack are moved to the output
        for concept_parser in concept_parser_helpers:
            concept_parser.reset()
            concept_parser.finalize()
        _add_forked_to_concept_parser_helpers()

        return concept_parser_helpers

    def get_valid(self, concept_parser_helpers):
        valid_parser_helpers = []  # be careful, it will be a list of lists
        for parser_helper in concept_parser_helpers:
            if parser_helper.has_error():
                continue

            if len(parser_helper.sequence) == 0:
                continue

            for node in parser_helper.sequence:
                node.tokens = self.tokens[node.start:node.end + 1]
                node.fix_source()

            if parser_helper in valid_parser_helpers:
                continue

            valid_parser_helpers.append(parser_helper)

        return valid_parser_helpers

    def parse(self, context, parser_input):
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )

        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        parser_helpers = self.get_valid(self.get_concepts_sequences())

        if len(parser_helpers):
            ret = []
            for parser_helper in parser_helpers:
                ret.append(
                    self.sheerka.ret(
                        self.name,
                        not parser_helper.has_unrecognized,
                        self.sheerka.new(
                            BuiltinConcepts.PARSER_RESULT,
                            parser=self,
                            source=parser_input,
                            body=parser_helper.sequence,
                            try_parsed=parser_helper.sequence)))

            if len(ret) == 1:
                self.log_result(context, parser_input, ret[0])
                return ret[0]
            else:
                self.log_multiple_results(context, parser_input, ret)
                return ret

        else:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
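The cartesian product above is the heart of this parser: when a token matches several concepts, every live helper is cloned once per candidate so that each interpretation is explored independently. A minimal standalone sketch of that strategy, outside the sheerka framework (the toy Helper class and the plain strings are hypothetical stand-ins for AtomConceptParserHelper and Concept):

# Sketch of the fork-on-ambiguity strategy used by get_concepts_sequences().
class Helper:
    def __init__(self, sequence=None):
        self.sequence = list(sequence or [])

    def clone(self):
        return Helper(self.sequence)

    def eat(self, concept):
        self.sequence.append(concept)


def step(helpers, candidates):
    if len(candidates) == 1:
        # unambiguous token: every helper eats it in place
        for h in helpers:
            h.eat(candidates[0])
        return helpers

    # ambiguous token: one clone per (helper, candidate) pair
    result = []
    for h in helpers:
        for c in candidates:
            clone = h.clone()
            clone.eat(c)
            result.append(clone)
    return result


helpers = [Helper()]
for candidates in [["one"], ["plus(op)", "plus(word)"], ["two"]]:
    helpers = step(helpers, candidates)

print([h.sequence for h in helpers])
# [['one', 'plus(op)', 'two'], ['one', 'plus(word)', 'two']]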
@@ -0,0 +1,669 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum

from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode

DEBUG_COMPILED = True


@dataclass()
class LexerNode(Node):
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False

        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.tokens == other.tokens

    def fix_source(self, force=True):
        if force or self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)
        return self


class UnrecognizedTokensNode(LexerNode):
    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False
        self.parenthesis_count = 0

    def freeze(self):
        self.is_frozen = True

    def reset(self):
        self.start = self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def add_token(self, token, pos):
        if self.is_frozen:
            raise Exception("The node is frozen")

        if self.end != -1 and pos == self.end + 2:
            # add the missing whitespace
            p = self.tokens[-1]  # previous token
            self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1))

        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            self.start = pos

        if token.type == TokenKind.LPAR:
            self.parenthesis_count += 1

        if token.type == TokenKind.RPAR:
            self.parenthesis_count -= 1

        return self

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        for t in self.tokens:
            if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                return False
        return True

    def is_empty(self):
        return len(self.tokens) == 0

    def __eq__(self, other):
        if isinstance(other, utnode):
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source

        if isinstance(other, UTN):
            return other == self

        if not isinstance(other, UnrecognizedTokensNode):
            return False

        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"

    def clone(self):
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        clone.is_frozen = self.is_frozen
        clone.parenthesis_count = self.parenthesis_count
        return clone


class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying
        self.fix_source(False)

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, (CN, CNC)):
            return other == self

        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                self.start == other.start and \
                self.end == other.end and \
                self.source == other.source

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return self.concept == other.concept and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
        if DEBUG_COMPILED:
            for k, v in self.concept.compiled.items():
                text += f", {k}='{v}'"
        return text + ")"

    def clone(self):
        # do we need to clone the concept as well?
        clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
        return clone


class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized
    """

    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
        super().__init__(start, end, tokens, source)
        self.node = node  # the PythonNode (or whatever language node) that was found
        self.return_value = return_value  # original result of the parsing

    def __eq__(self, other):
        if isinstance(other, scnode):
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source

        if not isinstance(other, SourceCodeNode):
            return False

        return self.node == other.node and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"


class SourceCodeWithConceptNode(LexerNode):
    """
    A kind of temporary version of SourceCodeNode.
    I know that there is some code,
    I know that there are some concepts,
    I just don't want to make the glue yet.

    So I push all the nodes into one big bag.
    """

    def __init__(self, first_node, last_node, content_nodes=None):
        super().__init__(9999, -1, None)  # why not sys.maxsize?
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = False
        self.fix_all_pos()

    def add_node(self, node):
        self.nodes.append(node)
        self.fix_pos(node)

        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if not isinstance(other, SourceCodeWithConceptNode):
            return False

        if self.start != other.start or self.end != other.end:
            return False

        if self.first != other.first:
            return False

        if self.last != other.last:
            return False

        if len(self.nodes) != len(other.nodes):
            return False

        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False

        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start

        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end
        return self

    def pseudo_fix_source(self):
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def clone(self):
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes)
        return clone


@dataclass()
class GrammarErrorNode(ErrorNode):
    message: str


class SyaAssociativity(Enum):
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        return self.value


cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")


@dataclass(init=False)
class SCWC:
    """
    SourceCodeWithConceptNode tester class.
    It matches a SourceCodeWithConceptNode
    but is easier to instantiate during tests.
    """
    first: LexerNode
    last: LexerNode
    content: tuple

    def __init__(self, first, last, *args):
        self.first = first
        self.last = last
        self.content = args


class HelperWithPos:
    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end

        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    def fix_pos(self, node):
        if not self.start_is_fixed:
            start = node.start if hasattr(node, "start") else \
                node[0] if isinstance(node, tuple) else None

            if start is not None and (self.start is None or start < self.start):
                self.start = start

        if not self.end_is_fixed:
            end = node.end if hasattr(node, "end") else \
                node[1] if isinstance(node, tuple) else None

            if end is not None and (self.end is None or end > self.end):
                self.end = end
        return self


class CN(HelperWithPos):
    """
    ConceptNode tester class.
    It matches a ConceptNode but with fewer constraints.

    CN == ConceptNode if concept key, start, end and source are the same
    """

    def __init__(self, concept, start=None, end=None, source=None):
        """
        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.source = source
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return True

        if not isinstance(other, CN):
            return False

        return self.concept_key == other.concept_key and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CN(concept='{self.concept}'"
        else:
            txt = f"CN(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"


class CNC(CN):
    """
    ConceptNode-for-Compiled tester class.
    It matches a ConceptNode
    but focuses on the 'compiled' property of the concept.

    CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
    """

    def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.compiled == other.concept.compiled  # assert instead of return could help debugging tests

        if not isinstance(other, CNC):
            return False

        return self.concept_key == other.concept_key and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.compiled == other.compiled

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"

        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"


class BaseNodeParser(BaseParser):
    def __init__(self, name, priority, **kwargs):
        super().__init__(name, priority)
        if 'sheerka' in kwargs:
            sheerka = kwargs.get("sheerka")
            self.init_from_sheerka(sheerka)

        else:
            self.concepts_by_first_keyword = None
            self.sya_definitions = None

        self.token = None
        self.pos = -1
        self.tokens = None

        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None

    def init_from_sheerka(self, sheerka):
        """
        Use the definitions from Sheerka to initialize
        :param sheerka:
        :return:
        """
        self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
        if sheerka.sya_definitions:
            self.sya_definitions = {}
            for k, v in sheerka.sya_definitions.items():
                self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka
        self.text = text

        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False

        self.token = None
        self.pos = -1
        return True

    def add_error(self, error, next_token=True):
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self.token

    def next_token(self, skip_whitespace=True):
        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.pos += 1
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
        """
        To quickly find a concept, we store them in a hash where the key is the first token of the concept.
        Example:
        Concept("foo a").def_prop("a"): "foo" is a token, "a" is a variable,
        so the key to use will be "foo".

        Concept("a foo").def_prop("a") -> first token is "foo"

        Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
        Note that under the same key there can be multiple entries;
        a B-tree may be a better implementation in the future.

        We also store sya_definitions, where each value is a tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
        :param context:
        :param concepts: list[Concept]
        :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
        :param use_sheerka: first init with the definitions from Sheerka
        :return:
        """
        self.context = context
        self.sheerka = context.sheerka

        if use_sheerka:
            self.init_from_sheerka(self.sheerka)

        if sya_definitions:
            if self.sya_definitions:
                self.sya_definitions.update(sya_definitions)
            else:
                self.sya_definitions = sya_definitions

        if self.concepts_by_first_keyword is None:
            self.concepts_by_first_keyword = {}

        for concept in concepts:
            keywords = concept.key.split()
            for keyword in keywords:
                if keyword.startswith(VARIABLE_PREFIX):
                    continue

                self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
                break

        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)

    def get_concepts(self, token, to_keep, to_map=None):
        """
        Tries to find concepts that match the value of the token
        :param token:
        :param to_keep: predicate that tells whether the concept is eligible
        :param to_map:
        :return:
        """

        if token.type == TokenKind.STRING:
            name = token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            name = token.value.value
        else:
            name = token.value

        result = []
        if name in self.concepts_by_first_keyword:
            for concept_id in self.concepts_by_first_keyword[name]:

                concept = self.sheerka.get_by_id(concept_id)

                if not to_keep(concept):
                    continue

                concept = to_map(concept) if to_map else concept
                result.append(concept)
            return result

        return None

    @staticmethod
    def get_token_value(token):
        if token.type == TokenKind.STRING:
            return token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            return token.value.value
        else:
            return token.value


class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode.
    Compares the source, and start/end if defined.
    """

    def __init__(self, source, start=None, end=None):
        """
        :param source:
        :param start:
        :param end:
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True

        if isinstance(other, UnrecognizedTokensNode):
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source

        if not isinstance(other, UTN):
            return False

        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN(source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
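A minimal sketch of the concepts_by_first_keyword index described in the initialize() docstring above, using plain strings as concept keys; here "$" stands in for VARIABLE_PREFIX (the real prefix is defined in core.concept):

# Toy version of the first-keyword index: the key is the first token
# of the concept key that is not a variable.
VARIABLE_PREFIX = "$"

def build_index(concept_keys):
    index = {}
    for concept_id, key in enumerate(concept_keys):
        for keyword in key.split():
            if keyword.startswith(VARIABLE_PREFIX):
                continue
            index.setdefault(keyword, []).append(concept_id)
            break  # only the first non-variable keyword is indexed
    return index

index = build_index(["foo $a", "$a foo", "Hello my dear $a"])
print(index)  # {'foo': [0, 1], 'Hello': [2]}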
@@ -1,8 +1,8 @@
 from dataclasses import dataclass

-from core.builtin_concepts import BuiltinConcepts
+from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
 from core.concept import Concept
-from core.tokenizer import TokenKind, Keywords, Token
+from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
 from core.sheerka_logger import get_logger
 import core.utils
 import logging
@@ -77,7 +77,6 @@ class BaseParser:
         self.priority = priority
         self.enabled = enabled

-        self.has_error = False
         self.error_sink = []

     def __eq__(self, other):
@@ -91,9 +90,13 @@ class BaseParser:
     def __repr__(self):
         return self.name

-    def parse(self, context, text):
+    def parse(self, context, parser_input):
         pass

+    @property
+    def has_error(self):
+        return len(self.error_sink) > 0
+
     def log_result(self, context, source, ret):
         if not self.log.isEnabledFor(logging.DEBUG):
             return
@@ -132,6 +135,53 @@ class BaseParser:
             body=self.error_sink if self.has_error else tree,
             try_parsed=try_parse)

+    def get_input_as_text(self, parser_input, custom_switcher=None):
+        if isinstance(parser_input, list):
+            return self.get_text_from_tokens(parser_input, custom_switcher)
+
+        if isinstance(parser_input, ParserResultConcept):
+            parser_input = parser_input.source
+
+        if "c:" in parser_input:
+            return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher)
+
+        return parser_input
+
+    def get_input_as_tokens(self, parser_input):
+        if isinstance(parser_input, list):
+            return self.add_eof_if_needed(parser_input)
+
+        if isinstance(parser_input, ParserResultConcept):
+            if parser_input.tokens:
+                return self.add_eof_if_needed(parser_input.tokens)
+            else:
+                return Tokenizer(parser_input.source)
+
+        return Tokenizer(parser_input)
+
+    def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
+        if not isinstance(parser_input, ParserResultConcept):
+            return None
+
+        if expected_parser and parser_input.parser != expected_parser:
+            return None
+
+        if len(parser_input.value) == 0:
+            return None
+
+        for node in parser_input.value:
+            from parsers.BaseNodeParser import LexerNode
+            if not isinstance(node, LexerNode):
+                return None
+
+        return parser_input.value
+
+    @staticmethod
+    def add_eof_if_needed(lst):
+        if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
+            lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
+        return lst
+
     @staticmethod
     def get_text_from_tokens(tokens, custom_switcher=None):
         if tokens is None:
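The new get_input_as_* helpers all follow the same normalization idea: a parser may receive a raw string, a token list, or a previous parser result, and everything is funnelled into one shape. A simplified, self-contained sketch of the token variant (Token, tokenize and ParserResult are toy stand-ins for the real classes):

# Sketch of the input normalization behind get_input_as_tokens():
# whatever the caller passes in, the parser ends up with a token list
# terminated by an EOF marker.
from dataclasses import dataclass

@dataclass
class Token:
    type: str
    value: str

EOF = Token("EOF", "")

def tokenize(text):
    return [Token("WORD", w) for w in text.split()]

@dataclass
class ParserResult:
    source: str
    tokens: list = None

def get_input_as_tokens(parser_input):
    if isinstance(parser_input, list):
        tokens = parser_input
    elif isinstance(parser_input, ParserResult):
        tokens = parser_input.tokens or tokenize(parser_input.source)
    else:
        tokens = tokenize(parser_input)
    if not tokens or tokens[-1].type != "EOF":
        tokens.append(EOF)
    return tokens

print(get_input_as_tokens("one plus two")[-1])  # Token(type='EOF', value='')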
@@ -9,147 +9,17 @@
-from collections import namedtuple
 from dataclasses import dataclass
 from collections import defaultdict
-from core.builtin_concepts import BuiltinConcepts
+from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
 from core.concept import Concept, ConceptParts, DoNotResolve
 from core.tokenizer import TokenKind, Tokenizer, Token
-from parsers.BaseParser import BaseParser, Node, ErrorNode
+from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
+from parsers.BaseParser import BaseParser, ErrorNode
 import core.utils
-
-
-@dataclass()
-class LexerNode(Node):
-    start: int  # starting index in the tokens list
-    end: int  # ending index in the tokens list
-    tokens: list = None  # tokens
-    source: str = None  # string representation of what was parsed
-
-    def __post_init__(self):
-        if self.source is None:
-            self.source = BaseParser.get_text_from_tokens(self.tokens)
-
-    def __eq__(self, other):
-        if not isinstance(other, LexerNode):
-            return False
-
-        return self.start == other.start and \
-            self.end == other.end and \
-            self.source == other.source and \
-            self.tokens == other.tokens
-
-
-class UnrecognizedTokensNode(LexerNode):
-    def __init__(self, start, end, tokens):
-        super().__init__(start, end, tokens)
-
-    def add_token(self, token, pos):
-        self.tokens.append(token)
-        self.end = pos
-
-    def fix_source(self):
-        self.source = BaseParser.get_text_from_tokens(self.tokens)
-
-    def not_whitespace(self):
-        return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
-
-    def __eq__(self, other):
-        if isinstance(other, utnode):
-            return self.start == other.start and \
-                self.end == other.end and \
-                self.source == other.source
-
-        if not isinstance(other, UnrecognizedTokensNode):
-            return False
-
-        return self.start == other.start and \
-            self.end == other.end and \
-            self.source == other.source
-
-    def __hash__(self):
-        return hash((self.start, self.end, self.source))
-
-    def __repr__(self):
-        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
-
-
-class ConceptNode(LexerNode):
-    """
-    Returned by the ConceptLexerParser
-    It represents a recognized concept
-    """
-
-    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
-        super().__init__(start, end, tokens, source)
-        self.concept = concept
-        self.underlying = underlying
-
-        if self.source is None:
-            self.source = BaseParser.get_text_from_tokens(self.tokens)
-
-    def __eq__(self, other):
-        if isinstance(other, cnode):
-            return self.concept.key == other.concept_key and \
-                self.start == other.start and \
-                self.end == other.end and \
-                self.source == other.source
-
-        if isinstance(other, short_cnode):
-            return self.concept.key == other.concept_key and self.source == other.source
-
-        if not isinstance(other, ConceptNode):
-            return False
-
-        return self.concept == other.concept and \
-            self.start == other.start and \
-            self.end == other.end and \
-            self.source == other.source and \
-            self.underlying == other.underlying
-
-    def __hash__(self):
-        return hash((self.concept, self.start, self.end, self.source, self.underlying))
-
-    def __repr__(self):
-        return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
-
-
-class SourceCodeNode(LexerNode):
-    """
-    Returned when some source code (like Python source code) is recognized
-    """
-
-    def __init__(self, node, start, end, tokens=None, source=None):
-        super().__init__(start, end, tokens, source)
-        self.node = node  # The PythonNode (or whatever language node) that is found
-
-    def __eq__(self, other):
-        if isinstance(other, scnode):
-            return self.start == other.start and \
-                self.end == other.end and \
-                self.source == other.source
-
-        if not isinstance(other, SourceCodeNode):
-            return False
-
-        return self.node == other.node and \
-            self.start == other.start and \
-            self.end == other.end and \
-            self.source == other.source
-
-    def __hash__(self):
-        return hash((self.start, self.end, self.source))
-
-    def __repr__(self):
-        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
-
-
-cnode = namedtuple("ConceptNode", "concept_key start end source")
-short_cnode = namedtuple("ConceptNode", "concept_key source")
-utnode = namedtuple("UnrecognizedTokensNode", "start end source")
-scnode = namedtuple("SourceCodeNode", "start end source")
-
-
 class NonTerminalNode(LexerNode):
     """
-    Returned by the ConceptLexerParser
+    Returned by the BnfNodeParser
     """

     def __init__(self, parsing_expression, start, end, tokens, children=None):
@@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode):

 class TerminalNode(LexerNode):
     """
-    Returned by the ConceptLexerParser
+    Returned by the BnfNodeParser
     """

     def __init__(self, parsing_expression, start, end, value):
@@ -205,11 +75,6 @@ class TerminalNode(LexerNode):
         return hash((self.parsing_expression, self.start, self.end, self.value))


-@dataclass()
-class GrammarErrorNode(ErrorNode):
-    message: str
-
-
 @dataclass()
 class UnknownConceptNode(ErrorNode):
     concept_key: str
@@ -574,9 +439,9 @@ class StrMatch(Match):
         return None


-class ConceptLexerParser(BaseParser):
+class BnfNodeParser(BaseParser):
     def __init__(self, **kwargs):
-        super().__init__("ConceptLexer", 50)
+        super().__init__("BnfNode", 50)
         if 'grammars' in kwargs:
             self.concepts_grammars = kwargs.get("grammars")
         elif 'sheerka' in kwargs:
@@ -595,7 +460,6 @@ class ConceptLexerParser(BaseParser):
         self.sheerka = None

     def add_error(self, error, next_token=True):
-        self.has_error = True
         self.error_sink.append(error)
         if next_token:
             self.next_token()
@@ -606,16 +470,11 @@ class ConceptLexerParser(BaseParser):
         self.sheerka = context.sheerka
         self.text = text

-        if isinstance(text, str):
-            try:
-                self.tokens = list(Tokenizer(text))
-            except core.tokenizer.LexerError as e:
-                self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
-                return False
-
-        else:
-            self.tokens = list(text)
-            self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1))  # make sure to finish with end of file token
+        try:
+            self.tokens = list(self.get_input_as_tokens(text))
+        except core.tokenizer.LexerError as e:
+            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
+            return False

         self.token = None
         self.pos = -1
@@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser):
             removed_concepts.append(e)
         return removed_concepts

-    def parse(self, context, text):
-        if text == "":
+    def parse(self, context, parser_input):
+        if parser_input == "":
             return context.sheerka.ret(
                 self.name,
                 False,
                 context.sheerka.new(BuiltinConcepts.IS_EMPTY)
             )

-        if not self.reset_parser(context, text):
+        if not self.reset_parser(context, parser_input):
             return self.sheerka.ret(
                 self.name,
                 False,
@@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser):
                     self.sheerka.new(
                         BuiltinConcepts.PARSER_RESULT,
                         parser=self,
-                        source=text,
+                        source=parser_input,
                         body=choice,
                         try_parsed=choice)))

         if len(ret) == 1:
-            self.log_result(context, text, ret[0])
+            self.log_result(context, parser_input, ret[0])
             return ret[0]
         else:
-            self.log_multiple_results(context, text, ret)
+            self.log_multiple_results(context, parser_input, ret)
             return ret

     def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
@@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser):
             _concept.compiled[prop_name] = new_value

         def _look_for_concept_match(_underlying):
+            """
+            At some point, there is either a StrMatch or a ConceptMatch
+            that allowed the recognition.
+            Look for the ConceptMatch, with recursion if needed
+            """
             if isinstance(_underlying.parsing_expression, ConceptExpression):
                 return _underlying
@@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser):
         def _get_underlying_value(_underlying):
             concept_match_node = _look_for_concept_match(_underlying)
             if concept_match_node:
+                # the value is a concept
                 if id(concept_match_node) in _underlying_value_cache:
                     result = _underlying_value_cache[id(concept_match_node)]
                 else:
@@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser):
                     result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
                     _underlying_value_cache[id(concept_match_node)] = result
             else:
+                # the value is a string
                 result = DoNotResolve(_underlying.source)

             return result
@@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser):
             concept.compiled[ConceptParts.BODY] = value
             if underlying.parsing_expression.rule_name:
                 _add_prop(concept, underlying.parsing_expression.rule_name, value)
+                # KSI: Why don't we set concept.metadata.need_validation to True?

         if isinstance(underlying, NonTerminalNode):
             for node in underlying.children:
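One possible reading of the _look_for_concept_match() docstring added above, as a standalone sketch: walk the match tree recursively until a node backed by a concept expression turns up. Node and its is_concept flag are hypothetical stand-ins for the real match nodes and ConceptExpression test:

# Sketch of a recursive search for the ConceptMatch that allowed recognition.
class Node:
    def __init__(self, is_concept=False, children=None):
        self.is_concept = is_concept
        self.children = children or []

def look_for_concept_match(node):
    if node.is_concept:
        return node
    for child in node.children:
        found = look_for_concept_match(child)
        if found:
            return found
    return None

tree = Node(children=[Node(), Node(is_concept=True)])
print(look_for_concept_match(tree) is not None)  # True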
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
 from core.sheerka.Sheerka import ExecutionContext
 from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
 from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
-from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
+from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
     StrMatch, ConceptGroupExpression


@@ -30,7 +30,6 @@ class BnfParser(BaseParser):

     def __init__(self, **kwargs):
         super().__init__("Bnf", 50, False)
-        # self.has_error = False
         # self.error_sink = []
         # self.name = BaseParser.PREFIX + "Bnf"

@@ -61,7 +60,6 @@ class BnfParser(BaseParser):
         self.eat_white_space()

     def add_error(self, error, next_token=True):
-        self.has_error = True
         self.error_sink.append(error)
         if next_token:
             self.next_token()

@@ -115,11 +113,11 @@ class BnfParser(BaseParser):
         token = self.get_token()
         return token.type == second or token.type == first and self.next_after().type == second

-    def parse(self, context: ExecutionContext, text):
+    def parse(self, context: ExecutionContext, parser_input):

         tree = None
         try:
-            self.reset_parser(context, text)
+            self.reset_parser(context, parser_input)
             tree = self.parse_choice()

             token = self.get_token()
@@ -1,10 +1,14 @@
 # try to match something like
 # ConceptNode 'plus' ConceptNode
 #
+# Replaced by SyaNodeParser
 from core.builtin_concepts import BuiltinConcepts
 from core.tokenizer import TokenKind, Token
+from parsers.BaseNodeParser import SourceCodeNode
 from parsers.BaseParser import BaseParser
-from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
+from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
 from parsers.MultipleConceptsParser import MultipleConceptsParser
 from core.concept import VARIABLE_PREFIX
 import logging

 multiple_concepts_parser = MultipleConceptsParser()
@@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser()
 class ConceptsWithConceptsParser(BaseParser):
     def __init__(self, **kwargs):
         super().__init__("ConceptsWithConcepts", 25)
+        self.enabled = False

     @staticmethod
     def get_tokens(nodes):
@@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser):

         return concept

-    def parse(self, context, text):
+    def parse(self, context, parser_input):
         sheerka = context.sheerka
-        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
+        nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
+        if not nodes:
             return None

-        if not text.parser == multiple_concepts_parser:
-            return None
-
-        nodes = text.body
-
         concept_key = self.get_key(nodes)
         concept = sheerka.new(concept_key)
         if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
             return sheerka.ret(
                 self.name,
                 False,
-                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
+                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))

         concepts = concept if hasattr(concept, "__iter__") else [concept]
         for concept in concepts:
@@ -101,7 +102,7 @@ class ConceptsWithConceptsParser(BaseParser):
                 sheerka.new(
                     BuiltinConcepts.PARSER_RESULT,
                     parser=self,
-                    source=text.source,
+                    source=parser_input.source,
                     body=concept,
                     try_parsed=None)))


@@ -110,7 +110,7 @@ class DefaultParser(BaseParser):
     """

     def __init__(self, **kwargs):
-        BaseParser.__init__(self, "Default", 50)
+        BaseParser.__init__(self, "Default", 60)
         self.lexer_iter = None
         self._current = None
         self.context: ExecutionContext = None
@@ -168,7 +168,6 @@ class DefaultParser(BaseParser):
         self.next_token()

     def add_error(self, error, next_token=True):
-        self.has_error = True
         self.error_sink.append(error)
         if next_token:
             self.next_token()
@@ -188,19 +187,19 @@ class DefaultParser(BaseParser):

         return

-    def parse(self, context, text):
+    def parse(self, context, parser_input):
         # default parser can only manage string text
-        if not isinstance(text, str):
+        if not isinstance(parser_input, str):
             ret = context.sheerka.ret(
                 self.name,
                 False,
-                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
-            self.log_result(context, text, ret)
+                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+            self.log_result(context, parser_input, ret)
             return ret

         tree = None
         try:
-            self.reset_parser(context, text)
+            self.reset_parser(context, parser_input)
             tree = self.parse_statement()
         except core.tokenizer.LexerError as e:
             self.add_error(e, False)
@@ -211,7 +210,7 @@ class DefaultParser(BaseParser):
         if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
             body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
         else:
-            body = self.get_return_value_body(context.sheerka, text, tree, tree)
+            body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
             # body = self.sheerka.new(
             #     BuiltinConcepts.PARSER_RESULT,
             #     parser=self,
@@ -224,7 +223,7 @@ class DefaultParser(BaseParser):
             not self.has_error,
             body)

-        self.log_result(context, text, ret)
+        self.log_result(context, parser_input, ret)
         return ret

     def parse_statement(self):

@@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser):
     def __init__(self, **kwargs):
         BaseParser.__init__(self, "EmptyString", 90)

-    def parse(self, context, text):
+    def parse(self, context, parser_input):
         sheerka = context.sheerka

-        if isinstance(text, str) and text.strip() == "" or \
-                isinstance(text, list) and text == [] or \
-                text is None:
+        if isinstance(parser_input, str) and parser_input.strip() == "" or \
+                isinstance(parser_input, list) and parser_input == [] or \
+                parser_input is None:
             ret = sheerka.ret(self.name, True, sheerka.new(
                 BuiltinConcepts.PARSER_RESULT,
                 parser=self,
@@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser):
         else:
             ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))

-        self.log_result(context, text, ret)
+        self.log_result(context, parser_input, ret)
         return ret

@@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser):
     def __init__(self, **kwargs):
         BaseParser.__init__(self, "ExactConcept", 80)

-    def parse(self, context, text):
+    def parse(self, context, parser_input):
         """
         the input can be a string, but it can also be a list of tokens
         :param context:
-        :param text:
+        :param parser_input:
         :return:
         """

-        context.log(f"Parsing '{text}'", self.name)
+        context.log(f"Parsing '{parser_input}'", self.name)
         res = []
         sheerka = context.sheerka
         try:
-            words = self.get_words(text)
+            words = self.get_words(parser_input)
         except LexerError as e:
             context.log(f"Error found in tokenizer {e}", self.name)
             return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))

         if len(words) > self.MAX_WORDS_SIZE:
             context.log(f"Max words reached. Stopping.", self.name)
-            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
+            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))

         recognized = False
         for combination in self.combinations(words):
@@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser):
                     context.sheerka.new(
                         BuiltinConcepts.PARSER_RESULT,
                         parser=self,
-                        source=text if isinstance(text, str) else self.get_text_from_tokens(text),
+                        source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
                         body=concept,
                         try_parsed=concept)))
                 recognized = True

         if recognized:
             if len(res) == 1:
-                self.log_result(context, text, res[0])
+                self.log_result(context, parser_input, res[0])
             else:
-                self.log_multiple_results(context, text, res)
+                self.log_multiple_results(context, parser_input, res)
             return res

-        ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
-        self.log_result(context, text, ret)
+        ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
+        self.log_result(context, parser_input, ret)
         return ret

-    @staticmethod
-    def get_words(text):
-        tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
+    def get_words(self, text):
+        tokens = self.get_input_as_tokens(text)
         res = []
         for t in tokens:
             if t.type == TokenKind.EOF:
@@ -1,18 +1,20 @@
+# to be replaced by SyaNodeParser
import ast

from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
+from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
-from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
+from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser

-concept_lexer_parser = ConceptLexerParser()
+concept_lexer_parser = BnfNodeParser()


class MultipleConceptsParser(BaseParser):
    """
-    Parser that will take the result of ConceptLexerParser and
+    Parser that will take the result of BnfNodeParser and
    try to resolve the unrecognized tokens token by token

    It is a success when it returns a list of ConceptNode exclusively
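The success criterion stated in the docstring, expressed as a one-liner (illustrative only, not code from this commit):

def is_fully_recognized(nodes):
    # Success means no UnrecognizedTokensNode survives: every node in the
    # resulting sequence is a ConceptNode.
    return all(isinstance(n, ConceptNode) for n in nodes)
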
@@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser):

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "MultipleConcepts", 45)
+        self.enabled = False

    @staticmethod
    def finalize(nodes_found, unrecognized_tokens):
@@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser):
            unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
        return unrecognized_tokens

-    def parse(self, context, text):
-        sheerka = context.sheerka
-        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
+    def parse(self, context, parser_input):
+        nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
+        if not nodes:
            return None

-        if not text.parser == concept_lexer_parser:
-            return None
-
-        nodes = text.value
+        sheerka = context.sheerka
        nodes_found = [[]]
        concepts_only = True

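The removed guard clauses show what the new get_input_as_lexer_nodes helper centralizes. A sketch inferred from those lines (the real helper is not in this diff; ParserResultConcept comes from the PythonParser hunk below, the .value attribute from the removed nodes = text.value, so both are assumptions here):

def get_input_as_lexer_nodes(self, parser_input, expected_parser):
    # Sketch: accept only a PARSER_RESULT produced by the expected upstream
    # parser and unwrap its node list; anything else is not for this parser.
    if not isinstance(parser_input, ParserResultConcept):
        return None
    if expected_parser is not None and parser_input.parser is not expected_parser:
        return None
    return parser_input.value
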
@@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser):
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
-                    source=text.source,
+                    source=parser_input.source,
                    body=choice,
                    try_parsed=None))
            )

        if len(ret) == 1:
-            self.log_result(context, text.source, ret[0])
+            self.log_result(context, parser_input.source, ret[0])
            return ret[0]
        else:
-            self.log_multiple_results(context, text.source, ret)
+            self.log_multiple_results(context, parser_input.source, ret)
            return ret

    @staticmethod

@@ -1,4 +1,4 @@
-from core.builtin_concepts import BuiltinConcepts
+from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
@@ -6,7 +6,7 @@ import ast
import logging
import core.utils

-from parsers.ConceptLexerParser import ConceptNode
+from parsers.BnfNodeParser import ConceptNode

log = logging.getLogger(__name__)

@@ -67,7 +67,7 @@ class PythonParser(BaseParser):
        BaseParser.__init__(self, "Python", 50)
        self.source = kwargs.get("source", "<undef>")

-    def parse(self, context, text):
+    def parse(self, context, parser_input):
        sheerka = context.sheerka
        tree = None

@@ -76,15 +76,9 @@ class PythonParser(BaseParser):
        }

        try:
-            if isinstance(text, str) and "c:" in text:
-                source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
-            elif isinstance(text, str):
-                source = text
-            else:
-                source = self.get_text_from_tokens(text, python_switcher)
+            source = self.get_input_as_text(parser_input, python_switcher)
            source = source.strip()

-            text = text if isinstance(text, str) else source
+            parser_input = parser_input if isinstance(parser_input, str) else source

            # first, try to parse an expression
            res, tree, error = self.try_parse_expression(source)
@@ -92,25 +86,32 @@ class PythonParser(BaseParser):
                # then try to parse a statement
                res, tree, error = self.try_parse_statement(source)
                if not res:
                    self.has_error = True
-                    error_node = PythonErrorNode(text, error)
+                    error_node = PythonErrorNode(parser_input, error)
                    self.error_sink.append(error_node)

        except LexerError as e:
            self.has_error = True
            self.error_sink.append(e)

-        ret = sheerka.ret(
-            self.name,
-            not self.has_error,
-            sheerka.new(
-                BuiltinConcepts.PARSER_RESULT,
-                parser=self,
-                source=text,
-                body=self.error_sink if self.has_error else PythonNode(text, tree),
-                try_parsed=None))
+        if self.has_error:
+            ret = sheerka.ret(
+                self.name,
+                False,
+                sheerka.new(
+                    BuiltinConcepts.NOT_FOR_ME,
+                    body=parser_input,
+                    reason=self.error_sink))
+        else:
+            ret = sheerka.ret(
+                self.name,
+                True,
+                sheerka.new(
+                    BuiltinConcepts.PARSER_RESULT,
+                    parser=self,
+                    source=parser_input,
+                    body=PythonNode(parser_input, tree),
+                    try_parsed=None))

-        self.log_result(context, text, ret)
+        self.log_result(context, parser_input, ret)
        return ret

    def try_parse_expression(self, text):

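try_parse_expression and its statement counterpart are only referenced in this hunk; their bodies are outside it. Given the res, tree, error triple at the call sites and the import ast in this file, a plausible sketch (mode="eval" is an assumption, not confirmed by the diff):

def try_parse_expression(self, text):
    # Probe the input as a Python expression; return (success, tree, error)
    # in the order the call sites above unpack it.
    try:
        return True, ast.parse(text, mode="eval"), None
    except SyntaxError as e:
        return False, None, e
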
@@ -1,10 +1,11 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
-from parsers.ConceptLexerParser import ConceptNode
+from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
+from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser

multiple_concepts_parser = MultipleConceptsParser()
+unrecognized_nodes_parser = UnrecognizedNodeParser()


class PythonWithConceptsParser(BaseParser):
@@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser):
            res += c if c.isalnum() else "0"
        return res

-    def parse(self, context, text):
+    def parse(self, context, parser_input):
        sheerka = context.sheerka
-        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
+        nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
+        if not nodes:
            return None

-        if not text.parser == multiple_concepts_parser:
-            return None
-
-        nodes = text.body
        source = ""
        to_parse = ""
        identifiers = {}
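The res += c if c.isalnum() else "0" line above is the tail of _get_identifier: concept sources are mangled into Python-safe identifiers by keeping alphanumerics and replacing everything else with "0". A toy reconstruction under that reading (the function's opening lines sit above this hunk and may do more, e.g. add a prefix):

def _get_identifier(source):
    # Toy version: "the answer" -> "the0answer", which ast can parse as a name.
    res = ""
    for c in source:
        res += c if c.isalnum() else "0"
    return res
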
@@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser):
                python_id = _get_identifier(concept)
                to_parse += python_id
                python_ids_mappings[python_id] = concept

            else:
+                source += node.source
                to_parse += node.source

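After this loop, to_parse holds plain Python text in which every embedded concept has been swapped for a synthetic identifier, and python_ids_mappings lets a later pass substitute the concepts back once PythonParser has produced a tree. A worked toy example (the concept value is hypothetical; _get_identifier is sketched above; variable names come from the hunk):

concept_source = "the answer"                  # hypothetical concept source text
python_id = _get_identifier(concept_source)    # -> "the0answer"
python_ids_mappings = {python_id: concept_source}
to_parse = f"x = {python_id}"                  # now ordinary, parseable Python
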
File diff suppressed because it is too large
@@ -0,0 +1,114 @@
from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils

PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]


@dataclass()
class CannotParseNode(ErrorNode):
    unrecognized: UnrecognizedTokensNode


class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya).
    It will try to resolve all UnrecognizedTokensNode.
    """

    def __init__(self, **kwargs):
        super().__init__("UnrecognizedNode", 45)  # lower than AtomNode, BnfNode and SyaNode

    def add_error(self, error):
        if hasattr(error, "__iter__"):
            self.error_sink.extend(error)
        else:
            self.error_sink.append(error)

    def parse(self, context, parser_input):
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            return None

        sequences_found = [[]]
        has_unrecognized = False

        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if not res.status:
                    self.add_error(res.body)
                else:
                    sequences_found = core.utils.product(sequences_found, [res.body])

            elif isinstance(node, UnrecognizedTokensNode):
                res = parse_unrecognized(context, node.source, PARSERS)
                res = only_successful(context, res)
                if res.status:
                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
                else:
                    sequences_found = core.utils.product(sequences_found, [node])
                    has_unrecognized = True

            else:  # cannot happen as of today :-)
                raise NotImplementedError()

        # a concept with UnrecognizedTokens in its properties is considered a fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        ret = []
        for choice in sequences_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input,
                        body=choice,
                        try_parsed=choice)))

        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret

    def validate_concept_node(self, context, concept_node):

        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find unrecognized tokens
            :param concept:
            :return:
            """
            for name, value in concept.compiled.items():
                if isinstance(value, Concept):
                    _validate_concept(value)

                elif isinstance(value, UnrecognizedTokensNode):
                    res = parse_unrecognized(context, value.tokens, PARSERS)
                    res = only_successful(context, res)  # only keep successful parsers
                    if res.status:
                        concept.compiled[name] = res.body.body
                    else:
                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))

        _validate_concept(concept_node.concept)
        if len(errors) > 0:
            return context.sheerka.ret(self.name, False, errors)
        else:
            return context.sheerka.ret(self.name, True, concept_node)
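A note on the core.utils.product calls in this new parser: sequences_found starts as [[]], and each node contributes either one continuation (a resolved ConceptNode) or several (alternative readings coming back from get_lexer_nodes), so parsing fans out into every combination of readings. core.utils.product itself is not shown in this commit; under that reading it would be something like this sketch:

def product(sequences, alternatives):
    # Extend every partial sequence with every alternative, keeping all
    # combinations alive: product([[]], [a, b]) == [[a], [b]].
    return [seq + [alt] for seq in sequences for alt in alternatives]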