Refactored the Sheerka class: split it into sub-handlers. Refactored the unit tests to use classes.
@@ -0,0 +1,120 @@
from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords
from core.sheerka_logger import get_logger
import logging


@dataclass()
class Node:
    pass


@dataclass()
class NopNode(Node):

    def __repr__(self):
        return "nop"


class NotInitializedNode(Node):

    def __repr__(self):
        return "**N/A**"


@dataclass()
class ErrorNode(Node):
    pass


@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
    message: str
    expected_tokens: list


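def _error_node_example():
    """Illustrative sketch (not part of the original commit): error nodes are plain
    dataclasses, so unit tests can compare them structurally instead of by identity."""
    return UnexpectedTokenErrorNode("Unexpected token 'x'", []) == \
        UnexpectedTokenErrorNode("Unexpected token 'x'", [])

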
class BaseParser:
    PREFIX = "parsers."

    def __init__(self, name, priority: int, enabled=True):
        self.log = get_logger(self.PREFIX + self.__class__.__name__)
        self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
        self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)

        self.name = self.PREFIX + name
        self.priority = priority
        self.enabled = enabled

        self.has_error = False
        self.error_sink = []

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        return self.name == other.name

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return self.name

    def parse(self, context, text):
        pass

    def log_result(self, context, source, ret):
        if not self.log.isEnabledFor(logging.DEBUG):
            return

        if ret.status:
            value = context.return_value_to_str(ret)
            context.log(self.log, f"Recognized '{source}' as {value}", self.name)
        else:
            context.log(self.log, f"Failed to recognize '{source}'", self.name)

    def log_multiple_results(self, context, source, list_of_ret):
        if not self.log.isEnabledFor(logging.DEBUG):
            return

        context.log(self.log, f"Recognized '{source}' as multiple concepts", self.name)
        for r in list_of_ret:
            value = context.return_value_to_str(r)
            context.log(self.log, f"  Recognized '{value}'", self.name)

    def get_return_value_body(self, sheerka, source, tree, try_parse):
        if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
            return self.error_sink[0]

        return sheerka.new(
            BuiltinConcepts.PARSER_RESULT,
            parser=self,
            source=source,
            body=self.error_sink if self.has_error else tree,
            try_parsed=try_parse)

    @staticmethod
    def get_text_from_tokens(tokens, custom_switcher=None):
        if tokens is None:
            return ""
        res = ""

        if not hasattr(tokens, "__iter__"):
            tokens = [tokens]

        switcher = {
            TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
            TokenKind.CONCEPT: lambda t: "c:" + t.value + ":",
        }

        if custom_switcher:
            switcher.update(custom_switcher)

        for token in tokens:
            value = switcher.get(token.type, lambda t: t.value)(token)
            res += value
        return res
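

def _custom_switcher_example(tokens):
    """Illustrative sketch (not part of the original commit): get_text_from_tokens
    dispatches on token.type through the switcher dict and falls back to the raw
    token value, so a caller can override the rendering of any token kind, e.g. to
    quote STRING tokens."""
    return BaseParser.get_text_from_tokens(
        tokens,
        custom_switcher={TokenKind.STRING: lambda t: f"'{t.value}'"})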
@@ -0,0 +1,270 @@
from dataclasses import dataclass

import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, StrMatch


@dataclass()
class UnexpectedEndOfFileError(ErrorNode):
    pass


class BnfParser(BaseParser):
    """
    Parser used to transform a literal into a ParsingExpression.

    Example:
        a | b, c  ->  Sequence(OrderedChoice(a, b), c)

    '|' (pipe) is used for OrderedChoice
    ',' (comma) is used for Sequence
    '?' (question mark) is used for Optional
    '*' (star) is used for ZeroOrMore
    '+' (plus) is used for OneOrMore
    """

    def __init__(self, **kwargs):
        super().__init__("Bnf", 50, False)

        self.lexer_iter = None
        self._current = None
        self.after_current = None
        self.nb_open_par = 0
        self.context = None
        self.source = ""
        self.sheerka = None

    def __eq__(self, other):
        return isinstance(other, BnfParser)

    # defining __eq__ would otherwise reset __hash__ to None
    __hash__ = BaseParser.__hash__

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka

        self.lexer_iter = iter(Tokenizer(text.strip())) if isinstance(text, str) else iter(text)
        self._current = None
        self.after_current = None
        self.nb_open_par = 0
        self.source = ""  # reset the accumulated source between parses

        self.next_token()
        self.eat_white_space()

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self._current

    def next_token(self, skip_whitespace=False):
        if self._current and self._current.type == TokenKind.EOF:
            return

        try:
            self._current = self.after_current or next(self.lexer_iter)
            self.source += str(self._current.value)
            self.after_current = None

            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
                    self.source += str(self._current.value)
        except StopIteration:
            self._current = Token(TokenKind.EOF, "", -1, -1, -1)

    def next_after(self):
        if self.after_current is not None:
            return self.after_current

        try:
            self.after_current = next(self.lexer_iter)
            # self.source += str(self.after_current.value)
            return self.after_current
        except StopIteration:
            self.after_current = Token(TokenKind.EOF, "", -1, -1, -1)
            return self.after_current

    def eat_white_space(self):
        if self.after_current is not None:
            self._current = self.after_current
            self.source += str(self._current.value)
            self.after_current = None

        try:
            while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                self._current = next(self.lexer_iter)
                self.source += str(self._current.value)
        except StopIteration:
            self._current = None

    def maybe_sequence(self, first, second):
        # True if the current token is 'second', or is 'first' immediately followed by 'second'
        token = self.get_token()
        return token.type == second or (token.type == first and self.next_after().type == second)

    def parse(self, context: ExecutionContext, text):
        tree = None
        try:
            self.reset_parser(context, text)
            tree = self.parser_outer_rule_name()

            token = self.get_token()
            if token and token.type != TokenKind.EOF:
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
        except LexerError as e:
            self.add_error(e, False)

        value = self.get_return_value_body(context.sheerka, self.source, tree, tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            value)

        return ret

    def parser_outer_rule_name(self):
        return self.parser_rule_name(self.parse_choice)

    def parse_choice(self):
        sequence = self.parse_sequence()

        self.eat_white_space()
        token = self.get_token()
        if token is None or token.type != TokenKind.VBAR:
            return sequence

        elements = [sequence]
        while True:
            # maybe eat the vertical bar
            self.eat_white_space()
            token = self.get_token()
            if token is None or token.type != TokenKind.VBAR:
                break
            self.next_token(skip_whitespace=True)

            sequence = self.parse_sequence()
            elements.append(sequence)

        return OrderedChoice(*elements)

    def parse_sequence(self):
        expr_and_modifier = self.parse_modifier()
        token = self.get_token()
        if token is None or \
                token.type == TokenKind.EOF or \
                token.type == TokenKind.EQUALS or \
                self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                (self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR)):
            return expr_and_modifier

        elements = [expr_and_modifier]
        while True:
            token = self.get_token()
            if token is None or \
                    token.type == TokenKind.EOF or \
                    token.type == TokenKind.EQUALS or \
                    self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.VBAR) or \
                    (self.nb_open_par > 0 and self.maybe_sequence(TokenKind.WHITESPACE, TokenKind.RPAR)):
                break
            self.eat_white_space()

            sequence = self.parse_modifier()
            elements.append(sequence)

        return Sequence(*elements)

    def parse_modifier(self):
        expression = self.parser_inner_rule_name()

        token = self.get_token()

        if token.type == TokenKind.QMARK:
            self.next_token()
            return Optional(expression)

        if token.type == TokenKind.STAR:
            self.next_token()
            return ZeroOrMore(expression)

        if token.type == TokenKind.PLUS:
            self.next_token()
            return OneOrMore(expression)

        return expression

    def parser_inner_rule_name(self):
        return self.parser_rule_name(self.parse_expression)

    def parse_expression(self):
        token = self.get_token()
        if token.type == TokenKind.EOF:
            return self.add_error(UnexpectedEndOfFileError(), False)

        if token.type == TokenKind.LPAR:
            self.nb_open_par += 1
            self.next_token()
            expression = self.parse_choice()
            token = self.get_token()
            if token.type == TokenKind.RPAR:
                self.nb_open_par -= 1
                self.next_token()
            else:
                self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
            return expression

        if token.type == TokenKind.IDENTIFIER:
            self.next_token()

            concept_name = str(token.value)

            # we may be matching against a concept that is still under construction
            # (for example, a recursive BNF definition)
            if self.context.obj and hasattr(self.context.obj, "name"):
                if concept_name == str(self.context.obj.name):
                    return ConceptExpression(concept_name)

            concept = self.context.get_concept(concept_name)
            if not self.sheerka.is_known(concept):
                self.add_error(concept)
                return None
            elif hasattr(concept, "__iter__"):
                self.add_error(
                    self.sheerka.new(BuiltinConcepts.CANNOT_RESOLVE_CONCEPT,
                                     body=("key", concept_name)))
                return None
            else:
                return concept

        ret = StrMatch(core.utils.strip_quotes(token.value))
        self.next_token()
        return ret

    def parser_rule_name(self, next_to_parse):
        expression = next_to_parse()
        token = self.get_token()
        if token is None or token.type != TokenKind.EQUALS:
            return expression

        self.next_token()  # eat equals
        token = self.get_token()

        if token is None or token.type != TokenKind.IDENTIFIER:
            return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.IDENTIFIER]))

        expression.rule_name = token.value
        self.next_token()
        return expression
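

def _bnf_notation_example():
    """Illustrative sketch (not part of the original commit): the ParsingExpression
    tree the docstring notation describes, built by hand for comparison:
    "'a' | 'b', 'c'"  ->  Sequence(OrderedChoice('a', 'b'), 'c')"""
    return Sequence(
        OrderedChoice(StrMatch("a"), StrMatch("b")),
        StrMatch("c"))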
@@ -0,0 +1,994 @@
#####################################################################################################
# This implementation of the parser is highly inspired by the Arpeggio project
# (https://github.com/textX/Arpeggio). I don't use the project directly, but it
# helped me figure out what to do.
# Dejanović I., Milosavljević G., Vaderna R.:
# Arpeggio: A flexible PEG parser for Python,
# Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
#####################################################################################################
from collections import namedtuple, defaultdict
from dataclasses import dataclass

from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token, LexerError
from parsers.BaseParser import BaseParser, Node, ErrorNode
import core.utils


@dataclass()
class LexerNode(Node):
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False

        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.tokens == other.tokens


class UnrecognizedTokensNode(LexerNode):
    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)

    def add_token(self, token, pos):
        self.tokens.append(token)
        self.end = pos

    def fix_source(self):
        self.source = BaseParser.get_text_from_tokens(self.tokens)

    def not_whitespace(self):
        return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))

    def __eq__(self, other):
        if isinstance(other, utnode):
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source

        if not isinstance(other, UnrecognizedTokensNode):
            return False

        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"


class ConceptNode(LexerNode):
    """
    Returned by the ConceptLexerParser.
    It represents a recognized concept.
    """

    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying

    def __eq__(self, other):
        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                self.start == other.start and \
                self.end == other.end and \
                self.source == other.source

        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source

        if not isinstance(other, ConceptNode):
            return False

        return self.concept == other.concept and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"


class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized.
    """

    def __init__(self, node, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        self.node = node  # the PythonNode (or whatever language node) that is found

    def __eq__(self, other):
        if isinstance(other, scnode):
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source

        if not isinstance(other, SourceCodeNode):
            return False

        return self.node == other.node and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"


# lightweight stand-ins accepted by the node classes' __eq__ methods (used by the unit tests)
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
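

def _equality_shortcut_example(node):
    """Illustrative sketch (not part of the original commit): the node classes above
    compare equal to the matching namedtuple, so unit tests can state expectations
    without building full token lists."""
    return node == utnode(start=node.start, end=node.end, source=node.source)

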
class NonTerminalNode(LexerNode):
    """
    Returned by the ConceptLexerParser.
    """

    def __init__(self, parsing_expression, start, end, tokens, children=None):
        super().__init__(start, end, tokens)
        self.parsing_expression = parsing_expression
        self.children = children

    def __repr__(self):
        name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
        if len(self.children) > 0:
            sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
        else:
            sub_names = ""
        return name + sub_names

    def __eq__(self, other):
        if not isinstance(other, NonTerminalNode):
            return False

        return self.parsing_expression == other.parsing_expression and \
            self.start == other.start and \
            self.end == other.end and \
            self.children == other.children

    def __hash__(self):
        # children is a list, which is unhashable; hash a tuple of it instead
        return hash((self.parsing_expression, self.start, self.end, tuple(self.children)))


class TerminalNode(LexerNode):
    """
    Returned by the ConceptLexerParser.
    """

    def __init__(self, parsing_expression, start, end, value):
        super().__init__(start, end, source=value)
        self.parsing_expression = parsing_expression
        self.value = value

    def __repr__(self):
        name = self.parsing_expression.rule_name or ""
        return name + f"'{self.value}'"

    def __eq__(self, other):
        if not isinstance(other, TerminalNode):
            return False

        return self.parsing_expression == other.parsing_expression and \
            self.start == other.start and \
            self.end == other.end and \
            self.value == other.value

    def __hash__(self):
        return hash((self.parsing_expression, self.start, self.end, self.value))


@dataclass()
class GrammarErrorNode(ErrorNode):
    message: str


@dataclass()
class UnknownConceptNode(ErrorNode):
    concept_key: str


@dataclass()
class TooManyConceptNode(ErrorNode):
    concept_key: str


class ParsingExpression:
    def __init__(self, *args, **kwargs):
        self.elements = args

        nodes = kwargs.get('nodes', [])
        if not hasattr(nodes, '__iter__'):
            nodes = [nodes]
        self.nodes = nodes

        self.rule_name = kwargs.get('rule_name', '')

    def __eq__(self, other):
        if not isinstance(other, ParsingExpression):
            return False

        return self.rule_name == other.rule_name and self.elements == other.elements

    def __hash__(self):
        return hash((self.rule_name, self.elements))

    def parse(self, parser):
        return self._parse(parser)


class ConceptExpression(ParsingExpression):
    """
    Will match a concept.
    It is only used for rule definitions.

    When the grammar is created, it is replaced by the actual concept.
    """

    def __init__(self, concept, rule_name=""):
        super().__init__(rule_name=rule_name)
        self.concept = concept

    def __repr__(self):
        return f"{self.concept}"

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, ConceptExpression):
            return False

        if isinstance(self.concept, Concept):
            return self.concept.name == other.concept.name

        # when it's only the name of the concept
        return self.concept == other.concept

    def __hash__(self):
        return hash((self.concept, self.rule_name))

    @staticmethod
    def get_parsing_expression_from_name(name):
        tokens = Tokenizer(name)
        nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
        if len(nodes) == 1:
            return nodes[0]
        else:
            sequence = Sequence(nodes)
            sequence.nodes = nodes
            return sequence

    def _parse(self, parser):
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # memoize

        if to_match not in parser.concepts_grammars:
            # try to match the concept using its name
            expr = self.get_parsing_expression_from_name(to_match.name)
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])


class ConceptGroupExpression(ConceptExpression):
    def _parse(self, parser):
        to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
        if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
            return None

        self.concept = to_match  # memoize

        if to_match not in parser.concepts_grammars:
            concepts_in_group = parser.sheerka.get_set_elements(self.concept)
            nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
            expr = OrderedChoice(nodes)
            expr.nodes = nodes
            node = expr.parse(parser)
        else:
            node = parser.concepts_grammars[to_match].parse(parser)

        if node is None:
            return None

        return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])


class Sequence(ParsingExpression):
    """
    Matches a sequence of parser expressions, in the exact order they are defined.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = parser.pos

        children = []
        for e in self.nodes:
            node = e.parse(parser)
            if node is None:
                return None
            else:
                if node.end != -1:  # end == -1 means nothing was matched
                    children.append(node)
                    end_pos = node.end

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})"


class OrderedChoice(ParsingExpression):
    """
    Matches one expression among multiple.
    It stops at the first match, so the order of definition is important.
    """

    def _parse(self, parser):
        init_pos = parser.pos

        for e in self.nodes:
            node = e.parse(parser)
            if node:
                return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])

            parser.seek(init_pos)  # backtrack

        return None

    def __repr__(self):
        to_str = " | ".join(repr(n) for n in self.elements)
        return f"({to_str})"


class Optional(ParsingExpression):
    """
    Matches the elements, or nothing.
    If there are several matches, the longest one is chosen.
    If you need ordered choice, use Optional(OrderedChoice(...)).
    """

    def _parse(self, parser):
        init_pos = parser.pos
        selected_node = NonTerminalNode(self, parser.pos, -1, [], [])  # means that nothing is found

        for e in self.nodes:
            node = e.parse(parser)
            if node:
                if node.end > selected_node.end:
                    selected_node = NonTerminalNode(
                        self,
                        node.start,
                        node.end,
                        parser.tokens[node.start: node.end + 1],
                        [node])

            parser.seek(init_pos)  # backtrack

        if selected_node.end != -1:
            parser.seek(selected_node.end)
            parser.next_token()  # eat the tokens found

        return selected_node

    def __repr__(self):
        if len(self.elements) == 1:
            return f"{self.elements[0]}?"
        else:
            to_str = ", ".join(repr(n) for n in self.elements)
            return f"({to_str})?"


class Repetition(ParsingExpression):
    """
    Base class for all repetition-like parser expressions (?, *, +).

    Args:
        sep: optional expression that must match between two repetitions.
    """

    def __init__(self, *elements, **kwargs):
        super().__init__(*elements, **kwargs)
        self.sep = kwargs.get('sep', None)


class ZeroOrMore(Repetition):
    """
    ZeroOrMore will try to match the parser expression zero or more times.
    It never fails.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # eat one repetition
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break
            else:
                if node.end != -1:  # end == -1 means nothing was matched
                    children.append(node)
                    end_pos = node.end

        if len(children) == 0:
            return NonTerminalNode(self, init_pos, -1, [], [])

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})*"


class OneOrMore(Repetition):
    """
    OneOrMore will try to match the parser expression one or more times.
    """

    def _parse(self, parser):
        init_pos = parser.pos
        end_pos = -1
        children = []

        while True:
            current_pos = parser.pos

            # maybe eat the separator if needed
            if self.sep and children:
                sep_result = self.sep.parse(parser)
                if sep_result is None:
                    parser.seek(current_pos)
                    break

            # eat one repetition
            node = self.nodes[0].parse(parser)
            if node is None:
                parser.seek(current_pos)
                break
            else:
                if node.end != -1:  # end == -1 means nothing was matched
                    children.append(node)
                    end_pos = node.end

        if len(children) == 0:  # if nothing is found, it's an error
            return None

        return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)

    def __repr__(self):
        to_str = ", ".join(repr(n) for n in self.elements)
        return f"({to_str})+"
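

def _repetition_sep_example():
    """Illustrative sketch (not part of the original commit): the 'sep' argument makes
    a repetition consume a separator between two matches, here a comma-separated list
    of 'x'. ConceptLexerParser.get_model normally fills .nodes from .elements; it is
    done by hand here."""
    items = OneOrMore(StrMatch("x"), sep=StrMatch(","))
    items.nodes = [StrMatch("x")]
    return items

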
class UnorderedGroup(Repetition):
    """
    Will try to match all of the parsing expressions, in any order.
    """

    def _parse(self, parser):
        raise NotImplementedError()

    # def __repr__(self):
    #     to_str = ", ".join(repr(n) for n in self.elements)
    #     return f"({to_str})#"


class Match(ParsingExpression):
    """
    Base class for all classes that try to match something from the input.
    """

    def __init__(self, rule_name, root=False):
        super().__init__(rule_name=rule_name, root=root)

    def parse(self, parser):
        return self._parse(parser)


class StrMatch(Match):
    """
    Matches a literal.
    """

    def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
        super(Match, self).__init__(rule_name=rule_name, root=root)
        self.to_match = to_match
        self.ignore_case = ignore_case

    def __repr__(self):
        return f"'{self.to_match}'"

    def __eq__(self, other):
        if not super().__eq__(other):
            return False

        if not isinstance(other, StrMatch):
            return False

        return self.to_match == other.to_match and self.ignore_case == other.ignore_case

    def __hash__(self):
        # defining __eq__ would otherwise reset __hash__ to None
        return hash((self.rule_name, self.to_match, self.ignore_case))

    def _parse(self, parser):
        token = parser.get_token()
        m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
            else token.value == self.to_match

        if m:
            node = TerminalNode(self, parser.pos, parser.pos, token.value)
            parser.next_token()
            return node

        return None
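

def _grammar_composition_example():
    """Illustrative sketch (not part of the original commit): a grammar that matches
    'hello world' or 'hello there', composed from the expressions above. Note that
    _parse walks .nodes, which ConceptLexerParser.get_model fills from .elements."""
    return Sequence(
        StrMatch("hello"),
        OrderedChoice(StrMatch("world"), StrMatch("there")))

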
class ConceptLexerParser(BaseParser):
    def __init__(self, **kwargs):
        super().__init__("ConceptLexer", 50)
        if 'grammars' in kwargs:
            self.concepts_grammars = kwargs.get("grammars")
        elif 'sheerka' in kwargs:
            self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
        else:
            self.concepts_grammars = {}

        self.ignore_case = True

        self.token = None
        self.pos = -1
        self.tokens = None

        self.context = None
        self.text = None
        self.sheerka = None

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka
        self.text = text

        if isinstance(text, str):
            try:
                self.tokens = list(Tokenizer(text))
            except LexerError as e:
                self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
                return False

        else:
            self.tokens = list(text)
            self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1))  # make sure to finish with an end-of-file token

        self.token = None
        self.pos = -1
        self.next_token(False)
        return True

    def get_token(self) -> Token:
        return self.token

    def next_token(self, skip_whitespace=True):
        if self.token and self.token.type == TokenKind.EOF:
            return False

        self.pos += 1
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]

        return self.token.type != TokenKind.EOF

    def seek(self, pos):
        self.pos = pos
        self.token = self.tokens[self.pos]
        return True

    def rewind(self, offset, skip_whitespace=True):
        self.pos += offset
        self.token = self.tokens[self.pos]

        if skip_whitespace:
            while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
                self.pos -= 1
                self.token = self.tokens[self.pos]

    def initialize(self, context, concepts_definitions):
        """
        Adds a bunch of concepts, and how they can be recognized.
        :param context: execution context
        :param concepts_definitions: dictionary of concept -> concept definition
        :return: a ReturnValue holding the grammars, or the errors found
        """

        self.context = context
        self.sheerka = context.sheerka
        concepts_to_resolve = set()

        # ## Gets the grammars
        for concept, concept_def in concepts_definitions.items():
            concept.init_key()  # make sure that the key is initialized
            grammar = self.get_model(concept_def, concepts_to_resolve)
            self.concepts_grammars[concept] = grammar

        if self.has_error:
            return self.sheerka.ret(self.name, False, self.error_sink)

        # ## Removes concepts with infinite recursions
        concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
        for concept in concepts_to_remove:
            concepts_to_resolve.remove(concept)
            del self.concepts_grammars[concept]

        if self.has_error:
            return self.sheerka.ret(self.name, False, self.error_sink)
        else:
            return self.sheerka.ret(self.name, True, self.concepts_grammars)

    def get_concept(self, concept_name):
        if concept_name in self.context.concepts:
            return self.context.concepts[concept_name]
        return self.sheerka.get(concept_name)

    def get_model(self, concept_def, concepts_to_resolve):

        # TODO
        # inner_get_model must not modify the initial ParsingExpression.
        # A copy must be created.
        def inner_get_model(expression):
            if isinstance(expression, Concept):
                if self.sheerka.isagroup(expression):
                    ret = ConceptGroupExpression(expression, rule_name=expression.name)
                else:
                    ret = ConceptExpression(expression, rule_name=expression.name)
                concepts_to_resolve.add(expression)
            elif isinstance(expression, ConceptExpression):
                if expression.rule_name is None or expression.rule_name == "":
                    expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
                        else expression.concept
                concepts_to_resolve.add(expression.concept)
                ret = expression
            elif isinstance(expression, str):
                ret = StrMatch(expression, ignore_case=self.ignore_case)
            elif isinstance(expression, StrMatch):
                ret = expression
                if ret.ignore_case is None:
                    ret.ignore_case = self.ignore_case
            elif isinstance(expression, (Sequence, OrderedChoice, ZeroOrMore, OneOrMore, Optional)):
                ret = expression
                ret.nodes = [inner_get_model(e) for e in ret.elements]
            else:
                ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)

            # translate the separator expression
            if isinstance(expression, Repetition) and expression.sep:
                expression.sep = inner_get_model(expression.sep)

            return ret

        model = inner_get_model(concept_def)

        return model

    def detect_infinite_recursion(self, concepts_to_resolve):

        # infinite recursion matcher
        def _is_infinite_recursion(ref_concept, node):
            if isinstance(node, ConceptExpression):
                if node.concept == ref_concept:
                    return True

                if isinstance(node.concept, str):
                    to_match = self.get_concept(node.concept)
                    if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
                        return False
                else:
                    to_match = node.concept

                if to_match not in self.concepts_grammars:
                    return False

                return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])

            if isinstance(node, OrderedChoice):
                return _is_infinite_recursion(ref_concept, node.nodes[0])

            if isinstance(node, Sequence):
                for child in node.nodes:
                    if _is_infinite_recursion(ref_concept, child):
                        return True
                return False

            return False

        removed_concepts = []
        for e in concepts_to_resolve:
            if isinstance(e, str):
                e = self.get_concept(e)
                if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
                    continue

            if e not in self.concepts_grammars:
                continue

            to_resolve = self.concepts_grammars[e]
            if _is_infinite_recursion(e, to_resolve):
                removed_concepts.append(e)
        return removed_concepts

    def parse(self, context, text):
        if text == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )

        if not self.reset_parser(context, text):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))

        concepts_found = [[]]
        unrecognized_tokens = None
        has_unrecognized = False

        # concepts_found is actually a list of lists.
        # The first dimension is the number of possibilities found.
        # The second dimension is the number of concepts found, under one possibility.
        #
        # Example 1
        #   concept foo : 'one' 'two'
        #   concept bar : 'one' 'two'
        #   input 'one two' -> will produce two possibilities (foo and bar).
        #
        # Example 2
        #   concept foo : 'one'
        #   concept bar : 'two'
        #   input 'one two' -> will produce one possibility, which is (foo, bar) (foo then bar)

        while True:
            init_pos = self.pos
            res = []

            for concept, grammar in self.concepts_grammars.items():
                self.seek(init_pos)
                node = grammar.parse(self)  # a node is a TerminalNode or a NonTerminalNode
                if node is not None and node.end != -1:
                    updated_concept = self.finalize_concept(context.sheerka, concept, node)
                    concept_node = ConceptNode(
                        updated_concept,
                        node.start,
                        node.end,
                        self.tokens[node.start: node.end + 1],
                        None,
                        node)
                    res.append(concept_node)

            if len(res) == 0:  # not recognized
                self.seek(init_pos)
                if unrecognized_tokens:
                    unrecognized_tokens.add_token(self.get_token(), init_pos)
                else:
                    unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])

                if not self.next_token(False):
                    break

            else:  # some concepts are recognized
                if unrecognized_tokens and unrecognized_tokens.not_whitespace():
                    unrecognized_tokens.fix_source()
                    concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
                    has_unrecognized = True
                unrecognized_tokens = None

                res = self.get_bests(res)  # only keep the concepts that eat the most tokens
                concepts_found = core.utils.product(concepts_found, res)

                # loop
                self.seek(res[0].end)
                if not self.next_token(False):
                    break

        # fix the source for the remaining unrecognized tokens
        if unrecognized_tokens and unrecognized_tokens.not_whitespace():
            unrecognized_tokens.fix_source()
            concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
            has_unrecognized = True

        # return as many ReturnValues as choices found
        ret = []
        for choice in concepts_found:
            ret.append(
                self.sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    self.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text,
                        body=choice,
                        try_parsed=choice)))

        if len(ret) == 1:
            self.log_result(context, text, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, text, ret)
            return ret

    def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
        """
        Updates the properties of the concept.
        Recurses when a property is itself a concept.
        """

        # this cache makes sure that we return the same concept for the same ConceptExpression
        _underlying_value_cache = {}

        def _add_prop(_concept, prop_name, value):
            """
            Adds a new entry;
            makes a list if the property already exists.
            """
            if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
                # new entry
                _concept.compiled[prop_name] = value
            else:
                # make a list if there was already a value
                previous_value = _concept.compiled[prop_name]
                if isinstance(previous_value, list):
                    previous_value.append(value)
                else:
                    new_value = [previous_value, value]
                    _concept.compiled[prop_name] = new_value

        def _look_for_concept_match(_underlying):
            if isinstance(_underlying.parsing_expression, ConceptExpression):
                return _underlying

            if not isinstance(_underlying, NonTerminalNode):
                return None

            if len(_underlying.children) != 1:
                return None

            return _look_for_concept_match(_underlying.children[0])

        def _get_underlying_value(_underlying):
            concept_match_node = _look_for_concept_match(_underlying)
            if concept_match_node:
                if id(concept_match_node) in _underlying_value_cache:
                    result = _underlying_value_cache[id(concept_match_node)]
                else:
                    ref_tpl = concept_match_node.parsing_expression.concept
                    result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
                    _underlying_value_cache[id(concept_match_node)] = result
            else:
                result = DoNotResolve(_underlying.source)

            return result

        def _process_rule_name(_concept, _underlying):
            if _underlying.parsing_expression.rule_name:
                value = _get_underlying_value(_underlying)
                _add_prop(_concept, _underlying.parsing_expression.rule_name, value)

            if isinstance(_underlying, NonTerminalNode):
                for child in _underlying.children:
                    _process_rule_name(_concept, child)

        key = (template.key, template.id) if template.id else template.key
        concept = sheerka.new(key)
        if init_empty_body and concept.metadata.body is None:
            value = _get_underlying_value(underlying)
            concept.compiled[ConceptParts.BODY] = value
            if underlying.parsing_expression.rule_name:
                _add_prop(concept, underlying.parsing_expression.rule_name, value)

        if isinstance(underlying, NonTerminalNode):
            for node in underlying.children:
                _process_rule_name(concept, node)

        return concept

    @staticmethod
    def get_bests(results):
        """
        Returns the results that consumed the most tokens.
        :param results: list of nodes
        :return: the nodes sharing the largest end position
        """
        by_end_pos = defaultdict(list)
        for result in results:
            by_end_pos[result.end].append(result)

        return by_end_pos[max(by_end_pos)]


class ParsingExpressionVisitor:
    """
    Visits a ParsingExpression tree.
    Dispatches to a 'visit_<ClassName>' method when one exists, otherwise to generic_visit.
    """

    def visit(self, parsing_expression):
        name = parsing_expression.__class__.__name__

        method = 'visit_' + name
        visitor = getattr(self, method, self.generic_visit)
        return visitor(parsing_expression)

    def generic_visit(self, parsing_expression):
        if hasattr(self, "visit_all"):
            self.visit_all(parsing_expression)

        for node in parsing_expression.elements:
            if isinstance(node, Concept):
                self.visit(ConceptExpression(node.key or node.name))
            elif isinstance(node, str):
                self.visit(StrMatch(node))
            else:
                self.visit(node)
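

class _RuleNameCollector(ParsingExpressionVisitor):
    """Illustrative sketch (not part of the original commit): a visitor that collects
    rule names, showing the visit_all hook that generic_visit calls for every
    expression it traverses."""

    def __init__(self):
        self.names = []

    def visit_all(self, parsing_expression):
        if parsing_expression.rule_name:
            self.names.append(parsing_expression.rule_name)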
@@ -0,0 +1,110 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX

multiple_concepts_parser = MultipleConceptsParser()
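

def _parser_marker_example():
    """Illustrative sketch (not part of the original commit): BaseParser.__eq__
    compares parsers by name, so the module-level instance above acts as a marker
    value that any MultipleConceptsParser compares equal to; parse() below relies
    on this when it checks text.parser."""
    return MultipleConceptsParser() == multiple_concepts_parser

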
class ConceptsWithConceptsParser(BaseParser):
    def __init__(self, **kwargs):
        super().__init__("ConceptsWithConcepts", 25)

    @staticmethod
    def get_tokens(nodes):
        tokens = []

        for node in nodes:
            if isinstance(node, ConceptNode):
                index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
                tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
            else:
                for token in node.tokens:
                    if token.type == TokenKind.EOF:
                        break
                    elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                        continue
                    else:
                        tokens.append(token)

        return tokens

    @staticmethod
    def get_key(nodes):
        key = ""
        index = 0
        for node in nodes:
            if key:
                key += " "

            if isinstance(node, UnrecognizedTokensNode):
                key += node.source.strip()
            else:
                key += f"{VARIABLE_PREFIX}{index}"
                index += 1

        return key

    def finalize_concept(self, context, concept, nodes):
        index = 0
        for node in nodes:

            if isinstance(node, ConceptNode):
                prop_name = list(concept.props.keys())[index]
                concept.compiled[prop_name] = node.concept
                context.log(
                    self.verbose_log,
                    f"Setting property '{prop_name}'='{node.concept}'.",
                    self.name)
                index += 1
            elif isinstance(node, SourceCodeNode):
                prop_name = list(concept.props.keys())[index]
                sheerka = context.sheerka
                value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
                concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
                context.log(
                    self.verbose_log,
                    f"Setting property '{prop_name}'='Python({node.source})'.",
                    self.name)
                index += 1

        return concept

    def parse(self, context, text):
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        if text.parser != multiple_concepts_parser:
            return None

        nodes = text.body

        concept_key = self.get_key(nodes)
        concept = sheerka.new(concept_key)
        if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
            return sheerka.ret(
                self.name,
                False,
                sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))

        concepts = concept if hasattr(concept, "__iter__") else [concept]
        for concept in concepts:
            self.finalize_concept(context, concept, nodes)

        res = []
        for concept in concepts:
            res.append(sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=text.source,
                    body=concept,
                    try_parsed=None)))

        return res[0] if len(res) == 1 else res
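

def _get_key_example(nodes):
    """Illustrative sketch (not part of the original commit): get_key turns a node
    list into a template key, keeping unrecognized source text verbatim and replacing
    each recognized concept with a numbered variable (VARIABLE_PREFIX followed by its
    index), e.g. 'print ' + VARIABLE_PREFIX + '0' for
    [UnrecognizedTokensNode('print'), ConceptNode(...)]."""
    return ConceptsWithConceptsParser.get_key(nodes)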
@@ -0,0 +1,430 @@
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept, ParserResultConcept
from core.concept import ConceptParts
import core.builtin_helpers
import core.utils
from parsers.BaseParser import BaseParser, Node, ErrorNode, NotInitializedNode
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords, LexerError
from dataclasses import dataclass, field
from parsers.BnfParser import BnfParser
from core.sheerka.Sheerka import ExecutionContext


@dataclass()
class DefaultParserNode(Node):
    """
    Base node for all default parser nodes
    """
    tokens: list = field(compare=False, repr=False)


@dataclass()
class DefaultParserErrorNode(DefaultParserNode, ErrorNode):
    pass


@dataclass()
class UnexpectedTokenErrorNode(DefaultParserErrorNode):
    message: str
    expected_tokens: list


@dataclass()
class SyntaxErrorNode(DefaultParserErrorNode):
    """
    The input is recognized, but there is a syntax error
    """
    message: str


@dataclass()
class CannotHandleErrorNode(DefaultParserErrorNode):
    """
    The input is not recognized
    """
    text: str


@dataclass()
class NameNode(DefaultParserNode):

    def get_name(self):
        name = ""
        first = True
        for token in self.tokens:
            if token.type == TokenKind.EOF:
                break
            if token.type == TokenKind.WHITESPACE:
                continue
            if not first:
                name += " "

            name += token.value[1:-1] if token.type == TokenKind.STRING else token.value
            first = False

        return name

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        if not isinstance(other, NameNode):
            return False

        return self.get_name() == other.get_name()

    def __hash__(self):
        return hash(self.get_name())
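

def _name_node_example(tokens):
    """Illustrative sketch (not part of the original commit): NameNode joins the
    non-whitespace tokens with single spaces and strips the surrounding quotes of
    STRING tokens, so the tokens of `add "one" to a` yield 'add one to a'."""
    return NameNode(tokens).get_name()

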
@dataclass()
class DefConceptNode(DefaultParserNode):
    name: NameNode = NotInitializedNode()
    where: ReturnValueConcept = NotInitializedNode()
    pre: ReturnValueConcept = NotInitializedNode()
    post: ReturnValueConcept = NotInitializedNode()
    body: ReturnValueConcept = NotInitializedNode()
    definition: ReturnValueConcept = NotInitializedNode()

    def get_asts(self):
        asts = {}
        for part_key in ConceptParts:
            prop_value = getattr(self, part_key.value)
            if isinstance(prop_value, ReturnValueConcept) \
                    and isinstance(prop_value.body, ParserResultConcept) \
                    and hasattr(prop_value.body.body, "ast_"):
                asts[part_key] = prop_value
                # asts[part_key] = prop_value.body.body.ast_
        return asts


@dataclass()
class IsaConceptNode(DefaultParserNode):
    concept: NameNode = NotInitializedNode()
    set: NameNode = NotInitializedNode()


class DefaultParser(BaseParser):
    """
    Parses the Sheerka-specific grammar (like 'def concept').
    """

    def __init__(self, **kwargs):
        super().__init__("Default", 50)
        self.lexer_iter = None
        self._current = None
        self.context: ExecutionContext = None
        self.text = None
        self.sheerka = None

    @staticmethod
    def fix_indentation(tokens):
        """
        In the following example:
            def concept add one to a as:
                def func(x):
                    return x+1
                func(a)
        the indentation in front of 'def func(x):', 'return x+1' and 'func(a)' must be
        reduced to avoid a Python syntax error.
        :param tokens: the tokens of the body, starting at the colon
        :return: the fixed tokens, or an error node
        """
        if tokens[0].type != TokenKind.COLON:
            return tokens

        if len(tokens) < 3:
            return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])

        if tokens[1].type != TokenKind.NEWLINE:
            return UnexpectedTokenErrorNode([tokens[1]], "Unexpected token after colon", [TokenKind.NEWLINE])

        if tokens[2].type != TokenKind.WHITESPACE:
            return SyntaxErrorNode([tokens[2]], "Indentation not found.")
        indent_size = len(tokens[2].value)

        # now fix the other indentations
        i = 3
        while i < len(tokens) - 1:
            if tokens[i].type == TokenKind.NEWLINE:
                if tokens[i + 1].type != TokenKind.WHITESPACE:
                    return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])

                if len(tokens[i + 1].value) < indent_size:
                    return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")

                tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
            i += 1

        return tokens[3:]

    def reset_parser(self, context, text):
        self.context = context
        self.sheerka = context.sheerka

        self.text = text
        self.lexer_iter = iter(Tokenizer(text))
        self._current = None

        self.next_token()

    def add_error(self, error, next_token=True):
        self.has_error = True
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self._current

    def next_token(self, skip_whitespace=True):
        try:
            self._current = next(self.lexer_iter)
            if skip_whitespace:
                while self._current.type == TokenKind.WHITESPACE or self._current.type == TokenKind.NEWLINE:
                    self._current = next(self.lexer_iter)
        except StopIteration:
            self._current = None

    def parse(self, context, text):
        # the default parser can only manage string input
        if not isinstance(text, str):
            ret = context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
            self.log_result(context, text, ret)
            return ret

        tree = None
        try:
            self.reset_parser(context, text)
            tree = self.parse_statement()
        except LexerError as e:
            self.add_error(e, False)

        # if an error is found, it must be sent to error_sink;
        # tree must contain what was recognized

        if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
            body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
        else:
            body = self.get_return_value_body(context.sheerka, text, tree, tree)

        ret = self.sheerka.ret(
            self.name,
            not self.has_error,
            body)

        self.log_result(context, text, ret)
        return ret

    def parse_statement(self):
        token = self.get_token()
        if token.value == Keywords.DEF:
            self.next_token()
            self.context.log(self.verbose_log, "Keyword DEF found.", self.name)
            return self.parse_def_concept(token)
        else:
            return self.parse_isa_concept()

    def parse_def_concept(self, def_token):
        """
        def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
        """

        # init
        keywords_tokens = [def_token]
        concept_found = DefConceptNode(keywords_tokens)

        # the definition of a concept consists of several parts:
        # Keywords.CONCEPT to get the name of the concept
        # Keywords.FROM [Keywords.REGEX] to get the definition of the concept
        # Keywords.AS to get the body
        # Keywords.WHERE to get the recognition conditions for the variables
        # Keywords.PRE to get the conditions required to evaluate the concept
        # Keywords.POST to get the conditions to apply or verify once the concept is executed
        #
        # regroup the tokens by parts
        first_token, tokens_found_by_parts = self.regroup_tokens_by_parts(keywords_tokens)

        if first_token.type == TokenKind.EOF:
            return self.add_error(UnexpectedTokenErrorNode([first_token], "Unexpected end of file", [Keywords.CONCEPT]))

        # get the name
        concept_found.name = self.get_concept_name(first_token, tokens_found_by_parts)

        # get the definition
        concept_found.definition = self.get_concept_definition(concept_found, tokens_found_by_parts)

        # get the ASTs for the remaining parts
        asts_found_by_parts = self.get_concept_parts(tokens_found_by_parts)
        concept_found.where = asts_found_by_parts[Keywords.WHERE]
        concept_found.pre = asts_found_by_parts[Keywords.PRE]
        concept_found.post = asts_found_by_parts[Keywords.POST]
        concept_found.body = asts_found_by_parts[Keywords.AS]

        return concept_found

    def parse_isa_concept(self):
        concept_name = self.parse_concept_name()
        if isinstance(concept_name, DefaultParserErrorNode):
            return concept_name

        keyword_tokens = []
        token = self.get_token()
        if token.value != Keywords.ISA:
            return self.add_error(CannotHandleErrorNode([token], ""))
        keyword_tokens.append(token)
        self.next_token()

        set_name = self.parse_concept_name()
        return IsaConceptNode(keyword_tokens, concept_name, set_name)

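    # Illustrative note (not in the original): assuming "isa" links a concept
    # to a set, an input such as "cat isa animal" yields
    # IsaConceptNode(keyword_tokens, NameNode([cat]), NameNode([animal])).
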
    def parse_concept_name(self):
        tokens = []
        token = self.get_token()

        while token.type not in (TokenKind.EOF, TokenKind.KEYWORD):
            tokens.append(token)
            self.next_token()
            token = self.get_token()

        if len(tokens) == 0:
            return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", []))
        else:
            return NameNode(tokens)

    def regroup_tokens_by_parts(self, keywords_tokens):

        def_concept_parts = [Keywords.CONCEPT, Keywords.FROM, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]

        # tokens found while trying to recognize the parts
        tokens_found_by_parts = {
            Keywords.CONCEPT: [],
            Keywords.FROM: None,
            Keywords.AS: None,
            Keywords.WHERE: None,
            Keywords.PRE: None,
            Keywords.POST: None,
        }
        current_part = Keywords.CONCEPT
        token = self.get_token()
        first_token = token

        # loop through the tokens and put each one in the matching tokens_found_by_parts entry
        while token.type != TokenKind.EOF:
            if token.value in def_concept_parts:
                keywords_tokens.append(token)  # keep track of the keywords
                keyword = token.value
                if tokens_found_by_parts[keyword]:
                    # the part is declared more than once
                    self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
                    tokens_found_by_parts[current_part].append(token)  # add the token to the current part instead
                else:
                    tokens_found_by_parts[keyword] = [token]
                    current_part = keyword
                self.next_token()
            else:
                tokens_found_by_parts[current_part].append(token)
                self.next_token(False)

            token = self.get_token()

        return first_token, tokens_found_by_parts

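    # Illustrative note (not in the original): for an input along the lines of
    # "concept greet as say hello", the tokens are grouped roughly as
    #
    #     {CONCEPT: [concept, greet], AS: [as, say, hello],
    #      FROM: None, WHERE: None, PRE: None, POST: None}
    #
    # where each declared part keeps its introducing keyword token at index 0.
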
    def get_concept_name(self, first_token, tokens_found_by_parts):
        name_first_token_index = 1
        token = self.get_token()
        if first_token.value != Keywords.CONCEPT:
            self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
            name_first_token_index = 0

        name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
        if len(name_tokens) == name_first_token_index:
            self.add_error(SyntaxErrorNode([], "Name is mandatory."))

        if name_tokens and name_tokens[-1].type == TokenKind.NEWLINE:
            name_tokens = name_tokens[:-1]  # strip the trailing newline

        if TokenKind.NEWLINE in [t.type for t in name_tokens]:
            self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newlines are not allowed in the name."))

        name_node = NameNode(name_tokens[name_first_token_index:])  # skip the leading keyword token, if any
        return name_node

    def get_concept_definition(self, current_concept_def, tokens_found_by_parts):
        if tokens_found_by_parts[Keywords.FROM] is None:
            return NotInitializedNode()

        definition_tokens = tokens_found_by_parts[Keywords.FROM]
        if len(definition_tokens) < 2 or definition_tokens[1].value != Keywords.BNF:
            return NotInitializedNode()

        tokens = core.utils.strip_tokens(definition_tokens[2:])
        if len(tokens) == 0:
            self.add_error(SyntaxErrorNode([definition_tokens[1]], "Empty declaration"), False)
            return NotInitializedNode()

        bnf_parser = BnfParser()
        with self.context.push(self.name, obj=current_concept_def) as sub_context:
            parsing_result = bnf_parser.parse(sub_context, tokens)
            sub_context.add_values(return_values=parsing_result)

        if not parsing_result.status:
            self.add_error(parsing_result.value)
            return NotInitializedNode()

        return parsing_result

    def get_concept_parts(self, tokens_found_by_parts):
        asts_found_by_parts = {
            Keywords.AS: NotInitializedNode(),
            Keywords.WHERE: NotInitializedNode(),
            Keywords.PRE: NotInitializedNode(),
            Keywords.POST: NotInitializedNode(),
        }

        for keyword in tokens_found_by_parts:
            if keyword == Keywords.CONCEPT or keyword == Keywords.FROM:
                continue  # already handled

            tokens = tokens_found_by_parts[keyword]
            if tokens is None:
                continue  # nothing to do

            if len(tokens) == 1:  # check for empty declarations
                self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
                continue

            tokens = self.fix_indentation(tokens[1:])  # handle multi-line declarations
            if isinstance(tokens, ErrorNode):
                self.add_error(tokens)
                continue

            # ask the other parsers whether they recognize the tokens
            with self.context.push(self.name, desc=f"Parsing {keyword}") as sub_context:
                sub_context.log_new(self.verbose_log)
                to_parse = self.sheerka.ret(
                    sub_context.who,
                    True,
                    self.sheerka.new(BuiltinConcepts.USER_INPUT, body=tokens))
                steps = [BuiltinConcepts.PARSING]
                parsed = self.sheerka.execute(sub_context, to_parse, steps, self.verbose_log)
                parsing_result = core.builtin_helpers.expect_one(sub_context, parsed, self.verbose_log)
                sub_context.add_values(return_values=parsing_result)

            if not parsing_result.status:
                self.add_error(parsing_result.value)
                continue

            asts_found_by_parts[keyword] = parsing_result

        return asts_found_by_parts
@@ -0,0 +1,28 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser


class EmptyStringParser(BaseParser):
    """
    To parse empty or blank strings
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "EmptyString", 90)

    def parse(self, context, text):
        sheerka = context.sheerka

        if (isinstance(text, str) and text.strip() == "") or \
                (isinstance(text, list) and text == []) or \
                text is None:
            ret = sheerka.ret(self.name, True, sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source="",
                body=sheerka.new(BuiltinConcepts.NOP)))
        else:
            ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))

        self.log_result(context, text, ret)
        return ret
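
# Illustrative note (not in the original): a rough usage sketch, assuming a
# suitable context object:
#
#     ret = EmptyStringParser().parse(context, "   ")
#     # ret.status is True and the PARSER_RESULT body is a NOP concept
#
# Assuming a higher priority means the parser is consulted earlier, priority 90
# lets blank input short-circuit before the heavier parsers run.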
@@ -0,0 +1,150 @@
import logging

from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
from core.concept import VARIABLE_PREFIX


class ExactConceptParser(BaseParser):
    """
    Tries to recognize a single concept
    """

    MAX_WORDS_SIZE = 10

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "ExactConcept", 80)

    def parse(self, context, text):
        """
        text can be a string, but it can also be a list of tokens
        :param context:
        :param text:
        :return:
        """

        context.log(self.verbose_log, f"Parsing '{text}'", self.name)
        res = []
        sheerka = context.sheerka
        try:
            words = self.get_words(text)
        except LexerError as e:
            context.log(self.verbose_log, f"Error found in tokenizer {e}", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))

        if len(words) > self.MAX_WORDS_SIZE:
            context.log(self.verbose_log, "Max words reached. Stopping.", self.name)
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))

        recognized = False
        for combination in self.combinations(words):

            concept_key = " ".join(combination)
            result = sheerka.new(concept_key)

            if sheerka.isinstance(result, BuiltinConcepts.UNKNOWN_CONCEPT):
                continue

            concepts = result if isinstance(result, list) else [result]

            for concept in concepts:
                context.log(self.verbose_log, f"Recognized concept {concept}.", self.name)
                # update the properties if needed
                for i, token in enumerate(combination):
                    if token.startswith(VARIABLE_PREFIX):
                        index = int(token[len(VARIABLE_PREFIX):])
                        concept.def_prop_by_index(index, words[i])
                        if self.verbose_log.isEnabledFor(logging.DEBUG):
                            prop_name = list(concept.props.keys())[index]
                            context.log(
                                self.verbose_log,
                                f"Added property {index}: {prop_name}='{words[i]}'.",
                                self.name)

                res.append(ReturnValueConcept(
                    self.name,
                    True,
                    context.sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text if isinstance(text, str) else self.get_text_from_tokens(text),
                        body=concept,
                        try_parsed=concept)))
                recognized = True

        if recognized:
            if len(res) == 1:
                self.log_result(context, text, res[0])
            else:
                self.log_multiple_results(context, text, res)
            return res

        ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
        self.log_result(context, text, ret)
        return ret

    @staticmethod
    def get_words(text):
        tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
        res = []
        for t in tokens:
            if t.type == TokenKind.EOF:
                break
            if t.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                continue
            res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
        return res

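    # Illustrative note (not in the original): get_words flattens the input to
    # a plain word list; with tokenization details assumed,
    #
    #     get_words("the red cat")  ->  ["the", "red", "cat"]
    #
    # Keyword tokens contribute their textual value rather than the enum member.
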
    def combinations(self, iterable):
        # combinations(['foo', 'bar', 'baz']) -->
        #   ('foo', 'bar', 'baz'),
        #   ('__var__0', 'bar', 'baz'),
        #   ('foo', '__var__0', 'baz'),
        #   ('foo', 'bar', '__var__0'),
        #   ('__var__0', '__var__1', 'baz'),
        #   ('__var__0', 'bar', '__var__1'),
        #   ('foo', '__var__0', '__var__1'),
        #   ('__var__0', '__var__1', '__var__2')
        pool = tuple(iterable)
        n = len(pool)

        res = set()

        for r in range(0, n + 1):
            indices = list(range(r))
            res.add(self.get_tuple(pool, indices))
            while True:
                for i in reversed(range(r)):
                    if indices[i] != i + n - r:
                        break
                else:
                    break
                indices[i] += 1
                for j in range(i + 1, r):
                    indices[j] = indices[j - 1] + 1
                res.add(self.get_tuple(pool, indices))

        return res

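    # Illustrative note (not in the original): the inner while loop is the
    # index-advancing walk used by itertools.combinations; each index list
    # marks the positions replaced by variables, so all C(n, r) choices are
    # produced for every r, i.e. up to 2**n candidate keys per input.
    # MAX_WORDS_SIZE caps n to keep that blow-up affordable.
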
    @staticmethod
    def get_tuple(pool, indices):
        var_names = {}
        k = 0

        # assign a variable name to each distinct value selected by the indices
        for i in indices:
            value = pool[i]
            if value not in var_names:
                var_names[value] = f"{VARIABLE_PREFIX}{k}"
                k += 1

        # create the tuple, substituting variable names where assigned
        res = [var_names.get(value, value) for value in pool]
        return tuple(res)
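
    # Illustrative note (not in the original): repeated words share a variable;
    # assuming VARIABLE_PREFIX == "__var__",
    #
    #     get_tuple(('foo', 'bar', 'foo'), [0, 2]) -> ('__var__0', 'bar', '__var__0')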
@@ -0,0 +1,164 @@
import ast

from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
import core.utils
from parsers.PythonParser import PythonParser


concept_lexer_parser = ConceptLexerParser()


class MultipleConceptsParser(BaseParser):
    """
    Parser that takes the result of ConceptLexerParser and
    tries to resolve the unrecognized tokens, token by token.

    It succeeds when it returns a list of ConceptNode objects exclusively.
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "MultipleConcepts", 45)

    @staticmethod
    def finalize(nodes_found, unrecognized_tokens):
        if not unrecognized_tokens:
            return nodes_found, unrecognized_tokens

        unrecognized_tokens.fix_source()
        if unrecognized_tokens.not_whitespace():
            nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])

        return nodes_found, None

    @staticmethod
    def create_or_add(unrecognized_tokens, token, index):
        if unrecognized_tokens:
            unrecognized_tokens.add_token(token, index)
        else:
            unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
        return unrecognized_tokens

    def parse(self, context, text):
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        if text.parser != concept_lexer_parser:
            return None

        nodes = text.value
        nodes_found = [[]]
        concepts_only = True

        for node in nodes:
            if isinstance(node, UnrecognizedTokensNode):
                unrecognized_tokens = None
                i = 0

                while i < len(node.tokens):

                    token_index = node.start + i
                    token = node.tokens[i]

                    concepts_nodes = self.get_concepts_nodes(context, token_index, token)
                    if concepts_nodes is not None:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, concepts_nodes)
                        i += 1
                        continue

                    source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
                    if source_code_node:
                        nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
                        nodes_found = core.utils.product(nodes_found, [source_code_node])
                        i += len(source_code_node.tokens)
                        continue

                    # neither a concept nor source code
                    unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
                    concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
                    i += 1

                # finish processing if needed
                nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)

            else:
                nodes_found = core.utils.product(nodes_found, [node])

        ret = []
        for choice in nodes_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    concepts_only,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=text.source,
                        body=choice,
                        try_parsed=None))
            )

        if len(ret) == 1:
            self.log_result(context, text.source, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, text.source, ret)
            return ret

    @staticmethod
    def get_concepts_nodes(context, index, token):
        """
        Tries to recognize a concept
        from the universe of all known concepts
        """

        if token.type != TokenKind.IDENTIFIER:
            return None

        concept = context.new_concept(token.value)
        if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
            concepts = concept if hasattr(concept, "__iter__") else [concept]
            concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
            return concepts_nodes

        return None

    @staticmethod
    def get_source_code_node(context, index, tokens):
        """
        Tries to recognize source code.
        For the time being, only Python is supported.
        :param context:
        :param tokens:
        :param index:
        :return:
        """

        if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
            return None

        end_index = len(tokens)
        while end_index > 0:
            parser = PythonParser()
            tokens_to_parse = tokens[:end_index]
            res = parser.parse(context, tokens_to_parse)
            if res.status:
                # only expressions are accepted
                ast_ = res.value.value.ast_
                if not isinstance(ast_, ast.Expression):
                    return None
                try:
                    compiled = compile(ast_, "<string>", "eval")
                    eval(compiled, {}, {})
                except Exception:
                    return None

                source = BaseParser.get_text_from_tokens(tokens_to_parse)
                return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
            end_index -= 1

        return None
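
    # Illustrative note (not in the original): the while loop is a longest-match
    # scan: it first tries all remaining tokens as one Python expression, then
    # shrinks the slice one token at a time, so "1 + 2 foo" would match "1 + 2"
    # and leave "foo" to the other handlers. The eval() probe with empty
    # globals/locals rejects a match whose expression cannot actually run,
    # for instance one referencing unknown names.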
@@ -0,0 +1,214 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
import ast
import logging

from parsers.ConceptLexerParser import ConceptNode

log = logging.getLogger(__name__)


@dataclass()
class PythonErrorNode(ErrorNode):
    source: str
    exception: Exception


class PythonNode(Node):

    def __init__(self, source, ast_=None, concepts=None):
        self.source = source
        self.ast_ = ast_ if ast_ else ast.parse(source, mode="eval") if source else None
        self.concepts = concepts or {}  # set when concepts are recognized in the expression

    def __repr__(self):
        ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
        return "PythonNode(" + ast_type + "='" + self.source + "')"

    def __eq__(self, other):
        if not isinstance(other, PythonNode):
            return False

        if self.source != other.source:
            return False

        self_dump = self.get_dump(self.ast_)
        other_dump = self.get_dump(other.ast_)

        return self_dump == other_dump

    def __hash__(self):
        return hash((self.source, self.get_dump(self.ast_)))

    @staticmethod
    def get_dump(ast_):
        dump = ast.dump(ast_)
        for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
            dump = dump.replace(to_remove, "")
        return dump

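# Illustrative note (not in the original): get_dump() removes fields such as
# ", ctx=Load()" from the ast.dump() output so that two independently parsed
# copies of the same source compare equal:
#
#     PythonNode("1 + 1") == PythonNode("1 + 1")   # True: same source and dump
#
# __hash__ reuses the same dump, keeping it consistent with __eq__.
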
class PythonParser(BaseParser):
    """
    Parse Python scripts
    """

    def __init__(self, **kwargs):
        BaseParser.__init__(self, "Python", 50)
        self.source = kwargs.get("source", "<undef>")

    def parse(self, context, text):
        sheerka = context.sheerka
        tree = None

        python_switcher = {
            TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
        }

        try:
            if isinstance(text, str) and "c:" in text:
                source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
            elif isinstance(text, str):
                source = text
            else:
                source = self.get_text_from_tokens(text, python_switcher)
            source = source.strip()

            text = text if isinstance(text, str) else source

            # first, try to parse an expression
            res, tree, error = self.try_parse_expression(source)
            if not res:
                # then try to parse a statement
                res, tree, error = self.try_parse_statement(source)
                if not res:
                    self.has_error = True
                    error_node = PythonErrorNode(text, error)
                    self.error_sink.append(error_node)

        except LexerError as e:
            self.has_error = True
            self.error_sink.append(e)

        ret = sheerka.ret(
            self.name,
            not self.has_error,
            sheerka.new(
                BuiltinConcepts.PARSER_RESULT,
                parser=self,
                source=text,
                body=self.error_sink if self.has_error else PythonNode(text, tree),
                try_parsed=None))

        self.log_result(context, text, ret)
        return ret

    def try_parse_expression(self, text):
        try:
            return True, ast.parse(text, f"<{self.source}>", 'eval'), None
        except Exception as error:
            return False, None, error

    def try_parse_statement(self, text):
        try:
            return True, ast.parse(text, f"<{self.source}>", 'exec'), None
        except Exception as error:
            return False, None, error

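# Illustrative note (not in the original): the expression-then-statement
# fallback mirrors Python's own compile modes:
#
#     ast.parse("1 + 2", mode="eval")   # ok -> ast.Expression
#     ast.parse("x = 1", mode="eval")   # SyntaxError
#     ast.parse("x = 1", mode="exec")   # ok -> ast.Module
#
# so an assignment only parses on the second attempt, as a statement.
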
class PythonGetNamesVisitor(ast.NodeVisitor):
    """
    This visitor collects all the names declared in the AST
    """

    def __init__(self):
        self.names = set()

    def visit_Name(self, node):
        self.names.add(node.id)

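# Illustrative note (not in the original): a minimal usage sketch:
#
#     visitor = PythonGetNamesVisitor()
#     visitor.visit(ast.parse("a + b * a", mode="eval"))
#     # visitor.names == {"a", "b"}
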
class LexerNodeParserHelperForPython:
    """Helper class to parse a mix of concepts and Python"""

    def __init__(self):
        self.identifiers = {}  # cache of already created identifiers (the key is id(concept))
        self.identifiers_key = {}  # number of identifiers sharing the same root (prefix)

    def _get_identifier(self, concept):
        """
        Get a Python identifier for a concept.
        Returns the same identifier for the same concept,
        and a different identifier for concepts that share a name but differ.
        :param concept:
        :return:
        """
        if id(concept) in self.identifiers:
            return self.identifiers[id(concept)]

        identifier = "__C__" + self._sanitize(concept.key or concept.name)
        if concept.id:
            identifier += "__" + concept.id

        if identifier in self.identifiers_key:
            self.identifiers_key[identifier] += 1
            identifier += f"_{self.identifiers_key[identifier]}"
        else:
            self.identifiers_key[identifier] = 0

        identifier += "__C__"

        self.identifiers[id(concept)] = identifier
        return identifier

    @staticmethod
    def _sanitize(identifier):
        res = ""
        for c in identifier:
            res += c if c.isalnum() else "0"
        return res

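    # Illustrative note (not in the original): non-alphanumeric characters are
    # replaced by "0" so the result stays a valid identifier fragment, e.g.
    #
    #     _sanitize("my-concept!")  ->  "my0concept0"
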
    def parse(self, context, nodes):
        source = ""
        to_parse = ""

        concepts = {}  # the key is the Python identifier

        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source
                if to_parse:
                    to_parse += " "
                concept = node.concept
                python_id = self._get_identifier(concept)
                to_parse += python_id
                concepts[python_id] = concept
            else:
                source += node.source
                to_parse += node.source

        with context.push(self, desc="Trying Python for '" + to_parse + "'") as sub_context:
            sub_context.add_inputs(to_parse=to_parse)
            python_parser = PythonParser()
            result = python_parser.parse(sub_context, to_parse)
            sub_context.add_values(return_values=result)

        if result.status:
            python_node = result.body.body
            python_node.source = source
            python_node.concepts = concepts
            return python_node

        return result.body  # the error
@@ -0,0 +1,105 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser

multiple_concepts_parser = MultipleConceptsParser()


class PythonWithConceptsParser(BaseParser):
    def __init__(self, **kwargs):
        super().__init__("PythonWithConcepts", 20)
        self.identifiers = None
        self.identifiers_key = None

    @staticmethod
    def sanitize(identifier):
        res = ""
        for c in identifier:
            res += c if c.isalnum() else "0"
        return res

    def parse(self, context, text):
        sheerka = context.sheerka
        if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
            return None

        if text.parser != multiple_concepts_parser:
            return None

        nodes = text.body
        source = ""
        to_parse = ""
        identifiers = {}
        identifiers_key = {}
        python_ids_mappings = {}

        def _get_identifier(c):
            """
            Get a Python identifier for a concept.
            Returns the same identifier for the same concept,
            and a different identifier for concepts that share a name but differ.

            Kept as an inner function because identifiers, identifiers_key and
            python_ids_mappings should not become instance variables;
            the parser should stay as stateless as possible.
            :param c:
            :return:
            """
            if id(c) in identifiers:
                return identifiers[id(c)]

            identifier = "__C__" + self.sanitize(c.key or c.name)
            if c.id:
                identifier += "__" + c.id

            if identifier in identifiers_key:
                identifiers_key[identifier] += 1
                identifier += f"_{identifiers_key[identifier]}"
            else:
                identifiers_key[identifier] = 0

            identifier += "__C__"

            identifiers[id(c)] = identifier
            return identifier

        for node in nodes:
            if isinstance(node, ConceptNode):
                source += node.source
                if to_parse:
                    to_parse += " "
                concept = node.concept
                python_id = _get_identifier(concept)
                to_parse += python_id
                python_ids_mappings[python_id] = concept
            else:
                source += node.source
                to_parse += node.source

        with context.push(self, "Trying Python for '" + to_parse + "'") as sub_context:
            python_parser = PythonParser()
            result = python_parser.parse(sub_context, to_parse)

        if result.status:
            python_node = result.body.body
            python_node.source = source
            python_node.concepts = python_ids_mappings

            return sheerka.ret(
                self.name,
                True,
                sheerka.new(
                    BuiltinConcepts.PARSER_RESULT,
                    parser=self,
                    source=source,
                    body=result.body.body,
                    try_parsed=None))

        else:
            return sheerka.ret(
                self.name,
                False,
                result.body)