Added first version of DebugManager. Implemented draft of the rule engine

2020-11-20 13:41:45 +01:00
parent cd066881b4
commit 315f8ea09b
156 changed files with 8388 additions and 2852 deletions
+4 -1
@@ -128,7 +128,7 @@ class BaseCustomGrammarParser(BaseParser):
return tokens[pos:]
def get_parts(self, keywords, expected_first_token=None):
def get_parts(self, keywords, expected_first_token=None, strip_tokens=False):
"""
Reads Parser Input and groups the tokens by keywords
ex:
@@ -148,6 +148,7 @@ class BaseCustomGrammarParser(BaseParser):
:param keywords:
:param expected_first_token: it must be a KeyW
:param strip_tokens: if True, the returned tokens will be trimmed
:return: dictionary
"""
@@ -246,5 +247,7 @@ class BaseCustomGrammarParser(BaseParser):
# replace double quoted strings by their content
elif len(stripped) == 1 and stripped[0].type == TokenKind.STRING and stripped[0].value[0] == '"':
res[k] = v[0:1] + list(Tokenizer(stripped[0].strip_quote, yield_eof=False))
elif strip_tokens:
res[k] = core.utils.strip_tokens(v)
return res
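The new strip_tokens flag trims whitespace tokens from each keyword group before it is returned. Below is a rough, self-contained sketch of the grouping idea, working on plain strings rather than the repo's Token objects; the names and simplifications are illustrative, not the actual get_parts implementation.

def get_parts(tokens, keywords, strip_tokens=False):
    # group everything that follows a keyword under that keyword
    parts, current = {}, None
    for tok in tokens:
        if tok in keywords:
            current = tok
            parts[current] = []
        elif current is not None:
            parts[current].append(tok)
    if strip_tokens:
        # drop whitespace-only entries, the rough equivalent of core.utils.strip_tokens
        parts = {k: [t for t in v if t.strip()] for k, v in parts.items()}
    return parts

print(get_parts("when x is 1 print x".split(), {"when", "print"}))
# {'when': ['x', 'is', '1'], 'print': ['x']}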
+112 -19
@@ -6,6 +6,7 @@ from typing import Set
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.rule import Rule
from core.tokenizer import TokenKind, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
@@ -26,7 +27,7 @@ class LexerNode(Node):
def __post_init__(self):
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
self.source = core.utils.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if not isinstance(other, LexerNode):
@@ -39,7 +40,7 @@ class LexerNode(Node):
def fix_source(self, force=True):
if force or self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
self.source = core.utils.get_text_from_tokens(self.tokens)
return self
def clone(self):
@@ -151,6 +152,40 @@ class UnrecognizedTokensNode(LexerNode):
return f"UTN('{self.source}')"
class RuleNode(LexerNode):
def __init__(self, rule, start, end, tokens=None, source=None):
super().__init__(start, end, tokens, source)
self.rule = rule
self.fix_source(False)
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, RN):
return other == self
if not isinstance(other, RuleNode):
return False
return self.rule == other.rule and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.rule, self.start, self.end, self.source))
def __repr__(self):
return f"RuleNode(rule='{self.rule}', source='{self.source}', start={self.start}, end={self.end})"
def clone(self):
return RuleNode(self.rule, self.start, self.end, self.tokens, self.source)
def to_short_str(self):
return f'RN({self.rule})'
class ConceptNode(LexerNode):
"""
Returned by the BnfNodeParser
@@ -194,7 +229,7 @@ class ConceptNode(LexerNode):
def __repr__(self):
text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
if DEBUG_COMPILED:
for k, v in self.concept.compiled.items():
for k, v in self.concept.get_compiled().items():
text += f", {k}='{v}'"
return text + ")"
@@ -213,7 +248,7 @@ class ConceptNode(LexerNode):
bag[k] = v
# if isinstance(self.concept, Concept):
# bag["compiled"] = self.concept.compiled
# bag["compiled"] = self.concept.get_compiled()
return bag
def to_short_str(self):
@@ -607,7 +642,7 @@ class CNC(CN):
It matches with ConceptNode
But focuses on the 'compiled' property of the concept
CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
CNC == ConceptNode if CNC.get_compiled() == ConceptNode.concept.get_compiled()
"""
def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
@@ -634,9 +669,9 @@ class CNC(CN):
if self.source is not None and self.source != other.source:
return False
if self.exclude_body:
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
to_compare = {k: v for k, v in other.concept.get_compiled().items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.compiled
to_compare = other.concept.get_compiled()
if self.compiled == to_compare: # expanded form to ease the debug
return True
else:
@@ -675,10 +710,9 @@ class UTN(HelperWithPos):
def __init__(self, source, start=None, end=None):
"""
:param concept: Concept or concept_key (only the key is used anyway)
:param source:
:param start:
:param end:
:param source:
"""
super().__init__(start, end)
self.source = source
@@ -711,6 +745,65 @@ class UTN(HelperWithPos):
return txt + ")"
class RN(HelperWithPos):
"""
Helper class to test RuleNode
"""
def __init__(self, rule, start=None, end=None, source=None):
"""
:param rule: Rule instance or rule id (the id is what equality compares)
:param start:
:param end:
:param source:
"""
super().__init__(start, end)
self.rule_id = rule.id if isinstance(rule, Rule) else rule
self.source = source or core.utils.str_concept((None, self.rule_id), prefix="r:")
self.rule = rule if isinstance(rule, Rule) else None
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, RuleNode):
if other.rule is None:
return False
if other.rule.id != self.rule_id:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
return True
if not isinstance(other, RN):
return False
return self.rule_id == other.rule_id and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.rule_id, self.start, self.end, self.source))
def __repr__(self):
if self.rule:
txt = f"RN(rule='{self.rule}'"
else:
txt = f"RN(rule_id='{self.rule_id}'"
txt += f", source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
return txt + ")"
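RN is a test helper that matches RuleNode instances while treating unset fields as wildcards, which is why RuleNode.__eq__ delegates to it when the other operand is an RN. A standalone illustration of that partial-match pattern, using stand-in class names rather than the repo's:

class FullNode:
    # stand-in for RuleNode: every field is always set
    def __init__(self, rule_id, start, end):
        self.rule_id, self.start, self.end = rule_id, start, end

class Matcher:
    # stand-in for RN: None fields act as wildcards
    def __init__(self, rule_id, start=None, end=None):
        self.rule_id, self.start, self.end = rule_id, start, end

    def __eq__(self, other):
        if self.rule_id != other.rule_id:
            return False
        if self.start is not None and self.start != other.start:
            return False
        if self.end is not None and self.end != other.end:
            return False
        return True

assert Matcher("r1") == FullNode("r1", 3, 7)           # positions ignored
assert Matcher("r1", start=3) == FullNode("r1", 3, 7)  # start pinned and matching
assert Matcher("r1", end=9) != FullNode("r1", 3, 7)    # end pinned and different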
class BaseNodeParser(BaseParser):
"""
Parser that returns LexerNode
@@ -938,10 +1031,10 @@ class BaseNodeParser(BaseParser):
:param concept:
:return:
"""
if concept.bnf:
if concept.get_bnf():
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.bnf)
bnf_visitor.visit(concept.get_bnf())
return bnf_visitor.first_tokens
else:
keywords = concept.key.split()
@@ -955,22 +1048,22 @@ class BaseNodeParser(BaseParser):
@staticmethod
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
if concept.metadata.definition_type == DEFINITION_TYPE_BNF and not concept.bnf:
from parsers.BnfParser import BnfParser
regex_parser = BnfParser()
desc = f"Resolving BNF '{concept.metadata.definition}'"
if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF and not concept.get_bnf():
from parsers.BnfDefinitionParser import BnfDefinitionParser
regex_parser = BnfDefinitionParser()
desc = f"Resolving BNF '{concept.get_metadata().definition}'"
with context.push(BuiltinConcepts.INIT_BNF,
concept,
who=parser_name,
obj=concept,
desc=desc) as sub_context:
sub_context.add_inputs(parser_input=concept.metadata.definition)
bnf_parsing_ret_val = regex_parser.parse(sub_context, concept.metadata.definition)
sub_context.add_inputs(parser_input=concept.get_metadata().definition)
bnf_parsing_ret_val = regex_parser.parse(sub_context, concept.get_metadata().definition)
sub_context.add_values(return_values=bnf_parsing_ret_val)
if not bnf_parsing_ret_val.status:
raise Exception(bnf_parsing_ret_val.value)
concept.bnf = bnf_parsing_ret_val.body.body
concept.set_bnf(bnf_parsing_ret_val.body.body)
if concept.id:
context.sheerka.get_by_id(concept.id).bnf = concept.bnf # update bnf in cache
context.sheerka.get_by_id(concept.id).set_bnf(concept.get_bnf()) # update bnf in cache
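ensure_bnf resolves a concept's textual BNF definition into a parsing expression only when it is missing, then stores the result back on the concept and in the sheerka cache. A minimal sketch of that lazy-resolution pattern, with stub names standing in for ExecutionContext and BnfDefinitionParser:

class ConceptStub:
    # stand-in for Concept: a textual definition plus a cached parse result
    def __init__(self, definition):
        self.definition = definition
        self._bnf = None

    def get_bnf(self):
        return self._bnf

    def set_bnf(self, bnf):
        self._bnf = bnf

calls = []

def parse_definition(text):
    # stand-in for BnfDefinitionParser.parse; counts how often it runs
    calls.append(text)
    return ("expression-for", text)

def ensure_bnf(concept):
    if not concept.get_bnf():
        concept.set_bnf(parse_definition(concept.definition))
    return concept.get_bnf()

c = ConceptStub("<sum> ::= <term> '+' <term>")
ensure_bnf(c)
ensure_bnf(c)
print(len(calls))  # 1: the definition was parsed once, then served from the cache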
+51 -48
@@ -4,6 +4,7 @@ from typing import Union
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.error import ErrorObj
from core.sheerka.ExecutionContext import ExecutionContext
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka_logger import get_logger
@@ -50,7 +51,7 @@ class NotInitializedNode(Node):
@dataclass()
class ErrorNode(Node):
class ErrorNode(Node, ErrorObj):
pass
@@ -89,9 +90,9 @@ class BaseParser:
PREFIX = "parsers."
def __init__(self, name, priority: int, enabled=True, yield_eof=False):
self.log = get_logger("parsers." + self.__class__.__name__)
self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)
# self.log = get_logger("parsers." + self.__class__.__name__)
# self.init_log = get_logger("init." + self.PREFIX + self.__class__.__name__)
# self.verbose_log = get_logger("verbose." + self.PREFIX + self.__class__.__name__)
self.name = self.PREFIX + name
self.priority = priority
@@ -141,23 +142,25 @@ class BaseParser:
return len(self.error_sink) > 0
def log_result(self, context, source, ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
if ret.status:
value = context.return_value_to_str(ret)
context.log(f"Recognized '{source}' as {value}", self.name)
else:
context.log(f"Failed to recognize '{source}'", self.name)
pass
# if not self.log.isEnabledFor(logging.DEBUG):
# return
#
# if ret.status:
# value = context.return_value_to_str(ret)
# context.log(f"Recognized '{source}' as {value}", self.name)
# else:
# context.log(f"Failed to recognize '{source}'", self.name)
def log_multiple_results(self, context, source, list_of_ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
context.log(f"Recognized '{source}' as multiple concepts", self.name)
for r in list_of_ret:
value = context.return_value_to_str(r)
context.log(f" Recognized '{value}'", self.name)
pass
# if not self.log.isEnabledFor(logging.DEBUG):
# return
#
# context.log(f"Recognized '{source}' as multiple concepts", self.name)
# for r in list_of_ret:
# value = context.return_value_to_str(r)
# context.log(f" Recognized '{value}'", self.name)
def get_return_value_body(self, sheerka, source, parsed, try_parse):
"""
@@ -221,35 +224,35 @@ class BaseParser:
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
return lst
@staticmethod
def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
"""
Create the source code, from the list of token
:param tokens: list of tokens
:param custom_switcher: to override the behaviour (the return value) of some token
:param tracker: keep track of the original token value when custom switched
:return:
"""
if tokens is None:
return ""
res = ""
if not hasattr(tokens, "__iter__"):
tokens = [tokens]
switcher = {
# TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
}
if custom_switcher:
switcher.update(custom_switcher)
for token in tokens:
value = switcher.get(token.type, lambda t: t.str_value)(token)
res += value
if tracker is not None and token.type in custom_switcher:
tracker[value] = token.value
return res
# @staticmethod
# def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
# """
# Create the source code, from the list of token
# :param tokens: list of tokens
# :param custom_switcher: to override the behaviour (the return value) of some token
# :param tracker: keep track of the original token value when custom switched
# :return:
# """
# if tokens is None:
# return ""
# res = ""
#
# if not hasattr(tokens, "__iter__"):
# tokens = [tokens]
#
# switcher = {
# # TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
# }
#
# if custom_switcher:
# switcher.update(custom_switcher)
#
# for token in tokens:
# value = switcher.get(token.type, lambda t: t.str_value)(token)
# res += value
# if tracker is not None and token.type in custom_switcher:
# tracker[value] = token.value
# return res
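The helper now lives in core.utils; it rebuilds source text from a token list, letting a per-token-kind switcher override the rendered value and an optional tracker remember the original values. A self-contained sketch of that switcher/tracker idea (the Token namedtuple below is a stand-in for the repo's token class):

from collections import namedtuple

Token = namedtuple("Token", "type str_value value")

def get_text_from_tokens(tokens, custom_switcher=None, tracker=None):
    custom_switcher = custom_switcher or {}
    res = ""
    for token in tokens:
        # render the token, letting the switcher override specific kinds
        value = custom_switcher.get(token.type, lambda t: t.str_value)(token)
        res += value
        if tracker is not None and token.type in custom_switcher:
            tracker[value] = token.value  # remember the original value
    return res

toks = [Token("WORD", "one ", "one"), Token("CONCEPT", "", ("sum", "42"))]
tracker = {}
print(get_text_from_tokens(toks, {"CONCEPT": lambda t: "c:" + t.value[0]}, tracker))
# one c:sum
print(tracker)  # {'c:sum': ('sum', '42')}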
@staticmethod
def get_tokens_boundaries(tokens):
@@ -14,7 +14,7 @@ class UnexpectedEndOfFileError(ErrorNode):
pass
class BnfParser(BaseParser):
class BnfDefinitionParser(BaseParser):
"""
Parser used to transform literal into ParsingExpression
example :
@@ -28,10 +28,10 @@ class BnfParser(BaseParser):
"""
NAME = "BnfDefinition"
def __init__(self, **kwargs):
super().__init__("Bnf", 50, False)
# self.error_sink = []
# self.name = BaseParser.PREFIX + "Bnf"
super().__init__(BnfDefinitionParser.NAME, 50, False)
self.lexer_iter = None
self._current = None
@@ -42,7 +42,7 @@ class BnfParser(BaseParser):
self.sheerka = None
def __eq__(self, other):
if not isinstance(other, BnfParser):
if not isinstance(other, BnfDefinitionParser):
return False
return True
@@ -294,7 +294,7 @@ class BnfParser(BaseParser):
expression.rule_name = token.value
self.next_token()
if BnfParser.is_expression_a_set(self.context, expression):
if BnfDefinitionParser.is_expression_a_set(self.context, expression):
root_concept = self.context.search(start_with_self=True,
predicate=lambda ec: ec.action == BuiltinConcepts.INIT_BNF,
get_obj=lambda ec: ec.action_context,
@@ -313,8 +313,8 @@ class BnfParser(BaseParser):
@staticmethod
def update_recurse_id(context, concept_id, expression):
if BnfParser.is_expression_a_set(context, expression):
if BnfDefinitionParser.is_expression_a_set(context, expression):
expression.recurse_id = expression.get_recurse_id(concept_id, expression.concept.id, expression.rule_name)
for element in expression.elements:
BnfParser.update_recurse_id(context, concept_id, element)
BnfDefinitionParser.update_recurse_id(context, concept_id, element)
+21 -18
@@ -11,6 +11,7 @@ from dataclasses import dataclass
from operator import attrgetter
import core.builtin_helpers
import core.utils
from cache.Cache import Cache
from core.builtin_concepts import BuiltinConcepts
from core.concept import DEFINITION_TYPE_BNF, DoNotResolve, ConceptParts, Concept
@@ -19,7 +20,7 @@ from core.tokenizer import Tokenizer, TokenKind, Token
from parsers.BaseNodeParser import BaseNodeParser, GrammarErrorNode, UnrecognizedTokensNode, ConceptNode, LexerNode
from parsers.BaseParser import BaseParser
PARSERS = ["AtomNode", "SyaNode", "Python"]
PARSERS = ["Sequence", "Sya", "Python"]
@dataclass
@@ -41,7 +42,7 @@ class ParsingContext:
:return:
"""
self.node.tokens = parser_helper.parser.parser_input.tokens[self.node.start: self.node.end + 1]
self.node.source = BaseParser.get_text_from_tokens(self.node.tokens)
self.node.source = core.utils.get_text_from_tokens(self.node.tokens)
def __mul__(self, other):
res = [self]
@@ -1044,17 +1045,17 @@ class BnfConceptParserHelper:
Adds a new entry,
makes a list if the property already exists
"""
if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
if prop_name not in _concept.get_compiled() or _concept.get_compiled()[prop_name] is None:
# new entry
_concept.compiled[prop_name] = value
_concept.get_compiled()[prop_name] = value
else:
# make a list if there was a value
previous_value = _concept.compiled[prop_name]
previous_value = _concept.get_compiled()[prop_name]
if isinstance(previous_value, list):
previous_value.append(value)
else:
new_value = [previous_value, value]
_concept.compiled[prop_name] = new_value
_concept.get_compiled()[prop_name] = new_value
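_add_prop keeps the first value for a rule-name property as a scalar and promotes it to a list when further values arrive. The same promotion rule in isolation, on a plain dict instead of the concept's compiled bag:

def add_prop(compiled, prop_name, value):
    if prop_name not in compiled or compiled[prop_name] is None:
        compiled[prop_name] = value           # first value: stored as-is
    else:
        previous = compiled[prop_name]
        if isinstance(previous, list):
            previous.append(value)            # already a list: append
        else:
            compiled[prop_name] = [previous, value]  # second value: promote to a list

bag = {}
add_prop(bag, "arg", 1)
add_prop(bag, "arg", 2)
add_prop(bag, "arg", 3)
print(bag)  # {'arg': [1, 2, 3]}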
def _look_for_concept_match(_underlying):
"""
@@ -1094,18 +1095,18 @@ class BnfConceptParserHelper:
if _underlying.parsing_expression.rule_name:
value = _get_underlying_value(_underlying)
_add_prop(_concept, _underlying.parsing_expression.rule_name, value)
_concept.metadata.need_validation = True
_concept.get_metadata().need_validation = True
elif isinstance(_underlying, NonTerminalNode):
for child in _underlying.children:
_process_rule_name(_concept, child)
if init_empty_body and concept.metadata.body is None:
if init_empty_body and concept.get_metadata().body is None:
value = _get_underlying_value(underlying)
concept.compiled[ConceptParts.BODY] = value
concept.get_compiled()[ConceptParts.BODY] = value
if underlying.parsing_expression.rule_name:
_add_prop(concept, underlying.parsing_expression.rule_name, value)
# KSI : Why don't we set concept.metadata.need_validation to True ?
# KSI : Why don't we set concept.get_metadata().need_validation to True ?
if isinstance(underlying, NonTerminalNode) and not isinstance(underlying.parsing_expression, ConceptExpression):
for node in underlying.children:
@@ -1147,8 +1148,11 @@ class ToUpdate:
class BnfNodeParser(BaseNodeParser):
NAME = "Bnf"
def __init__(self, **kwargs):
super().__init__("BnfNode", 50, **kwargs)
super().__init__(BnfNodeParser.NAME, 50, **kwargs)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
@@ -1162,11 +1166,11 @@ class BnfNodeParser(BaseNodeParser):
@staticmethod
def _is_eligible(concept):
"""
Predicate that selects concepts that must be handled by AtomNodeParser
Predicate that selects concepts that must be handled by BnfNodeParser
:param concept:
:return:
"""
return concept.metadata.definition_type == DEFINITION_TYPE_BNF
return concept.get_metadata().definition_type == DEFINITION_TYPE_BNF
@staticmethod
def get_valid(parsers_helpers):
@@ -1422,7 +1426,6 @@ class BnfNodeParser(BaseNodeParser):
with context.push(BuiltinConcepts.INIT_BNF, concept,
who=self.name,
obj=concept,
root_concept=concept,
desc=desc) as sub_context:
# get the parsing expression
to_skip = {concept.id}
@@ -1500,13 +1503,13 @@ class BnfNodeParser(BaseNodeParser):
desc = f"Resolve concept parsing expression for '{concept}'. {key_to_use=}"
with context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as sub_context:
if not concept.bnf: # 'if' is done outside to save a function call. Not sure it's worth it.
if not concept.get_bnf(): # 'if' is done outside to save a function call. Not sure it's worth it.
BaseNodeParser.ensure_bnf(sub_context, concept, self.name)
grammar[key_to_use] = UnderConstruction(concept.id)
if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
expression = concept.bnf
if concept.get_metadata().definition_type == DEFINITION_TYPE_BNF:
expression = concept.get_bnf()
desc = f"Bnf concept detected. Resolving parsing expression '{expression}'"
with sub_context.push(BuiltinConcepts.INIT_BNF, concept, who=self.name, obj=concept, desc=desc) as ssc:
ssc.add_inputs(expression=expression)
@@ -1630,7 +1633,7 @@ class BnfNodeParser(BaseNodeParser):
if isinstance(concept, Concept):
return concept
if concept in context.concepts:
if context.concepts and concept in context.concepts:
return context.concepts[concept]
return self.sheerka.get_by_key(concept)
+2 -2
@@ -8,7 +8,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.tokenizer import TokenKind, Keywords
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, SyntaxErrorNode
from parsers.BaseParser import Node, ErrorNode, NotInitializedNode, UnexpectedTokenErrorNode
from parsers.BnfParser import BnfParser
from parsers.BnfDefinitionParser import BnfDefinitionParser
class ParsingException(Exception):
@@ -221,7 +221,7 @@ class DefConceptParser(BaseCustomGrammarParser):
if tokens[0].type == TokenKind.COLON:
tokens = self.get_body(tokens[1:])
bnf_regex_parser = BnfParser()
bnf_regex_parser = BnfDefinitionParser()
desc = f"Resolving BNF {current_concept_def.definition}"
with self.context.push(BuiltinConcepts.INIT_BNF,
current_concept_def,
+6 -6
@@ -75,12 +75,12 @@ class ExactConceptParser(BaseParser):
index = int(token[len(VARIABLE_PREFIX):])
value = words[i]
concept.def_var_by_index(index, str_concept(value) if isinstance(value, tuple) else value)
concept.metadata.need_validation = True
if self.verbose_log.isEnabledFor(logging.DEBUG):
prop_name = concept.metadata.variables[index][0]
context.log(
f"Added variable {index}: {prop_name}='{words[i]}'.",
self.name)
concept.get_metadata().need_validation = True
# if self.verbose_log.isEnabledFor(logging.DEBUG):
# prop_name = concept.get_metadata().variables[index][0]
# context.log(
# f"Added variable {index}: {prop_name}='{words[i]}'.",
# self.name)
already_recognized.append(concept)
+17 -27
@@ -1,24 +1,14 @@
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
from core.builtin_helpers import parse_unrecognized, expect_one
from core.sheerka.services.SheerkaExecute import ParserInput, SheerkaExecute
from core.sheerka.services.SheerkaExecute import ParserInput
from core.sheerka.services.SheerkaRuleManager import SheerkaRuleManager, FormatAstNode
from core.tokenizer import Keywords
from core.utils import strip_tokens
from parsers.BaseCustomGrammarParser import BaseCustomGrammarParser, KeywordNotFound
from parsers.BaseParser import BaseParser, Node
@dataclass
class FormatAstNode:
pass
@dataclass
class FormatAstRawText(FormatAstNode):
text: str
@dataclass
class FormatRuleNode(Node):
tokens: dict
@@ -29,7 +19,7 @@ class FormatRuleNode(Node):
class FormatRuleParser(BaseCustomGrammarParser):
"""
Class that will parse formatting rule definitions
when xxx print yyy
eg: when xxx print yyy
where xxx will be evaluated in the context of BuiltinConcepts.EVAL_QUESTION_REQUESTED
and yyy is an internal way to describe a format (yet another one)
"""
@@ -81,7 +71,7 @@ class FormatRuleParser(BaseCustomGrammarParser):
return ret
def parse_rule(self):
parts = self.get_parts(self.KEYWORDS_VALUES)
parts = self.get_parts(self.KEYWORDS_VALUES, strip_tokens=True)
if parts is None:
return None
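Formatting rules follow the "when xxx print yyy" shape described in the docstring above; parse_rule splits the input on those keywords and hands the pieces to compile_when and compile_print. A hypothetical rule and a naive string split showing the intended grouping (the concrete keyword set and rule wording here are assumptions, not taken from the grammar):

rule = "when the result is a date print the result as ISO-8601"
when_part, print_part = rule[len("when "):].split(" print ", 1)
print(when_part)   # the result is a date
print(print_part)  # the result as ISO-8601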
@@ -108,19 +98,14 @@ class FormatRuleParser(BaseCustomGrammarParser):
:param tokens:
:return:
"""
source = self.sheerka.services[SheerkaExecute.NAME].get_parser_input(None, strip_tokens(tokens[1:]))
parsed = parse_unrecognized(self.context,
source,
parsers="all",
who=self.name,
prop=Keywords.WHEN,
filter_func=expect_one)
source = core.utils.get_text_from_tokens(core.utils.strip_tokens(tokens[1:]))
res = self.sheerka.services[SheerkaRuleManager.NAME].compile_when(self.context, self.name, source)
if not parsed.status:
self.add_error(parsed.value)
if not isinstance(res, list):
self.add_error(res.value)
return None
return parsed
return res
def get_print(self, tokens):
"""
@@ -128,5 +113,10 @@ class FormatRuleParser(BaseCustomGrammarParser):
:param tokens:
:return:
"""
source = BaseParser.get_text_from_tokens(strip_tokens(tokens[1:]))
return FormatAstRawText(source)
source = core.utils.get_text_from_tokens(core.utils.strip_tokens(tokens[1:]))
res = self.sheerka.services[SheerkaRuleManager.NAME].compile_print(self.context, source)
if not res.status:
self.add_error(res.value)
return None
return res.body
+10 -4
@@ -3,16 +3,24 @@ from typing import List
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import get_lexer_nodes_from_unrecognized, update_compiled
from core.concept import Concept
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, Token
from core.utils import get_n_clones
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.BaseNodeParser import SourceCodeNode, SourceCodeWithConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, UnexpectedEofNode, Node
from parsers.BnfNodeParser import BnfNodeParser
from parsers.PythonWithConceptsParser import PythonWithConceptsParser
from parsers.RuleParser import RuleParser
from parsers.SyaNodeParser import SyaNodeParser
# No need to check for Python code as the source code node will resolve to python code anyway
# I only look for concepts, so the Python parser is left out of the list below
PARSERS = ["BnfNode", "SyaNode", "AtomNode"]
PARSERS = [RuleParser.NAME,
SequenceNodeParser.NAME,
BnfNodeParser.NAME,
SyaNodeParser.NAME]
@dataclass
@@ -334,8 +342,6 @@ class FunctionParser(BaseParser):
res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(), function_node.last.to_unrecognized())]
function_name = function_node.first.str_value()
for param in function_node.parameters:
if isinstance(param.value, NamesNode):
# try to recognize concepts
@@ -383,7 +389,7 @@ class FunctionParser(BaseParser):
# make sure that concepts found can be evaluated
errors = []
for c in source_code_node.python_node.concepts.values():
for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
update_compiled(self.context, c, errors)
return res
+24 -3
@@ -20,12 +20,23 @@ class PythonErrorNode(ErrorNode):
# self.log.debug("-> PythonErrorNode: " + str(self.exception))
@dataclass()
class ConceptDetected(ErrorNode):
name: str
class PythonNode(Node):
def __init__(self, source, ast_=None, concepts=None):
def __init__(self, source, ast_=None, objects=None):
self.source = source
self.ast_ = ast_ if ast_ else ast.parse(source, mode="eval") if source else None
self.concepts = concepts or {} # when concepts are recognized in the expression
self.objects = objects or {} # when objects (mainly concepts or rules) are recognized in the expression
self.compiled = None
def get_compiled(self):
if self.compiled is None:
self.compiled = compile(self.ast_, "<string>", "eval")
return self.compiled
# def __repr__(self):
# return "PythonNode(parser_input='" + self.parser_input + "', ast=" + self.get_dump(self.ast_) + ")"
@@ -80,12 +91,16 @@ class PythonParser(BaseParser):
self.source = kwargs.get("source", "<undef>")
def parse(self, context, parser_input: ParserInput):
if not isinstance(parser_input, ParserInput):
return None
sheerka = context.sheerka
tree = None
tracker = {} # to keep track of concept tokens (c:xxx:)
python_switcher = {
TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value)
TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value),
TokenKind.RULE: lambda t: core.utils.encode_concept(t.value, "R")
}
try:
@@ -106,6 +121,12 @@ class PythonParser(BaseParser):
except LexerError as e:
self.error_sink.append(e)
# Python parser will refuse input that directly refers to a concept
if isinstance(tree, ast.Expression) and isinstance(tree.body, ast.Name):
if tree.body.id in tracker or context.sheerka.fast_resolve(tree.body.id, return_new=False) is not None:
context.log("It's a simple concept. Not for me.", self.name)
self.error_sink.append(ConceptDetected(tree.body.id))
if self.has_error:
ret = sheerka.ret(
self.name,
+18 -6
@@ -1,6 +1,6 @@
from core.builtin_concepts import BuiltinConcepts
from core.sheerka.services.SheerkaExecute import SheerkaExecute
from parsers.BaseNodeParser import ConceptNode
from parsers.BaseNodeParser import ConceptNode, RuleNode
from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser
from parsers.PythonParser import PythonParser
@@ -15,6 +15,9 @@ class PythonWithConceptsParser(BaseParser):
@staticmethod
def sanitize(identifier):
if identifier is None:
return ""
res = ""
for c in identifier:
res += c if c.isalnum() else "0"
@@ -46,7 +49,7 @@ class PythonWithConceptsParser(BaseParser):
identifiers_key = {}
python_ids_mappings = {}
def _get_identifier(c):
def _get_identifier(c, wrapper):
"""
Get an identifier for a concept.
Make sure to return the same identifier if the same concept
@@ -61,7 +64,7 @@ class PythonWithConceptsParser(BaseParser):
if id(c) in identifiers:
return identifiers[id(c)]
identifier = "__C__" + self.sanitize(c.key or c.name)
identifier = wrapper + self.sanitize(c.key or c.name)
if c.id:
identifier += "__" + c.id
@@ -71,7 +74,7 @@ class PythonWithConceptsParser(BaseParser):
else:
identifiers_key[identifier] = 0
identifier += "__C__"
identifier += wrapper
identifiers[id(c)] = identifier
return identifier
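sanitize maps every non-alphanumeric character to "0", and _get_identifier wraps the result in a marker (__C__ for concepts, now also __R__ for rules) plus collision counters and the object id. A simplified version of just the sanitize-and-wrap step (counters and ids omitted):

def sanitize(identifier):
    if identifier is None:
        return ""
    # non-alphanumeric characters become "0" so the result is a valid Python name
    return "".join(c if c.isalnum() else "0" for c in identifier)

def make_identifier(key, wrapper):
    return wrapper + sanitize(key) + wrapper

print(make_identifier("the answer", "__C__"))  # __C__the0answer__C__
print(make_identifier("rule-7", "__R__"))      # __R__rule07__R__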
@@ -82,10 +85,19 @@ class PythonWithConceptsParser(BaseParser):
if to_parse:
to_parse += " "
concept = node.concept
python_id = _get_identifier(concept)
python_id = _get_identifier(concept, "__C__")
to_parse += python_id
python_ids_mappings[python_id] = concept
elif isinstance(node, RuleNode):
source += node.source
if to_parse:
to_parse += " "
rule = node.rule
python_id = _get_identifier(rule, "__R__")
to_parse += python_id
python_ids_mappings[python_id] = rule
else:
source += node.source
to_parse += node.source
@@ -100,7 +112,7 @@ class PythonWithConceptsParser(BaseParser):
if result.status:
python_node = result.body.body
python_node.source = source
python_node.concepts = python_ids_mappings
python_node.objects = python_ids_mappings
return sheerka.ret(
self.name,
+88
@@ -0,0 +1,88 @@
from core.builtin_concepts import BuiltinConcepts
from core.rule import Rule, ACTION_TYPE_DEFERRED
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import LexerError, TokenKind
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
class RuleNotFound(ErrorNode):
def __init__(self, id_as_tuple):
self.key = id_as_tuple[0]
self.id = id_as_tuple[1]
def __repr__(self):
return f"RuleNotFound(id={self.id}, key={self.key}"
class RuleParser(BaseParser):
"""
Tries to recognize rules
"""
NAME = "Rule"
def __init__(self, **kwargs):
BaseParser.__init__(self, RuleParser.NAME, 80)
def parse(self, context, parser_input: ParserInput):
"""
text can be a string, but it can also be a list of tokens
:param context:
:param parser_input:
:return:
"""
context.log(f"Parsing '{parser_input}'", self.name)
sheerka = context.sheerka
if parser_input.is_empty():
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.IS_EMPTY))
try:
parser_input.reset()
parser_input.next_token()
if parser_input.token.type != TokenKind.RULE:
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text()))
token = parser_input.token
if parser_input.next_token():
reason = UnexpectedTokenErrorNode("Only one rule supported",
parser_input.token,
[TokenKind.EOF])
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.as_text(), reason=reason))
if token.value[1] is None:
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.NOT_IMPLEMENTED))
if token.value[1].isdigit():
rule = sheerka.get_rule_by_id(token.value[1])
else:
rule = Rule().set_id(token.value[1])
rule.metadata.action_type = ACTION_TYPE_DEFERRED
if sheerka.isinstance(rule, BuiltinConcepts.UNKNOWN_RULE):
return sheerka.ret(self.name,
False,
sheerka.new(BuiltinConcepts.ERROR,
body=[RuleNotFound(token.value)]))
body = sheerka.new(BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input.as_text(),
body=[rule],
try_parsed=[rule])
return sheerka.ret(self.name, True, body)
except LexerError as e:
context.log(f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
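RuleParser only accepts input consisting of a single RULE token: numeric ids are looked up in the cache, other ids become a deferred Rule stub, and everything else is declined. The same decision ladder reduced to plain Python with stand-in token tuples (not the repo's Token/ParserInput API):

def classify_rule_input(tokens):
    # each stand-in token is (kind, (key, rule_id))
    if not tokens:
        return "IS_EMPTY"
    if tokens[0][0] != "RULE":
        return "NOT_FOR_ME"
    if len(tokens) > 1:
        return "NOT_FOR_ME: only one rule supported"
    key, rule_id = tokens[0][1]
    if rule_id is None:
        return "NOT_IMPLEMENTED"
    return "lookup by id" if rule_id.isdigit() else "deferred rule stub"

print(classify_rule_input([("RULE", ("my rule", "12"))]))   # lookup by id
print(classify_rule_input([("RULE", ("my rule", "abc"))]))  # deferred rule stub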
@@ -8,8 +8,10 @@ from core.tokenizer import Tokenizer, TokenKind
from core.utils import strip_tokens, make_unique
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SyaNodeParser import SyaNodeParser
PARSERS = ["BnfNode", "SyaNode", "Python"]
PARSERS = [BnfNodeParser.NAME, SyaNodeParser.NAME, "Python"]
@dataclass()
@@ -199,7 +201,7 @@ class AtomConceptParserHelper:
return clone
class AtomNodeParser(BaseNodeParser):
class SequenceNodeParser(BaseNodeParser):
"""
Parser used to recognize atom concepts or sequences of atom concepts
An atom concept is a concept that does not have any property, though it may have a body
@@ -216,17 +218,19 @@ class AtomNodeParser(BaseNodeParser):
Note 'one plus two' will be recognized by the SyaParser
"""
NAME = "Sequence"
def __init__(self, **kwargs):
super().__init__("AtomNode", 50, **kwargs)
super().__init__(SequenceNodeParser.NAME, 50, **kwargs)
@staticmethod
def _is_eligible(concept):
"""
Predicate that selects concepts that must be handled by AtomNodeParser
Predicate that selects concepts that must be handled by SequenceNodeParser
:param concept:
:return:
"""
return len(concept.metadata.variables) == 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF
return len(concept.get_metadata().variables) == 0 and concept.get_metadata().definition_type != DEFINITION_TYPE_BNF
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
@@ -245,7 +249,7 @@ class AtomNodeParser(BaseNodeParser):
return a if isinstance(a, list) else [a]
concepts_by_name = as_list(self.sheerka.resolve(token.value))
concepts_by_name = as_list(self.sheerka.resolve(token))
concepts_by_first_keyword = new_instances(super().get_concepts(token, self._is_eligible))
if concepts_by_name is None:
@@ -368,8 +372,8 @@ class AtomNodeParser(BaseNodeParser):
for node in parser_helper.sequence:
# if isinstance(node, ConceptNode):
# if len(node.concept.metadata.variables) > 0:
# node.concept.metadata.is_evaluated = True # Do not try to evaluate those concepts
# if len(node.concept.get_metadata().variables) > 0:
# node.concept.get_metadata().is_evaluated = True # Do not try to evaluate those concepts
node.tokens = self.parser_input.tokens[node.start:node.end + 1]
node.fix_source()
+21 -20
@@ -7,6 +7,7 @@ from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import parse_function
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.global_symbols import CONCEPT_COMPARISON_CONTEXT
from core.sheerka.services.SheerkaComparisonManager import SheerkaComparisonManager
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
@@ -15,7 +16,7 @@ from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCo
SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode
PARSERS = ["BnfNode", "AtomNode", "Python"]
PARSERS = ["Sequence", "Bnf", "Python"]
function_parser_res = namedtuple("FunctionParserRes", 'to_out function')
@@ -159,7 +160,7 @@ class SyaConceptParserHelper:
return len(self.expected) == 0
def is_atom(self):
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
return len(self.concept.concept.get_metadata().variables) == 0 and len(self.expected) == 0
def is_next(self, token):
"""
@@ -1056,9 +1057,10 @@ class PostFixToItem:
class SyaNodeParser(BaseNodeParser):
NAME = "Sya"
def __init__(self, **kwargs):
super().__init__("SyaNode", 50, **kwargs)
super().__init__(SyaNodeParser.NAME, 50, **kwargs)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.sya_definitions = sheerka.resolved_sya_def
@@ -1085,16 +1087,15 @@ class SyaNodeParser(BaseNodeParser):
@staticmethod
def _is_eligible(concept):
"""
Predicate that selects concepts that must be handled by AtomNodeParser
Predicate that selects concepts that must be handled by SyaNodeParser
:param concept:
:return:
"""
# We only keep concepts that have parameters (refuse atoms)
# Bnf definitions are not supposed to be managed by this parser either
return len(concept.metadata.variables) > 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF
return len(concept.get_metadata().variables) > 0 and concept.get_metadata().definition_type != DEFINITION_TYPE_BNF
@staticmethod
def _get_sya_concept_def(parser, concept):
def _get_sya_concept_def(self, parser, concept):
sya_concept_def = SyaConceptDef(concept)
if concept.id in parser.sya_definitions:
# Manage when precedence and associativity are given in the unit tests
@@ -1105,9 +1106,9 @@ class SyaNodeParser(BaseNodeParser):
sya_concept_def.associativity = sya_def[1]
if parser.sheerka:
concept_weight = parser.sheerka.get_concepts_weights(BuiltinConcepts.PRECEDENCE)
if concept.id in concept_weight:
sya_concept_def.precedence = concept_weight[concept.id]
concept_weight = parser.sheerka.get_concepts_weights(BuiltinConcepts.PRECEDENCE, CONCEPT_COMPARISON_CONTEXT)
if concept.str_id in concept_weight:
sya_concept_def.precedence = concept_weight[concept.str_id]
if associativity := concept.get_prop(BuiltinConcepts.ASSOCIATIVITY):
sya_concept_def.associativity = SyaAssociativity(associativity)
@@ -1137,7 +1138,7 @@ class SyaNodeParser(BaseNodeParser):
res.extend(forked)
forked.clear()
res = [InFixToPostFix(context, context.in_context(BuiltinConcepts.DEBUG))]
res = [InFixToPostFix(context, context.debug_enabled)]
while self.parser_input.next_token(False):
for infix_to_postfix in res:
infix_to_postfix.reset()
@@ -1184,13 +1185,13 @@ class SyaNodeParser(BaseNodeParser):
infix_to_postfix.finalize(self.parser_input.pos)
_add_forked_to_res()
if context.in_context(BuiltinConcepts.DEBUG):
context.debug(f"Parsing {parser_input}")
context.debug(f"{len(res)} InfixToPostFix(s) found")
if context.debug_enabled:
context.debug(self.name, "infix_to_postfix", None, f"Parsing {parser_input}")
context.debug(self.name, "infix_to_postfix", "nb_found", f"{len(res)} InfixToPostFix(s) found")
for i, r in enumerate(res):
context.debug(f"#{i}")
context.debug(self.name, "infix_to_postfix", "infix_to_postfix", f"#{i}")
for line in r.debug:
context.debug(line)
context.debug(self.name, "infix_to_postfix", "infix_to_postfix", line)
return res
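The context.debug calls now carry a caller name, a category, and a key in addition to the message, which fits the DebugManager introduced by this commit. A minimal sketch of what such a structured debug channel could look like; the real DebugManager is not part of this excerpt, so the class and behaviour below are assumptions:

class DebugChannel:
    # hypothetical stand-in for the context's debug facility
    def __init__(self, enabled=True):
        self.debug_enabled = enabled
        self.records = []

    def debug(self, who, category, key, message):
        if self.debug_enabled:
            self.records.append((who, category, key, message))

ctx = DebugChannel()
ctx.debug("parsers.Sya", "infix_to_postfix", "nb_found", "2 InfixToPostFix(s) found")
print(ctx.records[0][3])  # 2 InfixToPostFix(s) found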
@@ -1221,21 +1222,21 @@ class SyaNodeParser(BaseNodeParser):
end = item.end
has_unrecognized = False
concept = sheerka.new_from_template(item.concept, item.concept.key)
for param_index in reversed(range(len(concept.metadata.variables))):
for param_index in reversed(range(len(concept.get_metadata().variables))):
inner_item = self.postfix_to_item(sheerka, postfixed)
if inner_item.start < start:
start = inner_item.start
if inner_item.end > end:
end = inner_item.end
has_unrecognized |= isinstance(inner_item, (UnrecognizedTokensNode, SourceCodeWithConceptNode)) or \
hasattr(inner_item, "has_unrecognized") and inner_item.has_unrecognized
hasattr(inner_item, "has_unrecognized") and inner_item.has_unrecognized
param_name = concept.metadata.variables[param_index][0]
param_name = concept.get_metadata().variables[param_index][0]
param_value = inner_item.concept if hasattr(inner_item, "concept") else \
[inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \
inner_item
concept.compiled[param_name] = param_value
concept.get_compiled()[param_name] = param_value
return PostFixToItem(concept, start, end, has_unrecognized)
+10 -3
@@ -3,11 +3,18 @@ from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes, update_compiled
from core.concept import Concept
from parsers.SequenceNodeParser import SequenceNodeParser
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from parsers.BnfNodeParser import BnfNodeParser
from parsers.SyaNodeParser import SyaNodeParser
PARSERS = ["EmptyString", "ShortTermMemory", "AtomNode", "BnfNode", "SyaNode", "Python"]
PARSERS = ["EmptyString",
"ShortTermMemory",
SequenceNodeParser.NAME,
BnfNodeParser.NAME,
SyaNodeParser.NAME,
"Python"]
@dataclass()
@@ -22,7 +29,7 @@ class UnrecognizedNodeParser(BaseParser):
"""
def __init__(self, **kwargs):
super().__init__("UnrecognizedNode", 45) # lower than AtomNode, BnfNode and SyaNode
super().__init__("UnrecognizedNode", 45) # lower than SequenceNode, BnfNode and SyaNode
def add_error(self, error):
if hasattr(error, "__iter__"):