Added basic implementation for the where clause

2020-02-05 18:47:20 +01:00
parent a5a721094b
commit afc1e22949
35 changed files with 864 additions and 320 deletions
+19
@@ -50,6 +50,7 @@ class BuiltinConcepts(Enum):
CONCEPT_EVAL_REQUESTED = "concept eval requested"
REDUCE_REQUESTED = "reduce requested" # remove meaningless error when possible
NOT_A_SET = "not a set" # the concept has no entry in sets
WHERE_CLAUSE_FAILED = "where clause failed" # failed to validate where clause during evaluation
NODE = "node"
GENERIC_NODE = "generic node"
@@ -91,6 +92,7 @@ BuiltinErrors = [str(e) for e in {
BuiltinConcepts.CONCEPT_EVAL_ERROR,
BuiltinConcepts.CONCEPT_ALREADY_IN_SET,
BuiltinConcepts.NOT_A_SET,
BuiltinConcepts.WHERE_CLAUSE_FAILED
}]
"""
@@ -385,3 +387,20 @@ class ConceptAlreadyInSet(Concept):
@property
def concept_set(self):
return self.props["concept_set"].value
class WhereClauseFailed(Concept):
def __init__(self, concept=None):
super().__init__(BuiltinConcepts.WHERE_CLAUSE_FAILED,
True,
False,
BuiltinConcepts.WHERE_CLAUSE_FAILED)
self.set_metadata_value(ConceptParts.BODY, concept)
self.metadata.is_evaluated = True
def __repr__(self):
return f"WhereClauseFailed(concept={self.concept})"
@property
def concept(self):
return self.body
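For orientation, a minimal caller-side sketch (the `sheerka` instance and `offending_concept` are placeholders, not code from this commit): the evaluator later in this commit builds the failure via `sheerka.new`, and callers can detect it with `sheerka.isinstance`.
# Hypothetical usage; `sheerka` and `offending_concept` are placeholders.
failure = sheerka.new(BuiltinConcepts.WHERE_CLAUSE_FAILED, body=offending_concept)
if sheerka.isinstance(failure, BuiltinConcepts.WHERE_CLAUSE_FAILED):
    print(failure)  # WhereClauseFailed(concept=...), per __repr__ above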
+2 -1
@@ -231,7 +231,8 @@ class Concept:
if token.value in variables:
key += VARIABLE_PREFIX + str(variables.index(token.value))
else:
key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
value = token.value[1:-1] if token.type == TokenKind.STRING else token.value
key += value
first = False
self.metadata.key = key
@@ -45,7 +45,7 @@ class SheerkaCreateNewConcept:
# add the BNF if known
if concept.bnf:
concepts_definitions = self.sheerka.get_concept_definition()
concepts_definitions = self.sheerka.get_concepts_definitions(context)
concepts_definitions[concept] = concept.bnf
# check if it's a valid BNF or whether it breaks the known rules
@@ -61,22 +61,26 @@ class SheerkaCreateNewConcept:
# save the new concept in sdp
try:
# TODO : needs to make these calls atomic (or at least one single call)
# save the new concept
self.sheerka.sdp.add(
context.event.get_digest(),
self.sheerka.CONCEPTS_ENTRY,
concept,
use_ref=True)
# save it by id
self.sheerka.sdp.add(
context.event.get_digest(),
self.sheerka.CONCEPTS_BY_ID_ENTRY,
{concept.id: concept.get_digest()},
is_ref=True)
# update the definition table
if concepts_definitions is not None:
self.sheerka.sdp.set(
context.event.get_digest(),
self.sheerka.CONCEPTS_DEFINITIONS_ENTRY,
concepts_definitions,
concept_lexer_parser.encode_grammar(init_ret_value.body),
use_ref=True)
self.sheerka.concepts_definitions_cache = None # invalidate cache
except SheerkaDataProviderDuplicateKeyError as error:
context.log_error(logger, "Failed to create a new concept.", who=self.logger_name)
return self.sheerka.ret(
@@ -94,6 +98,3 @@ class SheerkaCreateNewConcept:
# process the return if needed
ret = self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.NEW_CONCEPT, body=concept))
return ret
+14
@@ -1,5 +1,14 @@
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from sdp.sheerkaDataProvider import SheerkaDataProvider
import pprint
import os
def get_pp():
rows, columns = os.popen('stty size', 'r').read().split()
pp = pprint.PrettyPrinter(width=int(columns), compact=True)  # stty output is a string
return pp
class SheerkaDump:
@@ -71,3 +80,8 @@ class SheerkaDump:
break
page_count += 1
def dump_state(self):
snapshot = self.sheerka.sdp.get_snapshot(SheerkaDataProvider.HeadFile)
state = self.sheerka.sdp.load_state(snapshot)
self.sheerka.log.info(get_pp().pformat(state.data))
@@ -147,9 +147,6 @@ class SheerkaEvaluateConcept:
if concept.metadata.is_evaluated:
return concept
# WHERE condition should already be validated by the parser.
# It's a mandatory condition for the concept before it can be recognized
#
# TODO : Validate the PRE condition
#
@@ -157,8 +154,8 @@ class SheerkaEvaluateConcept:
self.initialize_concept_asts(context, concept, logger)
# to make sure of the order, it doesn't use ConceptParts.get_parts()
# props must be evaluated first
all_metadata_to_eval = ["props", "where", "pre", "post", "body"]
# props must be evaluated first, body must be evaluated before where
all_metadata_to_eval = ["pre", "post", "props", "body", "where"]
for metadata_to_eval in all_metadata_to_eval:
if metadata_to_eval == "props":
@@ -186,6 +183,12 @@ class SheerkaEvaluateConcept:
else:
concept.values[part_key] = resolved
# validate where clause
if concept.metadata.where is not None:
where_value = concept.values[ConceptParts.WHERE]
if not (where_value is None or self.sheerka.value(where_value) is True):
return self.sheerka.new(BuiltinConcepts.WHERE_CLAUSE_FAILED, body=concept)
#
# TODO : Validate the POST condition
#
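Restated as a standalone predicate (a sketch, not code from this commit): a where clause passes only when it is absent or when its evaluated value is exactly True; truthy non-True values still fail.
def where_clause_passes(sheerka, concept):
    # Sketch of the acceptance rule used above; not part of the commit.
    if concept.metadata.where is None:
        return True  # no where clause at all
    where_value = concept.values[ConceptParts.WHERE]
    # only an explicit True passes; truthy values such as 1 or "yes" do not
    return where_value is None or sheerka.value(where_value) is True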
@@ -19,7 +19,7 @@ class History:
return msg
def __repr__(self):
return f"event={self.event!r}, status={self.status}, result={self.result}"
return f"History(event={self.event!r}, status={self.status}, result={self.result})"
def __eq__(self, other):
if id(self) == id(other):
@@ -38,6 +38,21 @@ class SheerkaSetsManager:
context.log_error(logger, "Failed to add to set.", who=self.logger_name)
return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0])
def add_concepts_to_set(self, context, concepts, concept_set, logger=None):
"""Adding multiple concepts at the same time"""
logger = logger or self.sheerka.log
context.log(logger, f"Adding concepts {concepts} to set {concept_set}", who=self.logger_name)
previous = self.sheerka.sdp.get_safe(GROUP_PREFIX + concept_set.id)
new_ids = [c.id for c in concepts] if previous is None else previous + [c.id for c in concepts]
try:
self.sheerka.sdp.set(context.event.get_digest(), GROUP_PREFIX + concept_set.id, new_ids)
return self.sheerka.ret(self.logger_name, True, self.sheerka.new(BuiltinConcepts.SUCCESS))
except Exception as error:
context.log_error(logger, "Failed to add to set.", who=self.logger_name)
return self.sheerka.ret(self.logger_name, False, ErrorConcept(error), error.args[0])
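A minimal usage sketch, assuming the sets manager, context, and concepts already exist (all names below are placeholders):
ret = sets_manager.add_concepts_to_set(context, [apple, pear], fruits_set)
if not ret.status:
    print("could not add concepts:", ret.value)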
def get_set_elements(self, concept):
"""
Concept is supposed to be a set
+26 -9
@@ -22,6 +22,7 @@ import logging
# BuiltinConcepts.AFTER_EVALUATION]
CONCEPT_LEXER_PARSER_CLASS = "parsers.ConceptLexerParser.ConceptLexerParser"
BNF_PARSER_CLASS = "parsers.BnfParser.BnfParser"
CONCEPTS_FILE = "_concepts.txt"
@@ -52,7 +53,7 @@ class Sheerka(Concept):
# cache for concept definitions,
# Primarily used for unit tests that do not have access to sdp
self.concepts_definition_cache = {}
self.concepts_definitions_cache = {}
#
# cache for concepts grammars
@@ -187,7 +188,8 @@ class Sheerka(Concept):
def initialize_concepts_definitions(self, execution_context):
self.init_log.debug("Initializing concepts definitions")
definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
# definitions = self.sdp.get_safe(self.CONCEPTS_DEFINITIONS_ENTRY, load_origin=False)
definitions = self.get_concepts_definitions(execution_context)
if definitions is None:
self.init_log.debug("No BNF defined")
@@ -389,14 +391,26 @@ class Sheerka(Concept):
return result or self._get_unknown(('id', concept_id))
def get_concept_definition(self):
if self.concepts_definition_cache:
return self.concepts_definition_cache
def get_concepts_definitions(self, context):
if self.concepts_definitions_cache:
return self.concepts_definitions_cache
self.concepts_definition_cache = self.sdp.get_safe(
encoded = self.sdp.get_safe(
self.CONCEPTS_DEFINITIONS_ENTRY,
load_origin=False) or {}
return self.concepts_definition_cache
self.concepts_definitions_cache = {}
bnf_parser = self.parsers[BNF_PARSER_CLASS]()
for k, v in encoded.items():
key, id_ = core.utils.unstr_concept(k)
concept = self.new((key, id_))
rule_result = bnf_parser.parse(context, v)
if rule_result.status:
self.concepts_definitions_cache[concept] = rule_result.value.value
else:
self.log.error(f"Failed to load bnf rule for concept {key}")
return self.concepts_definitions_cache
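A sketch of the decode path (the stored rule is invented): persisted keys are str_concept() strings that are turned back into template concepts, and each value is a BNF string the BnfParser re-parses into a ParsingExpression tree.
import core.utils

# Illustrative shapes only; nothing here is read from a real store.
persisted = {"c:greeting:": "('hello'|'hi')"}   # what sdp would hold
key, id_ = core.utils.unstr_concept("c:greeting:")
assert (key, id_) == ("greeting", None)         # becomes the cache's Concept key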
def new(self, concept_key, **kwargs):
"""
@@ -411,7 +425,7 @@ class Sheerka(Concept):
else:
concept_id = None
template = self.get(concept_key, concept_id)
template = self.get_by_id(concept_id) if not concept_key else self.get(concept_key, concept_id)
# manage concept not found
if self.isinstance(template, BuiltinConcepts.UNKNOWN_CONCEPT) and \
@@ -579,7 +593,10 @@ class Sheerka(Concept):
self.during_restore = True
with open(CONCEPTS_FILE, "r") as f:
for line in f.readlines():
self.log.info(line.strip())
line = line.strip()
if line == "" or line.startswith("#"):
continue
self.log.info(line)
self.evaluate_user_input(line)
self.during_restore = False
except IOError:
+27 -17
@@ -266,10 +266,10 @@ class Tokenizer:
self.column = 1
self.line += 1
elif c == "c" and self.i + 1 < self.text_len and self.text[self.i + 1] == ":":
concept_name = self.eat_concept_name(self.i + 2, self.line, self.column)
yield Token(TokenKind.CONCEPT, concept_name, self.i, self.line, self.column)
self.i += len(concept_name) + 3
self.column += len(concept_name) + 3
name, id, length = self.eat_concept(self.i + 2, self.line, self.column + 2)
yield Token(TokenKind.CONCEPT, (name, id), self.i, self.line, self.column)
self.i += length + 2
self.column += length + 2
elif c.isalpha() or c == "_":
identifier = self.eat_identifier(self.i)
token_type = TokenKind.KEYWORD if identifier in self.KEYWORDS else TokenKind.IDENTIFIER
@@ -297,31 +297,41 @@ class Tokenizer:
yield Token(TokenKind.EOF, "", self.i, self.line, self.column)
def eat_concept_name(self, start, line, column):
result = ""
def eat_concept(self, start, line, column):
key, id, buffer = None, None, ""
i = start
end_colon_found = False
processing_key = True
while i < self.text_len:
c = self.text[i]
c = self.text[i]
if c == "\n":
raise LexerError(f"New line is forbidden in concept name", result, i, line, column + 2 + len(result))
raise LexerError(f"New line in concept name", self.text[start:i], i, line, column + i - start)
if c == ":":
end_colon_found = True
if processing_key:
key = buffer if buffer else None
else:
id = buffer if buffer else None
i += 1 # eat the colon
break
result += c
if c == "|":
key = buffer if buffer else None
buffer = ""
processing_key = False
i += 1
continue
buffer += c
i += 1
else:
raise LexerError(f"Missing ending colon", self.text[start:i], i, line, column + i - start)
if not end_colon_found:
raise LexerError(f"Missing ending colon", result, i, line, column + 2 + len(result))
if (key, id) == (None, None):
raise LexerError(f"Concept identifiers not found", "", start, line, column)
if result == "":
raise LexerError(f"Concept name not found", result, start, line, column + 2 + len(result))
return result
return key, id, i - start
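For reference, the (key, id) values the reworked eat_concept yields for the three accepted forms (the concrete names are invented; only the c:...: framing and the | separator come from this hunk):
# Full source text    -> value of the TokenKind.CONCEPT token
# "c:apple:"          -> ("apple", None)   key only
# "c:apple|42:"       -> ("apple", "42")   key and id
# "c:|42:"            -> (None, "42")      id only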
def eat_whitespace(self, start):
result = self.text[start]
+108 -37
@@ -1,6 +1,7 @@
import importlib
import inspect
import pkgutil
import re
from core.tokenizer import TokenKind
@@ -239,43 +240,6 @@ def pp(items):
return " \n" + " \n".join(str(item) for item in items)
def decode_concept(concept_repr):
"""
if concept_repr is like :c:key:id:
return the key and the id
:param concept_repr:
:return:
"""
if not (concept_repr and isinstance(concept_repr, str) and concept_repr.startswith(":c:")):
return None, None
i = 3
length = len(concept_repr)
key = ""
while i < length:
if concept_repr[i] == ":":
break
key += concept_repr[i]
i += 1
else:
return None, None
i += 1
if i >= length:
return key, None
id = ""
while i < length:
if concept_repr[i] == ":":
break
id += concept_repr[i]
i += 1
else:
return None, None
return key, id
def decode_enum(enum_repr: str):
"""
Tries to transform ClassName.Name into an enum
@@ -300,3 +264,110 @@ def decode_enum(enum_repr: str):
except TypeError:
return None
def str_concept(t):
"""
The (key, id) identifiers of a concept are stored in a tuple;
return them as "c:key|id:", with key and id separated by a pipe.
A None key becomes an empty string and a None id is omitted.
>>> assert str_concept(("key", "id")) == "c:key|id:"
>>> assert str_concept((None, "id")) == "c:|id:"
>>> assert str_concept(("key", None)) == "c:key:"
>>> assert str_concept((None, None)) == ""
:param t:
:return:
"""
if isinstance(t, tuple):
key, id_ = t[0], t[1]
else:
key, id_ = t.key, t.id
if key is None and id_ is None:
return ""
result = 'c:' if key is None else "c:" + key
if id_:
result += "|" + id_
return result + ":"
def unstr_concept(concept_repr):
"""
if concept_repr is like c:key|id: (as produced by str_concept)
return the key and the id
:param concept_repr:
:return:
"""
if not (concept_repr and isinstance(concept_repr, str) and concept_repr.startswith("c:")):
return None, None
i = 2
length = len(concept_repr)
key = ""
while i < length:
c = concept_repr[i]
if c in (":", "|"):
break
key += c
i += 1
else:
return None, None
if c == ":":
return key if key != "" else None, None
i += 1
id = ""
while i < length:
c = concept_repr[i]
if c == ":":
break
id += c
i += 1
else:
return None, None
return key if key != "" else None, id if id != "" else None
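A round-trip sketch (identifiers invented): unstr_concept inverts str_concept for every form str_concept can emit.
import core.utils

for pair in (("key", "id"), ("key", None), (None, "id")):
    assert core.utils.unstr_concept(core.utils.str_concept(pair)) == pair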
def encode_concept(t, use_concept=False):
"""
Given a tuple of (concept key, concept id)
Create a valid Python identifier that can be parsed back
>>> assert encode_concept(("key", "id")) == "__C__KEY_key__ID_id__C__"
>>> assert encode_concept((None, "id")) == "__C__KEY_00None00__ID_id__C__"
>>> assert encode_concept(("key", None)) == "__C__KEY_key__ID_00None00__C__"
>>> assert encode_concept(("key", "id"), True) == "__C__USE_CONCEPT__KEY_key__ID_id__C__"
:param t:
:param use_concept:
:return:
"""
key, id_ = (t[0], t[1]) if isinstance(t, tuple) else (t.key, t.id)
prefix = "__C__USE_CONCEPT" if use_concept else "__C"
sanitized_key = "".join(c if c.isalnum() else "0" for c in key) if key else "00None00"
return prefix + f"__KEY_{sanitized_key}__ID_{id_ or '00None00'}__C__"
decode_regex = re.compile(r"__KEY_(\w+)__ID_(\w+)__C__")
def decode_concept(text):
"""
Decode what was encoded by encode_concept
:param text:
:return:
"""
use_concept = text.startswith("__C__USE_CONCEPT")
m = decode_regex.search(text)
lookup = {"00None00": None}
if m:
key = lookup.get(m.group(1), m.group(1))
id_ = lookup.get(m.group(2), m.group(2))
return key, id_, use_concept
return None, None, None
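And the matching round trip for the Python-identifier encoding (key and id invented), mirroring the doctests above:
import core.utils

encoded = core.utils.encode_concept(("greeting", "42"), use_concept=True)
assert encoded == "__C__USE_CONCEPT__KEY_greeting__ID_42__C__"
assert core.utils.decode_concept(encoded) == ("greeting", "42", True)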
+65 -51
@@ -8,6 +8,7 @@ from evaluators.BaseEvaluator import OneReturnValueEvaluator
from parsers.PythonParser import PythonNode
import ast
import core.ast.nodes
import core.utils
class PythonEvaluator(OneReturnValueEvaluator):
@@ -40,9 +41,11 @@ class PythonEvaluator(OneReturnValueEvaluator):
not_for_me = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=node)
return sheerka.ret(self.name, False, not_for_me, parents=[return_value])
# get locals
my_locals = self.get_locals(context, node)
context.log(self.verbose_log, f"locals={my_locals}", self.name)
# eval
if isinstance(node.ast_, ast.Expression):
context.log(self.verbose_log, "Evaluating using 'eval'.", self.name)
compiled = compile(node.ast_, "<string>", "eval")
@@ -53,6 +56,7 @@ class PythonEvaluator(OneReturnValueEvaluator):
context.log(self.verbose_log, f"{evaluated=}", self.name)
return sheerka.ret(self.name, True, evaluated, parents=[return_value])
except Exception as error:
context.log_error(self.verbose_log, error, self.name)
error = sheerka.new(BuiltinConcepts.ERROR, body=error)
@@ -65,16 +69,19 @@ class PythonEvaluator(OneReturnValueEvaluator):
"concepts": context.sheerka.dump_handler.dump_concepts,
"definitions": context.sheerka.dump_handler.dump_definitions,
"history": context.sheerka.dump_handler.dump_history,
"state": context.sheerka.dump_handler.dump_state,
}
if context.obj:
context.log(self.verbose_log,
f"Concept '{context.obj}' is in context. Adding its properties to locals if any.", self.name)
f"Concept '{context.obj}' is in context. Adding it and its properties to locals.", self.name)
for prop_name, prop_value in context.obj.props.items():
if not isinstance(prop_value.value, Concept):
my_locals[prop_name] = prop_value.value
else:
if isinstance(prop_value.value, Concept):
my_locals[prop_name] = context.sheerka.value(prop_value.value)
else:
my_locals[prop_name] = prop_value.value
my_locals["self"] = context.obj.body
node_concept = core.ast.nodes.python_to_concept(node.ast_)
unreferenced_names_visitor = UnreferencedNamesVisitor(context.sheerka)
@@ -89,16 +96,17 @@ class PythonEvaluator(OneReturnValueEvaluator):
return_concept = False
else:
concept_key, concept_id, return_concept = self.resolve_name(context, name)
c_key, c_id, return_concept = self.resolve_name(name)
if concept_key in my_locals:
if c_key in my_locals:
context.log(self.verbose_log, f"Using value from property.", self.name)
continue
context.log(self.verbose_log, f"Instantiating new concept.", self.name)
concept = context.sheerka.new((concept_key, concept_id))
context.log(self.verbose_log, f"Instantiating new concept with {c_key=}, {c_id=}.", self.name)
new = context.sheerka.new
concept = new((None, c_id)) if c_id else new(c_key)
if context.sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
context.log(self.verbose_log, f"'{concept_key}' is not a concept. Skipping.", self.name)
context.log(self.verbose_log, f"({c_key=}, {c_id=}) is not a concept. Skipping.", self.name)
continue
context.log(self.verbose_log, f"Evaluating '{concept}'", self.name)
@@ -110,62 +118,68 @@ class PythonEvaluator(OneReturnValueEvaluator):
if evaluated.key == concept.key:
my_locals[name] = evaluated if return_concept else context.sheerka.value(evaluated)
if self.locals:
if self.locals:  # when extra values are given, add them
my_locals.update(self.locals)
return my_locals
def resolve_name(self, context, to_resolve):
@staticmethod
def resolve_name(to_resolve):
"""
Try to decode a name produced by core.utils.encode_concept,
i.e. __C__KEY_key__ID_id__C__
or
__C__USE_CONCEPT__KEY_key__ID_id__C__
:param to_resolve:
:return:
"""
if not to_resolve.startswith("__C__"):
return to_resolve, None, False
context.log(self.verbose_log, f"Resolving name '{to_resolve}'.", self.name)
if len(to_resolve) >= 18 and to_resolve[:18] == "__C__USE_CONCEPT__":
use_concept = True
index = 18
key, id_, use_concept = core.utils.decode_concept(to_resolve)
if key or id_:
return key, id_, use_concept
else:
use_concept = False
index = 5
try:
next_index = to_resolve.index("__", index)
if next_index == index:
context.log(self.verbose_log, f"Error: no key between '__'.", self.name)
return None
concept_key = to_resolve[index: next_index]
except ValueError:
context.log(self.verbose_log, f"Error: Missing trailing '__'.", self.name)
return None
if next_index == len(to_resolve) - 5:
context.log(self.verbose_log, f"Recognized concept '{concept_key}'", self.name)
return concept_key, None, use_concept
index = next_index + 2
try:
next_index = to_resolve.index("__", index)
if next_index == index:
context.log(self.verbose_log, f"Error: no id between '__'.", self.name)
return None
concept_id = to_resolve[index: next_index]
except ValueError:
context.log(self.verbose_log, f"Recognized concept '{concept_key}'.", self.name)
return concept_key, None, use_concept
context.log(self.verbose_log, f"Recognized concept '{concept_key}' (id='{concept_id}').", self.name)
return concept_key, concept_id, use_concept
return to_resolve, None, False
#
# if not to_resolve.startswith("__C__"):
# return to_resolve, None, False
#
# context.log(self.verbose_log, f"Resolving name '{to_resolve}'.", self.name)
#
# if len(to_resolve) >= 18 and to_resolve[:18] == "__C__USE_CONCEPT__":
# use_concept = True
# index = 18
# else:
# use_concept = False
# index = 5
#
# try:
# next_index = to_resolve.index("__", index)
# if next_index == index:
# context.log(self.verbose_log, f"Error: no key between '__'.", self.name)
# return None
# concept_key = to_resolve[index: next_index]
# except ValueError:
# context.log(self.verbose_log, f"Error: Missing trailing '__'.", self.name)
# return None
#
# if next_index == len(to_resolve) - 5:
# context.log(self.verbose_log, f"Recognized concept '{concept_key}'", self.name)
# return concept_key, None, use_concept
#
# index = next_index + 2
# try:
# next_index = to_resolve.index("__", index)
# if next_index == index:
# context.log(self.verbose_log, f"Error: no id between '__'.", self.name)
# return None
#
# concept_id = to_resolve[index: next_index]
# except ValueError:
# context.log(self.verbose_log, f"Recognized concept '{concept_key}'.", self.name)
# return concept_key, None, use_concept
#
# context.log(self.verbose_log, f"Recognized concept '{concept_key}' (id='{concept_id}').", self.name)
# return concept_key, concept_id, use_concept
@staticmethod
def expr_to_expression(expr):
+29 -2
@@ -2,8 +2,9 @@ from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords
from core.tokenizer import TokenKind, Keywords, Token
from core.sheerka_logger import get_logger
import core.utils
import logging
@@ -35,8 +36,34 @@ class ErrorNode(Node):
@dataclass()
class UnexpectedTokenErrorNode(ErrorNode):
message: str
token: Token
expected_tokens: list
def __eq__(self, other):
if id(other) == id(self):
return True
if not isinstance(other, UnexpectedTokenErrorNode):
return False
if self.message != other.message:
return False
if self.token.type != other.token.type or self.token.value != other.token.value:
return False
if len(self.expected_tokens) != len(other.expected_tokens):
return False
for i, t in enumerate(self.expected_tokens):
if t != other.expected_tokens[i]:
return False
return True
def __hash__(self):
return hash((self.message, self.token, tuple(self.expected_tokens)))  # lists are unhashable
class BaseParser:
PREFIX = "parsers."
@@ -108,7 +135,7 @@ class BaseParser:
switcher = {
TokenKind.KEYWORD: lambda t: Keywords(t.value).value,
TokenKind.CONCEPT: lambda t: "c:" + t.value + ":",
TokenKind.CONCEPT: lambda t: core.utils.str_concept(t.value),
}
if custom_switcher:
+39 -24
@@ -5,7 +5,8 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, StrMatch
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
@dataclass()
@@ -119,11 +120,11 @@ class BnfParser(BaseParser):
tree = None
try:
self.reset_parser(context, text)
tree = self.parser_outer_rule_name()
tree = self.parse_choice()
token = self.get_token()
if token and token.type != TokenKind.EOF:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", []))
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, []))
except LexerError as e:
self.add_error(e, False)
@@ -136,10 +137,11 @@ class BnfParser(BaseParser):
return ret
def parser_outer_rule_name(self):
return self.parser_rule_name(self.parse_choice)
def parse_choice(self):
"""
a | b | c
:return:
"""
sequence = self.parse_sequence()
self.eat_white_space()
@@ -159,9 +161,13 @@ class BnfParser(BaseParser):
sequence = self.parse_sequence()
elements.append(sequence)
return OrderedChoice(*elements)
return self.eat_rule_name_if_needed(OrderedChoice(*elements))
def parse_sequence(self):
"""
a b c
:return:
"""
expr_and_modifier = self.parse_modifier()
token = self.get_token()
if token is None or \
@@ -185,30 +191,31 @@ class BnfParser(BaseParser):
sequence = self.parse_modifier()
elements.append(sequence)
return Sequence(*elements)
return self.eat_rule_name_if_needed(Sequence(*elements))
def parse_modifier(self):
expression = self.parser_inner_rule_name()
"""
a? | a* | a+
:return:
"""
expression = self.parse_expression()
token = self.get_token()
if token.type == TokenKind.QMARK:
self.next_token()
return Optional(expression)
return self.eat_rule_name_if_needed(Optional(expression))
if token.type == TokenKind.STAR:
self.next_token()
return ZeroOrMore(expression)
return self.eat_rule_name_if_needed(ZeroOrMore(expression))
if token.type == TokenKind.PLUS:
self.next_token()
return OneOrMore(expression)
return self.eat_rule_name_if_needed(OneOrMore(expression))
return expression
def parser_inner_rule_name(self):
return self.parser_rule_name(self.parse_expression)
def parse_expression(self):
token = self.get_token()
if token.type == TokenKind.EOF:
@@ -216,15 +223,21 @@ class BnfParser(BaseParser):
if token.type == TokenKind.LPAR:
self.nb_open_par += 1
self.next_token()
expression = self.parse_choice()
expr = self.parse_choice()
token = self.get_token()
if token.type == TokenKind.RPAR:
self.nb_open_par -= 1
self.next_token()
return expression
return self.eat_rule_name_if_needed(expr)
else:
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.RPAR]))
return expression
self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.RPAR]))
return expr
if token.type == TokenKind.CONCEPT:
self.next_token()
concept = self.sheerka.new((token.value[0], token.value[1]))
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(concept) else ConceptExpression(concept)
return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.IDENTIFIER:
self.next_token()
@@ -247,14 +260,15 @@ class BnfParser(BaseParser):
body=("key", concept_name)))
return None
else:
return concept
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(concept) else ConceptExpression(concept)
expr.rule_name = concept.name
return expr
ret = StrMatch(core.utils.strip_quotes(token.value))
self.next_token()
return ret
return self.eat_rule_name_if_needed(ret)
def parser_rule_name(self, next_to_parse):
expression = next_to_parse()
def eat_rule_name_if_needed(self, expression):
token = self.get_token()
if token is None or token.type != TokenKind.EQUALS:
return expression
@@ -263,7 +277,8 @@ class BnfParser(BaseParser):
token = self.get_token()
if token is None or token.type != TokenKind.IDENTIFIER:
return self.add_error(UnexpectedTokenErrorNode(f"Unexpected token '{token}'", [TokenKind.IDENTIFIER]))
return self.add_error(
UnexpectedTokenErrorNode(f"Unexpected token '{token}'", token, [TokenKind.IDENTIFIER]))
expression.rule_name = token.value
self.next_token()
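An illustrative rule text for the new = name suffix (the rule itself is invented, and the suffix is kept adjacent because whitespace handling around '=' is not shown in this diff); parse() and the returned wrapper follow the hunks above.
rule_result = bnf_parser.parse(context, "('hello'|'hi')=word")
if rule_result.status:
    choice = rule_result.value.value  # OrderedChoice whose rule_name is "word"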
+58 -10
@@ -243,6 +243,9 @@ class ParsingExpression:
def parse(self, parser):
return self._parse(parser)
def add_rule_name_if_needed(self, text):
return text + "=" + self.rule_name if self.rule_name else text
class ConceptExpression(ParsingExpression):
"""
@@ -257,7 +260,7 @@ class ConceptExpression(ParsingExpression):
self.concept = concept
def __repr__(self):
return f"{self.concept}"
return self.add_rule_name_if_needed(f"{self.concept}")
def __eq__(self, other):
if not super().__eq__(other):
@@ -352,7 +355,7 @@ class Sequence(ParsingExpression):
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})"
return self.add_rule_name_if_needed(f"({to_str})")
class OrderedChoice(ParsingExpression):
@@ -375,7 +378,7 @@ class OrderedChoice(ParsingExpression):
def __repr__(self):
to_str = "| ".join(repr(n) for n in self.elements)
return f"({to_str})"
return self.add_rule_name_if_needed(f"({to_str})")
class Optional(ParsingExpression):
@@ -413,7 +416,7 @@ class Optional(ParsingExpression):
return f"{self.elements[0]}?"
else:
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})?"
return self.add_rule_name_if_needed(f"({to_str})?")
class Repetition(ParsingExpression):
@@ -467,7 +470,7 @@ class ZeroOrMore(Repetition):
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})*"
return self.add_rule_name_if_needed(f"({to_str})*")
class OneOrMore(Repetition):
@@ -507,7 +510,7 @@ class OneOrMore(Repetition):
def __repr__(self):
to_str = ", ".join(repr(n) for n in self.elements)
return f"({to_str})+"
return self.add_rule_name_if_needed(f"({to_str})+")
class UnorderedGroup(Repetition):
@@ -541,13 +544,13 @@ class StrMatch(Match):
Matches a literal
"""
def __init__(self, to_match, rule_name="", root=False, ignore_case=True):
super(Match, self).__init__(rule_name=rule_name, root=root)
def __init__(self, to_match, rule_name="", ignore_case=True):
super(Match, self).__init__(rule_name=rule_name)
self.to_match = to_match
self.ignore_case = ignore_case
def __repr__(self):
return f"'{self.to_match}'"
return self.add_rule_name_if_needed(f"'{self.to_match}'")
def __eq__(self, other):
if not super().__eq__(other):
@@ -699,10 +702,14 @@ class ConceptLexerParser(BaseParser):
else:
ret = ConceptExpression(expression, rule_name=expression.name)
concepts_to_resolve.add(expression)
elif isinstance(expression, ConceptExpression):
elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
if expression.rule_name is None or expression.rule_name == "":
expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
else expression.concept
if isinstance(expression.concept, str):
concept = self.get_concept(expression.concept)
if self.sheerka.is_known(concept):
expression.concept = concept
concepts_to_resolve.add(expression.concept)
ret = expression
elif isinstance(expression, str):
@@ -955,6 +962,47 @@ class ConceptLexerParser(BaseParser):
return concept
def encode_grammar(self, grammar):
"""
Transform the grammar into something that can easily be serialized
:param grammar:
:return:
"""
def _encode(expression):
if isinstance(expression, StrMatch):
res = f"'{expression.to_match}'"
elif isinstance(expression, ConceptExpression):
res = core.utils.str_concept(expression.concept)
elif isinstance(expression, Sequence):
res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
elif isinstance(expression, OrderedChoice):
res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
elif isinstance(expression, Optional):
res = _encode(expression.nodes[0]) + "?"
elif isinstance(expression, ZeroOrMore):
res = _encode(expression.nodes[0]) + "*"
elif isinstance(expression, OneOrMore):
res = _encode(expression.nodes[0]) + "+"
if expression.rule_name:
res += "=" + expression.rule_name
return res
result = {}
for k, v in grammar.items():
key = core.utils.str_concept(k)
value = _encode(v)
result[key] = value
return result
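A sketch of the serialized form (the grammar is invented; greeting_concept stands for a Concept whose key is 'greeting'): keys go through str_concept and values are re-parseable BNF strings, with =rule_name appended when a rule name is set.
grammar = {greeting_concept: OrderedChoice(StrMatch("hello"), StrMatch("hi"))}
encoded = concept_lexer_parser.encode_grammar(grammar)
# expected shape: {"c:greeting:": "('hello'|'hi')"}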
@staticmethod
def get_bests(results):
"""
+4 -2
@@ -1,9 +1,10 @@
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass, field
from dataclasses import dataclass
import ast
import logging
import core.utils
from parsers.ConceptLexerParser import ConceptNode
@@ -71,7 +72,7 @@ class PythonParser(BaseParser):
tree = None
python_switcher = {
TokenKind.CONCEPT: lambda t: f"__C__USE_CONCEPT__{t.value}__C__"
TokenKind.CONCEPT: lambda t: core.utils.encode_concept(t.value, True)
}
try:
@@ -136,6 +137,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
def visit_Name(self, node):
self.names.add(node.id)
class LexerNodeParserHelperForPython:
"""Helper class to parse mix of concepts and Python"""
+18 -8
@@ -223,7 +223,7 @@ class PickleSerializer(BaseSerializer):
class StateSerializer(PickleSerializer):
def __init__(self, ):
def __init__(self):
PickleSerializer.__init__(
self,
lambda obj: core.utils.get_full_qualified_name(obj) == "sdp.sheerkaDataProvider.State",
@@ -239,13 +239,23 @@ class ConceptSerializer(JsonSerializer):
return isinstance(obj, Concept)
class DictionarySerializer(PickleSerializer):
def __init__(self, ):
PickleSerializer.__init__(
self,
lambda obj: isinstance(obj, dict),
"D",
1)
class DictionarySerializer(BaseSerializer):
def __init__(self):
super().__init__("D", 1)
def matches(self, obj):
return isinstance(obj, dict)
def dump(self, stream, obj, context):
stream.write(json.dumps(obj, default=json_default_converter).encode("utf-8"))
stream.seek(0)
return stream
def load(self, stream, context):
json_stream = stream.read().decode("utf-8")
obj = json.loads(json_stream)
return obj
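A minimal round-trip sketch for the JSON-backed dictionary serializer, assuming an io.BytesIO can stand in for the real stream (context is unused by these two methods):
import io

serializer = DictionarySerializer()
stream = serializer.dump(io.BytesIO(), {"answer": 42}, context=None)
assert serializer.load(stream, context=None) == {"answer": 42}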
class ExecutionContextSerializer(BaseSerializer):
+1 -1
@@ -70,7 +70,7 @@ class SheerkaPickler:
elif utils.is_enum(k):
k_str = core.utils.get_full_qualified_name(k) + "." + k.name
elif isinstance(k, Concept):
k_str = f":c:{k.key}:{k.id}:"
k_str = core.utils.str_concept(k)
else:
k_str = k
+1 -1
@@ -90,7 +90,7 @@ class SheerkaUnpickler:
if key == "null":
return None
concept_key, concept_id = core.utils.decode_concept(key)
concept_key, concept_id = core.utils.unstr_concept(key)
if concept_key is not None:
return self.sheerka.new((concept_key, concept_id)) if concept_id else self.sheerka.new(concept_key)