Added ExactConceptParser

This commit is contained in:
2019-11-09 17:29:50 +01:00
parent a636198222
commit 576ce77740
12 changed files with 603 additions and 169 deletions
+76 -5
View File
@@ -2,6 +2,8 @@ import hashlib
from enum import Enum from enum import Enum
import logging import logging
from core.tokenizer import Tokenizer, TokenKind
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -20,6 +22,8 @@ class Concept:
""" """
props_to_serialize = ("id", "is_builtin", "name", "where", "pre", "post", "body", "desc") props_to_serialize = ("id", "is_builtin", "name", "where", "pre", "post", "body", "desc")
PROPERTY_PREFIX = "__var__"
def __init__(self, name=None, is_builtin=False, where=None, pre=None, post=None, body=None, desc=None, key=None): def __init__(self, name=None, is_builtin=False, where=None, pre=None, post=None, body=None, desc=None, key=None):
self.name = name self.name = name
self.is_builtin = is_builtin self.is_builtin = is_builtin
@@ -31,7 +35,7 @@ class Concept:
self.id = None self.id = None
self.key = key self.key = key
self.props = [] # list of Property for this concept self.props = {} # list of Property for this concept
self.functions = {} # list of helper functions self.functions = {} # list of helper functions
self.codes = {} # cached ast for the where, pre, post and body parts self.codes = {} # cached ast for the where, pre, post and body parts
@@ -54,10 +58,48 @@ class Concept:
def get_key(self): def get_key(self):
return self.key return self.key
def init_key(self, tokens=None):
    """
    Create the key for this concept and cache it on ``self.key``.

    Words that match a property name are replaced by an index-based
    placeholder (``PROPERTY_PREFIX`` + position in ``self.props``) so that
    two concepts differing only in variable names share the same key.
    String tokens contribute their unquoted content; whitespace tokens are
    skipped; words are joined with single spaces.

    Must be called only when the concept is fully initialized.
    The method is not called ``set_key`` to make sure that no other class
    sets the key by mistake.

    :param tokens: optional iterable of tokens; when None, ``self.name``
                   is tokenized.
    :return: self (fluent style, consistent with ``from_dict``/``add_codes``)
    """
    if self.key is not None:
        # Key already computed: keep it stable.
        # NOTE(review): the original returned ``self.key`` here while the
        # normal path returned ``self``; unified to ``self`` for a
        # consistent fluent interface — no visible caller used the early
        # return value.
        return self
    if tokens is None:
        tokens = iter(Tokenizer(self.name))
    # Order of appearance in self.props defines the placeholder index.
    variables = list(self.props.keys())
    parts = []
    for token in tokens:
        if token.type == TokenKind.EOF:
            break
        if token.type == TokenKind.WHITESPACE:
            continue
        if token.value in variables:
            parts.append(self.PROPERTY_PREFIX + str(variables.index(token.value)))
        elif token.type == TokenKind.STRING:
            # Strip the surrounding quotes from string literals.
            parts.append(token.value[1:-1])
        else:
            parts.append(token.value)
    self.key = " ".join(parts)
    return self
def add_codes(self, codes): def add_codes(self, codes):
""" """
From a dict of <ConceptParts, AST> Gets the ASTs for 'where', 'pre', 'post' and 'body'
fill the codes There ASTs are know when the concept is freshly parsed.
So the values are kept in cache.
For concepts loaded from sdp, these ASTs must be created again
:param codes: :param codes:
:return: :return:
""" """
@@ -68,6 +110,8 @@ class Concept:
if key in possibles_codes: if key in possibles_codes:
self.codes[ConceptParts(key)] = codes[key] self.codes[ConceptParts(key)] = codes[key]
return self
def get_digest(self): def get_digest(self):
""" """
Returns the digest of the event Returns the digest of the event
@@ -76,23 +120,47 @@ class Concept:
return hashlib.sha256(f"Concept:{self.name}{self.pre}{self.post}{self.body}".encode("utf-8")).hexdigest() return hashlib.sha256(f"Concept:{self.name}{self.pre}{self.post}{self.body}".encode("utf-8")).hexdigest()
def to_dict(self): def to_dict(self):
"""
Returns a dict representing 'self'
:return:
"""
props_as_dict = dict((prop, getattr(self, prop)) for prop in self.props_to_serialize) props_as_dict = dict((prop, getattr(self, prop)) for prop in self.props_to_serialize)
props_as_dict["props"] = [(p.name, p.value) for p in self.props] props_as_dict["props"] = [(p, self.props[p].value) for p in self.props]
return props_as_dict return props_as_dict
def from_dict(self, as_dict): def from_dict(self, as_dict):
"""
Initializes 'self' from a dict
:param as_dict:
:return:
"""
for prop in self.props_to_serialize: for prop in self.props_to_serialize:
if prop in as_dict: if prop in as_dict:
setattr(self, prop, as_dict[prop]) setattr(self, prop, as_dict[prop])
if "props" in as_dict: if "props" in as_dict:
for n, v in as_dict["props"]: for n, v in as_dict["props"]:
self.props.append(Property(n, v)) self.set_prop(n, v)
return self return self
def update_from(self, other): def update_from(self, other):
"""
Update self using the properties of another concept
This method is to mimic the class to instance pattern
'other' is the class, the template, and 'self' is a new instance
:param other:
:return:
"""
for prop in self.props_to_serialize: for prop in self.props_to_serialize:
setattr(self, prop, getattr(other, prop)) setattr(self, prop, getattr(other, prop))
return self
def set_prop(self, prop_name, prop_value):
    """Create or overwrite the property ``prop_name`` with ``prop_value``."""
    self.props[prop_name] = Property(prop_name, prop_value)

def set_prop_by_index(self, index, prop_value):
    """
    Set the value of the property at position ``index``.

    Relies on dict insertion order: properties are indexed in their order
    of appearance, matching the ``PROPERTY_PREFIX`` placeholders in keys.
    """
    name = list(self.props)[index]
    self.props[name] = Property(name, prop_value)
class ErrorConcept(Concept): class ErrorConcept(Concept):
NAME = "Error" NAME = "Error"
@@ -132,3 +200,6 @@ class Property:
def __init__(self, name, value): def __init__(self, name, value):
self.name = name self.name = name
self.value = value self.value = value
def __repr__(self):
    # Human-readable "name=value" form, handy in logs and debugger views.
    return f"{self.name}={self.value}"
+42 -20
View File
@@ -1,9 +1,9 @@
from dataclasses import dataclass from dataclasses import dataclass
from core.concept import Concept, ErrorConcept, Property, TooManySuccessConcept, ReturnValueConcept from core.concept import Concept, ErrorConcept, Property, TooManySuccessConcept, ReturnValueConcept
from parsers.PythonParser import PythonParser, PythonGetNamesVisitor, PythonNode from parsers.PythonParser import PythonGetNamesVisitor, PythonNode
from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError from sdp.sheerkaDataProvider import SheerkaDataProvider, Event, SheerkaDataProviderDuplicateKeyError
from parsers.DefaultParser import DefaultParser, DefConceptNode from parsers.DefaultParser import DefConceptNode, DefaultParser
import core.utils import core.utils
import logging import logging
@@ -50,6 +50,7 @@ class Sheerka(Concept):
NAME = "Sheerka" NAME = "Sheerka"
UNKNOWN_CONCEPT_NAME = "Unknown Concept" UNKNOWN_CONCEPT_NAME = "Unknown Concept"
SUCCESS_CONCEPT_NAME = "Success" SUCCESS_CONCEPT_NAME = "Success"
CONCEPT_TOO_LONG_CONCEPT_NAME = "Concept too long"
CONCEPTS_ENTRY = "All_Concepts" CONCEPTS_ENTRY = "All_Concepts"
BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts" BUILTIN_CONCEPTS_KEYS = "Builtins_Concepts"
@@ -60,6 +61,8 @@ class Sheerka(Concept):
super().__init__(Sheerka.NAME) super().__init__(Sheerka.NAME)
# cache of the most used concepts # cache of the most used concepts
# Note that these are only templates
# They are used as a footprint for instantiation
self.concepts_cache = {} self.concepts_cache = {}
# a concept can be instantiated # a concept can be instantiated
@@ -91,8 +94,9 @@ class Sheerka(Concept):
try: try:
self.init_logging() self.init_logging()
self.sdp = SheerkaDataProvider(root_folder) self.sdp = SheerkaDataProvider(root_folder)
self.parsers.append(lambda text: DefaultParser(text, PythonParser)) self.parsers.append(core.utils.get_class("parsers.DefaultParser.DefaultParser"))
self.parsers.append(lambda text: PythonParser(text)) self.parsers.append(core.utils.get_class("parsers.PythonParser.PythonParser"))
#self.parsers.append(core.utils.get_class("parsers.ExactConceptParser.ExactConceptParser"))
self.evaluators.append(core.utils.get_object("evaluators.DefaultEvaluator.DefaultEvaluator")) self.evaluators.append(core.utils.get_object("evaluators.DefaultEvaluator.DefaultEvaluator"))
self.evaluators.append(core.utils.get_object("evaluators.AddConceptEvaluator.AddConceptEvaluator")) self.evaluators.append(core.utils.get_object("evaluators.AddConceptEvaluator.AddConceptEvaluator"))
@@ -103,7 +107,7 @@ class Sheerka(Concept):
self.create_builtin_concepts() self.create_builtin_concepts()
except IOError as e: except IOError as e:
return ReturnValue(self, False, self.get(Sheerka.ERROR_CONCEPT_NAME), e) return ReturnValue(self, False, self.get(ErrorConcept.NAME), e)
return ReturnValue(self, True, self.get(Sheerka.SUCCESS_CONCEPT_NAME)) return ReturnValue(self, True, self.get(Sheerka.SUCCESS_CONCEPT_NAME))
@@ -129,12 +133,15 @@ class Sheerka(Concept):
self, self,
Concept(Sheerka.UNKNOWN_CONCEPT_NAME, key=Sheerka.UNKNOWN_CONCEPT_NAME), Concept(Sheerka.UNKNOWN_CONCEPT_NAME, key=Sheerka.UNKNOWN_CONCEPT_NAME),
Concept(Sheerka.SUCCESS_CONCEPT_NAME, key=Sheerka.SUCCESS_CONCEPT_NAME), Concept(Sheerka.SUCCESS_CONCEPT_NAME, key=Sheerka.SUCCESS_CONCEPT_NAME),
Concept(Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME, key=Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME),
ErrorConcept(), ErrorConcept(),
TooManySuccessConcept(), TooManySuccessConcept(),
ReturnValueConcept(), ReturnValueConcept(),
] ]
for concept in builtins: for concept in builtins:
self.add_in_cache(concept)
from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key) from_db = self.sdp.get_safe(self.CONCEPTS_ENTRY, concept.key)
if from_db is None: if from_db is None:
log.debug(f"'{concept.name}' concept is not found. Adding.") log.debug(f"'{concept.name}' concept is not found. Adding.")
@@ -143,7 +150,6 @@ class Sheerka(Concept):
else: else:
log.debug(f"Found concept '{from_db}'. Updating.") log.debug(f"Found concept '{from_db}'. Updating.")
concept.update_from(from_db) concept.update_from(from_db)
self.concepts_cache[concept.key] = concept
def init_logging(self): def init_logging(self):
if self.debug: if self.debug:
@@ -158,7 +164,7 @@ class Sheerka(Concept):
def eval(self, text): def eval(self, text):
evt_digest = self.sdp.save_event(Event(text)) evt_digest = self.sdp.save_event(Event(text))
exec_context = ExecutionContext(self, evt_digest) exec_context = ExecutionContext(self, evt_digest)
return_values = self.try_parse(text) return_values = self.try_parse(exec_context, text)
return_values = self.try_eval(exec_context, return_values) return_values = self.try_eval(exec_context, return_values)
# return_values = [] # return_values = []
@@ -172,17 +178,17 @@ class Sheerka(Concept):
return return_values return return_values
def try_parse(self, text): def try_parse(self, context, text):
result = [] result = []
log.debug(f"Parsing '{text}'") log.debug(f"Parsing '{text}'")
for parser in self.parsers: for parser in self.parsers:
p = parser(text) p = parser()
# try: # try:
# tree = p.parse() # tree = p.parse()
# result.append((p.name, tree)) # result.append((p.name, tree))
# except Exception as e: # except Exception as e:
# result.append((p.name, e)) # result.append((p.name, e))
tree = p.parse() tree = p.parse(context, text)
result.append(ReturnValue(p.name, not p.has_error, p.error_sink if p.has_error else tree)) result.append(ReturnValue(p.name, not p.has_error, p.error_sink if p.has_error else tree))
return result return result
@@ -235,11 +241,12 @@ class Sheerka(Concept):
setattr(concept, prop, source) setattr(concept, prop, source)
# try to find variables (eg props) # try to find variables (eg props)
# Note that with this method, the variables will be created in the order of appearance
for token in def_concept_node.tokens["name"]: for token in def_concept_node.tokens["name"]:
if token.value in get_names_visitor.names: if token.value in get_names_visitor.names:
concept.props.append(Property(token.value, None)) concept.set_prop(token.value, None)
concept.key = DefaultParser.get_concept_name(def_concept_node.tokens["name"], [p.name for p in concept.props]) concept.init_key(def_concept_node.tokens["name"])
concept.add_codes(def_concept_node.get_codes()) concept.add_codes(def_concept_node.get_codes())
self.set_id_if_needed(concept, False) self.set_id_if_needed(concept, False)
@@ -249,22 +256,34 @@ class Sheerka(Concept):
return ReturnValue(self.add_concept.__name__, False, ErrorConcept(body=error), error.args[0]) return ReturnValue(self.add_concept.__name__, False, ErrorConcept(body=error), error.args[0])
return ReturnValue(self.add_concept.__name__, True, concept) return ReturnValue(self.add_concept.__name__, True, concept)
def get(self, concept_name): def add_in_cache(self, concept):
"""
Adds a concept template in cache.
The cache is used as a proxy before looking at sdp
:param concept:
:return:
"""
self.concepts_cache[concept.key] = concept
def get(self, concept_key):
""" """
Tries to find a concept Tries to find a concept
:param concept_name: TODO: how to manage single vs multiple instances
:param concept_key:
:return: :return:
""" """
# first search in cache # first search in cache
if concept_name in self.concepts_cache: if concept_key in self.concepts_cache:
return self.concepts_cache[concept_name] return self.concepts_cache[concept_key]
return self.sdp.get(self.CONCEPTS_ENTRY, concept_name) return self.sdp.get_safe(self.CONCEPTS_ENTRY, concept_key) or \
self.new(self.UNKNOWN_CONCEPT_NAME, body=concept_key)
def new(self, concept, **kwargs): def new(self, concept, **kwargs):
""" """
Returns an instance of a new concept Returns an instance of a new concept
TODO: Checks if the concept is supposed to be unique (ex Sheerka, or the number 'one' for example)
:param concept: :param concept:
:param kwargs: :param kwargs:
:return: :return:
@@ -287,11 +306,14 @@ class Sheerka(Concept):
:return: :return:
""" """
if not isinstance(a, Concept) or not isinstance(b, Concept): if not isinstance(a, Concept):
return False raise SyntaxError("The first parameter of isinstance MUST be a concept")
b_key = b if isinstance(b, str) else b.key
# TODO : manage when a is the list of all possible b # TODO : manage when a is the list of all possible b
return a.key == b.key # for example, if a is a color, it will be found the entry 'All_Colors'
return a.key == b_key
@staticmethod @staticmethod
def test(): def test():
@@ -31,6 +31,20 @@ class TokenKind(Enum):
VBAR = "vbar" VBAR = "vbar"
AMPER = "amper" AMPER = "amper"
EQUALS = "=" EQUALS = "="
AT = "at"
BACK_QUOTE = "bquote" # `
BACK_SLASH = "bslash" # \
CARAT = "carat" # ^
DOLLAR = "dollar" # $
EMARK = "emark" # !
GREATER = "greater" # >
LESS = "less" # <
HASH = "HASH" # #
TILDE = "tilde" # ~
UNDERSCORE = "underscore" # _
DEGREE = "degree" # °
@dataclass() @dataclass()
@@ -159,6 +173,14 @@ class Tokenizer:
yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column) yield Token(TokenKind.QMARK, "?", self.i, self.line, self.column)
self.i += 1 self.i += 1
self.column += 1 self.column += 1
elif c == "|":
yield Token(TokenKind.VBAR, "|", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "&":
yield Token(TokenKind.AMPER, "&", self.i, self.line, self.column)
self.i += 1
self.column += 1
elif c == "\n" or c == "\r": elif c == "\n" or c == "\r":
newline = self.eat_newline(self.i) newline = self.eat_newline(self.i)
yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column) yield Token(TokenKind.NEWLINE, newline, self.i, self.line, self.column)
+2 -2
View File
@@ -1,5 +1,5 @@
from core.concept import TooManySuccessConcept
from core.sheerka import ReturnValue from core.sheerka import ReturnValue
from core.sheerka import Sheerka
from evaluators.BaseEvaluator import BaseEvaluator from evaluators.BaseEvaluator import BaseEvaluator
import logging import logging
@@ -32,7 +32,7 @@ class DefaultEvaluator(BaseEvaluator):
log.debug(f"{number_of_successful} / {total_items} good items. Too many success") log.debug(f"{number_of_successful} / {total_items} good items. Too many success")
return ReturnValue(self.name, return ReturnValue(self.name,
False, False,
context.sheerka.new(Sheerka.TOO_MANY_SUCCESS_CONCEPT_NAME, body=items)) context.sheerka.new(TooManySuccessConcept.NAME, body=items))
# only errors, i cannot help you # only errors, i cannot help you
log.debug(f"{total_items} items. Only errors") log.debug(f"{total_items} items. Only errors")
+4 -5
View File
@@ -1,5 +1,5 @@
from dataclasses import dataclass, field from dataclasses import dataclass
from parsers.tokenizer import TokenKind, Keywords from core.tokenizer import TokenKind, Keywords
@dataclass() @dataclass()
@@ -21,13 +21,12 @@ class ErrorNode(Node):
class BaseParser: class BaseParser:
def __init__(self, name, text): def __init__(self, name):
self.name = name self.name = name
self.text = text
self.has_error = False self.has_error = False
self.error_sink = [] self.error_sink = []
def parse(self): def parse(self, context, text):
pass pass
@staticmethod @staticmethod
+102 -94
View File
@@ -1,5 +1,5 @@
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
from parsers.tokenizer import Tokenizer, TokenKind, Token, Keywords from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
from dataclasses import dataclass, field from dataclasses import dataclass, field
import logging import logging
@@ -147,24 +147,29 @@ class BinaryNode(DefaultParserNode):
class DefaultParser(BaseParser): class DefaultParser(BaseParser):
def __init__(self, text, sub_parser): """
BaseParser.__init__(self, "DefaultParser", text) Parse sheerka specific grammar (like def concept)
"""
def __init__(self, sub_parser=None):
BaseParser.__init__(self, "DefaultParser")
self.sub_parser = sub_parser self.sub_parser = sub_parser
self.lexer = Tokenizer(text) self.lexer_iter = None
self._current = None
self.context = None
self.text = None
def reset_parser(self, context, text):
self.context = context
# hack before implementing all the sub parsers
if context:
self.sub_parser = context.sheerka.parsers[1]
self.text = text
self.lexer_iter = iter(Tokenizer(text)) self.lexer_iter = iter(Tokenizer(text))
self._current = None self._current = None
self.next_token() self.next_token()
def collect_tokens(self, *args):
result = []
for item in args:
if isinstance(item, Node):
result.extend(item.tokens)
else:
result.append(item)
return result
def add_error(self, error, next_token=True): def add_error(self, error, next_token=True):
self.has_error = True self.has_error = True
self.error_sink.append(error) self.error_sink.append(error)
@@ -186,21 +191,23 @@ class DefaultParser(BaseParser):
return return
@staticmethod @staticmethod
def get_concept_name(tokens, variables=None): def get_concept_key(tokens, variables=None):
name = "" key = ""
first = True first = True
for token in tokens: for token in tokens:
if token.type == TokenKind.EOF: if token.type == TokenKind.EOF:
break break
if token.type == TokenKind.WHITESPACE:
continue
if not first: if not first:
name += " " key += " "
if variables is not None and token.value in variables: if variables is not None and token.value in variables:
name += "__var__" + str(variables.index(token.value)) key += "__var__" + str(variables.index(token.value))
else: else:
name += token.value[1:-1] if token.type == TokenKind.STRING else token.value key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
first = False first = False
return name return key
@staticmethod @staticmethod
def fix_indentation(tokens): def fix_indentation(tokens):
@@ -242,7 +249,8 @@ class DefaultParser(BaseParser):
return tokens[4:] return tokens[4:]
def parse(self): def parse(self, context, text):
self.reset_parser(context, text)
return self.parse_statement() return self.parse_statement()
def parse_statement(self): def parse_statement(self):
@@ -277,7 +285,7 @@ class DefaultParser(BaseParser):
name_as_tokens.append(token) name_as_tokens.append(token)
self.next_token() self.next_token()
token = self.get_token() token = self.get_token()
name = self.get_concept_name(name_as_tokens) name = self.get_concept_key(name_as_tokens)
tokens_found["name"] = name_as_tokens tokens_found["name"] = name_as_tokens
# try to parse as, where, pre and post declarations # try to parse as, where, pre and post declarations
@@ -328,8 +336,8 @@ class DefaultParser(BaseParser):
# start = current_tokens[0].index # start = current_tokens[0].index
# end = current_tokens[-1].index + len(current_tokens[-1].value) # end = current_tokens[-1].index + len(current_tokens[-1].value)
sub_parser = self.sub_parser(current_tokens, source=keyword.value) sub_parser = self.sub_parser(source=keyword.value)
sub_tree = sub_parser.parse() sub_tree = sub_parser.parse(self.context, current_tokens)
if isinstance(sub_tree, ErrorNode): if isinstance(sub_tree, ErrorNode):
self.add_error(sub_tree, False) self.add_error(sub_tree, False)
asts[keyword] = sub_tree asts[keyword] = sub_tree
@@ -344,74 +352,74 @@ class DefaultParser(BaseParser):
log.debug(f"Found DefConcept node '{def_concept_node}'") log.debug(f"Found DefConcept node '{def_concept_node}'")
return def_concept_node return def_concept_node
def parse_expression(self): # def parse_expression(self):
return self.parse_addition() # return self.parse_addition()
#
def parse_addition(self): # def parse_addition(self):
left = self.parse_multiply() # left = self.parse_multiply()
token = self.get_token() # token = self.get_token()
if token is None or token.type == TokenKind.EOF: # if token is None or token.type == TokenKind.EOF:
return left # return left
#
if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5 # if token.type == TokenKind.NUMBER: # example 15 +5 or 15 -5
right = self.parse_addition() # right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right) # return BinaryNode(self.collect_tokens(left, token, right), TokenKind.PLUS, left, right)
#
if token.type not in (TokenKind.PLUS, TokenKind.MINUS): # if token.type not in (TokenKind.PLUS, TokenKind.MINUS):
return left # return left
#
self.next_token() # self.next_token()
right = self.parse_addition() # right = self.parse_addition()
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right) # return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
#
def parse_multiply(self): # def parse_multiply(self):
left = self.parse_atom() # left = self.parse_atom()
token = self.get_token() # token = self.get_token()
if token is None or token.type == TokenKind.EOF: # if token is None or token.type == TokenKind.EOF:
return left # return left
#
if token.type not in (TokenKind.STAR, TokenKind.SLASH): # if token.type not in (TokenKind.STAR, TokenKind.SLASH):
return left # return left
#
self.next_token() # self.next_token()
right = self.parse_multiply() # right = self.parse_multiply()
return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right) # return BinaryNode(self.collect_tokens(left, token, right), token.type, left, right)
#
def parse_atom(self): # def parse_atom(self):
token = self.get_token() # token = self.get_token()
if token.type == TokenKind.NUMBER: # if token.type == TokenKind.NUMBER:
self.next_token() # self.next_token()
return NumberNode([token], float(token.value) if '.' in token.value else int(token.value)) # return NumberNode([token], float(token.value) if '.' in token.value else int(token.value))
elif token.type == TokenKind.STRING: # elif token.type == TokenKind.STRING:
self.next_token() # self.next_token()
return StringNode([token], token.value[1:-1], token.value[0]) # return StringNode([token], token.value[1:-1], token.value[0])
elif token.type == TokenKind.IDENTIFIER: # elif token.type == TokenKind.IDENTIFIER:
if token.value == "true": # if token.value == "true":
self.next_token() # self.next_token()
return TrueNode([token]) # return TrueNode([token])
elif token.value == "false": # elif token.value == "false":
self.next_token() # self.next_token()
return FalseNode([token]) # return FalseNode([token])
elif token.value == "null": # elif token.value == "null":
self.next_token() # self.next_token()
return NullNode([token]) # return NullNode([token])
else: # else:
self.next_token() # self.next_token()
return VariableNode([token], token.value) # return VariableNode([token], token.value)
elif token.type == TokenKind.LPAR: # elif token.type == TokenKind.LPAR:
self.next_token() # self.next_token()
exp = self.parse_expression() # exp = self.parse_expression()
token = self.get_token() # token = self.get_token()
self.next_token() # self.next_token()
#
if token.type != TokenKind.RPAR: # if token.type != TokenKind.RPAR:
error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR]) # error = UnexpectedTokenErrorNode([token], "Right parenthesis not found.", [TokenKind.RPAR])
self.add_error(error) # self.add_error(error)
return error # return error
#
return exp # return exp
else: # else:
error = UnexpectedTokenErrorNode([token], "Unexpected token", # error = UnexpectedTokenErrorNode([token], "Unexpected token",
[TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false", # [TokenKind.NUMBER, TokenKind.STRING, TokenKind.IDENTIFIER, "true", "false",
"null", TokenKind.LPAR]) # "null", TokenKind.LPAR])
return self.add_error(error) # return self.add_error(error)
+107
View File
@@ -0,0 +1,107 @@
import itertools

from core.concept import Concept
from core.sheerka import ReturnValue
from core.tokenizer import Tokenizer, Keywords, TokenKind
from parsers.BaseParser import BaseParser
class ExactConceptParser(BaseParser):
    """
    Tries to recognize a single, already-defined concept in the input text.

    Every way of substituting words by variable placeholders is looked up
    in sheerka; each match yields a new concept instance with its
    properties filled from the matching words.
    """

    # Matching tries up to 2^n word combinations, so cap the input size.
    MAX_WORDS_SIZE = 10

    def __init__(self):
        BaseParser.__init__(self, "ConceptParser")

    def parse(self, context, text):
        """
        Parse ``text`` as one exact concept.

        :param context: execution context giving access to sheerka
        :param text: raw input text
        :return: a list of successful ReturnValue (one per matching
                 concept), or a single failed ReturnValue when the text is
                 too long or no concept matches.
        """
        res = []
        sheerka = context.sheerka
        words = self.get_words(text)
        if len(words) > self.MAX_WORDS_SIZE:
            return ReturnValue(self.name, False, sheerka.new(sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME))
        recognized = False
        for combination in self.combinations(words):
            concept_key = " ".join(combination)
            # Very important question to think about later:
            # must we return a new instance or the existing one?
            # That will depend on the context.
            # Let's return a new one for now and see if it works.
            concept = sheerka.new(concept_key)
            if not sheerka.isinstance(concept, sheerka.UNKNOWN_CONCEPT_NAME):
                # Fill the concept properties from the words that were
                # replaced by a placeholder in this combination.
                for i, token in enumerate(combination):
                    if token.startswith(Concept.PROPERTY_PREFIX):
                        index = int(token[len(Concept.PROPERTY_PREFIX):])
                        concept.set_prop_by_index(index, words[i])
                res.append(ReturnValue(self.name, True, concept))
                recognized = True
        if recognized:
            return res
        return ReturnValue(self.name, False, sheerka.new(sheerka.UNKNOWN_CONCEPT_NAME, body=text))

    @staticmethod
    def get_words(text):
        """Tokenize ``text`` and return the word values (whitespace and newlines dropped)."""
        res = []
        for t in iter(Tokenizer(text)):
            if t.type == TokenKind.EOF:
                break
            if t.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
                continue
            # Keyword tokens carry the enum member; use its string value.
            res.append(t.value.value if isinstance(t.value, Keywords) else t.value)
        return res

    def combinations(self, iterable):
        """
        Return every way of replacing words by variable placeholders.

        combinations(['foo', 'bar', 'baz']) -->
            ('foo', 'bar', 'baz'),
            ('__var__0', 'bar', 'baz'),
            ('foo', '__var__0', 'baz'),
            ('foo', 'bar', '__var__0'),
            ('__var__0', '__var__1', 'baz'),
            ('__var__0', 'bar', '__var__1'),
            ('foo', '__var__0', '__var__1'),
            ('__var__0', '__var__1', '__var__2')

        :return: a set of tuples (duplicate combinations collapse naturally)
        """
        pool = tuple(iterable)
        n = len(pool)
        res = set()
        # For each size r, pick which positions become variables.
        # itertools.combinations replaces the hand-rolled index rotation of
        # the original implementation and enumerates the same index sets.
        for r in range(n + 1):
            for indices in itertools.combinations(range(n), r):
                res.add(self.get_tuple(pool, indices))
        return res

    @staticmethod
    def get_tuple(pool, indices):
        """
        Build one combination: words selected by ``indices`` become ``__var__k``.

        Identical words map to the same placeholder, so every occurrence of
        a selected word is replaced (see the duplicate-handling TODO in the
        tests).
        """
        placeholders = {}
        for i in indices:
            word = pool[i]
            if word not in placeholders:
                # len(placeholders) is the next free placeholder index.
                placeholders[word] = f"{Concept.PROPERTY_PREFIX}{len(placeholders)}"
        return tuple(placeholders.get(word, word) for word in pool)
+18 -13
View File
@@ -26,36 +26,41 @@ class PythonNode(Node):
class PythonParser(BaseParser): class PythonParser(BaseParser):
def __init__(self, text, source="<undef>"): """
text = text if isinstance(text, str) else self.get_text_from_tokens(text) Parse Python scripts
text = text.strip() """
BaseParser.__init__(self, "PythonParser", text) def __init__(self, source="<undef>"):
BaseParser.__init__(self, "PythonParser")
self.source = source self.source = source
def parse(self): def parse(self, context, text):
text = text if isinstance(text, str) else self.get_text_from_tokens(text)
text = text.strip()
# first, try to parse an expression # first, try to parse an expression
res, tree, error = self.try_parse_expression() res, tree, error = self.try_parse_expression(text)
if not res: if not res:
# then try to parse a statement # then try to parse a statement
res, tree, error = self.try_parse_statement() res, tree, error = self.try_parse_statement(text)
if not res: if not res:
self.has_error = True self.has_error = True
error_node = PythonErrorNode(self.text, error) error_node = PythonErrorNode(text, error)
self.error_sink.append(error_node) self.error_sink.append(error_node)
return error_node return error_node
log.debug("Recognized python code.") log.debug("Recognized python code.")
return PythonNode(self.text, tree) return PythonNode(text, tree)
def try_parse_expression(self): def try_parse_expression(self, text):
try: try:
return True, ast.parse(self.text, f"<{self.source}>", 'eval'), None return True, ast.parse(text, f"<{self.source}>", 'eval'), None
except Exception as error: except Exception as error:
return False, None, error return False, None, error
def try_parse_statement(self): def try_parse_statement(self, text):
try: try:
return True, ast.parse(self.text, f"<{self.source}>", 'exec'), None return True, ast.parse(text, f"<{self.source}>", 'exec'), None
except Exception as error: except Exception as error:
return False, None, error return False, None, error
+152
View File
@@ -0,0 +1,152 @@
import pytest
from os import path
import shutil
import os
from core.concept import Concept, Property
from core.sheerka import Sheerka, ExecutionContext
from parsers.DefaultParser import DefaultParser
from parsers.ExactConceptParser import ExactConceptParser
tests_root = path.abspath("../build/tests")
root_folder = "init_folder"
@pytest.fixture(autouse=True)
def init_test():
    """Run each test inside a freshly-recreated build/tests working directory."""
    if path.exists(tests_root):
        shutil.rmtree(tests_root)
    # The tree was just removed (or never existed), so recreate it.
    os.makedirs(tests_root)
    previous_cwd = os.getcwd()
    os.chdir(tests_root)
    yield None
    os.chdir(previous_cwd)
def test_i_can_compute_combinations():
    # All 2^3 ways of substituting three distinct words by __var__<k>
    # placeholders, numbered in order of appearance.
    parser = ExactConceptParser()
    res = parser.combinations(["foo", "bar", "baz"])
    assert res == {('foo', 'bar', 'baz'),
                   ('__var__0', 'bar', 'baz'),
                   ('foo', '__var__0', 'baz'),
                   ('foo', 'bar', '__var__0'),
                   ('__var__0', '__var__1', 'baz'),
                   ('__var__0', 'bar', '__var__1'),
                   ('foo', '__var__0', '__var__1'),
                   ('__var__0', '__var__1', '__var__2')}
def test_i_can_compute_combinations_with_duplicates():
    # Duplicate words share the same placeholder in every combination.
    parser = ExactConceptParser()
    res = parser.combinations(["foo", "bar", "foo"])
    assert res == {('foo', 'bar', 'foo'),
                   ('__var__0', 'bar', '__var__0'),
                   ('foo', '__var__0', 'foo'),
                   ('__var__0', '__var__1', '__var__0'),
                   ('__var__1', '__var__0', '__var__1')}
    # TODO: the last tuple is not possible, so the algo can be improved
def test_i_can_recognize_a_simple_concept():
    # A cached concept with no variables is matched verbatim.
    sheerka = get_sheerka()
    concept = get_concept("hello world", [])
    sheerka.add_in_cache(concept)
    source = "hello world"
    context = ExecutionContext(sheerka, "xxxx")
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 1
    assert results[0].status
    assert results[0].value.key == concept.key
def test_i_can_recognize_concepts_defined_several_times():
    # "hello world" matches both the literal concept and "hello a"
    # (with a = "world"), so two results come back.
    sheerka = get_sheerka()
    sheerka.add_in_cache(get_concept("hello world", []))
    sheerka.add_in_cache(get_concept("hello a", ["a"]))
    source = "hello world"
    context = ExecutionContext(sheerka, "xxxx")
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 2
    results = sorted(results, key=lambda x: x.value.name)  # because of the usage of sets
    assert results[0].status
    assert results[0].value.name == "hello a"
    assert results[0].value.props["a"].value == "world"
    assert results[1].status
    assert results[1].value.name == "hello world"
def test_i_can_recognize_a_concept_with_variables():
    """'a + b' must match '10 + 5' and bind a="10", b="5"."""
    sheerka = get_sheerka()
    concept = get_concept("a + b", ["a", "b"])
    # Use the public cache API instead of poking concepts_cache directly,
    # consistent with the other tests in this file.
    sheerka.add_in_cache(concept)
    source = "10 + 5"
    context = ExecutionContext(sheerka, "xxxx")
    results = ExactConceptParser().parse(context, source)
    assert len(results) == 1
    assert results[0].status
    assert results[0].value.key == concept.key
    assert results[0].value.props["a"].value == "10"
    assert results[0].value.props["b"].value == "5"
def test_i_can_recognize_a_concept_with_duplicate_variables():
    """A variable appearing twice in the name must capture the same value at both spots."""
    sheerka = get_sheerka()
    concept = get_concept("a + b + a", ["a", "b"])
    sheerka.concepts_cache[concept.key] = concept
    context = ExecutionContext(sheerka, "xxxx")
    matches = ExactConceptParser().parse(context, "10 + 5 + 10")
    assert len(matches) == 1
    match = matches[0]
    assert match.status
    assert match.value.key == concept.key
    assert match.value.props["a"].value == "10"
    assert match.value.props["b"].value == "5"
def test_i_can_manage_unknown_concept():
    """A source matching no cached concept yields an UNKNOWN_CONCEPT error value."""
    sheerka = get_sheerka()
    context = ExecutionContext(sheerka, "xxxx")
    # a declaration is not a concept by itself
    outcome = ExactConceptParser().parse(context, "def concept hello world")
    assert not outcome.status
    assert sheerka.isinstance(outcome.value, Sheerka.UNKNOWN_CONCEPT_NAME)
def test_i_can_detect_concepts_too_long():
    """A source longer than any possible concept is rejected with CONCEPT_TOO_LONG."""
    sheerka = get_sheerka()
    context = ExecutionContext(sheerka, "xxxx")
    outcome = ExactConceptParser().parse(
        context, "a very very long concept that cannot be an unique one")
    assert not outcome.status
    assert sheerka.isinstance(outcome.value, Sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME)
def get_concept(name, variables):
    """Build a test Concept named *name* with one empty Property per variable.

    :param name: the concept name (may contain the variable words)
    :param variables: iterable of variable names to declare as props
    :return: the fully initialized Concept (key computed via init_key())
    """
    c = Concept(name=name)
    # no need to guard with `if variables:` — iterating an empty list is a no-op
    for v in variables:
        c.props[v] = Property(v, None)
    c.init_key()
    return c
def get_sheerka():
    """Return a Sheerka instance initialized on the test root folder."""
    instance = Sheerka()
    instance.initialize(root_folder)
    return instance
+38
View File
@@ -0,0 +1,38 @@
import pytest
from core.concept import Concept
# (name, declared variables, expected key) triples for init_key()
KEY_CASES = [
    ("my name is a", ["a"], "my name is __var__0"),
    ("a b c d", ["b", "c"], "a __var__0 __var__1 d"),
    ("a 'b c' d", ["b", "c"], "a b c d"),
    ("a | b", ["a", "b"], "__var__0 | __var__1"),
    ("a b a c", ["a", "b"], "__var__0 __var__1 __var__0 c"),
    ("a b a c", ["b", "a"], "__var__1 __var__0 __var__1 c"),
]


@pytest.mark.parametrize("name, variables, expected", KEY_CASES)
def test_i_can_get_concept_key(name, variables, expected):
    """init_key() replaces each declared variable word with its __var__N slot."""
    concept = Concept(name)
    for variable_name in variables:
        concept.set_prop(variable_name, None)
    concept.init_key()
    assert concept.key == expected
def test_i_can_serialize():
    """
    Test concept.to_dict()
    :return:
    """
    # TODO: implement once to_dict() is covered
    pass
def test_i_can_deserialize():
    """
    Test concept.from_dict()
    :return:
    """
    # TODO: implement once from_dict() is covered
    pass
+27 -22
View File
@@ -1,10 +1,11 @@
import pytest import pytest
from parsers.ExactConceptParser import ExactConceptParser
from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode from parsers.PythonParser import PythonParser, PythonNode, PythonErrorNode
from parsers.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError from core.tokenizer import Tokenizer, Token, TokenKind, Keywords, LexerError
from parsers.DefaultParser import DefaultParser from parsers.DefaultParser import DefaultParser
from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode from parsers.DefaultParser import NumberNode, StringNode, VariableNode, TrueNode, FalseNode, NullNode, BinaryNode
from parsers.DefaultParser import Node, UnexpectedTokenErrorNode, DefConceptNode, NopNode from parsers.DefaultParser import UnexpectedTokenErrorNode, DefConceptNode, NopNode
import ast import ast
@@ -39,6 +40,7 @@ def null():
def b(operator, left, right): def b(operator, left, right):
return BinaryNode([], operator, left, right) return BinaryNode([], operator, left, right)
def compare_ast(left, right): def compare_ast(left, right):
left_as_string = ast.dump(left) left_as_string = ast.dump(left)
left_as_string = left_as_string.replace(", ctx=Load()", "") left_as_string = left_as_string.replace(", ctx=Load()", "")
@@ -51,9 +53,8 @@ def compare_ast(left, right):
return left_as_string == right_as_string return left_as_string == right_as_string
def test_i_can_tokenize(): def test_i_can_tokenize():
source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=" source = "+*-/{}[]() ,;:.?\n\n\r\r\r\nidentifier_0\t \t10.15 10 'string\n' \"another string\"=|&"
tokens = list(Tokenizer(source)) tokens = list(Tokenizer(source))
assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1) assert tokens[0] == Token(TokenKind.PLUS, "+", 0, 1, 1)
assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2) assert tokens[1] == Token(TokenKind.STAR, "*", 1, 1, 2)
@@ -85,6 +86,8 @@ def test_i_can_tokenize():
assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1) assert tokens[27] == Token(TokenKind.WHITESPACE, " ", 59, 6, 1)
assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2) assert tokens[28] == Token(TokenKind.STRING, '"another string"', 60, 6, 2)
assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18) assert tokens[29] == Token(TokenKind.EQUALS, '=', 76, 6, 18)
assert tokens[30] == Token(TokenKind.VBAR, '|', 77, 6, 19)
assert tokens[31] == Token(TokenKind.AMPER, '&', 78, 6, 20)
@pytest.mark.parametrize("text, expected", [ @pytest.mark.parametrize("text, expected", [
@@ -220,8 +223,8 @@ def test_i_can_recognize_keywords(text, expected):
("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))), ("def concept h as 1 + 1", "h", ast.Expression(ast.BinOp(left=ast.Num(n=1), op=ast.Add(), right=ast.Num(n=1)))),
]) ])
def test_i_can_parse_def_concept(text, expected_name, expected_expr): def test_i_can_parse_def_concept(text, expected_name, expected_expr):
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
tree = parser.parse() tree = parser.parse(None, text)
assert isinstance(tree, DefConceptNode) assert isinstance(tree, DefConceptNode)
assert tree.name == expected_name assert tree.name == expected_name
if isinstance(tree.body, PythonNode): if isinstance(tree.body, PythonNode):
@@ -230,8 +233,6 @@ def test_i_can_parse_def_concept(text, expected_name, expected_expr):
assert tree.body == expected_expr assert tree.body == expected_expr
def test_i_can_parse_complex_def_concept_statement(): def test_i_can_parse_complex_def_concept_statement():
text = """def concept a plus b text = """def concept a plus b
where a,b where a,b
@@ -239,8 +240,8 @@ def test_i_can_parse_complex_def_concept_statement():
post isinstance(res, int) post isinstance(res, int)
as res = a + b as res = a + b
""" """
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
tree = parser.parse() tree = parser.parse(None, text)
assert not parser.has_error assert not parser.has_error
assert isinstance(tree, DefConceptNode) assert isinstance(tree, DefConceptNode)
assert tree.name == "a plus b" assert tree.name == "a plus b"
@@ -261,19 +262,20 @@ def concept add one to a as:
return x+1 return x+1
func(a) func(a)
""" """
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
tree = parser.parse() tree = parser.parse(None, text)
assert not parser.has_error assert not parser.has_error
assert isinstance(tree, DefConceptNode) assert isinstance(tree, DefConceptNode)
def test_i_can_use_colon_to_declare_indentation2(): def test_i_can_use_colon_to_declare_indentation2():
text = """ text = """
def concept add one to a as: def concept add one to a as:
def func(x): def func(x):
return x+1 return x+1
""" """
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
tree = parser.parse() tree = parser.parse(None, text)
assert not parser.has_error assert not parser.has_error
assert isinstance(tree, DefConceptNode) assert isinstance(tree, DefConceptNode)
@@ -285,8 +287,8 @@ def concept add one to a as
return x+1 return x+1
func(a) func(a)
""" """
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
tree = parser.parse() tree = parser.parse(None, text)
assert parser.has_error assert parser.has_error
assert isinstance(tree, DefConceptNode) assert isinstance(tree, DefConceptNode)
assert isinstance(parser.error_sink[0].exception, IndentationError) assert isinstance(parser.error_sink[0].exception, IndentationError)
@@ -304,8 +306,8 @@ def concept add one to a as:
func(a) func(a)
func(b) func(b)
""" """
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
tree = parser.parse() tree = parser.parse(None, text)
assert parser.has_error assert parser.has_error
assert isinstance(tree, DefConceptNode) assert isinstance(tree, DefConceptNode)
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode) assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
@@ -319,8 +321,8 @@ func(b)
("def concept as", Keywords.AS, ["<name>"]), ("def concept as", Keywords.AS, ["<name>"]),
]) ])
def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens): def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, expected_tokens):
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
parser.parse() parser.parse(None, text)
assert parser.has_error assert parser.has_error
assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode) assert isinstance(parser.error_sink[0], UnexpectedTokenErrorNode)
@@ -335,7 +337,10 @@ def test_i_can_detect_unexpected_token_error_in_def_concept(text, token_found, e
"def concept hello as 1+" "def concept hello as 1+"
]) ])
def test_i_can_detect_error_in_declaration(text): def test_i_can_detect_error_in_declaration(text):
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
parser.parse() parser.parse(None, text)
assert parser.has_error assert parser.has_error
assert isinstance(parser.error_sink[0], PythonErrorNode) assert isinstance(parser.error_sink[0], PythonErrorNode)
+13 -8
View File
@@ -37,8 +37,7 @@ def test_root_folder_is_created_after_initialization():
def test_lists_of_concepts_is_initialized(): def test_lists_of_concepts_is_initialized():
sheerka = Sheerka() sheerka = get_sheerka()
sheerka.initialize(root_folder)
assert len(sheerka.concepts_cache) > 1 assert len(sheerka.concepts_cache) > 1
@@ -53,14 +52,13 @@ def get_concept():
return x+y return x+y
func(a,b) func(a,b)
""" """
parser = DefaultParser(text, PythonParser) parser = DefaultParser(PythonParser)
return parser.parse() return parser.parse(None, text)
def test_i_can_add_a_concept(): def test_i_can_add_a_concept():
sheerka = get_sheerka()
concept = get_concept() concept = get_concept()
sheerka = Sheerka()
sheerka.initialize(root_folder)
res = sheerka.add_concept(ExecutionContext(sheerka, "xxx"), concept) res = sheerka.add_concept(ExecutionContext(sheerka, "xxx"), concept)
concept_found = res.value concept_found = res.value
@@ -76,7 +74,7 @@ def test_i_can_add_a_concept():
assert isinstance(concept_found.codes[ConceptParts.POST], ast.Expression) assert isinstance(concept_found.codes[ConceptParts.POST], ast.Expression)
assert isinstance(concept_found.codes[ConceptParts.BODY], ast.Module) assert isinstance(concept_found.codes[ConceptParts.BODY], ast.Module)
all_props = [p.name for p in concept_found.props] all_props = list(concept_found.props.keys())
assert all_props == ["a", "b"] assert all_props == ["a", "b"]
assert concept_found.key == "__var__0 + __var__1" assert concept_found.key == "__var__0 + __var__1"
@@ -123,7 +121,14 @@ def test_i_can_instantiate_a_concept():
""" """
Test the new() functionnality Test the new() functionnality
make sure that some Concept are singleton (ex Sheerka, True, False) make sure that some Concept are singleton (ex Sheerka, True, False)
but some other need a new instance everytime otherwise, make sure that new() returns a **new** instance
:return: :return:
""" """
pass pass
def get_sheerka():
sheerka = Sheerka()
sheerka.initialize(root_folder)
return sheerka