Refactored to allow ConceptEvaluator
This commit is contained in:
+25
-1
@@ -15,17 +15,34 @@ class NopNode(Node):
|
||||
return "nop"
|
||||
|
||||
|
||||
class NotInitializedNode(Node):
    """Marker node used as the value of a part that has not been set."""

    def __repr__(self):
        # Fixed text, so an unset part is easy to spot when a node is printed.
        return "**N/A**"
|
||||
|
||||
|
||||
@dataclass()
|
||||
class ErrorNode(Node):
|
||||
pass
|
||||
|
||||
|
||||
class BaseParser:
|
||||
PREFIX = "Parsers:"
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
self.name = self.PREFIX + name
|
||||
self.has_error = False
|
||||
self.error_sink = []
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, self.__class__):
|
||||
return False
|
||||
return self.name == other.name
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.name)
|
||||
|
||||
def parse(self, context, text):
|
||||
pass
|
||||
|
||||
@@ -38,3 +55,10 @@ class BaseParser:
|
||||
value = Keywords(token.value).value if token.type == TokenKind.KEYWORD else token.value
|
||||
res += value
|
||||
return res
|
||||
|
||||
@staticmethod
|
||||
def log_result(log, text, ret):
|
||||
if ret.status:
|
||||
log.debug(f"Recognized '{text}' as {ret.value}")
|
||||
else:
|
||||
log.debug(f"Failed to recognize '{text}'")
|
||||
|
||||
+303
-230
@@ -1,4 +1,6 @@
|
||||
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode
|
||||
from core.builtin_concepts import BuiltinConcepts, ReturnValueConcept
|
||||
from core.concept import ConceptParts
|
||||
from parsers.BaseParser import BaseParser, Node, NopNode, ErrorNode, NotInitializedNode
|
||||
from core.tokenizer import Tokenizer, TokenKind, Token, Keywords
|
||||
from dataclasses import dataclass, field
|
||||
import logging
|
||||
@@ -8,16 +10,10 @@ log = logging.getLogger(__name__)
|
||||
|
||||
@dataclass()
|
||||
class DefaultParserNode(Node):
|
||||
tokens: list = field(compare=False)
|
||||
|
||||
def is_same(self, other):
|
||||
if type(self) != type(other):
|
||||
return False
|
||||
|
||||
if hasattr(self, "value") and self.value != other.value:
|
||||
return False
|
||||
|
||||
return True
|
||||
"""
|
||||
Base node for all default parser nodes
|
||||
"""
|
||||
tokens: list = field(compare=False, repr=False)
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -30,8 +26,8 @@ class UnexpectedTokenErrorNode(DefaultParserErrorNode):
|
||||
message: str
|
||||
expected_tokens: list
|
||||
|
||||
def __post_init__(self):
|
||||
log.debug("-> UnexpectedTokenErrorNode: " + self.message)
|
||||
# def __post_init__(self):
|
||||
# log.debug("-> UnexpectedTokenErrorNode: " + self.message)
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -41,8 +37,8 @@ class SyntaxErrorNode(DefaultParserErrorNode):
|
||||
"""
|
||||
message: str
|
||||
|
||||
def __post_init__(self):
|
||||
log.debug("-> SyntaxErrorNode: " + self.message)
|
||||
# def __post_init__(self):
|
||||
# log.debug("-> SyntaxErrorNode: " + self.message)
|
||||
|
||||
|
||||
@dataclass()
|
||||
@@ -52,117 +48,209 @@ class CannotHandleErrorNode(DefaultParserErrorNode):
|
||||
"""
|
||||
text: str
|
||||
|
||||
def __post_init__(self):
|
||||
log.debug("-> CannotHandleErrorNode: " + self.text)
|
||||
# def __post_init__(self):
|
||||
# log.debug("-> CannotHandleErrorNode: " + self.text)
|
||||
|
||||
#
|
||||
# @dataclass()
|
||||
# class NumberNode(DefaultParserNode):
|
||||
# value: object
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return str(self.value)
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class StringNode(DefaultParserNode):
|
||||
# value: str
|
||||
# quote: str
|
||||
#
|
||||
# def is_same(self, other):
|
||||
# if not super(StringNode, self).is_same(other):
|
||||
# return False
|
||||
# return self.quote == other.quote
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return self.quote + self.value + self.quote
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class VariableNode(DefaultParserNode):
|
||||
# value: str
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return self.value
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class TrueNode(DefaultParserNode):
|
||||
# pass
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return "true"
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class FalseNode(DefaultParserNode):
|
||||
# pass
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return "false"
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class NullNode(DefaultParserNode):
|
||||
# pass
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return "null"
|
||||
#
|
||||
#
|
||||
# @dataclass()
|
||||
# class BinaryNode(DefaultParserNode):
|
||||
# operator: TokenKind
|
||||
# left: Node
|
||||
# right: Node
|
||||
#
|
||||
# def is_same(self, other):
|
||||
# if not super(BinaryNode, self).is_same(other):
|
||||
# return False
|
||||
# if self.operator != other.operator:
|
||||
# return False
|
||||
# if not self.left.is_same(other.left):
|
||||
# return False
|
||||
# return self.right.is_same(other.right)
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return f"({self.left} {self.operator} {self.right})"
|
||||
#
|
||||
|
||||
@staticmethod
def get_concept_key(tokens, variables=None):
    """
    Build the key of a concept from its tokens.

    Tokens are read up to the first EOF token and whitespace tokens are
    dropped. A token whose value is listed in *variables* is replaced by
    ``__var__<index>`` (its position in *variables*); a STRING token loses
    its first and last character; any other value is used as is. The
    resulting pieces are separated by single spaces.

    :param tokens: iterable of tokens exposing ``type`` and ``value``
    :param variables: optional list of variable names
    :return: the key as a string
    """
    pieces = []
    for tok in tokens:
        kind = tok.type
        if kind == TokenKind.EOF:
            break
        if kind == TokenKind.WHITESPACE:
            continue

        if variables is not None and tok.value in variables:
            pieces.append("__var__" + str(variables.index(tok.value)))
        elif kind == TokenKind.STRING:
            pieces.append(tok.value[1:-1])
        else:
            pieces.append(tok.value)

    return " ".join(pieces)
|
||||
|
||||
|
||||
@dataclass()
class NameNode(DefaultParserNode):
    """
    Node holding the tokens of a name.

    Its representation, equality and hash are all derived from the text
    returned by ``get_name``.
    """

    def get_name(self):
        """
        Return the name as text.

        Tokens are read up to the first EOF token, whitespace tokens are
        dropped, STRING tokens lose their first and last character, and the
        remaining values are separated by single spaces.
        """
        words = []
        for tok in self.tokens:
            if tok.type == TokenKind.EOF:
                break
            if tok.type == TokenKind.WHITESPACE:
                continue
            words.append(tok.value[1:-1] if tok.type == TokenKind.STRING else tok.value)

        return " ".join(words)

    def __repr__(self):
        return self.get_name()

    def __eq__(self, other):
        # Only another NameNode with the same text is considered equal.
        return isinstance(other, NameNode) and self.get_name() == other.get_name()

    def __hash__(self):
        # Kept in step with __eq__: both rely on get_name().
        return hash(self.get_name())
|
||||
|
||||
|
||||
@dataclass()
|
||||
class DefConceptNode(DefaultParserNode):
|
||||
name: str
|
||||
where: Node = None
|
||||
pre: Node = None
|
||||
post: Node = None
|
||||
body: Node = NopNode
|
||||
|
||||
name: NameNode = NotInitializedNode()
|
||||
where: ReturnValueConcept = NotInitializedNode()
|
||||
pre: ReturnValueConcept = NotInitializedNode()
|
||||
post: ReturnValueConcept = NotInitializedNode()
|
||||
body: ReturnValueConcept = NotInitializedNode()
|
||||
|
||||
def get_codes(self):
|
||||
codes = {}
|
||||
for prop in ["where", "pre", "post", "body"]:
|
||||
prop_value = getattr(self, prop)
|
||||
if hasattr(prop_value, "ast"):
|
||||
codes[prop] = prop_value.ast
|
||||
for part_key in ConceptParts:
|
||||
prop_value = getattr(self, part_key.value)
|
||||
if hasattr(prop_value, "ast_"):
|
||||
codes[part_key] = prop_value.ast_
|
||||
return codes
|
||||
|
||||
|
||||
@dataclass()
|
||||
class NumberNode(DefaultParserNode):
|
||||
value: object
|
||||
|
||||
def __repr__(self):
|
||||
return str(self.value)
|
||||
|
||||
|
||||
@dataclass()
|
||||
class StringNode(DefaultParserNode):
|
||||
value: str
|
||||
quote: str
|
||||
|
||||
def is_same(self, other):
|
||||
if not super(StringNode, self).is_same(other):
|
||||
return False
|
||||
return self.quote == other.quote
|
||||
|
||||
def __repr__(self):
|
||||
return self.quote + self.value + self.quote
|
||||
|
||||
|
||||
@dataclass()
|
||||
class VariableNode(DefaultParserNode):
|
||||
value: str
|
||||
|
||||
def __repr__(self):
|
||||
return self.value
|
||||
|
||||
|
||||
@dataclass()
|
||||
class TrueNode(DefaultParserNode):
|
||||
pass
|
||||
|
||||
def __repr__(self):
|
||||
return "true"
|
||||
|
||||
|
||||
@dataclass()
|
||||
class FalseNode(DefaultParserNode):
|
||||
pass
|
||||
|
||||
def __repr__(self):
|
||||
return "false"
|
||||
|
||||
|
||||
@dataclass()
|
||||
class NullNode(DefaultParserNode):
|
||||
pass
|
||||
|
||||
def __repr__(self):
|
||||
return "null"
|
||||
|
||||
|
||||
@dataclass()
|
||||
class BinaryNode(DefaultParserNode):
|
||||
operator: TokenKind
|
||||
left: Node
|
||||
right: Node
|
||||
|
||||
def is_same(self, other):
|
||||
if not super(BinaryNode, self).is_same(other):
|
||||
return False
|
||||
if self.operator != other.operator:
|
||||
return False
|
||||
if not self.left.is_same(other.left):
|
||||
return False
|
||||
return self.right.is_same(other.right)
|
||||
|
||||
def __repr__(self):
|
||||
return f"({self.left} {self.operator} {self.right})"
|
||||
|
||||
|
||||
class DefaultParser(BaseParser):
|
||||
"""
|
||||
Parse sheerka specific grammar (like def concept)
|
||||
"""
|
||||
def __init__(self, sub_parser=None):
|
||||
|
||||
def __init__(self):
|
||||
BaseParser.__init__(self, "DefaultParser")
|
||||
self.sub_parser = sub_parser
|
||||
self.lexer_iter = None
|
||||
self._current = None
|
||||
self.context = None
|
||||
self.text = None
|
||||
self.sheerka = None
|
||||
|
||||
@staticmethod
def fix_indentation(tokens):
    """
    Remove the common indentation of a multi-line declaration.

    In the following example
    def concept add one to a as:
        def func(x):
            return x+1
        func(a)
    the indentation in front of 'def func(x)', 'return x+1' and 'func(a)'
    must be fixed to avoid a python syntax error.

    :param tokens: tokens of the declaration. When the first one is a colon,
        it must be followed by a NEWLINE token and a WHITESPACE token; the
        WHITESPACE tokens that follow a NEWLINE are modified in place.
    :return: *tokens* untouched when it does not start with a colon,
        ``tokens[3:]`` once the indentation is fixed, or an error node when
        the layout is not the expected one
    """
    if tokens[0].type != TokenKind.COLON:
        return tokens

    if len(tokens) < 3:
        return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])

    after_colon, first_indent = tokens[1], tokens[2]
    if after_colon.type != TokenKind.NEWLINE:
        return UnexpectedTokenErrorNode([after_colon], "Unexpected token after colon", [TokenKind.NEWLINE])
    if first_indent.type != TokenKind.WHITESPACE:
        return SyntaxErrorNode([first_indent], "Indentation not found.")

    # The first indented line gives the amount to strip from every line.
    indent_size = len(first_indent.value)

    # now fix the other indentations
    for pos in range(3, len(tokens) - 1):
        if tokens[pos].type != TokenKind.NEWLINE:
            continue

        following = tokens[pos + 1]
        if following.type != TokenKind.WHITESPACE:
            return UnexpectedTokenErrorNode([following], "Unexpected token", [TokenKind.WHITESPACE])

        remaining = len(following.value) - indent_size
        if remaining < 0:
            return SyntaxErrorNode([following], "Invalid indentation.")

        following.value = " " * remaining

    return tokens[3:]
|
||||
|
||||
def reset_parser(self, context, text):
|
||||
self.context = context
|
||||
# hack before implementing all the sub parsers
|
||||
if context:
|
||||
self.sub_parser = context.sheerka.parsers[1]
|
||||
self.sheerka = context.sheerka
|
||||
|
||||
self.text = text
|
||||
self.lexer_iter = iter(Tokenizer(text))
|
||||
@@ -190,167 +278,152 @@ class DefaultParser(BaseParser):
|
||||
self._current = None
|
||||
return
|
||||
|
||||
@staticmethod
|
||||
def get_concept_key(tokens, variables=None):
|
||||
key = ""
|
||||
first = True
|
||||
for token in tokens:
|
||||
if token.type == TokenKind.EOF:
|
||||
break
|
||||
if token.type == TokenKind.WHITESPACE:
|
||||
continue
|
||||
if not first:
|
||||
key += " "
|
||||
if variables is not None and token.value in variables:
|
||||
key += "__var__" + str(variables.index(token.value))
|
||||
else:
|
||||
key += token.value[1:-1] if token.type == TokenKind.STRING else token.value
|
||||
first = False
|
||||
|
||||
return key
|
||||
|
||||
@staticmethod
|
||||
def fix_indentation(tokens):
|
||||
"""
|
||||
In the following example
|
||||
def concept add one to a as:
|
||||
def func(x):
|
||||
return x+1
|
||||
func(a)
|
||||
indentations in front of 'def func(x)', 'return x+1' and 'func(a)' must be fixed to avoid a python syntax error
|
||||
:param tokens:
|
||||
:return:
|
||||
"""
|
||||
if tokens[1].type != TokenKind.COLON:
|
||||
return tokens[1:]
|
||||
|
||||
if len(tokens) < 3:
|
||||
return UnexpectedTokenErrorNode(tokens[0:2], "Unexpected end of file", [TokenKind.NEWLINE])
|
||||
|
||||
if tokens[2].type != TokenKind.NEWLINE:
|
||||
return UnexpectedTokenErrorNode([tokens[2]], "Unexpected token after colon", [TokenKind.NEWLINE])
|
||||
|
||||
if tokens[3].type != TokenKind.WHITESPACE:
|
||||
return SyntaxErrorNode([tokens[3]], "Indentation not found")
|
||||
indent_size = len(tokens[3].value)
|
||||
|
||||
# now fix the other indentations
|
||||
i = 4
|
||||
while i < len(tokens) - 1:
|
||||
if tokens[i].type == TokenKind.NEWLINE:
|
||||
if tokens[i + 1].type != TokenKind.WHITESPACE:
|
||||
return UnexpectedTokenErrorNode([tokens[i + 1]], "Unexpected token", [TokenKind.WHITESPACE])
|
||||
|
||||
if len(tokens[i + 1].value) < indent_size:
|
||||
return SyntaxErrorNode([tokens[i + 1]], "Invalid indentation.")
|
||||
|
||||
tokens[i + 1].value = " " * (len(tokens[i + 1].value) - indent_size)
|
||||
i += 1
|
||||
|
||||
return tokens[4:]
|
||||
|
||||
def parse(self, context, text):
|
||||
# default parser can only manage string text
|
||||
if not isinstance(text, str):
|
||||
log.debug(f"Failed to recognize '{text}'")
|
||||
return context.sheerka.ret(
|
||||
self.name,
|
||||
False,
|
||||
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
|
||||
|
||||
self.reset_parser(context, text)
|
||||
return self.parse_statement()
|
||||
tree = self.parse_statement()
|
||||
|
||||
# If an error is found, it must be sent to error_sink
|
||||
# tree must contain what was recognized
|
||||
|
||||
ret = self.sheerka.ret(
|
||||
self.name,
|
||||
not self.has_error,
|
||||
self.sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=text,
|
||||
body=self.error_sink if self.has_error else tree,
|
||||
try_parsed=tree))
|
||||
|
||||
self.log_result(log, text, ret)
|
||||
return ret
|
||||
|
||||
def parse_statement(self):
|
||||
token = self.get_token()
|
||||
if token.value == Keywords.DEF:
|
||||
self.next_token()
|
||||
return self.parse_def_concept()
|
||||
return self.parse_def_concept(token)
|
||||
else:
|
||||
return self.add_error(CannotHandleErrorNode([], self.text))
|
||||
|
||||
def parse_def_concept(self):
|
||||
def parse_def_concept(self, def_token):
|
||||
"""
|
||||
def concept name [where xxx] [pre xxx] [post xxx] [as xxx]
|
||||
"""
|
||||
|
||||
def_concept_parts = [Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
|
||||
# init
|
||||
log.debug("It may be a definition of a concept")
|
||||
concept_special_tokens = [def_token]
|
||||
concept_found = DefConceptNode(concept_special_tokens)
|
||||
|
||||
tokens_found = {} # Node token is supposed to be a list, but here, it will be a dict
|
||||
# the definition of a concept consists of several parts
|
||||
# Keywords.CONCEPT to get the name of the concept
|
||||
# Keywords.AS to get the body
|
||||
# Keywords.WHERE to get the conditions to recognize for the variables
|
||||
# Keywords.PRE to get the conditions required to evaluate the concept
|
||||
# Keywords.POST to apply or verify once the concept is executed
|
||||
def_concept_parts = [Keywords.CONCEPT, Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST]
|
||||
|
||||
token = self.get_token()
|
||||
if token.value != Keywords.CONCEPT:
|
||||
return self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
|
||||
|
||||
self.next_token()
|
||||
token = self.get_token()
|
||||
|
||||
if token.value in (Keywords.AS, Keywords.WHERE, Keywords.PRE, Keywords.POST):
|
||||
return self.add_error(UnexpectedTokenErrorNode([token], "Concept name is missing.", ["<name>"]))
|
||||
|
||||
name_as_tokens = []
|
||||
while token.type != TokenKind.EOF and token.value not in def_concept_parts:
|
||||
name_as_tokens.append(token)
|
||||
self.next_token()
|
||||
token = self.get_token()
|
||||
name = self.get_concept_key(name_as_tokens)
|
||||
tokens_found["name"] = name_as_tokens
|
||||
|
||||
# try to parse as, where, pre and post declarations
|
||||
tokens = {
|
||||
# tokens found, when trying to recognize the parts
|
||||
tokens_found_by_parts = {
|
||||
Keywords.CONCEPT: [],
|
||||
Keywords.AS: None,
|
||||
Keywords.WHERE: None,
|
||||
Keywords.PRE: None,
|
||||
Keywords.POST: None,
|
||||
}
|
||||
current_part = None
|
||||
current_part = Keywords.CONCEPT
|
||||
token = self.get_token()
|
||||
first_token = token
|
||||
|
||||
# loop through the tokens, and put them in the correct tokens_found_by_parts entry
|
||||
while token.type != TokenKind.EOF:
|
||||
if token.value in def_concept_parts:
|
||||
concept_special_tokens.append(token) # keep track of the keywords
|
||||
keyword = token.value
|
||||
if tokens[keyword]:
|
||||
return self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
|
||||
tokens[keyword] = [token] # first element of the list is the keyword
|
||||
if tokens_found_by_parts[keyword]:
|
||||
# a part is defined more than once
|
||||
self.add_error(SyntaxErrorNode([token], f"Too many '{keyword.value}' declarations."))
|
||||
tokens_found_by_parts[current_part].append(token) # adds the token again
|
||||
else:
|
||||
tokens_found_by_parts[keyword] = [token]
|
||||
current_part = keyword
|
||||
self.next_token()
|
||||
else:
|
||||
if current_part is None:
|
||||
return self.add_error(UnexpectedTokenErrorNode([token], "Unexpected token", def_concept_parts))
|
||||
else:
|
||||
tokens[current_part].append(token)
|
||||
self.next_token(False)
|
||||
tokens_found_by_parts[current_part].append(token)
|
||||
self.next_token(False)
|
||||
|
||||
token = self.get_token()
|
||||
for t in tokens:
|
||||
tokens_found[t.value] = tokens[t]
|
||||
|
||||
asts = {
|
||||
Keywords.AS: NopNode(),
|
||||
Keywords.WHERE: NopNode(),
|
||||
Keywords.PRE: NopNode(),
|
||||
Keywords.POST: NopNode(),
|
||||
# semantic checks
|
||||
name_first_token_index = 1
|
||||
if first_token.value != Keywords.CONCEPT:
|
||||
self.add_error(UnexpectedTokenErrorNode([token], "Syntax error.", [Keywords.CONCEPT]))
|
||||
name_first_token_index = 0
|
||||
|
||||
# Manage the name
|
||||
name_tokens = tokens_found_by_parts[Keywords.CONCEPT]
|
||||
if len(name_tokens) == name_first_token_index:
|
||||
self.add_error(SyntaxErrorNode([], "Name is mandatory"))
|
||||
|
||||
if name_tokens[-1].type == TokenKind.NEWLINE:
|
||||
name_tokens = name_tokens[:-1] # strip trailing newlines
|
||||
|
||||
if TokenKind.NEWLINE in [t.type for t in name_tokens]:
|
||||
self.add_error(SyntaxErrorNode(tokens_found_by_parts[Keywords.CONCEPT], "Newline are not allowed in name."))
|
||||
|
||||
concept_found.name = NameNode(name_tokens[name_first_token_index:]) # skip the first token
|
||||
|
||||
asts_found_by_parts = {
|
||||
Keywords.AS: NotInitializedNode(),
|
||||
Keywords.WHERE: NotInitializedNode(),
|
||||
Keywords.PRE: NotInitializedNode(),
|
||||
Keywords.POST: NotInitializedNode(),
|
||||
}
|
||||
|
||||
# check for empty declarations
|
||||
for keyword in tokens:
|
||||
current_tokens = tokens[keyword]
|
||||
if current_tokens is not None:
|
||||
if len(current_tokens) == 0: # only one element means empty decl
|
||||
return self.add_error(SyntaxErrorNode([current_tokens[0]], "Empty declaration"), False)
|
||||
else:
|
||||
current_tokens = self.fix_indentation(current_tokens)
|
||||
if isinstance(current_tokens, ErrorNode):
|
||||
self.add_error(current_tokens)
|
||||
continue
|
||||
for keyword in tokens_found_by_parts:
|
||||
if keyword == Keywords.CONCEPT:
|
||||
continue # already done
|
||||
|
||||
# start = current_tokens[0].index
|
||||
# end = current_tokens[-1].index + len(current_tokens[-1].value)
|
||||
sub_parser = self.sub_parser(source=keyword.value)
|
||||
sub_tree = sub_parser.parse(self.context, current_tokens)
|
||||
if isinstance(sub_tree, ErrorNode):
|
||||
self.add_error(sub_tree, False)
|
||||
asts[keyword] = sub_tree
|
||||
log.debug("Processing part '" + keyword.name + "'")
|
||||
|
||||
def_concept_node = DefConceptNode(tokens_found, # dict instead of list is wanted.
|
||||
name,
|
||||
asts[Keywords.WHERE],
|
||||
asts[Keywords.PRE],
|
||||
asts[Keywords.POST],
|
||||
asts[Keywords.AS])
|
||||
tokens = tokens_found_by_parts[keyword]
|
||||
if tokens is None:
|
||||
continue # nothing to do
|
||||
|
||||
log.debug(f"Found DefConcept node '{def_concept_node}'")
|
||||
return def_concept_node
|
||||
if len(tokens) == 1: # check for empty declarations
|
||||
self.add_error(SyntaxErrorNode([tokens[0]], "Empty declaration"), False)
|
||||
continue
|
||||
|
||||
tokens = self.fix_indentation(tokens[1:]) # manage multi-lines declarations
|
||||
if isinstance(tokens, ErrorNode):
|
||||
self.add_error(tokens)
|
||||
continue
|
||||
|
||||
# ask the other parsers if they recognize the tokens
|
||||
new_context = self.context.push(self)
|
||||
parsing_result = self.sheerka.expect_one(new_context, self.sheerka.parse(new_context, tokens))
|
||||
if not parsing_result.status:
|
||||
self.add_error(parsing_result.value)
|
||||
continue
|
||||
|
||||
asts_found_by_parts[keyword] = parsing_result
|
||||
|
||||
concept_found.where = asts_found_by_parts[Keywords.WHERE]
|
||||
concept_found.pre = asts_found_by_parts[Keywords.PRE]
|
||||
concept_found.post = asts_found_by_parts[Keywords.POST]
|
||||
concept_found.body = asts_found_by_parts[Keywords.AS]
|
||||
|
||||
log.debug(f"Found DefConcept node '{concept_found}'")
|
||||
return concept_found
|
||||
|
||||
# def parse_expression(self):
|
||||
# return self.parse_addition()
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
from core.sheerka import ReturnValue
|
||||
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser
|
||||
from core.tokenizer import Tokenizer, Keywords, TokenKind
|
||||
from core.concept import Concept
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ExactConceptParser(BaseParser):
|
||||
@@ -15,11 +18,17 @@ class ExactConceptParser(BaseParser):
|
||||
BaseParser.__init__(self, "ConceptParser")
|
||||
|
||||
def parse(self, context, text):
|
||||
"""
|
||||
text can be a string, but it can also be a list of tokens
|
||||
:param context:
|
||||
:param text:
|
||||
:return:
|
||||
"""
|
||||
res = []
|
||||
sheerka = context.sheerka
|
||||
words = self.get_words(text)
|
||||
if len(words) > self.MAX_WORDS_SIZE:
|
||||
return ReturnValue(self.name, False, sheerka.new(sheerka.CONCEPT_TOO_LONG_CONCEPT_NAME))
|
||||
return ReturnValueConcept(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, obj=text))
|
||||
|
||||
recognized = False
|
||||
for combination in self.combinations(words):
|
||||
@@ -30,24 +39,27 @@ class ExactConceptParser(BaseParser):
|
||||
# That will depend on the context
|
||||
# Let's return a new one for now and see if it works
|
||||
concept = sheerka.new(concept_key)
|
||||
if not sheerka.isinstance(concept, sheerka.UNKNOWN_CONCEPT_NAME):
|
||||
if not sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
|
||||
# update the properties if needed
|
||||
for i, token in enumerate(combination):
|
||||
if token.startswith(Concept.PROPERTY_PREFIX):
|
||||
index = int(token[len(Concept.PROPERTY_PREFIX):])
|
||||
concept.set_prop_by_index(index, words[i])
|
||||
res.append(ReturnValue(self.name, True, concept))
|
||||
res.append(ReturnValueConcept(self.name, True, concept))
|
||||
log.debug(f"Recognized '{text}' as '{concept}'")
|
||||
recognized = True
|
||||
|
||||
if recognized:
|
||||
return res
|
||||
|
||||
return ReturnValue(self.name, False, sheerka.new(sheerka.UNKNOWN_CONCEPT_NAME, body=text))
|
||||
log.debug(f"Failed to recognize {words}")
|
||||
return ReturnValueConcept(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, obj=text))
|
||||
|
||||
@staticmethod
|
||||
def get_words(text):
|
||||
tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
|
||||
res = []
|
||||
for t in iter(Tokenizer(text)):
|
||||
for t in tokens:
|
||||
if t.type == TokenKind.EOF:
|
||||
break
|
||||
if t.type == TokenKind.NEWLINE or t.type == TokenKind.WHITESPACE:
|
||||
|
||||
+48
-9
@@ -1,3 +1,4 @@
|
||||
from core.builtin_concepts import BuiltinConcepts
|
||||
from parsers.BaseParser import BaseParser, Node, ErrorNode
|
||||
from dataclasses import dataclass
|
||||
import ast
|
||||
@@ -12,23 +13,50 @@ class PythonErrorNode(ErrorNode):
|
||||
source: str
|
||||
exception: Exception
|
||||
|
||||
def __post_init__(self):
|
||||
log.debug("-> PythonErrorNode: " + str(self.exception))
|
||||
# def __post_init__(self):
|
||||
# log.debug("-> PythonErrorNode: " + str(self.exception))
|
||||
|
||||
|
||||
@dataclass()
|
||||
class PythonNode(Node):
|
||||
source: str
|
||||
ast: ast.AST
|
||||
ast_: ast.AST
|
||||
|
||||
# def __repr__(self):
|
||||
# return "PythonNode(source='" + self.source + "', ast=" + self.get_dump(self.ast_) + ")"
|
||||
|
||||
def __repr__(self):
|
||||
return "PythonNode(" + ast.dump(self.ast) + ")"
|
||||
# return "PythonNode(" + self.source + ")"
|
||||
ast_type = "expr" if isinstance(self.ast_, ast.Expression) else "module"
|
||||
return "PythonNode(" + ast_type + "='" + self.source + "')"
|
||||
|
||||
def __eq__(self, other):
    """
    Compare with another PythonNode.

    Nodes are equal when their sources are equal and the dumps produced by
    ``get_dump`` for both ASTs are equal.
    """
    return (
        isinstance(other, PythonNode)
        and self.source == other.source
        and self.get_dump(self.ast_) == self.get_dump(other.ast_)
    )
|
||||
|
||||
def __hash__(self):
    """
    Hash the node on the same data ``__eq__`` compares.

    ``ast.AST`` objects do not expose a ``hash`` attribute, so reading
    ``self.ast_.hash`` raises AttributeError. The source and the dump
    returned by ``get_dump`` are hashed instead, so equal nodes get equal
    hashes.

    :return: hash of ``(source, normalized AST dump)``
    """
    return hash((self.source, self.get_dump(self.ast_)))
|
||||
|
||||
@staticmethod
|
||||
def get_dump(ast_):
|
||||
dump = ast.dump(ast_)
|
||||
for to_remove in [", ctx=Load()", ", kind=None", ", type_ignores=[]"]:
|
||||
dump = dump.replace(to_remove, "")
|
||||
return dump
|
||||
|
||||
|
||||
class PythonParser(BaseParser):
|
||||
"""
|
||||
Parse Python scripts
|
||||
"""
|
||||
|
||||
def __init__(self, source="<undef>"):
|
||||
|
||||
BaseParser.__init__(self, "PythonParser")
|
||||
@@ -38,6 +66,8 @@ class PythonParser(BaseParser):
|
||||
text = text if isinstance(text, str) else self.get_text_from_tokens(text)
|
||||
text = text.strip()
|
||||
|
||||
sheerka = context.sheerka
|
||||
|
||||
# first, try to parse an expression
|
||||
res, tree, error = self.try_parse_expression(text)
|
||||
if not res:
|
||||
@@ -47,10 +77,19 @@ class PythonParser(BaseParser):
|
||||
self.has_error = True
|
||||
error_node = PythonErrorNode(text, error)
|
||||
self.error_sink.append(error_node)
|
||||
return error_node
|
||||
|
||||
log.debug("Recognized python code.")
|
||||
return PythonNode(text, tree)
|
||||
ret = sheerka.ret(
|
||||
self.name,
|
||||
not self.has_error,
|
||||
sheerka.new(
|
||||
BuiltinConcepts.PARSER_RESULT,
|
||||
parser=self,
|
||||
source=text,
|
||||
body=self.error_sink if self.has_error else PythonNode(text, tree),
|
||||
try_parsed=None))
|
||||
|
||||
self.log_result(log, text, ret)
|
||||
return ret
|
||||
|
||||
def try_parse_expression(self, text):
|
||||
try:
|
||||
@@ -91,6 +130,7 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
|
||||
"""
|
||||
This visitor will find all the names declared in the AST
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.names = set()
|
||||
log.debug("Searching for names.")
|
||||
@@ -98,4 +138,3 @@ class PythonGetNamesVisitor(ast.NodeVisitor):
|
||||
def visit_Name(self, node):
|
||||
log.debug(f"Found name : {node.id}")
|
||||
self.names.add(node.id)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user