Fixed #32 : concept groups are not correctly updated

Fixed #35 : Refactor test helper class (CNC, CC, CIO)
Fixed #36 : Concept values are not used when declared with variable expression
Fixed #37 : Objects in memory lose their values are restart
Fixed #38 : func(a=b, c) (which is not allowed) raise an exception
This commit is contained in:
2021-03-05 11:16:19 +01:00
parent 646c428edb
commit 05577012f3
38 changed files with 1942 additions and 1463 deletions
+44 -458
View File
@@ -1,11 +1,9 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
import core.utils
from core.concept import Concept, ConceptParts
from core.rule import Rule
from core.tokenizer import TokenKind, Token
from core.var_ref import VariableRef
from parsers.BaseParser import Node, BaseParser, ParsingError
DEBUG_COMPILED = True
@@ -117,14 +115,6 @@ class UnrecognizedTokensNode(LexerNode):
return self.tokens[-1].type
def __eq__(self, other):
if isinstance(other, utnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if isinstance(other, UTN):
return other == self
if not isinstance(other, UnrecognizedTokensNode):
return False
@@ -158,9 +148,6 @@ class RuleNode(LexerNode):
if id(self) == id(other):
return True
if isinstance(other, RN):
return other == self
if not isinstance(other, RuleNode):
return False
@@ -198,18 +185,6 @@ class ConceptNode(LexerNode):
if id(self) == id(other):
return True
if isinstance(other, (CN, CNC)):
return other == self
if isinstance(other, cnode):
return self.concept.key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
if isinstance(other, short_cnode):
return self.concept.key == other.concept_key and self.source == other.source
if not isinstance(other, ConceptNode):
return False
@@ -255,7 +230,8 @@ class SourceCodeNode(LexerNode):
Returned when some source code (like Python source code is recognized)
"""
def __init__(self, start, end, tokens=None, source=None, python_node=None, return_value=None):
def __init__(self, start, end, tokens=None, source=None,
python_node=None, return_value=None, error_when_parsing=None):
"""
:param start: start position (index of the first token)
@@ -269,18 +245,12 @@ class SourceCodeNode(LexerNode):
You should have return_value.body.body == node
"""
super().__init__(start, end, tokens, source)
self.python_node = python_node # The PythonNode (or whatever language node) that is found
self.return_value = return_value # original result of the parsing
self.error_when_parsing = error_when_parsing # if python_node is still None after parsing, it explains why
def __eq__(self, other):
if isinstance(other, scnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if isinstance(other, SCN):
return other == self
if not isinstance(other, SourceCodeNode):
return False
@@ -336,6 +306,7 @@ class SourceCodeWithConceptNode(LexerNode):
self.python_node = None # if the source code node is validated against a python parse, here is the PythonNode
self.return_value = None # return_value that produced the PythonNode
self.error_when_parsing = None # if python_node is still None after parsing, it explains why
def add_node(self, node):
self.nodes.append(node)
@@ -348,9 +319,6 @@ class SourceCodeWithConceptNode(LexerNode):
if id(self) == id(other):
return True
if isinstance(other, SCWC):
return other == self
if not isinstance(other, SourceCodeWithConceptNode):
return False
@@ -436,6 +404,44 @@ class SourceCodeWithConceptNode(LexerNode):
return self.python_node.source
class VariableNode(LexerNode):
"""
When trying to parser source code, a reference to a variable is recognized
Not sure yet if it has to be a lexer node
"""
def __init__(self, obj, prop, start, end, tokens=None, source=None):
super().__init__(start, end, tokens, source)
self.var_ref = VariableRef(obj, prop)
def __eq__(self, other):
if id(self) == id(other):
return True
if not isinstance(other, VariableNode):
return False
return self.var_ref == other.var_ref and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.var_ref.obj, self.var_ref.prop, self.start, self.end, self.source))
def __repr__(self):
ret = f"VariableNode(obj={self.var_ref.obj}, prop={self.var_ref.prop}, "
ret += f"start={self.start}, end={self.end}, source='{self.source}')"
return ret
def to_short_str(self):
return f"VN({self.var_ref.obj})" if self.var_ref.prop is None else f"VN({self.var_ref.obj}.{self.var_ref.prop})"
def clone(self):
clone = VariableNode(self.var_ref.obj, self.var_ref.prop, self.start, self.end, self.tokens, self.source)
return clone
@dataclass()
class GrammarErrorNode(ParsingError):
message: str
@@ -455,426 +461,6 @@ class SyaAssociativity(Enum):
return self.value
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")
class HelperWithPos:
def __init__(self, start=None, end=None):
self.start = start
self.end = end
self.start_is_fixed = start is not None
self.end_is_fixed = end is not None
def fix_pos(self, node):
if not self.start_is_fixed:
start = node.start if hasattr(node, "start") else \
node[0] if isinstance(node, tuple) else None
if start is not None and (self.start is None or start < self.start):
self.start = start
if not self.end_is_fixed:
end = node.end if hasattr(node, "end") else \
node[1] if isinstance(node, tuple) else None
if end is not None and (self.end is None or end > self.end):
self.end = end
return self
class SCN(HelperWithPos):
"""
SourceCodeNode tester class
It matches with SourceCodeNode but with less constraints
SCN == SourceCodeNode if source, start, end (start and end are not validated when None)
"""
def __init__(self, source, start=None, end=None):
super().__init__(start, end)
self.source = source
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, SourceCodeNode):
if self.source != other.source:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False
return True
if not isinstance(other, CN):
return False
return self.source == other.source and \
self.start == other.start and \
self.end == other.end
def __hash__(self):
return hash((self.source, self.start, self.end))
def __repr__(self):
txt = f"SCN(source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
return txt + ")"
class SCWC(HelperWithPos):
"""
SourceNodeWithConcept tester class
It matches with a SourceNodeWithConcept
but it's easier to instantiate during the tests
"""
def __init__(self, first, last, *args):
super().__init__(None, None)
self.first = first
self.last = last
self.content = args
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, SourceCodeWithConceptNode):
if self.first != other.first:
return False
if self.last != other.last:
return False
if len(self.content) != len(other.nodes):
return False
for self_node, other_node in zip(self.content, other.nodes):
if self_node != other_node:
return False
# at last
return True
def __repr__(self):
txt = "SCWC("
if self.start is not None:
txt += f"start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
txt += f", source='{self.source}'"
return txt + ")"
@property
def source(self):
"""
this code is a copy and paste from SourceCodeWithConceptNode.pseudo_fix_source
TODO: create a common function or whatever...
:return:
"""
source = self.first.source if hasattr(self.first, "source") else self.first
for n in self.content:
source += " "
if hasattr(n, "source"):
source += n.source
elif hasattr(n, "concept"):
source += str(n.concept)
else:
source += " unknown"
source += self.last.source if hasattr(self.last, "source") else self.last
return source
class CN(HelperWithPos):
"""
ConceptNode tester class
It matches with ConceptNode but with less constraints
CN == ConceptNode if concept key, start, end and source are the same
"""
def __init__(self, concept, start=None, end=None, source=None):
"""
:param concept: Concept or concept_key (only the key is used anyway)
:param start:
:param end:
:param source:
"""
super().__init__(start, end)
self.concept_key = concept.key if isinstance(concept, Concept) else concept
self.source = source
self.concept = concept if isinstance(concept, Concept) else None
def fix_source(self, str_tokens):
self.source = "".join(str_tokens)
return self
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, ConceptNode):
if other.concept is None:
return False
if other.concept.key != self.concept_key:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
return True
if not isinstance(other, CN):
return False
return self.concept_key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.concept_key, self.start, self.end, self.source))
def __repr__(self):
if self.concept:
txt = f"CN(concept='{self.concept}'"
else:
txt = f"CN(concept_key='{self.concept_key}'"
txt += f", source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
return txt + ")"
class CNC(CN):
"""
ConceptNode for Compiled tester class
It matches with ConceptNode
But focuses on the 'compiled' property of the concept
CNC == ConceptNode if CNC.get_compiled() == ConceptNode.concept.get_compiled()
"""
def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
super().__init__(concept_key, start, end, source)
self.compiled = kwargs
self.exclude_body = exclude_body
if "body" in self.compiled:
self.compiled[ConceptParts.BODY] = self.compiled["body"]
del self.compiled["body"]
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, ConceptNode):
if other.concept is None:
return False
if other.concept.key != self.concept_key:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
if self.exclude_body:
to_compare = {k: v for k, v in other.concept.get_compiled().items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.get_compiled()
if self.compiled == to_compare: # expanded form to ease the debug
return True
else:
return False
if not isinstance(other, CNC):
return False
return self.concept_key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.compiled == other.compiled
def __repr__(self):
if self.concept:
txt = f"CNC(concept='{self.concept}'"
else:
txt = f"CNC(concept_key='{self.concept_key}'"
txt += f", source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
for k, v in self.compiled.items():
txt += f", {k}='{v}'"
return txt + ")"
def to_compare(self, other, to_compare_delegate):
"""
Transform other into CNC, to ease the comparison
:param other:
:param to_compare_delegate:
:return:
"""
if isinstance(other, CNC):
return other
if isinstance(other, ConceptNode):
if self.exclude_body:
compiled = {k: v for k, v in other.concept.get_compiled().items() if k != ConceptParts.BODY}
else:
compiled = other.concept.get_compiled()
self_compile_to_use = self.compiled or compiled
compiled = to_compare_delegate(self_compile_to_use, compiled, to_compare_delegate)
return CNC(other.concept,
other.start if self.start is not None else None,
other.end if self.end is not None else None,
other.source if self.source is not None else None,
self.exclude_body,
**compiled)
raise NotImplementedError("CNC")
class UTN(HelperWithPos):
"""
Tester class for UnrecognizedTokenNode
compare the source, and start, end if defined
"""
def __init__(self, source, start=None, end=None):
"""
:param source:
:param start:
:param end:
"""
super().__init__(start, end)
self.source = source
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, UnrecognizedTokensNode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, UTN):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.source, self.start, self.end))
def __repr__(self):
txt = f"UTN(source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
return txt + ")"
def to_compare(self, other, to_compare_delegate):
"""
Transform other into CNC, to ease the comparison
:param other:
:param to_compare_delegate:
:return:
"""
if isinstance(other, UTN):
return other
if isinstance(other, UnrecognizedTokensNode):
return UTN(other.source,
other.start,
other.end)
raise NotImplementedError("UTN")
class RN(HelperWithPos):
"""
Helper class to test RuleNode
"""
def __init__(self, rule, start=None, end=None, source=None):
"""
:param concept: Concept or concept_key (only the key is used anyway)
:param start:
:param end:
:param source:
"""
super().__init__(start, end)
self.rule_id = rule.id if isinstance(rule, Rule) else rule
self.source = source or core.utils.str_concept((None, self.rule_id), prefix="r:")
self.rule = rule if isinstance(rule, Rule) else None
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, RuleNode):
if other.rule is None:
return False
if other.rule.id != self.rule_id:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
return True
if not isinstance(other, RN):
return False
return self.rule_id == other.rule_id and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.rule_id, self.start, self.end, self.source))
def __repr__(self):
if self.rule:
txt = f"RN(rule='{self.rule}'"
else:
txt = f"RN(rule_id='{self.rule_id}'"
txt += f", source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
return txt + ")"
class BaseNodeParser(BaseParser):
"""
Parser that return LexerNode
+14 -7
View File
@@ -168,15 +168,22 @@ class BaseParser:
if expected_parser and parser_input.parser != expected_parser:
return None
if len(parser_input.value) == 0:
return None
for node in parser_input.value:
from parsers.BaseNodeParser import LexerNode
if not isinstance(node, LexerNode):
from parsers.BaseNodeParser import LexerNode
if isinstance(parser_input.value, list):
if len(parser_input.value) == 0:
return None
return parser_input.value
for node in parser_input.value:
if not isinstance(node, LexerNode):
return None
return parser_input.value
else:
if not isinstance(parser_input.value, LexerNode):
return None
return [parser_input.value]
@staticmethod
def get_tokens_boundaries(tokens):
+43 -70
View File
@@ -53,59 +53,6 @@ class FunctionNode(FunctionParserNode):
parameters: list
class FN(FunctionNode):
"""
Test class only
It matches with FunctionNode but with less constraints
Thereby,
FN("first", "last", ["param1," ...]) can be compared to
FunctionNode(NameExprNode("first"), NameExprNode("second"), [FunctionParameter(NamesNodes("param1"), NamesNodes(", ")])
Note that FunctionParameter can easily be defined with a single string
* "param" -> FunctionParameter(NameExprNode("param"), None)
* "param, " -> FunctionParameter(NameExprNode("param"), NameExprNode(", "))
For more complicated situations, you can use a tuple (value, sep) to define the value part and the separator part
"""
def __init__(self, first, last, parameters):
self.first = first
self.last = last
self.parameters = []
for param in parameters:
if isinstance(param, tuple):
self.parameters.append(param)
elif isinstance(param, str) and (pos := param.find(",")) != -1:
self.parameters.append((param[:pos], param[pos:]))
else:
self.parameters.append((param, None))
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, FN):
return self.first == other.first and self.last == other.last and self.parameters == other.parameters
if isinstance(other, FunctionNode):
if self.first != other.first.value or self.last != other.last.value:
return False
if len(self.parameters) != len(other.parameters):
return False
for self_parameter, other_parameter in zip(self.parameters, other.parameters):
value = other_parameter.value.value if isinstance(self_parameter[0], str) else other_parameter.value
sep = other_parameter.separator.value if other_parameter.separator else None
if self_parameter[0] != value or self_parameter[1] != sep:
return False
return True
return False
def __hash__(self):
return hash((self.first, self.last, self.parameters))
class FunctionParser(BaseParser):
"""
The parser will be used to parse func(x, y, z)
@@ -126,6 +73,17 @@ class FunctionParser(BaseParser):
self.longest_concepts_only = longest_concepts_only
self.record_errors = True
def function_parser_get_return_value_body(self, source_code_node):
if source_code_node.error_when_parsing:
return self.sheerka.new(BuiltinConcepts.ERROR,
body=source_code_node.error_when_parsing)
return self.sheerka.new(BuiltinConcepts.PARSER_RESULT,
parser=self,
source=self.parser_input.as_text(),
body=source_code_node,
try_parsed=source_code_node)
def add_error(self, error, next_token=True):
if not self.record_errors:
return
@@ -166,6 +124,9 @@ class FunctionParser(BaseParser):
[TokenKind.EOF]))
if self.has_error:
if len(self.error_sink) == 1 and isinstance(self.error_sink[0], Concept):
return self.error_sink[0]
if node is None:
body = context.sheerka.new(BuiltinConcepts.NOT_FOR_ME,
body=parser_input.as_text(),
@@ -178,12 +139,8 @@ class FunctionParser(BaseParser):
res = []
for source_code_node in source_code_nodes:
value = self.get_return_value_body(context.sheerka,
self.parser_input.as_text(),
source_code_node,
source_code_node)
res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, value))
body = self.function_parser_get_return_value_body(source_code_node)
res.append(self.sheerka.ret(self.name, source_code_node.python_node is not None, body))
return res[0] if len(res) == 1 else res
@@ -288,6 +245,25 @@ class FunctionParser(BaseParser):
def to_source_code_node(self, function_node: FunctionNode):
python_parser = PythonWithConceptsParser()
def update_source_code_node(scn, nodes, sep):
if hasattr(nodes, "__iter__"):
for n in nodes:
scn.add_node(n)
else:
scn.add_node(nodes)
if sep:
scn.add_node(sep.to_unrecognized())
def get_errors_from_python_parsing(parsing_res):
if parsing_res.status:
return None
if self.sheerka.isinstance(parsing_res.body, BuiltinConcepts.NOT_FOR_ME):
return parsing_res.body.reason
else:
return parsing_res.body.body
if len(function_node.parameters) == 0:
# validate the source
nodes_to_parse = [function_node.first.to_unrecognized(), function_node.last.to_unrecognized()]
@@ -298,17 +274,8 @@ class FunctionParser(BaseParser):
end=function_node.last.end,
tokens=function_node.first.tokens + function_node.last.tokens,
python_node=python_node,
return_value=python_parsing_res)]
def update_source_code_node(scn, nodes, sep):
if hasattr(nodes, "__iter__"):
for n in nodes:
scn.add_node(n)
else:
scn.add_node(nodes)
if sep:
scn.add_node(sep.to_unrecognized())
return_value=python_parsing_res,
error_when_parsing=get_errors_from_python_parsing(python_parsing_res))]
res = [SourceCodeWithConceptNode(function_node.first.to_unrecognized(), function_node.last.to_unrecognized())]
@@ -363,6 +330,12 @@ class FunctionParser(BaseParser):
for c in [c for c in source_code_node.python_node.objects.values() if isinstance(c, Concept)]:
update_compiled(self.context, c, errors)
if errors:
source_code_node.error_when_parsing = errors
else:
source_code_node.error_when_parsing = get_errors_from_python_parsing(python_parsing_res)
return res
@staticmethod
+14 -1
View File
@@ -1,6 +1,6 @@
from core.builtin_concepts import BuiltinConcepts
from core.builtin_helpers import CreateObjectIdentifiers
from parsers.BaseNodeParser import ConceptNode, RuleNode
from parsers.BaseNodeParser import ConceptNode, RuleNode, VariableNode
from parsers.BaseNodeParser import SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
@@ -64,6 +64,19 @@ class PythonWithConceptsParser(BaseParser):
python_ids_mappings[python_id] = rule
last_token_index = node.end
elif isinstance(node, VariableNode):
if node.start != last_token_index + 1 and source: # put back missing whitespace
source += " "
to_parse += " "
source += node.source
var_ref = node.var_ref
python_id = ids_manager.get_identifier(var_ref, "__V__")
to_parse += python_id
python_ids_mappings[python_id] = var_ref
last_token_index = node.end
else:
source += node.source
to_parse += node.get_source_to_parse()
+2 -2
View File
@@ -12,7 +12,7 @@ from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import Token, TokenKind, Tokenizer
from core.utils import get_n_clones, get_text_from_tokens, NextIdManager
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode, BaseNodeParser
SourceCodeWithConceptNode, BaseNodeParser, VariableNode
from parsers.BaseParser import ParsingError
PARSERS = ["Sequence", "Bnf", "Python"]
@@ -1262,7 +1262,7 @@ class SyaNodeParser(BaseNodeParser):
def postfix_to_item(self, sheerka, postfixed):
item = postfixed.pop()
if isinstance(item, (UnrecognizedTokensNode, SourceCodeNode, ConceptNode)):
if isinstance(item, (UnrecognizedTokensNode, SourceCodeNode, ConceptNode, VariableNode)):
return item
if isinstance(item, SourceCodeWithConceptNode):