Implemented FunctionParser

This commit is contained in:
2020-09-17 14:11:09 +02:00
parent 8a866880bc
commit 177a6b1d5f
40 changed files with 1752 additions and 561 deletions
+59 -14
View File
@@ -7,7 +7,7 @@ import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.services.SheerkaExecute import ParserInput
from core.tokenizer import TokenKind, LexerError, Token, Keywords
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True
@@ -46,14 +46,18 @@ class LexerNode(Node):
def clone(self):
pass
def to_short_str(self):
raise NotImplementedError
class UnrecognizedTokensNode(LexerNode):
def __init__(self, start, end, tokens):
super().__init__(start, end, tokens)
self.is_frozen = False
self.is_frozen = False # TODO: Remove as it seems to now be useless
self.parenthesis_count = 0
def freeze(self):
# TODO: Remove as it seems to now be useless
self.is_frozen = True
def reset(self):
@@ -61,6 +65,7 @@ class UnrecognizedTokensNode(LexerNode):
self.tokens.clear()
self.is_frozen = False
self.parenthesis_count = 0
self.source = ""
def add_token(self, token, pos):
if self.is_frozen:
@@ -135,7 +140,7 @@ class UnrecognizedTokensNode(LexerNode):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
return f"UnrecognizedTokensNode(source='{self.source}', start={self.start}, end={self.end})"
def clone(self):
clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
@@ -143,6 +148,9 @@ class UnrecognizedTokensNode(LexerNode):
clone.parenthesis_count = self.parenthesis_count
return clone
def to_short_str(self):
return f"UTN('{self.source}')"
class ConceptNode(LexerNode):
"""
@@ -209,15 +217,30 @@ class ConceptNode(LexerNode):
# bag["compiled"] = self.concept.compiled
return bag
def to_short_str(self):
return f'CN({self.concept})'
class SourceCodeNode(LexerNode):
"""
Returned when some source code (like Python source code is recognized)
"""
def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
def __init__(self, start, end, tokens=None, source=None, python_node=None, return_value=None):
"""
:param start: start position (index of the first token)
:param end: end position (index of the last token)
:param tokens:
:param source: tokens as string
:param python_node: PythonNode found (when the SourceCodeNode is validated)
:param return_value: ReturnValueConcept returned when the source was validated
When return_value is provided,
You should have return_value.body.body == node
"""
super().__init__(start, end, tokens, source)
self.node = node # The PythonNode (or whatever language node) that is found
self.python_node = python_node # The PythonNode (or whatever language node) that is found
self.return_value = return_value # original result of the parsing
def __eq__(self, other):
@@ -232,7 +255,7 @@ class SourceCodeNode(LexerNode):
if not isinstance(other, SourceCodeNode):
return False
return self.node == other.node and \
return self.python_node == other.python_node and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
@@ -243,6 +266,9 @@ class SourceCodeNode(LexerNode):
def __repr__(self):
return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
def to_short_str(self):
return f"SCN('{self.source}')"
class SourceCodeWithConceptNode(LexerNode):
"""
@@ -254,17 +280,22 @@ class SourceCodeWithConceptNode(LexerNode):
So I push all the nodes into one big bag
"""
def __init__(self, first_node, last_node, content_nodes=None):
def __init__(self, first_node, last_node, content_nodes=None, has_unrecognized=False):
super().__init__(9999, -1, None) # why not sys.maxint ?
self.first = first_node
self.last = last_node
self.nodes = content_nodes or []
self.has_unrecognized = False
self.has_unrecognized = has_unrecognized
self._all_nodes = None
self.fix_all_pos()
self.python_node = None # if the source code node is validated against a python parse, here is the PythonNode
self.return_value = None # return_value that produced the PythonNode
def add_node(self, node):
self.nodes.append(node)
self.fix_pos(node)
self._all_nodes = None
return self
@@ -304,6 +335,9 @@ class SourceCodeWithConceptNode(LexerNode):
return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"
def fix_all_pos(self):
if self.first is None: # to ease some unit test where only the python_node is necessary
return
for n in [self.first, self.last] + self.nodes:
self.fix_pos(n)
@@ -334,10 +368,20 @@ class SourceCodeWithConceptNode(LexerNode):
self.source += self.last.source
return self
def get_all_nodes(self):
    """
    Return the first node, the content nodes and the last node as one list.

    The list is built on first use and cached in ``self._all_nodes``;
    subsequent calls return that same list object.

    :return: ``[self.first, *self.nodes, self.last]``
    """
    # None is the "not built yet" marker, so compare against it explicitly
    # rather than relying on the truthiness of the cached list.
    if self._all_nodes is not None:
        return self._all_nodes

    self._all_nodes = [self.first, *self.nodes, self.last]
    return self._all_nodes
def clone(self):
clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes)
clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes.copy(), self.has_unrecognized)
return clone
def to_short_str(self):
    """
    Return a compact, single-line description of this node.

    The text is ``SCWC(`` followed by the first node, the short strings of
    the content nodes joined by ``", "``, the last node and ``)``.

    :return: the short string
    """
    # to_short_str has to be *called* on each content node: joining the bound
    # methods themselves raises TypeError (str.join only accepts str items).
    content = ", ".join(n.to_short_str() for n in self.nodes)
    return f"SCWC({self.first}" + content + f"{self.last})"
@dataclass()
class GrammarErrorNode(ErrorNode):
@@ -479,7 +523,7 @@ class SCWC(HelperWithPos):
TODO: create a common function or whatever...
:return:
"""
source = self.first.source
source = self.first.source if hasattr(self.first, "source") else self.first
for n in self.content:
source += " "
if hasattr(n, "source"):
@@ -488,7 +532,7 @@ class SCWC(HelperWithPos):
source += str(n.concept)
else:
source += " unknown"
source += self.last.source
source += self.last.source if hasattr(self.last, "source") else self.last
return source
@@ -514,7 +558,7 @@ class CN(HelperWithPos):
self.concept = concept if isinstance(concept, Concept) else None
def fix_source(self, str_tokens):
self.source = "".join([s.value if isinstance(s, Keywords) else s for s in str_tokens])
self.source = "".join(str_tokens)
return self
def __eq__(self, other):
@@ -660,7 +704,7 @@ class UTN(HelperWithPos):
return hash((self.source, self.start, self.end))
def __repr__(self):
txt = f"UTN( source='{self.source}'"
txt = f"UTN(source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
@@ -733,7 +777,7 @@ class BaseNodeParser(BaseParser):
else:
name = token.value
custom_concepts = custom(name) if custom else []
custom_concepts = custom(name) if custom else [] # to get extra concepts using an alternative method
result = []
if name in self.concepts_by_first_keyword:
@@ -746,6 +790,7 @@ class BaseNodeParser(BaseParser):
concept = to_map(self, concept) if to_map else concept
result.append(concept)
return core.utils.make_unique(result + custom_concepts,
lambda c: c.concept.id if hasattr(c, "concept") else c.id)