Added SyaNodeParser (finally, after one month)

This commit is contained in:
2020-04-09 15:42:36 +02:00
parent c9acfa99a1
commit 6c7c529016
56 changed files with 5322 additions and 404 deletions
+369
View File
@@ -0,0 +1,369 @@
import copy
from dataclasses import dataclass
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
PARSERS = ["BnfNode", "SyaNode", "Python"]
@dataclass()
class TokensNodeFound(ErrorNode):
    """
    Error recorded when the input ends while a concept is still expecting
    more tokens (see AtomConceptParserHelper.finalize).

    expected_tokens: the token values that were still expected when parsing stopped.
    """
    expected_tokens: list

    def __eq__(self, other):
        # NOTE(review): this deliberately(?) compares against any
        # UnexpectedTokenErrorNode rather than only TokensNodeFound — confirm.
        if id(other) == id(self):
            return True
        if not isinstance(other, UnexpectedTokenErrorNode):
            return False
        if self.message != other.message:
            return False
        if self.token.type != other.token.type or self.token.value != other.token.value:
            return False
        if len(self.expected_tokens) != len(other.expected_tokens):
            return False
        for i, t in enumerate(self.expected_tokens):
            if t != other.expected_tokens[i]:
                return False
        return True

    def __hash__(self):
        # Bug fix: expected_tokens is a list, and lists are unhashable, so
        # hashing it directly raised TypeError; hash a tuple copy instead.
        return hash((self.message, self.token, tuple(self.expected_tokens)))
class AtomConceptParserHelper:
    """
    Accumulates a sequence of nodes (ConceptNode / UnrecognizedTokensNode)
    while AtomNodeParser walks the token stream.

    Because a token may start several candidate concepts, helpers can be
    cloned by AtomNodeParser.get_concepts_sequences(), and can fork copies of
    themselves while re-parsing buffered unrecognized tokens
    (see manage_unrecognized()).
    """
    def __init__(self, context):
        self.context = context
        self.debug = []  # every token/concept fed to this helper, kept for debugging
        self.sequence = []  # sequence of concepts already found
        self.current_concept: ConceptNode = None  # concept currently being matched token by token
        self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])  # buffer that keeps track of unmatched tokens and their positions
        self.expected_tokens = None  # remaining token values expected to complete current_concept
        self.is_locked = False  # when locked, eat_concept()/eat_unrecognized() are no-ops (token already consumed)
        self.errors = []
        self.has_unrecognized = False  # True as soon as an UnrecognizedTokensNode lands in the sequence
        self.forked = []  # used to duplicate AtomConceptParserHelper. See manage_unrecognized()

    def __eq__(self, other):
        # Two helpers are equal when their recognized sequences compare equal,
        # element by element; all other state is ignored.
        if id(other) == id(self):
            return True
        if not isinstance(other, AtomConceptParserHelper):
            return False
        if len(self.sequence) != len(other.sequence):
            return False
        for item_self, item_other in zip(self.sequence, other.sequence):
            if item_self != item_other:
                return False
        return True

    def __hash__(self):
        # Coarse hash (sequence length only); consistent with __eq__ above.
        return hash(len(self.sequence))

    def __repr__(self):
        return f"{self.sequence}"

    def lock(self):
        # Mark the current token as already consumed by eat_token().
        self.is_locked = True

    def reset(self):
        self.is_locked = False

    def has_error(self):
        return len(self.errors) > 0

    def eat_token(self, token, pos):
        """
        Try to match *token* against the next expected token of current_concept.
        Returns True when the token matched (the concept may or may not be
        complete yet), False on mismatch or when no concept is pending.
        """
        if not self.expected_tokens:
            return False
        self.debug.append(token)
        if self.expected_tokens[0] != BaseNodeParser.get_token_value(token):
            self.errors.append(UnexpectedTokenErrorNode(
                f"Found '{token}' while expecting '{self.expected_tokens[0]}'",
                token,
                [self.expected_tokens[0]]))
            return False
        # extend the concept's span to cover the newly matched token
        self.current_concept.end = pos
        del self.expected_tokens[0]
        if not self.expected_tokens:
            # the concept is fully matched
            self.sequence.append(self.current_concept)
            self.expected_tokens = None
        return True

    def eat_concept(self, concept, pos):
        """
        Start (or immediately complete) matching *concept* at position *pos*.
        Flushes the unrecognized-token buffer first and forwards the concept
        to any forked clones.
        """
        if self.is_locked:
            return
        self.debug.append(concept)
        self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.eat_concept(concept, pos)
        concept_node = ConceptNode(concept, pos, pos)
        # Remaining tokens of the concept's name; [1:-1] drops the first token
        # (the one that just matched) and the last one (presumably EOF — TODO confirm).
        expected = [BaseNodeParser.get_token_value(t) for t in Tokenizer(concept.name)][1:-1]
        if not expected:
            # the concept is already matched
            self.sequence.append(concept_node)
        else:
            self.current_concept = concept_node
            self.expected_tokens = expected

    def manage_unrecognized(self):
        """
        Flush the unrecognized-token buffer into the sequence.

        Before giving up, the buffered source is re-parsed with the PARSERS
        list; when that yields one or more node sequences, the first one is
        appended to this helper and one forked clone is created per extra
        alternative.
        """
        if self.unrecognized_tokens.is_empty():
            return
        # do not put empty UnrecognizedToken in out
        if self.unrecognized_tokens.is_whitespace():
            self.unrecognized_tokens.reset()
            return
        self.unrecognized_tokens.fix_source()
        # try to recognize concepts
        nodes_sequences = self._get_lexer_nodes_from_unrecognized()
        if nodes_sequences:
            instances = [self]
            # one clone per alternative beyond the first
            for i in range(len(nodes_sequences) - 1):
                clone = self.clone()
                instances.append(clone)
                self.forked.append(clone)
            for instance, node_sequence in zip(instances, nodes_sequences):
                for node in node_sequence:
                    instance.sequence.append(node)
                    if isinstance(node, UnrecognizedTokensNode) or \
                            hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
                        instance.has_unrecognized = True
                # each instance starts over with an empty buffer
                instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
        else:
            self.sequence.append(self.unrecognized_tokens)
            self.has_unrecognized = True
            # create another instance
            self.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])

    def eat_unrecognized(self, token, pos):
        # Buffer a token that did not match any concept.
        if self.is_locked:
            return
        self.debug.append(token)
        self.unrecognized_tokens.add_token(token, pos)

    def finalize(self):
        """
        Called once the token stream is exhausted: flush pending unrecognized
        tokens, finalize forked clones, and record an error when a concept was
        still waiting for more tokens.
        """
        if len(self.sequence) > 0:
            self.manage_unrecognized()
        for forked in self.forked:
            # manage that some clones may have been forked
            forked.finalize()
        if self.expected_tokens:
            # NOTE(review): TokensNodeFound is a dataclass extending ErrorNode;
            # confirm this single-argument construction matches its field list.
            self.errors.append(TokensNodeFound(self.expected_tokens))

    def clone(self):
        """Copy this helper: lists are copied, nodes cloned, context shared."""
        clone = AtomConceptParserHelper(self.context)
        clone.debug = self.debug[:]
        clone.sequence = self.sequence[:]
        clone.current_concept = self.current_concept.clone() if self.current_concept else None
        clone.unrecognized_tokens = self.unrecognized_tokens.clone()
        clone.expected_tokens = self.expected_tokens[:] if self.expected_tokens else None
        clone.is_locked = self.is_locked
        clone.errors = self.errors[:]
        clone.has_unrecognized = self.has_unrecognized
        # NOTE(review): self.forked is intentionally(?) not copied — confirm.
        return clone

    def _get_lexer_nodes_from_unrecognized(self):
        """
        Use the source of self.unrecognized_tokens to find concepts or source code
        :return: a list of node sequences, or None when nothing was recognized
        """
        res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
        only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
        if not only_parsers_results.status:
            return None
        return builtin_helpers.get_lexer_nodes(
            only_parsers_results.body.body,
            self.unrecognized_tokens.start,
            self.unrecognized_tokens.tokens)
class AtomNodeParser(BaseNodeParser):
    """
    Parser used to recognize atom concepts or sequences of atom concepts.
    An atom concept is a concept that does not have any property, though it may have a body.
    So, if 'one', 'two', 'three' are defined as atom concepts (with no property/parameter),
    this parser can recognize the sequence 'one two three'
    as [ConceptNode(one), ConceptNode(two), ConceptNode(three)].
    It can partly recognize 'one x$1!! two three'
    as [ConceptNode(one), UnrecognizedTokensNode(x$1!!), ConceptNode(two), ConceptNode(three)].
    It cannot recognize concepts with parameters (non atom),
    ex: 'one plus two' won't be recognized as ConceptNode(plus, one, two),
    it will be [ConceptNode(one), UnrecognizedTokensNode(plus), ConceptNode(two)].
    Note 'one plus two' will be recognized by the SyaParser.
    """
    def __init__(self, **kwargs):
        super().__init__("AtomNode", 50, **kwargs)
        self.enabled = False  # disabled by default — TODO confirm who enables it

    @staticmethod
    def _is_eligible(concept):
        """
        Predicate that selects concepts that must be handled by AtomNodeParser
        :param concept:
        :return: True for property-less concepts and BNF-defined concepts
        """
        return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF

    def get_concepts_sequences(self):
        """
        Walk the token stream and return a list of AtomConceptParserHelper,
        one per candidate interpretation of the input.
        """
        forked = []

        def _add_forked_to_concept_parser_helpers():
            # check if some new forked helpers were created during this step
            for parser in concept_parser_helpers:
                if len(parser.forked) > 0:
                    forked.extend(parser.forked)
                    parser.forked.clear()
            if len(forked) > 0:
                concept_parser_helpers.extend(forked)
                forked.clear()

        concept_parser_helpers = [AtomConceptParserHelper(self.context)]
        while self.next_token(False):
            for concept_parser in concept_parser_helpers:
                concept_parser.reset()
            token = self.token
            try:
                # first, let helpers that are mid-concept try to consume the token
                for concept_parser in concept_parser_helpers:
                    if concept_parser.eat_token(self.token, self.pos):
                        concept_parser.lock()
                concepts = self.get_concepts(token, self._is_eligible)
                if not concepts:
                    # token starts no known concept: buffer it as unrecognized
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_unrecognized(token, self.pos)
                    continue
                if len(concepts) == 1:
                    for concept_parser in concept_parser_helpers:
                        concept_parser.eat_concept(concepts[0], self.pos)
                    continue
                # make the cartesian product
                temp_res = []
                for concept_parser in concept_parser_helpers:
                    if concept_parser.is_locked:
                        # It means that it already ate the token
                        # so simply add it, do not clone
                        temp_res.append(concept_parser)
                        continue
                    for concept in concepts:
                        clone = concept_parser.clone()
                        temp_res.append(clone)
                        clone.eat_concept(concept, self.pos)
                concept_parser_helpers = temp_res
            finally:
                _add_forked_to_concept_parser_helpers()
        # make sure that remaining items in stack are moved to out
        for concept_parser in concept_parser_helpers:
            concept_parser.reset()
            concept_parser.finalize()
        _add_forked_to_concept_parser_helpers()
        return concept_parser_helpers

    def get_valid(self, concept_parser_helpers):
        """
        Filter helpers: drop those with errors or empty sequences, re-attach
        real token slices to each node, and deduplicate (helpers compare by
        their sequence).
        """
        valid_parser_helpers = []  # each entry is a helper carrying a whole sequence (list) of nodes
        for parser_helper in concept_parser_helpers:
            if parser_helper.has_error():
                continue
            if len(parser_helper.sequence) == 0:
                continue
            for node in parser_helper.sequence:
                # rebuild each node's token slice and source text from the real stream
                node.tokens = self.tokens[node.start:node.end + 1]
                node.fix_source()
            if parser_helper in valid_parser_helpers:
                continue
            valid_parser_helpers.append(parser_helper)
        return valid_parser_helpers

    def parse(self, context, parser_input):
        """
        Parse *parser_input* into sequences of atom ConceptNodes.
        Returns one parser result, a list of results when several alternatives
        are valid, or an IS_EMPTY / ERROR / NOT_FOR_ME result.
        """
        if parser_input == "":
            return context.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.IS_EMPTY)
            )
        if not self.reset_parser(context, parser_input):
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        parser_helpers = self.get_valid(self.get_concepts_sequences())
        if len(parser_helpers):
            ret = []
            for parser_helper in parser_helpers:
                # a result is marked successful only when nothing was left unrecognized
                ret.append(
                    self.sheerka.ret(
                        self.name,
                        not parser_helper.has_unrecognized,
                        self.sheerka.new(
                            BuiltinConcepts.PARSER_RESULT,
                            parser=self,
                            source=parser_input,
                            body=parser_helper.sequence,
                            try_parsed=parser_helper.sequence)))
            if len(ret) == 1:
                self.log_result(context, parser_input, ret[0])
                return ret[0]
            else:
                self.log_multiple_results(context, parser_input, ret)
                return ret
        else:
            return self.sheerka.ret(
                self.name,
                False,
                context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
+669
View File
@@ -0,0 +1,669 @@
from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
DEBUG_COMPILED = True
@dataclass()
class LexerNode(Node):
    """
    Base node produced by the node parsers: a span [start, end] over the
    token stream, the tokens themselves, and their textual source.
    """
    start: int  # starting index in the tokens list
    end: int  # ending index in the tokens list
    tokens: list = None  # tokens
    source: str = None  # string representation of what was parsed

    def __post_init__(self):
        # Derive the source text from the tokens when the caller did not supply it.
        if self.source is None:
            self.source = BaseParser.get_text_from_tokens(self.tokens)

    def __eq__(self, other):
        if not isinstance(other, LexerNode):
            return False
        return (self.start, self.end, self.source, self.tokens) == \
               (other.start, other.end, other.source, other.tokens)

    def fix_source(self, force=True):
        """Recompute `source` from `tokens`; skipped only when force=False and source is already set."""
        if not force and self.source is not None:
            return self
        self.source = BaseParser.get_text_from_tokens(self.tokens)
        return self
class UnrecognizedTokensNode(LexerNode):
    """
    Buffer node that collects consecutive tokens no parser recognized.
    start/end == -1 means the buffer is empty.
    """
    def __init__(self, start, end, tokens):
        super().__init__(start, end, tokens)
        self.is_frozen = False  # when frozen, add_token() refuses new tokens
        self.parenthesis_count = 0  # open-minus-closed parenthesis balance of the buffered tokens

    def freeze(self):
        self.is_frozen = True

    def reset(self):
        # Back to the empty state (positions -1, no tokens).
        self.start = self.end = -1
        self.tokens.clear()
        self.is_frozen = False
        self.parenthesis_count = 0

    def has_open_paren(self):
        return self.parenthesis_count > 0

    def add_token(self, token, pos):
        """
        Append *token* located at stream index *pos*; raises when frozen.
        A gap of exactly one position since the previous token is filled with
        a synthetic whitespace token so the source text stays coherent.
        """
        if self.is_frozen:
            raise Exception("The node is frozen")
        if self.end != -1 and pos == self.end + 2:
            # add the missing whitespace
            p = self.tokens[-1]  # previous token
            self.tokens.append(Token(TokenKind.WHITESPACE, " ", p.index + 1, p.line, p.column + 1))
        self.tokens.append(token)
        self.end = pos
        if self.start == -1:
            self.start = pos
        if token.type == TokenKind.LPAR:
            self.parenthesis_count += 1
        if token.type == TokenKind.RPAR:
            self.parenthesis_count -= 1
        return self

    def not_whitespace(self):
        return not self.is_whitespace()

    def is_whitespace(self):
        # True when every buffered token is whitespace or newline.
        for t in self.tokens:
            if t.type not in (TokenKind.WHITESPACE, TokenKind.NEWLINE):
                return False
        return True

    def is_empty(self):
        return len(self.tokens) == 0

    def __eq__(self, other):
        # utnode and UTN are the lightweight tester helpers defined later in
        # this module (resolved at call time, so the forward reference is fine).
        if isinstance(other, utnode):
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source
        if isinstance(other, UTN):
            # delegate to the tester's own comparison
            return other == self
        if not isinstance(other, UnrecognizedTokensNode):
            return False
        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"

    def clone(self):
        # Copy with its own tokens list so mutations don't leak between copies.
        clone = UnrecognizedTokensNode(self.start, self.end, self.tokens[:])
        clone.is_frozen = self.is_frozen
        clone.parenthesis_count = self.parenthesis_count
        return clone
class ConceptNode(LexerNode):
    """
    Returned by the BnfNodeParser
    It represents a recognized concept
    """
    def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
        super().__init__(start, end, tokens, source)
        self.concept = concept
        self.underlying = underlying  # NOTE(review): presumably the node this concept was built from — confirm
        self.fix_source(False)  # only compute source when it is still unset

    def __eq__(self, other):
        # Also supports comparison against the tester helpers
        # (CN, CNC, cnode, short_cnode) defined elsewhere in this module.
        if id(self) == id(other):
            return True
        if isinstance(other, (CN, CNC)):
            # delegate to the tester's looser comparison
            return other == self
        if isinstance(other, cnode):
            return self.concept.key == other.concept_key and \
                self.start == other.start and \
                self.end == other.end and \
                self.source == other.source
        if isinstance(other, short_cnode):
            return self.concept.key == other.concept_key and self.source == other.source
        if not isinstance(other, ConceptNode):
            return False
        return self.concept == other.concept and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.underlying == other.underlying

    def __hash__(self):
        return hash((self.concept, self.start, self.end, self.source, self.underlying))

    def __repr__(self):
        text = f"ConceptNode(concept='{self.concept}', source='{self.source}', start={self.start}, end={self.end}"
        if DEBUG_COMPILED:
            # also dump the concept's compiled properties when debugging
            for k, v in self.concept.compiled.items():
                text += f", {k}='{v}'"
        return text + ")"

    def clone(self):
        # do we need to clone the concept as well ?
        clone = ConceptNode(self.concept, self.start, self.end, self.tokens, self.source, self.underlying)
        return clone
class SourceCodeNode(LexerNode):
    """
    Node wrapping a piece of recognized source code (e.g. Python code).
    """
    def __init__(self, node, start, end, tokens=None, source=None, return_value=None):
        super().__init__(start, end, tokens, source)
        # The PythonNode (or whatever language node) that is found.
        self.node = node
        # Original result of the parsing.
        self.return_value = return_value

    def __eq__(self, other):
        # scnode is the lightweight positional tester tuple defined in this module.
        if isinstance(other, scnode):
            return (self.start, self.end, self.source) == \
                   (other.start, other.end, other.source)
        if isinstance(other, SourceCodeNode):
            return self.node == other.node and \
                   (self.start, self.end, self.source) == \
                   (other.start, other.end, other.source)
        return False

    def __hash__(self):
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeWithConceptNode(LexerNode):
    """
    Kind of temporary version for SourceCodeNode.
    We know that there is some code and that there are some concepts,
    but the glue between them is not made yet,
    so all the nodes are pushed into one big bag.
    """
    def __init__(self, first_node, last_node, content_nodes=None):
        # NOTE(review): 9999 acts as a "larger than any real index" sentinel
        # for start (sys.maxsize would be safer for very long inputs — confirm).
        super().__init__(9999, -1, None)
        self.first = first_node
        self.last = last_node
        self.nodes = content_nodes or []
        self.has_unrecognized = False
        self.fix_all_pos()

    def add_node(self, node):
        """Append *node* to the bag and widen the start/end span accordingly."""
        self.nodes.append(node)
        self.fix_pos(node)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if not isinstance(other, SourceCodeWithConceptNode):
            return False
        if self.start != other.start or self.end != other.end:
            return False
        if self.first != other.first:
            return False
        if self.last != other.last:
            return False
        if len(self.nodes) != len(other.nodes):
            return False
        for self_node, other_node in zip(self.nodes, other.nodes):
            if self_node != other_node:
                return False
        # at last
        return True

    def __hash__(self):
        return hash((self.first, self.last, len(self.nodes)))

    def __repr__(self):
        return f"SourceCodeWithConceptNode(start={self.start}, end={self.end}, source='{self.source}')"

    def fix_all_pos(self):
        # Widen the span with first, last and every content node.
        for n in [self.first, self.last] + self.nodes:
            self.fix_pos(n)

    def fix_pos(self, node):
        """Extend [start, end] so it covers *node*'s position, when it has one."""
        if hasattr(node, "start") and node.start is not None:
            if node.start < self.start:
                self.start = node.start
        if hasattr(node, "end") and node.end is not None:
            if node.end > self.end:
                self.end = node.end
        return self

    def pseudo_fix_source(self):
        """Best-effort reconstruction of `source` from the bagged nodes."""
        self.source = self.first.source
        for n in self.nodes:
            self.source += " "
            if hasattr(n, "source"):
                self.source += n.source
            elif hasattr(n, "concept"):
                self.source += str(n.concept)
            else:
                self.source += " unknown"
        self.source += self.last.source
        return self

    def clone(self):
        # Bug fix: copy the nodes list — it used to be shared with the clone,
        # so add_node() on one instance silently mutated the other (every other
        # clone() in this module copies its lists). Also preserve the
        # has_unrecognized flag, which was previously reset to False.
        clone = SourceCodeWithConceptNode(self.first, self.last, self.nodes[:])
        clone.has_unrecognized = self.has_unrecognized
        return clone
@dataclass()
class GrammarErrorNode(ErrorNode):
    """Error node that carries a grammar-related error message."""
    message: str
class SyaAssociativity(Enum):
    """
    Operator associativity used in sya_definitions
    (stored as (precedence, associativity) tuples — see BaseNodeParser).
    """
    Left = "left"
    Right = "right"
    No = "No"

    def __repr__(self):
        # repr shows just the raw value, e.g. left / right / No
        return self.value
# Lightweight positional tester tuples: the node classes above accept them in
# their __eq__ implementations so tests can compare against plain tuples.
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("utnode", "start end source")
scnode = namedtuple("scnode", "start end source")
@dataclass(init=False)
class SCWC:
    """
    SourceNodeWithConcept tester class.
    It matches with a SourceNodeWithConcept
    but it's easier to instantiate during the tests.
    """
    first: LexerNode
    last: LexerNode
    content: tuple

    def __init__(self, first, last, *args):
        # Any extra positional arguments become the expected content tuple.
        self.first, self.last, self.content = first, last, args
class HelperWithPos:
    """
    Mixin for tester classes that track a (start, end) span.

    A bound given at construction time is "fixed" and never updated;
    an unset bound is widened by successive fix_pos() calls.
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end
        # Bounds supplied by the caller are frozen for the object's lifetime.
        self.start_is_fixed = start is not None
        self.end_is_fixed = end is not None

    @staticmethod
    def _coord(node, attr, index):
        # Accept either an object exposing start/end or a plain (start, end) tuple.
        if hasattr(node, attr):
            return getattr(node, attr)
        if isinstance(node, tuple):
            return node[index]
        return None

    def fix_pos(self, node):
        """Widen the tracked span with *node*'s position; returns self for chaining."""
        if not self.start_is_fixed:
            candidate = self._coord(node, "start", 0)
            if candidate is not None and (self.start is None or candidate < self.start):
                self.start = candidate
        if not self.end_is_fixed:
            candidate = self._coord(node, "end", 1)
            if candidate is not None and (self.end is None or candidate > self.end):
                self.end = candidate
        return self
class CN(HelperWithPos):
    """
    ConceptNode tester class
    It matches with ConceptNode but with less constraints:
    CN == ConceptNode if the concept key matches, and start/end match
    when they were given.
    """
    def __init__(self, concept, start=None, end=None, source=None):
        """
        :param concept: Concept or concept_key (only the key is used anyway)
        :param start:
        :param end:
        :param source:
        """
        super().__init__(start, end)
        self.concept_key = concept.key if isinstance(concept, Concept) else concept
        self.source = source
        self.concept = concept if isinstance(concept, Concept) else None

    def fix_source(self, str_tokens):
        # Rebuild the expected source from a list of token strings.
        self.source = "".join(str_tokens)
        return self

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            # NOTE(review): source is deliberately(?) not compared against a
            # real ConceptNode — confirm this looser match is intended.
            return True
        if not isinstance(other, CN):
            return False
        return self.concept_key == other.concept_key and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.concept_key, self.start, self.end, self.source))

    def __repr__(self):
        if self.concept:
            txt = f"CN(concept='{self.concept}'"
        else:
            txt = f"CN(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
class CNC(CN):
    """
    ConceptNode for Compiled tester class
    It matches with ConceptNode
    But focuses on the 'compiled' property of the concept
    CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled

    NOTE(review): CNC defines __eq__ without __hash__, which makes it
    unhashable (unlike CN) — confirm that is acceptable for a tester class.
    """
    def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
        """
        :param concept_key: Concept or concept key (see CN)
        :param kwargs: the expected entries of concept.compiled
        """
        super().__init__(concept_key, start, end, source)
        self.compiled = kwargs

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, ConceptNode):
            if other.concept is None:
                return False
            if other.concept.key != self.concept_key:
                return False
            if self.start is not None and self.start != other.start:
                return False
            if self.end is not None and self.end != other.end:
                return False
            return self.compiled == other.concept.compiled  # assert instead of return to help debugging tests
        if not isinstance(other, CNC):
            return False
        return self.concept_key == other.concept_key and \
            self.start == other.start and \
            self.end == other.end and \
            self.source == other.source and \
            self.compiled == other.compiled

    def __repr__(self):
        if self.concept:
            txt = f"CNC(concept='{self.concept}'"
        else:
            txt = f"CNC(concept_key='{self.concept_key}'"
        txt += f", source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        for k, v in self.compiled.items():
            txt += f", {k}='{v}'"
        return txt + ")"
class BaseNodeParser(BaseParser):
    """
    Base class for parsers that produce LexerNode sequences.
    Holds the token stream state (token/pos/tokens) and an index of concepts
    keyed by their first non-variable keyword.
    """
    def __init__(self, name, priority, **kwargs):
        super().__init__(name, priority)
        if 'sheerka' in kwargs:
            sheerka = kwargs.get("sheerka")
            self.init_from_sheerka(sheerka)
        else:
            self.concepts_by_first_keyword = None
            self.sya_definitions = None
        self.token = None  # current token
        self.pos = -1  # index of the current token in self.tokens
        self.tokens = None
        self.context: ExecutionContext = None
        self.text = None  # raw input of the current parse
        self.sheerka = None

    def init_from_sheerka(self, sheerka):
        """
        Use the definitions from Sheerka to initialize
        :param sheerka:
        :return:
        """
        self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
        # NOTE(review): when sheerka.sya_definitions is falsy, self.sya_definitions
        # is left untouched on this path — confirm callers tolerate that.
        if sheerka.sya_definitions:
            self.sya_definitions = {}
            for k, v in sheerka.sya_definitions.items():
                # stored as (precedence:int, associativity:SyaAssociativity)
                self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))

    def reset_parser(self, context, text):
        """
        Prepare a new parse of *text*; returns False (after recording the
        error) when tokenization fails.
        """
        self.context = context
        self.sheerka = context.sheerka
        self.text = text
        try:
            self.tokens = list(self.get_input_as_tokens(text))
        except LexerError as e:
            self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
            return False
        self.token = None
        self.pos = -1
        return True

    def add_error(self, error, next_token=True):
        # Record the error and optionally skip past the offending token.
        self.error_sink.append(error)
        if next_token:
            self.next_token()
        return error

    def get_token(self) -> Token:
        return self.token

    def next_token(self, skip_whitespace=True):
        """
        Advance to the next token (optionally skipping whitespace/newlines);
        returns False once EOF is reached.
        """
        if self.token and self.token.type == TokenKind.EOF:
            return False
        self.pos += 1
        self.token = self.tokens[self.pos]
        if skip_whitespace:
            # assumes the stream ends with an EOF token, which stops this loop
            while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
                self.pos += 1
                self.token = self.tokens[self.pos]
        return self.token.type != TokenKind.EOF

    def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
        """
        To quickly find a concept, we store them in a hash where the key is the first token of the concept
        example :
        Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
        So the key to use will be "foo"
        Concept("a foo").def_prop("a") -> first token is "foo"
        Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
        Note that under the same key, there can be multiple entries
        a B-Tree may be a better implementation in the future
        We also store sya_definitions where each value is a tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
        :param context:
        :param concepts: list[Concept]
        :param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
        :param use_sheerka: first init with the definitions from Sheerka
        :return:
        """
        self.context = context
        self.sheerka = context.sheerka
        if use_sheerka:
            self.init_from_sheerka(self.sheerka)
        if sya_definitions:
            if self.sya_definitions:
                self.sya_definitions.update(sya_definitions)
            else:
                self.sya_definitions = sya_definitions
        if self.concepts_by_first_keyword is None:
            self.concepts_by_first_keyword = {}
        for concept in concepts:
            keywords = concept.key.split()
            for keyword in keywords:
                # register under the first keyword that is not a variable
                if keyword.startswith(VARIABLE_PREFIX):
                    continue
                self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
                break
        return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)

    def get_concepts(self, token, to_keep, to_map=None):
        """
        Tries to find if there are concepts that match the value of the token
        :param token:
        :param to_keep: predicate to tell if the concept is eligible
        :param to_map: optional transform applied to each kept concept
        :return: the (possibly empty) list of matching concepts, or None when
                 the token's value is not indexed at all
        """
        if token.type == TokenKind.STRING:
            # strip the surrounding quotes kept in the token value
            name = token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            # keyword token values wrap the string — presumably an enum member; confirm
            name = token.value.value
        else:
            name = token.value
        result = []
        if name in self.concepts_by_first_keyword:
            for concept_id in self.concepts_by_first_keyword[name]:
                concept = self.sheerka.get_by_id(concept_id)
                if not to_keep(concept):
                    continue
                concept = to_map(concept) if to_map else concept
                result.append(concept)
            return result
        return None

    @staticmethod
    def get_token_value(token):
        # Normalized string value of a token (quotes stripped for strings,
        # underlying value for keywords).
        if token.type == TokenKind.STRING:
            return token.value[1:-1]
        elif token.type == TokenKind.KEYWORD:
            return token.value.value
        else:
            return token.value
class UTN(HelperWithPos):
    """
    Tester class for UnrecognizedTokensNode
    compare the source, and start, end if defined
    """
    def __init__(self, source, start=None, end=None):
        """
        :param source: expected source string
        :param start: expected start index (optional)
        :param end: expected end index (optional)
        """
        super().__init__(start, end)
        self.source = source

    def __eq__(self, other):
        if id(self) == id(other):
            return True
        if isinstance(other, UnrecognizedTokensNode):
            # NOTE(review): start/end are compared even when left as None here,
            # unlike CN's looser matching — confirm this is intended.
            return self.start == other.start and \
                self.end == other.end and \
                self.source == other.source
        if not isinstance(other, UTN):
            return False
        return self.start == other.start and \
            self.end == other.end and \
            self.source == other.source

    def __hash__(self):
        return hash((self.source, self.start, self.end))

    def __repr__(self):
        txt = f"UTN( source='{self.source}'"
        if self.start is not None:
            txt += f", start={self.start}"
        if self.end is not None:
            txt += f", end={self.end}"
        return txt + ")"
+54 -4
View File
@@ -1,8 +1,8 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept
from core.tokenizer import TokenKind, Keywords, Token
from core.tokenizer import TokenKind, Keywords, Token, Tokenizer
from core.sheerka_logger import get_logger
import core.utils
import logging
@@ -77,7 +77,6 @@ class BaseParser:
self.priority = priority
self.enabled = enabled
self.has_error = False
self.error_sink = []
def __eq__(self, other):
@@ -91,9 +90,13 @@ class BaseParser:
def __repr__(self):
return self.name
def parse(self, context, text):
def parse(self, context, parser_input):
pass
@property
def has_error(self):
return len(self.error_sink) > 0
def log_result(self, context, source, ret):
if not self.log.isEnabledFor(logging.DEBUG):
return
@@ -132,6 +135,53 @@ class BaseParser:
body=self.error_sink if self.has_error else tree,
try_parsed=try_parse)
def get_input_as_text(self, parser_input, custom_switcher=None):
if isinstance(parser_input, list):
return self.get_text_from_tokens(parser_input, custom_switcher)
if isinstance(parser_input, ParserResultConcept):
parser_input = parser_input.source
if "c:" in parser_input:
return self.get_text_from_tokens(list(Tokenizer(parser_input)), custom_switcher)
return parser_input
def get_input_as_tokens(self, parser_input):
if isinstance(parser_input, list):
return self.add_eof_if_needed(parser_input)
if isinstance(parser_input, ParserResultConcept):
if parser_input.tokens:
return self.add_eof_if_needed(parser_input.tokens)
else:
return Tokenizer(parser_input.source)
return Tokenizer(parser_input)
def get_input_as_lexer_nodes(self, parser_input, expected_parser=None):
if not isinstance(parser_input, ParserResultConcept):
return None
if expected_parser and parser_input.parser != expected_parser:
return None
if len(parser_input.value) == 0:
return None
for node in parser_input.value:
from parsers.BaseNodeParser import LexerNode
if not isinstance(node, LexerNode):
return None
return parser_input.value
@staticmethod
def add_eof_if_needed(lst):
if len(lst) == 0 or not lst[-1].type == TokenKind.EOF:
lst.append(Token(TokenKind.EOF, "", -1, -1, -1))
return lst
@staticmethod
def get_text_from_tokens(tokens, custom_switcher=None):
if tokens is None:
@@ -9,147 +9,17 @@
from collections import namedtuple
from dataclasses import dataclass
from collections import defaultdict
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.concept import Concept, ConceptParts, DoNotResolve
from core.tokenizer import TokenKind, Tokenizer, Token
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, ErrorNode
import core.utils
@dataclass()
class LexerNode(Node):
start: int # starting index in the tokens list
end: int # ending index in the tokens list
tokens: list = None # tokens
source: str = None # string representation of what was parsed
def __post_init__(self):
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if not isinstance(other, LexerNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.tokens == other.tokens
class UnrecognizedTokensNode(LexerNode):
def __init__(self, start, end, tokens):
super().__init__(start, end, tokens)
def add_token(self, token, pos):
self.tokens.append(token)
self.end = pos
def fix_source(self):
self.source = BaseParser.get_text_from_tokens(self.tokens)
def not_whitespace(self):
return not (len(self.tokens) == 1 and self.tokens[0].type in (TokenKind.WHITESPACE, TokenKind.NEWLINE))
def __eq__(self, other):
if isinstance(other, utnode):
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
if not isinstance(other, UnrecognizedTokensNode):
return False
return self.start == other.start and \
self.end == other.end and \
self.source == other.source
def __hash__(self):
return hash((self.start, self.end, self.source))
def __repr__(self):
return f"UnrecognizedTokensNode(start={self.start}, end={self.end}, source='{self.source}')"
class ConceptNode(LexerNode):
"""
Returned by the ConceptLexerParser
It represents a recognized concept
"""
def __init__(self, concept, start, end, tokens=None, source=None, underlying=None):
super().__init__(start, end, tokens, source)
self.concept = concept
self.underlying = underlying
if self.source is None:
self.source = BaseParser.get_text_from_tokens(self.tokens)
def __eq__(self, other):
if isinstance(other, cnode):
return self.concept.key == other.concept_key and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source
if isinstance(other, short_cnode):
return self.concept.key == other.concept_key and self.source == other.source
if not isinstance(other, ConceptNode):
return False
return self.concept == other.concept and \
self.start == other.start and \
self.end == other.end and \
self.source == other.source and \
self.underlying == other.underlying
def __hash__(self):
return hash((self.concept, self.start, self.end, self.source, self.underlying))
def __repr__(self):
return f"ConceptNode(concept='{self.concept}', start={self.start}, end={self.end}, source='{self.source}')"
class SourceCodeNode(LexerNode):
    """
    Returned when some source code (like Python source code) is recognized.
    """

    def __init__(self, node, start, end, tokens=None, source=None):
        super().__init__(start, end, tokens, source)
        # The PythonNode (or whatever language node) that is found
        self.node = node

    def __eq__(self, other):
        # scnode shorthand: compare span and source only.
        if isinstance(other, scnode):
            return (self.start, self.end, self.source) == \
                   (other.start, other.end, other.source)
        if isinstance(other, SourceCodeNode):
            return (self.node, self.start, self.end, self.source) == \
                   (other.node, other.start, other.end, other.source)
        return False

    def __hash__(self):
        # self.node is not part of the hash; __eq__-equal objects always share
        # (start, end, source), so the eq/hash contract still holds.
        return hash((self.start, self.end, self.source))

    def __repr__(self):
        return f"SourceCodeNode(start={self.start}, end={self.end}, source='{self.source}')"
# Lightweight namedtuple shorthands accepted by the node classes' __eq__
# methods (ConceptNode, UnrecognizedTokensNode, SourceCodeNode) for quick
# structural comparison without constructing full node objects.
# NOTE(review): the typename passed to namedtuple deliberately mirrors the
# node class (e.g. "ConceptNode"), not the variable name, so reprs line up
# with the real nodes — presumably for readable test/diff output; confirm.
cnode = namedtuple("ConceptNode", "concept_key start end source")
short_cnode = namedtuple("ConceptNode", "concept_key source")
utnode = namedtuple("UnrecognizedTokensNode", "start end source")
scnode = namedtuple("SourceCodeNode", "start end source")
class NonTerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, tokens, children=None):
@@ -180,7 +50,7 @@ class NonTerminalNode(LexerNode):
class TerminalNode(LexerNode):
"""
Returned by the ConceptLexerParser
Returned by the BnfNodeParser
"""
def __init__(self, parsing_expression, start, end, value):
@@ -205,11 +75,6 @@ class TerminalNode(LexerNode):
return hash((self.parsing_expression, self.start, self.end, self.value))
@dataclass()
class GrammarErrorNode(ErrorNode):
message: str
@dataclass()
class UnknownConceptNode(ErrorNode):
concept_key: str
@@ -574,9 +439,9 @@ class StrMatch(Match):
return None
class ConceptLexerParser(BaseParser):
class BnfNodeParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptLexer", 50)
super().__init__("BnfNode", 50)
if 'grammars' in kwargs:
self.concepts_grammars = kwargs.get("grammars")
elif 'sheerka' in kwargs:
@@ -595,7 +460,6 @@ class ConceptLexerParser(BaseParser):
self.sheerka = None
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -606,16 +470,11 @@ class ConceptLexerParser(BaseParser):
self.sheerka = context.sheerka
self.text = text
if isinstance(text, str):
try:
self.tokens = list(Tokenizer(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
else:
self.tokens = list(text)
self.tokens.append(Token(TokenKind.EOF, "", -1, -1, -1)) # make sure to finish with end of file token
try:
self.tokens = list(self.get_input_as_tokens(text))
except core.tokenizer.LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
self.token = None
self.pos = -1
@@ -785,15 +644,15 @@ class ConceptLexerParser(BaseParser):
removed_concepts.append(e)
return removed_concepts
def parse(self, context, text):
if text == "":
def parse(self, context, parser_input):
if parser_input == "":
return context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.IS_EMPTY)
)
if not self.reset_parser(context, text):
if not self.reset_parser(context, parser_input):
return self.sheerka.ret(
self.name,
False,
@@ -877,15 +736,15 @@ class ConceptLexerParser(BaseParser):
self.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
source=parser_input,
body=choice,
try_parsed=choice)))
if len(ret) == 1:
self.log_result(context, text, ret[0])
self.log_result(context, parser_input, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text, ret)
self.log_multiple_results(context, parser_input, ret)
return ret
def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
@@ -915,6 +774,11 @@ class ConceptLexerParser(BaseParser):
_concept.compiled[prop_name] = new_value
def _look_for_concept_match(_underlying):
"""
At some point, there is either an StrMatch or a ConceptMatch,
that allowed the recognition.
Look for the ConceptMatch, with recursion if needed
"""
if isinstance(_underlying.parsing_expression, ConceptExpression):
return _underlying
@@ -929,6 +793,7 @@ class ConceptLexerParser(BaseParser):
def _get_underlying_value(_underlying):
concept_match_node = _look_for_concept_match(_underlying)
if concept_match_node:
# the value is a concept
if id(concept_match_node) in _underlying_value_cache:
result = _underlying_value_cache[id(concept_match_node)]
else:
@@ -936,6 +801,7 @@ class ConceptLexerParser(BaseParser):
result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
_underlying_value_cache[id(concept_match_node)] = result
else:
# the value is a string
result = DoNotResolve(_underlying.source)
return result
@@ -957,6 +823,7 @@ class ConceptLexerParser(BaseParser):
concept.compiled[ConceptParts.BODY] = value
if underlying.parsing_expression.rule_name:
_add_prop(concept, underlying.parsing_expression.rule_name, value)
# KSI : Why don't we set concept.metadata.need_validation to True ?
if isinstance(underlying, NonTerminalNode):
for node in underlying.children:
+3 -5
View File
@@ -5,7 +5,7 @@ from core.builtin_concepts import BuiltinConcepts
from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.ConceptLexerParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
@@ -30,7 +30,6 @@ class BnfParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("Bnf", 50, False)
# self.has_error = False
# self.error_sink = []
# self.name = BaseParser.PREFIX + "Bnf"
@@ -61,7 +60,6 @@ class BnfParser(BaseParser):
self.eat_white_space()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -115,11 +113,11 @@ class BnfParser(BaseParser):
token = self.get_token()
return token.type == second or token.type == first and self.next_after().type == second
def parse(self, context: ExecutionContext, text):
def parse(self, context: ExecutionContext, parser_input):
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_choice()
token = self.get_token()
+12 -11
View File
@@ -1,10 +1,14 @@
# try to match something like
# ConceptNode 'plus' ConceptNode
#
# Replaced by SyaNodeParser
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
import logging
multiple_concepts_parser = MultipleConceptsParser()
@@ -12,6 +16,7 @@ multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptsWithConcepts", 25)
self.enabled = False
@staticmethod
def get_tokens(nodes):
@@ -71,23 +76,19 @@ class ConceptsWithConceptsParser(BaseParser):
return concept
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
concept_key = self.get_key(nodes)
concept = sheerka.new(concept_key)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text.body))
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
concepts = concept if hasattr(concept, "__iter__") else [concept]
for concept in concepts:
@@ -101,7 +102,7 @@ class ConceptsWithConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=concept,
try_parsed=None)))
+8 -9
View File
@@ -110,7 +110,7 @@ class DefaultParser(BaseParser):
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "Default", 50)
BaseParser.__init__(self, "Default", 60)
self.lexer_iter = None
self._current = None
self.context: ExecutionContext = None
@@ -168,7 +168,6 @@ class DefaultParser(BaseParser):
self.next_token()
def add_error(self, error, next_token=True):
self.has_error = True
self.error_sink.append(error)
if next_token:
self.next_token()
@@ -188,19 +187,19 @@ class DefaultParser(BaseParser):
return
def parse(self, context, text):
def parse(self, context, parser_input):
# default parser can only manage string text
if not isinstance(text, str):
if not isinstance(parser_input, str):
ret = context.sheerka.ret(
self.name,
False,
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=text))
self.log_result(context, text, ret)
context.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
tree = None
try:
self.reset_parser(context, text)
self.reset_parser(context, parser_input)
tree = self.parse_statement()
except core.tokenizer.LexerError as e:
self.add_error(e, False)
@@ -211,7 +210,7 @@ class DefaultParser(BaseParser):
if self.has_error and isinstance(self.error_sink[0], CannotHandleErrorNode):
body = self.sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=self.error_sink)
else:
body = self.get_return_value_body(context.sheerka, text, tree, tree)
body = self.get_return_value_body(context.sheerka, parser_input, tree, tree)
# body = self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
@@ -224,7 +223,7 @@ class DefaultParser(BaseParser):
not self.has_error,
body)
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def parse_statement(self):
+5 -5
View File
@@ -10,12 +10,12 @@ class EmptyStringParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "EmptyString", 90)
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if isinstance(text, str) and text.strip() == "" or \
isinstance(text, list) and text == [] or \
text is None:
if isinstance(parser_input, str) and parser_input.strip() == "" or \
isinstance(parser_input, list) and parser_input == [] or \
parser_input is None:
ret = sheerka.ret(self.name, True, sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
@@ -24,5 +24,5 @@ class EmptyStringParser(BaseParser):
else:
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.NOT_FOR_ME))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
+12 -13
View File
@@ -16,26 +16,26 @@ class ExactConceptParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
def parse(self, context, text):
def parse(self, context, parser_input):
"""
text can be string, but text can also be an list of tokens
:param context:
:param text:
:param parser_input:
:return:
"""
context.log(f"Parsing '{text}'", self.name)
context.log(f"Parsing '{parser_input}'", self.name)
res = []
sheerka = context.sheerka
try:
words = self.get_words(text)
words = self.get_words(parser_input)
except LexerError as e:
context.log(f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
if len(words) > self.MAX_WORDS_SIZE:
context.log(f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=text))
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
recognized = False
for combination in self.combinations(words):
@@ -69,26 +69,25 @@ class ExactConceptParser(BaseParser):
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text if isinstance(text, str) else self.get_text_from_tokens(text),
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
body=concept,
try_parsed=concept)))
recognized = True
if recognized:
if len(res) == 1:
self.log_result(context, text, res[0])
self.log_result(context, parser_input, res[0])
else:
self.log_multiple_results(context, text, res)
self.log_multiple_results(context, parser_input, res)
return res
return res
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=text))
self.log_result(context, text, ret)
ret = sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.UNKNOWN_CONCEPT, body=parser_input))
self.log_result(context, parser_input, ret)
return ret
@staticmethod
def get_words(text):
tokens = iter(Tokenizer(text)) if isinstance(text, str) else text
def get_words(self, text):
tokens = self.get_input_as_tokens(text)
res = []
for t in tokens:
if t.type == TokenKind.EOF:
+12 -13
View File
@@ -1,18 +1,20 @@
# to be replaced by SyaNodeParser
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptLexerParser, UnrecognizedTokensNode, ConceptNode, SourceCodeNode
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = ConceptLexerParser()
concept_lexer_parser = BnfNodeParser()
class MultipleConceptsParser(BaseParser):
"""
Parser that will take the result of ConceptLexerParser and
Parser that will take the result of BnfNodeParser and
try to resolve the unrecognized tokens token by token
It is a success when it returns a list ConceptNode exclusively
@@ -20,6 +22,7 @@ class MultipleConceptsParser(BaseParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
self.enabled = False
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
@@ -40,16 +43,12 @@ class MultipleConceptsParser(BaseParser):
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
if not nodes:
return None
if not text.parser == concept_lexer_parser:
return None
sheerka = context.sheerka
nodes = text.value
nodes_found = [[]]
concepts_only = True
@@ -97,16 +96,16 @@ class MultipleConceptsParser(BaseParser):
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text.source,
source=parser_input.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, text.source, ret[0])
self.log_result(context, parser_input.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, text.source, ret)
self.log_multiple_results(context, parser_input.source, ret)
return ret
@staticmethod
+25 -24
View File
@@ -1,4 +1,4 @@
from core.builtin_concepts import BuiltinConcepts
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
@@ -6,7 +6,7 @@ import ast
import logging
import core.utils
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
log = logging.getLogger(__name__)
@@ -67,7 +67,7 @@ class PythonParser(BaseParser):
BaseParser.__init__(self, "Python", 50)
self.source = kwargs.get("source", "<undef>")
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
tree = None
@@ -76,15 +76,9 @@ class PythonParser(BaseParser):
}
try:
if isinstance(text, str) and "c:" in text:
source = self.get_text_from_tokens(list(Tokenizer(text)), python_switcher)
elif isinstance(text, str):
source = text
else:
source = self.get_text_from_tokens(text, python_switcher)
source = self.get_input_as_text(parser_input, python_switcher)
source = source.strip()
text = text if isinstance(text, str) else source
parser_input = parser_input if isinstance(parser_input, str) else source
# first, try to parse an expression
res, tree, error = self.try_parse_expression(source)
@@ -92,25 +86,32 @@ class PythonParser(BaseParser):
# then try to parse a statement
res, tree, error = self.try_parse_statement(source)
if not res:
self.has_error = True
error_node = PythonErrorNode(text, error)
error_node = PythonErrorNode(parser_input, error)
self.error_sink.append(error_node)
except LexerError as e:
self.has_error = True
self.error_sink.append(e)
ret = sheerka.ret(
self.name,
not self.has_error,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=text,
body=self.error_sink if self.has_error else PythonNode(text, tree),
try_parsed=None))
if self.has_error:
ret = sheerka.ret(
self.name,
False,
sheerka.new(
BuiltinConcepts.NOT_FOR_ME,
body=parser_input,
reason=self.error_sink))
else:
ret = sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input,
body=PythonNode(parser_input, tree),
try_parsed=None))
self.log_result(context, text, ret)
self.log_result(context, parser_input, ret)
return ret
def try_parse_expression(self, text):
+7 -8
View File
@@ -1,10 +1,11 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.ConceptLexerParser import ConceptNode
from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
multiple_concepts_parser = MultipleConceptsParser()
unrecognized_nodes_parser = UnrecognizedNodeParser()
class PythonWithConceptsParser(BaseParser):
@@ -20,15 +21,12 @@ class PythonWithConceptsParser(BaseParser):
res += c if c.isalnum() else "0"
return res
def parse(self, context, text):
def parse(self, context, parser_input):
sheerka = context.sheerka
if not sheerka.isinstance(text, BuiltinConcepts.PARSER_RESULT):
nodes = self.get_input_as_lexer_nodes(parser_input, unrecognized_nodes_parser)
if not nodes:
return None
if not text.parser == multiple_concepts_parser:
return None
nodes = text.body
source = ""
to_parse = ""
identifiers = {}
@@ -74,6 +72,7 @@ class PythonWithConceptsParser(BaseParser):
python_id = _get_identifier(concept)
to_parse += python_id
python_ids_mappings[python_id] = concept
else:
source += node.source
to_parse += node.source
File diff suppressed because it is too large Load Diff
+114
View File
@@ -0,0 +1,114 @@
from dataclasses import dataclass
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept
from parsers.BaseNodeParser import ConceptNode, UnrecognizedTokensNode, SourceCodeNode, SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
from core.builtin_helpers import only_successful, parse_unrecognized, get_lexer_nodes
import core.utils
# Parser names handed to builtin_helpers.parse_unrecognized when trying to
# resolve runs of unrecognized tokens (see parse / validate_concept_node).
PARSERS = ["EmptyString", "AtomNode", "BnfNode", "SyaNode", "Python"]
@dataclass()
class CannotParseNode(ErrorNode):
    """Error node carrying a span of tokens that could not be parsed."""
    # the UnrecognizedTokensNode that resisted every parser
    unrecognized: UnrecognizedTokensNode
class UnrecognizedNodeParser(BaseParser):
    """
    This parser comes after the other NodeParsers (Atom, Bnf or Sya)
    It will try to resolve all UnrecognizedTokensNode.
    """
    def __init__(self, **kwargs):
        super().__init__("UnrecognizedNode", 45)  # lower than AtomNode, BnfNode and SyaNode

    def add_error(self, error):
        # Accept either a single error or any iterable of errors.
        # NOTE(review): unlike other parsers' add_error, this one never sets
        # self.has_error, yet parse() branches on it — presumably has_error is
        # derived from error_sink in BaseParser; confirm against BaseParser.
        if hasattr(error, "__iter__"):
            self.error_sink.extend(error)
        else:
            self.error_sink.append(error)

    def parse(self, context, parser_input):
        """
        Resolve the UnrecognizedTokensNode entries in a lexer-node sequence.

        ConceptNode entries are validated (their compiled properties may hide
        unrecognized tokens); UnrecognizedTokensNode entries are re-parsed via
        PARSERS. Returns None when the input is not lexer nodes, a single ret
        concept for one resulting sequence, or a list of rets when several
        alternative sequences were produced.
        """
        sheerka = context.sheerka
        nodes = self.get_input_as_lexer_nodes(parser_input, None)
        if not nodes:
            # Not our kind of input; let another parser handle it.
            return None
        # Cartesian accumulation: each node may contribute several alternative
        # lexer nodes, so sequences_found holds every combination so far.
        sequences_found = [[]]
        has_unrecognized = False
        for node in nodes:
            if isinstance(node, ConceptNode):
                res = self.validate_concept_node(context, node)
                if not res.status:
                    self.add_error(res.body)
                else:
                    sequences_found = core.utils.product(sequences_found, [res.body])
            elif isinstance(node, UnrecognizedTokensNode):
                # Try the fallback parsers on the raw source of this node.
                res = parse_unrecognized(context, node.source, PARSERS)
                res = only_successful(context, res)
                if res.status:
                    lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
                    sequences_found = core.utils.product(sequences_found, lexer_nodes)
                else:
                    # Keep the unresolved node; the overall status becomes False.
                    sequences_found = core.utils.product(sequences_found, [node])
                    has_unrecognized = True
            else:  # cannot happen as of today :-)
                raise NotImplementedError()
        # concept with UnrecognizedToken in their properties is considered as fatal error
        if self.has_error:
            return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
        ret = []
        for choice in sequences_found:
            ret.append(
                sheerka.ret(
                    self.name,
                    not has_unrecognized,
                    sheerka.new(
                        BuiltinConcepts.PARSER_RESULT,
                        parser=self,
                        source=parser_input,
                        body=choice,
                        try_parsed=choice)))
        if len(ret) == 1:
            self.log_result(context, parser_input, ret[0])
            return ret[0]
        else:
            self.log_multiple_results(context, parser_input, ret)
            return ret

    def validate_concept_node(self, context, concept_node):
        """
        Check a ConceptNode for UnrecognizedTokensNode values hidden in its
        concept's compiled properties, resolving them in place via PARSERS.

        Returns a failing ret with the collected errors, or a successful ret
        wrapping the (possibly mutated) concept_node.
        """
        sheerka = context.sheerka
        errors = []

        def _validate_concept(concept):
            """
            Recursively browse the compiled properties in order to find unrecognized
            :param concept:
            :return:
            """
            for name, value in concept.compiled.items():
                if isinstance(value, Concept):
                    _validate_concept(value)
                elif isinstance(value, UnrecognizedTokensNode):
                    res = parse_unrecognized(context, value.tokens, PARSERS)
                    res = only_successful(context, res)  # keep only successful parsers
                    if res.status:
                        # Replace the unrecognized tokens with the parsed value.
                        concept.compiled[name] = res.body.body
                    else:
                        errors.append(sheerka.new(BuiltinConcepts.ERROR, body=f"Cannot parse '{value.source}'"))
        _validate_concept(concept_node.concept)
        if len(errors) > 0:
            return context.sheerka.ret(self.name, False, errors)
        else:
            return context.sheerka.ret(self.name, True, concept_node)