Refactored Caching, Refactored BnfNodeParser, Introduced Sphinx

This commit is contained in:
2020-05-12 17:21:10 +02:00
parent 7d3a490bc5
commit 6e343ba996
110 changed files with 13865 additions and 7540 deletions
+36 -26
@@ -1,12 +1,11 @@
import copy
from dataclasses import dataclass
from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import Concept, DEFINITION_TYPE_BNF
from core.tokenizer import TokenKind, Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode
from parsers.BaseParser import BaseParser, UnexpectedTokenErrorNode, ErrorNode
from core.concept import DEFINITION_TYPE_BNF
from core.tokenizer import Tokenizer
from parsers.BaseNodeParser import BaseNodeParser, ConceptNode, UnrecognizedTokensNode, SourceCodeNode
from parsers.BaseParser import UnexpectedTokenErrorNode, ErrorNode
PARSERS = ["BnfNode", "SyaNode", "Python"]
@@ -141,7 +140,11 @@ class AtomConceptParserHelper:
self.unrecognized_tokens.fix_source()
# try to recognize concepts
nodes_sequences = self._get_lexer_nodes_from_unrecognized()
nodes_sequences = builtin_helpers.get_lexer_nodes_from_unrecognized(
self.context,
self.unrecognized_tokens,
PARSERS)
if nodes_sequences:
instances = [self]
for i in range(len(nodes_sequences) - 1):
@@ -152,7 +155,7 @@ class AtomConceptParserHelper:
for instance, node_sequence in zip(instances, nodes_sequences):
for node in node_sequence:
instance.sequence.append(node)
if isinstance(node, UnrecognizedTokensNode) or \
if isinstance(node, (UnrecognizedTokensNode, SourceCodeNode)) or \
hasattr(node, "unrecognized_tokens") and node.unrecognized_tokens:
instance.has_unrecognized = True
instance.unrecognized_tokens = UnrecognizedTokensNode(-1, -1, [])
@@ -193,22 +196,22 @@ class AtomConceptParserHelper:
clone.has_unrecognized = self.has_unrecognized
return clone
def _get_lexer_nodes_from_unrecognized(self):
"""
Use the source of self.unrecognized_tokens to find concepts or source code
:return:
"""
res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
if not only_parsers_results.status:
return None
return builtin_helpers.get_lexer_nodes(
only_parsers_results.body.body,
self.unrecognized_tokens.start,
self.unrecognized_tokens.tokens)
# def _get_lexer_nodes_from_unrecognized(self):
# """
# Use the source of self.unrecognized_tokens to find concepts or source code
# :return:
# """
#
# res = builtin_helpers.parse_unrecognized(self.context, self.unrecognized_tokens.source, PARSERS)
# only_parsers_results = builtin_helpers.only_parsers_results(self.context, res)
#
# if not only_parsers_results.status:
# return None
#
# return builtin_helpers.get_lexer_nodes(
# only_parsers_results.body.body,
# self.unrecognized_tokens.start,
# self.unrecognized_tokens.tokens)
class AtomNodeParser(BaseNodeParser):
@@ -230,7 +233,6 @@ class AtomNodeParser(BaseNodeParser):
def __init__(self, **kwargs):
super().__init__("AtomNode", 50, **kwargs)
self.enabled = False
@staticmethod
def _is_eligible(concept):
@@ -239,7 +241,8 @@ class AtomNodeParser(BaseNodeParser):
:param concept:
:return:
"""
return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF
# return len(concept.metadata.props) == 0 or concept.metadata.definition_type == DEFINITION_TYPE_BNF
return len(concept.metadata.variables) == 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF
def get_concepts_sequences(self):
@@ -255,6 +258,13 @@ class AtomNodeParser(BaseNodeParser):
concept_parser_helpers.extend(forked)
forked.clear()
def _get_concepts_by_name(name):
other_concepts = self.sheerka.get_by_name(name)
if isinstance(other_concepts, list):
return other_concepts
return [other_concepts] if self.sheerka.is_known(other_concepts) else []
concept_parser_helpers = [AtomConceptParserHelper(self.context)]
while self.next_token(False):
@@ -268,7 +278,7 @@ class AtomNodeParser(BaseNodeParser):
if concept_parser.eat_token(self.token, self.pos):
concept_parser.lock()
concepts = self.get_concepts(token, self._is_eligible)
concepts = self.get_concepts(token, self._is_eligible, custom=_get_concepts_by_name)
if not concepts:
for concept_parser in concept_parser_helpers:
concept_parser.eat_unrecognized(token, self.pos)
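A minimal standalone sketch of the custom-lookup hook wired in above (the index dict and string concepts are hypothetical stand-ins for sheerka's types; the control flow mirrors the new get_concepts in the next file):

def get_concepts(index, name, to_keep, custom=None):
    # `custom` plays the role of _get_concepts_by_name: extra concepts by name
    custom_concepts = custom(name) if custom else []
    if name in index:
        result = [c for c in index[name] if to_keep(c)]
        return result + custom_concepts
    return custom_concepts if custom else None

# Usage: the index answers for "foo", and the hook injects a name-based hit.
index = {"foo": ["concept:foo"]}
hits = get_concepts(index, "foo", to_keep=lambda c: True,
                    custom=lambda name: ["by-name:" + name])
assert hits == ["concept:foo", "by-name:foo"]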
+195 -65
@@ -2,8 +2,9 @@ from collections import namedtuple
from dataclasses import dataclass
from enum import Enum
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF, ConceptParts
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import TokenKind, LexerError, Token
from parsers.BaseParser import Node, BaseParser, ErrorNode
@@ -187,6 +188,9 @@ class SourceCodeNode(LexerNode):
self.end == other.end and \
self.source == other.source
if isinstance(other, SCN):
return other == self
if not isinstance(other, SourceCodeNode):
return False
@@ -352,6 +356,51 @@ class HelperWithPos:
return self
class SCN(HelperWithPos):
"""
SourceCodeNode tester class
It matches a SourceCodeNode but with fewer constraints
SCN == SourceCodeNode if source matches; start and end are not validated when None
"""
def __init__(self, source, start=None, end=None):
super().__init__(start, end)
self.source = source
def __eq__(self, other):
if id(self) == id(other):
return True
if isinstance(other, SourceCodeNode):
if self.source != other.source:
return False
if self.start is not None and self.start != other.start:
return False
if self.end is not None and self.end != other.end:
return False
return True
if not isinstance(other, CN):
return False
return self.source == other.source and \
self.start == other.start and \
self.end == other.end
def __hash__(self):
return hash((self.source, self.start, self.end))
def __repr__(self):
txt = f"SCN(source='{self.source}'"
if self.start is not None:
txt += f", start={self.start}"
if self.end is not None:
txt += f", end={self.end}"
return txt + ")"
class CN(HelperWithPos):
"""
ConceptNode tester class
@@ -390,6 +439,8 @@ class CN(HelperWithPos):
return False
if self.end is not None and self.end != other.end:
return False
if self.source is not None and self.source != other.source:
return False
return True
if not isinstance(other, CN):
@@ -425,9 +476,10 @@ class CNC(CN):
CNC == ConceptNode if CNC.compiled == ConceptNode.concept.compiled
"""
def __init__(self, concept_key, start=None, end=None, source=None, **kwargs):
def __init__(self, concept_key, start=None, end=None, source=None, exclude_body=False, **kwargs):
super().__init__(concept_key, start, end, source)
self.compiled = kwargs
self.exclude_body = exclude_body
def __eq__(self, other):
if id(self) == id(other):
@@ -442,7 +494,13 @@ class CNC(CN):
return False
if self.end is not None and self.end != other.end:
return False
return self.compiled == other.concept.compiled # assert instead of return to help debugging tests
if self.source is not None and self.source != other.source:
return False
if self.exclude_body:
to_compare = {k: v for k, v in other.concept.compiled.items() if k != ConceptParts.BODY}
else:
to_compare = other.concept.compiled
return self.compiled == to_compare
if not isinstance(other, CNC):
return False
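A short sketch of the tester-class pattern that SCN, CN and CNC follow (FakeSourceCodeNode and SCNLike are hypothetical stand-ins, not the real classes): fields left as None are simply not validated, so a test can assert only on what it cares about.

class FakeSourceCodeNode:
    def __init__(self, source, start, end):
        self.source, self.start, self.end = source, start, end

class SCNLike:
    def __init__(self, source, start=None, end=None):
        self.source, self.start, self.end = source, start, end

    def __eq__(self, other):
        # None means "do not validate this position field"
        if self.source != other.source:
            return False
        if self.start is not None and self.start != other.start:
            return False
        return self.end is None or self.end == other.end

# Pin down only the source, ignore token positions:
assert SCNLike("1 + 2") == FakeSourceCodeNode("1 + 2", start=3, end=7)
assert SCNLike("1 + 2", start=0) != FakeSourceCodeNode("1 + 2", start=3, end=7)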
@@ -518,11 +576,10 @@ class BaseNodeParser(BaseParser):
super().__init__(name, priority)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.init_from_sheerka(sheerka)
self.concepts_by_first_keyword = sheerka.resolved_concepts_by_first_keyword
else:
self.concepts_by_first_keyword = None
self.sya_definitions = None
self.token = None
self.pos = -1
@@ -532,17 +589,16 @@ class BaseNodeParser(BaseParser):
self.text = None
self.sheerka = None
def init_from_sheerka(self, sheerka):
def init_from_concepts(self, context, concepts, **kwargs):
"""
Use the definitions from Sheerka to initialize
:param sheerka:
Initialize the parser with a list of concepts
For unit test convenience
:param context:
:param concepts:
:return:
"""
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
if sheerka.sya_definitions:
self.sya_definitions = {}
for k, v in sheerka.sya_definitions.items():
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
concepts_by_first_keyword = self.get_concepts_by_first_keyword(context, concepts).body
self.concepts_by_first_keyword = self.resolve_concepts_by_first_keyword(context, concepts_by_first_keyword).body
def reset_parser(self, context, text):
self.context = context
@@ -582,82 +638,43 @@ class BaseNodeParser(BaseParser):
return self.token.type != TokenKind.EOF
def initialize(self, context, concepts, sya_definitions=None, use_sheerka=False):
"""
To quickly find concepts, we store them in a hash whose key is the first token of the concept
example:
Concept("foo a").def_prop("a"), "foo" is a token, "a" is a variable
So the key to use will be "foo"
Concept("a foo").def_prop("a") -> first token is "foo"
Concept("Hello my dear a").def_prop("a") -> first token is "Hello"
Note that under the same key, there will be multiple entries
a B-Tree may be a better implementation in the future
We also store sya_definitions, which is a tuple (concept_precedence:int, concept_associativity:SyaAssociativity)
:param context:
:param concepts: list[Concept]
:param sya_definitions: hash[concept_id, tuple(precedence:int, associativity:SyaAssociativity)]
:param use_sheerka: first init with the definitions from Sheerka
:return:
"""
self.context = context
self.sheerka = context.sheerka
if use_sheerka:
self.init_from_sheerka(self.sheerka)
if sya_definitions:
if self.sya_definitions:
self.sya_definitions.update(sya_definitions)
else:
self.sya_definitions = sya_definitions
if self.concepts_by_first_keyword is None:
self.concepts_by_first_keyword = {}
for concept in concepts:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
break
return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
def get_concepts(self, token, to_keep, to_map=None):
def get_concepts(self, token, to_keep, custom=None, to_map=None, strip_quotes=False):
"""
Tries to find if there are concepts that match the value of the token
:param token:
:param to_keep: predicate to tell if the concept is eligible
:param custom: lambda name -> List[Concept] that gives extra concepts for the name
:param to_map:
:param strip_quotes: Remove quotes from strings
:return:
"""
if token.type == TokenKind.WHITESPACE:
return None
if token.type == TokenKind.STRING:
name = token.value[1:-1]
name = token.value[1:-1] if strip_quotes else token.value
elif token.type == TokenKind.KEYWORD:
name = token.value.value
else:
name = token.value
custom_concepts = custom(name) if custom else []
result = []
if name in self.concepts_by_first_keyword:
for concept_id in self.concepts_by_first_keyword[name]:
for concept_id in self.concepts_by_first_keyword.get(name):
concept = self.sheerka.get_by_id(concept_id)
if not to_keep(concept):
continue
concept = to_map(concept) if to_map else concept
concept = to_map(self, concept) if to_map else concept
result.append(concept)
return result
return result + custom_concepts
return None
return custom_concepts if custom else None
@staticmethod
def get_token_value(token):
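The first-keyword hash described in the removed initialize() docstring lives on in get_concepts_by_first_keyword below; a minimal standalone sketch of the indexing idea (Concept is a bare stand-in, and the variable prefix value is an assumption):

VARIABLE_PREFIX = "_var_"  # assumed value, for illustration only

class Concept:
    def __init__(self, cid, key):
        self.id, self.key = cid, key

def index_by_first_keyword(concepts):
    index = {}
    for concept in concepts:
        for keyword in concept.key.split():
            if keyword.startswith(VARIABLE_PREFIX):
                continue  # skip variables until a real token is found
            index.setdefault(keyword, []).append(concept.id)
            break
    return index

concepts = [Concept(1, "foo _var_0"),          # first token: "foo"
            Concept(2, "_var_0 foo"),          # first token: "foo" as well
            Concept(3, "Hello my dear _var_0")]
assert index_by_first_keyword(concepts) == {"foo": [1, 2], "Hello": [3]}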
@@ -667,3 +684,116 @@ class BaseNodeParser(BaseParser):
return token.value.value
else:
return token.value
@staticmethod
def get_concepts_by_first_keyword(context, concepts, use_sheerka=False):
"""
Create the map describing the first token expected by a concept
:param context:
:param concepts: list of concepts to parse
:param use_sheerka: if True, update concepts_by_first_keyword from sheerka
:return:
"""
sheerka = context.sheerka
res = sheerka.cache_manager.copy(sheerka.CONCEPTS_BY_FIRST_KEYWORD_ENTRY) if use_sheerka else {}
for concept in concepts:
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
if keywords is None:
# no first token found for a concept?
return sheerka.ret(sheerka.name, False, concept)
for keyword in keywords:
res.setdefault(keyword, []).append(concept.id)
return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def resolve_concepts_by_first_keyword(context, concepts_by_first_keyword):
sheerka = context.sheerka
def _make_unique(elements):
keys = {}
for e in elements:
keys[e] = 1
return list(keys.keys())
def _resolve_concepts(concept_str):
resolved = []
to_resolve = []
concept = sheerka.get_by_id(core.utils.unstr_concept(concept_str)[1])
if sheerka.isaset(context, concept):
concepts = sheerka.get_set_elements(context, concept)
else:
concepts = [concept]
for concept in concepts:
BaseNodeParser.ensure_bnf(context, concept) # need to make sure that it cannot fail
keywords = BaseNodeParser.get_first_tokens(sheerka, concept)
for keyword in keywords:
(to_resolve if keyword.startswith("c:|") else resolved).append(keyword)
for concept_to_resolve_str in to_resolve:
resolved += _resolve_concepts(concept_to_resolve_str)
return resolved
res = {}
for k, v in concepts_by_first_keyword.items():
if k.startswith("c:|"):
resolved_keywords = _resolve_concepts(k)
for resolved in resolved_keywords:
res.setdefault(resolved, []).extend(v)
else:
res.setdefault(k, []).extend(v)
# 'uniquify' the lists
for k, v in res.items():
res[k] = _make_unique(v)
return sheerka.ret("BaseNodeParser", True, res)
@staticmethod
def resolve_sya_associativity_and_precedence(context, sya):
pass
@staticmethod
def get_first_tokens(sheerka, concept):
"""
:param sheerka:
:param concept:
:return:
"""
if concept.bnf:
from parsers.BnfNodeParser import BnfNodeFirstTokenVisitor
bnf_visitor = BnfNodeFirstTokenVisitor(sheerka)
bnf_visitor.visit(concept.bnf)
return bnf_visitor.first_tokens
else:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
return [keyword]
return None
@staticmethod
def ensure_bnf(context, concept, parser_name="BaseNodeParser"):
if concept.metadata.definition_type == DEFINITION_TYPE_BNF and not concept.bnf:
from parsers.BnfParser import BnfParser
regex_parser = BnfParser()
desc = f"Resolving BNF {concept.metadata.definition}"
with context.push(parser_name, obj=concept, desc=desc) as sub_context:
sub_context.add_inputs(parser_input=concept.metadata.definition)
bnf_parsing_ret_val = regex_parser.parse(sub_context, concept.metadata.definition)
sub_context.add_values(return_values=bnf_parsing_ret_val)
if not bnf_parsing_ret_val.status:
raise Exception(bnf_parsing_ret_val.value)
concept.bnf = bnf_parsing_ret_val.body.body
if concept.id:
context.sheerka.get_by_id(concept.id).bnf = concept.bnf # update bnf in cache
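resolve_concepts_by_first_keyword above expands keys that are themselves concept references (the "c:|" prefix) into literal first tokens, recursing through sets; a standalone sketch of that expansion over plain dicts (FIRST_TOKENS stands in for the sheerka lookups):

# Referenced concepts may expand to further references before literals.
FIRST_TOKENS = {
    "c:|operator": ["c:|plus", "c:|minus"],
    "c:|plus": ["plus"],
    "c:|minus": ["minus"],
}

def resolve_key(key):
    if not key.startswith("c:|"):
        return [key]
    resolved = []
    for keyword in FIRST_TOKENS[key]:
        resolved += resolve_key(keyword)  # recurse until only literals remain
    return resolved

def resolve_index(index):
    res = {}
    for key, concept_ids in index.items():
        for keyword in resolve_key(key):
            res.setdefault(keyword, []).extend(concept_ids)
    # 'uniquify' each list, order-preserving, as _make_unique does
    return {k: list(dict.fromkeys(v)) for k, v in res.items()}

assert resolve_index({"c:|operator": [7], "plus": [7]}) == {"plus": [7], "minus": [7]}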
File diff suppressed because it is too large
+5 -5
@@ -6,7 +6,7 @@ from core.sheerka.Sheerka import ExecutionContext
from core.tokenizer import Tokenizer, Token, TokenKind, LexerError
from parsers.BaseParser import BaseParser, ErrorNode, UnexpectedTokenErrorNode
from parsers.BnfNodeParser import OrderedChoice, Sequence, Optional, ZeroOrMore, OneOrMore, ConceptExpression, \
StrMatch, ConceptGroupExpression
StrMatch
@dataclass()
@@ -234,8 +234,9 @@ class BnfParser(BaseParser):
if token.type == TokenKind.CONCEPT:
self.next_token()
concept = self.sheerka.new((token.value[0], token.value[1]))
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
else ConceptExpression(concept)
expr = ConceptExpression(concept)
# expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
# else ConceptExpression(concept)
return self.eat_rule_name_if_needed(expr)
if token.type == TokenKind.IDENTIFIER:
@@ -259,8 +260,7 @@ class BnfParser(BaseParser):
body=("key", concept_name)))
return None
else:
expr = ConceptGroupExpression(concept) if self.sheerka.isaset(self.context, concept) \
else ConceptExpression(concept)
expr = ConceptExpression(concept)
expr.rule_name = concept.name
return self.eat_rule_name_if_needed(expr)
-109
@@ -1,109 +0,0 @@
# try to match something like
# ConceptNode 'plus' ConceptNode
#
# Replaced by SyaNodeParser
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind, Token
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import ConceptNode, UnrecognizedTokensNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from core.concept import VARIABLE_PREFIX
multiple_concepts_parser = MultipleConceptsParser()
class ConceptsWithConceptsParser(BaseParser):
def __init__(self, **kwargs):
super().__init__("ConceptsWithConcepts", 25)
self.enabled = False
@staticmethod
def get_tokens(nodes):
tokens = []
for node in nodes:
if isinstance(node, ConceptNode):
index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
else:
for token in node.tokens:
if token.type == TokenKind.EOF:
break
elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
continue
else:
tokens.append(token)
return tokens
@staticmethod
def get_key(nodes):
key = ""
index = 0
for node in nodes:
if key:
key += " "
if isinstance(node, UnrecognizedTokensNode):
key += node.source.strip()
else:
key += f"{VARIABLE_PREFIX}{index}"
index += 1
return key
def finalize_concept(self, context, concept, nodes):
index = 0
for node in nodes:
if isinstance(node, ConceptNode):
prop_name = list(concept.props.keys())[index]
concept.compiled[prop_name] = node.concept
context.log(
f"Setting property '{prop_name}='{node.concept}'.",
self.name)
index += 1
elif isinstance(node, SourceCodeNode):
prop_name = list(concept.props.keys())[index]
sheerka = context.sheerka
value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
context.log(
f"Setting property '{prop_name}'='Python({node.source})'.",
self.name)
index += 1
return concept
def parse(self, context, parser_input):
sheerka = context.sheerka
nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
if not nodes:
return None
concept_key = self.get_key(nodes)
concept = sheerka.new(concept_key)
if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
return sheerka.ret(
self.name,
False,
sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
concepts = concept if hasattr(concept, "__iter__") else [concept]
for concept in concepts:
self.finalize_concept(context, concept, nodes)
res = []
for concept in concepts:
res.append(sheerka.ret(
self.name,
True,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input.source,
body=concept,
try_parsed=None)))
return res[0] if len(res) == 1 else res
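The deleted get_key above rebuilds a concept key from mixed nodes: unrecognized text is kept verbatim, every recognized node becomes a numbered variable. A standalone sketch (node classes and the variable prefix are illustrative stand-ins):

VARIABLE_PREFIX = "_var_"  # assumed value, for illustration

class UnrecognizedTokens:
    def __init__(self, source):
        self.source = source

class Recognized:
    pass

def get_key(nodes):
    parts, index = [], 0
    for node in nodes:
        if isinstance(node, UnrecognizedTokens):
            parts.append(node.source.strip())
        else:
            parts.append(f"{VARIABLE_PREFIX}{index}")
            index += 1
    return " ".join(parts)

# "<concept> plus <concept>" becomes the key "_var_0 plus _var_1"
nodes = [Recognized(), UnrecognizedTokens(" plus "), Recognized()]
assert get_key(nodes) == "_var_0 plus _var_1"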
+2 -1
@@ -384,7 +384,8 @@ class DefaultParser(BaseParser):
return None, NotInitializedNode()
regex_parser = BnfParser()
with self.context.push(self.name, obj=current_concept_def) as sub_context:
desc = f"Resolving BNF {current_concept_def.definition}"
with self.context.push(self.name, obj=current_concept_def, desc=desc) as sub_context:
parsing_result = regex_parser.parse(sub_context, tokens)
sub_context.add_values(return_values=parsing_result)
+21 -12
@@ -1,9 +1,9 @@
import logging
from core.builtin_concepts import ReturnValueConcept, BuiltinConcepts
from parsers.BaseParser import BaseParser
from core.tokenizer import Tokenizer, Keywords, TokenKind, LexerError
from core.concept import VARIABLE_PREFIX
from core.tokenizer import Keywords, TokenKind, LexerError
from parsers.BaseParser import BaseParser
class ExactConceptParser(BaseParser):
@@ -11,10 +11,11 @@ class ExactConceptParser(BaseParser):
Tries to recognize a single concept
"""
MAX_WORDS_SIZE = 10
MAX_WORDS_SIZE = 3
def __init__(self, **kwargs):
def __init__(self, max_word_size=None, **kwargs):
BaseParser.__init__(self, "ExactConcept", 80)
self.max_word_size = max_word_size
def parse(self, context, parser_input):
"""
@@ -33,11 +34,11 @@ class ExactConceptParser(BaseParser):
context.log(f"Error found in tokenizer {e}", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.ERROR, body=e))
if len(words) > self.MAX_WORDS_SIZE:
if len(words) > (self.max_word_size or self.MAX_WORDS_SIZE):
context.log(f"Max words reached. Stopping.", self.name)
return sheerka.ret(self.name, False, sheerka.new(BuiltinConcepts.CONCEPT_TOO_LONG, body=parser_input))
recognized = False
recognized = [] # keep track of the concepts found
for combination in self.combinations(words):
concept_key = " ".join(combination)
@@ -49,16 +50,23 @@ class ExactConceptParser(BaseParser):
concepts = result if isinstance(result, list) else [result]
for concept in concepts:
if concept.id in recognized:
context.log(f"Recognized concept {concept} again. Skipping.", self.name)
# example
# if the input is foo a and a concept is defined as foo a
# There will be two matches: one for 'foo a' and one for 'foo _var_0'
# but it's the same concept foo a
continue
context.log(f"Recognized concept {concept}.", self.name)
# update the properties if needed
need_validation = False
for i, token in enumerate(combination):
if token.startswith(VARIABLE_PREFIX):
index = int(token[len(VARIABLE_PREFIX):])
concept.def_prop_by_index(index, words[i])
concept.def_var_by_index(index, words[i])
concept.metadata.need_validation = True
if self.verbose_log.isEnabledFor(logging.DEBUG):
prop_name = list(concept.props.keys())[index]
prop_name = concept.metadata.variables[index][0]
context.log(
f"Added property {index}: {prop_name}='{words[i]}'.",
self.name)
@@ -69,12 +77,13 @@ class ExactConceptParser(BaseParser):
context.sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(parser_input),
source=parser_input if isinstance(parser_input, str) else self.get_text_from_tokens(
parser_input),
body=concept,
try_parsed=concept)))
recognized = True
recognized.append(concept.id)
if recognized:
if len(recognized) > 0:
if len(res) == 1:
self.log_result(context, parser_input, res[0])
else:
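self.combinations is not shown in this diff; a plausible reconstruction of the idea behind the duplicate-match comment above: each word may be kept literal or replaced by a numbered variable, so a literal key and a variable key can resolve to the same concept, which is why recognized ids are tracked and skipped.

from itertools import product

VARIABLE_PREFIX = "_var_"  # assumed value, for illustration

def combinations(words):
    # every keep/replace mask over the words, variables numbered left to right
    for mask in product([False, True], repeat=len(words)):
        index, combo = 0, []
        for word, as_var in zip(words, mask):
            if as_var:
                combo.append(f"{VARIABLE_PREFIX}{index}")
                index += 1
            else:
                combo.append(word)
        yield combo

keys = {" ".join(c) for c in combinations(["foo", "a"])}
# both 'foo a' (literal) and 'foo _var_0' can name the same concept
assert {"foo a", "foo _var_0"} <= keys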
+1 -1
@@ -318,7 +318,7 @@ class ExplainParser(BaseSplitIterParser):
def parse(self, context, parser_input):
"""
text can be string, but text can also be an list of tokens
parser_input can be a string, but it can also be a list of tokens
:param context:
:param parser_input:
:return:
-163
@@ -1,163 +0,0 @@
# to be replaced by SyaNodeParser
import ast
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import TokenKind
from parsers.BaseNodeParser import SourceCodeNode
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
import core.utils
from parsers.PythonParser import PythonParser
concept_lexer_parser = BnfNodeParser()
class MultipleConceptsParser(BaseParser):
"""
Parser that will take the result of BnfNodeParser and
try to resolve the unrecognized tokens token by token
It succeeds when it returns a list of ConceptNode objects exclusively
"""
def __init__(self, **kwargs):
BaseParser.__init__(self, "MultipleConcepts", 45)
self.enabled = False
@staticmethod
def finalize(nodes_found, unrecognized_tokens):
if not unrecognized_tokens:
return nodes_found, unrecognized_tokens
unrecognized_tokens.fix_source()
if unrecognized_tokens.not_whitespace():
nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
return nodes_found, None
@staticmethod
def create_or_add(unrecognized_tokens, token, index):
if unrecognized_tokens:
unrecognized_tokens.add_token(token, index)
else:
unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
return unrecognized_tokens
def parse(self, context, parser_input):
sheerka = context.sheerka
nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
if not nodes:
return None
nodes_found = [[]]
concepts_only = True
for node in nodes:
if isinstance(node, UnrecognizedTokensNode):
unrecognized_tokens = None
i = 0
while i < len(node.tokens):
token_index = node.start + i
token = node.tokens[i]
concepts_nodes = self.get_concepts_nodes(context, token_index, token)
if concepts_nodes is not None:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, concepts_nodes)
i += 1
continue
source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
if source_code_node:
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
nodes_found = core.utils.product(nodes_found, [source_code_node])
i += len(source_code_node.tokens)
continue
# not a concept nor some source code
unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
i += 1
# finish processing if needed
nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
else:
nodes_found = core.utils.product(nodes_found, [node])
ret = []
for choice in nodes_found:
ret.append(
sheerka.ret(
self.name,
concepts_only,
sheerka.new(
BuiltinConcepts.PARSER_RESULT,
parser=self,
source=parser_input.source,
body=choice,
try_parsed=None))
)
if len(ret) == 1:
self.log_result(context, parser_input.source, ret[0])
return ret[0]
else:
self.log_multiple_results(context, parser_input.source, ret)
return ret
@staticmethod
def get_concepts_nodes(context, index, token):
"""
Tries to recognize a concept
from the universe of all known concepts
"""
if token.type != TokenKind.IDENTIFIER:
return None
concept = context.new_concept(token.value)
if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
concepts = concept if hasattr(concept, "__iter__") else [concept]
concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
return concepts_nodes
return None
@staticmethod
def get_source_code_node(context, index, tokens):
"""
Tries to recognize source code.
For the time being, only Python is supported
:param context:
:param tokens:
:param index:
:return:
"""
if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
return None
end_index = len(tokens)
while end_index > 0:
parser = PythonParser()
tokens_to_parse = tokens[:end_index]
res = parser.parse(context, tokens_to_parse)
if res.status:
# only expressions are accepted
ast_ = res.value.value.ast_
if not isinstance(ast_, ast.Expression):
return None
try:
compiled = compile(ast_, "<string>", "eval")
eval(compiled, {}, {})
except Exception:
return None
source = BaseParser.get_text_from_tokens(tokens_to_parse)
return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
end_index -= 1
return None
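get_source_code_node above shrinks the token span until something parses as a Python expression, then sanity-checks it with compile/eval; a minimal sketch of the same probe over plain strings, using the standard-library ast module instead of the project's PythonParser:

import ast

def longest_python_expression(words):
    end = len(words)
    while end > 0:
        source = " ".join(words[:end])
        try:
            tree = ast.parse(source, mode="eval")  # expressions only
            compiled = compile(tree, "<string>", "eval")
            eval(compiled, {}, {})
            return source
        except Exception:
            end -= 1  # try a shorter span
    return None

# only the "1 + 2" prefix is a valid, evaluable expression
assert longest_python_expression("1 + 2 then stop".split()) == "1 + 2"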
+5 -5
@@ -1,11 +1,11 @@
from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
from core.tokenizer import Tokenizer, LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from dataclasses import dataclass
import ast
import logging
import core.utils
from dataclasses import dataclass
import core.utils
from core.builtin_concepts import BuiltinConcepts
from core.tokenizer import LexerError, TokenKind
from parsers.BaseParser import BaseParser, Node, ErrorNode
from parsers.BnfNodeParser import ConceptNode
log = logging.getLogger(__name__)
-1
@@ -1,7 +1,6 @@
from core.builtin_concepts import BuiltinConcepts
from parsers.BaseParser import BaseParser
from parsers.BnfNodeParser import ConceptNode
from parsers.MultipleConceptsParser import MultipleConceptsParser
from parsers.PythonParser import PythonParser
from parsers.UnrecognizedNodeParser import UnrecognizedNodeParser
+166 -118
@@ -1,4 +1,3 @@
import copy
from collections import namedtuple
from dataclasses import dataclass, field
from typing import List
@@ -7,10 +6,10 @@ from core import builtin_helpers
from core.builtin_concepts import BuiltinConcepts
from core.concept import VARIABLE_PREFIX, Concept, DEFINITION_TYPE_BNF
from core.sheerka.ExecutionContext import ExecutionContext
from core.tokenizer import LexerError, Token, TokenKind
from core.tokenizer import Token, TokenKind
from parsers.BaseNodeParser import UnrecognizedTokensNode, ConceptNode, SourceCodeNode, SyaAssociativity, \
SourceCodeWithConceptNode
from parsers.BaseParser import BaseParser, ErrorNode
SourceCodeWithConceptNode, BaseNodeParser
from parsers.BaseParser import ErrorNode, UnexpectedTokenErrorNode
PARSERS = ["BnfNode", "AtomNode", "Python"]
@@ -116,13 +115,13 @@ class SyaConceptParserHelper:
return len(self.expected) == 0
def is_atom(self):
return len(self.concept.concept.metadata.props) == 0 and len(self.expected) == 0
return len(self.concept.concept.metadata.variables) == 0 and len(self.expected) == 0
def is_expected(self, token):
if self.is_matched():
return False
token_value = self._get_token_value(token)
token_value = BaseNodeParser.get_token_value(token)
for expected in self.expected:
if not expected.startswith(VARIABLE_PREFIX) and expected == token_value:
@@ -139,7 +138,7 @@ class SyaConceptParserHelper:
# return True if a whole sequence of keywords is eaten
# example
# Concept("foo a bar baz qux b").def_prop("a").def_prop("b")
# Concept("foo a bar baz qux b").def_var("a").def_var("b")
# 'bar' is just eaten. We will return False because 'baz' and 'qux' are still waiting
if len(self.expected) == 0:
return True
@@ -169,14 +168,14 @@ class SyaConceptParserHelper:
self.concept = self.concept.concept
return self
@staticmethod
def _get_token_value(token):
if token.type == TokenKind.STRING:
return token.value[1:-1]
elif token.type == TokenKind.KEYWORD:
return token.value.value
else:
return token.value
# @staticmethod
# def _get_token_value(token):
# if token.type == TokenKind.STRING:
# return token.value[1:-1]
# elif token.type == TokenKind.KEYWORD:
# return token.value.value
# else:
# return token.value
def clone(self):
clone = SyaConceptParserHelper(self.concept, self.start, self.end)
@@ -215,7 +214,10 @@ class InFixToPostFix:
if not isinstance(other, InFixToPostFix):
return False
return self.out == other.out
return self.out == other.out and self.errors == other.errors
def __hash__(self):
return len(self.sequence) + len(self.errors)
def _add_error(self, error):
self.errors.append(error)
@@ -396,6 +398,7 @@ class InFixToPostFix:
del current_concept.expected[0]
def manage_unrecognized(self):
if self.unrecognized_tokens.is_empty():
return
@@ -514,10 +517,10 @@ class InFixToPostFix:
def handle_expected_token(self, token, pos):
"""
True if the token is part of the concept being parsed and the last token in a sequence is eaten
Example : Concept("foo a bar b").def_prop("a").def_prop("b")
Example : Concept("foo a bar b").def_var("a").def_var("b")
The expected tokens are 'foo' and 'bar' (as a and b are parameters)
Example: Concept("foo a bar baz b").def_prop("a").def_prop("b")
Example: Concept("foo a bar baz b").def_var("a").def_var("b")
If the token is 'bar', it will be eaten but handle_expected_token() will return False
as we still expect 'baz'
:param token:
@@ -565,6 +568,18 @@ class InFixToPostFix:
return True
# else:
# if token.type != TokenKind.WHITESPACE:
# # hack, because whitespaces are not correctly parsed in self.expected
# # KSI 2020/04/25
# # I no longer understand why we are in a loop (the reverse one)
# # if we are parsing a concept and the expected token does not match
# # The whole class should be in error
# self._add_error(UnexpectedTokenErrorNode(
# f"Failed to parse '{current_concept.concept.concept}'",
# token, current_concept.expected))
# return False
return False
def eat_token(self, token, pos):
@@ -581,7 +596,7 @@ class InFixToPostFix:
if self.handle_expected_token(token, pos):
# a token is found, let's check if it's part of a concept being parsed
# example Concept(name="foo", definition="foo a bar b").def_prop("a").def_prop("b")
# example Concept(name="foo", definition="foo a bar b").def_var("a").def_var("b")
# if the token 'bar' is found, it has to be considered as part of the concept foo
self.debug.append(token)
return True
@@ -780,16 +795,13 @@ class PostFixToItem:
has_unrecognized: bool
class SyaNodeParser(BaseParser):
class SyaNodeParser(BaseNodeParser):
def __init__(self, **kwargs):
BaseParser.__init__(self, "SyaNode", 50)
super().__init__("SyaNode", 50, **kwargs)
if 'sheerka' in kwargs:
sheerka = kwargs.get("sheerka")
self.concepts_by_first_keyword = sheerka.concepts_by_first_keyword
self.sya_definitions = {}
if sheerka.sya_definitions:
for k, v in sheerka.sya_definitions.items():
self.sya_definitions[k] = (v[0], SyaAssociativity(v[1]))
self.sya_definitions = sheerka.resolved_sya_def
else:
self.concepts_by_first_keyword = {}
@@ -803,104 +815,133 @@ class SyaNodeParser(BaseParser):
self.text = None
self.sheerka = None
def reset_parser(self, context, text):
self.context = context
self.sheerka = context.sheerka
self.text = text
try:
self.tokens = list(self.get_input_as_tokens(text))
except LexerError as e:
self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
return False
self.token = None
self.pos = -1
return True
def add_error(self, error, next_token=True):
self.error_sink.append(error)
if next_token:
self.next_token()
return error
def get_token(self) -> Token:
return self.token
def next_token(self, skip_whitespace=True):
if self.token and self.token.type == TokenKind.EOF:
return False
self.pos += 1
self.token = self.tokens[self.pos]
if skip_whitespace:
while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
self.pos += 1
self.token = self.tokens[self.pos]
return self.token.type != TokenKind.EOF
def initialize(self, context, concepts=None, sya_definitions=None):
self.context = context
self.sheerka = context.sheerka
def init_from_concepts(self, context, concepts, **kwargs):
super().init_from_concepts(context, concepts)
sya_definitions = kwargs.get("sya", None)
if sya_definitions:
self.sya_definitions = sya_definitions
if concepts:
for concept in concepts:
keywords = concept.key.split()
for keyword in keywords:
if keyword.startswith(VARIABLE_PREFIX):
continue
self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
break
return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
def get_concepts(self, token):
@staticmethod
def _is_eligible(concept):
"""
Tries to find if there are concepts that match the value of the token
:param token:
Predicate that selects the concepts that must be handled by this parser
:param concept:
:return:
"""
# We keep only concepts that have parameters (refuse atoms)
# Bnf definitions are not supposed to be managed by this parser either
return len(concept.metadata.variables) > 0 and concept.metadata.definition_type != DEFINITION_TYPE_BNF
if token.type == TokenKind.STRING:
name = token.value[1:-1]
elif token.type == TokenKind.KEYWORD:
name = token.value.value
else:
name = token.value
@staticmethod
def _get_sya_concept_def(parser, concept):
sya_concept_def = SyaConceptDef(concept)
if concept.id in parser.sya_definitions:
sya_def = parser.sya_definitions.get(concept.id)
if sya_def[0] is not None:
sya_concept_def.precedence = sya_def[0]
if sya_def[1] is not None:
sya_concept_def.associativity = sya_def[1]
return sya_concept_def
result = []
if name in self.concepts_by_first_keyword:
for concept_id in self.concepts_by_first_keyword[name]:
# def reset_parser(self, context, text):
# self.context = context
# self.sheerka = context.sheerka
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
# except LexerError as e:
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
# return False
#
# self.token = None
# self.pos = -1
# return True
#
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.next_token()
# return error
#
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF
concept = self.sheerka.get_by_id(concept_id)
if len(concept.metadata.props) == 0:
# only concepts that have parameters (refuse atoms)
# Note that this test is needed if the definition of the concept has changed
continue
if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
# bnf definitions are not supposed to be managed by this parser
continue
sya_concept_def = SyaConceptDef(concept)
if concept.id in self.sya_definitions:
sya_def = self.sya_definitions[concept.id]
if sya_def[0] is not None:
sya_concept_def.precedence = sya_def[0]
if sya_def[1] is not None:
sya_concept_def.associativity = sya_def[1]
result.append(sya_concept_def)
return result
return None
# def initialize(self, context, concepts=None, sya_definitions=None):
# self.context = context
# self.sheerka = context.sheerka
#
# if sya_definitions:
# self.sya_definitions = sya_definitions
#
# if concepts:
# for concept in concepts:
# keywords = concept.key.split()
# for keyword in keywords:
# if keyword.startswith(VARIABLE_PREFIX):
# continue
#
# self.concepts_by_first_keyword.setdefault(keyword, []).append(concept.id)
# break
#
# return self.sheerka.ret(self.name, True, self.concepts_by_first_keyword)
#
# def get_concepts(self, token):
# """
# Tries to find if there are concepts that match the value of the token
# :param token:
# :return:
# """
#
# if token.type == TokenKind.STRING:
# name = token.value[1:-1]
# elif token.type == TokenKind.KEYWORD:
# name = token.value.value
# else:
# name = token.value
#
# result = []
# if name in self.concepts_by_first_keyword:
# for concept_id in self.concepts_by_first_keyword[name]:
#
# concept = self.sheerka.get_by_id(concept_id)
#
# if len(concept.metadata.props) == 0:
# # only concepts that have parameters (refuse atoms)
# # Note that this test is needed if the definition of the concept has changed
# continue
#
# if concept.metadata.definition_type == DEFINITION_TYPE_BNF:
# # bnf definitions are not supposed to be managed by this parser
# continue
#
# sya_concept_def = SyaConceptDef(concept)
# if concept.id in self.sya_definitions:
# sya_def = self.sya_definitions[concept.id]
# if sya_def[0] is not None:
# sya_concept_def.precedence = sya_def[0]
# if sya_def[1] is not None:
# sya_concept_def.associativity = sya_def[1]
#
# result.append(sya_concept_def)
# return result
#
# return None
def infix_to_postfix(self, context, text):
"""
@@ -943,7 +984,7 @@ class SyaNodeParser(BaseParser):
if infix_to_postfix.eat_token(token, self.pos):
infix_to_postfix.lock()
concepts = self.get_concepts(token)
concepts = self.get_concepts(token, self._is_eligible, to_map=self._get_sya_concept_def)
if not concepts:
for infix_to_postfix in res:
infix_to_postfix.eat_unrecognized(token, self.pos)
@@ -988,7 +1029,7 @@ class SyaNodeParser(BaseParser):
else:
items.append(res)
item.has_unrecognized |= hasattr(res, "has_unrecognized") and res.has_unrecognized or \
isinstance(res, UnrecognizedTokensNode)
isinstance(res, UnrecognizedTokensNode)
item.nodes = items
item.fix_all_pos()
item.tokens = self.tokens[item.start:item.end + 1]
@@ -1000,7 +1041,7 @@ class SyaNodeParser(BaseParser):
end = item.end
has_unrecognized = False
concept = sheerka.new_from_template(item.concept, item.concept.id)
for param_index in reversed(range(len(concept.metadata.props))):
for param_index in reversed(range(len(concept.metadata.variables))):
inner_item = self.postfix_to_item(sheerka, postfixed)
if inner_item.start < start:
start = inner_item.start
@@ -1008,7 +1049,7 @@ class SyaNodeParser(BaseParser):
end = inner_item.end
has_unrecognized |= isinstance(inner_item, UnrecognizedTokensNode)
param_name = concept.metadata.props[param_index][0]
param_name = concept.metadata.variables[param_index][0]
param_value = inner_item.concept if hasattr(inner_item, "concept") else \
[inner_item.return_value] if isinstance(inner_item, SourceCodeNode) else \
inner_item
@@ -1115,3 +1156,10 @@ class SyaNodeParser(BaseParser):
result.append(infix_to_postfix)
return result
# @staticmethod
# def init_sheerka(self, sheerka):
# if hasattr(BaseNodeParser, "init_sheerka"):
# BaseNodeParser.init_sheerka(sheerka)
#
# # init sya_definitions
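InFixToPostFix generalizes the shunting-yard conversion to concepts, with (precedence, associativity) pairs coming from sya_definitions. A classic standalone sketch of the conversion for plain operators (the operator table is illustrative, not the project's, and parentheses are omitted):

OPS = {"+": (1, "left"), "-": (1, "left"), "*": (2, "left"), "^": (3, "right")}

def infix_to_postfix(tokens):
    out, stack = [], []
    for tok in tokens:
        if tok not in OPS:
            out.append(tok)
            continue
        prec, assoc = OPS[tok]
        # pop operators that bind at least as tightly (strictly, if right-assoc)
        while stack and (OPS[stack[-1]][0] > prec or
                         (OPS[stack[-1]][0] == prec and assoc == "left")):
            out.append(stack.pop())
        stack.append(tok)
    return out + stack[::-1]

assert infix_to_postfix("1 + 2 * 3".split()) == ["1", "2", "3", "*", "+"]
assert infix_to_postfix("2 ^ 3 ^ 2".split()) == ["2", "3", "2", "^", "^"]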
+10 -1
@@ -52,11 +52,20 @@ class UnrecognizedNodeParser(BaseParser):
res = only_successful(context, res)
if res.status:
lexer_nodes = get_lexer_nodes(res.body.body, node.start, node.tokens)
sequences_found = core.utils.product(sequences_found, lexer_nodes)
if lexer_nodes:
# make sure lexer_nodes is not empty (for example, some Python results are discarded)
sequences_found = core.utils.product(sequences_found, lexer_nodes)
else:
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True
else:
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True
elif isinstance(node, SourceCodeNode):
sequences_found = core.utils.product(sequences_found, [node])
has_unrecognized = True # never trust a source code node; it may be invalid source code
else: # cannot happen as of today :-)
raise NotImplementedError()
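core.utils.product is not part of this diff; its usage here (and the [[]] seed) implies cartesian-style extension of every candidate sequence with every alternative node. A hypothetical reconstruction:

def product(sequences, alternatives):
    # extend every partial sequence with every alternative for the next slot
    return [seq + [alt] for seq in sequences for alt in alternatives]

found = [["A1"], ["A2"]]
found = product(found, ["B1", "B2"])
assert found == [["A1", "B1"], ["A1", "B2"], ["A2", "B1"], ["A2", "B2"]]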
+912
@@ -0,0 +1,912 @@
# #####################################################################################################
# # This implementation of the parser is highly inspired by the arpeggio project (https://github.com/textX/Arpeggio)
# # I don't directly use the project, but it helped me figure out
# # what to do.
# # Dejanović I., Milosavljević G., Vaderna R.:
# # Arpeggio: A flexible PEG parser for Python,
# # Knowledge-Based Systems, 2016, 95, 71 - 74, doi:10.1016/j.knosys.2015.12.004
# #####################################################################################################
# from collections import namedtuple
# from dataclasses import dataclass
# from collections import defaultdict
# from core.builtin_concepts import BuiltinConcepts, ParserResultConcept
# from core.concept import Concept, ConceptParts, DoNotResolve
# from core.tokenizer import TokenKind, Tokenizer, Token
# from parsers.BaseNodeParser import LexerNode, GrammarErrorNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser, ErrorNode
# import core.utils
#
#
# class NonTerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, tokens, children=None):
# super().__init__(start, end, tokens)
# self.parsing_expression = parsing_expression
# self.children = children
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or self.parsing_expression.__class__.__name__
# if len(self.children) > 0:
# sub_names = "(" + ",".join([repr(child) for child in self.children]) + ")"
# else:
# sub_names = ""
# return name + sub_names
#
# def __eq__(self, other):
# if not isinstance(other, NonTerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.children == other.children
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.children))
#
#
# class TerminalNode(LexerNode):
# """
# Returned by the BnfNodeParser
# """
#
# def __init__(self, parsing_expression, start, end, value):
# super().__init__(start, end, source=value)
# self.parsing_expression = parsing_expression
# self.value = value
#
# def __repr__(self):
# name = self.parsing_expression.rule_name or ""
# return name + f"'{self.value}'"
#
# def __eq__(self, other):
# if not isinstance(other, TerminalNode):
# return False
#
# return self.parsing_expression == other.parsing_expression and \
# self.start == other.start and \
# self.end == other.end and \
# self.value == other.value
#
# def __hash__(self):
# return hash((self.parsing_expression, self.start, self.end, self.value))
#
#
# @dataclass()
# class UnknownConceptNode(ErrorNode):
# concept_key: str
#
#
# @dataclass()
# class TooManyConceptNode(ErrorNode):
# concept_key: str
#
#
# class ParsingExpression:
# def __init__(self, *args, **kwargs):
# self.elements = args
#
# nodes = kwargs.get('nodes', [])
# if not hasattr(nodes, '__iter__'):
# nodes = [nodes]
# self.nodes = nodes
#
# self.rule_name = kwargs.get('rule_name', '')
#
# def __eq__(self, other):
# if not isinstance(other, ParsingExpression):
# return False
#
# return self.rule_name == other.rule_name and self.elements == other.elements
#
# def __hash__(self):
# return hash((self.rule_name, self.elements))
#
# def parse(self, parser):
# return self._parse(parser)
#
# def add_rule_name_if_needed(self, text):
# return text + "=" + self.rule_name if self.rule_name else text
#
#
# class ConceptExpression(ParsingExpression):
# """
# Will match a concept
# It is used only for rule definitions
#
# When the grammar is created, it is replaced by the actual concept
# """
#
# def __init__(self, concept, rule_name=""):
# super().__init__(rule_name=rule_name)
# self.concept = concept
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"{self.concept}")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, ConceptExpression):
# return False
#
# if isinstance(self.concept, Concept):
# return self.concept.name == other.concept.name
#
# # when it's only the name of the concept
# return self.concept == other.concept
#
# def __hash__(self):
# return hash((self.concept, self.rule_name))
#
# @staticmethod
# def get_parsing_expression_from_name(name):
# tokens = Tokenizer(name)
# nodes = [StrMatch(core.utils.strip_quotes(token.value)) for token in list(tokens)[:-1]]
# if len(nodes) == 1:
# return nodes[0]
# else:
# sequence = Sequence(nodes)
# sequence.nodes = nodes
# return sequence
#
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# # Try to match the concept using its name
# expr = self.get_parsing_expression_from_name(to_match.name)
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class ConceptGroupExpression(ConceptExpression):
# def _parse(self, parser):
# to_match = parser.get_concept(self.concept) if isinstance(self.concept, str) else self.concept
# if parser.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return None
#
# self.concept = to_match # Memoize
#
# if to_match not in parser.concepts_grammars:
# concepts_in_group = parser.sheerka.get_set_elements(parser.context, self.concept)
# nodes = [ConceptExpression(c, rule_name=c.name) for c in concepts_in_group]
# expr = OrderedChoice(nodes)
# expr.nodes = nodes
# node = expr.parse(parser)
# else:
# node = parser.concepts_grammars[to_match].parse(parser)
#
# if node is None:
# return None
#
# return NonTerminalNode(self, node.start, node.end, parser.tokens[node.start: node.end + 1], [node])
#
#
# class Sequence(ParsingExpression):
# """
# Will match sequence of parser expressions in exact order they are defined.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = parser.pos
#
# children = []
# for e in self.nodes:
# node = e.parse(parser)
# if node is None:
# return None
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class OrderedChoice(ParsingExpression):
# """
# Will match one among multiple
# It will stop at the first match (so the order of definition is important)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# return NonTerminalNode(self, init_pos, node.end, parser.tokens[init_pos: node.end + 1], [node])
#
# parser.seek(init_pos) # backtrack
#
# return None
#
# def __repr__(self):
# to_str = "| ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})")
#
#
# class Optional(ParsingExpression):
# """
# Optionally matches the elements
# if there are multiple matches, the longest one is chosen
# If you need order, use Optional(OrderedChoice)
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# selected_node = NonTerminalNode(self, parser.pos, -1, [], []) # means that nothing is found
#
# for e in self.nodes:
# node = e.parse(parser)
# if node:
# if node.end > selected_node.end:
# selected_node = NonTerminalNode(
# self,
# node.start,
# node.end,
# parser.tokens[node.start: node.end + 1],
# [node])
#
# parser.seek(init_pos) # backtrack
#
# if selected_node.end != -1:
# parser.seek(selected_node.end)
# parser.next_token() # eat the tokens found
#
# return selected_node
#
# def __repr__(self):
# if len(self.elements) == 1:
# return f"{self.elements[0]}?"
# else:
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})?")
#
#
# class Repetition(ParsingExpression):
# """
# Base class for all repetition-like parser expressions (?,*,+)
# Args:
# eolterm(bool): Flag that indicates that end of line should
# terminate repetition match.
# """
#
# def __init__(self, *elements, **kwargs):
# super(Repetition, self).__init__(*elements, **kwargs)
# self.sep = kwargs.get('sep', None)
#
#
# class ZeroOrMore(Repetition):
# """
# ZeroOrMore will try to match parser expression specified zero or more
# times. It will never fail.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# # eat the ZeroOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0:
# return NonTerminalNode(self, init_pos, -1, [], [])
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})*")
#
#
# class OneOrMore(Repetition):
# """
# OneOrMore will try to match parser expression specified one or more times.
# """
#
# def _parse(self, parser):
# init_pos = parser.pos
# end_pos = -1
# children = []
#
# while True:
# current_pos = parser.pos
#
# # maybe eat the separator if needed
# if self.sep and children:
# sep_result = self.sep.parse(parser)
# if sep_result is None:
# parser.seek(current_pos)
# break
#
# # eat the OneOrMore
# node = self.nodes[0].parse(parser)
# if node is None:
# parser.seek(current_pos)
# break
# else:
# if node.end != -1: # because returns -1 when no match
# children.append(node)
# end_pos = node.end
#
# if len(children) == 0: # if nothing is found, it's an error
# return None
#
# return NonTerminalNode(self, init_pos, end_pos, parser.tokens[init_pos: end_pos + 1], children)
#
# def __repr__(self):
# to_str = ", ".join(repr(n) for n in self.elements)
# return self.add_rule_name_if_needed(f"({to_str})+")
#
#
# class UnorderedGroup(Repetition):
# """
# Will try to match all of the parsing expressions in any order.
# """
#
# def _parse(self, parser):
# raise NotImplementedError()
#
# # def __repr__(self):
# # to_str = ", ".join(repr(n) for n in self.elements)
# # return f"({to_str})#"
#
#
# class Match(ParsingExpression):
# """
# Base class for all classes that will try to match something from the input.
# """
#
# def __init__(self, rule_name, root=False):
# super(Match, self).__init__(rule_name=rule_name, root=root)
#
# def parse(self, parser):
# result = self._parse(parser)
# return result
#
#
# class StrMatch(Match):
# """
# Matches a literal
# """
#
# def __init__(self, to_match, rule_name="", ignore_case=True):
# super(Match, self).__init__(rule_name=rule_name)
# self.to_match = to_match
# self.ignore_case = ignore_case
#
# def __repr__(self):
# return self.add_rule_name_if_needed(f"'{self.to_match}'")
#
# def __eq__(self, other):
# if not super().__eq__(other):
# return False
#
# if not isinstance(other, StrMatch):
# return False
#
# return self.to_match == other.to_match and self.ignore_case == other.ignore_case
#
# def _parse(self, parser):
# token = parser.get_token()
# m = str(token.value).lower() == self.to_match.lower() if self.ignore_case \
# else token.value == self.to_match
#
# if m:
# node = TerminalNode(self, parser.pos, parser.pos, token.value)
# parser.next_token()
# return node
#
# return None
#
#
# class BnfNodeParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("BnfNode_old", 50)
# self.enabled = False
# if 'grammars' in kwargs:
# self.concepts_grammars = kwargs.get("grammars")
# elif 'sheerka' in kwargs:
# self.concepts_grammars = kwargs.get("sheerka").concepts_grammars
# else:
# self.concepts_grammars = {}
#
# self.ignore_case = True
#
# self.token = None
# self.pos = -1
# self.tokens = None
#
# self.context = None
# self.text = None
# self.sheerka = None
#
# def add_error(self, error, next_token=True):
# self.error_sink.append(error)
# if next_token:
# self.next_token()
# return error
#
# def reset_parser(self, context, text):
# self.context = context
# self.sheerka = context.sheerka
# self.text = text
#
# try:
# self.tokens = list(self.get_input_as_tokens(text))
# except core.tokenizer.LexerError as e:
# self.add_error(self.sheerka.new(BuiltinConcepts.ERROR, body=e), False)
# return False
#
# self.token = None
# self.pos = -1
# self.next_token(False)
# return True
#
# def get_token(self) -> Token:
# return self.token
#
# def next_token(self, skip_whitespace=True):
# if self.token and self.token.type == TokenKind.EOF:
# return False
#
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE:
# self.pos += 1
# self.token = self.tokens[self.pos]
#
# return self.token.type != TokenKind.EOF
#
# def seek(self, pos):
# self.pos = pos
# self.token = self.tokens[self.pos]
# return True
#
# def rewind(self, offset, skip_whitespace=True):
# self.pos += offset
# self.token = self.tokens[self.pos]
#
# if skip_whitespace:
# while self.pos > 0 and (self.token.type == TokenKind.WHITESPACE or self.token.type == TokenKind.NEWLINE):
# self.pos -= 1
# self.token = self.tokens[self.pos]
#
# def initialize(self, context, concepts_definitions):
# """
# Adds a bunch of concepts, and how they can be recognized
# :param context: execution context
# :param concepts_definitions: dictionary of concept, concept_definition
# :return:
# """
#
# self.context = context
# self.sheerka = context.sheerka
# concepts_to_resolve = set()
#
# for concept, concept_def in concepts_definitions.items():
# # ## Gets the grammars
# context.log(f"Resolving grammar for '{concept}'", context.who)
# concept.init_key() # make sure that the key is initialized
# grammar = self.get_model(concept_def, concepts_to_resolve)
# self.concepts_grammars[concept] = grammar
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
#
# # ## Removes concepts with infinite recursions
# concepts_to_remove = self.detect_infinite_recursion(concepts_to_resolve)
# for concept in concepts_to_remove:
# concepts_to_resolve.remove(concept)
# del self.concepts_grammars[concept]
#
# if self.has_error:
# return self.sheerka.ret(self.name, False, self.error_sink)
# else:
# return self.sheerka.ret(self.name, True, self.concepts_grammars)
#
# def get_concept(self, concept_name):
# if concept_name in self.context.concepts:
# return self.context.concepts[concept_name]
# return self.sheerka.get_by_key(concept_name)
#
# def get_model(self, concept_def, concepts_to_resolve):
#
# # TODO
# # inner_get_model must not modify the initial ParsingExpression
# # A copy must be created
# def inner_get_model(expression):
# if isinstance(expression, Concept):
# if self.sheerka.isaset(self.context, expression):
# ret = ConceptGroupExpression(expression, rule_name=expression.name)
# else:
# ret = ConceptExpression(expression, rule_name=expression.name)
# concepts_to_resolve.add(expression)
# elif isinstance(expression, ConceptExpression): # it includes ConceptGroupExpression
# if expression.rule_name is None or expression.rule_name == "":
# expression.rule_name = expression.concept.name if isinstance(expression.concept, Concept) \
# else expression.concept
# if isinstance(expression.concept, str):
# concept = self.get_concept(expression.concept)
# if self.sheerka.is_known(concept):
# expression.concept = concept
# concepts_to_resolve.add(expression.concept)
# ret = expression
# elif isinstance(expression, str):
# ret = StrMatch(expression, ignore_case=self.ignore_case)
# elif isinstance(expression, StrMatch):
# ret = expression
# if ret.ignore_case is None:
# ret.ignore_case = self.ignore_case
# elif isinstance(expression, (Sequence, OrderedChoice, ZeroOrMore, OneOrMore, Optional)):
# ret = expression
# ret.nodes = [inner_get_model(e) for e in ret.elements]
# else:
# ret = self.add_error(GrammarErrorNode(f"Unrecognized grammar element '{expression}'."), False)
#
# # Translate separator expression.
# if isinstance(expression, Repetition) and expression.sep:
# expression.sep = inner_get_model(expression.sep)
#
# return ret
#
# model = inner_get_model(concept_def)
#
# return model
#
# def detect_infinite_recursion(self, concepts_to_resolve):
#
# # infinite recursion matcher
# def _is_infinite_recursion(ref_concept, node):
# if isinstance(node, ConceptExpression):
# if node.concept == ref_concept:
# return True
#
# if isinstance(node.concept, str):
# to_match = self.get_concept(node.concept)
# if self.sheerka.isinstance(to_match, BuiltinConcepts.UNKNOWN_CONCEPT):
# return False
# else:
# to_match = node.concept
#
# if to_match not in self.concepts_grammars:
# return False
#
# return _is_infinite_recursion(ref_concept, self.concepts_grammars[to_match])
#
# if isinstance(node, OrderedChoice):
# return _is_infinite_recursion(ref_concept, node.nodes[0])
#
# if isinstance(node, Sequence):
# for child in node.nodes:
# if _is_infinite_recursion(ref_concept, child):
# return True
# return False
#
# return False
#
# removed_concepts = []
# for e in concepts_to_resolve:
# if isinstance(e, str):
# e = self.get_concept(e)
# if self.sheerka.isinstance(e, BuiltinConcepts.UNKNOWN_CONCEPT):
# continue
#
# if e not in self.concepts_grammars:
# continue
#
# to_resolve = self.concepts_grammars[e]
# if _is_infinite_recursion(e, to_resolve):
# removed_concepts.append(e)
# return removed_concepts
#
# def parse(self, context, parser_input):
# if parser_input == "":
# return context.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.IS_EMPTY)
# )
#
# if not self.reset_parser(context, parser_input):
# return self.sheerka.ret(
# self.name,
# False,
# context.sheerka.new(BuiltinConcepts.ERROR, body=self.error_sink))
#
# concepts_found = [[]]
# unrecognized_tokens = None
# has_unrecognized = False
#
# # concepts_found is actually a list of lists:
# # the first dimension is the number of possibilities found,
# # the second dimension is the number of concepts found under one possibility.
# #
# # Example 1
# # concept foo : 'one' 'two'
# # concept bar : 'one' 'two'
# # input 'one two' -> will produce two possibilities (foo and bar).
# #
# # Example 2
# # concept foo : 'one'
# # concept bar : 'two'
# # input 'one two' -> will produce one possibility which is (foo, bar) (foo then bar)
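# #
# # A minimal sketch of the product semantics assumed below (core.utils.product
# # is taken to behave like an append-style cartesian product; an assumption,
# # since its implementation is not shown here):
# #
# #     def product(possibilities, new_nodes):
# #         # [[a], [b]] x [c, d] -> [[a, c], [a, d], [b, c], [b, d]]
# #         return [choice + [node]
# #                 for choice in possibilities for node in new_nodes]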
#
# while True:
# init_pos = self.pos
# res = []
#
# for concept, grammar in self.concepts_grammars.items():
# self.seek(init_pos)
# node = grammar.parse(self) # a node is TerminalNode or NonTerminalNode
# if node is not None and node.end != -1:
# updated_concept = self.finalize_concept(context.sheerka, concept, node)
# concept_node = ConceptNode(
# updated_concept,
# node.start,
# node.end,
# self.tokens[node.start: node.end + 1],
# None,
# node)
# res.append(concept_node)
#
# if len(res) == 0: # not recognized
# self.seek(init_pos)
# if unrecognized_tokens:
# unrecognized_tokens.add_token(self.get_token(), init_pos)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(init_pos, init_pos, [self.get_token()])
#
# if not self.next_token(False):
# break
#
# else: # some concepts are recognized
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
# unrecognized_tokens = None
#
# res = self.get_bests(res)  # only keep the concepts that eat the most tokens
# concepts_found = core.utils.product(concepts_found, res)
#
# # loop
# self.seek(res[0].end)
# if not self.next_token(False):
# break
#
# # Fix the source for unrecognized tokens
# if unrecognized_tokens and unrecognized_tokens.not_whitespace():
# unrecognized_tokens.fix_source()
# concepts_found = core.utils.product(concepts_found, [unrecognized_tokens])
# has_unrecognized = True
#
# # Return as many ReturnValues as there are choices found
# ret = []
# for choice in concepts_found:
# ret.append(
# self.sheerka.ret(
# self.name,
# not has_unrecognized,
# self.sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input,
# body=choice,
# try_parsed=choice)))
#
# if len(ret) == 1:
# self.log_result(context, parser_input, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input, ret)
# return ret
#
# def finalize_concept(self, sheerka, template, underlying, init_empty_body=True):
# """
# Updates the properties of the concept,
# recursing when a property is itself a concept
# """
#
# # this cache is to make sure that we return the same concept for the same ConceptExpression
# _underlying_value_cache = {}
#
# def _add_prop(_concept, prop_name, value):
# """
# Adds a new entry,
# makes a list if the property already exists
# """
# if prop_name not in _concept.compiled or _concept.compiled[prop_name] is None:
# # new entry
# _concept.compiled[prop_name] = value
# else:
# # make a list if there was a value
# previous_value = _concept.compiled[prop_name]
# if isinstance(previous_value, list):
# previous_value.append(value)
# else:
# new_value = [previous_value, value]
# _concept.compiled[prop_name] = new_value
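# # e.g. starting from _concept.compiled == {}, _add_prop(c, 'op', x)
# # yields {'op': x}; a second _add_prop(c, 'op', y) yields {'op': [x, y]}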
#
# def _look_for_concept_match(_underlying):
# """
# At some point, either a StrMatch or a ConceptExpression match
# allowed the recognition.
# Look for the ConceptExpression match, recursing if needed
# """
# if isinstance(_underlying.parsing_expression, ConceptExpression):
# return _underlying
#
# if not isinstance(_underlying, NonTerminalNode):
# return None
#
# if len(_underlying.children) != 1:
# return None
#
# return _look_for_concept_match(_underlying.children[0])
#
# def _get_underlying_value(_underlying):
# concept_match_node = _look_for_concept_match(_underlying)
# if concept_match_node:
# # the value is a concept
# if id(concept_match_node) in _underlying_value_cache:
# result = _underlying_value_cache[id(concept_match_node)]
# else:
# ref_tpl = concept_match_node.parsing_expression.concept
# result = self.finalize_concept(sheerka, ref_tpl, concept_match_node.children[0], init_empty_body)
# _underlying_value_cache[id(concept_match_node)] = result
# else:
# # the value is a string
# result = DoNotResolve(_underlying.source)
#
# return result
#
# def _process_rule_name(_concept, _underlying):
# if _underlying.parsing_expression.rule_name:
# value = _get_underlying_value(_underlying)
# _add_prop(_concept, _underlying.parsing_expression.rule_name, value)
# _concept.metadata.need_validation = True
#
# if isinstance(_underlying, NonTerminalNode):
# for child in _underlying.children:
# _process_rule_name(_concept, child)
#
# key = (template.key, template.id) if template.id else template.key
# concept = sheerka.new(key)
# if init_empty_body and concept.metadata.body is None:
# value = _get_underlying_value(underlying)
# concept.compiled[ConceptParts.BODY] = value
# if underlying.parsing_expression.rule_name:
# _add_prop(concept, underlying.parsing_expression.rule_name, value)
# # KSI : Why don't we set concept.metadata.need_validation to True ?
#
# if isinstance(underlying, NonTerminalNode):
# for node in underlying.children:
# _process_rule_name(concept, node)
#
# return concept
#
# def encode_grammar(self, grammar):
# """
# Transforms the grammar into something that can easily be serialized
# :param grammar:
# :return:
# """
#
# def _encode(expression):
# if isinstance(expression, StrMatch):
# res = f"'{expression.to_match}'"
#
# elif isinstance(expression, ConceptExpression):
# res = core.utils.str_concept(expression.concept)
#
# elif isinstance(expression, Sequence):
# res = "(" + " ".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, OrderedChoice):
# res = "(" + "|".join(_encode(c) for c in expression.nodes) + ")"
#
# elif isinstance(expression, Optional):
# res = _encode(expression.nodes[0]) + "?"
#
# elif isinstance(expression, ZeroOrMore):
# res = _encode(expression.nodes[0]) + "*"
#
# elif isinstance(expression, OneOrMore):
# res = _encode(expression.nodes[0]) + "+"
#
# else:
# # guard against an unbound 'res' if an unsupported expression slips through
# res = f"<unrecognized grammar element '{expression}'>"
#
# if expression.rule_name:
# res += "=" + expression.rule_name
#
# return res
#
# result = {}
# for k, v in grammar.items():
# key = core.utils.str_concept(k)
# value = _encode(v)
# result[key] = value
# return result
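# # For example (hypothetical grammar, assuming core.utils.str_concept renders
# # a bare concept name), Sequence(StrMatch('one'), ConceptExpression(foo))
# # with rule_name 'pair' encodes to "('one' foo)=pair"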
#
# @staticmethod
# def get_bests(results):
# """
# Returns the results that consume the most tokens (greatest end position)
# :param results:
# :return:
# """
# by_end_pos = defaultdict(list)
# for result in results:
# by_end_pos[result.end].append(result)
#
# return by_end_pos[max(by_end_pos)]
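# # e.g. for results ending at positions [3, 5, 5], by_end_pos is
# # {3: [r1], 5: [r2, r3]} and get_bests returns [r2, r3]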
#
#
# class ParsingExpressionVisitor:
# """
# Visits a ParsingExpression tree, dispatching on the node's class name
# """
#
# def visit(self, parsing_expression):
# name = parsing_expression.__class__.__name__
#
# method = 'visit_' + name
# visitor = getattr(self, method, self.generic_visit)
# return visitor(parsing_expression)
#
# def generic_visit(self, parsing_expression):
# if hasattr(self, "visit_all"):
# self.visit_all(parsing_expression)
#
# for node in parsing_expression.elements:
# if isinstance(node, Concept):
# self.visit(ConceptExpression(node.key or node.name))
# elif isinstance(node, str):
# self.visit(StrMatch(node))
# else:
# self.visit(node)
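#
#
# # A hypothetical subclass, sketching the dispatch-by-class-name convention:
# #
# # class StrMatchCollector(ParsingExpressionVisitor):
# #     def __init__(self):
# #         self.matches = []
# #
# #     def visit_StrMatch(self, parsing_expression):
# #         # leaf handler, selected because the node's class is named StrMatch
# #         self.matches.append(parsing_expression.to_match)
# #
# # collector = StrMatchCollector()
# # collector.visit(grammar)  # other nodes fall back to generic_visit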
+108
View File
@@ -0,0 +1,108 @@
# # Tries to match something like
# # ConceptNode 'plus' ConceptNode
# #
# # Replaced by SyaNodeParser
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind, Token
# from parsers.BaseNodeParser import SourceCodeNode, ConceptNode, UnrecognizedTokensNode
# from parsers.BaseParser import BaseParser
# from parsers.MultipleConceptsParser import MultipleConceptsParser
# from core.concept import VARIABLE_PREFIX
#
# multiple_concepts_parser = MultipleConceptsParser()
#
#
# class ConceptsWithConceptsParser(BaseParser):
# def __init__(self, **kwargs):
# super().__init__("ConceptsWithConcepts", 25)
# self.enabled = False
#
# @staticmethod
# def get_tokens(nodes):
# tokens = []
#
# for node in nodes:
# if isinstance(node, ConceptNode):
# index, line, column = node.tokens[0].index, node.tokens[0].line, node.tokens[0].column
# tokens.append(Token(TokenKind.CONCEPT, node.concept, index, line, column))
# else:
# for token in node.tokens:
# if token.type == TokenKind.EOF:
# break
# elif token.type in (TokenKind.NEWLINE, TokenKind.WHITESPACE):
# continue
# else:
# tokens.append(token)
#
# return tokens
#
# @staticmethod
# def get_key(nodes):
# key = ""
# index = 0
# for node in nodes:
# if key:
# key += " "
#
# if isinstance(node, UnrecognizedTokensNode):
# key += node.source.strip()
# else:
# key += f"{VARIABLE_PREFIX}{index}"
# index += 1
#
# return key
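# #
# # e.g. for nodes [ConceptNode(number), UnrecognizedTokensNode('plus'),
# # ConceptNode(number)] (hypothetical nodes), the key is
# # f"{VARIABLE_PREFIX}0 plus {VARIABLE_PREFIX}1"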
#
# def finalize_concept(self, context, concept, nodes):
# index = 0
# for node in nodes:
#
# if isinstance(node, ConceptNode):
# prop_name = list(concept.props.keys())[index]
# concept.compiled[prop_name] = node.concept
# context.log(
# f"Setting property '{prop_name}='{node.concept}'.",
# self.name)
# index += 1
# elif isinstance(node, SourceCodeNode):
# prop_name = list(concept.props.keys())[index]
# sheerka = context.sheerka
# value = sheerka.new(BuiltinConcepts.PARSER_RESULT, parser=self, source=node.source, body=node.node)
# concept.compiled[prop_name] = [context.sheerka.ret(self.name, True, value)]
# context.log(
# f"Setting property '{prop_name}'='Python({node.source})'.",
# self.name)
# index += 1
#
# return concept
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, multiple_concepts_parser)
# if not nodes:
# return None
#
# concept_key = self.get_key(nodes)
# concept = sheerka.new(concept_key)
# if sheerka.isinstance(concept, BuiltinConcepts.UNKNOWN_CONCEPT):
# return sheerka.ret(
# self.name,
# False,
# sheerka.new(BuiltinConcepts.NOT_FOR_ME, body=parser_input.body))
#
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# for concept in concepts:
# self.finalize_concept(context, concept, nodes)
#
# res = []
# for concept in concepts:
# res.append(sheerka.ret(
# self.name,
# True,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=concept,
# try_parsed=None)))
#
# return res[0] if len(res) == 1 else res
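#
# # End-to-end sketch (hypothetical input): for "3 plus 4", the lexer nodes
# # [ConceptNode(3), UnrecognizedTokensNode('plus'), ConceptNode(4)] yield the
# # key f"{VARIABLE_PREFIX}0 plus {VARIABLE_PREFIX}1"; if a concept with that
# # key is known, finalize_concept copies the two ConceptNodes into its first
# # two properties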
+163
View File
@@ -0,0 +1,163 @@
# # to be replaced by SyaNodeParser
# import ast
#
# from core.builtin_concepts import BuiltinConcepts
# from core.tokenizer import TokenKind
# from parsers.BaseNodeParser import SourceCodeNode
# from parsers.BaseParser import BaseParser
# from parsers.BnfNodeParser import BnfNodeParser, UnrecognizedTokensNode, ConceptNode
# import core.utils
# from parsers.PythonParser import PythonParser
#
# concept_lexer_parser = BnfNodeParser()
#
#
# class MultipleConceptsParser(BaseParser):
# """
# Parser that takes the result of BnfNodeParser and
# tries to resolve the unrecognized tokens token by token.
#
# It is a success when it returns a list of ConceptNode instances exclusively
# """
#
# def __init__(self, **kwargs):
# BaseParser.__init__(self, "MultipleConcepts", 45)
# self.enabled = False
#
# @staticmethod
# def finalize(nodes_found, unrecognized_tokens):
# if not unrecognized_tokens:
# return nodes_found, unrecognized_tokens
#
# unrecognized_tokens.fix_source()
# if unrecognized_tokens.not_whitespace():
# nodes_found = core.utils.product(nodes_found, [unrecognized_tokens])
#
# return nodes_found, None
#
# @staticmethod
# def create_or_add(unrecognized_tokens, token, index):
# if unrecognized_tokens:
# unrecognized_tokens.add_token(token, index)
# else:
# unrecognized_tokens = UnrecognizedTokensNode(index, index, [token])
# return unrecognized_tokens
#
# def parse(self, context, parser_input):
# sheerka = context.sheerka
# nodes = self.get_input_as_lexer_nodes(parser_input, concept_lexer_parser)
# if not nodes:
# return None
#
# nodes_found = [[]]
# concepts_only = True
#
# for node in nodes:
# if isinstance(node, UnrecognizedTokensNode):
# unrecognized_tokens = None
# i = 0
#
# while i < len(node.tokens):
#
# token_index = node.start + i
# token = node.tokens[i]
#
# concepts_nodes = self.get_concepts_nodes(context, token_index, token)
# if concepts_nodes is not None:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, concepts_nodes)
# i += 1
# continue
#
# source_code_node = self.get_source_code_node(context, token_index, node.tokens[i:])
# if source_code_node:
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
# nodes_found = core.utils.product(nodes_found, [source_code_node])
# i += len(source_code_node.tokens)
# continue
#
# # not a concept nor some source code
# unrecognized_tokens = self.create_or_add(unrecognized_tokens, token, token_index)
# concepts_only &= token.type in (TokenKind.WHITESPACE, TokenKind.NEWLINE)
# i += 1
#
# # finish processing if needed
# nodes_found, unrecognized_tokens = self.finalize(nodes_found, unrecognized_tokens)
#
# else:
# nodes_found = core.utils.product(nodes_found, [node])
#
# ret = []
# for choice in nodes_found:
# ret.append(
# sheerka.ret(
# self.name,
# concepts_only,
# sheerka.new(
# BuiltinConcepts.PARSER_RESULT,
# parser=self,
# source=parser_input.source,
# body=choice,
# try_parsed=None))
# )
#
# if len(ret) == 1:
# self.log_result(context, parser_input.source, ret[0])
# return ret[0]
# else:
# self.log_multiple_results(context, parser_input.source, ret)
# return ret
#
# @staticmethod
# def get_concepts_nodes(context, index, token):
# """
# Tries to recognize a concept
# # from the universe of all known concepts
# """
#
# if token.type != TokenKind.IDENTIFIER:
# return None
#
# concept = context.new_concept(token.value)
# if hasattr(concept, "__iter__") or context.sheerka.is_known(concept):
# concepts = concept if hasattr(concept, "__iter__") else [concept]
# concepts_nodes = [ConceptNode(c, index, index, [token], token.value) for c in concepts]
# return concepts_nodes
#
# return None
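# #
# # e.g. an IDENTIFIER token 'color' (hypothetical name) that resolves to a
# # known concept yields [ConceptNode(color, index, index, [token], 'color')]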
#
# @staticmethod
# def get_source_code_node(context, index, tokens):
# """
# Tries to recognize source code.
# For the time being, only Python is supported
# :param context:
# :param tokens:
# :param index:
# :return:
# """
#
# if len(tokens) == 0 or (len(tokens) == 1 and tokens[0].type == TokenKind.EOF):
# return None
#
# end_index = len(tokens)
# while end_index > 0:
# parser = PythonParser()
# tokens_to_parse = tokens[:end_index]
# res = parser.parse(context, tokens_to_parse)
# if res.status:
# # only expressions are accepted
# ast_ = res.value.value.ast_
# if not isinstance(ast_, ast.Expression):
# return None
# try:
# compiled = compile(ast_, "<string>", "eval")
# eval(compiled, {}, {})
# except Exception:
# return None
#
# source = BaseParser.get_text_from_tokens(tokens_to_parse)
# return SourceCodeNode(res.value.value, index, index + end_index - 1, tokens_to_parse, source)
# end_index -= 1
#
# return None
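# #
# # e.g. for the tokens of "1 + 2 foo" (hypothetical input), the loop first
# # tries the whole span, then shrinks from the right until "1 + 2" parses,
# # compiles and evaluates as a Python expression, and returns a
# # SourceCodeNode covering just those tokens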